blob: 0af09e98a2178cdffad35454ab195372d54c1b92 [file] [log] [blame]
Reid Kleckner7df03c22013-07-16 17:14:33 +00001//===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/Support/ConvertUTF.h"
Mehdi Aminib550cb12016-04-18 09:17:29 +000011#include "llvm/ADT/ArrayRef.h"
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000012#include "llvm/Support/Format.h"
Reid Kleckner7df03c22013-07-16 17:14:33 +000013#include "gtest/gtest.h"
14#include <string>
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +000015#include <utility>
Chandler Carruthd9903882015-01-14 11:23:27 +000016#include <vector>
Reid Kleckner7df03c22013-07-16 17:14:33 +000017
18using namespace llvm;
19
20TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
21 // Src is the look of disapproval.
22 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
23 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
24 std::string Result;
25 bool Success = convertUTF16ToUTF8String(Ref, Result);
26 EXPECT_TRUE(Success);
27 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
28 EXPECT_EQ(Expected, Result);
29}
30
31TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
32 // Src is the look of disapproval.
33 static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
34 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
35 std::string Result;
36 bool Success = convertUTF16ToUTF8String(Ref, Result);
37 EXPECT_TRUE(Success);
38 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
39 EXPECT_EQ(Expected, Result);
40}
41
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000042TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
43 // Src is the look of disapproval.
44 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
45 StringRef Ref(Src, sizeof(Src) - 1);
46 SmallVector<UTF16, 5> Result;
47 bool Success = convertUTF8ToUTF16String(Ref, Result);
48 EXPECT_TRUE(Success);
49 static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
Eric Christopher7aebb322015-01-27 01:01:39 +000050 ASSERT_EQ(3u, Result.size());
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000051 for (int I = 0, E = 3; I != E; ++I)
52 EXPECT_EQ(Expected[I], Result[I]);
53}
54
Reid Kleckner7df03c22013-07-16 17:14:33 +000055TEST(ConvertUTFTest, OddLengthInput) {
56 std::string Result;
Craig Toppere1d12942014-08-27 05:25:25 +000057 bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000058 EXPECT_FALSE(Success);
59}
60
61TEST(ConvertUTFTest, Empty) {
62 std::string Result;
Marianne Mailhot-Sarrasin7423f402016-03-11 15:59:32 +000063 bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(None), Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000064 EXPECT_TRUE(Success);
65 EXPECT_TRUE(Result.empty());
66}
67
68TEST(ConvertUTFTest, HasUTF16BOM) {
Craig Toppere1d12942014-08-27 05:25:25 +000069 bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000070 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000071 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000072 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000073 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
Reid Kleckner7df03c22013-07-16 17:14:33 +000074 EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
Craig Toppere1d12942014-08-27 05:25:25 +000075 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6));
Reid Kleckner7df03c22013-07-16 17:14:33 +000076 EXPECT_TRUE(HasBOM);
77
Craig Toppere1d12942014-08-27 05:25:25 +000078 HasBOM = hasUTF16ByteOrderMark(None);
Reid Kleckner7df03c22013-07-16 17:14:33 +000079 EXPECT_FALSE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000080 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
Reid Kleckner7df03c22013-07-16 17:14:33 +000081 EXPECT_FALSE(HasBOM);
82}
Dmitri Gribenko1089db02014-06-16 11:09:46 +000083
Marianne Mailhot-Sarrasin7423f402016-03-11 15:59:32 +000084TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
85 // Src is the look of disapproval.
86 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
87 ArrayRef<UTF16> SrcRef = makeArrayRef((const UTF16 *)Src, 4);
88 std::string Result;
89 bool Success = convertUTF16ToUTF8String(SrcRef, Result);
90 EXPECT_TRUE(Success);
91 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
92 EXPECT_EQ(Expected, Result);
93}
94
95TEST(ConvertUTFTest, ConvertUTF8toWide) {
96 // Src is the look of disapproval.
97 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
98 std::wstring Result;
99 bool Success = ConvertUTF8toWide((const char*)Src, Result);
100 EXPECT_TRUE(Success);
101 std::wstring Expected(L"\x0ca0_\x0ca0");
102 EXPECT_EQ(Expected, Result);
103 Result.clear();
104 Success = ConvertUTF8toWide(StringRef(Src, 7), Result);
105 EXPECT_TRUE(Success);
106 EXPECT_EQ(Expected, Result);
107}
108
109TEST(ConvertUTFTest, convertWideToUTF8) {
110 // Src is the look of disapproval.
111 static const wchar_t Src[] = L"\x0ca0_\x0ca0";
112 std::string Result;
113 bool Success = convertWideToUTF8(Src, Result);
114 EXPECT_TRUE(Success);
115 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
116 EXPECT_EQ(Expected, Result);
117}
118
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000119struct ConvertUTFResultContainer {
120 ConversionResult ErrorCode;
121 std::vector<unsigned> UnicodeScalars;
122
123 ConvertUTFResultContainer(ConversionResult ErrorCode)
124 : ErrorCode(ErrorCode) {}
125
126 ConvertUTFResultContainer
127 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
128 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
129 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
130 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
131 ConvertUTFResultContainer Result(*this);
132 if (US0 != 0x110000)
133 Result.UnicodeScalars.push_back(US0);
134 if (US1 != 0x110000)
135 Result.UnicodeScalars.push_back(US1);
136 if (US2 != 0x110000)
137 Result.UnicodeScalars.push_back(US2);
138 if (US3 != 0x110000)
139 Result.UnicodeScalars.push_back(US3);
140 if (US4 != 0x110000)
141 Result.UnicodeScalars.push_back(US4);
142 if (US5 != 0x110000)
143 Result.UnicodeScalars.push_back(US5);
144 if (US6 != 0x110000)
145 Result.UnicodeScalars.push_back(US6);
146 if (US7 != 0x110000)
147 Result.UnicodeScalars.push_back(US7);
148 return Result;
149 }
150};
151
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000152std::pair<ConversionResult, std::vector<unsigned>>
153ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
154 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
155
156 const UTF8 *SourceNext = SourceStart;
157 std::vector<UTF32> Decoded(S.size(), 0);
158 UTF32 *TargetStart = Decoded.data();
159
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000160 auto ErrorCode =
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000161 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
162 Decoded.data() + Decoded.size(), lenientConversion);
163
164 Decoded.resize(TargetStart - Decoded.data());
165
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000166 return std::make_pair(ErrorCode, Decoded);
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000167}
168
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000169std::pair<ConversionResult, std::vector<unsigned>>
170ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
171 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
172
173 const UTF8 *SourceNext = SourceStart;
174 std::vector<UTF32> Decoded(S.size(), 0);
175 UTF32 *TargetStart = Decoded.data();
176
177 auto ErrorCode = ConvertUTF8toUTF32Partial(
178 &SourceNext, SourceStart + S.size(), &TargetStart,
179 Decoded.data() + Decoded.size(), lenientConversion);
180
181 Decoded.resize(TargetStart - Decoded.data());
182
183 return std::make_pair(ErrorCode, Decoded);
184}
185
186::testing::AssertionResult
187CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
188 StringRef S, bool Partial = false) {
189 ConversionResult ErrorCode;
190 std::vector<unsigned> Decoded;
191 if (!Partial)
192 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
193 else
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000194 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
Dmitri Gribenkocbc7ae22015-01-10 05:03:29 +0000195
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000196 if (Expected.ErrorCode != ErrorCode)
197 return ::testing::AssertionFailure() << "Expected error code "
198 << Expected.ErrorCode << ", actual "
199 << ErrorCode;
200
201 if (Expected.UnicodeScalars != Decoded)
202 return ::testing::AssertionFailure()
203 << "Expected lenient decoded result:\n"
204 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
205 << "Actual result:\n" << ::testing::PrintToString(Decoded);
206
207 return ::testing::AssertionSuccess();
208}
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000209
210TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
211
212 //
213 // 1-byte sequences
214 //
215
216 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000217 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
218 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000219
220 //
221 // 2-byte sequences
222 //
223
224 // U+0283 LATIN SMALL LETTER ESH
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000225 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
226 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
227 "\xca\x83"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000228
229 // U+03BA GREEK SMALL LETTER KAPPA
230 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
231 // U+03C3 GREEK SMALL LETTER SIGMA
232 // U+03BC GREEK SMALL LETTER MU
233 // U+03B5 GREEK SMALL LETTER EPSILON
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000234 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
235 ConvertUTFResultContainer(conversionOK)
236 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
237 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000238
239 //
240 // 3-byte sequences
241 //
242
243 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
244 // U+6587 CJK UNIFIED IDEOGRAPH-6587
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000245 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
246 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
247 "\xe4\xbe\x8b\xe6\x96\x87"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000248
249 // U+D55C HANGUL SYLLABLE HAN
250 // U+AE00 HANGUL SYLLABLE GEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000251 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
252 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
253 "\xed\x95\x9c\xea\xb8\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000254
255 // U+1112 HANGUL CHOSEONG HIEUH
256 // U+1161 HANGUL JUNGSEONG A
257 // U+11AB HANGUL JONGSEONG NIEUN
258 // U+1100 HANGUL CHOSEONG KIYEOK
259 // U+1173 HANGUL JUNGSEONG EU
260 // U+11AF HANGUL JONGSEONG RIEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000261 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
262 ConvertUTFResultContainer(conversionOK)
263 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
264 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
265 "\xe1\x86\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000266
267 //
268 // 4-byte sequences
269 //
270
271 // U+E0100 VARIATION SELECTOR-17
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000272 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
273 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
274 "\xf3\xa0\x84\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000275
276 //
277 // First possible sequence of a certain length
278 //
279
280 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
282 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
283 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000284
285 // U+0080 PADDING CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000286 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
287 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
288 "\xc2\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000289
290 // U+0800 SAMARITAN LETTER ALAF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000291 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
292 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
293 "\xe0\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000294
295 // U+10000 LINEAR B SYLLABLE B008 A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000296 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
297 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
298 "\xf0\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000299
300 // U+200000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000301 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
302 ConvertUTFResultContainer(sourceIllegal)
303 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
304 "\xf8\x88\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000305
306 // U+4000000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000307 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
308 ConvertUTFResultContainer(sourceIllegal)
309 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
310 "\xfc\x84\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000311
312 //
313 // Last possible sequence of a certain length
314 //
315
316 // U+007F DELETE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000317 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
318 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000319
320 // U+07FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000321 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
322 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
323 "\xdf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000324
325 // U+FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000326 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
327 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
328 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000329
330 // U+1FFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000331 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
332 ConvertUTFResultContainer(sourceIllegal)
333 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
334 "\xf7\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000335
336 // U+3FFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000337 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
338 ConvertUTFResultContainer(sourceIllegal)
339 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
340 "\xfb\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000341
342 // U+7FFFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000343 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
344 ConvertUTFResultContainer(sourceIllegal)
345 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
346 "\xfd\xbf\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000347
348 //
349 // Other boundary conditions
350 //
351
352 // U+D7FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000353 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
354 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
355 "\xed\x9f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000356
357 // U+E000 (private use)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000358 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
359 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
360 "\xee\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000361
362 // U+FFFD REPLACEMENT CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000363 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
364 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
365 "\xef\xbf\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000366
367 // U+10FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000368 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
369 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
370 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000371
372 // U+110000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000373 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
374 ConvertUTFResultContainer(sourceIllegal)
375 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
376 "\xf4\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000377
378 //
379 // Unexpected continuation bytes
380 //
381
382 // A sequence of unexpected continuation bytes that don't follow a first
383 // byte, every byte is a maximal subpart.
384
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000385 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
386 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
387 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
388 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
389 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
390 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
391 "\x80\x80"));
392 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
393 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
394 "\x80\xbf"));
395 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
396 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
397 "\xbf\x80"));
398 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
399 ConvertUTFResultContainer(sourceIllegal)
400 .withScalars(0xfffd, 0xfffd, 0xfffd),
401 "\x80\xbf\x80"));
402 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
403 ConvertUTFResultContainer(sourceIllegal)
404 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
405 "\x80\xbf\x80\xbf"));
406 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
407 ConvertUTFResultContainer(sourceIllegal)
408 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
409 "\x80\xbf\x82\xbf\xaa"));
410 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
411 ConvertUTFResultContainer(sourceIllegal)
412 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
413 "\xaa\xb0\xbb\xbf\xaa\xa0"));
414 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
415 ConvertUTFResultContainer(sourceIllegal)
416 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
417 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000418
419 // All continuation bytes (0x80--0xbf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000420 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
421 ConvertUTFResultContainer(sourceIllegal)
422 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
423 0xfffd, 0xfffd, 0xfffd, 0xfffd)
424 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
425 0xfffd, 0xfffd, 0xfffd, 0xfffd)
426 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
427 0xfffd, 0xfffd, 0xfffd, 0xfffd)
428 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
429 0xfffd, 0xfffd, 0xfffd, 0xfffd)
430 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
431 0xfffd, 0xfffd, 0xfffd, 0xfffd)
432 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
433 0xfffd, 0xfffd, 0xfffd, 0xfffd)
434 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
435 0xfffd, 0xfffd, 0xfffd, 0xfffd)
436 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
437 0xfffd, 0xfffd, 0xfffd, 0xfffd),
438 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
439 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
440 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
441 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000442
443 //
444 // Lonely start bytes
445 //
446
447 // Start bytes of 2-byte sequences (0xc0--0xdf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000448 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
449 ConvertUTFResultContainer(sourceIllegal)
450 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
451 0xfffd, 0xfffd, 0xfffd, 0xfffd)
452 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
453 0xfffd, 0xfffd, 0xfffd, 0xfffd)
454 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
455 0xfffd, 0xfffd, 0xfffd, 0xfffd)
456 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
457 0xfffd, 0xfffd, 0xfffd, 0xfffd),
458 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
459 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000460
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000461 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
462 ConvertUTFResultContainer(sourceIllegal)
463 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
464 0xfffd, 0x0020, 0xfffd, 0x0020)
465 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466 0xfffd, 0x0020, 0xfffd, 0x0020)
467 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
468 0xfffd, 0x0020, 0xfffd, 0x0020)
469 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
470 0xfffd, 0x0020, 0xfffd, 0x0020)
471 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
472 0xfffd, 0x0020, 0xfffd, 0x0020)
473 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
474 0xfffd, 0x0020, 0xfffd, 0x0020)
475 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
476 0xfffd, 0x0020, 0xfffd, 0x0020)
477 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
478 0xfffd, 0x0020, 0xfffd, 0x0020),
479 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
480 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
481 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
482 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000483
484 // Start bytes of 3-byte sequences (0xe0--0xef).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000485 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
486 ConvertUTFResultContainer(sourceIllegal)
487 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
488 0xfffd, 0xfffd, 0xfffd, 0xfffd)
489 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
490 0xfffd, 0xfffd, 0xfffd, 0xfffd),
491 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000492
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000493 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
494 ConvertUTFResultContainer(sourceIllegal)
495 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
496 0xfffd, 0x0020, 0xfffd, 0x0020)
497 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
498 0xfffd, 0x0020, 0xfffd, 0x0020)
499 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
500 0xfffd, 0x0020, 0xfffd, 0x0020)
501 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
502 0xfffd, 0x0020, 0xfffd, 0x0020),
503 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
504 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000505
506 // Start bytes of 4-byte sequences (0xf0--0xf7).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000507 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
508 ConvertUTFResultContainer(sourceIllegal)
509 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
510 0xfffd, 0xfffd, 0xfffd, 0xfffd),
511 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000512
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000513 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
514 ConvertUTFResultContainer(sourceIllegal)
515 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
516 0xfffd, 0x0020, 0xfffd, 0x0020)
517 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
518 0xfffd, 0x0020, 0xfffd, 0x0020),
519 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000520
521 // Start bytes of 5-byte sequences (0xf8--0xfb).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000522 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
523 ConvertUTFResultContainer(sourceIllegal)
524 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
525 "\xf8\xf9\xfa\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000526
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000527 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
528 ConvertUTFResultContainer(sourceIllegal)
529 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
530 0xfffd, 0x0020, 0xfffd, 0x0020),
531 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000532
533 // Start bytes of 6-byte sequences (0xfc--0xfd).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000534 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
535 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
536 "\xfc\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000537
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000538 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
539 ConvertUTFResultContainer(sourceIllegal)
540 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
541 "\xfc\x20\xfd\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000542
543 //
544 // Other bytes (0xc0--0xc1, 0xfe--0xff).
545 //
546
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000547 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
548 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
549 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
550 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
551 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
552 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
553 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
554 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000555
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000556 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
557 ConvertUTFResultContainer(sourceIllegal)
558 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
559 "\xc0\xc1\xfe\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000560
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000561 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
562 ConvertUTFResultContainer(sourceIllegal)
563 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
564 "\xfe\xfe\xff\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000565
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000566 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
567 ConvertUTFResultContainer(sourceIllegal)
568 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
569 "\xfe\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000570
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000571 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
572 ConvertUTFResultContainer(sourceIllegal)
573 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
574 "\xff\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000575
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000576 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
577 ConvertUTFResultContainer(sourceIllegal)
578 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
579 0xfffd, 0x0020, 0xfffd, 0x0020),
580 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000581
582 //
583 // Sequences with one continuation byte missing
584 //
585
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000586 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
587 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
588 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
589 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
592 "\xe0\xa0"));
593 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
595 "\xe0\xbf"));
596 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
597 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
598 "\xe1\x80"));
599 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
600 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
601 "\xec\xbf"));
602 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
603 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
604 "\xed\x80"));
605 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
606 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
607 "\xed\x9f"));
608 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
609 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
610 "\xee\x80"));
611 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
612 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
613 "\xef\xbf"));
614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
616 "\xf0\x90\x80"));
617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
618 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
619 "\xf0\xbf\xbf"));
620 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
621 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
622 "\xf1\x80\x80"));
623 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
624 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
625 "\xf3\xbf\xbf"));
626 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
627 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
628 "\xf4\x80\x80"));
629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
630 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
631 "\xf4\x8f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000632
633 // Overlong sequences with one trailing byte missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000634 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
635 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
636 "\xc0"));
637 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
638 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
639 "\xc1"));
640 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
641 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
642 "\xe0\x80"));
643 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
644 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
645 "\xe0\x9f"));
646 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
647 ConvertUTFResultContainer(sourceIllegal)
648 .withScalars(0xfffd, 0xfffd, 0xfffd),
649 "\xf0\x80\x80"));
650 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
651 ConvertUTFResultContainer(sourceIllegal)
652 .withScalars(0xfffd, 0xfffd, 0xfffd),
653 "\xf0\x8f\x80"));
654 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
655 ConvertUTFResultContainer(sourceIllegal)
656 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
657 "\xf8\x80\x80\x80"));
658 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
659 ConvertUTFResultContainer(sourceIllegal)
660 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
661 "\xfc\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000662
663 // Sequences that represent surrogates with one trailing byte missing.
664 // High surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000665 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
666 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
667 "\xed\xa0"));
668 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
669 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
670 "\xed\xac"));
671 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
672 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
673 "\xed\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000674 // Low surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000675 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
676 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
677 "\xed\xb0"));
678 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
679 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
680 "\xed\xb4"));
681 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
682 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
683 "\xed\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000684
685 // Ill-formed 4-byte sequences.
686 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
687 // U+1100xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000688 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
689 ConvertUTFResultContainer(sourceIllegal)
690 .withScalars(0xfffd, 0xfffd, 0xfffd),
691 "\xf4\x90\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000692 // U+13FBxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000693 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
694 ConvertUTFResultContainer(sourceIllegal)
695 .withScalars(0xfffd, 0xfffd, 0xfffd),
696 "\xf4\xbf\xbf"));
697 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
698 ConvertUTFResultContainer(sourceIllegal)
699 .withScalars(0xfffd, 0xfffd, 0xfffd),
700 "\xf5\x80\x80"));
701 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
702 ConvertUTFResultContainer(sourceIllegal)
703 .withScalars(0xfffd, 0xfffd, 0xfffd),
704 "\xf6\x80\x80"));
705 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
706 ConvertUTFResultContainer(sourceIllegal)
707 .withScalars(0xfffd, 0xfffd, 0xfffd),
708 "\xf7\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000709 // U+1FFBxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000710 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
711 ConvertUTFResultContainer(sourceIllegal)
712 .withScalars(0xfffd, 0xfffd, 0xfffd),
713 "\xf7\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000714
715 // Ill-formed 5-byte sequences.
716 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
717 // U+2000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000718 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
719 ConvertUTFResultContainer(sourceIllegal)
720 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
721 "\xf8\x88\x80\x80"));
722 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
723 ConvertUTFResultContainer(sourceIllegal)
724 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
725 "\xf8\xbf\xbf\xbf"));
726 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
727 ConvertUTFResultContainer(sourceIllegal)
728 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
729 "\xf9\x80\x80\x80"));
730 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
731 ConvertUTFResultContainer(sourceIllegal)
732 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
733 "\xfa\x80\x80\x80"));
734 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
735 ConvertUTFResultContainer(sourceIllegal)
736 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
737 "\xfb\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000738 // U+3FFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000739 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
740 ConvertUTFResultContainer(sourceIllegal)
741 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
742 "\xfb\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000743
744 // Ill-formed 6-byte sequences.
745 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
746 // U+40000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000747 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
748 ConvertUTFResultContainer(sourceIllegal)
749 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
750 "\xfc\x84\x80\x80\x80"));
751 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
752 ConvertUTFResultContainer(sourceIllegal)
753 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
754 "\xfc\xbf\xbf\xbf\xbf"));
755 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
756 ConvertUTFResultContainer(sourceIllegal)
757 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
758 "\xfd\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000759 // U+7FFFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000760 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
761 ConvertUTFResultContainer(sourceIllegal)
762 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
763 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000764
765 //
766 // Sequences with two continuation bytes missing
767 //
768
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000769 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
770 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
771 "\xf0\x90"));
772 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
773 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
774 "\xf0\xbf"));
775 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
776 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
777 "\xf1\x80"));
778 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
779 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
780 "\xf3\xbf"));
781 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
783 "\xf4\x80"));
784 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
785 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
786 "\xf4\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000787
788 // Overlong sequences with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000789 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
790 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
791 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
792 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
793 "\xf0\x80"));
794 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
795 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
796 "\xf0\x8f"));
797 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
798 ConvertUTFResultContainer(sourceIllegal)
799 .withScalars(0xfffd, 0xfffd, 0xfffd),
800 "\xf8\x80\x80"));
801 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
802 ConvertUTFResultContainer(sourceIllegal)
803 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
804 "\xfc\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000805
806 // Sequences that represent surrogates with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000807 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
808 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000809
810 // Ill-formed 4-byte sequences.
811 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
812 // U+110yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000813 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
814 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
815 "\xf4\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000816 // U+13Fyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000817 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
818 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
819 "\xf4\xbf"));
820 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
821 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
822 "\xf5\x80"));
823 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
824 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
825 "\xf6\x80"));
826 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
827 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
828 "\xf7\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000829 // U+1FFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000830 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
831 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
832 "\xf7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000833
834 // Ill-formed 5-byte sequences.
835 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
836 // U+200yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000837 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
838 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
839 "\xf8\x88\x80"));
840 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
841 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
842 "\xf8\xbf\xbf"));
843 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
844 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
845 "\xf9\x80\x80"));
846 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
847 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
848 "\xfa\x80\x80"));
849 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
850 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
851 "\xfb\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000852 // U+3FFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000853 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
854 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
855 "\xfb\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000856
857 // Ill-formed 6-byte sequences.
858 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
859 // U+4000yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000860 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
861 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
862 "\xfc\x84\x80\x80"));
863 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
864 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
865 "\xfc\xbf\xbf\xbf"));
866 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
867 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
868 "\xfd\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000869 // U+7FFFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000870 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
871 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
872 "\xfd\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000873
874 //
875 // Sequences with three continuation bytes missing
876 //
877
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000878 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
879 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
880 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
881 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
882 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
883 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
884 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
885 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
886 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
887 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000888
889 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000890 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
891 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
892 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
893 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
894 "\xf8\x80"));
895 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
896 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
897 "\xfc\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000898
899 // Ill-formed 4-byte sequences.
900 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
901 // U+14yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000902 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
903 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
904 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
905 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000906 // U+1Cyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000907 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
908 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000909
910 // Ill-formed 5-byte sequences.
911 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
912 // U+20yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000913 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
914 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
915 "\xf8\x88"));
916 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
917 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
918 "\xf8\xbf"));
919 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
920 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
921 "\xf9\x80"));
922 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
923 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
924 "\xfa\x80"));
925 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
926 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
927 "\xfb\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000928 // U+3FCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000929 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
930 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
931 "\xfb\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000932
933 // Ill-formed 6-byte sequences.
934 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
935 // U+400yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000936 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
937 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
938 "\xfc\x84\x80"));
939 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
940 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
941 "\xfc\xbf\xbf"));
942 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
943 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
944 "\xfd\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000945 // U+7FFCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000946 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
947 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
948 "\xfd\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000949
950 //
951 // Sequences with four continuation bytes missing
952 //
953
954 // Ill-formed 5-byte sequences.
955 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
956 // U+uzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000957 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
958 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
959 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
960 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
961 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
962 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
963 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
964 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000965 // U+3zyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000966 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
967 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000968
969 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000970 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
971 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
972 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
973 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
974 "\xfc\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000975
976 // Ill-formed 6-byte sequences.
977 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
978 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000979 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
980 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
981 "\xfc\x84"));
982 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
983 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
984 "\xfc\xbf"));
985 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
986 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
987 "\xfd\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000988 // U+7Fzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000989 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
990 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
991 "\xfd\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000992
993 //
994 // Sequences with five continuation bytes missing
995 //
996
997 // Ill-formed 6-byte sequences.
998 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
999 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001000 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1001 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001002 // U+uuzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001003 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1004 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001005
1006 //
1007 // Consecutive sequences with trailing bytes missing
1008 //
1009
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001010 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1011 ConvertUTFResultContainer(sourceIllegal)
1012 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1013 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1014 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1015 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1016 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1017 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1018 "\xc0" "\xe0\x80" "\xf0\x80\x80"
1019 "\xf8\x80\x80\x80"
1020 "\xfc\x80\x80\x80\x80"
1021 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1022 "\xfb\xbf\xbf\xbf"
1023 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001024
1025 //
1026 // Overlong UTF-8 sequences
1027 //
1028
1029 // U+002F SOLIDUS
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001030 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1031 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001032
1033 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001034 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1035 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1036 "\xc0\xaf"));
1037 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1038 ConvertUTFResultContainer(sourceIllegal)
1039 .withScalars(0xfffd, 0xfffd, 0xfffd),
1040 "\xe0\x80\xaf"));
1041 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1042 ConvertUTFResultContainer(sourceIllegal)
1043 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1044 "\xf0\x80\x80\xaf"));
1045 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1046 ConvertUTFResultContainer(sourceIllegal)
1047 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1048 "\xf8\x80\x80\x80\xaf"));
1049 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1050 ConvertUTFResultContainer(sourceIllegal)
1051 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1052 "\xfc\x80\x80\x80\x80\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001053
1054 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001055 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1056 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1057 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001058
1059 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001060 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1061 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1062 "\xc0\x80"));
1063 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1064 ConvertUTFResultContainer(sourceIllegal)
1065 .withScalars(0xfffd, 0xfffd, 0xfffd),
1066 "\xe0\x80\x80"));
1067 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1068 ConvertUTFResultContainer(sourceIllegal)
1069 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1070 "\xf0\x80\x80\x80"));
1071 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1072 ConvertUTFResultContainer(sourceIllegal)
1073 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1074 "\xf8\x80\x80\x80\x80"));
1075 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1076 ConvertUTFResultContainer(sourceIllegal)
1077 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1078 "\xfc\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001079
1080 // Other overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001081 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1082 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1083 "\xc0\xbf"));
1084 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1085 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1086 "\xc1\x80"));
1087 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1088 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1089 "\xc1\xbf"));
1090 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1091 ConvertUTFResultContainer(sourceIllegal)
1092 .withScalars(0xfffd, 0xfffd, 0xfffd),
1093 "\xe0\x9f\xbf"));
1094 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1095 ConvertUTFResultContainer(sourceIllegal)
1096 .withScalars(0xfffd, 0xfffd, 0xfffd),
1097 "\xed\xa0\x80"));
1098 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1099 ConvertUTFResultContainer(sourceIllegal)
1100 .withScalars(0xfffd, 0xfffd, 0xfffd),
1101 "\xed\xbf\xbf"));
1102 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1103 ConvertUTFResultContainer(sourceIllegal)
1104 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1105 "\xf0\x8f\x80\x80"));
1106 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1107 ConvertUTFResultContainer(sourceIllegal)
1108 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1109 "\xf0\x8f\xbf\xbf"));
1110 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1111 ConvertUTFResultContainer(sourceIllegal)
1112 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1113 "\xf8\x87\xbf\xbf\xbf"));
1114 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1115 ConvertUTFResultContainer(sourceIllegal)
1116 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1117 "\xfc\x83\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001118
1119 //
1120 // Isolated surrogates
1121 //
1122
1123 // Unicode 6.3.0:
1124 //
1125 // D71. High-surrogate code point: A Unicode code point in the range
1126 // U+D800 to U+DBFF.
1127 //
1128 // D73. Low-surrogate code point: A Unicode code point in the range
1129 // U+DC00 to U+DFFF.
1130
1131 // Note: U+E0100 is <DB40 DD00> in UTF16.
1132
1133 // High surrogates
1134
1135 // U+D800
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001136 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1137 ConvertUTFResultContainer(sourceIllegal)
1138 .withScalars(0xfffd, 0xfffd, 0xfffd),
1139 "\xed\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001140
1141 // U+DB40
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001142 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1143 ConvertUTFResultContainer(sourceIllegal)
1144 .withScalars(0xfffd, 0xfffd, 0xfffd),
1145 "\xed\xac\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001146
1147 // U+DBFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001148 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1149 ConvertUTFResultContainer(sourceIllegal)
1150 .withScalars(0xfffd, 0xfffd, 0xfffd),
1151 "\xed\xaf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001152
1153 // Low surrogates
1154
1155 // U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001156 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1157 ConvertUTFResultContainer(sourceIllegal)
1158 .withScalars(0xfffd, 0xfffd, 0xfffd),
1159 "\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001160
1161 // U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001162 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1163 ConvertUTFResultContainer(sourceIllegal)
1164 .withScalars(0xfffd, 0xfffd, 0xfffd),
1165 "\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001166
1167 // U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001168 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1169 ConvertUTFResultContainer(sourceIllegal)
1170 .withScalars(0xfffd, 0xfffd, 0xfffd),
1171 "\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001172
1173 // Surrogate pairs
1174
1175 // U+D800 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001176 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1177 ConvertUTFResultContainer(sourceIllegal)
1178 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1179 "\xed\xa0\x80\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001180
1181 // U+D800 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001182 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1183 ConvertUTFResultContainer(sourceIllegal)
1184 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1185 "\xed\xa0\x80\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001186
1187 // U+D800 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001188 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1189 ConvertUTFResultContainer(sourceIllegal)
1190 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1191 "\xed\xa0\x80\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001192
1193 // U+DB40 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001194 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1195 ConvertUTFResultContainer(sourceIllegal)
1196 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1197 "\xed\xac\xa0\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001198
1199 // U+DB40 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001200 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1201 ConvertUTFResultContainer(sourceIllegal)
1202 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1203 "\xed\xac\xa0\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001204
1205 // U+DB40 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001206 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1207 ConvertUTFResultContainer(sourceIllegal)
1208 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1209 "\xed\xac\xa0\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001210
1211 // U+DBFF U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001212 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1213 ConvertUTFResultContainer(sourceIllegal)
1214 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1215 "\xed\xaf\xbf\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001216
1217 // U+DBFF U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001218 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1219 ConvertUTFResultContainer(sourceIllegal)
1220 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1221 "\xed\xaf\xbf\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001222
1223 // U+DBFF U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001224 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1225 ConvertUTFResultContainer(sourceIllegal)
1226 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1227 "\xed\xaf\xbf\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001228
1229 //
1230 // Noncharacters
1231 //
1232
1233 // Unicode 6.3.0:
1234 //
1235 // D14. Noncharacter: A code point that is permanently reserved for
1236 // internal use and that should never be interchanged. Noncharacters
1237 // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1238 // and the values U+FDD0..U+FDEF.
1239
1240 // U+FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001241 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1242 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1243 "\xef\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001244
1245 // U+FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001246 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1247 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1248 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001249
1250 // U+1FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001251 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1252 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1253 "\xf0\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001254
1255 // U+1FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001256 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1257 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1258 "\xf0\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001259
1260 // U+2FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001261 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1262 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1263 "\xf0\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001264
1265 // U+2FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001266 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1267 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1268 "\xf0\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001269
1270 // U+3FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001271 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1272 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1273 "\xf0\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001274
1275 // U+3FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001276 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1277 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1278 "\xf0\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001279
1280 // U+4FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1282 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1283 "\xf1\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001284
1285 // U+4FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001286 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1287 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1288 "\xf1\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001289
1290 // U+5FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001291 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1292 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1293 "\xf1\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001294
1295 // U+5FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001296 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1297 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1298 "\xf1\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001299
1300 // U+6FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001301 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1302 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1303 "\xf1\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001304
1305 // U+6FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001306 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1307 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1308 "\xf1\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001309
1310 // U+7FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001311 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1312 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1313 "\xf1\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001314
1315 // U+7FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001316 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1317 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1318 "\xf1\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001319
1320 // U+8FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001321 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1322 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1323 "\xf2\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001324
1325 // U+8FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001326 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1327 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1328 "\xf2\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001329
1330 // U+9FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001331 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1332 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1333 "\xf2\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001334
1335 // U+9FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001336 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1337 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1338 "\xf2\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001339
1340 // U+AFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001341 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1342 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1343 "\xf2\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001344
1345 // U+AFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001346 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1347 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1348 "\xf2\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001349
1350 // U+BFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001351 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1352 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1353 "\xf2\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001354
1355 // U+BFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1357 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1358 "\xf2\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001359
1360 // U+CFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001361 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1362 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1363 "\xf3\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001364
1365 // U+CFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001366 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1367 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1368 "\xf3\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001369
1370 // U+DFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001371 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1372 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1373 "\xf3\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001374
1375 // U+DFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001376 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1377 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1378 "\xf3\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001379
1380 // U+EFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001381 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1382 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1383 "\xf3\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001384
1385 // U+EFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001386 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1387 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1388 "\xf3\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001389
1390 // U+FFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001391 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1392 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1393 "\xf3\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001394
1395 // U+FFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001396 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1397 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1398 "\xf3\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001399
1400 // U+10FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001401 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1402 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1403 "\xf4\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001404
1405 // U+10FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001406 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1407 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1408 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001409
1410 // U+FDD0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001411 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1412 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1413 "\xef\xb7\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001414
1415 // U+FDD1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001416 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1417 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1418 "\xef\xb7\x91"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001419
1420 // U+FDD2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001421 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1422 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1423 "\xef\xb7\x92"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001424
1425 // U+FDD3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001426 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1427 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1428 "\xef\xb7\x93"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001429
1430 // U+FDD4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001431 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1432 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1433 "\xef\xb7\x94"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001434
1435 // U+FDD5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001436 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1437 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1438 "\xef\xb7\x95"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001439
1440 // U+FDD6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001441 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1442 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1443 "\xef\xb7\x96"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001444
1445 // U+FDD7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001446 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1447 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1448 "\xef\xb7\x97"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001449
1450 // U+FDD8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001451 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1452 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1453 "\xef\xb7\x98"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001454
1455 // U+FDD9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001456 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1457 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1458 "\xef\xb7\x99"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001459
1460 // U+FDDA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001461 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1462 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1463 "\xef\xb7\x9a"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001464
1465 // U+FDDB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001466 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1467 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1468 "\xef\xb7\x9b"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001469
1470 // U+FDDC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001471 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1472 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1473 "\xef\xb7\x9c"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001474
1475 // U+FDDD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001476 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1477 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1478 "\xef\xb7\x9d"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001479
1480 // U+FDDE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001481 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1482 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1483 "\xef\xb7\x9e"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001484
1485 // U+FDDF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001486 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1487 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1488 "\xef\xb7\x9f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001489
1490 // U+FDE0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001491 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1492 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1493 "\xef\xb7\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001494
1495 // U+FDE1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001496 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1497 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1498 "\xef\xb7\xa1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001499
1500 // U+FDE2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001501 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1502 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1503 "\xef\xb7\xa2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001504
1505 // U+FDE3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001506 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1507 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1508 "\xef\xb7\xa3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001509
1510 // U+FDE4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1512 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1513 "\xef\xb7\xa4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001514
1515 // U+FDE5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001516 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1517 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1518 "\xef\xb7\xa5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001519
1520 // U+FDE6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001521 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1522 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1523 "\xef\xb7\xa6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001524
1525 // U+FDE7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001526 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1527 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1528 "\xef\xb7\xa7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001529
1530 // U+FDE8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001531 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1532 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1533 "\xef\xb7\xa8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001534
1535 // U+FDE9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001536 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1537 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1538 "\xef\xb7\xa9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001539
1540 // U+FDEA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001541 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1542 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1543 "\xef\xb7\xaa"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001544
1545 // U+FDEB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001546 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1547 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1548 "\xef\xb7\xab"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001549
1550 // U+FDEC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001551 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1552 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1553 "\xef\xb7\xac"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001554
1555 // U+FDED
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001556 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1557 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1558 "\xef\xb7\xad"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001559
1560 // U+FDEE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001561 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1562 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1563 "\xef\xb7\xae"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001564
1565 // U+FDEF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001566 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1567 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1568 "\xef\xb7\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001569
1570 // U+FDF0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001571 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1572 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1573 "\xef\xb7\xb0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001574
1575 // U+FDF1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001576 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1577 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1578 "\xef\xb7\xb1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001579
1580 // U+FDF2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001581 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1582 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1583 "\xef\xb7\xb2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001584
1585 // U+FDF3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001586 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1587 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1588 "\xef\xb7\xb3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001589
1590 // U+FDF4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001591 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1592 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1593 "\xef\xb7\xb4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001594
1595 // U+FDF5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001596 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1597 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1598 "\xef\xb7\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001599
1600 // U+FDF6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001601 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1602 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1603 "\xef\xb7\xb6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001604
1605 // U+FDF7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001606 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1607 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1608 "\xef\xb7\xb7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001609
1610 // U+FDF8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001611 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1612 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1613 "\xef\xb7\xb8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001614
1615 // U+FDF9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001616 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1617 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1618 "\xef\xb7\xb9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001619
1620 // U+FDFA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001621 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1622 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1623 "\xef\xb7\xba"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001624
1625 // U+FDFB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001626 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1627 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1628 "\xef\xb7\xbb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001629
1630 // U+FDFC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001631 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1632 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1633 "\xef\xb7\xbc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001634
1635 // U+FDFD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001636 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1637 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1638 "\xef\xb7\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001639
1640 // U+FDFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001641 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1642 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1643 "\xef\xb7\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001644
1645 // U+FDFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001646 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1647 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1648 "\xef\xb7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001649}
1650
1651TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1652 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001653 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1654 ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1655 "\x41", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001656
1657 //
1658 // Sequences with one continuation byte missing
1659 //
1660
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001661 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1662 ConvertUTFResultContainer(sourceExhausted),
1663 "\xc2", true));
1664 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1665 ConvertUTFResultContainer(sourceExhausted),
1666 "\xdf", true));
1667 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1668 ConvertUTFResultContainer(sourceExhausted),
1669 "\xe0\xa0", true));
1670 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1671 ConvertUTFResultContainer(sourceExhausted),
1672 "\xe0\xbf", true));
1673 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1674 ConvertUTFResultContainer(sourceExhausted),
1675 "\xe1\x80", true));
1676 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1677 ConvertUTFResultContainer(sourceExhausted),
1678 "\xec\xbf", true));
1679 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1680 ConvertUTFResultContainer(sourceExhausted),
1681 "\xed\x80", true));
1682 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1683 ConvertUTFResultContainer(sourceExhausted),
1684 "\xed\x9f", true));
1685 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1686 ConvertUTFResultContainer(sourceExhausted),
1687 "\xee\x80", true));
1688 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1689 ConvertUTFResultContainer(sourceExhausted),
1690 "\xef\xbf", true));
1691 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1692 ConvertUTFResultContainer(sourceExhausted),
1693 "\xf0\x90\x80", true));
1694 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1695 ConvertUTFResultContainer(sourceExhausted),
1696 "\xf0\xbf\xbf", true));
1697 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1698 ConvertUTFResultContainer(sourceExhausted),
1699 "\xf1\x80\x80", true));
1700 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1701 ConvertUTFResultContainer(sourceExhausted),
1702 "\xf3\xbf\xbf", true));
1703 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1704 ConvertUTFResultContainer(sourceExhausted),
1705 "\xf4\x80\x80", true));
1706 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1707 ConvertUTFResultContainer(sourceExhausted),
1708 "\xf4\x8f\xbf", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001709
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001710 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1711 ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
1712 "\x41\xc2", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001713}
1714