blob: dd6e0df3688fd9dea6051f0758fd8cb3071cf248 [file] [log] [blame]
//===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "llvm/Support/ConvertUTF.h"
Mehdi Aminib550cb12016-04-18 09:17:29 +000011#include "llvm/ADT/ArrayRef.h"
Reid Kleckner7df03c22013-07-16 17:14:33 +000012#include "gtest/gtest.h"
13#include <string>
Chandler Carruthd9903882015-01-14 11:23:27 +000014#include <vector>
Reid Kleckner7df03c22013-07-16 17:14:33 +000015
16using namespace llvm;
17
18TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
19 // Src is the look of disapproval.
20 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
21 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
22 std::string Result;
23 bool Success = convertUTF16ToUTF8String(Ref, Result);
24 EXPECT_TRUE(Success);
25 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
26 EXPECT_EQ(Expected, Result);
27}
28
29TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
30 // Src is the look of disapproval.
31 static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
32 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
33 std::string Result;
34 bool Success = convertUTF16ToUTF8String(Ref, Result);
35 EXPECT_TRUE(Success);
36 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
37 EXPECT_EQ(Expected, Result);
38}
39
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000040TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
41 // Src is the look of disapproval.
42 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
43 StringRef Ref(Src, sizeof(Src) - 1);
44 SmallVector<UTF16, 5> Result;
45 bool Success = convertUTF8ToUTF16String(Ref, Result);
46 EXPECT_TRUE(Success);
47 static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
Eric Christopher7aebb322015-01-27 01:01:39 +000048 ASSERT_EQ(3u, Result.size());
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000049 for (int I = 0, E = 3; I != E; ++I)
50 EXPECT_EQ(Expected[I], Result[I]);
51}
52
Reid Kleckner7df03c22013-07-16 17:14:33 +000053TEST(ConvertUTFTest, OddLengthInput) {
54 std::string Result;
Craig Toppere1d12942014-08-27 05:25:25 +000055 bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000056 EXPECT_FALSE(Success);
57}
58
59TEST(ConvertUTFTest, Empty) {
60 std::string Result;
Marianne Mailhot-Sarrasin7423f402016-03-11 15:59:32 +000061 bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(None), Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000062 EXPECT_TRUE(Success);
63 EXPECT_TRUE(Result.empty());
64}
65
66TEST(ConvertUTFTest, HasUTF16BOM) {
Craig Toppere1d12942014-08-27 05:25:25 +000067 bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000068 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000069 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000070 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000071 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
Reid Kleckner7df03c22013-07-16 17:14:33 +000072 EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
Craig Toppere1d12942014-08-27 05:25:25 +000073 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6));
Reid Kleckner7df03c22013-07-16 17:14:33 +000074 EXPECT_TRUE(HasBOM);
75
Craig Toppere1d12942014-08-27 05:25:25 +000076 HasBOM = hasUTF16ByteOrderMark(None);
Reid Kleckner7df03c22013-07-16 17:14:33 +000077 EXPECT_FALSE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000078 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
Reid Kleckner7df03c22013-07-16 17:14:33 +000079 EXPECT_FALSE(HasBOM);
80}
Dmitri Gribenko1089db02014-06-16 11:09:46 +000081
Marianne Mailhot-Sarrasin7423f402016-03-11 15:59:32 +000082TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
83 // Src is the look of disapproval.
84 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
85 ArrayRef<UTF16> SrcRef = makeArrayRef((const UTF16 *)Src, 4);
86 std::string Result;
87 bool Success = convertUTF16ToUTF8String(SrcRef, Result);
88 EXPECT_TRUE(Success);
89 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
90 EXPECT_EQ(Expected, Result);
91}
92
93TEST(ConvertUTFTest, ConvertUTF8toWide) {
94 // Src is the look of disapproval.
95 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
96 std::wstring Result;
97 bool Success = ConvertUTF8toWide((const char*)Src, Result);
98 EXPECT_TRUE(Success);
99 std::wstring Expected(L"\x0ca0_\x0ca0");
100 EXPECT_EQ(Expected, Result);
101 Result.clear();
102 Success = ConvertUTF8toWide(StringRef(Src, 7), Result);
103 EXPECT_TRUE(Success);
104 EXPECT_EQ(Expected, Result);
105}
106
107TEST(ConvertUTFTest, convertWideToUTF8) {
108 // Src is the look of disapproval.
109 static const wchar_t Src[] = L"\x0ca0_\x0ca0";
110 std::string Result;
111 bool Success = convertWideToUTF8(Src, Result);
112 EXPECT_TRUE(Success);
113 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
114 EXPECT_EQ(Expected, Result);
115}
116
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000117struct ConvertUTFResultContainer {
118 ConversionResult ErrorCode;
119 std::vector<unsigned> UnicodeScalars;
120
121 ConvertUTFResultContainer(ConversionResult ErrorCode)
122 : ErrorCode(ErrorCode) {}
123
124 ConvertUTFResultContainer
125 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
126 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
127 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
128 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
129 ConvertUTFResultContainer Result(*this);
130 if (US0 != 0x110000)
131 Result.UnicodeScalars.push_back(US0);
132 if (US1 != 0x110000)
133 Result.UnicodeScalars.push_back(US1);
134 if (US2 != 0x110000)
135 Result.UnicodeScalars.push_back(US2);
136 if (US3 != 0x110000)
137 Result.UnicodeScalars.push_back(US3);
138 if (US4 != 0x110000)
139 Result.UnicodeScalars.push_back(US4);
140 if (US5 != 0x110000)
141 Result.UnicodeScalars.push_back(US5);
142 if (US6 != 0x110000)
143 Result.UnicodeScalars.push_back(US6);
144 if (US7 != 0x110000)
145 Result.UnicodeScalars.push_back(US7);
146 return Result;
147 }
148};
149
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000150std::pair<ConversionResult, std::vector<unsigned>>
151ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
152 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
153
154 const UTF8 *SourceNext = SourceStart;
155 std::vector<UTF32> Decoded(S.size(), 0);
156 UTF32 *TargetStart = Decoded.data();
157
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000158 auto ErrorCode =
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000159 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
160 Decoded.data() + Decoded.size(), lenientConversion);
161
162 Decoded.resize(TargetStart - Decoded.data());
163
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000164 return std::make_pair(ErrorCode, Decoded);
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000165}
166
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000167std::pair<ConversionResult, std::vector<unsigned>>
168ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
169 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
170
171 const UTF8 *SourceNext = SourceStart;
172 std::vector<UTF32> Decoded(S.size(), 0);
173 UTF32 *TargetStart = Decoded.data();
174
175 auto ErrorCode = ConvertUTF8toUTF32Partial(
176 &SourceNext, SourceStart + S.size(), &TargetStart,
177 Decoded.data() + Decoded.size(), lenientConversion);
178
179 Decoded.resize(TargetStart - Decoded.data());
180
181 return std::make_pair(ErrorCode, Decoded);
182}
183
184::testing::AssertionResult
185CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
186 StringRef S, bool Partial = false) {
187 ConversionResult ErrorCode;
188 std::vector<unsigned> Decoded;
189 if (!Partial)
190 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
191 else
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000192 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
Dmitri Gribenkocbc7ae22015-01-10 05:03:29 +0000193
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000194 if (Expected.ErrorCode != ErrorCode)
195 return ::testing::AssertionFailure() << "Expected error code "
196 << Expected.ErrorCode << ", actual "
197 << ErrorCode;
198
199 if (Expected.UnicodeScalars != Decoded)
200 return ::testing::AssertionFailure()
201 << "Expected lenient decoded result:\n"
202 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
203 << "Actual result:\n" << ::testing::PrintToString(Decoded);
204
205 return ::testing::AssertionSuccess();
206}
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000207
208TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
209
210 //
211 // 1-byte sequences
212 //
213
214 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
216 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000217
218 //
219 // 2-byte sequences
220 //
221
222 // U+0283 LATIN SMALL LETTER ESH
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000223 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
224 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
225 "\xca\x83"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000226
227 // U+03BA GREEK SMALL LETTER KAPPA
228 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
229 // U+03C3 GREEK SMALL LETTER SIGMA
230 // U+03BC GREEK SMALL LETTER MU
231 // U+03B5 GREEK SMALL LETTER EPSILON
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000232 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
233 ConvertUTFResultContainer(conversionOK)
234 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
235 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000236
237 //
238 // 3-byte sequences
239 //
240
241 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
242 // U+6587 CJK UNIFIED IDEOGRAPH-6587
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000243 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
244 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
245 "\xe4\xbe\x8b\xe6\x96\x87"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000246
247 // U+D55C HANGUL SYLLABLE HAN
248 // U+AE00 HANGUL SYLLABLE GEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000249 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
250 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
251 "\xed\x95\x9c\xea\xb8\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000252
253 // U+1112 HANGUL CHOSEONG HIEUH
254 // U+1161 HANGUL JUNGSEONG A
255 // U+11AB HANGUL JONGSEONG NIEUN
256 // U+1100 HANGUL CHOSEONG KIYEOK
257 // U+1173 HANGUL JUNGSEONG EU
258 // U+11AF HANGUL JONGSEONG RIEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000259 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
260 ConvertUTFResultContainer(conversionOK)
261 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
262 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
263 "\xe1\x86\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000264
265 //
266 // 4-byte sequences
267 //
268
269 // U+E0100 VARIATION SELECTOR-17
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000270 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
271 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
272 "\xf3\xa0\x84\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000273
274 //
275 // First possible sequence of a certain length
276 //
277
278 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000279 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
280 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
281 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000282
283 // U+0080 PADDING CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000284 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
285 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
286 "\xc2\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000287
288 // U+0800 SAMARITAN LETTER ALAF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000289 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
290 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
291 "\xe0\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000292
293 // U+10000 LINEAR B SYLLABLE B008 A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000294 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
295 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
296 "\xf0\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000297
298 // U+200000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000299 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
300 ConvertUTFResultContainer(sourceIllegal)
301 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
302 "\xf8\x88\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000303
304 // U+4000000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000305 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
306 ConvertUTFResultContainer(sourceIllegal)
307 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
308 "\xfc\x84\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000309
310 //
311 // Last possible sequence of a certain length
312 //
313
314 // U+007F DELETE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000315 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
316 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000317
318 // U+07FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000319 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
320 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
321 "\xdf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000322
323 // U+FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000324 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
325 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
326 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000327
328 // U+1FFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000329 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
330 ConvertUTFResultContainer(sourceIllegal)
331 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
332 "\xf7\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000333
334 // U+3FFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000335 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
336 ConvertUTFResultContainer(sourceIllegal)
337 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
338 "\xfb\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000339
340 // U+7FFFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000341 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
342 ConvertUTFResultContainer(sourceIllegal)
343 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
344 "\xfd\xbf\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000345
346 //
347 // Other boundary conditions
348 //
349
350 // U+D7FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000351 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
352 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
353 "\xed\x9f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000354
355 // U+E000 (private use)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
357 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
358 "\xee\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000359
360 // U+FFFD REPLACEMENT CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000361 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
362 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
363 "\xef\xbf\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000364
365 // U+10FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000366 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
367 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
368 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000369
370 // U+110000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000371 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
372 ConvertUTFResultContainer(sourceIllegal)
373 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
374 "\xf4\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000375
376 //
377 // Unexpected continuation bytes
378 //
379
380 // A sequence of unexpected continuation bytes that don't follow a first
381 // byte, every byte is a maximal subpart.
382
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000383 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
384 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
385 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
386 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
387 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
388 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
389 "\x80\x80"));
390 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
391 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
392 "\x80\xbf"));
393 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
394 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
395 "\xbf\x80"));
396 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
397 ConvertUTFResultContainer(sourceIllegal)
398 .withScalars(0xfffd, 0xfffd, 0xfffd),
399 "\x80\xbf\x80"));
400 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
401 ConvertUTFResultContainer(sourceIllegal)
402 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
403 "\x80\xbf\x80\xbf"));
404 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
405 ConvertUTFResultContainer(sourceIllegal)
406 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
407 "\x80\xbf\x82\xbf\xaa"));
408 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
409 ConvertUTFResultContainer(sourceIllegal)
410 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
411 "\xaa\xb0\xbb\xbf\xaa\xa0"));
412 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
413 ConvertUTFResultContainer(sourceIllegal)
414 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
415 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000416
417 // All continuation bytes (0x80--0xbf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000418 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
419 ConvertUTFResultContainer(sourceIllegal)
420 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
421 0xfffd, 0xfffd, 0xfffd, 0xfffd)
422 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
423 0xfffd, 0xfffd, 0xfffd, 0xfffd)
424 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
425 0xfffd, 0xfffd, 0xfffd, 0xfffd)
426 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
427 0xfffd, 0xfffd, 0xfffd, 0xfffd)
428 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
429 0xfffd, 0xfffd, 0xfffd, 0xfffd)
430 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
431 0xfffd, 0xfffd, 0xfffd, 0xfffd)
432 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
433 0xfffd, 0xfffd, 0xfffd, 0xfffd)
434 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
435 0xfffd, 0xfffd, 0xfffd, 0xfffd),
436 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
437 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
438 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
439 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000440
441 //
442 // Lonely start bytes
443 //
444
445 // Start bytes of 2-byte sequences (0xc0--0xdf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000446 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
447 ConvertUTFResultContainer(sourceIllegal)
448 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
449 0xfffd, 0xfffd, 0xfffd, 0xfffd)
450 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
451 0xfffd, 0xfffd, 0xfffd, 0xfffd)
452 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
453 0xfffd, 0xfffd, 0xfffd, 0xfffd)
454 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
455 0xfffd, 0xfffd, 0xfffd, 0xfffd),
456 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
457 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000458
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000459 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
460 ConvertUTFResultContainer(sourceIllegal)
461 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
462 0xfffd, 0x0020, 0xfffd, 0x0020)
463 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
464 0xfffd, 0x0020, 0xfffd, 0x0020)
465 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466 0xfffd, 0x0020, 0xfffd, 0x0020)
467 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
468 0xfffd, 0x0020, 0xfffd, 0x0020)
469 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
470 0xfffd, 0x0020, 0xfffd, 0x0020)
471 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
472 0xfffd, 0x0020, 0xfffd, 0x0020)
473 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
474 0xfffd, 0x0020, 0xfffd, 0x0020)
475 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
476 0xfffd, 0x0020, 0xfffd, 0x0020),
477 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
478 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
479 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
480 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000481
482 // Start bytes of 3-byte sequences (0xe0--0xef).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000483 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
484 ConvertUTFResultContainer(sourceIllegal)
485 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
486 0xfffd, 0xfffd, 0xfffd, 0xfffd)
487 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
488 0xfffd, 0xfffd, 0xfffd, 0xfffd),
489 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000490
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000491 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
492 ConvertUTFResultContainer(sourceIllegal)
493 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
494 0xfffd, 0x0020, 0xfffd, 0x0020)
495 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
496 0xfffd, 0x0020, 0xfffd, 0x0020)
497 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
498 0xfffd, 0x0020, 0xfffd, 0x0020)
499 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
500 0xfffd, 0x0020, 0xfffd, 0x0020),
501 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
502 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000503
504 // Start bytes of 4-byte sequences (0xf0--0xf7).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000505 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
506 ConvertUTFResultContainer(sourceIllegal)
507 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
508 0xfffd, 0xfffd, 0xfffd, 0xfffd),
509 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000510
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
512 ConvertUTFResultContainer(sourceIllegal)
513 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
514 0xfffd, 0x0020, 0xfffd, 0x0020)
515 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
516 0xfffd, 0x0020, 0xfffd, 0x0020),
517 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000518
519 // Start bytes of 5-byte sequences (0xf8--0xfb).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000520 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
521 ConvertUTFResultContainer(sourceIllegal)
522 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
523 "\xf8\xf9\xfa\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000524
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000525 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
526 ConvertUTFResultContainer(sourceIllegal)
527 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
528 0xfffd, 0x0020, 0xfffd, 0x0020),
529 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000530
531 // Start bytes of 6-byte sequences (0xfc--0xfd).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000532 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
533 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
534 "\xfc\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000535
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000536 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
537 ConvertUTFResultContainer(sourceIllegal)
538 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
539 "\xfc\x20\xfd\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000540
541 //
542 // Other bytes (0xc0--0xc1, 0xfe--0xff).
543 //
544
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000545 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
546 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
547 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
548 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
549 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
550 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
551 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
552 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000553
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000554 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
555 ConvertUTFResultContainer(sourceIllegal)
556 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
557 "\xc0\xc1\xfe\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000558
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000559 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
560 ConvertUTFResultContainer(sourceIllegal)
561 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
562 "\xfe\xfe\xff\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000563
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000564 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
565 ConvertUTFResultContainer(sourceIllegal)
566 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
567 "\xfe\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000568
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000569 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
570 ConvertUTFResultContainer(sourceIllegal)
571 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
572 "\xff\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000573
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000574 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
575 ConvertUTFResultContainer(sourceIllegal)
576 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
577 0xfffd, 0x0020, 0xfffd, 0x0020),
578 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000579
580 //
581 // Sequences with one continuation byte missing
582 //
583
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000584 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
585 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
586 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
587 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
588 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
589 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
590 "\xe0\xa0"));
591 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
592 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
593 "\xe0\xbf"));
594 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
595 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
596 "\xe1\x80"));
597 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
598 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
599 "\xec\xbf"));
600 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
601 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
602 "\xed\x80"));
603 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
604 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
605 "\xed\x9f"));
606 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
607 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
608 "\xee\x80"));
609 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
610 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
611 "\xef\xbf"));
612 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
613 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
614 "\xf0\x90\x80"));
615 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
616 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
617 "\xf0\xbf\xbf"));
618 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
619 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
620 "\xf1\x80\x80"));
621 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
622 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
623 "\xf3\xbf\xbf"));
624 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
625 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
626 "\xf4\x80\x80"));
627 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
628 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
629 "\xf4\x8f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000630
631 // Overlong sequences with one trailing byte missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000632 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
633 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
634 "\xc0"));
635 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
636 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
637 "\xc1"));
638 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
639 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
640 "\xe0\x80"));
641 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
642 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
643 "\xe0\x9f"));
644 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
645 ConvertUTFResultContainer(sourceIllegal)
646 .withScalars(0xfffd, 0xfffd, 0xfffd),
647 "\xf0\x80\x80"));
648 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
649 ConvertUTFResultContainer(sourceIllegal)
650 .withScalars(0xfffd, 0xfffd, 0xfffd),
651 "\xf0\x8f\x80"));
652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
653 ConvertUTFResultContainer(sourceIllegal)
654 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
655 "\xf8\x80\x80\x80"));
656 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
657 ConvertUTFResultContainer(sourceIllegal)
658 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
659 "\xfc\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000660
661 // Sequences that represent surrogates with one trailing byte missing.
662 // High surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000663 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
664 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
665 "\xed\xa0"));
666 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
667 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
668 "\xed\xac"));
669 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
670 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
671 "\xed\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000672 // Low surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000673 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
674 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
675 "\xed\xb0"));
676 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
677 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
678 "\xed\xb4"));
679 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
680 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
681 "\xed\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000682
683 // Ill-formed 4-byte sequences.
684 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
685 // U+1100xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000686 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
687 ConvertUTFResultContainer(sourceIllegal)
688 .withScalars(0xfffd, 0xfffd, 0xfffd),
689 "\xf4\x90\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000690 // U+13FFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000691 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
692 ConvertUTFResultContainer(sourceIllegal)
693 .withScalars(0xfffd, 0xfffd, 0xfffd),
694 "\xf4\xbf\xbf"));
695 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
696 ConvertUTFResultContainer(sourceIllegal)
697 .withScalars(0xfffd, 0xfffd, 0xfffd),
698 "\xf5\x80\x80"));
699 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
700 ConvertUTFResultContainer(sourceIllegal)
701 .withScalars(0xfffd, 0xfffd, 0xfffd),
702 "\xf6\x80\x80"));
703 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
704 ConvertUTFResultContainer(sourceIllegal)
705 .withScalars(0xfffd, 0xfffd, 0xfffd),
706 "\xf7\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000707 // U+1FFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000708 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
709 ConvertUTFResultContainer(sourceIllegal)
710 .withScalars(0xfffd, 0xfffd, 0xfffd),
711 "\xf7\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000712
713 // Ill-formed 5-byte sequences.
714 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
715 // U+2000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000716 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
717 ConvertUTFResultContainer(sourceIllegal)
718 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
719 "\xf8\x88\x80\x80"));
720 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
721 ConvertUTFResultContainer(sourceIllegal)
722 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
723 "\xf8\xbf\xbf\xbf"));
724 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
725 ConvertUTFResultContainer(sourceIllegal)
726 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
727 "\xf9\x80\x80\x80"));
728 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
729 ConvertUTFResultContainer(sourceIllegal)
730 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
731 "\xfa\x80\x80\x80"));
732 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
733 ConvertUTFResultContainer(sourceIllegal)
734 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
735 "\xfb\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000736 // U+3FFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000737 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
738 ConvertUTFResultContainer(sourceIllegal)
739 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
740 "\xfb\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000741
742 // Ill-formed 6-byte sequences.
743 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
744 // U+40000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000745 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
746 ConvertUTFResultContainer(sourceIllegal)
747 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
748 "\xfc\x84\x80\x80\x80"));
749 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
750 ConvertUTFResultContainer(sourceIllegal)
751 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
752 "\xfc\xbf\xbf\xbf\xbf"));
753 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
754 ConvertUTFResultContainer(sourceIllegal)
755 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
756 "\xfd\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000757 // U+7FFFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000758 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
759 ConvertUTFResultContainer(sourceIllegal)
760 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
761 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000762
763 //
764 // Sequences with two continuation bytes missing
765 //
766
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000767 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
768 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
769 "\xf0\x90"));
770 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
771 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
772 "\xf0\xbf"));
773 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
774 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
775 "\xf1\x80"));
776 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
777 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
778 "\xf3\xbf"));
779 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
780 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
781 "\xf4\x80"));
782 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
783 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
784 "\xf4\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000785
786 // Overlong sequences with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000787 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
788 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
789 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
790 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
791 "\xf0\x80"));
792 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
793 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
794 "\xf0\x8f"));
795 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
796 ConvertUTFResultContainer(sourceIllegal)
797 .withScalars(0xfffd, 0xfffd, 0xfffd),
798 "\xf8\x80\x80"));
799 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
800 ConvertUTFResultContainer(sourceIllegal)
801 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
802 "\xfc\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000803
804 // Sequences that represent surrogates with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000805 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
806 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000807
808 // Ill-formed 4-byte sequences.
809 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
810 // U+110yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000811 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
812 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
813 "\xf4\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000814 // U+13Fyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000815 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
816 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
817 "\xf4\xbf"));
818 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
819 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
820 "\xf5\x80"));
821 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
822 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
823 "\xf6\x80"));
824 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
825 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
826 "\xf7\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000827 // U+1FFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000828 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
829 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
830 "\xf7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000831
832 // Ill-formed 5-byte sequences.
833 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
834 // U+200yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000835 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
836 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
837 "\xf8\x88\x80"));
838 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
839 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
840 "\xf8\xbf\xbf"));
841 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
842 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
843 "\xf9\x80\x80"));
844 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
845 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
846 "\xfa\x80\x80"));
847 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
848 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
849 "\xfb\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000850 // U+3FFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000851 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
852 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
853 "\xfb\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000854
855 // Ill-formed 6-byte sequences.
856 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
857 // U+4000yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000858 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
859 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
860 "\xfc\x84\x80\x80"));
861 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
862 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
863 "\xfc\xbf\xbf\xbf"));
864 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
865 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
866 "\xfd\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000867 // U+7FFFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000868 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
869 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
870 "\xfd\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000871
872 //
873 // Sequences with three continuation bytes missing
874 //
875
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000876 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
877 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
878 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
879 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
880 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
881 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
882 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
883 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
884 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
885 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000886
887 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000888 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
889 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
890 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
891 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
892 "\xf8\x80"));
893 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
894 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
895 "\xfc\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000896
897 // Ill-formed 4-byte sequences.
898 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
899 // U+14yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000900 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
901 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
902 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
903 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000904 // U+1Cyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000905 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
906 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000907
908 // Ill-formed 5-byte sequences.
909 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
910 // U+20yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000911 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
912 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
913 "\xf8\x88"));
914 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
915 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
916 "\xf8\xbf"));
917 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
918 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
919 "\xf9\x80"));
920 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
921 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
922 "\xfa\x80"));
923 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
924 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
925 "\xfb\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000926 // U+3FCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000927 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
928 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
929 "\xfb\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000930
931 // Ill-formed 6-byte sequences.
932 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
933 // U+400yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000934 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
935 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
936 "\xfc\x84\x80"));
937 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
938 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
939 "\xfc\xbf\xbf"));
940 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
941 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
942 "\xfd\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000943 // U+7FFCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000944 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
945 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
946 "\xfd\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000947
948 //
949 // Sequences with four continuation bytes missing
950 //
951
952 // Ill-formed 5-byte sequences.
953 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
954 // U+uzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000955 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
956 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
957 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
958 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
959 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
960 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
961 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
962 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000963 // U+3zyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000964 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
965 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000966
967 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000968 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
969 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
970 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
971 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
972 "\xfc\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000973
974 // Ill-formed 6-byte sequences.
975 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
976 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000977 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
978 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
979 "\xfc\x84"));
980 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
981 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
982 "\xfc\xbf"));
983 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
984 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
985 "\xfd\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000986 // U+7Fzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000987 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
988 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
989 "\xfd\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000990
991 //
992 // Sequences with five continuation bytes missing
993 //
994
995 // Ill-formed 6-byte sequences.
996 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
997 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000998 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
999 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001000 // U+uuzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001001 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1002 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001003
1004 //
1005 // Consecutive sequences with trailing bytes missing
1006 //
1007
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001008 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1009 ConvertUTFResultContainer(sourceIllegal)
1010 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1011 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1012 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1013 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1014 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1015 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1016 "\xc0" "\xe0\x80" "\xf0\x80\x80"
1017 "\xf8\x80\x80\x80"
1018 "\xfc\x80\x80\x80\x80"
1019 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1020 "\xfb\xbf\xbf\xbf"
1021 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001022
1023 //
1024 // Overlong UTF-8 sequences
1025 //
1026
1027 // U+002F SOLIDUS
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001028 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1029 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001030
1031 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001032 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1033 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1034 "\xc0\xaf"));
1035 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1036 ConvertUTFResultContainer(sourceIllegal)
1037 .withScalars(0xfffd, 0xfffd, 0xfffd),
1038 "\xe0\x80\xaf"));
1039 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1040 ConvertUTFResultContainer(sourceIllegal)
1041 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1042 "\xf0\x80\x80\xaf"));
1043 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1044 ConvertUTFResultContainer(sourceIllegal)
1045 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1046 "\xf8\x80\x80\x80\xaf"));
1047 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1048 ConvertUTFResultContainer(sourceIllegal)
1049 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1050 "\xfc\x80\x80\x80\x80\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001051
1052 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001053 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1054 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1055 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001056
1057 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001058 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1059 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1060 "\xc0\x80"));
1061 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1062 ConvertUTFResultContainer(sourceIllegal)
1063 .withScalars(0xfffd, 0xfffd, 0xfffd),
1064 "\xe0\x80\x80"));
1065 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1066 ConvertUTFResultContainer(sourceIllegal)
1067 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1068 "\xf0\x80\x80\x80"));
1069 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1070 ConvertUTFResultContainer(sourceIllegal)
1071 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1072 "\xf8\x80\x80\x80\x80"));
1073 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1074 ConvertUTFResultContainer(sourceIllegal)
1075 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1076 "\xfc\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001077
1078 // Other overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001079 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1080 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1081 "\xc0\xbf"));
1082 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1083 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1084 "\xc1\x80"));
1085 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1086 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1087 "\xc1\xbf"));
1088 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1089 ConvertUTFResultContainer(sourceIllegal)
1090 .withScalars(0xfffd, 0xfffd, 0xfffd),
1091 "\xe0\x9f\xbf"));
1092 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1093 ConvertUTFResultContainer(sourceIllegal)
1094 .withScalars(0xfffd, 0xfffd, 0xfffd),
1095 "\xed\xa0\x80"));
1096 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1097 ConvertUTFResultContainer(sourceIllegal)
1098 .withScalars(0xfffd, 0xfffd, 0xfffd),
1099 "\xed\xbf\xbf"));
1100 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1101 ConvertUTFResultContainer(sourceIllegal)
1102 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1103 "\xf0\x8f\x80\x80"));
1104 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1105 ConvertUTFResultContainer(sourceIllegal)
1106 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1107 "\xf0\x8f\xbf\xbf"));
1108 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1109 ConvertUTFResultContainer(sourceIllegal)
1110 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1111 "\xf8\x87\xbf\xbf\xbf"));
1112 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1113 ConvertUTFResultContainer(sourceIllegal)
1114 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1115 "\xfc\x83\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001116
1117 //
1118 // Isolated surrogates
1119 //
1120
1121 // Unicode 6.3.0:
1122 //
1123 // D71. High-surrogate code point: A Unicode code point in the range
1124 // U+D800 to U+DBFF.
1125 //
1126 // D73. Low-surrogate code point: A Unicode code point in the range
1127 // U+DC00 to U+DFFF.
1128
1129 // Note: U+E0100 is <DB40 DD00> in UTF16.
1130
1131 // High surrogates
1132
1133 // U+D800
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001134 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1135 ConvertUTFResultContainer(sourceIllegal)
1136 .withScalars(0xfffd, 0xfffd, 0xfffd),
1137 "\xed\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001138
1139 // U+DB40
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001140 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1141 ConvertUTFResultContainer(sourceIllegal)
1142 .withScalars(0xfffd, 0xfffd, 0xfffd),
1143 "\xed\xac\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001144
1145 // U+DBFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001146 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1147 ConvertUTFResultContainer(sourceIllegal)
1148 .withScalars(0xfffd, 0xfffd, 0xfffd),
1149 "\xed\xaf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001150
1151 // Low surrogates
1152
1153 // U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001154 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1155 ConvertUTFResultContainer(sourceIllegal)
1156 .withScalars(0xfffd, 0xfffd, 0xfffd),
1157 "\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001158
1159 // U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001160 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1161 ConvertUTFResultContainer(sourceIllegal)
1162 .withScalars(0xfffd, 0xfffd, 0xfffd),
1163 "\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001164
1165 // U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001166 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1167 ConvertUTFResultContainer(sourceIllegal)
1168 .withScalars(0xfffd, 0xfffd, 0xfffd),
1169 "\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001170
1171 // Surrogate pairs
1172
1173 // U+D800 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001174 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1175 ConvertUTFResultContainer(sourceIllegal)
1176 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1177 "\xed\xa0\x80\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001178
1179 // U+D800 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001180 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1181 ConvertUTFResultContainer(sourceIllegal)
1182 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1183 "\xed\xa0\x80\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001184
1185 // U+D800 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001186 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1187 ConvertUTFResultContainer(sourceIllegal)
1188 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1189 "\xed\xa0\x80\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001190
1191 // U+DB40 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001192 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1193 ConvertUTFResultContainer(sourceIllegal)
1194 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1195 "\xed\xac\xa0\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001196
1197 // U+DB40 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001198 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1199 ConvertUTFResultContainer(sourceIllegal)
1200 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1201 "\xed\xac\xa0\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001202
1203 // U+DB40 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001204 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1205 ConvertUTFResultContainer(sourceIllegal)
1206 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1207 "\xed\xac\xa0\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001208
1209 // U+DBFF U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001210 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1211 ConvertUTFResultContainer(sourceIllegal)
1212 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1213 "\xed\xaf\xbf\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001214
1215 // U+DBFF U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001216 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1217 ConvertUTFResultContainer(sourceIllegal)
1218 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1219 "\xed\xaf\xbf\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001220
1221 // U+DBFF U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001222 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1223 ConvertUTFResultContainer(sourceIllegal)
1224 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1225 "\xed\xaf\xbf\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001226
1227 //
1228 // Noncharacters
1229 //
1230
1231 // Unicode 6.3.0:
1232 //
1233 // D14. Noncharacter: A code point that is permanently reserved for
1234 // internal use and that should never be interchanged. Noncharacters
1235 // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1236 // and the values U+FDD0..U+FDEF.
1237
1238 // U+FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001239 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1240 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1241 "\xef\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001242
1243 // U+FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001244 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1245 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1246 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001247
1248 // U+1FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001249 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1250 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1251 "\xf0\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001252
1253 // U+1FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001254 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1255 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1256 "\xf0\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001257
1258 // U+2FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001259 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1260 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1261 "\xf0\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001262
1263 // U+2FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001264 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1265 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1266 "\xf0\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001267
1268 // U+3FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001269 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1270 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1271 "\xf0\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001272
1273 // U+3FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001274 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1275 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1276 "\xf0\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001277
1278 // U+4FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001279 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1280 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1281 "\xf1\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001282
1283 // U+4FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001284 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1285 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1286 "\xf1\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001287
1288 // U+5FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001289 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1290 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1291 "\xf1\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001292
1293 // U+5FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001294 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1295 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1296 "\xf1\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001297
1298 // U+6FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001299 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1300 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1301 "\xf1\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001302
1303 // U+6FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001304 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1305 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1306 "\xf1\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001307
1308 // U+7FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001309 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1310 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1311 "\xf1\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001312
1313 // U+7FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001314 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1315 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1316 "\xf1\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001317
1318 // U+8FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001319 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1320 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1321 "\xf2\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001322
1323 // U+8FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001324 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1325 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1326 "\xf2\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001327
1328 // U+9FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001329 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1330 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1331 "\xf2\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001332
1333 // U+9FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001334 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1335 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1336 "\xf2\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001337
1338 // U+AFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001339 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1340 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1341 "\xf2\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001342
1343 // U+AFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001344 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1345 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1346 "\xf2\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001347
1348 // U+BFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001349 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1350 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1351 "\xf2\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001352
1353 // U+BFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001354 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1355 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1356 "\xf2\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001357
1358 // U+CFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001359 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1360 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1361 "\xf3\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001362
1363 // U+CFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001364 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1365 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1366 "\xf3\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001367
1368 // U+DFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001369 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1370 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1371 "\xf3\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001372
1373 // U+DFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001374 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1375 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1376 "\xf3\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001377
1378 // U+EFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001379 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1380 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1381 "\xf3\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001382
1383 // U+EFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001384 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1385 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1386 "\xf3\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001387
1388 // U+FFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001389 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1390 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1391 "\xf3\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001392
1393 // U+FFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001394 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1395 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1396 "\xf3\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001397
1398 // U+10FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001399 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1400 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1401 "\xf4\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001402
1403 // U+10FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001404 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1405 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1406 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001407
1408 // U+FDD0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001409 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1410 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1411 "\xef\xb7\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001412
1413 // U+FDD1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001414 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1415 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1416 "\xef\xb7\x91"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001417
1418 // U+FDD2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001419 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1420 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1421 "\xef\xb7\x92"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001422
1423 // U+FDD3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001424 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1425 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1426 "\xef\xb7\x93"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001427
1428 // U+FDD4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001429 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1430 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1431 "\xef\xb7\x94"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001432
1433 // U+FDD5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001434 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1435 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1436 "\xef\xb7\x95"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001437
1438 // U+FDD6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001439 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1440 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1441 "\xef\xb7\x96"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001442
1443 // U+FDD7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001444 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1445 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1446 "\xef\xb7\x97"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001447
1448 // U+FDD8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001449 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1450 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1451 "\xef\xb7\x98"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001452
1453 // U+FDD9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001454 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1455 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1456 "\xef\xb7\x99"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001457
1458 // U+FDDA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001459 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1460 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1461 "\xef\xb7\x9a"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001462
1463 // U+FDDB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001464 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1465 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1466 "\xef\xb7\x9b"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001467
1468 // U+FDDC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001469 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1470 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1471 "\xef\xb7\x9c"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001472
1473 // U+FDDD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001474 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1475 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1476 "\xef\xb7\x9d"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001477
1478 // U+FDDE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001479 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1480 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1481 "\xef\xb7\x9e"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001482
1483 // U+FDDF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001484 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1485 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1486 "\xef\xb7\x9f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001487
1488 // U+FDE0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001489 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1490 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1491 "\xef\xb7\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001492
1493 // U+FDE1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001494 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1495 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1496 "\xef\xb7\xa1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001497
1498 // U+FDE2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001499 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1500 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1501 "\xef\xb7\xa2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001502
1503 // U+FDE3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001504 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1505 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1506 "\xef\xb7\xa3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001507
1508 // U+FDE4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001509 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1510 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1511 "\xef\xb7\xa4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001512
1513 // U+FDE5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001514 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1515 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1516 "\xef\xb7\xa5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001517
1518 // U+FDE6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001519 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1520 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1521 "\xef\xb7\xa6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001522
1523 // U+FDE7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001524 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1525 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1526 "\xef\xb7\xa7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001527
1528 // U+FDE8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001529 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1530 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1531 "\xef\xb7\xa8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001532
1533 // U+FDE9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001534 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1535 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1536 "\xef\xb7\xa9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001537
1538 // U+FDEA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001539 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1540 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1541 "\xef\xb7\xaa"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001542
1543 // U+FDEB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001544 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1545 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1546 "\xef\xb7\xab"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001547
1548 // U+FDEC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001549 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1550 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1551 "\xef\xb7\xac"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001552
1553 // U+FDED
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001554 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1555 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1556 "\xef\xb7\xad"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001557
1558 // U+FDEE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001559 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1560 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1561 "\xef\xb7\xae"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001562
1563 // U+FDEF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001564 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1565 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1566 "\xef\xb7\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001567
1568 // U+FDF0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001569 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1570 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1571 "\xef\xb7\xb0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001572
1573 // U+FDF1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001574 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1575 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1576 "\xef\xb7\xb1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001577
1578 // U+FDF2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001579 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1580 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1581 "\xef\xb7\xb2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001582
1583 // U+FDF3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001584 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1585 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1586 "\xef\xb7\xb3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001587
1588 // U+FDF4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001589 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1590 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1591 "\xef\xb7\xb4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001592
1593 // U+FDF5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001594 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1595 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1596 "\xef\xb7\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001597
1598 // U+FDF6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001599 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1600 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1601 "\xef\xb7\xb6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001602
1603 // U+FDF7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001604 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1605 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1606 "\xef\xb7\xb7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001607
1608 // U+FDF8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001609 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1610 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1611 "\xef\xb7\xb8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001612
1613 // U+FDF9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1615 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1616 "\xef\xb7\xb9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001617
1618 // U+FDFA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001619 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1620 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1621 "\xef\xb7\xba"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001622
1623 // U+FDFB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001624 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1625 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1626 "\xef\xb7\xbb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001627
1628 // U+FDFC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1630 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1631 "\xef\xb7\xbc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001632
1633 // U+FDFD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001634 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1635 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1636 "\xef\xb7\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001637
1638 // U+FDFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001639 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1640 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1641 "\xef\xb7\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001642
1643 // U+FDFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001644 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1645 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1646 "\xef\xb7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001647}
1648
1649TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1650 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001651 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1652 ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1653 "\x41", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001654
1655 //
1656 // Sequences with one continuation byte missing
1657 //
1658
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001659 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1660 ConvertUTFResultContainer(sourceExhausted),
1661 "\xc2", true));
1662 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1663 ConvertUTFResultContainer(sourceExhausted),
1664 "\xdf", true));
1665 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1666 ConvertUTFResultContainer(sourceExhausted),
1667 "\xe0\xa0", true));
1668 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1669 ConvertUTFResultContainer(sourceExhausted),
1670 "\xe0\xbf", true));
1671 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1672 ConvertUTFResultContainer(sourceExhausted),
1673 "\xe1\x80", true));
1674 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1675 ConvertUTFResultContainer(sourceExhausted),
1676 "\xec\xbf", true));
1677 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1678 ConvertUTFResultContainer(sourceExhausted),
1679 "\xed\x80", true));
1680 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1681 ConvertUTFResultContainer(sourceExhausted),
1682 "\xed\x9f", true));
1683 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1684 ConvertUTFResultContainer(sourceExhausted),
1685 "\xee\x80", true));
1686 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1687 ConvertUTFResultContainer(sourceExhausted),
1688 "\xef\xbf", true));
1689 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1690 ConvertUTFResultContainer(sourceExhausted),
1691 "\xf0\x90\x80", true));
1692 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1693 ConvertUTFResultContainer(sourceExhausted),
1694 "\xf0\xbf\xbf", true));
1695 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1696 ConvertUTFResultContainer(sourceExhausted),
1697 "\xf1\x80\x80", true));
1698 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1699 ConvertUTFResultContainer(sourceExhausted),
1700 "\xf3\xbf\xbf", true));
1701 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1702 ConvertUTFResultContainer(sourceExhausted),
1703 "\xf4\x80\x80", true));
1704 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1705 ConvertUTFResultContainer(sourceExhausted),
1706 "\xf4\x8f\xbf", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001707
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001708 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1709 ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
1710 "\x41\xc2", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001711}
1712