Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 1 | //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "llvm/Support/ConvertUTF.h" |
Reid Kleckner | d8cb6b0 | 2015-01-26 19:51:00 +0000 | [diff] [blame] | 11 | #include "llvm/Support/Format.h" |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 12 | #include "gtest/gtest.h" |
| 13 | #include <string> |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 14 | #include <utility> |
Chandler Carruth | d990388 | 2015-01-14 11:23:27 +0000 | [diff] [blame] | 15 | #include <vector> |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 16 | |
| 17 | using namespace llvm; |
| 18 | |
| 19 | TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) { |
| 20 | // Src is the look of disapproval. |
| 21 | static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; |
| 22 | ArrayRef<char> Ref(Src, sizeof(Src) - 1); |
| 23 | std::string Result; |
| 24 | bool Success = convertUTF16ToUTF8String(Ref, Result); |
| 25 | EXPECT_TRUE(Success); |
| 26 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 27 | EXPECT_EQ(Expected, Result); |
| 28 | } |
| 29 | |
| 30 | TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) { |
| 31 | // Src is the look of disapproval. |
| 32 | static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0"; |
| 33 | ArrayRef<char> Ref(Src, sizeof(Src) - 1); |
| 34 | std::string Result; |
| 35 | bool Success = convertUTF16ToUTF8String(Ref, Result); |
| 36 | EXPECT_TRUE(Success); |
| 37 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 38 | EXPECT_EQ(Expected, Result); |
| 39 | } |
| 40 | |
Reid Kleckner | d8cb6b0 | 2015-01-26 19:51:00 +0000 | [diff] [blame] | 41 | TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) { |
| 42 | // Src is the look of disapproval. |
| 43 | static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0"; |
| 44 | StringRef Ref(Src, sizeof(Src) - 1); |
| 45 | SmallVector<UTF16, 5> Result; |
| 46 | bool Success = convertUTF8ToUTF16String(Ref, Result); |
| 47 | EXPECT_TRUE(Success); |
| 48 | static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0}; |
Eric Christopher | 7aebb32 | 2015-01-27 01:01:39 +0000 | [diff] [blame] | 49 | ASSERT_EQ(3u, Result.size()); |
Reid Kleckner | d8cb6b0 | 2015-01-26 19:51:00 +0000 | [diff] [blame] | 50 | for (int I = 0, E = 3; I != E; ++I) |
| 51 | EXPECT_EQ(Expected[I], Result[I]); |
| 52 | } |
| 53 | |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 54 | TEST(ConvertUTFTest, OddLengthInput) { |
| 55 | std::string Result; |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 56 | bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 57 | EXPECT_FALSE(Success); |
| 58 | } |
| 59 | |
| 60 | TEST(ConvertUTFTest, Empty) { |
| 61 | std::string Result; |
Marianne Mailhot-Sarrasin | 7423f40 | 2016-03-11 15:59:32 +0000 | [diff] [blame^] | 62 | bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(None), Result); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 63 | EXPECT_TRUE(Success); |
| 64 | EXPECT_TRUE(Result.empty()); |
| 65 | } |
| 66 | |
| 67 | TEST(ConvertUTFTest, HasUTF16BOM) { |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 68 | bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 69 | EXPECT_TRUE(HasBOM); |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 70 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 71 | EXPECT_TRUE(HasBOM); |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 72 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 73 | EXPECT_TRUE(HasBOM); // Don't care about odd lengths. |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 74 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 75 | EXPECT_TRUE(HasBOM); |
| 76 | |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 77 | HasBOM = hasUTF16ByteOrderMark(None); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 78 | EXPECT_FALSE(HasBOM); |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 79 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 80 | EXPECT_FALSE(HasBOM); |
| 81 | } |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 82 | |
Marianne Mailhot-Sarrasin | 7423f40 | 2016-03-11 15:59:32 +0000 | [diff] [blame^] | 83 | TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) { |
| 84 | // Src is the look of disapproval. |
| 85 | static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; |
| 86 | ArrayRef<UTF16> SrcRef = makeArrayRef((const UTF16 *)Src, 4); |
| 87 | std::string Result; |
| 88 | bool Success = convertUTF16ToUTF8String(SrcRef, Result); |
| 89 | EXPECT_TRUE(Success); |
| 90 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 91 | EXPECT_EQ(Expected, Result); |
| 92 | } |
| 93 | |
| 94 | TEST(ConvertUTFTest, ConvertUTF8toWide) { |
| 95 | // Src is the look of disapproval. |
| 96 | static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0"; |
| 97 | std::wstring Result; |
| 98 | bool Success = ConvertUTF8toWide((const char*)Src, Result); |
| 99 | EXPECT_TRUE(Success); |
| 100 | std::wstring Expected(L"\x0ca0_\x0ca0"); |
| 101 | EXPECT_EQ(Expected, Result); |
| 102 | Result.clear(); |
| 103 | Success = ConvertUTF8toWide(StringRef(Src, 7), Result); |
| 104 | EXPECT_TRUE(Success); |
| 105 | EXPECT_EQ(Expected, Result); |
| 106 | } |
| 107 | |
| 108 | TEST(ConvertUTFTest, convertWideToUTF8) { |
| 109 | // Src is the look of disapproval. |
| 110 | static const wchar_t Src[] = L"\x0ca0_\x0ca0"; |
| 111 | std::string Result; |
| 112 | bool Success = convertWideToUTF8(Src, Result); |
| 113 | EXPECT_TRUE(Success); |
| 114 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 115 | EXPECT_EQ(Expected, Result); |
| 116 | } |
| 117 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 118 | struct ConvertUTFResultContainer { |
| 119 | ConversionResult ErrorCode; |
| 120 | std::vector<unsigned> UnicodeScalars; |
| 121 | |
| 122 | ConvertUTFResultContainer(ConversionResult ErrorCode) |
| 123 | : ErrorCode(ErrorCode) {} |
| 124 | |
| 125 | ConvertUTFResultContainer |
| 126 | withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000, |
| 127 | unsigned US2 = 0x110000, unsigned US3 = 0x110000, |
| 128 | unsigned US4 = 0x110000, unsigned US5 = 0x110000, |
| 129 | unsigned US6 = 0x110000, unsigned US7 = 0x110000) { |
| 130 | ConvertUTFResultContainer Result(*this); |
| 131 | if (US0 != 0x110000) |
| 132 | Result.UnicodeScalars.push_back(US0); |
| 133 | if (US1 != 0x110000) |
| 134 | Result.UnicodeScalars.push_back(US1); |
| 135 | if (US2 != 0x110000) |
| 136 | Result.UnicodeScalars.push_back(US2); |
| 137 | if (US3 != 0x110000) |
| 138 | Result.UnicodeScalars.push_back(US3); |
| 139 | if (US4 != 0x110000) |
| 140 | Result.UnicodeScalars.push_back(US4); |
| 141 | if (US5 != 0x110000) |
| 142 | Result.UnicodeScalars.push_back(US5); |
| 143 | if (US6 != 0x110000) |
| 144 | Result.UnicodeScalars.push_back(US6); |
| 145 | if (US7 != 0x110000) |
| 146 | Result.UnicodeScalars.push_back(US7); |
| 147 | return Result; |
| 148 | } |
| 149 | }; |
| 150 | |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 151 | std::pair<ConversionResult, std::vector<unsigned>> |
| 152 | ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { |
| 153 | const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data()); |
| 154 | |
| 155 | const UTF8 *SourceNext = SourceStart; |
| 156 | std::vector<UTF32> Decoded(S.size(), 0); |
| 157 | UTF32 *TargetStart = Decoded.data(); |
| 158 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 159 | auto ErrorCode = |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 160 | ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart, |
| 161 | Decoded.data() + Decoded.size(), lenientConversion); |
| 162 | |
| 163 | Decoded.resize(TargetStart - Decoded.data()); |
| 164 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 165 | return std::make_pair(ErrorCode, Decoded); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 166 | } |
| 167 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 168 | std::pair<ConversionResult, std::vector<unsigned>> |
| 169 | ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { |
| 170 | const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data()); |
| 171 | |
| 172 | const UTF8 *SourceNext = SourceStart; |
| 173 | std::vector<UTF32> Decoded(S.size(), 0); |
| 174 | UTF32 *TargetStart = Decoded.data(); |
| 175 | |
| 176 | auto ErrorCode = ConvertUTF8toUTF32Partial( |
| 177 | &SourceNext, SourceStart + S.size(), &TargetStart, |
| 178 | Decoded.data() + Decoded.size(), lenientConversion); |
| 179 | |
| 180 | Decoded.resize(TargetStart - Decoded.data()); |
| 181 | |
| 182 | return std::make_pair(ErrorCode, Decoded); |
| 183 | } |
| 184 | |
| 185 | ::testing::AssertionResult |
| 186 | CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected, |
| 187 | StringRef S, bool Partial = false) { |
| 188 | ConversionResult ErrorCode; |
| 189 | std::vector<unsigned> Decoded; |
| 190 | if (!Partial) |
| 191 | std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S); |
| 192 | else |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 193 | std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S); |
Dmitri Gribenko | cbc7ae2 | 2015-01-10 05:03:29 +0000 | [diff] [blame] | 194 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 195 | if (Expected.ErrorCode != ErrorCode) |
| 196 | return ::testing::AssertionFailure() << "Expected error code " |
| 197 | << Expected.ErrorCode << ", actual " |
| 198 | << ErrorCode; |
| 199 | |
| 200 | if (Expected.UnicodeScalars != Decoded) |
| 201 | return ::testing::AssertionFailure() |
| 202 | << "Expected lenient decoded result:\n" |
| 203 | << ::testing::PrintToString(Expected.UnicodeScalars) << "\n" |
| 204 | << "Actual result:\n" << ::testing::PrintToString(Decoded); |
| 205 | |
| 206 | return ::testing::AssertionSuccess(); |
| 207 | } |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 208 | |
| 209 | TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { |
| 210 | |
| 211 | // |
| 212 | // 1-byte sequences |
| 213 | // |
| 214 | |
| 215 | // U+0041 LATIN CAPITAL LETTER A |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 216 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 217 | ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 218 | |
| 219 | // |
| 220 | // 2-byte sequences |
| 221 | // |
| 222 | |
| 223 | // U+0283 LATIN SMALL LETTER ESH |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 224 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 225 | ConvertUTFResultContainer(conversionOK).withScalars(0x0283), |
| 226 | "\xca\x83")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 227 | |
| 228 | // U+03BA GREEK SMALL LETTER KAPPA |
| 229 | // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA |
| 230 | // U+03C3 GREEK SMALL LETTER SIGMA |
| 231 | // U+03BC GREEK SMALL LETTER MU |
| 232 | // U+03B5 GREEK SMALL LETTER EPSILON |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 233 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 234 | ConvertUTFResultContainer(conversionOK) |
| 235 | .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), |
| 236 | "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 237 | |
| 238 | // |
| 239 | // 3-byte sequences |
| 240 | // |
| 241 | |
| 242 | // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B |
| 243 | // U+6587 CJK UNIFIED IDEOGRAPH-6587 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 244 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 245 | ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587), |
| 246 | "\xe4\xbe\x8b\xe6\x96\x87")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 247 | |
| 248 | // U+D55C HANGUL SYLLABLE HAN |
| 249 | // U+AE00 HANGUL SYLLABLE GEUL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 250 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 251 | ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00), |
| 252 | "\xed\x95\x9c\xea\xb8\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 253 | |
| 254 | // U+1112 HANGUL CHOSEONG HIEUH |
| 255 | // U+1161 HANGUL JUNGSEONG A |
| 256 | // U+11AB HANGUL JONGSEONG NIEUN |
| 257 | // U+1100 HANGUL CHOSEONG KIYEOK |
| 258 | // U+1173 HANGUL JUNGSEONG EU |
| 259 | // U+11AF HANGUL JONGSEONG RIEUL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 260 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 261 | ConvertUTFResultContainer(conversionOK) |
| 262 | .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), |
| 263 | "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" |
| 264 | "\xe1\x86\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 265 | |
| 266 | // |
| 267 | // 4-byte sequences |
| 268 | // |
| 269 | |
| 270 | // U+E0100 VARIATION SELECTOR-17 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 271 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 272 | ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100), |
| 273 | "\xf3\xa0\x84\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 274 | |
| 275 | // |
| 276 | // First possible sequence of a certain length |
| 277 | // |
| 278 | |
| 279 | // U+0000 NULL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 280 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 281 | ConvertUTFResultContainer(conversionOK).withScalars(0x0000), |
| 282 | StringRef("\x00", 1))); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 283 | |
| 284 | // U+0080 PADDING CHARACTER |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 285 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 286 | ConvertUTFResultContainer(conversionOK).withScalars(0x0080), |
| 287 | "\xc2\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 288 | |
| 289 | // U+0800 SAMARITAN LETTER ALAF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 290 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 291 | ConvertUTFResultContainer(conversionOK).withScalars(0x0800), |
| 292 | "\xe0\xa0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 293 | |
| 294 | // U+10000 LINEAR B SYLLABLE B008 A |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 295 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 296 | ConvertUTFResultContainer(conversionOK).withScalars(0x10000), |
| 297 | "\xf0\x90\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 298 | |
| 299 | // U+200000 (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 300 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 301 | ConvertUTFResultContainer(sourceIllegal) |
| 302 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 303 | "\xf8\x88\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 304 | |
| 305 | // U+4000000 (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 306 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 307 | ConvertUTFResultContainer(sourceIllegal) |
| 308 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 309 | "\xfc\x84\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 310 | |
| 311 | // |
| 312 | // Last possible sequence of a certain length |
| 313 | // |
| 314 | |
| 315 | // U+007F DELETE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 316 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 317 | ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 318 | |
| 319 | // U+07FF (unassigned) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 320 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 321 | ConvertUTFResultContainer(conversionOK).withScalars(0x07ff), |
| 322 | "\xdf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 323 | |
| 324 | // U+FFFF (noncharacter) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 325 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 326 | ConvertUTFResultContainer(conversionOK).withScalars(0xffff), |
| 327 | "\xef\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 328 | |
| 329 | // U+1FFFFF (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 330 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 331 | ConvertUTFResultContainer(sourceIllegal) |
| 332 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 333 | "\xf7\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 334 | |
| 335 | // U+3FFFFFF (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 336 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 337 | ConvertUTFResultContainer(sourceIllegal) |
| 338 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 339 | "\xfb\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 340 | |
| 341 | // U+7FFFFFFF (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 342 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 343 | ConvertUTFResultContainer(sourceIllegal) |
| 344 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 345 | "\xfd\xbf\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 346 | |
| 347 | // |
| 348 | // Other boundary conditions |
| 349 | // |
| 350 | |
| 351 | // U+D7FF (unassigned) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 352 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 353 | ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff), |
| 354 | "\xed\x9f\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 355 | |
| 356 | // U+E000 (private use) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 357 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 358 | ConvertUTFResultContainer(conversionOK).withScalars(0xe000), |
| 359 | "\xee\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 360 | |
| 361 | // U+FFFD REPLACEMENT CHARACTER |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 362 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 363 | ConvertUTFResultContainer(conversionOK).withScalars(0xfffd), |
| 364 | "\xef\xbf\xbd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 365 | |
| 366 | // U+10FFFF (noncharacter) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 367 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 368 | ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), |
| 369 | "\xf4\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 370 | |
| 371 | // U+110000 (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 372 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 373 | ConvertUTFResultContainer(sourceIllegal) |
| 374 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 375 | "\xf4\x90\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 376 | |
| 377 | // |
| 378 | // Unexpected continuation bytes |
| 379 | // |
| 380 | |
| 381 | // A sequence of unexpected continuation bytes that don't follow a first |
| 382 | // byte, every byte is a maximal subpart. |
| 383 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 384 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 385 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80")); |
| 386 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 387 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf")); |
| 388 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 389 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 390 | "\x80\x80")); |
| 391 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 392 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 393 | "\x80\xbf")); |
| 394 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 395 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 396 | "\xbf\x80")); |
| 397 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 398 | ConvertUTFResultContainer(sourceIllegal) |
| 399 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 400 | "\x80\xbf\x80")); |
| 401 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 402 | ConvertUTFResultContainer(sourceIllegal) |
| 403 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 404 | "\x80\xbf\x80\xbf")); |
| 405 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 406 | ConvertUTFResultContainer(sourceIllegal) |
| 407 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 408 | "\x80\xbf\x82\xbf\xaa")); |
| 409 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 410 | ConvertUTFResultContainer(sourceIllegal) |
| 411 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 412 | "\xaa\xb0\xbb\xbf\xaa\xa0")); |
| 413 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 414 | ConvertUTFResultContainer(sourceIllegal) |
| 415 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 416 | "\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 417 | |
| 418 | // All continuation bytes (0x80--0xbf). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 419 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 420 | ConvertUTFResultContainer(sourceIllegal) |
| 421 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 422 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 423 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 424 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 425 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 426 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 427 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 428 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 429 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 430 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 431 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 432 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 433 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 434 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 435 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 436 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 437 | "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" |
| 438 | "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" |
| 439 | "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" |
| 440 | "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 441 | |
| 442 | // |
| 443 | // Lonely start bytes |
| 444 | // |
| 445 | |
| 446 | // Start bytes of 2-byte sequences (0xc0--0xdf). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 447 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 448 | ConvertUTFResultContainer(sourceIllegal) |
| 449 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 450 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 451 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 452 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 453 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 454 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 455 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 456 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 457 | "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" |
| 458 | "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 459 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 460 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 461 | ConvertUTFResultContainer(sourceIllegal) |
| 462 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 463 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 464 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 465 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 466 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 467 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 468 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 469 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 470 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 471 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 472 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 473 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 474 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 475 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 476 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 477 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 478 | "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" |
| 479 | "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" |
| 480 | "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" |
| 481 | "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 482 | |
| 483 | // Start bytes of 3-byte sequences (0xe0--0xef). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 484 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 485 | ConvertUTFResultContainer(sourceIllegal) |
| 486 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 487 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 488 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 489 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 490 | "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 491 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 492 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 493 | ConvertUTFResultContainer(sourceIllegal) |
| 494 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 495 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 496 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 497 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 498 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 499 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 500 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 501 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 502 | "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" |
| 503 | "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 504 | |
| 505 | // Start bytes of 4-byte sequences (0xf0--0xf7). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 506 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 507 | ConvertUTFResultContainer(sourceIllegal) |
| 508 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 509 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 510 | "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 511 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 512 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 513 | ConvertUTFResultContainer(sourceIllegal) |
| 514 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 515 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 516 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 517 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 518 | "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 519 | |
| 520 | // Start bytes of 5-byte sequences (0xf8--0xfb). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 521 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 522 | ConvertUTFResultContainer(sourceIllegal) |
| 523 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 524 | "\xf8\xf9\xfa\xfb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 525 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 526 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 527 | ConvertUTFResultContainer(sourceIllegal) |
| 528 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 529 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 530 | "\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 531 | |
| 532 | // Start bytes of 6-byte sequences (0xfc--0xfd). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 533 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 534 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 535 | "\xfc\xfd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 536 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 537 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 538 | ConvertUTFResultContainer(sourceIllegal) |
| 539 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020), |
| 540 | "\xfc\x20\xfd\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 541 | |
| 542 | // |
| 543 | // Other bytes (0xc0--0xc1, 0xfe--0xff). |
| 544 | // |
| 545 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 546 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 547 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0")); |
| 548 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 549 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1")); |
| 550 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 551 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe")); |
| 552 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 553 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 554 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 555 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 556 | ConvertUTFResultContainer(sourceIllegal) |
| 557 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 558 | "\xc0\xc1\xfe\xff")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 559 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 560 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 561 | ConvertUTFResultContainer(sourceIllegal) |
| 562 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 563 | "\xfe\xfe\xff\xff")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 564 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 565 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 566 | ConvertUTFResultContainer(sourceIllegal) |
| 567 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 568 | "\xfe\x80\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 569 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 570 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 571 | ConvertUTFResultContainer(sourceIllegal) |
| 572 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 573 | "\xff\x80\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 574 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 575 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 576 | ConvertUTFResultContainer(sourceIllegal) |
| 577 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 578 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 579 | "\xc0\x20\xc1\x20\xfe\x20\xff\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 580 | |
| 581 | // |
| 582 | // Sequences with one continuation byte missing |
| 583 | // |
| 584 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 585 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 586 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2")); |
| 587 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 588 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf")); |
| 589 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 590 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 591 | "\xe0\xa0")); |
| 592 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 593 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 594 | "\xe0\xbf")); |
| 595 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 596 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 597 | "\xe1\x80")); |
| 598 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 599 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 600 | "\xec\xbf")); |
| 601 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 602 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 603 | "\xed\x80")); |
| 604 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 605 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 606 | "\xed\x9f")); |
| 607 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 608 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 609 | "\xee\x80")); |
| 610 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 611 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 612 | "\xef\xbf")); |
| 613 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 614 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 615 | "\xf0\x90\x80")); |
| 616 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 617 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 618 | "\xf0\xbf\xbf")); |
| 619 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 620 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 621 | "\xf1\x80\x80")); |
| 622 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 623 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 624 | "\xf3\xbf\xbf")); |
| 625 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 626 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 627 | "\xf4\x80\x80")); |
| 628 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 629 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 630 | "\xf4\x8f\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 631 | |
| 632 | // Overlong sequences with one trailing byte missing. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 633 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 634 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 635 | "\xc0")); |
| 636 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 637 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 638 | "\xc1")); |
| 639 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 640 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 641 | "\xe0\x80")); |
| 642 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 643 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 644 | "\xe0\x9f")); |
| 645 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 646 | ConvertUTFResultContainer(sourceIllegal) |
| 647 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 648 | "\xf0\x80\x80")); |
| 649 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 650 | ConvertUTFResultContainer(sourceIllegal) |
| 651 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 652 | "\xf0\x8f\x80")); |
| 653 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 654 | ConvertUTFResultContainer(sourceIllegal) |
| 655 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 656 | "\xf8\x80\x80\x80")); |
| 657 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 658 | ConvertUTFResultContainer(sourceIllegal) |
| 659 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 660 | "\xfc\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 661 | |
| 662 | // Sequences that represent surrogates with one trailing byte missing. |
| 663 | // High surrogates |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 664 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 665 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 666 | "\xed\xa0")); |
| 667 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 668 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 669 | "\xed\xac")); |
| 670 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 671 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 672 | "\xed\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 673 | // Low surrogates |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 674 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 675 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 676 | "\xed\xb0")); |
| 677 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 678 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 679 | "\xed\xb4")); |
| 680 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 681 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 682 | "\xed\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 683 | |
| 684 | // Ill-formed 4-byte sequences. |
| 685 | // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 686 | // U+1100xx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 687 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 688 | ConvertUTFResultContainer(sourceIllegal) |
| 689 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 690 | "\xf4\x90\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 691 | // U+13FBxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 692 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 693 | ConvertUTFResultContainer(sourceIllegal) |
| 694 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 695 | "\xf4\xbf\xbf")); |
| 696 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 697 | ConvertUTFResultContainer(sourceIllegal) |
| 698 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 699 | "\xf5\x80\x80")); |
| 700 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 701 | ConvertUTFResultContainer(sourceIllegal) |
| 702 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 703 | "\xf6\x80\x80")); |
| 704 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 705 | ConvertUTFResultContainer(sourceIllegal) |
| 706 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 707 | "\xf7\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 708 | // U+1FFBxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 709 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 710 | ConvertUTFResultContainer(sourceIllegal) |
| 711 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 712 | "\xf7\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 713 | |
| 714 | // Ill-formed 5-byte sequences. |
| 715 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 716 | // U+2000xx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 717 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 718 | ConvertUTFResultContainer(sourceIllegal) |
| 719 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 720 | "\xf8\x88\x80\x80")); |
| 721 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 722 | ConvertUTFResultContainer(sourceIllegal) |
| 723 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 724 | "\xf8\xbf\xbf\xbf")); |
| 725 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 726 | ConvertUTFResultContainer(sourceIllegal) |
| 727 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 728 | "\xf9\x80\x80\x80")); |
| 729 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 730 | ConvertUTFResultContainer(sourceIllegal) |
| 731 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 732 | "\xfa\x80\x80\x80")); |
| 733 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 734 | ConvertUTFResultContainer(sourceIllegal) |
| 735 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 736 | "\xfb\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 737 | // U+3FFFFxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 738 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 739 | ConvertUTFResultContainer(sourceIllegal) |
| 740 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 741 | "\xfb\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 742 | |
| 743 | // Ill-formed 6-byte sequences. |
| 744 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
| 745 | // U+40000xx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 746 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 747 | ConvertUTFResultContainer(sourceIllegal) |
| 748 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 749 | "\xfc\x84\x80\x80\x80")); |
| 750 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 751 | ConvertUTFResultContainer(sourceIllegal) |
| 752 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 753 | "\xfc\xbf\xbf\xbf\xbf")); |
| 754 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 755 | ConvertUTFResultContainer(sourceIllegal) |
| 756 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 757 | "\xfd\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 758 | // U+7FFFFFxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 759 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 760 | ConvertUTFResultContainer(sourceIllegal) |
| 761 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 762 | "\xfd\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 763 | |
| 764 | // |
| 765 | // Sequences with two continuation bytes missing |
| 766 | // |
| 767 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 768 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 769 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 770 | "\xf0\x90")); |
| 771 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 772 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 773 | "\xf0\xbf")); |
| 774 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 775 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 776 | "\xf1\x80")); |
| 777 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 778 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 779 | "\xf3\xbf")); |
| 780 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 781 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 782 | "\xf4\x80")); |
| 783 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 784 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 785 | "\xf4\x8f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 786 | |
| 787 | // Overlong sequences with two trailing bytes missing.
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 788 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 789 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0")); |
| 790 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 791 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 792 | "\xf0\x80")); |
| 793 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 794 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 795 | "\xf0\x8f")); |
| 796 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 797 | ConvertUTFResultContainer(sourceIllegal) |
| 798 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 799 | "\xf8\x80\x80")); |
| 800 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 801 | ConvertUTFResultContainer(sourceIllegal) |
| 802 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 803 | "\xfc\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 804 | |
| 805 | // Sequences that represent surrogates with two trailing bytes missing. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 806 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 807 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 808 | |
| 809 | // Ill-formed 4-byte sequences. |
| 810 | // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 811 | // U+110yxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 812 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 813 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 814 | "\xf4\x90")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 815 | // U+13Fyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 816 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 817 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 818 | "\xf4\xbf")); |
| 819 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 820 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 821 | "\xf5\x80")); |
| 822 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 823 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 824 | "\xf6\x80")); |
| 825 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 826 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 827 | "\xf7\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 828 | // U+1FFyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 829 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 830 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 831 | "\xf7\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 832 | |
| 833 | // Ill-formed 5-byte sequences. |
| 834 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 835 | // U+200yxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 836 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 837 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 838 | "\xf8\x88\x80")); |
| 839 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 840 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 841 | "\xf8\xbf\xbf")); |
| 842 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 843 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 844 | "\xf9\x80\x80")); |
| 845 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 846 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 847 | "\xfa\x80\x80")); |
| 848 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 849 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 850 | "\xfb\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 851 | // U+3FFFyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 852 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 853 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 854 | "\xfb\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 855 | |
| 856 | // Ill-formed 6-byte sequences. |
| 857 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 858 | // U+4000yxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 859 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 860 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 861 | "\xfc\x84\x80\x80")); |
| 862 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 863 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 864 | "\xfc\xbf\xbf\xbf")); |
| 865 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 866 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 867 | "\xfd\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 868 | // U+7FFFFyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 869 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 870 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 871 | "\xfd\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 872 | |
| 873 | // |
| 874 | // Sequences with three continuation bytes missing |
| 875 | // |
| 876 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 877 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 878 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); |
| 879 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 880 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1")); |
| 881 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 882 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2")); |
| 883 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 884 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3")); |
| 885 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 886 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 887 | |
| 888 | // Broken overlong sequences. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 889 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 890 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); |
| 891 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 892 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 893 | "\xf8\x80")); |
| 894 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 895 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 896 | "\xfc\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 897 | |
| 898 | // Ill-formed 4-byte sequences. |
| 899 | // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 900 | // U+14yyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 901 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 902 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5")); |
| 903 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 904 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 905 | // U+1Cyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 906 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 907 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 908 | |
| 909 | // Ill-formed 5-byte sequences. |
| 910 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 911 | // U+20yyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 912 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 913 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 914 | "\xf8\x88")); |
| 915 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 916 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 917 | "\xf8\xbf")); |
| 918 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 919 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 920 | "\xf9\x80")); |
| 921 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 922 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 923 | "\xfa\x80")); |
| 924 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 925 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 926 | "\xfb\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 927 | // U+3FCyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 928 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 929 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 930 | "\xfb\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 931 | |
| 932 | // Ill-formed 6-byte sequences. |
| 933 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 934 | // U+400yyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 935 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 936 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 937 | "\xfc\x84\x80")); |
| 938 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 939 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 940 | "\xfc\xbf\xbf")); |
| 941 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 942 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 943 | "\xfd\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 944 | // U+7FFCyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 945 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 946 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 947 | "\xfd\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 948 | |
| 949 | // |
| 950 | // Sequences with four continuation bytes missing |
| 951 | // |
| 952 | |
| 953 | // Ill-formed 5-byte sequences. |
| 954 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 955 | // U+uzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 956 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 957 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); |
| 958 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 959 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9")); |
| 960 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 961 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa")); |
| 962 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 963 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 964 | // U+3zyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 965 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 966 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 967 | |
| 968 | // Broken overlong sequences. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 969 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 970 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); |
| 971 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 972 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 973 | "\xfc\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 974 | |
| 975 | // Ill-formed 6-byte sequences. |
| 976 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 977 | // U+uzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 978 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 979 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 980 | "\xfc\x84")); |
| 981 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 982 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 983 | "\xfc\xbf")); |
| 984 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 985 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 986 | "\xfd\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 987 | // U+7Fzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 988 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 989 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 990 | "\xfd\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 991 | |
| 992 | // |
| 993 | // Sequences with five continuation bytes missing |
| 994 | // |
| 995 | |
| 996 | // Ill-formed 6-byte sequences. |
| 997 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 998 | // U+uzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 999 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1000 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1001 | // U+uuzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1002 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1003 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1004 | |
| 1005 | // |
| 1006 | // Consecutive sequences with trailing bytes missing |
| 1007 | // |
| 1008 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1009 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1010 | ConvertUTFResultContainer(sourceIllegal) |
| 1011 | .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) |
| 1012 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 1013 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 1014 | .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) |
| 1015 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 1016 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1017 | "\xc0" "\xe0\x80" "\xf0\x80\x80" |
| 1018 | "\xf8\x80\x80\x80" |
| 1019 | "\xfc\x80\x80\x80\x80" |
| 1020 | "\xdf" "\xef\xbf" "\xf7\xbf\xbf" |
| 1021 | "\xfb\xbf\xbf\xbf" |
| 1022 | "\xfd\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1023 | |
| 1024 | // |
| 1025 | // Overlong UTF-8 sequences |
| 1026 | // |
| 1027 | |
| 1028 | // U+002F SOLIDUS |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1029 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1030 | ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1031 | |
| 1032 | // Overlong sequences of the above. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1033 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1034 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1035 | "\xc0\xaf")); |
| 1036 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1037 | ConvertUTFResultContainer(sourceIllegal) |
| 1038 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1039 | "\xe0\x80\xaf")); |
| 1040 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1041 | ConvertUTFResultContainer(sourceIllegal) |
| 1042 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1043 | "\xf0\x80\x80\xaf")); |
| 1044 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1045 | ConvertUTFResultContainer(sourceIllegal) |
| 1046 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1047 | "\xf8\x80\x80\x80\xaf")); |
| 1048 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1049 | ConvertUTFResultContainer(sourceIllegal) |
| 1050 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1051 | "\xfc\x80\x80\x80\x80\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1052 | |
| 1053 | // U+0000 NULL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1054 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1055 | ConvertUTFResultContainer(conversionOK).withScalars(0x0000), |
| 1056 | StringRef("\x00", 1))); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1057 | |
| 1058 | // Overlong sequences of the above. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1059 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1060 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1061 | "\xc0\x80")); |
| 1062 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1063 | ConvertUTFResultContainer(sourceIllegal) |
| 1064 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1065 | "\xe0\x80\x80")); |
| 1066 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1067 | ConvertUTFResultContainer(sourceIllegal) |
| 1068 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1069 | "\xf0\x80\x80\x80")); |
| 1070 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1071 | ConvertUTFResultContainer(sourceIllegal) |
| 1072 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1073 | "\xf8\x80\x80\x80\x80")); |
| 1074 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1075 | ConvertUTFResultContainer(sourceIllegal) |
| 1076 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1077 | "\xfc\x80\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1078 | |
| 1079 | // Other overlong sequences. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1080 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1081 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1082 | "\xc0\xbf")); |
| 1083 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1084 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1085 | "\xc1\x80")); |
| 1086 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1087 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1088 | "\xc1\xbf")); |
| 1089 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1090 | ConvertUTFResultContainer(sourceIllegal) |
| 1091 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1092 | "\xe0\x9f\xbf")); |
| 1093 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1094 | ConvertUTFResultContainer(sourceIllegal) |
| 1095 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1096 | "\xed\xa0\x80")); |
| 1097 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1098 | ConvertUTFResultContainer(sourceIllegal) |
| 1099 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1100 | "\xed\xbf\xbf")); |
| 1101 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1102 | ConvertUTFResultContainer(sourceIllegal) |
| 1103 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1104 | "\xf0\x8f\x80\x80")); |
| 1105 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1106 | ConvertUTFResultContainer(sourceIllegal) |
| 1107 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1108 | "\xf0\x8f\xbf\xbf")); |
| 1109 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1110 | ConvertUTFResultContainer(sourceIllegal) |
| 1111 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1112 | "\xf8\x87\xbf\xbf\xbf")); |
| 1113 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1114 | ConvertUTFResultContainer(sourceIllegal) |
| 1115 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1116 | "\xfc\x83\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1117 | |
| 1118 | // |
| 1119 | // Isolated surrogates |
| 1120 | // |
| 1121 | |
| 1122 | // Unicode 6.3.0: |
| 1123 | // |
| 1124 | // D71. High-surrogate code point: A Unicode code point in the range |
| 1125 | // U+D800 to U+DBFF. |
| 1126 | // |
| 1127 | // D73. Low-surrogate code point: A Unicode code point in the range |
| 1128 | // U+DC00 to U+DFFF. |
| 1129 | |
| 1130 | // Note: U+E0100 is <DB40 DD00> in UTF16. |
| 1131 | |
| 1132 | // High surrogates |
| 1133 | |
| 1134 | // U+D800 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1135 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1136 | ConvertUTFResultContainer(sourceIllegal) |
| 1137 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1138 | "\xed\xa0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1139 | |
| 1140 | // U+DB40 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1141 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1142 | ConvertUTFResultContainer(sourceIllegal) |
| 1143 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1144 | "\xed\xac\xa0")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1145 | |
| 1146 | // U+DBFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1147 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1148 | ConvertUTFResultContainer(sourceIllegal) |
| 1149 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1150 | "\xed\xaf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1151 | |
| 1152 | // Low surrogates |
| 1153 | |
| 1154 | // U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1155 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1156 | ConvertUTFResultContainer(sourceIllegal) |
| 1157 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1158 | "\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1159 | |
| 1160 | // U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1161 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1162 | ConvertUTFResultContainer(sourceIllegal) |
| 1163 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1164 | "\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1165 | |
| 1166 | // U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1167 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1168 | ConvertUTFResultContainer(sourceIllegal) |
| 1169 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1170 | "\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1171 | |
| 1172 | // Surrogate pairs |
| 1173 | |
| 1174 | // U+D800 U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1175 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1176 | ConvertUTFResultContainer(sourceIllegal) |
| 1177 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1178 | "\xed\xa0\x80\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1179 | |
| 1180 | // U+D800 U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1181 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1182 | ConvertUTFResultContainer(sourceIllegal) |
| 1183 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1184 | "\xed\xa0\x80\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1185 | |
| 1186 | // U+D800 U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1187 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1188 | ConvertUTFResultContainer(sourceIllegal) |
| 1189 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1190 | "\xed\xa0\x80\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1191 | |
| 1192 | // U+DB40 U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1193 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1194 | ConvertUTFResultContainer(sourceIllegal) |
| 1195 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1196 | "\xed\xac\xa0\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1197 | |
| 1198 | // U+DB40 U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1199 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1200 | ConvertUTFResultContainer(sourceIllegal) |
| 1201 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1202 | "\xed\xac\xa0\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1203 | |
| 1204 | // U+DB40 U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1205 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1206 | ConvertUTFResultContainer(sourceIllegal) |
| 1207 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1208 | "\xed\xac\xa0\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1209 | |
| 1210 | // U+DBFF U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1211 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1212 | ConvertUTFResultContainer(sourceIllegal) |
| 1213 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1214 | "\xed\xaf\xbf\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1215 | |
| 1216 | // U+DBFF U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1217 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1218 | ConvertUTFResultContainer(sourceIllegal) |
| 1219 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1220 | "\xed\xaf\xbf\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1221 | |
| 1222 | // U+DBFF U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1223 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1224 | ConvertUTFResultContainer(sourceIllegal) |
| 1225 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1226 | "\xed\xaf\xbf\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1227 | |
| 1228 | // |
| 1229 | // Noncharacters |
| 1230 | // |
| 1231 | |
| 1232 | // Unicode 6.3.0: |
| 1233 | // |
| 1234 | // D14. Noncharacter: A code point that is permanently reserved for |
| 1235 | // internal use and that should never be interchanged. Noncharacters |
| 1236 | // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10) |
| 1237 | // and the values U+FDD0..U+FDEF. |
| 1238 | |
| 1239 | // U+FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1240 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1241 | ConvertUTFResultContainer(conversionOK).withScalars(0xfffe), |
| 1242 | "\xef\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1243 | |
| 1244 | // U+FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1245 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1246 | ConvertUTFResultContainer(conversionOK).withScalars(0xffff), |
| 1247 | "\xef\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1248 | |
| 1249 | // U+1FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1250 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1251 | ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe), |
| 1252 | "\xf0\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1253 | |
| 1254 | // U+1FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1255 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1256 | ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff), |
| 1257 | "\xf0\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1258 | |
| 1259 | // U+2FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1260 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1261 | ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe), |
| 1262 | "\xf0\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1263 | |
| 1264 | // U+2FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1265 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1266 | ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff), |
| 1267 | "\xf0\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1268 | |
| 1269 | // U+3FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1270 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1271 | ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe), |
| 1272 | "\xf0\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1273 | |
| 1274 | // U+3FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1275 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1276 | ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff), |
| 1277 | "\xf0\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1278 | |
| 1279 | // U+4FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1280 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1281 | ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe), |
| 1282 | "\xf1\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1283 | |
| 1284 | // U+4FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1285 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1286 | ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff), |
| 1287 | "\xf1\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1288 | |
| 1289 | // U+5FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1290 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1291 | ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe), |
| 1292 | "\xf1\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1293 | |
| 1294 | // U+5FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1295 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1296 | ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff), |
| 1297 | "\xf1\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1298 | |
| 1299 | // U+6FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1300 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1301 | ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe), |
| 1302 | "\xf1\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1303 | |
| 1304 | // U+6FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1305 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1306 | ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff), |
| 1307 | "\xf1\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1308 | |
| 1309 | // U+7FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1310 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1311 | ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe), |
| 1312 | "\xf1\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1313 | |
| 1314 | // U+7FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1315 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1316 | ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff), |
| 1317 | "\xf1\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1318 | |
| 1319 | // U+8FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1320 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1321 | ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe), |
| 1322 | "\xf2\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1323 | |
| 1324 | // U+8FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1325 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1326 | ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff), |
| 1327 | "\xf2\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1328 | |
| 1329 | // U+9FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1330 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1331 | ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe), |
| 1332 | "\xf2\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1333 | |
| 1334 | // U+9FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1335 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1336 | ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff), |
| 1337 | "\xf2\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1338 | |
| 1339 | // U+AFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1340 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1341 | ConvertUTFResultContainer(conversionOK).withScalars(0xafffe), |
| 1342 | "\xf2\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1343 | |
| 1344 | // U+AFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1345 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1346 | ConvertUTFResultContainer(conversionOK).withScalars(0xaffff), |
| 1347 | "\xf2\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1348 | |
| 1349 | // U+BFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1350 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1351 | ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe), |
| 1352 | "\xf2\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1353 | |
| 1354 | // U+BFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1355 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1356 | ConvertUTFResultContainer(conversionOK).withScalars(0xbffff), |
| 1357 | "\xf2\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1358 | |
| 1359 | // U+CFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1360 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1361 | ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe), |
| 1362 | "\xf3\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1363 | |
| 1364 | // U+CFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1365 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1366 | ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF), |
| 1367 | "\xf3\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1368 | |
| 1369 | // U+DFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1370 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1371 | ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe), |
| 1372 | "\xf3\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1373 | |
| 1374 | // U+DFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1375 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1376 | ConvertUTFResultContainer(conversionOK).withScalars(0xdffff), |
| 1377 | "\xf3\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1378 | |
| 1379 | // U+EFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1380 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1381 | ConvertUTFResultContainer(conversionOK).withScalars(0xefffe), |
| 1382 | "\xf3\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1383 | |
| 1384 | // U+EFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1385 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1386 | ConvertUTFResultContainer(conversionOK).withScalars(0xeffff), |
| 1387 | "\xf3\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1388 | |
| 1389 | // U+FFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1390 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1391 | ConvertUTFResultContainer(conversionOK).withScalars(0xffffe), |
| 1392 | "\xf3\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1393 | |
| 1394 | // U+FFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1395 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1396 | ConvertUTFResultContainer(conversionOK).withScalars(0xfffff), |
| 1397 | "\xf3\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1398 | |
| 1399 | // U+10FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1400 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1401 | ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe), |
| 1402 | "\xf4\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1403 | |
| 1404 | // U+10FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1405 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1406 | ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), |
| 1407 | "\xf4\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1408 | |
| 1409 | // U+FDD0 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1410 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1411 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0), |
| 1412 | "\xef\xb7\x90")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1413 | |
| 1414 | // U+FDD1 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1415 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1416 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1), |
| 1417 | "\xef\xb7\x91")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1418 | |
| 1419 | // U+FDD2 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1420 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1421 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2), |
| 1422 | "\xef\xb7\x92")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1423 | |
| 1424 | // U+FDD3 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1425 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1426 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3), |
| 1427 | "\xef\xb7\x93")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1428 | |
| 1429 | // U+FDD4 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1430 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1431 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4), |
| 1432 | "\xef\xb7\x94")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1433 | |
| 1434 | // U+FDD5 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1435 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1436 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5), |
| 1437 | "\xef\xb7\x95")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1438 | |
| 1439 | // U+FDD6 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1440 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1441 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6), |
| 1442 | "\xef\xb7\x96")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1443 | |
| 1444 | // U+FDD7 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1445 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1446 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7), |
| 1447 | "\xef\xb7\x97")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1448 | |
| 1449 | // U+FDD8 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1450 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1451 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8), |
| 1452 | "\xef\xb7\x98")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1453 | |
| 1454 | // U+FDD9 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1455 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1456 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9), |
| 1457 | "\xef\xb7\x99")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1458 | |
| 1459 | // U+FDDA |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1460 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1461 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdda), |
| 1462 | "\xef\xb7\x9a")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1463 | |
| 1464 | // U+FDDB |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1465 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1466 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddb), |
| 1467 | "\xef\xb7\x9b")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1468 | |
| 1469 | // U+FDDC |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1470 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1471 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddc), |
| 1472 | "\xef\xb7\x9c")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1473 | |
| 1474 | // U+FDDD |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1475 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1476 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddd), |
| 1477 | "\xef\xb7\x9d")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1478 | |
| 1479 | // U+FDDE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1480 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1481 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdde), |
| 1482 | "\xef\xb7\x9e")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1483 | |
| 1484 | // U+FDDF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1485 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1486 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddf), |
| 1487 | "\xef\xb7\x9f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1488 | |
| 1489 | // U+FDE0 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1490 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1491 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde0), |
| 1492 | "\xef\xb7\xa0")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1493 | |
| 1494 | // U+FDE1 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1495 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1496 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde1), |
| 1497 | "\xef\xb7\xa1")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1498 | |
| 1499 | // U+FDE2 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1500 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1501 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde2), |
| 1502 | "\xef\xb7\xa2")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1503 | |
| 1504 | // U+FDE3 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1505 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1506 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde3), |
| 1507 | "\xef\xb7\xa3")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1508 | |
| 1509 | // U+FDE4 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1510 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1511 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde4), |
| 1512 | "\xef\xb7\xa4")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1513 | |
| 1514 | // U+FDE5 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1515 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1516 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde5), |
| 1517 | "\xef\xb7\xa5")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1518 | |
| 1519 | // U+FDE6 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1520 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1521 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde6), |
| 1522 | "\xef\xb7\xa6")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1523 | |
| 1524 | // U+FDE7 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1525 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1526 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde7), |
| 1527 | "\xef\xb7\xa7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1528 | |
| 1529 | // U+FDE8 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1530 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1531 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde8), |
| 1532 | "\xef\xb7\xa8")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1533 | |
| 1534 | // U+FDE9 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1535 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1536 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde9), |
| 1537 | "\xef\xb7\xa9")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1538 | |
| 1539 | // U+FDEA |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1540 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1541 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdea), |
| 1542 | "\xef\xb7\xaa")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1543 | |
| 1544 | // U+FDEB |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1545 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1546 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb), |
| 1547 | "\xef\xb7\xab")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1548 | |
| 1549 | // U+FDEC |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1550 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1551 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdec), |
| 1552 | "\xef\xb7\xac")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1553 | |
| 1554 | // U+FDED |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1555 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1556 | ConvertUTFResultContainer(conversionOK).withScalars(0xfded), |
| 1557 | "\xef\xb7\xad")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1558 | |
| 1559 | // U+FDEE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1560 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1561 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdee), |
| 1562 | "\xef\xb7\xae")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1563 | |
| 1564 | // U+FDEF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1565 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1566 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdef), |
| 1567 | "\xef\xb7\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1568 | |
| 1569 | // U+FDF0 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1570 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1571 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0), |
| 1572 | "\xef\xb7\xb0")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1573 | |
| 1574 | // U+FDF1 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1575 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1576 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1), |
| 1577 | "\xef\xb7\xb1")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1578 | |
| 1579 | // U+FDF2 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1580 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1581 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2), |
| 1582 | "\xef\xb7\xb2")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1583 | |
| 1584 | // U+FDF3 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1585 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1586 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3), |
| 1587 | "\xef\xb7\xb3")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1588 | |
| 1589 | // U+FDF4 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1590 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1591 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4), |
| 1592 | "\xef\xb7\xb4")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1593 | |
| 1594 | // U+FDF5 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1595 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1596 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5), |
| 1597 | "\xef\xb7\xb5")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1598 | |
| 1599 | // U+FDF6 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1600 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1601 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6), |
| 1602 | "\xef\xb7\xb6")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1603 | |
| 1604 | // U+FDF7 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1605 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1606 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7), |
| 1607 | "\xef\xb7\xb7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1608 | |
| 1609 | // U+FDF8 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1610 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1611 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8), |
| 1612 | "\xef\xb7\xb8")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1613 | |
| 1614 | // U+FDF9 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1615 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1616 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9), |
| 1617 | "\xef\xb7\xb9")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1618 | |
| 1619 | // U+FDFA |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1620 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1621 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa), |
| 1622 | "\xef\xb7\xba")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1623 | |
| 1624 | // U+FDFB |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1625 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1626 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb), |
| 1627 | "\xef\xb7\xbb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1628 | |
| 1629 | // U+FDFC |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1630 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1631 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc), |
| 1632 | "\xef\xb7\xbc")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1633 | |
| 1634 | // U+FDFD |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1635 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1636 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd), |
| 1637 | "\xef\xb7\xbd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1638 | |
| 1639 | // U+FDFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1640 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1641 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe), |
| 1642 | "\xef\xb7\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1643 | |
| 1644 | // U+FDFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1645 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1646 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdff), |
| 1647 | "\xef\xb7\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1648 | } |
| 1649 | |
| 1650 | TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) { |
| 1651 | // U+0041 LATIN CAPITAL LETTER A |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1652 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1653 | ConvertUTFResultContainer(conversionOK).withScalars(0x0041), |
| 1654 | "\x41", true)); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1655 | |
| 1656 | // |
| 1657 | // Sequences with one continuation byte missing |
| 1658 | // |
| 1659 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1660 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1661 | ConvertUTFResultContainer(sourceExhausted), |
| 1662 | "\xc2", true)); |
| 1663 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1664 | ConvertUTFResultContainer(sourceExhausted), |
| 1665 | "\xdf", true)); |
| 1666 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1667 | ConvertUTFResultContainer(sourceExhausted), |
| 1668 | "\xe0\xa0", true)); |
| 1669 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1670 | ConvertUTFResultContainer(sourceExhausted), |
| 1671 | "\xe0\xbf", true)); |
| 1672 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1673 | ConvertUTFResultContainer(sourceExhausted), |
| 1674 | "\xe1\x80", true)); |
| 1675 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1676 | ConvertUTFResultContainer(sourceExhausted), |
| 1677 | "\xec\xbf", true)); |
| 1678 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1679 | ConvertUTFResultContainer(sourceExhausted), |
| 1680 | "\xed\x80", true)); |
| 1681 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1682 | ConvertUTFResultContainer(sourceExhausted), |
| 1683 | "\xed\x9f", true)); |
| 1684 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1685 | ConvertUTFResultContainer(sourceExhausted), |
| 1686 | "\xee\x80", true)); |
| 1687 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1688 | ConvertUTFResultContainer(sourceExhausted), |
| 1689 | "\xef\xbf", true)); |
| 1690 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1691 | ConvertUTFResultContainer(sourceExhausted), |
| 1692 | "\xf0\x90\x80", true)); |
| 1693 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1694 | ConvertUTFResultContainer(sourceExhausted), |
| 1695 | "\xf0\xbf\xbf", true)); |
| 1696 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1697 | ConvertUTFResultContainer(sourceExhausted), |
| 1698 | "\xf1\x80\x80", true)); |
| 1699 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1700 | ConvertUTFResultContainer(sourceExhausted), |
| 1701 | "\xf3\xbf\xbf", true)); |
| 1702 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1703 | ConvertUTFResultContainer(sourceExhausted), |
| 1704 | "\xf4\x80\x80", true)); |
| 1705 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1706 | ConvertUTFResultContainer(sourceExhausted), |
| 1707 | "\xf4\x8f\xbf", true)); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1708 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1709 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1710 | ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041), |
| 1711 | "\x41\xc2", true)); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1712 | } |
| 1713 | |