Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 1 | //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "llvm/Support/ConvertUTF.h" |
Mehdi Amini | b550cb1 | 2016-04-18 09:17:29 +0000 | [diff] [blame] | 11 | #include "llvm/ADT/ArrayRef.h" |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 12 | #include "gtest/gtest.h" |
| 13 | #include <string> |
Chandler Carruth | d990388 | 2015-01-14 11:23:27 +0000 | [diff] [blame] | 14 | #include <vector> |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 15 | |
| 16 | using namespace llvm; |
| 17 | |
| 18 | TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) { |
| 19 | // Src is the look of disapproval. |
| 20 | static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; |
| 21 | ArrayRef<char> Ref(Src, sizeof(Src) - 1); |
| 22 | std::string Result; |
| 23 | bool Success = convertUTF16ToUTF8String(Ref, Result); |
| 24 | EXPECT_TRUE(Success); |
| 25 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 26 | EXPECT_EQ(Expected, Result); |
| 27 | } |
| 28 | |
| 29 | TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) { |
| 30 | // Src is the look of disapproval. |
| 31 | static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0"; |
| 32 | ArrayRef<char> Ref(Src, sizeof(Src) - 1); |
| 33 | std::string Result; |
| 34 | bool Success = convertUTF16ToUTF8String(Ref, Result); |
| 35 | EXPECT_TRUE(Success); |
| 36 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 37 | EXPECT_EQ(Expected, Result); |
| 38 | } |
| 39 | |
Reid Kleckner | d8cb6b0 | 2015-01-26 19:51:00 +0000 | [diff] [blame] | 40 | TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) { |
| 41 | // Src is the look of disapproval. |
| 42 | static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0"; |
| 43 | StringRef Ref(Src, sizeof(Src) - 1); |
| 44 | SmallVector<UTF16, 5> Result; |
| 45 | bool Success = convertUTF8ToUTF16String(Ref, Result); |
| 46 | EXPECT_TRUE(Success); |
| 47 | static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0}; |
Eric Christopher | 7aebb32 | 2015-01-27 01:01:39 +0000 | [diff] [blame] | 48 | ASSERT_EQ(3u, Result.size()); |
Reid Kleckner | d8cb6b0 | 2015-01-26 19:51:00 +0000 | [diff] [blame] | 49 | for (int I = 0, E = 3; I != E; ++I) |
| 50 | EXPECT_EQ(Expected[I], Result[I]); |
| 51 | } |
| 52 | |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 53 | TEST(ConvertUTFTest, OddLengthInput) { |
| 54 | std::string Result; |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 55 | bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 56 | EXPECT_FALSE(Success); |
| 57 | } |
| 58 | |
| 59 | TEST(ConvertUTFTest, Empty) { |
| 60 | std::string Result; |
Marianne Mailhot-Sarrasin | 7423f40 | 2016-03-11 15:59:32 +0000 | [diff] [blame] | 61 | bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(None), Result); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 62 | EXPECT_TRUE(Success); |
| 63 | EXPECT_TRUE(Result.empty()); |
| 64 | } |
| 65 | |
| 66 | TEST(ConvertUTFTest, HasUTF16BOM) { |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 67 | bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 68 | EXPECT_TRUE(HasBOM); |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 69 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 70 | EXPECT_TRUE(HasBOM); |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 71 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 72 | EXPECT_TRUE(HasBOM); // Don't care about odd lengths. |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 73 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 74 | EXPECT_TRUE(HasBOM); |
| 75 | |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 76 | HasBOM = hasUTF16ByteOrderMark(None); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 77 | EXPECT_FALSE(HasBOM); |
Craig Topper | e1d1294 | 2014-08-27 05:25:25 +0000 | [diff] [blame] | 78 | HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1)); |
Reid Kleckner | 7df03c2 | 2013-07-16 17:14:33 +0000 | [diff] [blame] | 79 | EXPECT_FALSE(HasBOM); |
| 80 | } |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 81 | |
Marianne Mailhot-Sarrasin | 7423f40 | 2016-03-11 15:59:32 +0000 | [diff] [blame] | 82 | TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) { |
| 83 | // Src is the look of disapproval. |
| 84 | static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; |
| 85 | ArrayRef<UTF16> SrcRef = makeArrayRef((const UTF16 *)Src, 4); |
| 86 | std::string Result; |
| 87 | bool Success = convertUTF16ToUTF8String(SrcRef, Result); |
| 88 | EXPECT_TRUE(Success); |
| 89 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 90 | EXPECT_EQ(Expected, Result); |
| 91 | } |
| 92 | |
| 93 | TEST(ConvertUTFTest, ConvertUTF8toWide) { |
| 94 | // Src is the look of disapproval. |
| 95 | static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0"; |
| 96 | std::wstring Result; |
| 97 | bool Success = ConvertUTF8toWide((const char*)Src, Result); |
| 98 | EXPECT_TRUE(Success); |
| 99 | std::wstring Expected(L"\x0ca0_\x0ca0"); |
| 100 | EXPECT_EQ(Expected, Result); |
| 101 | Result.clear(); |
| 102 | Success = ConvertUTF8toWide(StringRef(Src, 7), Result); |
| 103 | EXPECT_TRUE(Success); |
| 104 | EXPECT_EQ(Expected, Result); |
| 105 | } |
| 106 | |
| 107 | TEST(ConvertUTFTest, convertWideToUTF8) { |
| 108 | // Src is the look of disapproval. |
| 109 | static const wchar_t Src[] = L"\x0ca0_\x0ca0"; |
| 110 | std::string Result; |
| 111 | bool Success = convertWideToUTF8(Src, Result); |
| 112 | EXPECT_TRUE(Success); |
| 113 | std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); |
| 114 | EXPECT_EQ(Expected, Result); |
| 115 | } |
| 116 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 117 | struct ConvertUTFResultContainer { |
| 118 | ConversionResult ErrorCode; |
| 119 | std::vector<unsigned> UnicodeScalars; |
| 120 | |
| 121 | ConvertUTFResultContainer(ConversionResult ErrorCode) |
| 122 | : ErrorCode(ErrorCode) {} |
| 123 | |
| 124 | ConvertUTFResultContainer |
| 125 | withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000, |
| 126 | unsigned US2 = 0x110000, unsigned US3 = 0x110000, |
| 127 | unsigned US4 = 0x110000, unsigned US5 = 0x110000, |
| 128 | unsigned US6 = 0x110000, unsigned US7 = 0x110000) { |
| 129 | ConvertUTFResultContainer Result(*this); |
| 130 | if (US0 != 0x110000) |
| 131 | Result.UnicodeScalars.push_back(US0); |
| 132 | if (US1 != 0x110000) |
| 133 | Result.UnicodeScalars.push_back(US1); |
| 134 | if (US2 != 0x110000) |
| 135 | Result.UnicodeScalars.push_back(US2); |
| 136 | if (US3 != 0x110000) |
| 137 | Result.UnicodeScalars.push_back(US3); |
| 138 | if (US4 != 0x110000) |
| 139 | Result.UnicodeScalars.push_back(US4); |
| 140 | if (US5 != 0x110000) |
| 141 | Result.UnicodeScalars.push_back(US5); |
| 142 | if (US6 != 0x110000) |
| 143 | Result.UnicodeScalars.push_back(US6); |
| 144 | if (US7 != 0x110000) |
| 145 | Result.UnicodeScalars.push_back(US7); |
| 146 | return Result; |
| 147 | } |
| 148 | }; |
| 149 | |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 150 | std::pair<ConversionResult, std::vector<unsigned>> |
| 151 | ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { |
| 152 | const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data()); |
| 153 | |
| 154 | const UTF8 *SourceNext = SourceStart; |
| 155 | std::vector<UTF32> Decoded(S.size(), 0); |
| 156 | UTF32 *TargetStart = Decoded.data(); |
| 157 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 158 | auto ErrorCode = |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 159 | ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart, |
| 160 | Decoded.data() + Decoded.size(), lenientConversion); |
| 161 | |
| 162 | Decoded.resize(TargetStart - Decoded.data()); |
| 163 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 164 | return std::make_pair(ErrorCode, Decoded); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 165 | } |
| 166 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 167 | std::pair<ConversionResult, std::vector<unsigned>> |
| 168 | ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { |
| 169 | const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data()); |
| 170 | |
| 171 | const UTF8 *SourceNext = SourceStart; |
| 172 | std::vector<UTF32> Decoded(S.size(), 0); |
| 173 | UTF32 *TargetStart = Decoded.data(); |
| 174 | |
| 175 | auto ErrorCode = ConvertUTF8toUTF32Partial( |
| 176 | &SourceNext, SourceStart + S.size(), &TargetStart, |
| 177 | Decoded.data() + Decoded.size(), lenientConversion); |
| 178 | |
| 179 | Decoded.resize(TargetStart - Decoded.data()); |
| 180 | |
| 181 | return std::make_pair(ErrorCode, Decoded); |
| 182 | } |
| 183 | |
| 184 | ::testing::AssertionResult |
| 185 | CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected, |
| 186 | StringRef S, bool Partial = false) { |
| 187 | ConversionResult ErrorCode; |
| 188 | std::vector<unsigned> Decoded; |
| 189 | if (!Partial) |
| 190 | std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S); |
| 191 | else |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 192 | std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S); |
Dmitri Gribenko | cbc7ae2 | 2015-01-10 05:03:29 +0000 | [diff] [blame] | 193 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 194 | if (Expected.ErrorCode != ErrorCode) |
| 195 | return ::testing::AssertionFailure() << "Expected error code " |
| 196 | << Expected.ErrorCode << ", actual " |
| 197 | << ErrorCode; |
| 198 | |
| 199 | if (Expected.UnicodeScalars != Decoded) |
| 200 | return ::testing::AssertionFailure() |
| 201 | << "Expected lenient decoded result:\n" |
| 202 | << ::testing::PrintToString(Expected.UnicodeScalars) << "\n" |
| 203 | << "Actual result:\n" << ::testing::PrintToString(Decoded); |
| 204 | |
| 205 | return ::testing::AssertionSuccess(); |
| 206 | } |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 207 | |
| 208 | TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { |
| 209 | |
| 210 | // |
| 211 | // 1-byte sequences |
| 212 | // |
| 213 | |
| 214 | // U+0041 LATIN CAPITAL LETTER A |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 215 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 216 | ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 217 | |
| 218 | // |
| 219 | // 2-byte sequences |
| 220 | // |
| 221 | |
| 222 | // U+0283 LATIN SMALL LETTER ESH |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 223 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 224 | ConvertUTFResultContainer(conversionOK).withScalars(0x0283), |
| 225 | "\xca\x83")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 226 | |
| 227 | // U+03BA GREEK SMALL LETTER KAPPA |
| 228 | // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA |
| 229 | // U+03C3 GREEK SMALL LETTER SIGMA |
| 230 | // U+03BC GREEK SMALL LETTER MU |
| 231 | // U+03B5 GREEK SMALL LETTER EPSILON |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 232 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 233 | ConvertUTFResultContainer(conversionOK) |
| 234 | .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), |
| 235 | "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 236 | |
| 237 | // |
| 238 | // 3-byte sequences |
| 239 | // |
| 240 | |
| 241 | // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B |
| 242 | // U+6587 CJK UNIFIED IDEOGRAPH-6587 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 243 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 244 | ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587), |
| 245 | "\xe4\xbe\x8b\xe6\x96\x87")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 246 | |
| 247 | // U+D55C HANGUL SYLLABLE HAN |
| 248 | // U+AE00 HANGUL SYLLABLE GEUL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 249 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 250 | ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00), |
| 251 | "\xed\x95\x9c\xea\xb8\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 252 | |
| 253 | // U+1112 HANGUL CHOSEONG HIEUH |
| 254 | // U+1161 HANGUL JUNGSEONG A |
| 255 | // U+11AB HANGUL JONGSEONG NIEUN |
| 256 | // U+1100 HANGUL CHOSEONG KIYEOK |
| 257 | // U+1173 HANGUL JUNGSEONG EU |
| 258 | // U+11AF HANGUL JONGSEONG RIEUL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 259 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 260 | ConvertUTFResultContainer(conversionOK) |
| 261 | .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), |
| 262 | "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" |
| 263 | "\xe1\x86\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 264 | |
| 265 | // |
| 266 | // 4-byte sequences |
| 267 | // |
| 268 | |
| 269 | // U+E0100 VARIATION SELECTOR-17 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 270 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 271 | ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100), |
| 272 | "\xf3\xa0\x84\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 273 | |
| 274 | // |
| 275 | // First possible sequence of a certain length |
| 276 | // |
| 277 | |
| 278 | // U+0000 NULL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 279 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 280 | ConvertUTFResultContainer(conversionOK).withScalars(0x0000), |
| 281 | StringRef("\x00", 1))); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 282 | |
| 283 | // U+0080 PADDING CHARACTER |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 284 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 285 | ConvertUTFResultContainer(conversionOK).withScalars(0x0080), |
| 286 | "\xc2\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 287 | |
| 288 | // U+0800 SAMARITAN LETTER ALAF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 289 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 290 | ConvertUTFResultContainer(conversionOK).withScalars(0x0800), |
| 291 | "\xe0\xa0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 292 | |
| 293 | // U+10000 LINEAR B SYLLABLE B008 A |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 294 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 295 | ConvertUTFResultContainer(conversionOK).withScalars(0x10000), |
| 296 | "\xf0\x90\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 297 | |
| 298 | // U+200000 (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 299 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 300 | ConvertUTFResultContainer(sourceIllegal) |
| 301 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 302 | "\xf8\x88\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 303 | |
| 304 | // U+4000000 (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 305 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 306 | ConvertUTFResultContainer(sourceIllegal) |
| 307 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 308 | "\xfc\x84\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 309 | |
| 310 | // |
| 311 | // Last possible sequence of a certain length |
| 312 | // |
| 313 | |
| 314 | // U+007F DELETE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 315 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 316 | ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 317 | |
| 318 | // U+07FF (unassigned) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 319 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 320 | ConvertUTFResultContainer(conversionOK).withScalars(0x07ff), |
| 321 | "\xdf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 322 | |
| 323 | // U+FFFF (noncharacter) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 324 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 325 | ConvertUTFResultContainer(conversionOK).withScalars(0xffff), |
| 326 | "\xef\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 327 | |
| 328 | // U+1FFFFF (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 329 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 330 | ConvertUTFResultContainer(sourceIllegal) |
| 331 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 332 | "\xf7\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 333 | |
| 334 | // U+3FFFFFF (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 335 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 336 | ConvertUTFResultContainer(sourceIllegal) |
| 337 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 338 | "\xfb\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 339 | |
| 340 | // U+7FFFFFFF (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 341 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 342 | ConvertUTFResultContainer(sourceIllegal) |
| 343 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 344 | "\xfd\xbf\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 345 | |
| 346 | // |
| 347 | // Other boundary conditions |
| 348 | // |
| 349 | |
| 350 | // U+D7FF (unassigned) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 351 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 352 | ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff), |
| 353 | "\xed\x9f\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 354 | |
| 355 | // U+E000 (private use) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 356 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 357 | ConvertUTFResultContainer(conversionOK).withScalars(0xe000), |
| 358 | "\xee\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 359 | |
| 360 | // U+FFFD REPLACEMENT CHARACTER |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 361 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 362 | ConvertUTFResultContainer(conversionOK).withScalars(0xfffd), |
| 363 | "\xef\xbf\xbd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 364 | |
| 365 | // U+10FFFF (noncharacter) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 366 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 367 | ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), |
| 368 | "\xf4\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 369 | |
| 370 | // U+110000 (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 371 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 372 | ConvertUTFResultContainer(sourceIllegal) |
| 373 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 374 | "\xf4\x90\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 375 | |
| 376 | // |
| 377 | // Unexpected continuation bytes |
| 378 | // |
| 379 | |
| 380 | // A sequence of unexpected continuation bytes that don't follow a first |
| 381 | // byte, every byte is a maximal subpart. |
| 382 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 383 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 384 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80")); |
| 385 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 386 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf")); |
| 387 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 388 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 389 | "\x80\x80")); |
| 390 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 391 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 392 | "\x80\xbf")); |
| 393 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 394 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 395 | "\xbf\x80")); |
| 396 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 397 | ConvertUTFResultContainer(sourceIllegal) |
| 398 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 399 | "\x80\xbf\x80")); |
| 400 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 401 | ConvertUTFResultContainer(sourceIllegal) |
| 402 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 403 | "\x80\xbf\x80\xbf")); |
| 404 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 405 | ConvertUTFResultContainer(sourceIllegal) |
| 406 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 407 | "\x80\xbf\x82\xbf\xaa")); |
| 408 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 409 | ConvertUTFResultContainer(sourceIllegal) |
| 410 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 411 | "\xaa\xb0\xbb\xbf\xaa\xa0")); |
| 412 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 413 | ConvertUTFResultContainer(sourceIllegal) |
| 414 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 415 | "\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 416 | |
| 417 | // All continuation bytes (0x80--0xbf). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 418 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 419 | ConvertUTFResultContainer(sourceIllegal) |
| 420 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 421 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 422 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 423 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 424 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 425 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 426 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 427 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 428 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 429 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 430 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 431 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 432 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 433 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 434 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 435 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 436 | "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" |
| 437 | "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" |
| 438 | "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" |
| 439 | "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 440 | |
| 441 | // |
| 442 | // Lonely start bytes |
| 443 | // |
| 444 | |
| 445 | // Start bytes of 2-byte sequences (0xc0--0xdf). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 446 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 447 | ConvertUTFResultContainer(sourceIllegal) |
| 448 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 449 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 450 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 451 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 452 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 453 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 454 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 455 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 456 | "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" |
| 457 | "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 458 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 459 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 460 | ConvertUTFResultContainer(sourceIllegal) |
| 461 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 462 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 463 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 464 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 465 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 466 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 467 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 468 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 469 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 470 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 471 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 472 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 473 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 474 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 475 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 476 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 477 | "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" |
| 478 | "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" |
| 479 | "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" |
| 480 | "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 481 | |
| 482 | // Start bytes of 3-byte sequences (0xe0--0xef). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 483 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 484 | ConvertUTFResultContainer(sourceIllegal) |
| 485 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 486 | 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 487 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 488 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 489 | "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 490 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 491 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 492 | ConvertUTFResultContainer(sourceIllegal) |
| 493 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 494 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 495 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 496 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 497 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 498 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 499 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 500 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 501 | "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" |
| 502 | "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 503 | |
| 504 | // Start bytes of 4-byte sequences (0xf0--0xf7). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 505 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 506 | ConvertUTFResultContainer(sourceIllegal) |
| 507 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| 508 | 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 509 | "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 510 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 511 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 512 | ConvertUTFResultContainer(sourceIllegal) |
| 513 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 514 | 0xfffd, 0x0020, 0xfffd, 0x0020) |
| 515 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 516 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 517 | "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 518 | |
| 519 | // Start bytes of 5-byte sequences (0xf8--0xfb). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 520 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 521 | ConvertUTFResultContainer(sourceIllegal) |
| 522 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 523 | "\xf8\xf9\xfa\xfb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 524 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 525 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 526 | ConvertUTFResultContainer(sourceIllegal) |
| 527 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 528 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 529 | "\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 530 | |
| 531 | // Start bytes of 6-byte sequences (0xfc--0xfd). |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 532 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 533 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 534 | "\xfc\xfd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 535 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 536 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 537 | ConvertUTFResultContainer(sourceIllegal) |
| 538 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020), |
| 539 | "\xfc\x20\xfd\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 540 | |
| 541 | // |
| 542 | // Other bytes (0xc0--0xc1, 0xfe--0xff). |
| 543 | // |
| 544 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 545 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 546 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0")); |
| 547 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 548 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1")); |
| 549 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 550 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe")); |
| 551 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 552 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 553 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 554 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 555 | ConvertUTFResultContainer(sourceIllegal) |
| 556 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 557 | "\xc0\xc1\xfe\xff")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 558 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 559 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 560 | ConvertUTFResultContainer(sourceIllegal) |
| 561 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 562 | "\xfe\xfe\xff\xff")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 563 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 564 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 565 | ConvertUTFResultContainer(sourceIllegal) |
| 566 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 567 | "\xfe\x80\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 568 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 569 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 570 | ConvertUTFResultContainer(sourceIllegal) |
| 571 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 572 | "\xff\x80\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 573 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 574 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 575 | ConvertUTFResultContainer(sourceIllegal) |
| 576 | .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, |
| 577 | 0xfffd, 0x0020, 0xfffd, 0x0020), |
| 578 | "\xc0\x20\xc1\x20\xfe\x20\xff\x20")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 579 | |
| 580 | // |
| 581 | // Sequences with one continuation byte missing |
| 582 | // |
| 583 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 584 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 585 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2")); |
| 586 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 587 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf")); |
| 588 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 589 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 590 | "\xe0\xa0")); |
| 591 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 592 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 593 | "\xe0\xbf")); |
| 594 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 595 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 596 | "\xe1\x80")); |
| 597 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 598 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 599 | "\xec\xbf")); |
| 600 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 601 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 602 | "\xed\x80")); |
| 603 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 604 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 605 | "\xed\x9f")); |
| 606 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 607 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 608 | "\xee\x80")); |
| 609 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 610 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 611 | "\xef\xbf")); |
| 612 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 613 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 614 | "\xf0\x90\x80")); |
| 615 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 616 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 617 | "\xf0\xbf\xbf")); |
| 618 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 619 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 620 | "\xf1\x80\x80")); |
| 621 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 622 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 623 | "\xf3\xbf\xbf")); |
| 624 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 625 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 626 | "\xf4\x80\x80")); |
| 627 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 628 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 629 | "\xf4\x8f\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 630 | |
| 631 | // Overlong sequences with one trailing byte missing. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 632 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 633 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 634 | "\xc0")); |
| 635 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 636 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 637 | "\xc1")); |
| 638 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 639 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 640 | "\xe0\x80")); |
| 641 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 642 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 643 | "\xe0\x9f")); |
| 644 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 645 | ConvertUTFResultContainer(sourceIllegal) |
| 646 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 647 | "\xf0\x80\x80")); |
| 648 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 649 | ConvertUTFResultContainer(sourceIllegal) |
| 650 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 651 | "\xf0\x8f\x80")); |
| 652 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 653 | ConvertUTFResultContainer(sourceIllegal) |
| 654 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 655 | "\xf8\x80\x80\x80")); |
| 656 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 657 | ConvertUTFResultContainer(sourceIllegal) |
| 658 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 659 | "\xfc\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 660 | |
| 661 | // Sequences that represent surrogates with one trailing byte missing. |
| 662 | // High surrogates |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 663 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 664 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 665 | "\xed\xa0")); |
| 666 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 667 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 668 | "\xed\xac")); |
| 669 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 670 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 671 | "\xed\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 672 | // Low surrogates |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 673 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 674 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 675 | "\xed\xb0")); |
| 676 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 677 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 678 | "\xed\xb4")); |
| 679 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 680 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 681 | "\xed\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 682 | |
| 683 | // Ill-formed 4-byte sequences. |
| 684 | // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 685 | // U+1100xx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 686 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 687 | ConvertUTFResultContainer(sourceIllegal) |
| 688 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 689 | "\xf4\x90\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 690 | // U+13FBxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 691 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 692 | ConvertUTFResultContainer(sourceIllegal) |
| 693 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 694 | "\xf4\xbf\xbf")); |
| 695 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 696 | ConvertUTFResultContainer(sourceIllegal) |
| 697 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 698 | "\xf5\x80\x80")); |
| 699 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 700 | ConvertUTFResultContainer(sourceIllegal) |
| 701 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 702 | "\xf6\x80\x80")); |
| 703 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 704 | ConvertUTFResultContainer(sourceIllegal) |
| 705 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 706 | "\xf7\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 707 | // U+1FFBxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 708 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 709 | ConvertUTFResultContainer(sourceIllegal) |
| 710 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 711 | "\xf7\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 712 | |
| 713 | // Ill-formed 5-byte sequences. |
| 714 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 715 | // U+2000xx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 716 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 717 | ConvertUTFResultContainer(sourceIllegal) |
| 718 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 719 | "\xf8\x88\x80\x80")); |
| 720 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 721 | ConvertUTFResultContainer(sourceIllegal) |
| 722 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 723 | "\xf8\xbf\xbf\xbf")); |
| 724 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 725 | ConvertUTFResultContainer(sourceIllegal) |
| 726 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 727 | "\xf9\x80\x80\x80")); |
| 728 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 729 | ConvertUTFResultContainer(sourceIllegal) |
| 730 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 731 | "\xfa\x80\x80\x80")); |
| 732 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 733 | ConvertUTFResultContainer(sourceIllegal) |
| 734 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 735 | "\xfb\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 736 | // U+3FFFFxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 737 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 738 | ConvertUTFResultContainer(sourceIllegal) |
| 739 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 740 | "\xfb\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 741 | |
| 742 | // Ill-formed 6-byte sequences. |
| 743 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 744 | // U+40000xx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 745 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 746 | ConvertUTFResultContainer(sourceIllegal) |
| 747 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 748 | "\xfc\x84\x80\x80\x80")); |
| 749 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 750 | ConvertUTFResultContainer(sourceIllegal) |
| 751 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 752 | "\xfc\xbf\xbf\xbf\xbf")); |
| 753 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 754 | ConvertUTFResultContainer(sourceIllegal) |
| 755 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 756 | "\xfd\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 757 | // U+7FFFFFxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 758 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 759 | ConvertUTFResultContainer(sourceIllegal) |
| 760 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 761 | "\xfd\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 762 | |
| 763 | // |
| 764 | // Sequences with two continuation bytes missing |
| 765 | // |
| 766 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 767 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 768 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 769 | "\xf0\x90")); |
| 770 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 771 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 772 | "\xf0\xbf")); |
| 773 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 774 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 775 | "\xf1\x80")); |
| 776 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 777 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 778 | "\xf3\xbf")); |
| 779 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 780 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 781 | "\xf4\x80")); |
| 782 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 783 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), |
| 784 | "\xf4\x8f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 785 | |
| 786 | // Overlong sequences with two trailing bytes missing. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 787 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 788 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0")); |
| 789 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 790 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 791 | "\xf0\x80")); |
| 792 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 793 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 794 | "\xf0\x8f")); |
| 795 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 796 | ConvertUTFResultContainer(sourceIllegal) |
| 797 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 798 | "\xf8\x80\x80")); |
| 799 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 800 | ConvertUTFResultContainer(sourceIllegal) |
| 801 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 802 | "\xfc\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 803 | |
| 804 | // Sequences that represent surrogates with two trailing bytes missing. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 805 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 806 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 807 | |
| 808 | // Ill-formed 4-byte sequences. |
| 809 | // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 810 | // U+110yxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 811 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 812 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 813 | "\xf4\x90")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 814 | // U+13Fyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 815 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 816 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 817 | "\xf4\xbf")); |
| 818 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 819 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 820 | "\xf5\x80")); |
| 821 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 822 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 823 | "\xf6\x80")); |
| 824 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 825 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 826 | "\xf7\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 827 | // U+1FFyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 828 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 829 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 830 | "\xf7\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 831 | |
| 832 | // Ill-formed 5-byte sequences. |
| 833 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 834 | // U+200yxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 835 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 836 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 837 | "\xf8\x88\x80")); |
| 838 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 839 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 840 | "\xf8\xbf\xbf")); |
| 841 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 842 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 843 | "\xf9\x80\x80")); |
| 844 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 845 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 846 | "\xfa\x80\x80")); |
| 847 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 848 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 849 | "\xfb\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 850 | // U+3FFFyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 851 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 852 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 853 | "\xfb\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 854 | |
| 855 | // Ill-formed 6-byte sequences. |
| 856 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 857 | // U+4000yxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 858 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 859 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 860 | "\xfc\x84\x80\x80")); |
| 861 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 862 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 863 | "\xfc\xbf\xbf\xbf")); |
| 864 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 865 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 866 | "\xfd\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 867 | // U+7FFFFyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 868 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 869 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 870 | "\xfd\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 871 | |
| 872 | // |
| 873 | // Sequences with three continuation bytes missing |
| 874 | // |
| 875 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 876 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 877 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); |
| 878 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 879 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1")); |
| 880 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 881 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2")); |
| 882 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 883 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3")); |
| 884 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 885 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 886 | |
| 887 | // Broken overlong sequences. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 888 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 889 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); |
| 890 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 891 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 892 | "\xf8\x80")); |
| 893 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 894 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 895 | "\xfc\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 896 | |
| 897 | // Ill-formed 4-byte sequences. |
| 898 | // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 899 | // U+14yyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 900 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 901 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5")); |
| 902 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 903 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 904 | // U+1Cyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 905 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 906 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 907 | |
| 908 | // Ill-formed 5-byte sequences. |
| 909 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 910 | // U+20yyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 911 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 912 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 913 | "\xf8\x88")); |
| 914 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 915 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 916 | "\xf8\xbf")); |
| 917 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 918 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 919 | "\xf9\x80")); |
| 920 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 921 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 922 | "\xfa\x80")); |
| 923 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 924 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 925 | "\xfb\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 926 | // U+3FCyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 927 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 928 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 929 | "\xfb\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 930 | |
| 931 | // Ill-formed 6-byte sequences. |
| 932 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 933 | // U+400yyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 934 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 935 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 936 | "\xfc\x84\x80")); |
| 937 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 938 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 939 | "\xfc\xbf\xbf")); |
| 940 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 941 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 942 | "\xfd\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 943 | // U+7FFCyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 944 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 945 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), |
| 946 | "\xfd\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 947 | |
| 948 | // |
| 949 | // Sequences with four continuation bytes missing |
| 950 | // |
| 951 | |
| 952 | // Ill-formed 5-byte sequences. |
| 953 | // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 954 | // U+uzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 955 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 956 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); |
| 957 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 958 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9")); |
| 959 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 960 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa")); |
| 961 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 962 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 963 | // U+3zyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 964 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 965 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 966 | |
| 967 | // Broken overlong sequences. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 968 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 969 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); |
| 970 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 971 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 972 | "\xfc\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 973 | |
| 974 | // Ill-formed 6-byte sequences. |
| 975 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 976 | // U+uzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 977 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 978 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 979 | "\xfc\x84")); |
| 980 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 981 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 982 | "\xfc\xbf")); |
| 983 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 984 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 985 | "\xfd\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 986 | // U+7Fzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 987 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 988 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 989 | "\xfd\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 990 | |
| 991 | // |
| 992 | // Sequences with five continuation bytes missing |
| 993 | // |
| 994 | |
| 995 | // Ill-formed 6-byte sequences. |
| 996 | // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx |
| 997 | // U+uzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 998 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 999 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1000 | // U+uuzzyyxx (invalid) |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1001 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1002 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1003 | |
| 1004 | // |
| 1005 | // Consecutive sequences with trailing bytes missing |
| 1006 | // |
| 1007 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1008 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1009 | ConvertUTFResultContainer(sourceIllegal) |
| 1010 | .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) |
| 1011 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 1012 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 1013 | .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) |
| 1014 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) |
| 1015 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1016 | "\xc0" "\xe0\x80" "\xf0\x80\x80" |
| 1017 | "\xf8\x80\x80\x80" |
| 1018 | "\xfc\x80\x80\x80\x80" |
| 1019 | "\xdf" "\xef\xbf" "\xf7\xbf\xbf" |
| 1020 | "\xfb\xbf\xbf\xbf" |
| 1021 | "\xfd\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1022 | |
| 1023 | // |
| 1024 | // Overlong UTF-8 sequences |
| 1025 | // |
| 1026 | |
| 1027 | // U+002F SOLIDUS |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1028 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1029 | ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1030 | |
| 1031 | // Overlong sequences of the above. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1032 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1033 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1034 | "\xc0\xaf")); |
| 1035 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1036 | ConvertUTFResultContainer(sourceIllegal) |
| 1037 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1038 | "\xe0\x80\xaf")); |
| 1039 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1040 | ConvertUTFResultContainer(sourceIllegal) |
| 1041 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1042 | "\xf0\x80\x80\xaf")); |
| 1043 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1044 | ConvertUTFResultContainer(sourceIllegal) |
| 1045 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1046 | "\xf8\x80\x80\x80\xaf")); |
| 1047 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1048 | ConvertUTFResultContainer(sourceIllegal) |
| 1049 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1050 | "\xfc\x80\x80\x80\x80\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1051 | |
| 1052 | // U+0000 NULL |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1053 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1054 | ConvertUTFResultContainer(conversionOK).withScalars(0x0000), |
| 1055 | StringRef("\x00", 1))); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1056 | |
| 1057 | // Overlong sequences of the above. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1058 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1059 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1060 | "\xc0\x80")); |
| 1061 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1062 | ConvertUTFResultContainer(sourceIllegal) |
| 1063 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1064 | "\xe0\x80\x80")); |
| 1065 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1066 | ConvertUTFResultContainer(sourceIllegal) |
| 1067 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1068 | "\xf0\x80\x80\x80")); |
| 1069 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1070 | ConvertUTFResultContainer(sourceIllegal) |
| 1071 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1072 | "\xf8\x80\x80\x80\x80")); |
| 1073 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1074 | ConvertUTFResultContainer(sourceIllegal) |
| 1075 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1076 | "\xfc\x80\x80\x80\x80\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1077 | |
| 1078 | // Other overlong sequences. |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1079 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1080 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1081 | "\xc0\xbf")); |
| 1082 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1083 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1084 | "\xc1\x80")); |
| 1085 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1086 | ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), |
| 1087 | "\xc1\xbf")); |
| 1088 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1089 | ConvertUTFResultContainer(sourceIllegal) |
| 1090 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1091 | "\xe0\x9f\xbf")); |
| 1092 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1093 | ConvertUTFResultContainer(sourceIllegal) |
| 1094 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1095 | "\xed\xa0\x80")); |
| 1096 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1097 | ConvertUTFResultContainer(sourceIllegal) |
| 1098 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1099 | "\xed\xbf\xbf")); |
| 1100 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1101 | ConvertUTFResultContainer(sourceIllegal) |
| 1102 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1103 | "\xf0\x8f\x80\x80")); |
| 1104 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1105 | ConvertUTFResultContainer(sourceIllegal) |
| 1106 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1107 | "\xf0\x8f\xbf\xbf")); |
| 1108 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1109 | ConvertUTFResultContainer(sourceIllegal) |
| 1110 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1111 | "\xf8\x87\xbf\xbf\xbf")); |
| 1112 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1113 | ConvertUTFResultContainer(sourceIllegal) |
| 1114 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1115 | "\xfc\x83\xbf\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1116 | |
| 1117 | // |
| 1118 | // Isolated surrogates |
| 1119 | // |
| 1120 | |
| 1121 | // Unicode 6.3.0: |
| 1122 | // |
| 1123 | // D71. High-surrogate code point: A Unicode code point in the range |
| 1124 | // U+D800 to U+DBFF. |
| 1125 | // |
| 1126 | // D73. Low-surrogate code point: A Unicode code point in the range |
| 1127 | // U+DC00 to U+DFFF. |
| 1128 | |
| 1129 | // Note: U+E0100 is <DB40 DD00> in UTF16. |
| 1130 | |
| 1131 | // High surrogates |
| 1132 | |
| 1133 | // U+D800 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1134 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1135 | ConvertUTFResultContainer(sourceIllegal) |
| 1136 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1137 | "\xed\xa0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1138 | |
| 1139 | // U+DB40 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1140 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1141 | ConvertUTFResultContainer(sourceIllegal) |
| 1142 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1143 | "\xed\xac\xa0")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1144 | |
| 1145 | // U+DBFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1146 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1147 | ConvertUTFResultContainer(sourceIllegal) |
| 1148 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1149 | "\xed\xaf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1150 | |
| 1151 | // Low surrogates |
| 1152 | |
| 1153 | // U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1154 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1155 | ConvertUTFResultContainer(sourceIllegal) |
| 1156 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1157 | "\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1158 | |
| 1159 | // U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1160 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1161 | ConvertUTFResultContainer(sourceIllegal) |
| 1162 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1163 | "\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1164 | |
| 1165 | // U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1166 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1167 | ConvertUTFResultContainer(sourceIllegal) |
| 1168 | .withScalars(0xfffd, 0xfffd, 0xfffd), |
| 1169 | "\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1170 | |
| 1171 | // Surrogate pairs |
| 1172 | |
| 1173 | // U+D800 U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1174 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1175 | ConvertUTFResultContainer(sourceIllegal) |
| 1176 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1177 | "\xed\xa0\x80\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1178 | |
| 1179 | // U+D800 U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1180 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1181 | ConvertUTFResultContainer(sourceIllegal) |
| 1182 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1183 | "\xed\xa0\x80\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1184 | |
| 1185 | // U+D800 U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1186 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1187 | ConvertUTFResultContainer(sourceIllegal) |
| 1188 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1189 | "\xed\xa0\x80\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1190 | |
| 1191 | // U+DB40 U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1192 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1193 | ConvertUTFResultContainer(sourceIllegal) |
| 1194 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1195 | "\xed\xac\xa0\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1196 | |
| 1197 | // U+DB40 U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1198 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1199 | ConvertUTFResultContainer(sourceIllegal) |
| 1200 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1201 | "\xed\xac\xa0\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1202 | |
| 1203 | // U+DB40 U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1204 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1205 | ConvertUTFResultContainer(sourceIllegal) |
| 1206 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1207 | "\xed\xac\xa0\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1208 | |
| 1209 | // U+DBFF U+DC00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1210 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1211 | ConvertUTFResultContainer(sourceIllegal) |
| 1212 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1213 | "\xed\xaf\xbf\xed\xb0\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1214 | |
| 1215 | // U+DBFF U+DD00 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1216 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1217 | ConvertUTFResultContainer(sourceIllegal) |
| 1218 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1219 | "\xed\xaf\xbf\xed\xb4\x80")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1220 | |
| 1221 | // U+DBFF U+DFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1222 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1223 | ConvertUTFResultContainer(sourceIllegal) |
| 1224 | .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), |
| 1225 | "\xed\xaf\xbf\xed\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1226 | |
| 1227 | // |
| 1228 | // Noncharacters |
| 1229 | // |
| 1230 | |
| 1231 | // Unicode 6.3.0: |
| 1232 | // |
| 1233 | // D14. Noncharacter: A code point that is permanently reserved for |
| 1234 | // internal use and that should never be interchanged. Noncharacters |
| 1235 | // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10) |
| 1236 | // and the values U+FDD0..U+FDEF. |
| 1237 | |
| 1238 | // U+FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1239 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1240 | ConvertUTFResultContainer(conversionOK).withScalars(0xfffe), |
| 1241 | "\xef\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1242 | |
| 1243 | // U+FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1244 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1245 | ConvertUTFResultContainer(conversionOK).withScalars(0xffff), |
| 1246 | "\xef\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1247 | |
| 1248 | // U+1FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1249 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1250 | ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe), |
| 1251 | "\xf0\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1252 | |
| 1253 | // U+1FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1254 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1255 | ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff), |
| 1256 | "\xf0\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1257 | |
| 1258 | // U+2FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1259 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1260 | ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe), |
| 1261 | "\xf0\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1262 | |
| 1263 | // U+2FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1264 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1265 | ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff), |
| 1266 | "\xf0\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1267 | |
| 1268 | // U+3FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1269 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1270 | ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe), |
| 1271 | "\xf0\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1272 | |
| 1273 | // U+3FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1274 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1275 | ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff), |
| 1276 | "\xf0\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1277 | |
| 1278 | // U+4FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1279 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1280 | ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe), |
| 1281 | "\xf1\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1282 | |
| 1283 | // U+4FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1284 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1285 | ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff), |
| 1286 | "\xf1\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1287 | |
| 1288 | // U+5FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1289 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1290 | ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe), |
| 1291 | "\xf1\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1292 | |
| 1293 | // U+5FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1294 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1295 | ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff), |
| 1296 | "\xf1\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1297 | |
| 1298 | // U+6FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1299 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1300 | ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe), |
| 1301 | "\xf1\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1302 | |
| 1303 | // U+6FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1304 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1305 | ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff), |
| 1306 | "\xf1\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1307 | |
| 1308 | // U+7FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1309 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1310 | ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe), |
| 1311 | "\xf1\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1312 | |
| 1313 | // U+7FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1314 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1315 | ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff), |
| 1316 | "\xf1\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1317 | |
| 1318 | // U+8FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1319 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1320 | ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe), |
| 1321 | "\xf2\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1322 | |
| 1323 | // U+8FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1324 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1325 | ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff), |
| 1326 | "\xf2\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1327 | |
| 1328 | // U+9FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1329 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1330 | ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe), |
| 1331 | "\xf2\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1332 | |
| 1333 | // U+9FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1334 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1335 | ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff), |
| 1336 | "\xf2\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1337 | |
| 1338 | // U+AFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1339 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1340 | ConvertUTFResultContainer(conversionOK).withScalars(0xafffe), |
| 1341 | "\xf2\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1342 | |
| 1343 | // U+AFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1344 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1345 | ConvertUTFResultContainer(conversionOK).withScalars(0xaffff), |
| 1346 | "\xf2\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1347 | |
| 1348 | // U+BFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1349 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1350 | ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe), |
| 1351 | "\xf2\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1352 | |
| 1353 | // U+BFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1354 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1355 | ConvertUTFResultContainer(conversionOK).withScalars(0xbffff), |
| 1356 | "\xf2\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1357 | |
| 1358 | // U+CFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1359 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1360 | ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe), |
| 1361 | "\xf3\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1362 | |
| 1363 | // U+CFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1364 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1365 | ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF), |
| 1366 | "\xf3\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1367 | |
| 1368 | // U+DFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1369 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1370 | ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe), |
| 1371 | "\xf3\x9f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1372 | |
| 1373 | // U+DFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1374 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1375 | ConvertUTFResultContainer(conversionOK).withScalars(0xdffff), |
| 1376 | "\xf3\x9f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1377 | |
| 1378 | // U+EFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1379 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1380 | ConvertUTFResultContainer(conversionOK).withScalars(0xefffe), |
| 1381 | "\xf3\xaf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1382 | |
| 1383 | // U+EFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1384 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1385 | ConvertUTFResultContainer(conversionOK).withScalars(0xeffff), |
| 1386 | "\xf3\xaf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1387 | |
| 1388 | // U+FFFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1389 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1390 | ConvertUTFResultContainer(conversionOK).withScalars(0xffffe), |
| 1391 | "\xf3\xbf\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1392 | |
| 1393 | // U+FFFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1394 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1395 | ConvertUTFResultContainer(conversionOK).withScalars(0xfffff), |
| 1396 | "\xf3\xbf\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1397 | |
| 1398 | // U+10FFFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1399 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1400 | ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe), |
| 1401 | "\xf4\x8f\xbf\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1402 | |
| 1403 | // U+10FFFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1404 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1405 | ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), |
| 1406 | "\xf4\x8f\xbf\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1407 | |
| 1408 | // U+FDD0 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1409 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1410 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0), |
| 1411 | "\xef\xb7\x90")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1412 | |
| 1413 | // U+FDD1 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1414 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1415 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1), |
| 1416 | "\xef\xb7\x91")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1417 | |
| 1418 | // U+FDD2 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1419 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1420 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2), |
| 1421 | "\xef\xb7\x92")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1422 | |
| 1423 | // U+FDD3 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1424 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1425 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3), |
| 1426 | "\xef\xb7\x93")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1427 | |
| 1428 | // U+FDD4 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1429 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1430 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4), |
| 1431 | "\xef\xb7\x94")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1432 | |
| 1433 | // U+FDD5 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1434 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1435 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5), |
| 1436 | "\xef\xb7\x95")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1437 | |
| 1438 | // U+FDD6 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1439 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1440 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6), |
| 1441 | "\xef\xb7\x96")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1442 | |
| 1443 | // U+FDD7 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1444 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1445 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7), |
| 1446 | "\xef\xb7\x97")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1447 | |
| 1448 | // U+FDD8 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1449 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1450 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8), |
| 1451 | "\xef\xb7\x98")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1452 | |
| 1453 | // U+FDD9 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1454 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1455 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9), |
| 1456 | "\xef\xb7\x99")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1457 | |
| 1458 | // U+FDDA |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1459 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1460 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdda), |
| 1461 | "\xef\xb7\x9a")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1462 | |
| 1463 | // U+FDDB |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1464 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1465 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddb), |
| 1466 | "\xef\xb7\x9b")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1467 | |
| 1468 | // U+FDDC |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1469 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1470 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddc), |
| 1471 | "\xef\xb7\x9c")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1472 | |
| 1473 | // U+FDDD |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1474 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1475 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddd), |
| 1476 | "\xef\xb7\x9d")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1477 | |
| 1478 | // U+FDDE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1479 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1480 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdde), |
| 1481 | "\xef\xb7\x9e")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1482 | |
| 1483 | // U+FDDF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1484 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1485 | ConvertUTFResultContainer(conversionOK).withScalars(0xfddf), |
| 1486 | "\xef\xb7\x9f")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1487 | |
| 1488 | // U+FDE0 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1489 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1490 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde0), |
| 1491 | "\xef\xb7\xa0")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1492 | |
| 1493 | // U+FDE1 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1494 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1495 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde1), |
| 1496 | "\xef\xb7\xa1")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1497 | |
| 1498 | // U+FDE2 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1499 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1500 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde2), |
| 1501 | "\xef\xb7\xa2")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1502 | |
| 1503 | // U+FDE3 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1504 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1505 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde3), |
| 1506 | "\xef\xb7\xa3")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1507 | |
| 1508 | // U+FDE4 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1509 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1510 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde4), |
| 1511 | "\xef\xb7\xa4")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1512 | |
| 1513 | // U+FDE5 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1514 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1515 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde5), |
| 1516 | "\xef\xb7\xa5")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1517 | |
| 1518 | // U+FDE6 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1519 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1520 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde6), |
| 1521 | "\xef\xb7\xa6")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1522 | |
| 1523 | // U+FDE7 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1524 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1525 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde7), |
| 1526 | "\xef\xb7\xa7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1527 | |
| 1528 | // U+FDE8 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1529 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1530 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde8), |
| 1531 | "\xef\xb7\xa8")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1532 | |
| 1533 | // U+FDE9 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1534 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1535 | ConvertUTFResultContainer(conversionOK).withScalars(0xfde9), |
| 1536 | "\xef\xb7\xa9")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1537 | |
| 1538 | // U+FDEA |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1539 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1540 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdea), |
| 1541 | "\xef\xb7\xaa")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1542 | |
| 1543 | // U+FDEB |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1544 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1545 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb), |
| 1546 | "\xef\xb7\xab")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1547 | |
| 1548 | // U+FDEC |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1549 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1550 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdec), |
| 1551 | "\xef\xb7\xac")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1552 | |
| 1553 | // U+FDED |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1554 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1555 | ConvertUTFResultContainer(conversionOK).withScalars(0xfded), |
| 1556 | "\xef\xb7\xad")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1557 | |
| 1558 | // U+FDEE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1559 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1560 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdee), |
| 1561 | "\xef\xb7\xae")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1562 | |
| 1563 | // U+FDEF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1564 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1565 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdef), |
| 1566 | "\xef\xb7\xaf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1567 | |
| 1568 | // U+FDF0 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1569 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1570 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0), |
| 1571 | "\xef\xb7\xb0")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1572 | |
| 1573 | // U+FDF1 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1574 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1575 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1), |
| 1576 | "\xef\xb7\xb1")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1577 | |
| 1578 | // U+FDF2 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1579 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1580 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2), |
| 1581 | "\xef\xb7\xb2")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1582 | |
| 1583 | // U+FDF3 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1584 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1585 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3), |
| 1586 | "\xef\xb7\xb3")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1587 | |
| 1588 | // U+FDF4 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1589 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1590 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4), |
| 1591 | "\xef\xb7\xb4")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1592 | |
| 1593 | // U+FDF5 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1594 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1595 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5), |
| 1596 | "\xef\xb7\xb5")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1597 | |
| 1598 | // U+FDF6 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1599 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1600 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6), |
| 1601 | "\xef\xb7\xb6")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1602 | |
| 1603 | // U+FDF7 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1604 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1605 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7), |
| 1606 | "\xef\xb7\xb7")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1607 | |
| 1608 | // U+FDF8 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1609 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1610 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8), |
| 1611 | "\xef\xb7\xb8")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1612 | |
| 1613 | // U+FDF9 |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1614 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1615 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9), |
| 1616 | "\xef\xb7\xb9")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1617 | |
| 1618 | // U+FDFA |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1619 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1620 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa), |
| 1621 | "\xef\xb7\xba")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1622 | |
| 1623 | // U+FDFB |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1624 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1625 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb), |
| 1626 | "\xef\xb7\xbb")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1627 | |
| 1628 | // U+FDFC |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1629 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1630 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc), |
| 1631 | "\xef\xb7\xbc")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1632 | |
| 1633 | // U+FDFD |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1634 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1635 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd), |
| 1636 | "\xef\xb7\xbd")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1637 | |
| 1638 | // U+FDFE |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1639 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1640 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe), |
| 1641 | "\xef\xb7\xbe")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1642 | |
| 1643 | // U+FDFF |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1644 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1645 | ConvertUTFResultContainer(conversionOK).withScalars(0xfdff), |
| 1646 | "\xef\xb7\xbf")); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1647 | } |
| 1648 | |
| 1649 | TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) { |
| 1650 | // U+0041 LATIN CAPITAL LETTER A |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1651 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1652 | ConvertUTFResultContainer(conversionOK).withScalars(0x0041), |
| 1653 | "\x41", true)); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1654 | |
| 1655 | // |
| 1656 | // Sequences with one continuation byte missing |
| 1657 | // |
| 1658 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1659 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1660 | ConvertUTFResultContainer(sourceExhausted), |
| 1661 | "\xc2", true)); |
| 1662 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1663 | ConvertUTFResultContainer(sourceExhausted), |
| 1664 | "\xdf", true)); |
| 1665 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1666 | ConvertUTFResultContainer(sourceExhausted), |
| 1667 | "\xe0\xa0", true)); |
| 1668 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1669 | ConvertUTFResultContainer(sourceExhausted), |
| 1670 | "\xe0\xbf", true)); |
| 1671 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1672 | ConvertUTFResultContainer(sourceExhausted), |
| 1673 | "\xe1\x80", true)); |
| 1674 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1675 | ConvertUTFResultContainer(sourceExhausted), |
| 1676 | "\xec\xbf", true)); |
| 1677 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1678 | ConvertUTFResultContainer(sourceExhausted), |
| 1679 | "\xed\x80", true)); |
| 1680 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1681 | ConvertUTFResultContainer(sourceExhausted), |
| 1682 | "\xed\x9f", true)); |
| 1683 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1684 | ConvertUTFResultContainer(sourceExhausted), |
| 1685 | "\xee\x80", true)); |
| 1686 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1687 | ConvertUTFResultContainer(sourceExhausted), |
| 1688 | "\xef\xbf", true)); |
| 1689 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1690 | ConvertUTFResultContainer(sourceExhausted), |
| 1691 | "\xf0\x90\x80", true)); |
| 1692 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1693 | ConvertUTFResultContainer(sourceExhausted), |
| 1694 | "\xf0\xbf\xbf", true)); |
| 1695 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1696 | ConvertUTFResultContainer(sourceExhausted), |
| 1697 | "\xf1\x80\x80", true)); |
| 1698 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1699 | ConvertUTFResultContainer(sourceExhausted), |
| 1700 | "\xf3\xbf\xbf", true)); |
| 1701 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1702 | ConvertUTFResultContainer(sourceExhausted), |
| 1703 | "\xf4\x80\x80", true)); |
| 1704 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1705 | ConvertUTFResultContainer(sourceExhausted), |
| 1706 | "\xf4\x8f\xbf", true)); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1707 | |
Dmitri Gribenko | ebdd0a5 | 2014-06-17 09:33:24 +0000 | [diff] [blame] | 1708 | EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( |
| 1709 | ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041), |
| 1710 | "\x41\xc2", true)); |
Dmitri Gribenko | 1089db0 | 2014-06-16 11:09:46 +0000 | [diff] [blame] | 1711 | } |
| 1712 | |