license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 1 | // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 4 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 5 | #include <math.h> |
mmentovai@google.com | ae034d1 | 2008-08-13 04:23:14 +0900 | [diff] [blame] | 6 | #include <stdarg.h> |
mmentovai@google.com | ec644e4 | 2008-08-13 03:48:58 +0900 | [diff] [blame] | 7 | |
deanm@google.com | d649845 | 2008-08-13 20:09:33 +0900 | [diff] [blame] | 8 | #include <limits> |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 9 | #include <sstream> |
| 10 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 11 | #include "base/basictypes.h" |
| 12 | #include "base/logging.h" |
| 13 | #include "base/string_util.h" |
| 14 | #include "testing/gtest/include/gtest/gtest.h" |
| 15 | |
| 16 | namespace { |
| 17 | } |
| 18 | |
| 19 | static const struct trim_case { |
| 20 | const wchar_t* input; |
| 21 | const TrimPositions positions; |
| 22 | const wchar_t* output; |
| 23 | const TrimPositions return_value; |
| 24 | } trim_cases[] = { |
| 25 | {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, |
| 26 | {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, |
| 27 | {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, |
| 28 | {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, |
| 29 | {L"", TRIM_ALL, L"", TRIM_NONE}, |
| 30 | {L" ", TRIM_LEADING, L"", TRIM_LEADING}, |
| 31 | {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, |
| 32 | {L" ", TRIM_ALL, L"", TRIM_ALL}, |
| 33 | {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, |
| 34 | {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, |
| 35 | }; |
| 36 | |
| 37 | static const struct trim_case_ascii { |
| 38 | const char* input; |
| 39 | const TrimPositions positions; |
| 40 | const char* output; |
| 41 | const TrimPositions return_value; |
| 42 | } trim_cases_ascii[] = { |
| 43 | {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, |
| 44 | {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, |
| 45 | {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, |
| 46 | {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, |
| 47 | {"", TRIM_ALL, "", TRIM_NONE}, |
| 48 | {" ", TRIM_LEADING, "", TRIM_LEADING}, |
| 49 | {" ", TRIM_TRAILING, "", TRIM_TRAILING}, |
| 50 | {" ", TRIM_ALL, "", TRIM_ALL}, |
| 51 | {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, |
| 52 | {"\x85Test String\xa0\x20", TRIM_ALL, "Test String", TRIM_ALL}, |
| 53 | }; |
| 54 | |
| 55 | TEST(StringUtilTest, TrimWhitespace) { |
| 56 | std::wstring output; // Allow contents to carry over to next testcase |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 57 | for (size_t i = 0; i < arraysize(trim_cases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 58 | const trim_case& value = trim_cases[i]; |
| 59 | EXPECT_EQ(value.return_value, |
| 60 | TrimWhitespace(value.input, value.positions, &output)); |
| 61 | EXPECT_EQ(value.output, output); |
| 62 | } |
| 63 | |
| 64 | // Test that TrimWhitespace() can take the same string for input and output |
| 65 | output = L" This is a test \r\n"; |
| 66 | EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); |
| 67 | EXPECT_EQ(L"This is a test", output); |
| 68 | |
| 69 | // Once more, but with a string of whitespace |
| 70 | output = L" \r\n"; |
| 71 | EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); |
| 72 | EXPECT_EQ(L"", output); |
| 73 | |
| 74 | std::string output_ascii; |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 75 | for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 76 | const trim_case_ascii& value = trim_cases_ascii[i]; |
| 77 | EXPECT_EQ(value.return_value, |
| 78 | TrimWhitespace(value.input, value.positions, &output_ascii)); |
| 79 | EXPECT_EQ(value.output, output_ascii); |
| 80 | } |
| 81 | } |
| 82 | |
// One CollapseWhitespace() scenario: the text handed in, the flag passed as
// the function's second argument, and the text expected back.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

static const collapse_case collapse_cases[] = {
  // Flag off.
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L" ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L" Test \n \t String ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L" Test String", false, L"Test String"},
  {L"Test String ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // Flag on.
  {L"", true, L""},
  {L"\n", true, L""},
  {L" \r ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r Foo ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L" \tFoo bar \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
| 108 | |
| 109 | TEST(StringUtilTest, CollapseWhitespace) { |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 110 | for (size_t i = 0; i < arraysize(collapse_cases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 111 | const collapse_case& value = collapse_cases[i]; |
| 112 | EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); |
| 113 | } |
| 114 | } |
| 115 | |
// Wide strings that the conversion tests push through UTF-8 and back.
static const wchar_t* const kConvertRoundtripCases[] = {
  // Plain ASCII.
  L"Google Video",
  // Chinese: "网页 图片 资讯更多 »"
  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
  // Greek: "Παγκόσμιος Ιστός"
  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
  // Russian: "Поиск страниц на русском"
  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
  // Korean: "전체서비스"
  L"\xc804\xccb4\xc11c\xbe44\xc2a4",

  // Code points above U+FFFF. How they are spelled depends on whether
  // wchar_t holds UTF-16 code units or whole UTF-32 code points.
#if defined(WCHAR_T_IS_UTF16)
  // U+10300 as a surrogate pair.
  L"\xd800\xdf00",
  // U+11D40 - U+11D44 as surrogate pairs.
  // NOTE(review): an earlier comment called these "Mathematical Alphanumeric
  // Symbols A,B,C,D,E", but that block starts at U+1D400 -- confirm which
  // code points were intended.
  L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
#elif defined(WCHAR_T_IS_UTF32)
  // U+10300 as a single code point.
  L"\x10300",
  // U+11D40 - U+11D44 (see the review note on the UTF-16 spelling above).
  L"\x11d40\x11d41\x11d42\x11d43\x11d44",
#endif
};
| 142 | |
| 143 | TEST(StringUtilTest, ConvertUTF8AndWide) { |
| 144 | // we round-trip all the wide strings through UTF-8 to make sure everything |
| 145 | // agrees on the conversion. This uses the stream operators to test them |
| 146 | // simultaneously. |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 147 | for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 148 | std::ostringstream utf8; |
| 149 | utf8 << WideToUTF8(kConvertRoundtripCases[i]); |
| 150 | std::wostringstream wide; |
| 151 | wide << UTF8ToWide(utf8.str()); |
| 152 | |
| 153 | EXPECT_EQ(kConvertRoundtripCases[i], wide.str()); |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | TEST(StringUtilTest, ConvertUTF8AndWideEmptyString) { |
| 158 | // An empty std::wstring should be converted to an empty std::string, |
| 159 | // and vice versa. |
| 160 | std::wstring wempty; |
| 161 | std::string empty; |
| 162 | EXPECT_EQ(empty, WideToUTF8(wempty)); |
| 163 | EXPECT_EQ(wempty, UTF8ToWide(empty)); |
| 164 | } |
| 165 | |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 166 | TEST(StringUtilTest, ConvertUTF8ToWide) { |
| 167 | struct UTF8ToWideCase { |
| 168 | const char* utf8; |
| 169 | const wchar_t* wide; |
| 170 | bool success; |
| 171 | } convert_cases[] = { |
| 172 | // Regular UTF-8 input. |
| 173 | {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, |
| 174 | // Invalid Unicode code point. |
| 175 | {"\xef\xbf\xbfHello", L"Hello", false}, |
| 176 | // Truncated UTF-8 sequence. |
| 177 | {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false}, |
| 178 | // Truncated off the end. |
| 179 | {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false}, |
| 180 | // Non-shortest-form UTF-8. |
| 181 | {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false}, |
| 182 | // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. |
| 183 | {"\xed\xb0\x80", L"", false}, |
brettw@google.com | fa49905 | 2008-08-08 05:27:57 +0900 | [diff] [blame] | 184 | // Non-BMP character. The result will either be in UTF-16 or UTF-32. |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 185 | #if defined(WCHAR_T_IS_UTF16) |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 186 | {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 187 | #elif defined(WCHAR_T_IS_UTF32) |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 188 | {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, |
| 189 | #endif |
| 190 | }; |
| 191 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 192 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 193 | std::wstring converted; |
| 194 | EXPECT_EQ(convert_cases[i].success, |
| 195 | UTF8ToWide(convert_cases[i].utf8, |
| 196 | strlen(convert_cases[i].utf8), |
| 197 | &converted)); |
| 198 | std::wstring expected(convert_cases[i].wide); |
| 199 | EXPECT_EQ(expected, converted); |
| 200 | } |
| 201 | |
| 202 | // Manually test an embedded NULL. |
| 203 | std::wstring converted; |
| 204 | EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted)); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 205 | ASSERT_EQ(3U, converted.length()); |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 206 | EXPECT_EQ(0, converted[0]); |
| 207 | EXPECT_EQ('Z', converted[1]); |
| 208 | EXPECT_EQ('\t', converted[2]); |
| 209 | |
| 210 | // Make sure that conversion replaces, not appends. |
| 211 | EXPECT_TRUE(UTF8ToWide("B", 1, &converted)); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 212 | ASSERT_EQ(1U, converted.length()); |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 213 | EXPECT_EQ('B', converted[0]); |
| 214 | } |
| 215 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 216 | #if defined(WCHAR_T_IS_UTF16) |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 217 | // This test is only valid when wchar_t == UTF-16. |
| 218 | TEST(StringUtilTest, ConvertUTF16ToUTF8) { |
| 219 | struct UTF16ToUTF8Case { |
| 220 | const wchar_t* utf16; |
| 221 | const char* utf8; |
| 222 | bool success; |
| 223 | } convert_cases[] = { |
| 224 | // Regular UTF-16 input. |
| 225 | {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
| 226 | // Test a non-BMP character. |
| 227 | {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, |
| 228 | // Invalid Unicode code point. |
| 229 | {L"\xffffHello", "Hello", false}, |
| 230 | // The first character is a truncated UTF-16 character. |
| 231 | {L"\xd800\x597d", "\xe5\xa5\xbd", false}, |
| 232 | // Truncated at the end. |
| 233 | {L"\x597d\xd800", "\xe5\xa5\xbd", false}, |
| 234 | }; |
| 235 | |
| 236 | for (int i = 0; i < arraysize(convert_cases); i++) { |
| 237 | std::string converted; |
| 238 | EXPECT_EQ(convert_cases[i].success, |
| 239 | WideToUTF8(convert_cases[i].utf16, |
| 240 | wcslen(convert_cases[i].utf16), |
| 241 | &converted)); |
| 242 | std::string expected(convert_cases[i].utf8); |
| 243 | EXPECT_EQ(expected, converted); |
| 244 | } |
| 245 | } |
| 246 | |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 247 | #elif defined(WCHAR_T_IS_UTF32) |
brettw@google.com | fa49905 | 2008-08-08 05:27:57 +0900 | [diff] [blame] | 248 | // This test is only valid when wchar_t == UTF-32. |
| 249 | TEST(StringUtilTest, ConvertUTF32ToUTF8) { |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 250 | struct UTF8ToWideCase { |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 251 | const wchar_t* utf32; |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 252 | const char* utf8; |
| 253 | bool success; |
| 254 | } convert_cases[] = { |
| 255 | // Regular 16-bit input. |
| 256 | {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
| 257 | // Test a non-BMP character. |
| 258 | {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, |
| 259 | // Invalid Unicode code points. |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 260 | {L"\xffffHello", "Hello", false}, |
| 261 | {L"\xfffffffHello", "Hello", false}, |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 262 | // The first character is a truncated UTF-16 character. |
| 263 | {L"\xd800\x597d", "\xe5\xa5\xbd", false}, |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 264 | }; |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 265 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 266 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 267 | std::string converted; |
| 268 | EXPECT_EQ(convert_cases[i].success, |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 269 | WideToUTF8(convert_cases[i].utf32, |
| 270 | wcslen(convert_cases[i].utf32), |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 271 | &converted)); |
| 272 | std::string expected(convert_cases[i].utf8); |
| 273 | EXPECT_EQ(expected, converted); |
| 274 | } |
| 275 | } |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 276 | #endif // defined(WCHAR_T_IS_UTF32) |
brettw@google.com | fed55ab | 2008-08-08 00:29:49 +0900 | [diff] [blame] | 277 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 278 | TEST(StringUtilTest, ConvertMultiString) { |
| 279 | static wchar_t wmulti[] = { |
| 280 | L'f', L'o', L'o', L'\0', |
| 281 | L'b', L'a', L'r', L'\0', |
| 282 | L'b', L'a', L'z', L'\0', |
| 283 | L'\0' |
| 284 | }; |
| 285 | static char multi[] = { |
| 286 | 'f', 'o', 'o', '\0', |
| 287 | 'b', 'a', 'r', '\0', |
| 288 | 'b', 'a', 'z', '\0', |
| 289 | '\0' |
| 290 | }; |
| 291 | std::wstring wmultistring; |
| 292 | memcpy(WriteInto(&wmultistring, arraysize(wmulti)), wmulti, sizeof(wmulti)); |
| 293 | EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); |
| 294 | std::string expected; |
| 295 | memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); |
| 296 | EXPECT_EQ(arraysize(multi) - 1, expected.length()); |
| 297 | const std::string& converted = WideToUTF8(wmultistring); |
| 298 | EXPECT_EQ(arraysize(multi) - 1, converted.length()); |
| 299 | EXPECT_EQ(expected, converted); |
| 300 | } |
| 301 | |
| 302 | TEST(StringUtilTest, ConvertCodepageUTF8) { |
| 303 | // Make sure WideToCodepage works like WideToUTF8. |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 304 | for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 305 | std::string expected(WideToUTF8(kConvertRoundtripCases[i])); |
| 306 | std::string utf8; |
| 307 | EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8, |
| 308 | OnStringUtilConversionError::SKIP, &utf8)); |
| 309 | EXPECT_EQ(expected, utf8); |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | TEST(StringUtilTest, ConvertBetweenCodepageAndWide) { |
| 314 | static const struct { |
| 315 | const char* codepage_name; |
| 316 | const char* encoded; |
| 317 | OnStringUtilConversionError::Type on_error; |
| 318 | bool success; |
| 319 | const wchar_t* wide; |
| 320 | } kConvertCodepageCases[] = { |
| 321 | // Test a case where the input can no be decoded, using both SKIP and FAIL |
| 322 | // error handling rules. "A7 41" is valid, but "A6" isn't. |
| 323 | {"big5", |
| 324 | "\xA7\x41\xA6", |
| 325 | OnStringUtilConversionError::FAIL, |
| 326 | false, |
| 327 | L""}, |
| 328 | {"big5", |
| 329 | "\xA7\x41\xA6", |
| 330 | OnStringUtilConversionError::SKIP, |
| 331 | true, |
| 332 | L"\x4F60"}, |
| 333 | // Arabic (ISO-8859) |
| 334 | {"iso-8859-6", |
| 335 | "\xC7\xEE\xE4\xD3\xF1\xEE\xE4\xC7\xE5\xEF" " " |
| 336 | "\xD9\xEE\xE4\xEE\xEA\xF2\xE3\xEF\xE5\xF2", |
| 337 | OnStringUtilConversionError::FAIL, |
| 338 | true, |
| 339 | L"\x0627\x064E\x0644\x0633\x0651\x064E\x0644\x0627\x0645\x064F" L" " |
| 340 | L"\x0639\x064E\x0644\x064E\x064A\x0652\x0643\x064F\x0645\x0652"}, |
| 341 | // Chinese Simplified (GB2312) |
| 342 | {"gb2312", |
| 343 | "\xC4\xE3\xBA\xC3", |
| 344 | OnStringUtilConversionError::FAIL, |
| 345 | true, |
| 346 | L"\x4F60\x597D"}, |
| 347 | // Chinese Traditional (BIG5) |
| 348 | {"big5", |
| 349 | "\xA7\x41\xA6\x6E", |
| 350 | OnStringUtilConversionError::FAIL, |
| 351 | true, |
| 352 | L"\x4F60\x597D"}, |
| 353 | // Greek (ISO-8859) |
| 354 | {"iso-8859-7", |
| 355 | "\xE3\xE5\xE9\xDC" " " "\xF3\xEF\xF5", |
| 356 | OnStringUtilConversionError::FAIL, |
| 357 | true, |
| 358 | L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"}, |
| 359 | // Hebrew (Windows) |
| 360 | {"windows-1255", /* to be replaced with "iso-8859-8-I"? */ |
| 361 | "\xF9\xD1\xC8\xEC\xE5\xC9\xED", |
| 362 | OnStringUtilConversionError::FAIL, |
| 363 | true, |
| 364 | L"\x05E9\x05C1\x05B8\x05DC\x05D5\x05B9\x05DD"}, |
| 365 | // Hindi Devanagari (ISCII) |
| 366 | {"iscii-dev", |
| 367 | "\xEF\x42" "\xC6\xCC\xD7\xE8\xB3\xDA\xCF", |
| 368 | OnStringUtilConversionError::FAIL, |
| 369 | true, |
| 370 | L"\x0928\x092E\x0938\x094D\x0915\x093E\x0930"}, |
| 371 | // Korean (EUC) |
| 372 | {"euc-kr", |
| 373 | "\xBE\xC8\xB3\xE7\xC7\xCF\xBC\xBC\xBF\xE4", |
| 374 | OnStringUtilConversionError::FAIL, |
| 375 | true, |
| 376 | L"\xC548\xB155\xD558\xC138\xC694"}, |
| 377 | // Japanese (EUC) |
| 378 | {"euc-jp", |
| 379 | "\xA4\xB3\xA4\xF3\xA4\xCB\xA4\xC1\xA4\xCF", |
| 380 | OnStringUtilConversionError::FAIL, |
| 381 | true, |
| 382 | L"\x3053\x3093\x306B\x3061\x306F"}, |
| 383 | // Japanese (ISO-2022) |
| 384 | {"iso-2022-jp", |
| 385 | "\x1B\x24\x42" "\x24\x33\x24\x73\x24\x4B\x24\x41\x24\x4F" "\x1B\x28\x42", |
| 386 | OnStringUtilConversionError::FAIL, |
| 387 | true, |
| 388 | L"\x3053\x3093\x306B\x3061\x306F"}, |
| 389 | // Japanese (Shift-JIS) |
| 390 | {"sjis", |
| 391 | "\x82\xB1\x82\xF1\x82\xC9\x82\xBF\x82\xCD", |
| 392 | OnStringUtilConversionError::FAIL, |
| 393 | true, |
| 394 | L"\x3053\x3093\x306B\x3061\x306F"}, |
| 395 | // Russian (KOI8) |
| 396 | {"koi8-r", |
| 397 | "\xDA\xC4\xD2\xC1\xD7\xD3\xD4\xD7\xD5\xCA\xD4\xC5", |
| 398 | OnStringUtilConversionError::FAIL, |
| 399 | true, |
| 400 | L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432" |
| 401 | L"\x0443\x0439\x0442\x0435"}, |
| 402 | // Thai (ISO-8859) |
| 403 | {"windows-874", /* to be replaced with "iso-8859-11". */ |
| 404 | "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", |
| 405 | OnStringUtilConversionError::FAIL, |
| 406 | true, |
| 407 | L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" |
| 408 | L"\x0E04\x0E23\x0e31\x0E1A"}, |
| 409 | }; |
| 410 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 411 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 412 | std::wstring wide; |
| 413 | bool success = CodepageToWide(kConvertCodepageCases[i].encoded, |
| 414 | kConvertCodepageCases[i].codepage_name, |
| 415 | kConvertCodepageCases[i].on_error, |
| 416 | &wide); |
| 417 | EXPECT_EQ(kConvertCodepageCases[i].success, success); |
| 418 | EXPECT_EQ(kConvertCodepageCases[i].wide, wide); |
| 419 | |
| 420 | // When decoding was successful and nothing was skipped, we also check the |
| 421 | // reverse conversion. |
| 422 | if (success && |
| 423 | kConvertCodepageCases[i].on_error == |
| 424 | OnStringUtilConversionError::FAIL) { |
| 425 | std::string encoded; |
| 426 | success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name, |
| 427 | kConvertCodepageCases[i].on_error, &encoded); |
| 428 | EXPECT_EQ(kConvertCodepageCases[i].success, success); |
| 429 | EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded); |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | // The above cases handled codepage->wide errors, but not wide->codepage. |
| 434 | // Test that here. |
| 435 | std::string encoded("Temp data"); // Make sure the string gets cleared. |
| 436 | |
| 437 | // First test going to an encoding that can not represent that character. |
| 438 | EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1", |
| 439 | OnStringUtilConversionError::FAIL, &encoded)); |
| 440 | EXPECT_TRUE(encoded.empty()); |
| 441 | EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1", |
| 442 | OnStringUtilConversionError::SKIP, &encoded)); |
| 443 | EXPECT_STREQ("Chinese", encoded.c_str()); |
| 444 | |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 445 | #if defined(WCHAR_T_IS_UTF16) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 446 | // When we're in UTF-16 mode, test an invalid UTF-16 character in the input. |
| 447 | EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1", |
| 448 | OnStringUtilConversionError::FAIL, &encoded)); |
| 449 | EXPECT_TRUE(encoded.empty()); |
| 450 | EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1", |
| 451 | OnStringUtilConversionError::SKIP, &encoded)); |
| 452 | EXPECT_STREQ("az", encoded.c_str()); |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 453 | #endif // WCHAR_T_IS_UTF16 |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 454 | |
| 455 | // Invalid characters should fail. |
| 456 | EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1", |
| 457 | OnStringUtilConversionError::SKIP, &encoded)); |
| 458 | EXPECT_STREQ("az", encoded.c_str()); |
| 459 | |
| 460 | // Invalid codepages should fail. |
| 461 | EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2", |
| 462 | OnStringUtilConversionError::SKIP, &encoded)); |
| 463 | } |
| 464 | |
| 465 | TEST(StringUtilTest, ConvertASCII) { |
| 466 | static const char* char_cases[] = { |
| 467 | "Google Video", |
| 468 | "Hello, world\n", |
| 469 | "0123ABCDwxyz \a\b\t\r\n!+,.~" |
| 470 | }; |
| 471 | |
| 472 | static const wchar_t* const wchar_cases[] = { |
| 473 | L"Google Video", |
| 474 | L"Hello, world\n", |
| 475 | L"0123ABCDwxyz \a\b\t\r\n!+,.~" |
| 476 | }; |
| 477 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 478 | for (size_t i = 0; i < arraysize(char_cases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 479 | EXPECT_TRUE(IsStringASCII(char_cases[i])); |
| 480 | std::wstring wide = ASCIIToWide(char_cases[i]); |
| 481 | EXPECT_EQ(wchar_cases[i], wide); |
| 482 | |
| 483 | EXPECT_TRUE(IsStringASCII(wchar_cases[i])); |
| 484 | std::string ascii = WideToASCII(wchar_cases[i]); |
| 485 | EXPECT_EQ(char_cases[i], ascii); |
| 486 | } |
| 487 | |
| 488 | EXPECT_FALSE(IsStringASCII("Google \x80Video")); |
| 489 | EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); |
| 490 | |
| 491 | // Convert empty strings. |
| 492 | std::wstring wempty; |
| 493 | std::string empty; |
| 494 | EXPECT_EQ(empty, WideToASCII(wempty)); |
| 495 | EXPECT_EQ(wempty, ASCIIToWide(empty)); |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 496 | |
| 497 | // Convert strings with an embedded NUL character. |
| 498 | const char chars_with_nul[] = "test\0string"; |
| 499 | const int length_with_nul = arraysize(chars_with_nul) - 1; |
| 500 | std::string string_with_nul(chars_with_nul, length_with_nul); |
| 501 | std::wstring wide_with_nul = ASCIIToWide(string_with_nul); |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 502 | EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), |
| 503 | wide_with_nul.length()); |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 504 | std::string narrow_with_nul = WideToASCII(wide_with_nul); |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 505 | EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), |
| 506 | narrow_with_nul.length()); |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 507 | EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 508 | } |
| 509 | |
// Inputs for LowerCaseEqualsASCII(): the same source text in wide and narrow
// form, and the lower-case text it is compared against.
struct lowercase_case {
  const wchar_t* src_w;
  const char* src_a;
  const char* dst;
};

static const lowercase_case lowercase_cases[] = {
  {L"FoO", "FoO", "foo"},  // mixed case
  {L"foo", "foo", "foo"},  // already lower case
  {L"FOO", "FOO", "foo"},  // all upper case
};
| 519 | |
| 520 | TEST(StringUtilTest, LowerCaseEqualsASCII) { |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 521 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 522 | EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, |
| 523 | lowercase_cases[i].dst)); |
| 524 | EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, |
| 525 | lowercase_cases[i].dst)); |
| 526 | } |
| 527 | } |
| 528 | |
| 529 | TEST(StringUtilTest, GetByteDisplayUnits) { |
| 530 | static const struct { |
| 531 | int64 bytes; |
| 532 | DataUnits expected; |
| 533 | } cases[] = { |
| 534 | {0, DATA_UNITS_BYTE}, |
| 535 | {512, DATA_UNITS_BYTE}, |
| 536 | {10*1024, DATA_UNITS_KILOBYTE}, |
| 537 | {10*1024*1024, DATA_UNITS_MEGABYTE}, |
| 538 | {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE}, |
| 539 | {~(1LL<<63), DATA_UNITS_GIGABYTE}, |
| 540 | #ifdef NDEBUG |
| 541 | {-1, DATA_UNITS_BYTE}, |
| 542 | #endif |
| 543 | }; |
| 544 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 545 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 546 | EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes)); |
| 547 | } |
| 548 | |
| 549 | TEST(StringUtilTest, FormatBytes) { |
| 550 | static const struct { |
| 551 | int64 bytes; |
| 552 | DataUnits units; |
| 553 | const wchar_t* expected; |
| 554 | const wchar_t* expected_with_units; |
| 555 | } cases[] = { |
| 556 | {0, DATA_UNITS_BYTE, L"0", L"0 B"}, |
| 557 | {512, DATA_UNITS_BYTE, L"512", L"512 B"}, |
| 558 | {512, DATA_UNITS_KILOBYTE, L"0.5", L"0.5 kB"}, |
| 559 | {1024*1024, DATA_UNITS_KILOBYTE, L"1024", L"1024 kB"}, |
| 560 | {1024*1024, DATA_UNITS_MEGABYTE, L"1", L"1 MB"}, |
| 561 | {1024*1024*1024, DATA_UNITS_GIGABYTE, L"1", L"1 GB"}, |
| 562 | {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"}, |
| 563 | {~(1LL<<63), DATA_UNITS_GIGABYTE, L"8589934592", L"8589934592 GB"}, |
| 564 | // Make sure the first digit of the fractional part works. |
| 565 | {1024*1024 + 103, DATA_UNITS_KILOBYTE, L"1024.1", L"1024.1 kB"}, |
| 566 | {1024*1024 + 205 * 1024, DATA_UNITS_MEGABYTE, L"1.2", L"1.2 MB"}, |
| 567 | {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIGABYTE, |
| 568 | L"1.9", L"1.9 GB"}, |
| 569 | {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"}, |
| 570 | #ifdef NDEBUG |
| 571 | {-1, DATA_UNITS_BYTE, L"", L""}, |
| 572 | #endif |
| 573 | }; |
| 574 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 575 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 576 | EXPECT_EQ(cases[i].expected, |
| 577 | FormatBytes(cases[i].bytes, cases[i].units, false)); |
| 578 | EXPECT_EQ(cases[i].expected_with_units, |
| 579 | FormatBytes(cases[i].bytes, cases[i].units, true)); |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { |
| 584 | static const struct { |
evanm@google.com | e41d3b3 | 2008-08-15 10:04:11 +0900 | [diff] [blame] | 585 | const wchar_t* str; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 586 | std::wstring::size_type start_offset; |
evanm@google.com | e41d3b3 | 2008-08-15 10:04:11 +0900 | [diff] [blame] | 587 | const wchar_t* find_this; |
| 588 | const wchar_t* replace_with; |
| 589 | const wchar_t* expected; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 590 | } cases[] = { |
| 591 | {L"aaa", 0, L"a", L"b", L"bbb"}, |
| 592 | {L"abb", 0, L"ab", L"a", L"ab"}, |
| 593 | {L"Removing some substrings inging", 0, L"ing", L"", L"Remov some substrs "}, |
| 594 | {L"Not found", 0, L"x", L"0", L"Not found"}, |
| 595 | {L"Not found again", 5, L"x", L"0", L"Not found again"}, |
| 596 | {L" Making it much longer ", 0, L" ", L"Four score and seven years ago", |
| 597 | L"Four score and seven years agoMakingFour score and seven years agoit" |
| 598 | L"Four score and seven years agomuchFour score and seven years agolonger" |
| 599 | L"Four score and seven years ago"}, |
| 600 | {L"Invalid offset", 9999, L"t", L"foobar", L"Invalid offset"}, |
| 601 | {L"Replace me only me once", 9, L"me ", L"", L"Replace me only once"}, |
| 602 | {L"abababab", 2, L"ab", L"c", L"abccc"}, |
| 603 | }; |
| 604 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 605 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 606 | std::wstring str(cases[i].str); |
| 607 | ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, |
| 608 | cases[i].find_this, cases[i].replace_with); |
| 609 | EXPECT_EQ(cases[i].expected, str); |
| 610 | } |
| 611 | } |
| 612 | |
namespace {

// One integer-to-string scenario for an integer type Integer: the value to
// format, the text expected when it is formatted as a signed number, and the
// text expected when the same value is formatted as an unsigned number.
template <typename Integer>
struct IntToStringTest {
  Integer num;
  const char* sexpected;
  const char* uexpected;
};

}  // namespace
| 623 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 624 | TEST(StringUtilTest, IntToString) { |
deanm@google.com | d649845 | 2008-08-13 20:09:33 +0900 | [diff] [blame] | 625 | |
| 626 | static const IntToStringTest<int> int_tests[] = { |
| 627 | { 0, "0", "0" }, |
| 628 | { -1, "-1", "4294967295" }, |
| 629 | { std::numeric_limits<int>::max(), "2147483647", "2147483647" }, |
| 630 | { std::numeric_limits<int>::min(), "-2147483648", "2147483648" }, |
| 631 | }; |
| 632 | static const IntToStringTest<int64> int64_tests[] = { |
| 633 | { 0, "0", "0" }, |
| 634 | { -1, "-1", "18446744073709551615" }, |
| 635 | { std::numeric_limits<int64>::max(), |
| 636 | "9223372036854775807", |
| 637 | "9223372036854775807", }, |
| 638 | { std::numeric_limits<int64>::min(), |
| 639 | "-9223372036854775808", |
| 640 | "9223372036854775808" }, |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 641 | }; |
| 642 | |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 643 | for (size_t i = 0; i < arraysize(int_tests); ++i) { |
deanm@google.com | d649845 | 2008-08-13 20:09:33 +0900 | [diff] [blame] | 644 | const IntToStringTest<int>* test = &int_tests[i]; |
| 645 | EXPECT_EQ(IntToString(test->num), test->sexpected); |
| 646 | EXPECT_EQ(IntToWString(test->num), UTF8ToWide(test->sexpected)); |
| 647 | EXPECT_EQ(UintToString(test->num), test->uexpected); |
| 648 | EXPECT_EQ(UintToWString(test->num), UTF8ToWide(test->uexpected)); |
| 649 | } |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 650 | for (size_t i = 0; i < arraysize(int64_tests); ++i) { |
deanm@google.com | d649845 | 2008-08-13 20:09:33 +0900 | [diff] [blame] | 651 | const IntToStringTest<int64>* test = &int64_tests[i]; |
| 652 | EXPECT_EQ(Int64ToString(test->num), test->sexpected); |
| 653 | EXPECT_EQ(Int64ToWString(test->num), UTF8ToWide(test->sexpected)); |
| 654 | EXPECT_EQ(Uint64ToString(test->num), test->uexpected); |
| 655 | EXPECT_EQ(Uint64ToWString(test->num), UTF8ToWide(test->uexpected)); |
| 656 | } |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 657 | } |
| 658 | |
| 659 | TEST(StringUtilTest, Uint64ToString) { |
| 660 | static const struct { |
| 661 | uint64 input; |
| 662 | std::string output; |
| 663 | } cases[] = { |
| 664 | {0, "0"}, |
| 665 | {42, "42"}, |
| 666 | {INT_MAX, "2147483647"}, |
| 667 | {kuint64max, "18446744073709551615"}, |
| 668 | }; |
| 669 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 670 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 671 | EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input)); |
| 672 | } |
| 673 | |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 674 | TEST(StringUtilTest, StringToInt) { |
| 675 | static const struct { |
| 676 | std::string input; |
| 677 | int output; |
| 678 | bool success; |
| 679 | } cases[] = { |
| 680 | {"0", 0, true}, |
| 681 | {"42", 42, true}, |
| 682 | {"-2147483648", INT_MIN, true}, |
| 683 | {"2147483647", INT_MAX, true}, |
| 684 | {"", 0, false}, |
| 685 | {" 42", 42, false}, |
ericroman@google.com | 491d873 | 2008-08-09 07:03:26 +0900 | [diff] [blame] | 686 | {"42 ", 42, false}, |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 687 | {"\t\n\v\f\r 42", 42, false}, |
| 688 | {"blah42", 0, false}, |
| 689 | {"42blah", 42, false}, |
| 690 | {"blah42blah", 0, false}, |
| 691 | {"-273.15", -273, false}, |
| 692 | {"+98.6", 98, false}, |
| 693 | {"--123", 0, false}, |
| 694 | {"++123", 0, false}, |
| 695 | {"-+123", 0, false}, |
| 696 | {"+-123", 0, false}, |
| 697 | {"-", 0, false}, |
| 698 | {"-2147483649", INT_MIN, false}, |
| 699 | {"-99999999999", INT_MIN, false}, |
| 700 | {"2147483648", INT_MAX, false}, |
| 701 | {"99999999999", INT_MAX, false}, |
| 702 | }; |
| 703 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 704 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 705 | EXPECT_EQ(cases[i].output, StringToInt(cases[i].input)); |
| 706 | int output; |
| 707 | EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output)); |
| 708 | EXPECT_EQ(cases[i].output, output); |
| 709 | |
| 710 | std::wstring wide_input = ASCIIToWide(cases[i].input); |
| 711 | EXPECT_EQ(cases[i].output, StringToInt(wide_input)); |
| 712 | EXPECT_EQ(cases[i].success, StringToInt(wide_input, &output)); |
| 713 | EXPECT_EQ(cases[i].output, output); |
| 714 | } |
| 715 | |
| 716 | // One additional test to verify that conversion of numbers in strings with |
| 717 | // embedded NUL characters. The NUL and extra data after it should be |
| 718 | // interpreted as junk after the number. |
| 719 | const char input[] = "6\06"; |
| 720 | std::string input_string(input, arraysize(input) - 1); |
| 721 | int output; |
| 722 | EXPECT_FALSE(StringToInt(input_string, &output)); |
| 723 | EXPECT_EQ(6, output); |
| 724 | |
| 725 | std::wstring wide_input = ASCIIToWide(input_string); |
| 726 | EXPECT_FALSE(StringToInt(wide_input, &output)); |
| 727 | EXPECT_EQ(6, output); |
| 728 | } |
| 729 | |
| 730 | TEST(StringUtilTest, StringToInt64) { |
| 731 | static const struct { |
| 732 | std::string input; |
| 733 | int64 output; |
| 734 | bool success; |
| 735 | } cases[] = { |
| 736 | {"0", 0, true}, |
| 737 | {"42", 42, true}, |
| 738 | {"-2147483648", INT_MIN, true}, |
| 739 | {"2147483647", INT_MAX, true}, |
| 740 | {"-2147483649", GG_INT64_C(-2147483649), true}, |
| 741 | {"-99999999999", GG_INT64_C(-99999999999), true}, |
| 742 | {"2147483648", GG_INT64_C(2147483648), true}, |
| 743 | {"99999999999", GG_INT64_C(99999999999), true}, |
| 744 | {"9223372036854775807", kint64max, true}, |
| 745 | {"-9223372036854775808", kint64min, true}, |
mmentovai@google.com | 8dcf71c | 2008-08-08 02:15:41 +0900 | [diff] [blame] | 746 | {"09", 9, true}, |
| 747 | {"-09", -9, true}, |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 748 | {"", 0, false}, |
| 749 | {" 42", 42, false}, |
ericroman@google.com | 491d873 | 2008-08-09 07:03:26 +0900 | [diff] [blame] | 750 | {"42 ", 42, false}, |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 751 | {"\t\n\v\f\r 42", 42, false}, |
| 752 | {"blah42", 0, false}, |
| 753 | {"42blah", 42, false}, |
| 754 | {"blah42blah", 0, false}, |
| 755 | {"-273.15", -273, false}, |
| 756 | {"+98.6", 98, false}, |
| 757 | {"--123", 0, false}, |
| 758 | {"++123", 0, false}, |
| 759 | {"-+123", 0, false}, |
| 760 | {"+-123", 0, false}, |
| 761 | {"-", 0, false}, |
| 762 | {"-9223372036854775809", kint64min, false}, |
| 763 | {"-99999999999999999999", kint64min, false}, |
| 764 | {"9223372036854775808", kint64max, false}, |
| 765 | {"99999999999999999999", kint64max, false}, |
| 766 | }; |
| 767 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 768 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 769 | EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input)); |
| 770 | int64 output; |
| 771 | EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output)); |
| 772 | EXPECT_EQ(cases[i].output, output); |
| 773 | |
| 774 | std::wstring wide_input = ASCIIToWide(cases[i].input); |
| 775 | EXPECT_EQ(cases[i].output, StringToInt64(wide_input)); |
| 776 | EXPECT_EQ(cases[i].success, StringToInt64(wide_input, &output)); |
| 777 | EXPECT_EQ(cases[i].output, output); |
| 778 | } |
| 779 | |
| 780 | // One additional test to verify that conversion of numbers in strings with |
| 781 | // embedded NUL characters. The NUL and extra data after it should be |
| 782 | // interpreted as junk after the number. |
| 783 | const char input[] = "6\06"; |
| 784 | std::string input_string(input, arraysize(input) - 1); |
| 785 | int64 output; |
| 786 | EXPECT_FALSE(StringToInt64(input_string, &output)); |
| 787 | EXPECT_EQ(6, output); |
| 788 | |
| 789 | std::wstring wide_input = ASCIIToWide(input_string); |
| 790 | EXPECT_FALSE(StringToInt64(wide_input, &output)); |
| 791 | EXPECT_EQ(6, output); |
| 792 | } |
| 793 | |
| 794 | TEST(StringUtilTest, HexStringToInt) { |
| 795 | static const struct { |
| 796 | std::string input; |
| 797 | int output; |
| 798 | bool success; |
| 799 | } cases[] = { |
| 800 | {"0", 0, true}, |
| 801 | {"42", 66, true}, |
| 802 | {"-42", -66, true}, |
| 803 | {"+42", 66, true}, |
| 804 | {"7fffffff", INT_MAX, true}, |
| 805 | {"80000000", INT_MIN, true}, |
| 806 | {"ffffffff", -1, true}, |
| 807 | {"DeadBeef", 0xdeadbeef, true}, |
| 808 | {"0x42", 66, true}, |
| 809 | {"-0x42", -66, true}, |
| 810 | {"+0x42", 66, true}, |
| 811 | {"0x7fffffff", INT_MAX, true}, |
| 812 | {"0x80000000", INT_MIN, true}, |
| 813 | {"0xffffffff", -1, true}, |
| 814 | {"0XDeadBeef", 0xdeadbeef, true}, |
mmentovai@google.com | 8dcf71c | 2008-08-08 02:15:41 +0900 | [diff] [blame] | 815 | {"0x0f", 15, true}, |
| 816 | {"0f", 15, true}, |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 817 | {" 45", 0x45, false}, |
| 818 | {"\t\n\v\f\r 0x45", 0x45, false}, |
ericroman@google.com | 491d873 | 2008-08-09 07:03:26 +0900 | [diff] [blame] | 819 | {" 45", 0x45, false}, |
| 820 | {"45 ", 0x45, false}, |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 821 | {"efgh", 0xef, false}, |
| 822 | {"0xefgh", 0xef, false}, |
| 823 | {"hgfe", 0, false}, |
| 824 | {"100000000", -1, false}, // don't care about |output|, just |success| |
| 825 | {"-", 0, false}, |
| 826 | {"", 0, false}, |
| 827 | }; |
| 828 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 829 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { |
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 830 | EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input)); |
| 831 | int output; |
| 832 | EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output)); |
| 833 | EXPECT_EQ(cases[i].output, output); |
| 834 | |
| 835 | std::wstring wide_input = ASCIIToWide(cases[i].input); |
| 836 | EXPECT_EQ(cases[i].output, HexStringToInt(wide_input)); |
| 837 | EXPECT_EQ(cases[i].success, HexStringToInt(wide_input, &output)); |
| 838 | EXPECT_EQ(cases[i].output, output); |
| 839 | } |
| 840 | // One additional test to verify that conversion of numbers in strings with |
| 841 | // embedded NUL characters. The NUL and extra data after it should be |
| 842 | // interpreted as junk after the number. |
| 843 | const char input[] = "0xc0ffee\09"; |
| 844 | std::string input_string(input, arraysize(input) - 1); |
| 845 | int output; |
| 846 | EXPECT_FALSE(HexStringToInt(input_string, &output)); |
| 847 | EXPECT_EQ(0xc0ffee, output); |
| 848 | |
| 849 | std::wstring wide_input = ASCIIToWide(input_string); |
| 850 | EXPECT_FALSE(HexStringToInt(wide_input, &output)); |
| 851 | EXPECT_EQ(0xc0ffee, output); |
| 852 | } |
| 853 | |
mmentovai@google.com | 8dcf71c | 2008-08-08 02:15:41 +0900 | [diff] [blame] | 854 | TEST(StringUtilTest, StringToDouble) { |
| 855 | static const struct { |
| 856 | std::string input; |
| 857 | double output; |
| 858 | bool success; |
| 859 | } cases[] = { |
| 860 | {"0", 0.0, true}, |
| 861 | {"42", 42.0, true}, |
| 862 | {"-42", -42.0, true}, |
| 863 | {"123.45", 123.45, true}, |
| 864 | {"-123.45", -123.45, true}, |
| 865 | {"+123.45", 123.45, true}, |
| 866 | {"2.99792458e8", 299792458.0, true}, |
| 867 | {"149597870.691E+3", 149597870691.0, true}, |
| 868 | {"6.", 6.0, true}, |
| 869 | {"9e99999999999999999999", HUGE_VAL, false}, |
| 870 | {"-9e99999999999999999999", -HUGE_VAL, false}, |
| 871 | {"1e-2", 0.01, true}, |
ericroman@google.com | 491d873 | 2008-08-09 07:03:26 +0900 | [diff] [blame] | 872 | {" 1e-2", 0.01, false}, |
| 873 | {"1e-2 ", 0.01, false}, |
mmentovai@google.com | 8dcf71c | 2008-08-08 02:15:41 +0900 | [diff] [blame] | 874 | {"-1E-7", -0.0000001, true}, |
| 875 | {"01e02", 100, true}, |
| 876 | {"2.3e15", 2.3e15, true}, |
| 877 | {"\t\n\v\f\r -123.45e2", -12345.0, false}, |
| 878 | {"+123 e4", 123.0, false}, |
| 879 | {"123e ", 123.0, false}, |
| 880 | {"123e", 123.0, false}, |
| 881 | {" 2.99", 2.99, false}, |
| 882 | {"1e3.4", 1000.0, false}, |
| 883 | {"nothing", 0.0, false}, |
| 884 | {"-", 0.0, false}, |
| 885 | {"+", 0.0, false}, |
| 886 | {"", 0.0, false}, |
| 887 | }; |
| 888 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 889 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { |
mmentovai@google.com | 8dcf71c | 2008-08-08 02:15:41 +0900 | [diff] [blame] | 890 | EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(cases[i].input)); |
| 891 | double output; |
| 892 | EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output)); |
| 893 | EXPECT_DOUBLE_EQ(cases[i].output, output); |
| 894 | |
| 895 | std::wstring wide_input = ASCIIToWide(cases[i].input); |
| 896 | EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(wide_input)); |
| 897 | EXPECT_EQ(cases[i].success, StringToDouble(wide_input, &output)); |
| 898 | EXPECT_DOUBLE_EQ(cases[i].output, output); |
| 899 | } |
| 900 | |
| 901 | // One additional test to verify that conversion of numbers in strings with |
| 902 | // embedded NUL characters. The NUL and extra data after it should be |
| 903 | // interpreted as junk after the number. |
| 904 | const char input[] = "3.14\0159"; |
| 905 | std::string input_string(input, arraysize(input) - 1); |
| 906 | double output; |
| 907 | EXPECT_FALSE(StringToDouble(input_string, &output)); |
| 908 | EXPECT_DOUBLE_EQ(3.14, output); |
| 909 | |
| 910 | std::wstring wide_input = ASCIIToWide(input_string); |
| 911 | EXPECT_FALSE(StringToDouble(wide_input, &output)); |
| 912 | EXPECT_DOUBLE_EQ(3.14, output); |
| 913 | } |
| 914 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 915 | // This checks where we can use the assignment operator for a va_list. We need |
| 916 | // a way to do this since Visual C doesn't support va_copy, but assignment on |
| 917 | // va_list is not guaranteed to be a copy. See StringAppendVT which uses this |
| 918 | // capability. |
| 919 | static void VariableArgsFunc(const char* format, ...) { |
| 920 | va_list org; |
| 921 | va_start(org, format); |
| 922 | |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 923 | va_list dup; |
| 924 | base::va_copy(dup, org); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 925 | int i1 = va_arg(org, int); |
| 926 | int j1 = va_arg(org, int); |
| 927 | char* s1 = va_arg(org, char*); |
| 928 | double d1 = va_arg(org, double); |
| 929 | va_end(org); |
| 930 | |
| 931 | int i2 = va_arg(dup, int); |
| 932 | int j2 = va_arg(dup, int); |
| 933 | char* s2 = va_arg(dup, char*); |
| 934 | double d2 = va_arg(dup, double); |
| 935 | |
| 936 | EXPECT_EQ(i1, i2); |
| 937 | EXPECT_EQ(j1, j2); |
| 938 | EXPECT_STREQ(s1, s2); |
| 939 | EXPECT_EQ(d1, d2); |
| 940 | |
| 941 | va_end(dup); |
| 942 | } |
| 943 | |
| 944 | TEST(StringUtilTest, VAList) { |
| 945 | VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); |
| 946 | } |
| 947 | |
| 948 | TEST(StringUtilTest, StringPrintfEmptyFormat) { |
| 949 | const char* empty = ""; |
| 950 | EXPECT_EQ("", StringPrintf(empty)); |
| 951 | EXPECT_EQ("", StringPrintf("%s", "")); |
| 952 | } |
| 953 | |
| 954 | TEST(StringUtilTest, StringPrintfMisc) { |
| 955 | EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w')); |
mmentovai@google.com | 8ae0c2c | 2008-08-14 10:25:32 +0900 | [diff] [blame] | 956 | EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w')); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 957 | } |
| 958 | |
| 959 | TEST(StringUtilTest, StringAppendfStringEmptyParam) { |
| 960 | std::string value("Hello"); |
| 961 | StringAppendF(&value, ""); |
| 962 | EXPECT_EQ("Hello", value); |
| 963 | |
| 964 | std::wstring valuew(L"Hello"); |
| 965 | StringAppendF(&valuew, L""); |
| 966 | EXPECT_EQ(L"Hello", valuew); |
| 967 | } |
| 968 | |
| 969 | TEST(StringUtilTest, StringAppendfEmptyString) { |
| 970 | std::string value("Hello"); |
| 971 | StringAppendF(&value, "%s", ""); |
| 972 | EXPECT_EQ("Hello", value); |
| 973 | |
| 974 | std::wstring valuew(L"Hello"); |
mmentovai@google.com | 8ae0c2c | 2008-08-14 10:25:32 +0900 | [diff] [blame] | 975 | StringAppendF(&valuew, L"%ls", L""); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 976 | EXPECT_EQ(L"Hello", valuew); |
| 977 | } |
| 978 | |
| 979 | TEST(StringUtilTest, StringAppendfString) { |
| 980 | std::string value("Hello"); |
| 981 | StringAppendF(&value, " %s", "World"); |
| 982 | EXPECT_EQ("Hello World", value); |
| 983 | |
| 984 | std::wstring valuew(L"Hello"); |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 985 | StringAppendF(&valuew, L" %ls", L"World"); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 986 | EXPECT_EQ(L"Hello World", valuew); |
| 987 | } |
| 988 | |
| 989 | TEST(StringUtilTest, StringAppendfInt) { |
| 990 | std::string value("Hello"); |
| 991 | StringAppendF(&value, " %d", 123); |
| 992 | EXPECT_EQ("Hello 123", value); |
| 993 | |
| 994 | std::wstring valuew(L"Hello"); |
| 995 | StringAppendF(&valuew, L" %d", 123); |
| 996 | EXPECT_EQ(L"Hello 123", valuew); |
| 997 | } |
| 998 | |
| 999 | // Make sure that lengths exactly around the initial buffer size are handled |
| 1000 | // correctly. |
| 1001 | TEST(StringUtilTest, StringPrintfBounds) { |
| 1002 | const int src_len = 1026; |
| 1003 | char src[src_len]; |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 1004 | for (size_t i = 0; i < arraysize(src); i++) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1005 | src[i] = 'A'; |
| 1006 | |
| 1007 | wchar_t srcw[src_len]; |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 1008 | for (size_t i = 0; i < arraysize(srcw); i++) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1009 | srcw[i] = 'A'; |
| 1010 | |
| 1011 | for (int i = 1; i < 3; i++) { |
| 1012 | src[src_len - i] = 0; |
| 1013 | std::string out; |
| 1014 | SStringPrintf(&out, "%s", src); |
| 1015 | EXPECT_STREQ(src, out.c_str()); |
| 1016 | |
| 1017 | srcw[src_len - i] = 0; |
| 1018 | std::wstring outw; |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 1019 | SStringPrintf(&outw, L"%ls", srcw); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1020 | EXPECT_STREQ(srcw, outw.c_str()); |
| 1021 | } |
| 1022 | } |
| 1023 | |
| 1024 | // Test very large sprintfs that will cause the buffer to grow. |
| 1025 | TEST(StringUtilTest, Grow) { |
| 1026 | char src[1026]; |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 1027 | for (size_t i = 0; i < arraysize(src); i++) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1028 | src[i] = 'A'; |
| 1029 | src[1025] = 0; |
| 1030 | |
evanm@google.com | e41d3b3 | 2008-08-15 10:04:11 +0900 | [diff] [blame] | 1031 | const char* fmt = "%sB%sB%sB%sB%sB%sB%s"; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1032 | |
| 1033 | std::string out; |
| 1034 | SStringPrintf(&out, fmt, src, src, src, src, src, src, src); |
| 1035 | |
| 1036 | char* ref = new char[320000]; |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 1037 | #if defined(OS_WIN) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1038 | sprintf_s(ref, 320000, fmt, src, src, src, src, src, src, src); |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 1039 | #elif defined(OS_POSIX) |
| 1040 | snprintf(ref, 320000, fmt, src, src, src, src, src, src, src); |
| 1041 | #endif |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1042 | |
| 1043 | EXPECT_STREQ(ref, out.c_str()); |
deanm@google.com | 78715b7 | 2008-08-19 23:02:18 +0900 | [diff] [blame] | 1044 | delete[] ref; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1045 | } |
| 1046 | |
| 1047 | // Test the boundary condition for the size of the string_util's |
| 1048 | // internal buffer. |
| 1049 | TEST(StringUtilTest, GrowBoundary) { |
| 1050 | const int string_util_buf_len = 1024; |
| 1051 | // Our buffer should be one larger than the size of StringAppendVT's stack |
| 1052 | // buffer. |
| 1053 | const int buf_len = string_util_buf_len + 1; |
| 1054 | char src[buf_len + 1]; // Need extra one for NULL-terminator. |
| 1055 | for (int i = 0; i < buf_len; ++i) |
| 1056 | src[i] = 'a'; |
| 1057 | src[buf_len] = 0; |
| 1058 | |
| 1059 | std::string out; |
| 1060 | SStringPrintf(&out, "%s", src); |
| 1061 | |
| 1062 | EXPECT_STREQ(src, out.c_str()); |
| 1063 | } |
| 1064 | |
// TODO(evanm): what's the proper cross-platform test here?
#if defined(OS_WIN)
// sprintf in Visual Studio fails when given U+FFFF. This tests that the
// failure case is gracefully handled: the output is expected to be empty.
TEST(StringUtilTest, Invalid) {
  const wchar_t invalid[] = { static_cast<wchar_t>(0xffff), 0 };

  std::wstring out;
  SStringPrintf(&out, L"%ls", invalid);
  EXPECT_STREQ(L"", out.c_str());
}
#endif
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1079 | |
| 1080 | // Test for SplitString |
| 1081 | TEST(StringUtilTest, SplitString) { |
| 1082 | std::vector<std::wstring> r; |
| 1083 | |
| 1084 | SplitString(L"a,b,c", L',', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1085 | EXPECT_EQ(3U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1086 | EXPECT_EQ(r[0], L"a"); |
| 1087 | EXPECT_EQ(r[1], L"b"); |
| 1088 | EXPECT_EQ(r[2], L"c"); |
| 1089 | r.clear(); |
| 1090 | |
| 1091 | SplitString(L"a, b, c", L',', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1092 | EXPECT_EQ(3U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1093 | EXPECT_EQ(r[0], L"a"); |
| 1094 | EXPECT_EQ(r[1], L"b"); |
| 1095 | EXPECT_EQ(r[2], L"c"); |
| 1096 | r.clear(); |
| 1097 | |
| 1098 | SplitString(L"a,,c", L',', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1099 | EXPECT_EQ(3U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1100 | EXPECT_EQ(r[0], L"a"); |
| 1101 | EXPECT_EQ(r[1], L""); |
| 1102 | EXPECT_EQ(r[2], L"c"); |
| 1103 | r.clear(); |
| 1104 | |
| 1105 | SplitString(L"", L'*', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1106 | EXPECT_EQ(1U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1107 | EXPECT_EQ(r[0], L""); |
| 1108 | r.clear(); |
| 1109 | |
| 1110 | SplitString(L"foo", L'*', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1111 | EXPECT_EQ(1U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1112 | EXPECT_EQ(r[0], L"foo"); |
| 1113 | r.clear(); |
| 1114 | |
| 1115 | SplitString(L"foo ,", L',', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1116 | EXPECT_EQ(2U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1117 | EXPECT_EQ(r[0], L"foo"); |
| 1118 | EXPECT_EQ(r[1], L""); |
| 1119 | r.clear(); |
| 1120 | |
| 1121 | SplitString(L",", L',', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1122 | EXPECT_EQ(2U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1123 | EXPECT_EQ(r[0], L""); |
| 1124 | EXPECT_EQ(r[1], L""); |
| 1125 | r.clear(); |
| 1126 | |
| 1127 | SplitString(L"\t\ta\t", L'\t', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1128 | EXPECT_EQ(4U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1129 | EXPECT_EQ(r[0], L""); |
| 1130 | EXPECT_EQ(r[1], L""); |
| 1131 | EXPECT_EQ(r[2], L"a"); |
| 1132 | EXPECT_EQ(r[3], L""); |
| 1133 | r.clear(); |
| 1134 | |
| 1135 | SplitStringDontTrim(L"\t\ta\t", L'\t', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1136 | EXPECT_EQ(4U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1137 | EXPECT_EQ(r[0], L""); |
| 1138 | EXPECT_EQ(r[1], L""); |
| 1139 | EXPECT_EQ(r[2], L"a"); |
| 1140 | EXPECT_EQ(r[3], L""); |
| 1141 | r.clear(); |
| 1142 | |
| 1143 | SplitString(L"\ta\t\nb\tcc", L'\n', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1144 | EXPECT_EQ(2U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1145 | EXPECT_EQ(r[0], L"a"); |
| 1146 | EXPECT_EQ(r[1], L"b\tcc"); |
| 1147 | r.clear(); |
| 1148 | |
| 1149 | SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1150 | EXPECT_EQ(2U, r.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1151 | EXPECT_EQ(r[0], L"\ta\t"); |
| 1152 | EXPECT_EQ(r[1], L"b\tcc"); |
| 1153 | r.clear(); |
| 1154 | } |
| 1155 | |
| 1156 | TEST(StringUtilTest, StartsWith) { |
| 1157 | EXPECT_EQ(true, StartsWithASCII("javascript:url", "javascript", true)); |
| 1158 | EXPECT_EQ(true, StartsWithASCII("javascript:url", "javascript", false)); |
| 1159 | EXPECT_EQ(true, StartsWithASCII("JavaScript:url", "javascript", false)); |
| 1160 | EXPECT_EQ(false, StartsWithASCII("java", "javascript", true)); |
| 1161 | EXPECT_EQ(false, StartsWithASCII("java", "javascript", false)); |
| 1162 | } |
| 1163 | |
| 1164 | TEST(StringUtilTest, GetStringFWithOffsets) { |
| 1165 | std::vector<size_t> offsets; |
| 1166 | |
| 1167 | ReplaceStringPlaceholders(L"Hello, $1. Your number is $2.", L"1", L"2", |
| 1168 | &offsets); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1169 | EXPECT_EQ(2U, offsets.size()); |
| 1170 | EXPECT_EQ(7U, offsets[0]); |
| 1171 | EXPECT_EQ(25U, offsets[1]); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1172 | offsets.clear(); |
| 1173 | |
| 1174 | ReplaceStringPlaceholders(L"Hello, $2. Your number is $1.", L"1", L"2", |
| 1175 | &offsets); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1176 | EXPECT_EQ(2U, offsets.size()); |
| 1177 | EXPECT_EQ(25U, offsets[0]); |
| 1178 | EXPECT_EQ(7U, offsets[1]); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1179 | offsets.clear(); |
| 1180 | } |
| 1181 | |
| 1182 | TEST(StringUtilTest, SplitStringAlongWhitespace) { |
| 1183 | struct TestData { |
| 1184 | const std::wstring input; |
darin@google.com | c01678f | 2008-08-15 05:49:08 +0900 | [diff] [blame] | 1185 | const size_t expected_result_count; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1186 | const std::wstring output1; |
| 1187 | const std::wstring output2; |
| 1188 | } data[] = { |
| 1189 | { L"a", 1, L"a", L"" }, |
| 1190 | { L" ", 0, L"", L"" }, |
| 1191 | { L" a", 1, L"a", L"" }, |
| 1192 | { L" ab ", 1, L"ab", L"" }, |
| 1193 | { L" ab c", 2, L"ab", L"c" }, |
| 1194 | { L" ab c ", 2, L"ab", L"c" }, |
| 1195 | { L" ab cd", 2, L"ab", L"cd" }, |
| 1196 | { L" ab cd ", 2, L"ab", L"cd" }, |
| 1197 | { L" \ta\t", 1, L"a", L"" }, |
| 1198 | { L" b\ta\t", 2, L"b", L"a" }, |
| 1199 | { L" b\tat", 2, L"b", L"at" }, |
| 1200 | { L"b\tat", 2, L"b", L"at" }, |
| 1201 | { L"b\t at", 2, L"b", L"at" }, |
| 1202 | }; |
mmentovai@google.com | 38cabad | 2008-08-13 10:17:18 +0900 | [diff] [blame] | 1203 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1204 | std::vector<std::wstring> results; |
| 1205 | SplitStringAlongWhitespace(data[i].input, &results); |
darin@google.com | c01678f | 2008-08-15 05:49:08 +0900 | [diff] [blame] | 1206 | ASSERT_EQ(data[i].expected_result_count, results.size()); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1207 | if (data[i].expected_result_count > 0) |
| 1208 | ASSERT_EQ(data[i].output1, results[0]); |
| 1209 | if (data[i].expected_result_count > 1) |
| 1210 | ASSERT_EQ(data[i].output2, results[1]); |
| 1211 | } |
| 1212 | } |
| 1213 | |
| 1214 | TEST(StringUtilTest, MatchPatternTest) { |
| 1215 | EXPECT_EQ(MatchPattern(L"www.google.com", L"*.com"), true); |
| 1216 | EXPECT_EQ(MatchPattern(L"www.google.com", L"*"), true); |
| 1217 | EXPECT_EQ(MatchPattern(L"www.google.com", L"www*.g*.org"), false); |
| 1218 | EXPECT_EQ(MatchPattern(L"Hello", L"H?l?o"), true); |
| 1219 | EXPECT_EQ(MatchPattern(L"www.google.com", L"http://*)"), false); |
| 1220 | EXPECT_EQ(MatchPattern(L"www.msn.com", L"*.COM"), false); |
| 1221 | EXPECT_EQ(MatchPattern(L"Hello*1234", L"He??o\\*1*"), true); |
| 1222 | EXPECT_EQ(MatchPattern(L"", L"*.*"), false); |
| 1223 | EXPECT_EQ(MatchPattern(L"", L"*"), true); |
| 1224 | EXPECT_EQ(MatchPattern(L"", L"?"), true); |
| 1225 | EXPECT_EQ(MatchPattern(L"", L""), true); |
| 1226 | EXPECT_EQ(MatchPattern(L"Hello", L""), false); |
| 1227 | EXPECT_EQ(MatchPattern(L"Hello*", L"Hello*"), true); |
| 1228 | EXPECT_EQ(MatchPattern("Hello*", "Hello*"), true); // narrow string |
| 1229 | } |
| 1230 | |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1231 | TEST(StringUtilTest, LcpyTest) { |
| 1232 | // Test the normal case where we fit in our buffer. |
| 1233 | { |
| 1234 | char dst[10]; |
| 1235 | wchar_t wdst[10]; |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1236 | EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1237 | EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1238 | EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1239 | EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); |
| 1240 | } |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 1241 | |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1242 | // Test dst_size == 0, nothing should be written to |dst| and we should |
| 1243 | // have the equivalent of strlen(src). |
| 1244 | { |
| 1245 | char dst[2] = {1, 2}; |
| 1246 | wchar_t wdst[2] = {1, 2}; |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1247 | EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1248 | EXPECT_EQ(1, dst[0]); |
| 1249 | EXPECT_EQ(2, dst[1]); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1250 | EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1251 | EXPECT_EQ(1, wdst[0]); |
| 1252 | EXPECT_EQ(2, wdst[1]); |
| 1253 | } |
| 1254 | |
| 1255 | // Test the case were we _just_ competely fit including the null. |
| 1256 | { |
| 1257 | char dst[8]; |
| 1258 | wchar_t wdst[8]; |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1259 | EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1260 | EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1261 | EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1262 | EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); |
| 1263 | } |
| 1264 | |
| 1265 | // Test the case were we we are one smaller, so we can't fit the null. |
| 1266 | { |
| 1267 | char dst[7]; |
| 1268 | wchar_t wdst[7]; |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1269 | EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1270 | EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1271 | EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1272 | EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); |
| 1273 | } |
| 1274 | |
| 1275 | // Test the case where we are just too small. |
| 1276 | { |
| 1277 | char dst[3]; |
| 1278 | wchar_t wdst[3]; |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1279 | EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1280 | EXPECT_EQ(0, memcmp(dst, "ab", 3)); |
darin@google.com | f327280 | 2008-08-15 05:27:29 +0900 | [diff] [blame] | 1281 | EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); |
deanm@google.com | b533521 | 2008-08-13 23:33:40 +0900 | [diff] [blame] | 1282 | EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); |
| 1283 | } |
| 1284 | } |
mmentovai@google.com | 8ae0c2c | 2008-08-14 10:25:32 +0900 | [diff] [blame] | 1285 | |
| 1286 | TEST(StringUtilTest, WprintfFormatPortabilityTest) { |
| 1287 | struct TestData { |
| 1288 | const wchar_t* input; |
| 1289 | bool portable; |
| 1290 | } cases[] = { |
| 1291 | { L"%ls", true }, |
| 1292 | { L"%s", false }, |
| 1293 | { L"%S", false }, |
| 1294 | { L"%lS", false }, |
| 1295 | { L"Hello, %s", false }, |
| 1296 | { L"%lc", true }, |
| 1297 | { L"%c", false }, |
| 1298 | { L"%C", false }, |
| 1299 | { L"%lC", false }, |
| 1300 | { L"%ls %s", false }, |
| 1301 | { L"%s %ls", false }, |
| 1302 | { L"%s %ls %s", false }, |
| 1303 | { L"%f", true }, |
| 1304 | { L"%f %F", false }, |
| 1305 | { L"%d %D", false }, |
| 1306 | { L"%o %O", false }, |
| 1307 | { L"%u %U", false }, |
| 1308 | { L"%f %d %o %u", true }, |
| 1309 | { L"%-8d (%02.1f%)", true }, |
| 1310 | { L"% 10s", false }, |
| 1311 | { L"% 10ls", true } |
| 1312 | }; |
| 1313 | for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { |
| 1314 | EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); |
| 1315 | } |
| 1316 | } |
license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 1317 | |