blob: 89cec22ce45e5a284a10ef3590f1afe4d0fbff05 [file] [log] [blame]
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commit3f4a7322008-07-27 06:49:38 +09004
mmentovai@google.com38cabad2008-08-13 10:17:18 +09005#include <math.h>
mmentovai@google.comae034d12008-08-13 04:23:14 +09006#include <stdarg.h>
mmentovai@google.comec644e42008-08-13 03:48:58 +09007
deanm@google.comd6498452008-08-13 20:09:33 +09008#include <limits>
mmentovai@google.com38cabad2008-08-13 10:17:18 +09009#include <sstream>
10
initial.commit3f4a7322008-07-27 06:49:38 +090011#include "base/basictypes.h"
12#include "base/logging.h"
13#include "base/string_util.h"
14#include "testing/gtest/include/gtest/gtest.h"
15
16namespace {
17}
18
19static const struct trim_case {
20 const wchar_t* input;
21 const TrimPositions positions;
22 const wchar_t* output;
23 const TrimPositions return_value;
24} trim_cases[] = {
25 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
26 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
27 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
28 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
29 {L"", TRIM_ALL, L"", TRIM_NONE},
30 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
31 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
32 {L" ", TRIM_ALL, L"", TRIM_ALL},
33 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
34 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
35};
36
37static const struct trim_case_ascii {
38 const char* input;
39 const TrimPositions positions;
40 const char* output;
41 const TrimPositions return_value;
42} trim_cases_ascii[] = {
43 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
44 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
45 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
46 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
47 {"", TRIM_ALL, "", TRIM_NONE},
48 {" ", TRIM_LEADING, "", TRIM_LEADING},
49 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
50 {" ", TRIM_ALL, "", TRIM_ALL},
51 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
52 {"\x85Test String\xa0\x20", TRIM_ALL, "Test String", TRIM_ALL},
53};
54
55TEST(StringUtilTest, TrimWhitespace) {
56 std::wstring output; // Allow contents to carry over to next testcase
mmentovai@google.com38cabad2008-08-13 10:17:18 +090057 for (size_t i = 0; i < arraysize(trim_cases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +090058 const trim_case& value = trim_cases[i];
59 EXPECT_EQ(value.return_value,
60 TrimWhitespace(value.input, value.positions, &output));
61 EXPECT_EQ(value.output, output);
62 }
63
64 // Test that TrimWhitespace() can take the same string for input and output
65 output = L" This is a test \r\n";
66 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
67 EXPECT_EQ(L"This is a test", output);
68
69 // Once more, but with a string of whitespace
70 output = L" \r\n";
71 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
72 EXPECT_EQ(L"", output);
73
74 std::string output_ascii;
mmentovai@google.com38cabad2008-08-13 10:17:18 +090075 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +090076 const trim_case_ascii& value = trim_cases_ascii[i];
77 EXPECT_EQ(value.return_value,
78 TrimWhitespace(value.input, value.positions, &output_ascii));
79 EXPECT_EQ(value.output, output_ascii);
80 }
81}
82
// Expectations for CollapseWhitespace(): the text to collapse, the value
// passed as its second argument, and the expected result.
static const struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
} collapse_cases[] = {
  // Second argument false.
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L" ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L" Test \n \t String ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L" Test String", false, L"Test String"},
  {L"Test String ", false, L"Test String"},
  {L"Test String", false, L"Test String"},

  // Second argument true.
  {L"", true, L""},
  {L"\n", true, L""},
  {L" \r ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r Foo ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L" \tFoo bar \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
108
109TEST(StringUtilTest, CollapseWhitespace) {
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900110 for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +0900111 const collapse_case& value = collapse_cases[i];
112 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
113 }
114}
115
// Wide strings used by the conversion tests below, which expect each of them
// to survive a trip through UTF-8 unchanged.
static const wchar_t* const kConvertRoundtripCases[] = {
  // Plain ASCII.
  L"Google Video",
  // Chinese: "网页 图片 资讯更多 »"
  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
  // Greek: "Παγκόσμιος Ιστός"
  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
  // Russian: "Поиск страниц на русском"
  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
  // Korean: "전체서비스"
  L"\xc804\xccb4\xc11c\xbe44\xc2a4",

  // Code points that need more than 16 bits. How they are spelled depends on
  // whether wchar_t is 16 or 32 bits wide.
#if defined(WCHAR_T_IS_UTF16)
  // U+10300 as a surrogate pair.
  L"\xd800\xdf00",
  // U+11D40..U+11D44 as surrogate pairs.
  // NOTE(review): an earlier comment described these as Mathematical
  // Alphanumeric Symbols A-E, but that block starts at U+1D400 -- confirm
  // whether U+1D400..U+1D404 was intended.
  L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
#elif defined(WCHAR_T_IS_UTF32)
  // U+10300.
  L"\x10300",
  // U+11D40..U+11D44.
  // NOTE(review): an earlier comment described these as Mathematical
  // Alphanumeric Symbols A-E, but that block starts at U+1D400 -- confirm
  // whether U+1D400..U+1D404 was intended.
  L"\x11d40\x11d41\x11d42\x11d43\x11d44",
#endif
};
142
143TEST(StringUtilTest, ConvertUTF8AndWide) {
144 // we round-trip all the wide strings through UTF-8 to make sure everything
145 // agrees on the conversion. This uses the stream operators to test them
146 // simultaneously.
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900147 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +0900148 std::ostringstream utf8;
149 utf8 << WideToUTF8(kConvertRoundtripCases[i]);
150 std::wostringstream wide;
151 wide << UTF8ToWide(utf8.str());
152
153 EXPECT_EQ(kConvertRoundtripCases[i], wide.str());
154 }
155}
156
157TEST(StringUtilTest, ConvertUTF8AndWideEmptyString) {
158 // An empty std::wstring should be converted to an empty std::string,
159 // and vice versa.
160 std::wstring wempty;
161 std::string empty;
162 EXPECT_EQ(empty, WideToUTF8(wempty));
163 EXPECT_EQ(wempty, UTF8ToWide(empty));
164}
165
brettw@google.comfed55ab2008-08-08 00:29:49 +0900166TEST(StringUtilTest, ConvertUTF8ToWide) {
167 struct UTF8ToWideCase {
168 const char* utf8;
169 const wchar_t* wide;
170 bool success;
171 } convert_cases[] = {
172 // Regular UTF-8 input.
173 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
174 // Invalid Unicode code point.
175 {"\xef\xbf\xbfHello", L"Hello", false},
176 // Truncated UTF-8 sequence.
177 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},
178 // Truncated off the end.
179 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},
180 // Non-shortest-form UTF-8.
181 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},
182 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
183 {"\xed\xb0\x80", L"", false},
brettw@google.comfa499052008-08-08 05:27:57 +0900184 // Non-BMP character. The result will either be in UTF-16 or UTF-32.
brettw@google.come3c034a2008-08-08 03:31:40 +0900185#if defined(WCHAR_T_IS_UTF16)
brettw@google.comfed55ab2008-08-08 00:29:49 +0900186 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
brettw@google.come3c034a2008-08-08 03:31:40 +0900187#elif defined(WCHAR_T_IS_UTF32)
brettw@google.comfed55ab2008-08-08 00:29:49 +0900188 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
189#endif
190 };
191
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900192 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
brettw@google.comfed55ab2008-08-08 00:29:49 +0900193 std::wstring converted;
194 EXPECT_EQ(convert_cases[i].success,
195 UTF8ToWide(convert_cases[i].utf8,
196 strlen(convert_cases[i].utf8),
197 &converted));
198 std::wstring expected(convert_cases[i].wide);
199 EXPECT_EQ(expected, converted);
200 }
201
202 // Manually test an embedded NULL.
203 std::wstring converted;
204 EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted));
darin@google.comf3272802008-08-15 05:27:29 +0900205 ASSERT_EQ(3U, converted.length());
brettw@google.comfed55ab2008-08-08 00:29:49 +0900206 EXPECT_EQ(0, converted[0]);
207 EXPECT_EQ('Z', converted[1]);
208 EXPECT_EQ('\t', converted[2]);
209
210 // Make sure that conversion replaces, not appends.
211 EXPECT_TRUE(UTF8ToWide("B", 1, &converted));
darin@google.comf3272802008-08-15 05:27:29 +0900212 ASSERT_EQ(1U, converted.length());
brettw@google.comfed55ab2008-08-08 00:29:49 +0900213 EXPECT_EQ('B', converted[0]);
214}
215
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900216#if defined(WCHAR_T_IS_UTF16)
brettw@google.comfed55ab2008-08-08 00:29:49 +0900217// This test is only valid when wchar_t == UTF-16.
218TEST(StringUtilTest, ConvertUTF16ToUTF8) {
219 struct UTF16ToUTF8Case {
220 const wchar_t* utf16;
221 const char* utf8;
222 bool success;
223 } convert_cases[] = {
224 // Regular UTF-16 input.
225 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
226 // Test a non-BMP character.
227 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
228 // Invalid Unicode code point.
229 {L"\xffffHello", "Hello", false},
230 // The first character is a truncated UTF-16 character.
231 {L"\xd800\x597d", "\xe5\xa5\xbd", false},
232 // Truncated at the end.
233 {L"\x597d\xd800", "\xe5\xa5\xbd", false},
234 };
235
236 for (int i = 0; i < arraysize(convert_cases); i++) {
237 std::string converted;
238 EXPECT_EQ(convert_cases[i].success,
239 WideToUTF8(convert_cases[i].utf16,
240 wcslen(convert_cases[i].utf16),
241 &converted));
242 std::string expected(convert_cases[i].utf8);
243 EXPECT_EQ(expected, converted);
244 }
245}
246
brettw@google.come3c034a2008-08-08 03:31:40 +0900247#elif defined(WCHAR_T_IS_UTF32)
brettw@google.comfa499052008-08-08 05:27:57 +0900248// This test is only valid when wchar_t == UTF-32.
249TEST(StringUtilTest, ConvertUTF32ToUTF8) {
brettw@google.comfed55ab2008-08-08 00:29:49 +0900250 struct UTF8ToWideCase {
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900251 const wchar_t* utf32;
brettw@google.comfed55ab2008-08-08 00:29:49 +0900252 const char* utf8;
253 bool success;
254 } convert_cases[] = {
255 // Regular 16-bit input.
256 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
257 // Test a non-BMP character.
258 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
259 // Invalid Unicode code points.
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900260 {L"\xffffHello", "Hello", false},
261 {L"\xfffffffHello", "Hello", false},
brettw@google.comfed55ab2008-08-08 00:29:49 +0900262 // The first character is a truncated UTF-16 character.
263 {L"\xd800\x597d", "\xe5\xa5\xbd", false},
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900264 };
brettw@google.comfed55ab2008-08-08 00:29:49 +0900265
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900266 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
brettw@google.comfed55ab2008-08-08 00:29:49 +0900267 std::string converted;
268 EXPECT_EQ(convert_cases[i].success,
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900269 WideToUTF8(convert_cases[i].utf32,
270 wcslen(convert_cases[i].utf32),
brettw@google.comfed55ab2008-08-08 00:29:49 +0900271 &converted));
272 std::string expected(convert_cases[i].utf8);
273 EXPECT_EQ(expected, converted);
274 }
275}
brettw@google.come3c034a2008-08-08 03:31:40 +0900276#endif // defined(WCHAR_T_IS_UTF32)
brettw@google.comfed55ab2008-08-08 00:29:49 +0900277
initial.commit3f4a7322008-07-27 06:49:38 +0900278TEST(StringUtilTest, ConvertMultiString) {
279 static wchar_t wmulti[] = {
280 L'f', L'o', L'o', L'\0',
281 L'b', L'a', L'r', L'\0',
282 L'b', L'a', L'z', L'\0',
283 L'\0'
284 };
285 static char multi[] = {
286 'f', 'o', 'o', '\0',
287 'b', 'a', 'r', '\0',
288 'b', 'a', 'z', '\0',
289 '\0'
290 };
291 std::wstring wmultistring;
292 memcpy(WriteInto(&wmultistring, arraysize(wmulti)), wmulti, sizeof(wmulti));
293 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length());
294 std::string expected;
295 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi));
296 EXPECT_EQ(arraysize(multi) - 1, expected.length());
297 const std::string& converted = WideToUTF8(wmultistring);
298 EXPECT_EQ(arraysize(multi) - 1, converted.length());
299 EXPECT_EQ(expected, converted);
300}
301
302TEST(StringUtilTest, ConvertCodepageUTF8) {
303 // Make sure WideToCodepage works like WideToUTF8.
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900304 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +0900305 std::string expected(WideToUTF8(kConvertRoundtripCases[i]));
306 std::string utf8;
307 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,
308 OnStringUtilConversionError::SKIP, &utf8));
309 EXPECT_EQ(expected, utf8);
310 }
311}
312
313TEST(StringUtilTest, ConvertBetweenCodepageAndWide) {
314 static const struct {
315 const char* codepage_name;
316 const char* encoded;
317 OnStringUtilConversionError::Type on_error;
318 bool success;
319 const wchar_t* wide;
320 } kConvertCodepageCases[] = {
321 // Test a case where the input can no be decoded, using both SKIP and FAIL
322 // error handling rules. "A7 41" is valid, but "A6" isn't.
323 {"big5",
324 "\xA7\x41\xA6",
325 OnStringUtilConversionError::FAIL,
326 false,
327 L""},
328 {"big5",
329 "\xA7\x41\xA6",
330 OnStringUtilConversionError::SKIP,
331 true,
332 L"\x4F60"},
333 // Arabic (ISO-8859)
334 {"iso-8859-6",
335 "\xC7\xEE\xE4\xD3\xF1\xEE\xE4\xC7\xE5\xEF" " "
336 "\xD9\xEE\xE4\xEE\xEA\xF2\xE3\xEF\xE5\xF2",
337 OnStringUtilConversionError::FAIL,
338 true,
339 L"\x0627\x064E\x0644\x0633\x0651\x064E\x0644\x0627\x0645\x064F" L" "
340 L"\x0639\x064E\x0644\x064E\x064A\x0652\x0643\x064F\x0645\x0652"},
341 // Chinese Simplified (GB2312)
342 {"gb2312",
343 "\xC4\xE3\xBA\xC3",
344 OnStringUtilConversionError::FAIL,
345 true,
346 L"\x4F60\x597D"},
347 // Chinese Traditional (BIG5)
348 {"big5",
349 "\xA7\x41\xA6\x6E",
350 OnStringUtilConversionError::FAIL,
351 true,
352 L"\x4F60\x597D"},
353 // Greek (ISO-8859)
354 {"iso-8859-7",
355 "\xE3\xE5\xE9\xDC" " " "\xF3\xEF\xF5",
356 OnStringUtilConversionError::FAIL,
357 true,
358 L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"},
359 // Hebrew (Windows)
360 {"windows-1255", /* to be replaced with "iso-8859-8-I"? */
361 "\xF9\xD1\xC8\xEC\xE5\xC9\xED",
362 OnStringUtilConversionError::FAIL,
363 true,
364 L"\x05E9\x05C1\x05B8\x05DC\x05D5\x05B9\x05DD"},
365 // Hindi Devanagari (ISCII)
366 {"iscii-dev",
367 "\xEF\x42" "\xC6\xCC\xD7\xE8\xB3\xDA\xCF",
368 OnStringUtilConversionError::FAIL,
369 true,
370 L"\x0928\x092E\x0938\x094D\x0915\x093E\x0930"},
371 // Korean (EUC)
372 {"euc-kr",
373 "\xBE\xC8\xB3\xE7\xC7\xCF\xBC\xBC\xBF\xE4",
374 OnStringUtilConversionError::FAIL,
375 true,
376 L"\xC548\xB155\xD558\xC138\xC694"},
377 // Japanese (EUC)
378 {"euc-jp",
379 "\xA4\xB3\xA4\xF3\xA4\xCB\xA4\xC1\xA4\xCF",
380 OnStringUtilConversionError::FAIL,
381 true,
382 L"\x3053\x3093\x306B\x3061\x306F"},
383 // Japanese (ISO-2022)
384 {"iso-2022-jp",
385 "\x1B\x24\x42" "\x24\x33\x24\x73\x24\x4B\x24\x41\x24\x4F" "\x1B\x28\x42",
386 OnStringUtilConversionError::FAIL,
387 true,
388 L"\x3053\x3093\x306B\x3061\x306F"},
389 // Japanese (Shift-JIS)
390 {"sjis",
391 "\x82\xB1\x82\xF1\x82\xC9\x82\xBF\x82\xCD",
392 OnStringUtilConversionError::FAIL,
393 true,
394 L"\x3053\x3093\x306B\x3061\x306F"},
395 // Russian (KOI8)
396 {"koi8-r",
397 "\xDA\xC4\xD2\xC1\xD7\xD3\xD4\xD7\xD5\xCA\xD4\xC5",
398 OnStringUtilConversionError::FAIL,
399 true,
400 L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"
401 L"\x0443\x0439\x0442\x0435"},
402 // Thai (ISO-8859)
403 {"windows-874", /* to be replaced with "iso-8859-11". */
404 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",
405 OnStringUtilConversionError::FAIL,
406 true,
407 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"
408 L"\x0E04\x0E23\x0e31\x0E1A"},
409 };
410
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900411 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +0900412 std::wstring wide;
413 bool success = CodepageToWide(kConvertCodepageCases[i].encoded,
414 kConvertCodepageCases[i].codepage_name,
415 kConvertCodepageCases[i].on_error,
416 &wide);
417 EXPECT_EQ(kConvertCodepageCases[i].success, success);
418 EXPECT_EQ(kConvertCodepageCases[i].wide, wide);
419
420 // When decoding was successful and nothing was skipped, we also check the
421 // reverse conversion.
422 if (success &&
423 kConvertCodepageCases[i].on_error ==
424 OnStringUtilConversionError::FAIL) {
425 std::string encoded;
426 success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name,
427 kConvertCodepageCases[i].on_error, &encoded);
428 EXPECT_EQ(kConvertCodepageCases[i].success, success);
429 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
430 }
431 }
432
433 // The above cases handled codepage->wide errors, but not wide->codepage.
434 // Test that here.
435 std::string encoded("Temp data"); // Make sure the string gets cleared.
436
437 // First test going to an encoding that can not represent that character.
438 EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
439 OnStringUtilConversionError::FAIL, &encoded));
440 EXPECT_TRUE(encoded.empty());
441 EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
442 OnStringUtilConversionError::SKIP, &encoded));
443 EXPECT_STREQ("Chinese", encoded.c_str());
444
brettw@google.come3c034a2008-08-08 03:31:40 +0900445#if defined(WCHAR_T_IS_UTF16)
initial.commit3f4a7322008-07-27 06:49:38 +0900446 // When we're in UTF-16 mode, test an invalid UTF-16 character in the input.
447 EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1",
448 OnStringUtilConversionError::FAIL, &encoded));
449 EXPECT_TRUE(encoded.empty());
450 EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1",
451 OnStringUtilConversionError::SKIP, &encoded));
452 EXPECT_STREQ("az", encoded.c_str());
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900453#endif // WCHAR_T_IS_UTF16
initial.commit3f4a7322008-07-27 06:49:38 +0900454
455 // Invalid characters should fail.
456 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",
457 OnStringUtilConversionError::SKIP, &encoded));
458 EXPECT_STREQ("az", encoded.c_str());
459
460 // Invalid codepages should fail.
461 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",
462 OnStringUtilConversionError::SKIP, &encoded));
463}
464
465TEST(StringUtilTest, ConvertASCII) {
466 static const char* char_cases[] = {
467 "Google Video",
468 "Hello, world\n",
469 "0123ABCDwxyz \a\b\t\r\n!+,.~"
470 };
471
472 static const wchar_t* const wchar_cases[] = {
473 L"Google Video",
474 L"Hello, world\n",
475 L"0123ABCDwxyz \a\b\t\r\n!+,.~"
476 };
477
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900478 for (size_t i = 0; i < arraysize(char_cases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +0900479 EXPECT_TRUE(IsStringASCII(char_cases[i]));
480 std::wstring wide = ASCIIToWide(char_cases[i]);
481 EXPECT_EQ(wchar_cases[i], wide);
482
483 EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
484 std::string ascii = WideToASCII(wchar_cases[i]);
485 EXPECT_EQ(char_cases[i], ascii);
486 }
487
488 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
489 EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
490
491 // Convert empty strings.
492 std::wstring wempty;
493 std::string empty;
494 EXPECT_EQ(empty, WideToASCII(wempty));
495 EXPECT_EQ(wempty, ASCIIToWide(empty));
mmentovai@google.com93285682008-08-06 07:46:15 +0900496
497 // Convert strings with an embedded NUL character.
498 const char chars_with_nul[] = "test\0string";
499 const int length_with_nul = arraysize(chars_with_nul) - 1;
500 std::string string_with_nul(chars_with_nul, length_with_nul);
501 std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900502 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
503 wide_with_nul.length());
mmentovai@google.com93285682008-08-06 07:46:15 +0900504 std::string narrow_with_nul = WideToASCII(wide_with_nul);
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900505 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
506 narrow_with_nul.length());
mmentovai@google.com93285682008-08-06 07:46:15 +0900507 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
initial.commit3f4a7322008-07-27 06:49:38 +0900508}
509
// Inputs for the LowerCaseEqualsASCII test: the same source text as a wide
// and as a narrow string, plus the lower-case text it is compared against.
static const struct {
  const wchar_t* src_w;
  const char* src_a;
  const char* dst;
} lowercase_cases[] = {
  {L"FoO", "FoO", "foo"},  // Mixed case.
  {L"foo", "foo", "foo"},  // Already lower case.
  {L"FOO", "FOO", "foo"},  // All upper case.
};
519
520TEST(StringUtilTest, LowerCaseEqualsASCII) {
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900521 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +0900522 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
523 lowercase_cases[i].dst));
524 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
525 lowercase_cases[i].dst));
526 }
527}
528
529TEST(StringUtilTest, GetByteDisplayUnits) {
530 static const struct {
531 int64 bytes;
532 DataUnits expected;
533 } cases[] = {
534 {0, DATA_UNITS_BYTE},
535 {512, DATA_UNITS_BYTE},
536 {10*1024, DATA_UNITS_KILOBYTE},
537 {10*1024*1024, DATA_UNITS_MEGABYTE},
538 {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE},
539 {~(1LL<<63), DATA_UNITS_GIGABYTE},
540#ifdef NDEBUG
541 {-1, DATA_UNITS_BYTE},
542#endif
543 };
544
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900545 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
initial.commit3f4a7322008-07-27 06:49:38 +0900546 EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
547}
548
549TEST(StringUtilTest, FormatBytes) {
550 static const struct {
551 int64 bytes;
552 DataUnits units;
553 const wchar_t* expected;
554 const wchar_t* expected_with_units;
555 } cases[] = {
556 {0, DATA_UNITS_BYTE, L"0", L"0 B"},
557 {512, DATA_UNITS_BYTE, L"512", L"512 B"},
558 {512, DATA_UNITS_KILOBYTE, L"0.5", L"0.5 kB"},
559 {1024*1024, DATA_UNITS_KILOBYTE, L"1024", L"1024 kB"},
560 {1024*1024, DATA_UNITS_MEGABYTE, L"1", L"1 MB"},
561 {1024*1024*1024, DATA_UNITS_GIGABYTE, L"1", L"1 GB"},
562 {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"},
563 {~(1LL<<63), DATA_UNITS_GIGABYTE, L"8589934592", L"8589934592 GB"},
564 // Make sure the first digit of the fractional part works.
565 {1024*1024 + 103, DATA_UNITS_KILOBYTE, L"1024.1", L"1024.1 kB"},
566 {1024*1024 + 205 * 1024, DATA_UNITS_MEGABYTE, L"1.2", L"1.2 MB"},
567 {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIGABYTE,
568 L"1.9", L"1.9 GB"},
569 {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"},
570#ifdef NDEBUG
571 {-1, DATA_UNITS_BYTE, L"", L""},
572#endif
573 };
574
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900575 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +0900576 EXPECT_EQ(cases[i].expected,
577 FormatBytes(cases[i].bytes, cases[i].units, false));
578 EXPECT_EQ(cases[i].expected_with_units,
579 FormatBytes(cases[i].bytes, cases[i].units, true));
580 }
581}
582
583TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
584 static const struct {
evanm@google.come41d3b32008-08-15 10:04:11 +0900585 const wchar_t* str;
initial.commit3f4a7322008-07-27 06:49:38 +0900586 std::wstring::size_type start_offset;
evanm@google.come41d3b32008-08-15 10:04:11 +0900587 const wchar_t* find_this;
588 const wchar_t* replace_with;
589 const wchar_t* expected;
initial.commit3f4a7322008-07-27 06:49:38 +0900590 } cases[] = {
591 {L"aaa", 0, L"a", L"b", L"bbb"},
592 {L"abb", 0, L"ab", L"a", L"ab"},
593 {L"Removing some substrings inging", 0, L"ing", L"", L"Remov some substrs "},
594 {L"Not found", 0, L"x", L"0", L"Not found"},
595 {L"Not found again", 5, L"x", L"0", L"Not found again"},
596 {L" Making it much longer ", 0, L" ", L"Four score and seven years ago",
597 L"Four score and seven years agoMakingFour score and seven years agoit"
598 L"Four score and seven years agomuchFour score and seven years agolonger"
599 L"Four score and seven years ago"},
600 {L"Invalid offset", 9999, L"t", L"foobar", L"Invalid offset"},
601 {L"Replace me only me once", 9, L"me ", L"", L"Replace me only once"},
602 {L"abababab", 2, L"ab", L"c", L"abccc"},
603 };
604
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900605 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
initial.commit3f4a7322008-07-27 06:49:38 +0900606 std::wstring str(cases[i].str);
607 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
608 cases[i].find_this, cases[i].replace_with);
609 EXPECT_EQ(cases[i].expected, str);
610 }
611}
612
namespace {

// One row of an integer-to-string test table: the number to convert, its
// expected text when formatted as signed, and its expected text when the same
// value is formatted as unsigned.
template <typename IntType>
struct IntToStringTest {
  IntType num;
  const char* sexpected;
  const char* uexpected;
};

}  // namespace
623
initial.commit3f4a7322008-07-27 06:49:38 +0900624TEST(StringUtilTest, IntToString) {
deanm@google.comd6498452008-08-13 20:09:33 +0900625
626 static const IntToStringTest<int> int_tests[] = {
627 { 0, "0", "0" },
628 { -1, "-1", "4294967295" },
629 { std::numeric_limits<int>::max(), "2147483647", "2147483647" },
630 { std::numeric_limits<int>::min(), "-2147483648", "2147483648" },
631 };
632 static const IntToStringTest<int64> int64_tests[] = {
633 { 0, "0", "0" },
634 { -1, "-1", "18446744073709551615" },
635 { std::numeric_limits<int64>::max(),
636 "9223372036854775807",
637 "9223372036854775807", },
638 { std::numeric_limits<int64>::min(),
639 "-9223372036854775808",
640 "9223372036854775808" },
initial.commit3f4a7322008-07-27 06:49:38 +0900641 };
642
darin@google.comf3272802008-08-15 05:27:29 +0900643 for (size_t i = 0; i < arraysize(int_tests); ++i) {
deanm@google.comd6498452008-08-13 20:09:33 +0900644 const IntToStringTest<int>* test = &int_tests[i];
645 EXPECT_EQ(IntToString(test->num), test->sexpected);
646 EXPECT_EQ(IntToWString(test->num), UTF8ToWide(test->sexpected));
647 EXPECT_EQ(UintToString(test->num), test->uexpected);
648 EXPECT_EQ(UintToWString(test->num), UTF8ToWide(test->uexpected));
649 }
darin@google.comf3272802008-08-15 05:27:29 +0900650 for (size_t i = 0; i < arraysize(int64_tests); ++i) {
deanm@google.comd6498452008-08-13 20:09:33 +0900651 const IntToStringTest<int64>* test = &int64_tests[i];
652 EXPECT_EQ(Int64ToString(test->num), test->sexpected);
653 EXPECT_EQ(Int64ToWString(test->num), UTF8ToWide(test->sexpected));
654 EXPECT_EQ(Uint64ToString(test->num), test->uexpected);
655 EXPECT_EQ(Uint64ToWString(test->num), UTF8ToWide(test->uexpected));
656 }
initial.commit3f4a7322008-07-27 06:49:38 +0900657}
658
659TEST(StringUtilTest, Uint64ToString) {
660 static const struct {
661 uint64 input;
662 std::string output;
663 } cases[] = {
664 {0, "0"},
665 {42, "42"},
666 {INT_MAX, "2147483647"},
667 {kuint64max, "18446744073709551615"},
668 };
669
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900670 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
initial.commit3f4a7322008-07-27 06:49:38 +0900671 EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input));
672}
673
mmentovai@google.com93285682008-08-06 07:46:15 +0900674TEST(StringUtilTest, StringToInt) {
675 static const struct {
676 std::string input;
677 int output;
678 bool success;
679 } cases[] = {
680 {"0", 0, true},
681 {"42", 42, true},
682 {"-2147483648", INT_MIN, true},
683 {"2147483647", INT_MAX, true},
684 {"", 0, false},
685 {" 42", 42, false},
ericroman@google.com491d8732008-08-09 07:03:26 +0900686 {"42 ", 42, false},
mmentovai@google.com93285682008-08-06 07:46:15 +0900687 {"\t\n\v\f\r 42", 42, false},
688 {"blah42", 0, false},
689 {"42blah", 42, false},
690 {"blah42blah", 0, false},
691 {"-273.15", -273, false},
692 {"+98.6", 98, false},
693 {"--123", 0, false},
694 {"++123", 0, false},
695 {"-+123", 0, false},
696 {"+-123", 0, false},
697 {"-", 0, false},
698 {"-2147483649", INT_MIN, false},
699 {"-99999999999", INT_MIN, false},
700 {"2147483648", INT_MAX, false},
701 {"99999999999", INT_MAX, false},
702 };
703
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900704 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
mmentovai@google.com93285682008-08-06 07:46:15 +0900705 EXPECT_EQ(cases[i].output, StringToInt(cases[i].input));
706 int output;
707 EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output));
708 EXPECT_EQ(cases[i].output, output);
709
710 std::wstring wide_input = ASCIIToWide(cases[i].input);
711 EXPECT_EQ(cases[i].output, StringToInt(wide_input));
712 EXPECT_EQ(cases[i].success, StringToInt(wide_input, &output));
713 EXPECT_EQ(cases[i].output, output);
714 }
715
716 // One additional test to verify that conversion of numbers in strings with
717 // embedded NUL characters. The NUL and extra data after it should be
718 // interpreted as junk after the number.
719 const char input[] = "6\06";
720 std::string input_string(input, arraysize(input) - 1);
721 int output;
722 EXPECT_FALSE(StringToInt(input_string, &output));
723 EXPECT_EQ(6, output);
724
725 std::wstring wide_input = ASCIIToWide(input_string);
726 EXPECT_FALSE(StringToInt(wide_input, &output));
727 EXPECT_EQ(6, output);
728}
729
730TEST(StringUtilTest, StringToInt64) {
731 static const struct {
732 std::string input;
733 int64 output;
734 bool success;
735 } cases[] = {
736 {"0", 0, true},
737 {"42", 42, true},
738 {"-2147483648", INT_MIN, true},
739 {"2147483647", INT_MAX, true},
740 {"-2147483649", GG_INT64_C(-2147483649), true},
741 {"-99999999999", GG_INT64_C(-99999999999), true},
742 {"2147483648", GG_INT64_C(2147483648), true},
743 {"99999999999", GG_INT64_C(99999999999), true},
744 {"9223372036854775807", kint64max, true},
745 {"-9223372036854775808", kint64min, true},
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900746 {"09", 9, true},
747 {"-09", -9, true},
mmentovai@google.com93285682008-08-06 07:46:15 +0900748 {"", 0, false},
749 {" 42", 42, false},
ericroman@google.com491d8732008-08-09 07:03:26 +0900750 {"42 ", 42, false},
mmentovai@google.com93285682008-08-06 07:46:15 +0900751 {"\t\n\v\f\r 42", 42, false},
752 {"blah42", 0, false},
753 {"42blah", 42, false},
754 {"blah42blah", 0, false},
755 {"-273.15", -273, false},
756 {"+98.6", 98, false},
757 {"--123", 0, false},
758 {"++123", 0, false},
759 {"-+123", 0, false},
760 {"+-123", 0, false},
761 {"-", 0, false},
762 {"-9223372036854775809", kint64min, false},
763 {"-99999999999999999999", kint64min, false},
764 {"9223372036854775808", kint64max, false},
765 {"99999999999999999999", kint64max, false},
766 };
767
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900768 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
mmentovai@google.com93285682008-08-06 07:46:15 +0900769 EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input));
770 int64 output;
771 EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output));
772 EXPECT_EQ(cases[i].output, output);
773
774 std::wstring wide_input = ASCIIToWide(cases[i].input);
775 EXPECT_EQ(cases[i].output, StringToInt64(wide_input));
776 EXPECT_EQ(cases[i].success, StringToInt64(wide_input, &output));
777 EXPECT_EQ(cases[i].output, output);
778 }
779
780 // One additional test to verify that conversion of numbers in strings with
781 // embedded NUL characters. The NUL and extra data after it should be
782 // interpreted as junk after the number.
783 const char input[] = "6\06";
784 std::string input_string(input, arraysize(input) - 1);
785 int64 output;
786 EXPECT_FALSE(StringToInt64(input_string, &output));
787 EXPECT_EQ(6, output);
788
789 std::wstring wide_input = ASCIIToWide(input_string);
790 EXPECT_FALSE(StringToInt64(wide_input, &output));
791 EXPECT_EQ(6, output);
792}
793
794TEST(StringUtilTest, HexStringToInt) {
795 static const struct {
796 std::string input;
797 int output;
798 bool success;
799 } cases[] = {
800 {"0", 0, true},
801 {"42", 66, true},
802 {"-42", -66, true},
803 {"+42", 66, true},
804 {"7fffffff", INT_MAX, true},
805 {"80000000", INT_MIN, true},
806 {"ffffffff", -1, true},
807 {"DeadBeef", 0xdeadbeef, true},
808 {"0x42", 66, true},
809 {"-0x42", -66, true},
810 {"+0x42", 66, true},
811 {"0x7fffffff", INT_MAX, true},
812 {"0x80000000", INT_MIN, true},
813 {"0xffffffff", -1, true},
814 {"0XDeadBeef", 0xdeadbeef, true},
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900815 {"0x0f", 15, true},
816 {"0f", 15, true},
mmentovai@google.com93285682008-08-06 07:46:15 +0900817 {" 45", 0x45, false},
818 {"\t\n\v\f\r 0x45", 0x45, false},
ericroman@google.com491d8732008-08-09 07:03:26 +0900819 {" 45", 0x45, false},
820 {"45 ", 0x45, false},
mmentovai@google.com93285682008-08-06 07:46:15 +0900821 {"efgh", 0xef, false},
822 {"0xefgh", 0xef, false},
823 {"hgfe", 0, false},
824 {"100000000", -1, false}, // don't care about |output|, just |success|
825 {"-", 0, false},
826 {"", 0, false},
827 };
828
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900829 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
mmentovai@google.com93285682008-08-06 07:46:15 +0900830 EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input));
831 int output;
832 EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output));
833 EXPECT_EQ(cases[i].output, output);
834
835 std::wstring wide_input = ASCIIToWide(cases[i].input);
836 EXPECT_EQ(cases[i].output, HexStringToInt(wide_input));
837 EXPECT_EQ(cases[i].success, HexStringToInt(wide_input, &output));
838 EXPECT_EQ(cases[i].output, output);
839 }
840 // One additional test to verify that conversion of numbers in strings with
841 // embedded NUL characters. The NUL and extra data after it should be
842 // interpreted as junk after the number.
843 const char input[] = "0xc0ffee\09";
844 std::string input_string(input, arraysize(input) - 1);
845 int output;
846 EXPECT_FALSE(HexStringToInt(input_string, &output));
847 EXPECT_EQ(0xc0ffee, output);
848
849 std::wstring wide_input = ASCIIToWide(input_string);
850 EXPECT_FALSE(HexStringToInt(wide_input, &output));
851 EXPECT_EQ(0xc0ffee, output);
852}
853
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900854TEST(StringUtilTest, StringToDouble) {
855 static const struct {
856 std::string input;
857 double output;
858 bool success;
859 } cases[] = {
860 {"0", 0.0, true},
861 {"42", 42.0, true},
862 {"-42", -42.0, true},
863 {"123.45", 123.45, true},
864 {"-123.45", -123.45, true},
865 {"+123.45", 123.45, true},
866 {"2.99792458e8", 299792458.0, true},
867 {"149597870.691E+3", 149597870691.0, true},
868 {"6.", 6.0, true},
869 {"9e99999999999999999999", HUGE_VAL, false},
870 {"-9e99999999999999999999", -HUGE_VAL, false},
871 {"1e-2", 0.01, true},
ericroman@google.com491d8732008-08-09 07:03:26 +0900872 {" 1e-2", 0.01, false},
873 {"1e-2 ", 0.01, false},
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900874 {"-1E-7", -0.0000001, true},
875 {"01e02", 100, true},
876 {"2.3e15", 2.3e15, true},
877 {"\t\n\v\f\r -123.45e2", -12345.0, false},
878 {"+123 e4", 123.0, false},
879 {"123e ", 123.0, false},
880 {"123e", 123.0, false},
881 {" 2.99", 2.99, false},
882 {"1e3.4", 1000.0, false},
883 {"nothing", 0.0, false},
884 {"-", 0.0, false},
885 {"+", 0.0, false},
886 {"", 0.0, false},
887 };
888
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900889 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900890 EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(cases[i].input));
891 double output;
892 EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output));
893 EXPECT_DOUBLE_EQ(cases[i].output, output);
894
895 std::wstring wide_input = ASCIIToWide(cases[i].input);
896 EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(wide_input));
897 EXPECT_EQ(cases[i].success, StringToDouble(wide_input, &output));
898 EXPECT_DOUBLE_EQ(cases[i].output, output);
899 }
900
901 // One additional test to verify that conversion of numbers in strings with
902 // embedded NUL characters. The NUL and extra data after it should be
903 // interpreted as junk after the number.
904 const char input[] = "3.14\0159";
905 std::string input_string(input, arraysize(input) - 1);
906 double output;
907 EXPECT_FALSE(StringToDouble(input_string, &output));
908 EXPECT_DOUBLE_EQ(3.14, output);
909
910 std::wstring wide_input = ASCIIToWide(input_string);
911 EXPECT_FALSE(StringToDouble(wide_input, &output));
912 EXPECT_DOUBLE_EQ(3.14, output);
913}
914
initial.commit3f4a7322008-07-27 06:49:38 +0900915// This checks where we can use the assignment operator for a va_list. We need
916// a way to do this since Visual C doesn't support va_copy, but assignment on
917// va_list is not guaranteed to be a copy. See StringAppendVT which uses this
918// capability.
919static void VariableArgsFunc(const char* format, ...) {
920 va_list org;
921 va_start(org, format);
922
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900923 va_list dup;
924 base::va_copy(dup, org);
initial.commit3f4a7322008-07-27 06:49:38 +0900925 int i1 = va_arg(org, int);
926 int j1 = va_arg(org, int);
927 char* s1 = va_arg(org, char*);
928 double d1 = va_arg(org, double);
929 va_end(org);
930
931 int i2 = va_arg(dup, int);
932 int j2 = va_arg(dup, int);
933 char* s2 = va_arg(dup, char*);
934 double d2 = va_arg(dup, double);
935
936 EXPECT_EQ(i1, i2);
937 EXPECT_EQ(j1, j2);
938 EXPECT_STREQ(s1, s2);
939 EXPECT_EQ(d1, d2);
940
941 va_end(dup);
942}
943
944TEST(StringUtilTest, VAList) {
945 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
946}
947
948TEST(StringUtilTest, StringPrintfEmptyFormat) {
949 const char* empty = "";
950 EXPECT_EQ("", StringPrintf(empty));
951 EXPECT_EQ("", StringPrintf("%s", ""));
952}
953
954TEST(StringUtilTest, StringPrintfMisc) {
955 EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
mmentovai@google.com8ae0c2c2008-08-14 10:25:32 +0900956 EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w'));
initial.commit3f4a7322008-07-27 06:49:38 +0900957}
958
959TEST(StringUtilTest, StringAppendfStringEmptyParam) {
960 std::string value("Hello");
961 StringAppendF(&value, "");
962 EXPECT_EQ("Hello", value);
963
964 std::wstring valuew(L"Hello");
965 StringAppendF(&valuew, L"");
966 EXPECT_EQ(L"Hello", valuew);
967}
968
969TEST(StringUtilTest, StringAppendfEmptyString) {
970 std::string value("Hello");
971 StringAppendF(&value, "%s", "");
972 EXPECT_EQ("Hello", value);
973
974 std::wstring valuew(L"Hello");
mmentovai@google.com8ae0c2c2008-08-14 10:25:32 +0900975 StringAppendF(&valuew, L"%ls", L"");
initial.commit3f4a7322008-07-27 06:49:38 +0900976 EXPECT_EQ(L"Hello", valuew);
977}
978
979TEST(StringUtilTest, StringAppendfString) {
980 std::string value("Hello");
981 StringAppendF(&value, " %s", "World");
982 EXPECT_EQ("Hello World", value);
983
984 std::wstring valuew(L"Hello");
mmentovai@google.com38cabad2008-08-13 10:17:18 +0900985 StringAppendF(&valuew, L" %ls", L"World");
initial.commit3f4a7322008-07-27 06:49:38 +0900986 EXPECT_EQ(L"Hello World", valuew);
987}
988
989TEST(StringUtilTest, StringAppendfInt) {
990 std::string value("Hello");
991 StringAppendF(&value, " %d", 123);
992 EXPECT_EQ("Hello 123", value);
993
994 std::wstring valuew(L"Hello");
995 StringAppendF(&valuew, L" %d", 123);
996 EXPECT_EQ(L"Hello 123", valuew);
997}
998
999// Make sure that lengths exactly around the initial buffer size are handled
1000// correctly.
1001TEST(StringUtilTest, StringPrintfBounds) {
1002 const int src_len = 1026;
1003 char src[src_len];
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001004 for (size_t i = 0; i < arraysize(src); i++)
initial.commit3f4a7322008-07-27 06:49:38 +09001005 src[i] = 'A';
1006
1007 wchar_t srcw[src_len];
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001008 for (size_t i = 0; i < arraysize(srcw); i++)
initial.commit3f4a7322008-07-27 06:49:38 +09001009 srcw[i] = 'A';
1010
1011 for (int i = 1; i < 3; i++) {
1012 src[src_len - i] = 0;
1013 std::string out;
1014 SStringPrintf(&out, "%s", src);
1015 EXPECT_STREQ(src, out.c_str());
1016
1017 srcw[src_len - i] = 0;
1018 std::wstring outw;
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001019 SStringPrintf(&outw, L"%ls", srcw);
initial.commit3f4a7322008-07-27 06:49:38 +09001020 EXPECT_STREQ(srcw, outw.c_str());
1021 }
1022}
1023
1024// Test very large sprintfs that will cause the buffer to grow.
1025TEST(StringUtilTest, Grow) {
1026 char src[1026];
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001027 for (size_t i = 0; i < arraysize(src); i++)
initial.commit3f4a7322008-07-27 06:49:38 +09001028 src[i] = 'A';
1029 src[1025] = 0;
1030
evanm@google.come41d3b32008-08-15 10:04:11 +09001031 const char* fmt = "%sB%sB%sB%sB%sB%sB%s";
initial.commit3f4a7322008-07-27 06:49:38 +09001032
1033 std::string out;
1034 SStringPrintf(&out, fmt, src, src, src, src, src, src, src);
1035
1036 char* ref = new char[320000];
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001037#if defined(OS_WIN)
initial.commit3f4a7322008-07-27 06:49:38 +09001038 sprintf_s(ref, 320000, fmt, src, src, src, src, src, src, src);
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001039#elif defined(OS_POSIX)
1040 snprintf(ref, 320000, fmt, src, src, src, src, src, src, src);
1041#endif
initial.commit3f4a7322008-07-27 06:49:38 +09001042
1043 EXPECT_STREQ(ref, out.c_str());
deanm@google.com78715b72008-08-19 23:02:18 +09001044 delete[] ref;
initial.commit3f4a7322008-07-27 06:49:38 +09001045}
1046
1047// Test the boundary condition for the size of the string_util's
1048// internal buffer.
1049TEST(StringUtilTest, GrowBoundary) {
1050 const int string_util_buf_len = 1024;
1051 // Our buffer should be one larger than the size of StringAppendVT's stack
1052 // buffer.
1053 const int buf_len = string_util_buf_len + 1;
1054 char src[buf_len + 1]; // Need extra one for NULL-terminator.
1055 for (int i = 0; i < buf_len; ++i)
1056 src[i] = 'a';
1057 src[buf_len] = 0;
1058
1059 std::string out;
1060 SStringPrintf(&out, "%s", src);
1061
1062 EXPECT_STREQ(src, out.c_str());
1063}
1064
// TODO(evanm): what's the proper cross-platform test here?
#if defined(OS_WIN)
// sprintf in Visual Studio fails when given U+FFFF. This checks that the
// failure is handled gracefully: the output is expected to be empty.
TEST(StringUtilTest, Invalid) {
  const wchar_t bad_input[2] = { 0xffff, 0 };

  std::wstring formatted;
  SStringPrintf(&formatted, L"%ls", bad_input);
  EXPECT_STREQ(L"", formatted.c_str());
}
#endif
initial.commit3f4a7322008-07-27 06:49:38 +09001079
1080// Test for SplitString
1081TEST(StringUtilTest, SplitString) {
1082 std::vector<std::wstring> r;
1083
1084 SplitString(L"a,b,c", L',', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001085 EXPECT_EQ(3U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001086 EXPECT_EQ(r[0], L"a");
1087 EXPECT_EQ(r[1], L"b");
1088 EXPECT_EQ(r[2], L"c");
1089 r.clear();
1090
1091 SplitString(L"a, b, c", L',', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001092 EXPECT_EQ(3U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001093 EXPECT_EQ(r[0], L"a");
1094 EXPECT_EQ(r[1], L"b");
1095 EXPECT_EQ(r[2], L"c");
1096 r.clear();
1097
1098 SplitString(L"a,,c", L',', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001099 EXPECT_EQ(3U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001100 EXPECT_EQ(r[0], L"a");
1101 EXPECT_EQ(r[1], L"");
1102 EXPECT_EQ(r[2], L"c");
1103 r.clear();
1104
1105 SplitString(L"", L'*', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001106 EXPECT_EQ(1U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001107 EXPECT_EQ(r[0], L"");
1108 r.clear();
1109
1110 SplitString(L"foo", L'*', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001111 EXPECT_EQ(1U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001112 EXPECT_EQ(r[0], L"foo");
1113 r.clear();
1114
1115 SplitString(L"foo ,", L',', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001116 EXPECT_EQ(2U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001117 EXPECT_EQ(r[0], L"foo");
1118 EXPECT_EQ(r[1], L"");
1119 r.clear();
1120
1121 SplitString(L",", L',', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001122 EXPECT_EQ(2U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001123 EXPECT_EQ(r[0], L"");
1124 EXPECT_EQ(r[1], L"");
1125 r.clear();
1126
1127 SplitString(L"\t\ta\t", L'\t', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001128 EXPECT_EQ(4U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001129 EXPECT_EQ(r[0], L"");
1130 EXPECT_EQ(r[1], L"");
1131 EXPECT_EQ(r[2], L"a");
1132 EXPECT_EQ(r[3], L"");
1133 r.clear();
1134
1135 SplitStringDontTrim(L"\t\ta\t", L'\t', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001136 EXPECT_EQ(4U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001137 EXPECT_EQ(r[0], L"");
1138 EXPECT_EQ(r[1], L"");
1139 EXPECT_EQ(r[2], L"a");
1140 EXPECT_EQ(r[3], L"");
1141 r.clear();
1142
1143 SplitString(L"\ta\t\nb\tcc", L'\n', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001144 EXPECT_EQ(2U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001145 EXPECT_EQ(r[0], L"a");
1146 EXPECT_EQ(r[1], L"b\tcc");
1147 r.clear();
1148
1149 SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r);
darin@google.comf3272802008-08-15 05:27:29 +09001150 EXPECT_EQ(2U, r.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001151 EXPECT_EQ(r[0], L"\ta\t");
1152 EXPECT_EQ(r[1], L"b\tcc");
1153 r.clear();
1154}
1155
1156TEST(StringUtilTest, StartsWith) {
1157 EXPECT_EQ(true, StartsWithASCII("javascript:url", "javascript", true));
1158 EXPECT_EQ(true, StartsWithASCII("javascript:url", "javascript", false));
1159 EXPECT_EQ(true, StartsWithASCII("JavaScript:url", "javascript", false));
1160 EXPECT_EQ(false, StartsWithASCII("java", "javascript", true));
1161 EXPECT_EQ(false, StartsWithASCII("java", "javascript", false));
1162}
1163
1164TEST(StringUtilTest, GetStringFWithOffsets) {
1165 std::vector<size_t> offsets;
1166
1167 ReplaceStringPlaceholders(L"Hello, $1. Your number is $2.", L"1", L"2",
1168 &offsets);
darin@google.comf3272802008-08-15 05:27:29 +09001169 EXPECT_EQ(2U, offsets.size());
1170 EXPECT_EQ(7U, offsets[0]);
1171 EXPECT_EQ(25U, offsets[1]);
initial.commit3f4a7322008-07-27 06:49:38 +09001172 offsets.clear();
1173
1174 ReplaceStringPlaceholders(L"Hello, $2. Your number is $1.", L"1", L"2",
1175 &offsets);
darin@google.comf3272802008-08-15 05:27:29 +09001176 EXPECT_EQ(2U, offsets.size());
1177 EXPECT_EQ(25U, offsets[0]);
1178 EXPECT_EQ(7U, offsets[1]);
initial.commit3f4a7322008-07-27 06:49:38 +09001179 offsets.clear();
1180}
1181
1182TEST(StringUtilTest, SplitStringAlongWhitespace) {
1183 struct TestData {
1184 const std::wstring input;
darin@google.comc01678f2008-08-15 05:49:08 +09001185 const size_t expected_result_count;
initial.commit3f4a7322008-07-27 06:49:38 +09001186 const std::wstring output1;
1187 const std::wstring output2;
1188 } data[] = {
1189 { L"a", 1, L"a", L"" },
1190 { L" ", 0, L"", L"" },
1191 { L" a", 1, L"a", L"" },
1192 { L" ab ", 1, L"ab", L"" },
1193 { L" ab c", 2, L"ab", L"c" },
1194 { L" ab c ", 2, L"ab", L"c" },
1195 { L" ab cd", 2, L"ab", L"cd" },
1196 { L" ab cd ", 2, L"ab", L"cd" },
1197 { L" \ta\t", 1, L"a", L"" },
1198 { L" b\ta\t", 2, L"b", L"a" },
1199 { L" b\tat", 2, L"b", L"at" },
1200 { L"b\tat", 2, L"b", L"at" },
1201 { L"b\t at", 2, L"b", L"at" },
1202 };
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001203 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +09001204 std::vector<std::wstring> results;
1205 SplitStringAlongWhitespace(data[i].input, &results);
darin@google.comc01678f2008-08-15 05:49:08 +09001206 ASSERT_EQ(data[i].expected_result_count, results.size());
initial.commit3f4a7322008-07-27 06:49:38 +09001207 if (data[i].expected_result_count > 0)
1208 ASSERT_EQ(data[i].output1, results[0]);
1209 if (data[i].expected_result_count > 1)
1210 ASSERT_EQ(data[i].output2, results[1]);
1211 }
1212}
1213
1214TEST(StringUtilTest, MatchPatternTest) {
1215 EXPECT_EQ(MatchPattern(L"www.google.com", L"*.com"), true);
1216 EXPECT_EQ(MatchPattern(L"www.google.com", L"*"), true);
1217 EXPECT_EQ(MatchPattern(L"www.google.com", L"www*.g*.org"), false);
1218 EXPECT_EQ(MatchPattern(L"Hello", L"H?l?o"), true);
1219 EXPECT_EQ(MatchPattern(L"www.google.com", L"http://*)"), false);
1220 EXPECT_EQ(MatchPattern(L"www.msn.com", L"*.COM"), false);
1221 EXPECT_EQ(MatchPattern(L"Hello*1234", L"He??o\\*1*"), true);
1222 EXPECT_EQ(MatchPattern(L"", L"*.*"), false);
1223 EXPECT_EQ(MatchPattern(L"", L"*"), true);
1224 EXPECT_EQ(MatchPattern(L"", L"?"), true);
1225 EXPECT_EQ(MatchPattern(L"", L""), true);
1226 EXPECT_EQ(MatchPattern(L"Hello", L""), false);
1227 EXPECT_EQ(MatchPattern(L"Hello*", L"Hello*"), true);
1228 EXPECT_EQ(MatchPattern("Hello*", "Hello*"), true); // narrow string
1229}
1230
deanm@google.comb5335212008-08-13 23:33:40 +09001231TEST(StringUtilTest, LcpyTest) {
1232 // Test the normal case where we fit in our buffer.
1233 {
1234 char dst[10];
1235 wchar_t wdst[10];
darin@google.comf3272802008-08-15 05:27:29 +09001236 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001237 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
darin@google.comf3272802008-08-15 05:27:29 +09001238 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001239 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1240 }
initial.commit3f4a7322008-07-27 06:49:38 +09001241
deanm@google.comb5335212008-08-13 23:33:40 +09001242 // Test dst_size == 0, nothing should be written to |dst| and we should
1243 // have the equivalent of strlen(src).
1244 {
1245 char dst[2] = {1, 2};
1246 wchar_t wdst[2] = {1, 2};
darin@google.comf3272802008-08-15 05:27:29 +09001247 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
deanm@google.comb5335212008-08-13 23:33:40 +09001248 EXPECT_EQ(1, dst[0]);
1249 EXPECT_EQ(2, dst[1]);
darin@google.comf3272802008-08-15 05:27:29 +09001250 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
deanm@google.comb5335212008-08-13 23:33:40 +09001251 EXPECT_EQ(1, wdst[0]);
1252 EXPECT_EQ(2, wdst[1]);
1253 }
1254
1255 // Test the case were we _just_ competely fit including the null.
1256 {
1257 char dst[8];
1258 wchar_t wdst[8];
darin@google.comf3272802008-08-15 05:27:29 +09001259 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001260 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
darin@google.comf3272802008-08-15 05:27:29 +09001261 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001262 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1263 }
1264
1265 // Test the case were we we are one smaller, so we can't fit the null.
1266 {
1267 char dst[7];
1268 wchar_t wdst[7];
darin@google.comf3272802008-08-15 05:27:29 +09001269 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001270 EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
darin@google.comf3272802008-08-15 05:27:29 +09001271 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001272 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1273 }
1274
1275 // Test the case were we are just too small.
1276 {
1277 char dst[3];
1278 wchar_t wdst[3];
darin@google.comf3272802008-08-15 05:27:29 +09001279 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001280 EXPECT_EQ(0, memcmp(dst, "ab", 3));
darin@google.comf3272802008-08-15 05:27:29 +09001281 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
deanm@google.comb5335212008-08-13 23:33:40 +09001282 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1283 }
1284}
mmentovai@google.com8ae0c2c2008-08-14 10:25:32 +09001285
1286TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1287 struct TestData {
1288 const wchar_t* input;
1289 bool portable;
1290 } cases[] = {
1291 { L"%ls", true },
1292 { L"%s", false },
1293 { L"%S", false },
1294 { L"%lS", false },
1295 { L"Hello, %s", false },
1296 { L"%lc", true },
1297 { L"%c", false },
1298 { L"%C", false },
1299 { L"%lC", false },
1300 { L"%ls %s", false },
1301 { L"%s %ls", false },
1302 { L"%s %ls %s", false },
1303 { L"%f", true },
1304 { L"%f %F", false },
1305 { L"%d %D", false },
1306 { L"%o %O", false },
1307 { L"%u %U", false },
1308 { L"%f %d %o %u", true },
1309 { L"%-8d (%02.1f%)", true },
1310 { L"% 10s", false },
1311 { L"% 10ls", true }
1312 };
1313 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1314 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1315 }
1316}
license.botf003cfe2008-08-24 09:55:55 +09001317