blob: fb0beada4b344b5223c3610e42a071ca63295bab [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/strings/string_util.h"
6
7#include <math.h>
8#include <stdarg.h>
9
10#include <algorithm>
11
12#include "base/basictypes.h"
13#include "base/strings/string16.h"
14#include "base/strings/utf_string_conversions.h"
15#include "testing/gmock/include/gmock/gmock.h"
16#include "testing/gtest/include/gtest/gtest.h"
17
18using ::testing::ElementsAre;
19
20namespace base {
21
22static const struct trim_case {
23 const wchar_t* input;
24 const TrimPositions positions;
25 const wchar_t* output;
26 const TrimPositions return_value;
27} trim_cases[] = {
28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
32 {L"", TRIM_ALL, L"", TRIM_NONE},
33 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
35 {L" ", TRIM_ALL, L"", TRIM_ALL},
36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
38};
39
40static const struct trim_case_ascii {
41 const char* input;
42 const TrimPositions positions;
43 const char* output;
44 const TrimPositions return_value;
45} trim_cases_ascii[] = {
46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
50 {"", TRIM_ALL, "", TRIM_NONE},
51 {" ", TRIM_LEADING, "", TRIM_LEADING},
52 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
53 {" ", TRIM_ALL, "", TRIM_ALL},
54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
55};
56
57namespace {
58
59// Helper used to test TruncateUTF8ToByteSize.
60bool Truncated(const std::string& input,
61 const size_t byte_size,
62 std::string* output) {
63 size_t prev = input.length();
64 TruncateUTF8ToByteSize(input, byte_size, output);
65 return prev != output->length();
66}
67
68} // namespace
69
70TEST(StringUtilTest, TruncateUTF8ToByteSize) {
71 std::string output;
72
73 // Empty strings and invalid byte_size arguments
74 EXPECT_FALSE(Truncated(std::string(), 0, &output));
75 EXPECT_EQ(output, "");
76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
77 EXPECT_EQ(output, "");
78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
80
81 // Testing the truncation of valid UTF8 correctly
82 EXPECT_TRUE(Truncated("abc", 2, &output));
83 EXPECT_EQ(output, "ab");
84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
85 EXPECT_EQ(output.compare("\xc2\x81"), 0);
86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
87 EXPECT_EQ(output.compare("\xc2\x81"), 0);
88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
89 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
90
91 {
92 const char array[] = "\x00\x00\xc2\x81\xc2\x81";
93 const std::string array_string(array, arraysize(array));
94 EXPECT_TRUE(Truncated(array_string, 4, &output));
95 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
96 }
97
98 {
99 const char array[] = "\x00\xc2\x81\xc2\x81";
100 const std::string array_string(array, arraysize(array));
101 EXPECT_TRUE(Truncated(array_string, 4, &output));
102 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
103 }
104
105 // Testing invalid UTF8
106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
107 EXPECT_EQ(output.compare(""), 0);
108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
109 EXPECT_EQ(output.compare(""), 0);
110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
111 EXPECT_EQ(output.compare(""), 0);
112
113 // Testing invalid UTF8 mixed with valid UTF8
114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
115 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
117 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
119 10, &output));
120 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
122 10, &output));
123 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
125 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
126
127 // Overlong sequences
128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
129 EXPECT_EQ(output.compare(""), 0);
130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
131 EXPECT_EQ(output.compare(""), 0);
132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
133 EXPECT_EQ(output.compare(""), 0);
134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
135 EXPECT_EQ(output.compare(""), 0);
136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
137 EXPECT_EQ(output.compare(""), 0);
138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
139 EXPECT_EQ(output.compare(""), 0);
140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
141 EXPECT_EQ(output.compare(""), 0);
142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
143 EXPECT_EQ(output.compare(""), 0);
144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
145 EXPECT_EQ(output.compare(""), 0);
146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
147 EXPECT_EQ(output.compare(""), 0);
148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
149 EXPECT_EQ(output.compare(""), 0);
150
151 // Beyond U+10FFFF (the upper limit of Unicode codespace)
152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
153 EXPECT_EQ(output.compare(""), 0);
154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
155 EXPECT_EQ(output.compare(""), 0);
156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
157 EXPECT_EQ(output.compare(""), 0);
158
159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
161 EXPECT_EQ(output.compare(""), 0);
162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
163 EXPECT_EQ(output.compare(""), 0);
164
165 {
166 const char array[] = "\x00\x00\xfe\xff";
167 const std::string array_string(array, arraysize(array));
168 EXPECT_TRUE(Truncated(array_string, 4, &output));
169 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
170 }
171
172 // Variants on the previous test
173 {
174 const char array[] = "\xff\xfe\x00\x00";
175 const std::string array_string(array, 4);
176 EXPECT_FALSE(Truncated(array_string, 4, &output));
177 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
178 }
179 {
180 const char array[] = "\xff\x00\x00\xfe";
181 const std::string array_string(array, arraysize(array));
182 EXPECT_TRUE(Truncated(array_string, 4, &output));
183 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
184 }
185
186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
188 EXPECT_EQ(output.compare(""), 0);
189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
190 EXPECT_EQ(output.compare(""), 0);
191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
192 EXPECT_EQ(output.compare(""), 0);
193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
194 EXPECT_EQ(output.compare(""), 0);
195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
196 EXPECT_EQ(output.compare(""), 0);
197
198 // Strings in legacy encodings that are valid in UTF-8, but
199 // are invalid as UTF-8 in real data.
200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
201 EXPECT_EQ(output.compare("caf"), 0);
202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
203 EXPECT_EQ(output.compare(""), 0);
204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
205 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
207 &output));
208 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
209
210 // Testing using the same string as input and output.
211 EXPECT_FALSE(Truncated(output, 4, &output));
212 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
213 EXPECT_TRUE(Truncated(output, 3, &output));
214 EXPECT_EQ(output.compare("\xa7\x41"), 0);
215
216 // "abc" with U+201[CD] in windows-125[0-8]
217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
218 EXPECT_EQ(output.compare("\x93" "abc"), 0);
219
220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
222 EXPECT_EQ(output.compare(""), 0);
223
224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
226 EXPECT_EQ(output.compare(""), 0);
227}
228
229TEST(StringUtilTest, TrimWhitespace) {
230 string16 output; // Allow contents to carry over to next testcase
231 for (size_t i = 0; i < arraysize(trim_cases); ++i) {
232 const trim_case& value = trim_cases[i];
233 EXPECT_EQ(value.return_value,
234 TrimWhitespace(WideToUTF16(value.input), value.positions,
235 &output));
236 EXPECT_EQ(WideToUTF16(value.output), output);
237 }
238
239 // Test that TrimWhitespace() can take the same string for input and output
240 output = ASCIIToUTF16(" This is a test \r\n");
241 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
243
244 // Once more, but with a string of whitespace
245 output = ASCIIToUTF16(" \r\n");
246 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
247 EXPECT_EQ(string16(), output);
248
249 std::string output_ascii;
250 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
251 const trim_case_ascii& value = trim_cases_ascii[i];
252 EXPECT_EQ(value.return_value,
253 TrimWhitespace(value.input, value.positions, &output_ascii));
254 EXPECT_EQ(value.output, output_ascii);
255 }
256}
257
// One wide-character CollapseWhitespace() scenario: the input text, the
// boolean handed to the function as its second argument, and the expected
// result. Judging by the expectations below, when |trim| is true a whitespace
// run that contains a line break is removed rather than reduced to a single
// space.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

static const collapse_case collapse_cases[] = {
    // |trim| == false.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    // |trim| == true.
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
283
284TEST(StringUtilTest, CollapseWhitespace) {
285 for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
286 const collapse_case& value = collapse_cases[i];
287 EXPECT_EQ(WideToUTF16(value.output),
288 CollapseWhitespace(WideToUTF16(value.input), value.trim));
289 }
290}
291
// Narrow-string counterpart of the wide collapse table, consumed by the
// CollapseWhitespaceASCII test: input text, the boolean passed as the second
// argument, and the expected result.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

static const collapse_case_ascii collapse_cases_ascii[] = {
    // |trim| == false.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},
    // |trim| == true.
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
315
316TEST(StringUtilTest, CollapseWhitespaceASCII) {
317 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
318 const collapse_case_ascii& value = collapse_cases_ascii[i];
319 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
320 }
321}
322
323TEST(StringUtilTest, IsStringUTF8) {
324 EXPECT_TRUE(IsStringUTF8("abc"));
325 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
326 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
327 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
328 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
329 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
330
331 // surrogate code points
332 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
333 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
334 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
335
336 // overlong sequences
337 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
338 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
339 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
340 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
341 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
342 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
343 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
344 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
345 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
346 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
347 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
348
349 // Beyond U+10FFFF (the upper limit of Unicode codespace)
350 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
351 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
352 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
353
354 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
355 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
356 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
357 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
358 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
359
360 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
361 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
363 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
364 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
365 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
366 // Strings in legacy encodings. We can certainly make up strings
367 // in a legacy encoding that are valid in UTF-8, but in real data,
368 // most of them are invalid as UTF-8.
369 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
370 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
371 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
372 // "abc" with U+201[CD] in windows-125[0-8]
373 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
374 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
375 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
376 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
377 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
378
379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
380 // representation, and the second uses a 2-byte sequence. The second version
381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
382 // given codepoint must be used.
383 static const char kEmbeddedNull[] = "embedded\0null";
384 EXPECT_TRUE(IsStringUTF8(
385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
387}
388
389TEST(StringUtilTest, IsStringASCII) {
390 static char char_ascii[] =
391 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
392 static char16 char16_ascii[] = {
393 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
394 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
395 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
396 static std::wstring wchar_ascii(
397 L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
398
399 // Test a variety of the fragment start positions and lengths in order to make
400 // sure that bit masking in IsStringASCII works correctly.
401 // Also, test that a non-ASCII character will be detected regardless of its
402 // position inside the string.
403 {
404 const size_t string_length = arraysize(char_ascii) - 1;
405 for (size_t offset = 0; offset < 8; ++offset) {
406 for (size_t len = 0, max_len = string_length - offset; len < max_len;
407 ++len) {
408 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
409 for (size_t char_pos = offset; char_pos < len; ++char_pos) {
410 char_ascii[char_pos] |= '\x80';
411 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
412 char_ascii[char_pos] &= ~'\x80';
413 }
414 }
415 }
416 }
417
418 {
419 const size_t string_length = arraysize(char16_ascii) - 1;
420 for (size_t offset = 0; offset < 4; ++offset) {
421 for (size_t len = 0, max_len = string_length - offset; len < max_len;
422 ++len) {
423 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
424 for (size_t char_pos = offset; char_pos < len; ++char_pos) {
425 char16_ascii[char_pos] |= 0x80;
426 EXPECT_FALSE(
427 IsStringASCII(StringPiece16(char16_ascii + offset, len)));
428 char16_ascii[char_pos] &= ~0x80;
429 // Also test when the upper half is non-zero.
430 char16_ascii[char_pos] |= 0x100;
431 EXPECT_FALSE(
432 IsStringASCII(StringPiece16(char16_ascii + offset, len)));
433 char16_ascii[char_pos] &= ~0x100;
434 }
435 }
436 }
437 }
438
439 {
440 const size_t string_length = wchar_ascii.length();
441 for (size_t len = 0; len < string_length; ++len) {
442 EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
443 for (size_t char_pos = 0; char_pos < len; ++char_pos) {
444 wchar_ascii[char_pos] |= 0x80;
445 EXPECT_FALSE(
446 IsStringASCII(wchar_ascii.substr(0, len)));
447 wchar_ascii[char_pos] &= ~0x80;
448 wchar_ascii[char_pos] |= 0x100;
449 EXPECT_FALSE(
450 IsStringASCII(wchar_ascii.substr(0, len)));
451 wchar_ascii[char_pos] &= ~0x100;
452#if defined(WCHAR_T_IS_UTF32)
453 wchar_ascii[char_pos] |= 0x10000;
454 EXPECT_FALSE(
455 IsStringASCII(wchar_ascii.substr(0, len)));
456 wchar_ascii[char_pos] &= ~0x10000;
457#endif // WCHAR_T_IS_UTF32
458 }
459 }
460 }
461}
462
463TEST(StringUtilTest, ConvertASCII) {
464 static const char* const char_cases[] = {
465 "Google Video",
466 "Hello, world\n",
467 "0123ABCDwxyz \a\b\t\r\n!+,.~"
468 };
469
470 static const wchar_t* const wchar_cases[] = {
471 L"Google Video",
472 L"Hello, world\n",
473 L"0123ABCDwxyz \a\b\t\r\n!+,.~"
474 };
475
476 for (size_t i = 0; i < arraysize(char_cases); ++i) {
477 EXPECT_TRUE(IsStringASCII(char_cases[i]));
478 string16 utf16 = ASCIIToUTF16(char_cases[i]);
479 EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
480
481 std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
482 EXPECT_EQ(char_cases[i], ascii);
483 }
484
485 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
486
487 // Convert empty strings.
488 string16 empty16;
489 std::string empty;
490 EXPECT_EQ(empty, UTF16ToASCII(empty16));
491 EXPECT_EQ(empty16, ASCIIToUTF16(empty));
492
493 // Convert strings with an embedded NUL character.
494 const char chars_with_nul[] = "test\0string";
495 const int length_with_nul = arraysize(chars_with_nul) - 1;
496 std::string string_with_nul(chars_with_nul, length_with_nul);
497 string16 string16_with_nul = ASCIIToUTF16(string_with_nul);
498 EXPECT_EQ(static_cast<string16::size_type>(length_with_nul),
499 string16_with_nul.length());
500 std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
501 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
502 narrow_with_nul.length());
503 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
504}
505
506TEST(StringUtilTest, ToUpperASCII) {
507 EXPECT_EQ('C', ToUpperASCII('C'));
508 EXPECT_EQ('C', ToUpperASCII('c'));
509 EXPECT_EQ('2', ToUpperASCII('2'));
510
511 EXPECT_EQ(L'C', ToUpperASCII(L'C'));
512 EXPECT_EQ(L'C', ToUpperASCII(L'c'));
513 EXPECT_EQ(L'2', ToUpperASCII(L'2'));
514
515 std::string in_place_a("Cc2");
516 StringToUpperASCII(&in_place_a);
517 EXPECT_EQ("CC2", in_place_a);
518
519 std::wstring in_place_w(L"Cc2");
520 StringToUpperASCII(&in_place_w);
521 EXPECT_EQ(L"CC2", in_place_w);
522
523 std::string original_a("Cc2");
524 std::string upper_a = StringToUpperASCII(original_a);
525 EXPECT_EQ("CC2", upper_a);
526
527 std::wstring original_w(L"Cc2");
528 std::wstring upper_w = StringToUpperASCII(original_w);
529 EXPECT_EQ(L"CC2", upper_w);
530}
531
532TEST(StringUtilTest, LowerCaseEqualsASCII) {
533 static const struct {
534 const char* src_a;
535 const char* dst;
536 } lowercase_cases[] = {
537 { "FoO", "foo" },
538 { "foo", "foo" },
539 { "FOO", "foo" },
540 };
541
542 for (size_t i = 0; i < arraysize(lowercase_cases); ++i) {
543 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
544 lowercase_cases[i].dst));
545 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
546 lowercase_cases[i].dst));
547 }
548}
549
550TEST(StringUtilTest, FormatBytesUnlocalized) {
551 static const struct {
552 int64 bytes;
553 const char* expected;
554 } cases[] = {
555 // Expected behavior: we show one post-decimal digit when we have
556 // under two pre-decimal digits, except in cases where it makes no
557 // sense (zero or bytes).
558 // Since we switch units once we cross the 1000 mark, this keeps
559 // the display of file sizes or bytes consistently around three
560 // digits.
561 {0, "0 B"},
562 {512, "512 B"},
563 {1024*1024, "1.0 MB"},
564 {1024*1024*1024, "1.0 GB"},
565 {10LL*1024*1024*1024, "10.0 GB"},
566 {99LL*1024*1024*1024, "99.0 GB"},
567 {105LL*1024*1024*1024, "105 GB"},
568 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
569 {~(1LL << 63), "8192 PB"},
570
571 {99*1024 + 103, "99.1 kB"},
572 {1024*1024 + 103, "1.0 MB"},
573 {1024*1024 + 205 * 1024, "1.2 MB"},
574 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
575 {10LL*1024*1024*1024, "10.0 GB"},
576 {100LL*1024*1024*1024, "100 GB"},
577 };
578
579 for (size_t i = 0; i < arraysize(cases); ++i) {
580 EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
581 FormatBytesUnlocalized(cases[i].bytes));
582 }
583}
584TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
585 static const struct {
586 const char* str;
587 string16::size_type start_offset;
588 const char* find_this;
589 const char* replace_with;
590 const char* expected;
591 } cases[] = {
592 {"aaa", 0, "a", "b", "bbb"},
593 {"abb", 0, "ab", "a", "ab"},
594 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
595 {"Not found", 0, "x", "0", "Not found"},
596 {"Not found again", 5, "x", "0", "Not found again"},
597 {" Making it much longer ", 0, " ", "Four score and seven years ago",
598 "Four score and seven years agoMakingFour score and seven years agoit"
599 "Four score and seven years agomuchFour score and seven years agolonger"
600 "Four score and seven years ago"},
601 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
602 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
603 {"abababab", 2, "ab", "c", "abccc"},
604 };
605
606 for (size_t i = 0; i < arraysize(cases); i++) {
607 string16 str = ASCIIToUTF16(cases[i].str);
608 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
609 ASCIIToUTF16(cases[i].find_this),
610 ASCIIToUTF16(cases[i].replace_with));
611 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
612 }
613}
614
615TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
616 static const struct {
617 const char* str;
618 string16::size_type start_offset;
619 const char* find_this;
620 const char* replace_with;
621 const char* expected;
622 } cases[] = {
623 {"aaa", 0, "a", "b", "baa"},
624 {"abb", 0, "ab", "a", "ab"},
625 {"Removing some substrings inging", 0, "ing", "",
626 "Remov some substrings inging"},
627 {"Not found", 0, "x", "0", "Not found"},
628 {"Not found again", 5, "x", "0", "Not found again"},
629 {" Making it much longer ", 0, " ", "Four score and seven years ago",
630 "Four score and seven years agoMaking it much longer "},
631 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
632 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
633 {"abababab", 2, "ab", "c", "abcabab"},
634 };
635
636 for (size_t i = 0; i < arraysize(cases); i++) {
637 string16 str = ASCIIToUTF16(cases[i].str);
638 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
639 ASCIIToUTF16(cases[i].find_this),
640 ASCIIToUTF16(cases[i].replace_with));
641 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
642 }
643}
644
645TEST(StringUtilTest, HexDigitToInt) {
646 EXPECT_EQ(0, HexDigitToInt('0'));
647 EXPECT_EQ(1, HexDigitToInt('1'));
648 EXPECT_EQ(2, HexDigitToInt('2'));
649 EXPECT_EQ(3, HexDigitToInt('3'));
650 EXPECT_EQ(4, HexDigitToInt('4'));
651 EXPECT_EQ(5, HexDigitToInt('5'));
652 EXPECT_EQ(6, HexDigitToInt('6'));
653 EXPECT_EQ(7, HexDigitToInt('7'));
654 EXPECT_EQ(8, HexDigitToInt('8'));
655 EXPECT_EQ(9, HexDigitToInt('9'));
656 EXPECT_EQ(10, HexDigitToInt('A'));
657 EXPECT_EQ(11, HexDigitToInt('B'));
658 EXPECT_EQ(12, HexDigitToInt('C'));
659 EXPECT_EQ(13, HexDigitToInt('D'));
660 EXPECT_EQ(14, HexDigitToInt('E'));
661 EXPECT_EQ(15, HexDigitToInt('F'));
662
663 // Verify the lower case as well.
664 EXPECT_EQ(10, HexDigitToInt('a'));
665 EXPECT_EQ(11, HexDigitToInt('b'));
666 EXPECT_EQ(12, HexDigitToInt('c'));
667 EXPECT_EQ(13, HexDigitToInt('d'));
668 EXPECT_EQ(14, HexDigitToInt('e'));
669 EXPECT_EQ(15, HexDigitToInt('f'));
670}
671
672// Test for Tokenize
673template <typename STR>
674void TokenizeTest() {
675 std::vector<STR> r;
676 size_t size;
677
678 size = Tokenize(STR("This is a string"), STR(" "), &r);
679 EXPECT_EQ(4U, size);
680 ASSERT_EQ(4U, r.size());
681 EXPECT_EQ(r[0], STR("This"));
682 EXPECT_EQ(r[1], STR("is"));
683 EXPECT_EQ(r[2], STR("a"));
684 EXPECT_EQ(r[3], STR("string"));
685 r.clear();
686
687 size = Tokenize(STR("one,two,three"), STR(","), &r);
688 EXPECT_EQ(3U, size);
689 ASSERT_EQ(3U, r.size());
690 EXPECT_EQ(r[0], STR("one"));
691 EXPECT_EQ(r[1], STR("two"));
692 EXPECT_EQ(r[2], STR("three"));
693 r.clear();
694
695 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
696 EXPECT_EQ(3U, size);
697 ASSERT_EQ(3U, r.size());
698 EXPECT_EQ(r[0], STR("one"));
699 EXPECT_EQ(r[1], STR("two"));
700 EXPECT_EQ(r[2], STR("three;four"));
701 r.clear();
702
703 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
704 EXPECT_EQ(4U, size);
705 ASSERT_EQ(4U, r.size());
706 EXPECT_EQ(r[0], STR("one"));
707 EXPECT_EQ(r[1], STR("two"));
708 EXPECT_EQ(r[2], STR("three"));
709 EXPECT_EQ(r[3], STR("four"));
710 r.clear();
711
712 size = Tokenize(STR("one, two, three"), STR(","), &r);
713 EXPECT_EQ(3U, size);
714 ASSERT_EQ(3U, r.size());
715 EXPECT_EQ(r[0], STR("one"));
716 EXPECT_EQ(r[1], STR(" two"));
717 EXPECT_EQ(r[2], STR(" three"));
718 r.clear();
719
720 size = Tokenize(STR("one, two, three, "), STR(","), &r);
721 EXPECT_EQ(4U, size);
722 ASSERT_EQ(4U, r.size());
723 EXPECT_EQ(r[0], STR("one"));
724 EXPECT_EQ(r[1], STR(" two"));
725 EXPECT_EQ(r[2], STR(" three"));
726 EXPECT_EQ(r[3], STR(" "));
727 r.clear();
728
729 size = Tokenize(STR("one, two, three,"), STR(","), &r);
730 EXPECT_EQ(3U, size);
731 ASSERT_EQ(3U, r.size());
732 EXPECT_EQ(r[0], STR("one"));
733 EXPECT_EQ(r[1], STR(" two"));
734 EXPECT_EQ(r[2], STR(" three"));
735 r.clear();
736
737 size = Tokenize(STR(), STR(","), &r);
738 EXPECT_EQ(0U, size);
739 ASSERT_EQ(0U, r.size());
740 r.clear();
741
742 size = Tokenize(STR(","), STR(","), &r);
743 EXPECT_EQ(0U, size);
744 ASSERT_EQ(0U, r.size());
745 r.clear();
746
747 size = Tokenize(STR(",;:."), STR(".:;,"), &r);
748 EXPECT_EQ(0U, size);
749 ASSERT_EQ(0U, r.size());
750 r.clear();
751
752 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
753 EXPECT_EQ(1U, size);
754 ASSERT_EQ(1U, r.size());
755 EXPECT_EQ(r[0], STR("a"));
756 r.clear();
757
758 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
759 EXPECT_EQ(2U, size);
760 ASSERT_EQ(2U, r.size());
761 EXPECT_EQ(r[0], STR("\ta\t"));
762 EXPECT_EQ(r[1], STR("b\tcc"));
763 r.clear();
764}
765
766TEST(StringUtilTest, TokenizeStdString) {
767 TokenizeTest<std::string>();
768}
769
770TEST(StringUtilTest, TokenizeStringPiece) {
771 TokenizeTest<StringPiece>();
772}
773
774// Test for JoinString
775TEST(StringUtilTest, JoinString) {
776 std::vector<std::string> in;
777 EXPECT_EQ("", JoinString(in, ','));
778
779 in.push_back("a");
780 EXPECT_EQ("a", JoinString(in, ','));
781
782 in.push_back("b");
783 in.push_back("c");
784 EXPECT_EQ("a,b,c", JoinString(in, ','));
785
786 in.push_back(std::string());
787 EXPECT_EQ("a,b,c,", JoinString(in, ','));
788 in.push_back(" ");
789 EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
790}
791
792// Test for JoinString overloaded with std::string separator
793TEST(StringUtilTest, JoinStringWithString) {
794 std::string separator(", ");
795 std::vector<std::string> parts;
796 EXPECT_EQ(std::string(), JoinString(parts, separator));
797
798 parts.push_back("a");
799 EXPECT_EQ("a", JoinString(parts, separator));
800
801 parts.push_back("b");
802 parts.push_back("c");
803 EXPECT_EQ("a, b, c", JoinString(parts, separator));
804
805 parts.push_back(std::string());
806 EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
807 parts.push_back(" ");
808 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
809}
810
811// Test for JoinString overloaded with string16 separator
812TEST(StringUtilTest, JoinStringWithString16) {
813 string16 separator = ASCIIToUTF16(", ");
814 std::vector<string16> parts;
815 EXPECT_EQ(string16(), JoinString(parts, separator));
816
817 parts.push_back(ASCIIToUTF16("a"));
818 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
819
820 parts.push_back(ASCIIToUTF16("b"));
821 parts.push_back(ASCIIToUTF16("c"));
822 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
823
824 parts.push_back(ASCIIToUTF16(""));
825 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
826 parts.push_back(ASCIIToUTF16(" "));
827 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
828}
829
830TEST(StringUtilTest, StartsWith) {
831 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
832 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
833 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
834 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
835 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
836 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
837 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
838 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
839 EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
840 EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
841
842 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
843 ASCIIToUTF16("javascript"), true));
844 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
845 ASCIIToUTF16("javascript"), true));
846 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
847 ASCIIToUTF16("javascript"), false));
848 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
849 ASCIIToUTF16("javascript"), false));
850 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
851 ASCIIToUTF16("javascript"), true));
852 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
853 ASCIIToUTF16("javascript"), false));
854 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
855 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
856 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
857 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
858}
859
860TEST(StringUtilTest, EndsWith) {
861 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
862 ASCIIToUTF16(".plugin"), true));
863 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
864 ASCIIToUTF16(".plugin"), true));
865 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
866 ASCIIToUTF16(".plugin"), false));
867 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
868 ASCIIToUTF16(".plugin"), false));
869 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
870 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
871 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
872 ASCIIToUTF16(".plugin"), true));
873 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
874 ASCIIToUTF16(".plugin"), false));
875 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
876 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
877 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
878 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
879 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
880 ASCIIToUTF16(".plugin"), false));
881 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
882 EXPECT_TRUE(EndsWith(string16(), string16(), false));
883 EXPECT_TRUE(EndsWith(string16(), string16(), true));
884}
885
886TEST(StringUtilTest, GetStringFWithOffsets) {
887 std::vector<string16> subst;
888 subst.push_back(ASCIIToUTF16("1"));
889 subst.push_back(ASCIIToUTF16("2"));
890 std::vector<size_t> offsets;
891
892 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
893 subst,
894 &offsets);
895 EXPECT_EQ(2U, offsets.size());
896 EXPECT_EQ(7U, offsets[0]);
897 EXPECT_EQ(25U, offsets[1]);
898 offsets.clear();
899
900 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
901 subst,
902 &offsets);
903 EXPECT_EQ(2U, offsets.size());
904 EXPECT_EQ(25U, offsets[0]);
905 EXPECT_EQ(7U, offsets[1]);
906 offsets.clear();
907}
908
909TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
910 // Test whether replacestringplaceholders works as expected when there
911 // are fewer inputs than outputs.
912 std::vector<string16> subst;
913 subst.push_back(ASCIIToUTF16("9a"));
914 subst.push_back(ASCIIToUTF16("8b"));
915 subst.push_back(ASCIIToUTF16("7c"));
916
917 string16 formatted =
918 ReplaceStringPlaceholders(
919 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
920
921 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
922}
923
924TEST(StringUtilTest, ReplaceStringPlaceholders) {
925 std::vector<string16> subst;
926 subst.push_back(ASCIIToUTF16("9a"));
927 subst.push_back(ASCIIToUTF16("8b"));
928 subst.push_back(ASCIIToUTF16("7c"));
929 subst.push_back(ASCIIToUTF16("6d"));
930 subst.push_back(ASCIIToUTF16("5e"));
931 subst.push_back(ASCIIToUTF16("4f"));
932 subst.push_back(ASCIIToUTF16("3g"));
933 subst.push_back(ASCIIToUTF16("2h"));
934 subst.push_back(ASCIIToUTF16("1i"));
935
936 string16 formatted =
937 ReplaceStringPlaceholders(
938 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
939
940 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
941}
942
943TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
944 std::vector<string16> subst;
945 subst.push_back(ASCIIToUTF16("9a"));
946 subst.push_back(ASCIIToUTF16("8b"));
947 subst.push_back(ASCIIToUTF16("7c"));
948 subst.push_back(ASCIIToUTF16("6d"));
949 subst.push_back(ASCIIToUTF16("5e"));
950 subst.push_back(ASCIIToUTF16("4f"));
951 subst.push_back(ASCIIToUTF16("3g"));
952 subst.push_back(ASCIIToUTF16("2h"));
953 subst.push_back(ASCIIToUTF16("1i"));
954 subst.push_back(ASCIIToUTF16("0j"));
955 subst.push_back(ASCIIToUTF16("-1k"));
956 subst.push_back(ASCIIToUTF16("-2l"));
957 subst.push_back(ASCIIToUTF16("-3m"));
958 subst.push_back(ASCIIToUTF16("-4n"));
959
960 string16 formatted =
961 ReplaceStringPlaceholders(
962 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
963 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
964
965 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
966 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
967}
968
969TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
970 std::vector<std::string> subst;
971 subst.push_back("9a");
972 subst.push_back("8b");
973 subst.push_back("7c");
974 subst.push_back("6d");
975 subst.push_back("5e");
976 subst.push_back("4f");
977 subst.push_back("3g");
978 subst.push_back("2h");
979 subst.push_back("1i");
980
981 std::string formatted =
982 ReplaceStringPlaceholders(
983 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
984
985 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
986}
987
988TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
989 std::vector<std::string> subst;
990 subst.push_back("a");
991 subst.push_back("b");
992 subst.push_back("c");
993 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
994 "$1 $$2 $$$3");
995}
996
997TEST(StringUtilTest, MatchPatternTest) {
998 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
999 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
1000 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
1001 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
1002 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
1003 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
1004 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
1005 EXPECT_FALSE(MatchPattern("", "*.*"));
1006 EXPECT_TRUE(MatchPattern("", "*"));
1007 EXPECT_TRUE(MatchPattern("", "?"));
1008 EXPECT_TRUE(MatchPattern("", ""));
1009 EXPECT_FALSE(MatchPattern("Hello", ""));
1010 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
1011 // Stop after a certain recursion depth.
1012 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
1013
1014 // Test UTF8 matching.
1015 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
1016 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
1017 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
1018 // Invalid sequences should be handled as a single invalid character.
1019 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
1020 // If the pattern has invalid characters, it shouldn't match anything.
1021 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
1022
1023 // Test UTF16 character matching.
1024 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
1025 UTF8ToUTF16("*.com")));
1026 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
1027 UTF8ToUTF16("He??o\\*1*")));
1028
1029 // This test verifies that consecutive wild cards are collapsed into 1
1030 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
1031 // recursion depth).
1032 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
1033 UTF8ToUTF16("He********************************o")));
1034}
1035
1036TEST(StringUtilTest, LcpyTest) {
1037 // Test the normal case where we fit in our buffer.
1038 {
1039 char dst[10];
1040 wchar_t wdst[10];
1041 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
1042 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1043 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1044 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1045 }
1046
1047 // Test dst_size == 0, nothing should be written to |dst| and we should
1048 // have the equivalent of strlen(src).
1049 {
1050 char dst[2] = {1, 2};
1051 wchar_t wdst[2] = {1, 2};
1052 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0));
1053 EXPECT_EQ(1, dst[0]);
1054 EXPECT_EQ(2, dst[1]);
1055 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0));
1056 EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
1057 EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
1058 }
1059
1060 // Test the case were we _just_ competely fit including the null.
1061 {
1062 char dst[8];
1063 wchar_t wdst[8];
1064 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
1065 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1066 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1067 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1068 }
1069
1070 // Test the case were we we are one smaller, so we can't fit the null.
1071 {
1072 char dst[7];
1073 wchar_t wdst[7];
1074 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
1075 EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1076 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1077 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1078 }
1079
1080 // Test the case were we are just too small.
1081 {
1082 char dst[3];
1083 wchar_t wdst[3];
1084 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
1085 EXPECT_EQ(0, memcmp(dst, "ab", 3));
1086 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1087 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1088 }
1089}
1090
1091TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1092 static const struct {
1093 const wchar_t* input;
1094 bool portable;
1095 } cases[] = {
1096 { L"%ls", true },
1097 { L"%s", false },
1098 { L"%S", false },
1099 { L"%lS", false },
1100 { L"Hello, %s", false },
1101 { L"%lc", true },
1102 { L"%c", false },
1103 { L"%C", false },
1104 { L"%lC", false },
1105 { L"%ls %s", false },
1106 { L"%s %ls", false },
1107 { L"%s %ls %s", false },
1108 { L"%f", true },
1109 { L"%f %F", false },
1110 { L"%d %D", false },
1111 { L"%o %O", false },
1112 { L"%u %U", false },
1113 { L"%f %d %o %u", true },
1114 { L"%-8d (%02.1f%)", true },
1115 { L"% 10s", false },
1116 { L"% 10ls", true }
1117 };
1118 for (size_t i = 0; i < arraysize(cases); ++i)
1119 EXPECT_EQ(cases[i].portable, IsWprintfFormatPortable(cases[i].input));
1120}
1121
1122TEST(StringUtilTest, RemoveChars) {
1123 const char kRemoveChars[] = "-/+*";
1124 std::string input = "A-+bc/d!*";
1125 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1126 EXPECT_EQ("Abcd!", input);
1127
1128 // No characters match kRemoveChars.
1129 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1130 EXPECT_EQ("Abcd!", input);
1131
1132 // Empty string.
1133 input.clear();
1134 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1135 EXPECT_EQ(std::string(), input);
1136}
1137
1138TEST(StringUtilTest, ReplaceChars) {
1139 struct TestData {
1140 const char* input;
1141 const char* replace_chars;
1142 const char* replace_with;
1143 const char* output;
1144 bool result;
1145 } cases[] = {
1146 { "", "", "", "", false },
1147 { "test", "", "", "test", false },
1148 { "test", "", "!", "test", false },
1149 { "test", "z", "!", "test", false },
1150 { "test", "e", "!", "t!st", true },
1151 { "test", "e", "!?", "t!?st", true },
1152 { "test", "ez", "!", "t!st", true },
1153 { "test", "zed", "!?", "t!?st", true },
1154 { "test", "t", "!?", "!?es!?", true },
1155 { "test", "et", "!>", "!>!>s!>", true },
1156 { "test", "zest", "!", "!!!!", true },
1157 { "test", "szt", "!", "!e!!", true },
1158 { "test", "t", "test", "testestest", true },
1159 };
1160
1161 for (size_t i = 0; i < arraysize(cases); ++i) {
1162 std::string output;
1163 bool result = ReplaceChars(cases[i].input,
1164 cases[i].replace_chars,
1165 cases[i].replace_with,
1166 &output);
1167 EXPECT_EQ(cases[i].result, result);
1168 EXPECT_EQ(cases[i].output, output);
1169 }
1170}
1171
1172TEST(StringUtilTest, ContainsOnlyChars) {
1173 // Providing an empty list of characters should return false but for the empty
1174 // string.
1175 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1176 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1177
1178 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1179 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1180 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1181 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1182 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1183
1184 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
1185 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
1186 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
1187 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII));
1188 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
1189 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII));
1190
1191 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
1192 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
1193 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
1194 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16));
1195 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
1196 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),
1197 kWhitespaceUTF16));
1198}
1199
1200class WriteIntoTest : public testing::Test {
1201 protected:
1202 static void WritesCorrectly(size_t num_chars) {
1203 std::string buffer;
1204 char kOriginal[] = "supercali";
1205 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1206 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1207 // string at the first \0.
1208 EXPECT_EQ(std::string(kOriginal,
1209 std::min(num_chars, arraysize(kOriginal) - 1)),
1210 std::string(buffer.c_str()));
1211 EXPECT_EQ(num_chars, buffer.size());
1212 }
1213};
1214
1215TEST_F(WriteIntoTest, WriteInto) {
1216 // Validate that WriteInto reserves enough space and
1217 // sizes a string correctly.
1218 WritesCorrectly(1);
1219 WritesCorrectly(2);
1220 WritesCorrectly(5000);
1221
1222 // Validate that WriteInto doesn't modify other strings
1223 // when using a Copy-on-Write implementation.
1224 const char kLive[] = "live";
1225 const char kDead[] = "dead";
1226 const std::string live = kLive;
1227 std::string dead = live;
1228 strncpy(WriteInto(&dead, 5), kDead, 4);
1229 EXPECT_EQ(kDead, dead);
1230 EXPECT_EQ(4u, dead.size());
1231 EXPECT_EQ(kLive, live);
1232 EXPECT_EQ(4u, live.size());
1233}
1234
1235} // namespace base