Blame - base/strings/string_util_unittest.cc - platform/external/libchrome

blob: fb0beada4b344b5223c3610e42a071ca63295bab [file] [log] [blame]

Daniel Erat	b8cf949	2015-07-06 13:18:13 -0600	[diff] [blame^]	1	// Copyright 2013 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "base/strings/string_util.h"
				6
				7	#include <math.h>
				8	#include <stdarg.h>
				9
				10	#include <algorithm>
				11
				12	#include "base/basictypes.h"
				13	#include "base/strings/string16.h"
				14	#include "base/strings/utf_string_conversions.h"
				15	#include "testing/gmock/include/gmock/gmock.h"
				16	#include "testing/gtest/include/gtest/gtest.h"
				17
				18	using ::testing::ElementsAre;
				19
				20	namespace base {
				21
				22	static const struct trim_case {
				23	const wchar_t* input;
				24	const TrimPositions positions;
				25	const wchar_t* output;
				26	const TrimPositions return_value;
				27	} trim_cases[] = {
				28	{L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
				29	{L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
				30	{L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
				31	{L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
				32	{L"", TRIM_ALL, L"", TRIM_NONE},
				33	{L" ", TRIM_LEADING, L"", TRIM_LEADING},
				34	{L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
				35	{L" ", TRIM_ALL, L"", TRIM_ALL},
				36	{L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
				37	{L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
				38	};
				39
				40	static const struct trim_case_ascii {
				41	const char* input;
				42	const TrimPositions positions;
				43	const char* output;
				44	const TrimPositions return_value;
				45	} trim_cases_ascii[] = {
				46	{" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
				47	{" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
				48	{" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
				49	{"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
				50	{"", TRIM_ALL, "", TRIM_NONE},
				51	{" ", TRIM_LEADING, "", TRIM_LEADING},
				52	{" ", TRIM_TRAILING, "", TRIM_TRAILING},
				53	{" ", TRIM_ALL, "", TRIM_ALL},
				54	{"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
				55	};
				56
				57	namespace {
				58
				59	// Helper used to test TruncateUTF8ToByteSize.
				60	bool Truncated(const std::string& input,
				61	const size_t byte_size,
				62	std::string* output) {
				63	size_t prev = input.length();
				64	TruncateUTF8ToByteSize(input, byte_size, output);
				65	return prev != output->length();
				66	}
				67
				68	} // namespace
				69
				70	TEST(StringUtilTest, TruncateUTF8ToByteSize) {
				71	std::string output;
				72
				73	// Empty strings and invalid byte_size arguments
				74	EXPECT_FALSE(Truncated(std::string(), 0, &output));
				75	EXPECT_EQ(output, "");
				76	EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
				77	EXPECT_EQ(output, "");
				78	EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
				79	EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
				80
				81	// Testing the truncation of valid UTF8 correctly
				82	EXPECT_TRUE(Truncated("abc", 2, &output));
				83	EXPECT_EQ(output, "ab");
				84	EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
				85	EXPECT_EQ(output.compare("\xc2\x81"), 0);
				86	EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
				87	EXPECT_EQ(output.compare("\xc2\x81"), 0);
				88	EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
				89	EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
				90
				91	{
				92	const char array[] = "\x00\x00\xc2\x81\xc2\x81";
				93	const std::string array_string(array, arraysize(array));
				94	EXPECT_TRUE(Truncated(array_string, 4, &output));
				95	EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
				96	}
				97
				98	{
				99	const char array[] = "\x00\xc2\x81\xc2\x81";
				100	const std::string array_string(array, arraysize(array));
				101	EXPECT_TRUE(Truncated(array_string, 4, &output));
				102	EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
				103	}
				104
				105	// Testing invalid UTF8
				106	EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
				107	EXPECT_EQ(output.compare(""), 0);
				108	EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
				109	EXPECT_EQ(output.compare(""), 0);
				110	EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
				111	EXPECT_EQ(output.compare(""), 0);
				112
				113	// Testing invalid UTF8 mixed with valid UTF8
				114	EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
				115	EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
				116	EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
				117	EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
				118	EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
				119	10, &output));
				120	EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
				121	EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
				122	10, &output));
				123	EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
				124	EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
				125	EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
				126
				127	// Overlong sequences
				128	EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
				129	EXPECT_EQ(output.compare(""), 0);
				130	EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
				131	EXPECT_EQ(output.compare(""), 0);
				132	EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
				133	EXPECT_EQ(output.compare(""), 0);
				134	EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
				135	EXPECT_EQ(output.compare(""), 0);
				136	EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
				137	EXPECT_EQ(output.compare(""), 0);
				138	EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
				139	EXPECT_EQ(output.compare(""), 0);
				140	EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
				141	EXPECT_EQ(output.compare(""), 0);
				142	EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
				143	EXPECT_EQ(output.compare(""), 0);
				144	EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
				145	EXPECT_EQ(output.compare(""), 0);
				146	EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
				147	EXPECT_EQ(output.compare(""), 0);
				148	EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
				149	EXPECT_EQ(output.compare(""), 0);
				150
				151	// Beyond U+10FFFF (the upper limit of Unicode codespace)
				152	EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
				153	EXPECT_EQ(output.compare(""), 0);
				154	EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
				155	EXPECT_EQ(output.compare(""), 0);
				156	EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
				157	EXPECT_EQ(output.compare(""), 0);
				158
				159	// BOMs in UTF-16(BE\|LE) and UTF-32(BE\|LE)
				160	EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
				161	EXPECT_EQ(output.compare(""), 0);
				162	EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
				163	EXPECT_EQ(output.compare(""), 0);
				164
				165	{
				166	const char array[] = "\x00\x00\xfe\xff";
				167	const std::string array_string(array, arraysize(array));
				168	EXPECT_TRUE(Truncated(array_string, 4, &output));
				169	EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
				170	}
				171
				172	// Variants on the previous test
				173	{
				174	const char array[] = "\xff\xfe\x00\x00";
				175	const std::string array_string(array, 4);
				176	EXPECT_FALSE(Truncated(array_string, 4, &output));
				177	EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
				178	}
				179	{
				180	const char array[] = "\xff\x00\x00\xfe";
				181	const std::string array_string(array, arraysize(array));
				182	EXPECT_TRUE(Truncated(array_string, 4, &output));
				183	EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
				184	}
				185
				186	// Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
				187	EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
				188	EXPECT_EQ(output.compare(""), 0);
				189	EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
				190	EXPECT_EQ(output.compare(""), 0);
				191	EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
				192	EXPECT_EQ(output.compare(""), 0);
				193	EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
				194	EXPECT_EQ(output.compare(""), 0);
				195	EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
				196	EXPECT_EQ(output.compare(""), 0);
				197
				198	// Strings in legacy encodings that are valid in UTF-8, but
				199	// are invalid as UTF-8 in real data.
				200	EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
				201	EXPECT_EQ(output.compare("caf"), 0);
				202	EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
				203	EXPECT_EQ(output.compare(""), 0);
				204	EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
				205	EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
				206	EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
				207	&output));
				208	EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
				209
				210	// Testing using the same string as input and output.
				211	EXPECT_FALSE(Truncated(output, 4, &output));
				212	EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
				213	EXPECT_TRUE(Truncated(output, 3, &output));
				214	EXPECT_EQ(output.compare("\xa7\x41"), 0);
				215
				216	// "abc" with U+201[CD] in windows-125[0-8]
				217	EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
				218	EXPECT_EQ(output.compare("\x93" "abc"), 0);
				219
				220	// U+0639 U+064E U+0644 U+064E in ISO-8859-6
				221	EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
				222	EXPECT_EQ(output.compare(""), 0);
				223
				224	// U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
				225	EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
				226	EXPECT_EQ(output.compare(""), 0);
				227	}
				228
				229	TEST(StringUtilTest, TrimWhitespace) {
				230	string16 output; // Allow contents to carry over to next testcase
				231	for (size_t i = 0; i < arraysize(trim_cases); ++i) {
				232	const trim_case& value = trim_cases[i];
				233	EXPECT_EQ(value.return_value,
				234	TrimWhitespace(WideToUTF16(value.input), value.positions,
				235	&output));
				236	EXPECT_EQ(WideToUTF16(value.output), output);
				237	}
				238
				239	// Test that TrimWhitespace() can take the same string for input and output
				240	output = ASCIIToUTF16(" This is a test \r\n");
				241	EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
				242	EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
				243
				244	// Once more, but with a string of whitespace
				245	output = ASCIIToUTF16(" \r\n");
				246	EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
				247	EXPECT_EQ(string16(), output);
				248
				249	std::string output_ascii;
				250	for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
				251	const trim_case_ascii& value = trim_cases_ascii[i];
				252	EXPECT_EQ(value.return_value,
				253	TrimWhitespace(value.input, value.positions, &output_ascii));
				254	EXPECT_EQ(value.output, output_ascii);
				255	}
				256	}
				257
				258	static const struct collapse_case {
				259	const wchar_t* input;
				260	const bool trim;
				261	const wchar_t* output;
				262	} collapse_cases[] = {
				263	{L" Google Video ", false, L"Google Video"},
				264	{L"Google Video", false, L"Google Video"},
				265	{L"", false, L""},
				266	{L" ", false, L""},
				267	{L"\t\rTest String\n", false, L"Test String"},
				268	{L"\x2002Test String\x00A0\x3000", false, L"Test String"},
				269	{L" Test \n \t String ", false, L"Test String"},
				270	{L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
				271	{L" Test String", false, L"Test String"},
				272	{L"Test String ", false, L"Test String"},
				273	{L"Test String", false, L"Test String"},
				274	{L"", true, L""},
				275	{L"\n", true, L""},
				276	{L" \r ", true, L""},
				277	{L"\nFoo", true, L"Foo"},
				278	{L"\r Foo ", true, L"Foo"},
				279	{L" Foo bar ", true, L"Foo bar"},
				280	{L" \tFoo bar \n", true, L"Foo bar"},
				281	{L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
				282	};
				283
				284	TEST(StringUtilTest, CollapseWhitespace) {
				285	for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
				286	const collapse_case& value = collapse_cases[i];
				287	EXPECT_EQ(WideToUTF16(value.output),
				288	CollapseWhitespace(WideToUTF16(value.input), value.trim));
				289	}
				290	}
				291
				292	static const struct collapse_case_ascii {
				293	const char* input;
				294	const bool trim;
				295	const char* output;
				296	} collapse_cases_ascii[] = {
				297	{" Google Video ", false, "Google Video"},
				298	{"Google Video", false, "Google Video"},
				299	{"", false, ""},
				300	{" ", false, ""},
				301	{"\t\rTest String\n", false, "Test String"},
				302	{" Test \n \t String ", false, "Test String"},
				303	{" Test String", false, "Test String"},
				304	{"Test String ", false, "Test String"},
				305	{"Test String", false, "Test String"},
				306	{"", true, ""},
				307	{"\n", true, ""},
				308	{" \r ", true, ""},
				309	{"\nFoo", true, "Foo"},
				310	{"\r Foo ", true, "Foo"},
				311	{" Foo bar ", true, "Foo bar"},
				312	{" \tFoo bar \n", true, "Foo bar"},
				313	{" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
				314	};
				315
				316	TEST(StringUtilTest, CollapseWhitespaceASCII) {
				317	for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
				318	const collapse_case_ascii& value = collapse_cases_ascii[i];
				319	EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
				320	}
				321	}
				322
				323	TEST(StringUtilTest, IsStringUTF8) {
				324	EXPECT_TRUE(IsStringUTF8("abc"));
				325	EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
				326	EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
				327	EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
				328	EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
				329	EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
				330
				331	// surrogate code points
				332	EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
				333	EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
				334	EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
				335
				336	// overlong sequences
				337	EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
				338	EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
				339	EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
				340	EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
				341	EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
				342	EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
				343	EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
				344	EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
				345	EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
				346	EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
				347	EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
				348
				349	// Beyond U+10FFFF (the upper limit of Unicode codespace)
				350	EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
				351	EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
				352	EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
				353
				354	// BOMs in UTF-16(BE\|LE) and UTF-32(BE\|LE)
				355	EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
				356	EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
				357	EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
				358	EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
				359
				360	// Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
				361	EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
				362	EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
				363	EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
				364	EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
				365	EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
				366	// Strings in legacy encodings. We can certainly make up strings
				367	// in a legacy encoding that are valid in UTF-8, but in real data,
				368	// most of them are invalid as UTF-8.
				369	EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
				370	EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
				371	EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
				372	// "abc" with U+201[CD] in windows-125[0-8]
				373	EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
				374	// U+0639 U+064E U+0644 U+064E in ISO-8859-6
				375	EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
				376	// U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
				377	EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
				378
				379	// Check that we support Embedded Nulls. The first uses the canonical UTF-8
				380	// representation, and the second uses a 2-byte sequence. The second version
				381	// is invalid UTF-8 since UTF-8 states that the shortest encoding for a
				382	// given codepoint must be used.
				383	static const char kEmbeddedNull[] = "embedded\0null";
				384	EXPECT_TRUE(IsStringUTF8(
				385	std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
				386	EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
				387	}
				388
				389	TEST(StringUtilTest, IsStringASCII) {
				390	static char char_ascii[] =
				391	"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
				392	static char16 char16_ascii[] = {
				393	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
				394	'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
				395	'7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
				396	static std::wstring wchar_ascii(
				397	L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
				398
				399	// Test a variety of the fragment start positions and lengths in order to make
				400	// sure that bit masking in IsStringASCII works correctly.
				401	// Also, test that a non-ASCII character will be detected regardless of its
				402	// position inside the string.
				403	{
				404	const size_t string_length = arraysize(char_ascii) - 1;
				405	for (size_t offset = 0; offset < 8; ++offset) {
				406	for (size_t len = 0, max_len = string_length - offset; len < max_len;
				407	++len) {
				408	EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
				409	for (size_t char_pos = offset; char_pos < len; ++char_pos) {
				410	char_ascii[char_pos] \|= '\x80';
				411	EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
				412	char_ascii[char_pos] &= ~'\x80';
				413	}
				414	}
				415	}
				416	}
				417
				418	{
				419	const size_t string_length = arraysize(char16_ascii) - 1;
				420	for (size_t offset = 0; offset < 4; ++offset) {
				421	for (size_t len = 0, max_len = string_length - offset; len < max_len;
				422	++len) {
				423	EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
				424	for (size_t char_pos = offset; char_pos < len; ++char_pos) {
				425	char16_ascii[char_pos] \|= 0x80;
				426	EXPECT_FALSE(
				427	IsStringASCII(StringPiece16(char16_ascii + offset, len)));
				428	char16_ascii[char_pos] &= ~0x80;
				429	// Also test when the upper half is non-zero.
				430	char16_ascii[char_pos] \|= 0x100;
				431	EXPECT_FALSE(
				432	IsStringASCII(StringPiece16(char16_ascii + offset, len)));
				433	char16_ascii[char_pos] &= ~0x100;
				434	}
				435	}
				436	}
				437	}
				438
				439	{
				440	const size_t string_length = wchar_ascii.length();
				441	for (size_t len = 0; len < string_length; ++len) {
				442	EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
				443	for (size_t char_pos = 0; char_pos < len; ++char_pos) {
				444	wchar_ascii[char_pos] \|= 0x80;
				445	EXPECT_FALSE(
				446	IsStringASCII(wchar_ascii.substr(0, len)));
				447	wchar_ascii[char_pos] &= ~0x80;
				448	wchar_ascii[char_pos] \|= 0x100;
				449	EXPECT_FALSE(
				450	IsStringASCII(wchar_ascii.substr(0, len)));
				451	wchar_ascii[char_pos] &= ~0x100;
				452	#if defined(WCHAR_T_IS_UTF32)
				453	wchar_ascii[char_pos] \|= 0x10000;
				454	EXPECT_FALSE(
				455	IsStringASCII(wchar_ascii.substr(0, len)));
				456	wchar_ascii[char_pos] &= ~0x10000;
				457	#endif // WCHAR_T_IS_UTF32
				458	}
				459	}
				460	}
				461	}
				462
				463	TEST(StringUtilTest, ConvertASCII) {
				464	static const char* const char_cases[] = {
				465	"Google Video",
				466	"Hello, world\n",
				467	"0123ABCDwxyz \a\b\t\r\n!+,.~"
				468	};
				469
				470	static const wchar_t* const wchar_cases[] = {
				471	L"Google Video",
				472	L"Hello, world\n",
				473	L"0123ABCDwxyz \a\b\t\r\n!+,.~"
				474	};
				475
				476	for (size_t i = 0; i < arraysize(char_cases); ++i) {
				477	EXPECT_TRUE(IsStringASCII(char_cases[i]));
				478	string16 utf16 = ASCIIToUTF16(char_cases[i]);
				479	EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
				480
				481	std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
				482	EXPECT_EQ(char_cases[i], ascii);
				483	}
				484
				485	EXPECT_FALSE(IsStringASCII("Google \x80Video"));
				486
				487	// Convert empty strings.
				488	string16 empty16;
				489	std::string empty;
				490	EXPECT_EQ(empty, UTF16ToASCII(empty16));
				491	EXPECT_EQ(empty16, ASCIIToUTF16(empty));
				492
				493	// Convert strings with an embedded NUL character.
				494	const char chars_with_nul[] = "test\0string";
				495	const int length_with_nul = arraysize(chars_with_nul) - 1;
				496	std::string string_with_nul(chars_with_nul, length_with_nul);
				497	string16 string16_with_nul = ASCIIToUTF16(string_with_nul);
				498	EXPECT_EQ(static_cast<string16::size_type>(length_with_nul),
				499	string16_with_nul.length());
				500	std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
				501	EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
				502	narrow_with_nul.length());
				503	EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
				504	}
				505
				506	TEST(StringUtilTest, ToUpperASCII) {
				507	EXPECT_EQ('C', ToUpperASCII('C'));
				508	EXPECT_EQ('C', ToUpperASCII('c'));
				509	EXPECT_EQ('2', ToUpperASCII('2'));
				510
				511	EXPECT_EQ(L'C', ToUpperASCII(L'C'));
				512	EXPECT_EQ(L'C', ToUpperASCII(L'c'));
				513	EXPECT_EQ(L'2', ToUpperASCII(L'2'));
				514
				515	std::string in_place_a("Cc2");
				516	StringToUpperASCII(&in_place_a);
				517	EXPECT_EQ("CC2", in_place_a);
				518
				519	std::wstring in_place_w(L"Cc2");
				520	StringToUpperASCII(&in_place_w);
				521	EXPECT_EQ(L"CC2", in_place_w);
				522
				523	std::string original_a("Cc2");
				524	std::string upper_a = StringToUpperASCII(original_a);
				525	EXPECT_EQ("CC2", upper_a);
				526
				527	std::wstring original_w(L"Cc2");
				528	std::wstring upper_w = StringToUpperASCII(original_w);
				529	EXPECT_EQ(L"CC2", upper_w);
				530	}
				531
				532	TEST(StringUtilTest, LowerCaseEqualsASCII) {
				533	static const struct {
				534	const char* src_a;
				535	const char* dst;
				536	} lowercase_cases[] = {
				537	{ "FoO", "foo" },
				538	{ "foo", "foo" },
				539	{ "FOO", "foo" },
				540	};
				541
				542	for (size_t i = 0; i < arraysize(lowercase_cases); ++i) {
				543	EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
				544	lowercase_cases[i].dst));
				545	EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
				546	lowercase_cases[i].dst));
				547	}
				548	}
				549
				550	TEST(StringUtilTest, FormatBytesUnlocalized) {
				551	static const struct {
				552	int64 bytes;
				553	const char* expected;
				554	} cases[] = {
				555	// Expected behavior: we show one post-decimal digit when we have
				556	// under two pre-decimal digits, except in cases where it makes no
				557	// sense (zero or bytes).
				558	// Since we switch units once we cross the 1000 mark, this keeps
				559	// the display of file sizes or bytes consistently around three
				560	// digits.
				561	{0, "0 B"},
				562	{512, "512 B"},
				563	{1024*1024, "1.0 MB"},
				564	{102410241024, "1.0 GB"},
				565	{10LL10241024*1024, "10.0 GB"},
				566	{99LL10241024*1024, "99.0 GB"},
				567	{105LL10241024*1024, "105 GB"},
				568	{105LL102410241024 + 500LL1024*1024, "105 GB"},
				569	{~(1LL << 63), "8192 PB"},
				570
				571	{99*1024 + 103, "99.1 kB"},
				572	{1024*1024 + 103, "1.0 MB"},
				573	{10241024 + 205 1024, "1.2 MB"},
				574	{102410241024 + (927 * 1024*1024), "1.9 GB"},
				575	{10LL10241024*1024, "10.0 GB"},
				576	{100LL10241024*1024, "100 GB"},
				577	};
				578
				579	for (size_t i = 0; i < arraysize(cases); ++i) {
				580	EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
				581	FormatBytesUnlocalized(cases[i].bytes));
				582	}
				583	}
				584	TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
				585	static const struct {
				586	const char* str;
				587	string16::size_type start_offset;
				588	const char* find_this;
				589	const char* replace_with;
				590	const char* expected;
				591	} cases[] = {
				592	{"aaa", 0, "a", "b", "bbb"},
				593	{"abb", 0, "ab", "a", "ab"},
				594	{"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
				595	{"Not found", 0, "x", "0", "Not found"},
				596	{"Not found again", 5, "x", "0", "Not found again"},
				597	{" Making it much longer ", 0, " ", "Four score and seven years ago",
				598	"Four score and seven years agoMakingFour score and seven years agoit"
				599	"Four score and seven years agomuchFour score and seven years agolonger"
				600	"Four score and seven years ago"},
				601	{"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
				602	{"Replace me only me once", 9, "me ", "", "Replace me only once"},
				603	{"abababab", 2, "ab", "c", "abccc"},
				604	};
				605
				606	for (size_t i = 0; i < arraysize(cases); i++) {
				607	string16 str = ASCIIToUTF16(cases[i].str);
				608	ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
				609	ASCIIToUTF16(cases[i].find_this),
				610	ASCIIToUTF16(cases[i].replace_with));
				611	EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
				612	}
				613	}
				614
				615	TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
				616	static const struct {
				617	const char* str;
				618	string16::size_type start_offset;
				619	const char* find_this;
				620	const char* replace_with;
				621	const char* expected;
				622	} cases[] = {
				623	{"aaa", 0, "a", "b", "baa"},
				624	{"abb", 0, "ab", "a", "ab"},
				625	{"Removing some substrings inging", 0, "ing", "",
				626	"Remov some substrings inging"},
				627	{"Not found", 0, "x", "0", "Not found"},
				628	{"Not found again", 5, "x", "0", "Not found again"},
				629	{" Making it much longer ", 0, " ", "Four score and seven years ago",
				630	"Four score and seven years agoMaking it much longer "},
				631	{"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
				632	{"Replace me only me once", 4, "me ", "", "Replace only me once"},
				633	{"abababab", 2, "ab", "c", "abcabab"},
				634	};
				635
				636	for (size_t i = 0; i < arraysize(cases); i++) {
				637	string16 str = ASCIIToUTF16(cases[i].str);
				638	ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
				639	ASCIIToUTF16(cases[i].find_this),
				640	ASCIIToUTF16(cases[i].replace_with));
				641	EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
				642	}
				643	}
				644
				645	TEST(StringUtilTest, HexDigitToInt) {
				646	EXPECT_EQ(0, HexDigitToInt('0'));
				647	EXPECT_EQ(1, HexDigitToInt('1'));
				648	EXPECT_EQ(2, HexDigitToInt('2'));
				649	EXPECT_EQ(3, HexDigitToInt('3'));
				650	EXPECT_EQ(4, HexDigitToInt('4'));
				651	EXPECT_EQ(5, HexDigitToInt('5'));
				652	EXPECT_EQ(6, HexDigitToInt('6'));
				653	EXPECT_EQ(7, HexDigitToInt('7'));
				654	EXPECT_EQ(8, HexDigitToInt('8'));
				655	EXPECT_EQ(9, HexDigitToInt('9'));
				656	EXPECT_EQ(10, HexDigitToInt('A'));
				657	EXPECT_EQ(11, HexDigitToInt('B'));
				658	EXPECT_EQ(12, HexDigitToInt('C'));
				659	EXPECT_EQ(13, HexDigitToInt('D'));
				660	EXPECT_EQ(14, HexDigitToInt('E'));
				661	EXPECT_EQ(15, HexDigitToInt('F'));
				662
				663	// Verify the lower case as well.
				664	EXPECT_EQ(10, HexDigitToInt('a'));
				665	EXPECT_EQ(11, HexDigitToInt('b'));
				666	EXPECT_EQ(12, HexDigitToInt('c'));
				667	EXPECT_EQ(13, HexDigitToInt('d'));
				668	EXPECT_EQ(14, HexDigitToInt('e'));
				669	EXPECT_EQ(15, HexDigitToInt('f'));
				670	}
				671
				672	// Test for Tokenize
				673	template <typename STR>
				674	void TokenizeTest() {
				675	std::vector<STR> r;
				676	size_t size;
				677
				678	size = Tokenize(STR("This is a string"), STR(" "), &r);
				679	EXPECT_EQ(4U, size);
				680	ASSERT_EQ(4U, r.size());
				681	EXPECT_EQ(r[0], STR("This"));
				682	EXPECT_EQ(r[1], STR("is"));
				683	EXPECT_EQ(r[2], STR("a"));
				684	EXPECT_EQ(r[3], STR("string"));
				685	r.clear();
				686
				687	size = Tokenize(STR("one,two,three"), STR(","), &r);
				688	EXPECT_EQ(3U, size);
				689	ASSERT_EQ(3U, r.size());
				690	EXPECT_EQ(r[0], STR("one"));
				691	EXPECT_EQ(r[1], STR("two"));
				692	EXPECT_EQ(r[2], STR("three"));
				693	r.clear();
				694
				695	size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
				696	EXPECT_EQ(3U, size);
				697	ASSERT_EQ(3U, r.size());
				698	EXPECT_EQ(r[0], STR("one"));
				699	EXPECT_EQ(r[1], STR("two"));
				700	EXPECT_EQ(r[2], STR("three;four"));
				701	r.clear();
				702
				703	size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
				704	EXPECT_EQ(4U, size);
				705	ASSERT_EQ(4U, r.size());
				706	EXPECT_EQ(r[0], STR("one"));
				707	EXPECT_EQ(r[1], STR("two"));
				708	EXPECT_EQ(r[2], STR("three"));
				709	EXPECT_EQ(r[3], STR("four"));
				710	r.clear();
				711
				712	size = Tokenize(STR("one, two, three"), STR(","), &r);
				713	EXPECT_EQ(3U, size);
				714	ASSERT_EQ(3U, r.size());
				715	EXPECT_EQ(r[0], STR("one"));
				716	EXPECT_EQ(r[1], STR(" two"));
				717	EXPECT_EQ(r[2], STR(" three"));
				718	r.clear();
				719
				720	size = Tokenize(STR("one, two, three, "), STR(","), &r);
				721	EXPECT_EQ(4U, size);
				722	ASSERT_EQ(4U, r.size());
				723	EXPECT_EQ(r[0], STR("one"));
				724	EXPECT_EQ(r[1], STR(" two"));
				725	EXPECT_EQ(r[2], STR(" three"));
				726	EXPECT_EQ(r[3], STR(" "));
				727	r.clear();
				728
				729	size = Tokenize(STR("one, two, three,"), STR(","), &r);
				730	EXPECT_EQ(3U, size);
				731	ASSERT_EQ(3U, r.size());
				732	EXPECT_EQ(r[0], STR("one"));
				733	EXPECT_EQ(r[1], STR(" two"));
				734	EXPECT_EQ(r[2], STR(" three"));
				735	r.clear();
				736
				737	size = Tokenize(STR(), STR(","), &r);
				738	EXPECT_EQ(0U, size);
				739	ASSERT_EQ(0U, r.size());
				740	r.clear();
				741
				742	size = Tokenize(STR(","), STR(","), &r);
				743	EXPECT_EQ(0U, size);
				744	ASSERT_EQ(0U, r.size());
				745	r.clear();
				746
				747	size = Tokenize(STR(",;:."), STR(".:;,"), &r);
				748	EXPECT_EQ(0U, size);
				749	ASSERT_EQ(0U, r.size());
				750	r.clear();
				751
				752	size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
				753	EXPECT_EQ(1U, size);
				754	ASSERT_EQ(1U, r.size());
				755	EXPECT_EQ(r[0], STR("a"));
				756	r.clear();
				757
				758	size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
				759	EXPECT_EQ(2U, size);
				760	ASSERT_EQ(2U, r.size());
				761	EXPECT_EQ(r[0], STR("\ta\t"));
				762	EXPECT_EQ(r[1], STR("b\tcc"));
				763	r.clear();
				764	}
				765
				766	TEST(StringUtilTest, TokenizeStdString) {
				767	TokenizeTest<std::string>();
				768	}
				769
				770	TEST(StringUtilTest, TokenizeStringPiece) {
				771	TokenizeTest<StringPiece>();
				772	}
				773
				774	// Test for JoinString
				775	TEST(StringUtilTest, JoinString) {
				776	std::vector<std::string> in;
				777	EXPECT_EQ("", JoinString(in, ','));
				778
				779	in.push_back("a");
				780	EXPECT_EQ("a", JoinString(in, ','));
				781
				782	in.push_back("b");
				783	in.push_back("c");
				784	EXPECT_EQ("a,b,c", JoinString(in, ','));
				785
				786	in.push_back(std::string());
				787	EXPECT_EQ("a,b,c,", JoinString(in, ','));
				788	in.push_back(" ");
				789	EXPECT_EQ("a\|b\|c\|\| ", JoinString(in, '\|'));
				790	}
				791
				792	// Test for JoinString overloaded with std::string separator
				793	TEST(StringUtilTest, JoinStringWithString) {
				794	std::string separator(", ");
				795	std::vector<std::string> parts;
				796	EXPECT_EQ(std::string(), JoinString(parts, separator));
				797
				798	parts.push_back("a");
				799	EXPECT_EQ("a", JoinString(parts, separator));
				800
				801	parts.push_back("b");
				802	parts.push_back("c");
				803	EXPECT_EQ("a, b, c", JoinString(parts, separator));
				804
				805	parts.push_back(std::string());
				806	EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
				807	parts.push_back(" ");
				808	EXPECT_EQ("a\|b\|c\|\| ", JoinString(parts, "\|"));
				809	}
				810
				811	// Test for JoinString overloaded with string16 separator
				812	TEST(StringUtilTest, JoinStringWithString16) {
				813	string16 separator = ASCIIToUTF16(", ");
				814	std::vector<string16> parts;
				815	EXPECT_EQ(string16(), JoinString(parts, separator));
				816
				817	parts.push_back(ASCIIToUTF16("a"));
				818	EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
				819
				820	parts.push_back(ASCIIToUTF16("b"));
				821	parts.push_back(ASCIIToUTF16("c"));
				822	EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
				823
				824	parts.push_back(ASCIIToUTF16(""));
				825	EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
				826	parts.push_back(ASCIIToUTF16(" "));
				827	EXPECT_EQ(ASCIIToUTF16("a\|b\|c\|\| "), JoinString(parts, ASCIIToUTF16("\|")));
				828	}
				829
				830	TEST(StringUtilTest, StartsWith) {
				831	EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
				832	EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
				833	EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
				834	EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
				835	EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
				836	EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
				837	EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
				838	EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
				839	EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
				840	EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
				841
				842	EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
				843	ASCIIToUTF16("javascript"), true));
				844	EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
				845	ASCIIToUTF16("javascript"), true));
				846	EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
				847	ASCIIToUTF16("javascript"), false));
				848	EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
				849	ASCIIToUTF16("javascript"), false));
				850	EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
				851	ASCIIToUTF16("javascript"), true));
				852	EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
				853	ASCIIToUTF16("javascript"), false));
				854	EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
				855	EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
				856	EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
				857	EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
				858	}
				859
				860	TEST(StringUtilTest, EndsWith) {
				861	EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
				862	ASCIIToUTF16(".plugin"), true));
				863	EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
				864	ASCIIToUTF16(".plugin"), true));
				865	EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
				866	ASCIIToUTF16(".plugin"), false));
				867	EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
				868	ASCIIToUTF16(".plugin"), false));
				869	EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
				870	EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
				871	EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
				872	ASCIIToUTF16(".plugin"), true));
				873	EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
				874	ASCIIToUTF16(".plugin"), false));
				875	EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
				876	EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
				877	EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
				878	EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
				879	EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
				880	ASCIIToUTF16(".plugin"), false));
				881	EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
				882	EXPECT_TRUE(EndsWith(string16(), string16(), false));
				883	EXPECT_TRUE(EndsWith(string16(), string16(), true));
				884	}
				885
				886	TEST(StringUtilTest, GetStringFWithOffsets) {
				887	std::vector<string16> subst;
				888	subst.push_back(ASCIIToUTF16("1"));
				889	subst.push_back(ASCIIToUTF16("2"));
				890	std::vector<size_t> offsets;
				891
				892	ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
				893	subst,
				894	&offsets);
				895	EXPECT_EQ(2U, offsets.size());
				896	EXPECT_EQ(7U, offsets[0]);
				897	EXPECT_EQ(25U, offsets[1]);
				898	offsets.clear();
				899
				900	ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
				901	subst,
				902	&offsets);
				903	EXPECT_EQ(2U, offsets.size());
				904	EXPECT_EQ(25U, offsets[0]);
				905	EXPECT_EQ(7U, offsets[1]);
				906	offsets.clear();
				907	}
				908
				909	TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
				910	// Test whether replacestringplaceholders works as expected when there
				911	// are fewer inputs than outputs.
				912	std::vector<string16> subst;
				913	subst.push_back(ASCIIToUTF16("9a"));
				914	subst.push_back(ASCIIToUTF16("8b"));
				915	subst.push_back(ASCIIToUTF16("7c"));
				916
				917	string16 formatted =
				918	ReplaceStringPlaceholders(
				919	ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
				920
				921	EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
				922	}
				923
				924	TEST(StringUtilTest, ReplaceStringPlaceholders) {
				925	std::vector<string16> subst;
				926	subst.push_back(ASCIIToUTF16("9a"));
				927	subst.push_back(ASCIIToUTF16("8b"));
				928	subst.push_back(ASCIIToUTF16("7c"));
				929	subst.push_back(ASCIIToUTF16("6d"));
				930	subst.push_back(ASCIIToUTF16("5e"));
				931	subst.push_back(ASCIIToUTF16("4f"));
				932	subst.push_back(ASCIIToUTF16("3g"));
				933	subst.push_back(ASCIIToUTF16("2h"));
				934	subst.push_back(ASCIIToUTF16("1i"));
				935
				936	string16 formatted =
				937	ReplaceStringPlaceholders(
				938	ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
				939
				940	EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
				941	}
				942
				943	TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
				944	std::vector<string16> subst;
				945	subst.push_back(ASCIIToUTF16("9a"));
				946	subst.push_back(ASCIIToUTF16("8b"));
				947	subst.push_back(ASCIIToUTF16("7c"));
				948	subst.push_back(ASCIIToUTF16("6d"));
				949	subst.push_back(ASCIIToUTF16("5e"));
				950	subst.push_back(ASCIIToUTF16("4f"));
				951	subst.push_back(ASCIIToUTF16("3g"));
				952	subst.push_back(ASCIIToUTF16("2h"));
				953	subst.push_back(ASCIIToUTF16("1i"));
				954	subst.push_back(ASCIIToUTF16("0j"));
				955	subst.push_back(ASCIIToUTF16("-1k"));
				956	subst.push_back(ASCIIToUTF16("-2l"));
				957	subst.push_back(ASCIIToUTF16("-3m"));
				958	subst.push_back(ASCIIToUTF16("-4n"));
				959
				960	string16 formatted =
				961	ReplaceStringPlaceholders(
				962	ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
				963	"$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
				964
				965	EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
				966	"1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
				967	}
				968
				969	TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
				970	std::vector<std::string> subst;
				971	subst.push_back("9a");
				972	subst.push_back("8b");
				973	subst.push_back("7c");
				974	subst.push_back("6d");
				975	subst.push_back("5e");
				976	subst.push_back("4f");
				977	subst.push_back("3g");
				978	subst.push_back("2h");
				979	subst.push_back("1i");
				980
				981	std::string formatted =
				982	ReplaceStringPlaceholders(
				983	"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
				984
				985	EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
				986	}
				987
				988	TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
				989	std::vector<std::string> subst;
				990	subst.push_back("a");
				991	subst.push_back("b");
				992	subst.push_back("c");
				993	EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
				994	"$1 $$2 $$$3");
				995	}
				996
				997	TEST(StringUtilTest, MatchPatternTest) {
				998	EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
				999	EXPECT_TRUE(MatchPattern("www.google.com", "*"));
				1000	EXPECT_FALSE(MatchPattern("www.google.com", "www.g.org"));
				1001	EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
				1002	EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
				1003	EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
				1004	EXPECT_TRUE(MatchPattern("Hello1234", "He??o\\1*"));
				1005	EXPECT_FALSE(MatchPattern("", "."));
				1006	EXPECT_TRUE(MatchPattern("", "*"));
				1007	EXPECT_TRUE(MatchPattern("", "?"));
				1008	EXPECT_TRUE(MatchPattern("", ""));
				1009	EXPECT_FALSE(MatchPattern("Hello", ""));
				1010	EXPECT_TRUE(MatchPattern("Hello", "Hello"));
				1011	// Stop after a certain recursion depth.
				1012	EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
				1013
				1014	// Test UTF8 matching.
				1015	EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
				1016	EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
				1017	EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
				1018	// Invalid sequences should be handled as a single invalid character.
				1019	EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
				1020	// If the pattern has invalid characters, it shouldn't match anything.
				1021	EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
				1022
				1023	// Test UTF16 character matching.
				1024	EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
				1025	UTF8ToUTF16("*.com")));
				1026	EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
				1027	UTF8ToUTF16("He??o\\1")));
				1028
				1029	// This test verifies that consecutive wild cards are collapsed into 1
				1030	// wildcard (when this doesn't occur, MatchPattern reaches it's maximum
				1031	// recursion depth).
				1032	EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
				1033	UTF8ToUTF16("He********************************o")));
				1034	}
				1035
				1036	TEST(StringUtilTest, LcpyTest) {
				1037	// Test the normal case where we fit in our buffer.
				1038	{
				1039	char dst[10];
				1040	wchar_t wdst[10];
				1041	EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
				1042	EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
				1043	EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
				1044	EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
				1045	}
				1046
				1047	// Test dst_size == 0, nothing should be written to \|dst\| and we should
				1048	// have the equivalent of strlen(src).
				1049	{
				1050	char dst[2] = {1, 2};
				1051	wchar_t wdst[2] = {1, 2};
				1052	EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0));
				1053	EXPECT_EQ(1, dst[0]);
				1054	EXPECT_EQ(2, dst[1]);
				1055	EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0));
				1056	EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
				1057	EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
				1058	}
				1059
				1060	// Test the case were we _just_ competely fit including the null.
				1061	{
				1062	char dst[8];
				1063	wchar_t wdst[8];
				1064	EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
				1065	EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
				1066	EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
				1067	EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
				1068	}
				1069
				1070	// Test the case were we we are one smaller, so we can't fit the null.
				1071	{
				1072	char dst[7];
				1073	wchar_t wdst[7];
				1074	EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
				1075	EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
				1076	EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
				1077	EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
				1078	}
				1079
				1080	// Test the case were we are just too small.
				1081	{
				1082	char dst[3];
				1083	wchar_t wdst[3];
				1084	EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
				1085	EXPECT_EQ(0, memcmp(dst, "ab", 3));
				1086	EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
				1087	EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
				1088	}
				1089	}
				1090
				1091	TEST(StringUtilTest, WprintfFormatPortabilityTest) {
				1092	static const struct {
				1093	const wchar_t* input;
				1094	bool portable;
				1095	} cases[] = {
				1096	{ L"%ls", true },
				1097	{ L"%s", false },
				1098	{ L"%S", false },
				1099	{ L"%lS", false },
				1100	{ L"Hello, %s", false },
				1101	{ L"%lc", true },
				1102	{ L"%c", false },
				1103	{ L"%C", false },
				1104	{ L"%lC", false },
				1105	{ L"%ls %s", false },
				1106	{ L"%s %ls", false },
				1107	{ L"%s %ls %s", false },
				1108	{ L"%f", true },
				1109	{ L"%f %F", false },
				1110	{ L"%d %D", false },
				1111	{ L"%o %O", false },
				1112	{ L"%u %U", false },
				1113	{ L"%f %d %o %u", true },
				1114	{ L"%-8d (%02.1f%)", true },
				1115	{ L"% 10s", false },
				1116	{ L"% 10ls", true }
				1117	};
				1118	for (size_t i = 0; i < arraysize(cases); ++i)
				1119	EXPECT_EQ(cases[i].portable, IsWprintfFormatPortable(cases[i].input));
				1120	}
				1121
				1122	TEST(StringUtilTest, RemoveChars) {
				1123	const char kRemoveChars[] = "-/+*";
				1124	std::string input = "A-+bc/d!*";
				1125	EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
				1126	EXPECT_EQ("Abcd!", input);
				1127
				1128	// No characters match kRemoveChars.
				1129	EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
				1130	EXPECT_EQ("Abcd!", input);
				1131
				1132	// Empty string.
				1133	input.clear();
				1134	EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
				1135	EXPECT_EQ(std::string(), input);
				1136	}
				1137
				1138	TEST(StringUtilTest, ReplaceChars) {
				1139	struct TestData {
				1140	const char* input;
				1141	const char* replace_chars;
				1142	const char* replace_with;
				1143	const char* output;
				1144	bool result;
				1145	} cases[] = {
				1146	{ "", "", "", "", false },
				1147	{ "test", "", "", "test", false },
				1148	{ "test", "", "!", "test", false },
				1149	{ "test", "z", "!", "test", false },
				1150	{ "test", "e", "!", "t!st", true },
				1151	{ "test", "e", "!?", "t!?st", true },
				1152	{ "test", "ez", "!", "t!st", true },
				1153	{ "test", "zed", "!?", "t!?st", true },
				1154	{ "test", "t", "!?", "!?es!?", true },
				1155	{ "test", "et", "!>", "!>!>s!>", true },
				1156	{ "test", "zest", "!", "!!!!", true },
				1157	{ "test", "szt", "!", "!e!!", true },
				1158	{ "test", "t", "test", "testestest", true },
				1159	};
				1160
				1161	for (size_t i = 0; i < arraysize(cases); ++i) {
				1162	std::string output;
				1163	bool result = ReplaceChars(cases[i].input,
				1164	cases[i].replace_chars,
				1165	cases[i].replace_with,
				1166	&output);
				1167	EXPECT_EQ(cases[i].result, result);
				1168	EXPECT_EQ(cases[i].output, output);
				1169	}
				1170	}
				1171
				1172	TEST(StringUtilTest, ContainsOnlyChars) {
				1173	// Providing an empty list of characters should return false but for the empty
				1174	// string.
				1175	EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
				1176	EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
				1177
				1178	EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
				1179	EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
				1180	EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
				1181	EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
				1182	EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
				1183
				1184	EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
				1185	EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
				1186	EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
				1187	EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII));
				1188	EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
				1189	EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII));
				1190
				1191	EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
				1192	EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
				1193	EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
				1194	EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16));
				1195	EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
				1196	EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),
				1197	kWhitespaceUTF16));
				1198	}
				1199
				1200	class WriteIntoTest : public testing::Test {
				1201	protected:
				1202	static void WritesCorrectly(size_t num_chars) {
				1203	std::string buffer;
				1204	char kOriginal[] = "supercali";
				1205	strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
				1206	// Using std::string(buffer.c_str()) instead of \|buffer\| truncates the
				1207	// string at the first \0.
				1208	EXPECT_EQ(std::string(kOriginal,
				1209	std::min(num_chars, arraysize(kOriginal) - 1)),
				1210	std::string(buffer.c_str()));
				1211	EXPECT_EQ(num_chars, buffer.size());
				1212	}
				1213	};
				1214
				1215	TEST_F(WriteIntoTest, WriteInto) {
				1216	// Validate that WriteInto reserves enough space and
				1217	// sizes a string correctly.
				1218	WritesCorrectly(1);
				1219	WritesCorrectly(2);
				1220	WritesCorrectly(5000);
				1221
				1222	// Validate that WriteInto doesn't modify other strings
				1223	// when using a Copy-on-Write implementation.
				1224	const char kLive[] = "live";
				1225	const char kDead[] = "dead";
				1226	const std::string live = kLive;
				1227	std::string dead = live;
				1228	strncpy(WriteInto(&dead, 5), kDead, 4);
				1229	EXPECT_EQ(kDead, dead);
				1230	EXPECT_EQ(4u, dead.size());
				1231	EXPECT_EQ(kLive, live);
				1232	EXPECT_EQ(4u, live.size());
				1233	}
				1234
				1235	} // namespace base