Blame - base/string_util.cc - platform/external/libchrome

blob: 851d3abe7b2df4e6d1c8a19220d14b81bc691706 [file] [log] [blame]

license.bot	f003cfe	2008-08-24 09:55:55 +0900	[diff] [blame^]	1	// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	4
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	5	#include "base/string_util.h"
				6
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	7	#include <ctype.h>
				8	#include <errno.h>
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	9	#include <math.h>
				10	#include <stdarg.h>
				11	#include <stdio.h>
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	12	#include <stdlib.h>
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	13	#include <string.h>
				14	#include <time.h>
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	15	#include <wchar.h>
				16	#include <wctype.h>
				17
				18	#include <algorithm>
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	19	#include <vector>
				20
				21	#include "base/basictypes.h"
				22	#include "base/logging.h"
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	23	#include "base/singleton.h"
				24
				25	namespace {
				26
				27	// Hack to convert any char-like type to its unsigned counterpart.
				28	// For example, it will convert char, signed char and unsigned char to unsigned
				29	// char.
				30	template<typename T>
				31	struct ToUnsigned {
				32	typedef T Unsigned;
				33	};
				34
				35	template<>
				36	struct ToUnsigned<char> {
				37	typedef unsigned char Unsigned;
				38	};
				39	template<>
				40	struct ToUnsigned<signed char> {
				41	typedef unsigned char Unsigned;
				42	};
				43	template<>
				44	struct ToUnsigned<wchar_t> {
mmentovai@google.com	3a1aa64	2008-08-13 00:06:52 +0900	[diff] [blame]	45	#if defined(WCHAR_T_IS_UTF16)
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	46	typedef unsigned short Unsigned;
mmentovai@google.com	3a1aa64	2008-08-13 00:06:52 +0900	[diff] [blame]	47	#elif defined(WCHAR_T_IS_UTF32)
				48	typedef uint32 Unsigned;
				49	#endif
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	50	};
				51	template<>
				52	struct ToUnsigned<short> {
				53	typedef unsigned short Unsigned;
				54	};
				55
				56	// Used by ReplaceStringPlaceholders to track the position in the string of
				57	// replaced parameters.
				58	struct ReplacementOffset {
				59	ReplacementOffset(int parameter, size_t offset)
				60	: parameter(parameter),
				61	offset(offset) {}
				62
				63	// Index of the parameter.
				64	int parameter;
				65
				66	// Starting position in the string.
				67	size_t offset;
				68	};
				69
				70	static bool CompareParameter(const ReplacementOffset& elem1,
				71	const ReplacementOffset& elem2) {
				72	return elem1.parameter < elem2.parameter;
				73	}
				74
// Generic string-to-number conversion shared by the traits classes below.
//
// StringToNumberTraits is a policy class that must provide:
//  - string_type: the STL string type accepted as input.
//  - value_type: the numeric type produced.
//  - convert_func(str, endptr): a static function that forwards to a
//    strtol-like routine and returns a value_type.
//  - valid_func(input): a static function returning false for input that
//    convert_func would accept but that should still be rejected. For
//    strtol-like functions this means rejecting leading whitespace.
//
// |*output| is always assigned whatever convert_func returned. The return
// value is true only when the whole of |input| was consumed as a number,
// no range error was reported through errno, and valid_func approved it.
template<typename StringToNumberTraits>
bool StringToNumber(const typename StringToNumberTraits::string_type& input,
                    typename StringToNumberTraits::value_type* output) {
  typedef StringToNumberTraits traits;
  typedef typename traits::string_type::value_type char_type;

  const char_type* const text_begin = input.c_str();
  char_type* parse_end = NULL;

  errno = 0;  // Thread-safe?  It is on at least Mac, Linux, and Windows.
  *output = traits::convert_func(text_begin, &parse_end);

  // A non-zero errno (ERANGE) means the value overflowed or underflowed.
  if (errno != 0)
    return false;

  // Nothing to parse.
  if (input.empty())
    return false;

  // The parser must stop exactly at the string's stated end. Comparing
  // against length() rather than looking for NUL also rejects strings with
  // embedded NUL characters, trailing junk, and strings with no number.
  if (text_begin + input.length() != parse_end)
    return false;

  // Finally, reject forms the converter tolerates but callers should not see.
  return traits::valid_func(input);
}
				112
				113	class StringToLongTraits {
				114	public:
				115	typedef std::string string_type;
				116	typedef long value_type;
				117	static const int kBase = 10;
				118	static inline value_type convert_func(const string_type::value_type* str,
				119	string_type::value_type** endptr) {
				120	return strtol(str, endptr, kBase);
				121	}
				122	static inline bool valid_func(const string_type& str) {
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	123	return !str.empty() && !isspace(str[0]);
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	124	}
				125	};
				126
				127	class WStringToLongTraits {
				128	public:
				129	typedef std::wstring string_type;
				130	typedef long value_type;
				131	static const int kBase = 10;
				132	static inline value_type convert_func(const string_type::value_type* str,
				133	string_type::value_type** endptr) {
				134	return wcstol(str, endptr, kBase);
				135	}
				136	static inline bool valid_func(const string_type& str) {
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	137	return !str.empty() && !iswspace(str[0]);
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	138	}
				139	};
				140
				141	class StringToInt64Traits {
				142	public:
				143	typedef std::string string_type;
				144	typedef int64 value_type;
				145	static const int kBase = 10;
				146	static inline value_type convert_func(const string_type::value_type* str,
				147	string_type::value_type** endptr) {
				148	#ifdef OS_WIN
				149	return _strtoi64(str, endptr, kBase);
				150	#else // assume OS_POSIX
				151	return strtoll(str, endptr, kBase);
				152	#endif
				153	}
				154	static inline bool valid_func(const string_type& str) {
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	155	return !str.empty() && !isspace(str[0]);
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	156	}
				157	};
				158
				159	class WStringToInt64Traits {
				160	public:
				161	typedef std::wstring string_type;
				162	typedef int64 value_type;
				163	static const int kBase = 10;
				164	static inline value_type convert_func(const string_type::value_type* str,
				165	string_type::value_type** endptr) {
				166	#ifdef OS_WIN
				167	return _wcstoi64(str, endptr, kBase);
				168	#else // assume OS_POSIX
				169	return wcstoll(str, endptr, kBase);
				170	#endif
				171	}
				172	static inline bool valid_func(const string_type& str) {
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	173	return !str.empty() && !iswspace(str[0]);
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	174	}
				175	};
				176
				177	// For the HexString variants, use the unsigned variants like strtoul for
				178	// convert_func so that input like "0x80000000" doesn't result in an overflow.
				179
				180	class HexStringToLongTraits {
				181	public:
				182	typedef std::string string_type;
				183	typedef long value_type;
				184	static const int kBase = 16;
				185	static inline value_type convert_func(const string_type::value_type* str,
				186	string_type::value_type** endptr) {
				187	return strtoul(str, endptr, kBase);
				188	}
				189	static inline bool valid_func(const string_type& str) {
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	190	return !str.empty() && !isspace(str[0]);
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	191	}
				192	};
				193
				194	class HexWStringToLongTraits {
				195	public:
				196	typedef std::wstring string_type;
				197	typedef long value_type;
				198	static const int kBase = 16;
				199	static inline value_type convert_func(const string_type::value_type* str,
				200	string_type::value_type** endptr) {
				201	return wcstoul(str, endptr, kBase);
				202	}
				203	static inline bool valid_func(const string_type& str) {
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	204	return !str.empty() && !iswspace(str[0]);
				205	}
				206	};
				207
				208	class StringToDoubleTraits {
				209	public:
				210	typedef std::string string_type;
				211	typedef double value_type;
				212	static inline value_type convert_func(const string_type::value_type* str,
				213	string_type::value_type** endptr) {
				214	return strtod(str, endptr);
				215	}
				216	static inline bool valid_func(const string_type& str) {
				217	return !str.empty() && !isspace(str[0]);
				218	}
				219	};
				220
				221	class WStringToDoubleTraits {
				222	public:
				223	typedef std::wstring string_type;
				224	typedef double value_type;
				225	static inline value_type convert_func(const string_type::value_type* str,
				226	string_type::value_type** endptr) {
				227	return wcstod(str, endptr);
				228	}
				229	static inline bool valid_func(const string_type& str) {
				230	return !str.empty() && !iswspace(str[0]);
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	231	}
				232	};
				233
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	234	} // namespace
				235
				236
namespace base {

// Scans a wprintf-style |format| string and reports whether every conversion
// specification in it is one this function treats as portable.
//
// Returns false as soon as it sees:
//  - 's' or 'c' without a preceding 'l' modifier, or
//  - any of 'S', 'C', 'F', 'D', 'O', 'U'.
// Returns true otherwise, including when the string ends in the middle of a
// specification (such a string is equally broken everywhere).
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position == '%') {
      bool in_specification = true;
      bool modifier_l = false;
      while (in_specification) {
        // Eat up characters until reaching a known specifier.
        if (*++position == '\0') {
          // The format string ended in the middle of a specification.  Call
          // it portable because no unportable specifications were found.  The
          // string is equally broken on all platforms.
          return true;
        }

        if (*position == 'l') {
          // 'l' is the only thing that can save the 's' and 'c' specifiers.
          modifier_l = true;
        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                   *position == 'S' || *position == 'C' || *position == 'F' ||
                   *position == 'D' || *position == 'O' || *position == 'U') {
          // Not portable.
          return false;
        }

        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
          // Portable, keep scanning the rest of the format string.
          in_specification = false;
        }
      }
    }
  }

  return true;
}

}  // namespace base
				277
				278
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	279	const std::string& EmptyString() {
				280	return *Singleton<std::string>::get();
				281	}
				282
				283	const std::wstring& EmptyWString() {
				284	return *Singleton<std::wstring>::get();
				285	}
				286
				287	const wchar_t kWhitespaceWide[] = {
				288	0x0009, // <control-0009> to <control-000D>
				289	0x000A,
				290	0x000B,
				291	0x000C,
				292	0x000D,
				293	0x0020, // Space
				294	0x0085, // <control-0085>
				295	0x00A0, // No-Break Space
				296	0x1680, // Ogham Space Mark
				297	0x180E, // Mongolian Vowel Separator
				298	0x2000, // En Quad to Hair Space
				299	0x2001,
				300	0x2002,
				301	0x2003,
				302	0x2004,
				303	0x2005,
				304	0x2006,
				305	0x2007,
				306	0x2008,
				307	0x2009,
				308	0x200A,
				309	0x200C, // Zero Width Non-Joiner
				310	0x2028, // Line Separator
				311	0x2029, // Paragraph Separator
				312	0x202F, // Narrow No-Break Space
				313	0x205F, // Medium Mathematical Space
				314	0x3000, // Ideographic Space
				315	0
				316	};
				317	const char kWhitespaceASCII[] = {
				318	0x09, // <control-0009> to <control-000D>
				319	0x0A,
				320	0x0B,
				321	0x0C,
				322	0x0D,
				323	0x20, // Space
				324	'\x85', // <control-0085>
				325	'\xa0', // No-Break Space
				326	0
				327	};
				328	const char* const kCodepageUTF8 = "UTF-8";
				329
				330	template<typename STR>
				331	TrimPositions TrimStringT(const STR& input,
				332	const typename STR::value_type trim_chars[],
				333	TrimPositions positions,
				334	STR* output) {
				335	// Find the edges of leading/trailing whitespace as desired.
				336	const typename STR::size_type last_char = input.length() - 1;
				337	const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
				338	input.find_first_not_of(trim_chars) : 0;
				339	const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
				340	input.find_last_not_of(trim_chars) : last_char;
				341
				342	// When the string was all whitespace, report that we stripped off whitespace
				343	// from whichever position the caller was interested in. For empty input, we
				344	// stripped no whitespace, but we still need to clear \|output\|.
				345	if (input.empty() \|\|
				346	(first_good_char == STR::npos) \|\| (last_good_char == STR::npos)) {
				347	bool input_was_empty = input.empty(); // in case output == &input
				348	output->clear();
				349	return input_was_empty ? TRIM_NONE : positions;
				350	}
				351
				352	// Trim the whitespace.
				353	*output =
				354	input.substr(first_good_char, last_good_char - first_good_char + 1);
				355
				356	// Return where we trimmed from.
				357	return static_cast<TrimPositions>(
				358	((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) \|
				359	((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
				360	}
				361
				362	bool TrimString(const std::wstring& input,
evanm@google.com	6dacc03	2008-08-21 08:41:20 +0900	[diff] [blame]	363	const wchar_t trim_chars[],
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	364	std::wstring* output) {
				365	return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
				366	}
				367
				368	bool TrimString(const std::string& input,
evanm@google.com	6dacc03	2008-08-21 08:41:20 +0900	[diff] [blame]	369	const char trim_chars[],
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	370	std::string* output) {
				371	return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
				372	}
				373
				374	TrimPositions TrimWhitespace(const std::wstring& input,
				375	TrimPositions positions,
				376	std::wstring* output) {
				377	return TrimStringT(input, kWhitespaceWide, positions, output);
				378	}
				379
				380	TrimPositions TrimWhitespace(const std::string& input,
				381	TrimPositions positions,
				382	std::string* output) {
				383	return TrimStringT(input, kWhitespaceASCII, positions, output);
				384	}
				385
				386	std::wstring CollapseWhitespace(const std::wstring& text,
				387	bool trim_sequences_with_line_breaks) {
				388	std::wstring result;
				389	result.resize(text.size());
				390
				391	// Set flags to pretend we're already in a trimmed whitespace sequence, so we
				392	// will trim any leading whitespace.
				393	bool in_whitespace = true;
				394	bool already_trimmed = true;
				395
				396	int chars_written = 0;
				397	for (std::wstring::const_iterator i(text.begin()); i != text.end(); ++i) {
				398	if (IsWhitespace(*i)) {
				399	if (!in_whitespace) {
				400	// Reduce all whitespace sequences to a single space.
				401	in_whitespace = true;
				402	result[chars_written++] = L' ';
				403	}
				404	if (trim_sequences_with_line_breaks && !already_trimmed &&
				405	((i == '\n') \|\| (i == '\r'))) {
				406	// Whitespace sequences containing CR or LF are eliminated entirely.
				407	already_trimmed = true;
				408	--chars_written;
				409	}
				410	} else {
				411	// Non-whitespace chracters are copied straight across.
				412	in_whitespace = false;
				413	already_trimmed = false;
				414	result[chars_written++] = *i;
				415	}
				416	}
				417
				418	if (in_whitespace && !already_trimmed) {
				419	// Any trailing whitespace is eliminated.
				420	--chars_written;
				421	}
				422
				423	result.resize(chars_written);
				424	return result;
				425	}
				426
				427	std::string WideToASCII(const std::wstring& wide) {
				428	DCHECK(IsStringASCII(wide));
				429	return std::string(wide.begin(), wide.end());
				430	}
				431
				432	std::wstring ASCIIToWide(const std::string& ascii) {
				433	DCHECK(IsStringASCII(ascii));
				434	return std::wstring(ascii.begin(), ascii.end());
				435	}
				436
// Latin1 is just the low range of Unicode, so each character can be copied
// across directly. Returns false, leaving |*latin1| empty, as soon as a
// character above 255 is found; otherwise stores the converted string in
// |*latin1| and returns true.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  latin1->clear();

  std::string converted(wide.size(), '\0');
  std::string::iterator dest = converted.begin();
  for (std::wstring::const_iterator src = wide.begin();
       src != wide.end(); ++src, ++dest) {
    if (*src > 255)
      return false;
    *dest = static_cast<char>(*src);
  }

  latin1->swap(converted);
  return true;
}
				450
				451	bool IsString8Bit(const std::wstring& str) {
				452	for (size_t i = 0; i < str.length(); i++) {
				453	if (str[i] > 255)
				454	return false;
				455	}
				456	return true;
				457	}
				458
				459	bool IsStringASCII(const std::wstring& str) {
				460	for (size_t i = 0; i < str.length(); i++) {
				461	if (str[i] > 0x7F)
				462	return false;
				463	}
				464	return true;
				465	}
				466
				467	bool IsStringASCII(const std::string& str) {
				468	for (size_t i = 0; i < str.length(); i++) {
				469	if (static_cast<unsigned char>(str[i]) > 0x7F)
				470	return false;
				471	}
				472	return true;
				473	}
				474
				475	// Helper functions that determine whether the given character begins a
				476	// UTF-8 sequence of bytes with the given length. A character satisfies
				477	// "IsInUTF8Sequence" if it is anything but the first byte in a multi-byte
				478	// character.
				479	static inline bool IsBegin2ByteUTF8(int c) {
				480	return (c & 0xE0) == 0xC0;
				481	}
				482	static inline bool IsBegin3ByteUTF8(int c) {
				483	return (c & 0xF0) == 0xE0;
				484	}
				485	static inline bool IsBegin4ByteUTF8(int c) {
				486	return (c & 0xF8) == 0xF0;
				487	}
				488	static inline bool IsInUTF8Sequence(int c) {
				489	return (c & 0xC0) == 0x80;
				490	}
				491
				492	// This function was copied from Mozilla, with modifications. The original code
				493	// was 'IsUTF8' in xpcom/string/src/nsReadableUtils.cpp. The license block for
				494	// this function is:
				495	// This function subject to the Mozilla Public License Version
				496	// 1.1 (the "License"); you may not use this code except in compliance with
				497	// the License. You may obtain a copy of the License at
				498	// http://www.mozilla.org/MPL/
				499	//
				500	// Software distributed under the License is distributed on an "AS IS" basis,
				501	// WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
				502	// for the specific language governing rights and limitations under the
				503	// License.
				504	//
				505	// The Original Code is mozilla.org code.
				506	//
				507	// The Initial Developer of the Original Code is
				508	// Netscape Communications Corporation.
				509	// Portions created by the Initial Developer are Copyright (C) 2000
				510	// the Initial Developer. All Rights Reserved.
				511	//
				512	// Contributor(s):
				513	// Scott Collins <scc@mozilla.org> (original author)
				514	//
				515	// This is a template so that it can be run on wide and 8-bit strings. We want
				516	// to run it on wide strings when we have input that we think may have
				517	// originally been UTF-8, but has been converted to wide characters because
				518	// that's what we (and Windows) use internally.
				519	template<typename CHAR>
				520	static bool IsStringUTF8T(const CHAR* str) {
				521	bool overlong = false;
				522	bool surrogate = false;
				523	bool nonchar = false;
				524
				525	// overlong byte upper bound
				526	typename ToUnsigned<CHAR>::Unsigned olupper = 0;
				527
				528	// surrogate byte lower bound
				529	typename ToUnsigned<CHAR>::Unsigned slower = 0;
				530
				531	// incremented when inside a multi-byte char to indicate how many bytes
				532	// are left in the sequence
				533	int positions_left = 0;
				534
				535	for (int i = 0; str[i] != 0; i++) {
				536	// This whole function assume an unsigned value so force its conversion to
				537	// an unsigned value.
				538	typename ToUnsigned<CHAR>::Unsigned c = str[i];
				539	if (c < 0x80)
				540	continue; // ASCII
				541
				542	if (c <= 0xC1) {
				543	// [80-BF] where not expected, [C0-C1] for overlong
				544	return false;
				545	} else if (IsBegin2ByteUTF8(c)) {
				546	positions_left = 1;
				547	} else if (IsBegin3ByteUTF8(c)) {
				548	positions_left = 2;
				549	if (c == 0xE0) {
				550	// to exclude E0[80-9F][80-BF]
				551	overlong = true;
				552	olupper = 0x9F;
				553	} else if (c == 0xED) {
				554	// ED[A0-BF][80-BF]: surrogate codepoint
				555	surrogate = true;
				556	slower = 0xA0;
				557	} else if (c == 0xEF) {
				558	// EF BF [BE-BF] : non-character
				559	nonchar = true;
				560	}
				561	} else if (c <= 0xF4) {
				562	positions_left = 3;
				563	nonchar = true;
				564	if (c == 0xF0) {
				565	// to exclude F0[80-8F][80-BF]{2}
				566	overlong = true;
				567	olupper = 0x8F;
				568	} else if (c == 0xF4) {
				569	// to exclude F4[90-BF][80-BF]
				570	// actually not surrogates but codepoints beyond 0x10FFFF
				571	surrogate = true;
				572	slower = 0x90;
				573	}
				574	} else {
				575	return false;
				576	}
				577
				578	// eat the rest of this multi-byte character
				579	while (positions_left) {
				580	positions_left--;
				581	i++;
				582	c = str[i];
				583	if (!c)
				584	return false; // end of string but not end of character sequence
				585
				586	// non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
dkegel@google.com	fc934f1	2008-08-22 05:29:49 +0900	[diff] [blame]	587	if (nonchar && ((!positions_left && c < 0xBE) \|\|
				588	(positions_left == 1 && c != 0xBF) \|\|
				589	(positions_left == 2 && 0x0F != (0x0F & c) ))) {
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	590	nonchar = false;
				591	}
dkegel@google.com	fc934f1	2008-08-22 05:29:49 +0900	[diff] [blame]	592	if (!IsInUTF8Sequence(c) \|\| (overlong && c <= olupper) \|\|
				593	(surrogate && slower <= c) \|\| (nonchar && !positions_left) ) {
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	594	return false;
				595	}
				596	overlong = surrogate = false;
				597	}
				598	}
				599	return true;
				600	}
				601
				602	bool IsStringUTF8(const char* str) {
				603	return IsStringUTF8T(str);
				604	}
				605
				606	bool IsStringWideUTF8(const wchar_t* str) {
				607	return IsStringUTF8T(str);
				608	}
				609
				610	template<typename Iter>
				611	static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
				612	Iter a_end,
				613	const char* b) {
				614	for (Iter it = a_begin; it != a_end; ++it, ++b) {
				615	if (!b \|\| ToLowerASCII(it) != *b)
				616	return false;
				617	}
				618	return *b == 0;
				619	}
				620
				621	// Front-ends for LowerCaseEqualsASCII.
				622	bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
				623	return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
				624	}
				625
				626	bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
				627	return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
				628	}
				629
				630	bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
				631	std::string::const_iterator a_end,
				632	const char* b) {
				633	return DoLowerCaseEqualsASCII(a_begin, a_end, b);
				634	}
				635
				636	bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
				637	std::wstring::const_iterator a_end,
				638	const char* b) {
				639	return DoLowerCaseEqualsASCII(a_begin, a_end, b);
				640	}
				641	bool LowerCaseEqualsASCII(const char* a_begin,
				642	const char* a_end,
				643	const char* b) {
				644	return DoLowerCaseEqualsASCII(a_begin, a_end, b);
				645	}
				646	bool LowerCaseEqualsASCII(const wchar_t* a_begin,
				647	const wchar_t* a_end,
				648	const char* b) {
				649	return DoLowerCaseEqualsASCII(a_begin, a_end, b);
				650	}
				651
				652	bool StartsWithASCII(const std::string& str,
				653	const std::string& search,
				654	bool case_sensitive) {
				655	if (case_sensitive)
				656	return str.compare(0, search.length(), search) == 0;
				657	else
evanm@google.com	b8a8f37	2008-08-08 04:26:37 +0900	[diff] [blame]	658	return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	659	}
				660
				661	DataUnits GetByteDisplayUnits(int64 bytes) {
				662	// The byte thresholds at which we display amounts. A byte count is displayed
				663	// in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
				664	// This must match the DataUnits enum.
				665	static const int64 kUnitThresholds[] = {
				666	0, // DATA_UNITS_BYTE,
				667	3*1024, // DATA_UNITS_KILOBYTE,
				668	210241024, // DATA_UNITS_MEGABYTE,
				669	102410241024 // DATA_UNITS_GIGABYTE,
				670	};
				671
				672	if (bytes < 0) {
				673	NOTREACHED() << "Negative bytes value";
				674	return DATA_UNITS_BYTE;
				675	}
				676
				677	int unit_index = arraysize(kUnitThresholds);
				678	while (--unit_index > 0) {
				679	if (bytes >= kUnitThresholds[unit_index])
				680	break;
				681	}
				682
				683	DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIGABYTE);
				684	return DataUnits(unit_index);
				685	}
				686
				687	// TODO(mpcomplete): deal with locale
				688	// Byte suffixes. This must match the DataUnits enum.
				689	static const wchar_t* const kByteStrings[] = {
				690	L"B",
				691	L"kB",
				692	L"MB",
				693	L"GB"
				694	};
				695
				696	static const wchar_t* const kSpeedStrings[] = {
				697	L"B/s",
				698	L"kB/s",
				699	L"MB/s",
				700	L"GB/s"
				701	};
				702
				703	std::wstring FormatBytesInternal(int64 bytes,
				704	DataUnits units,
				705	bool show_units,
				706	const wchar_t* const* suffix) {
				707	if (bytes < 0) {
				708	NOTREACHED() << "Negative bytes value";
				709	return std::wstring();
				710	}
				711
				712	DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIGABYTE);
				713
				714	// Put the quantity in the right units.
				715	double unit_amount = static_cast<double>(bytes);
				716	for (int i = 0; i < units; ++i)
				717	unit_amount /= 1024.0;
				718
				719	wchar_t tmp[64];
				720	// If the first decimal digit is 0, don't show it.
				721	double int_part;
				722	double fractional_part = modf(unit_amount, &int_part);
				723	modf(fractional_part * 10, &int_part);
evanm@google.com	b8a8f37	2008-08-08 04:26:37 +0900	[diff] [blame]	724	if (int_part == 0) {
				725	base::swprintf(tmp, arraysize(tmp),
				726	L"%lld", static_cast<int64>(unit_amount));
				727	} else {
				728	base::swprintf(tmp, arraysize(tmp), L"%.1lf", unit_amount);
				729	}
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	730
				731	std::wstring ret(tmp);
				732	if (show_units) {
				733	ret += L" ";
				734	ret += suffix[units];
				735	}
				736
				737	return ret;
				738	}
				739
				740	std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units) {
				741	return FormatBytesInternal(bytes, units, show_units, kByteStrings);
				742	}
				743
				744	std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
				745	return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
				746	}
				747
				748	template<class StringType>
				749	void DoReplaceSubstringsAfterOffset(StringType* str,
				750	typename StringType::size_type start_offset,
				751	const StringType& find_this,
				752	const StringType& replace_with) {
				753	if ((start_offset == StringType::npos) \|\| (start_offset >= str->length()))
				754	return;
				755
				756	DCHECK(!find_this.empty());
				757	for (typename StringType::size_type offs(str->find(find_this, start_offset));
				758	offs != StringType::npos; offs = str->find(find_this, offs)) {
				759	str->replace(offs, find_this.length(), replace_with);
				760	offs += replace_with.length();
				761	}
				762	}
				763
				764	void ReplaceSubstringsAfterOffset(std::wstring* str,
				765	std::wstring::size_type start_offset,
				766	const std::wstring& find_this,
				767	const std::wstring& replace_with) {
				768	DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with);
				769	}
				770
				771	void ReplaceSubstringsAfterOffset(std::string* str,
				772	std::string::size_type start_offset,
				773	const std::string& find_this,
				774	const std::string& replace_with) {
				775	DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with);
				776	}
				777
				778	// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
				779	// is the size of the buffer. These return the number of characters in the
mmentovai@google.com	38cabad	2008-08-13 10:17:18 +0900	[diff] [blame]	780	// formatted string excluding the NUL terminator. If the buffer is not
				781	// large enough to accommodate the formatted string without truncation, they
				782	// return the number of characters that would be in the fully-formatted string
				783	// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	784	inline int vsnprintfT(char* buffer,
				785	size_t buf_size,
				786	const char* format,
				787	va_list argptr) {
evanm@google.com	b8a8f37	2008-08-08 04:26:37 +0900	[diff] [blame]	788	return base::vsnprintf(buffer, buf_size, format, argptr);
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	789	}
				790
				791	inline int vsnprintfT(wchar_t* buffer,
				792	size_t buf_size,
				793	const wchar_t* format,
				794	va_list argptr) {
evanm@google.com	b8a8f37	2008-08-08 04:26:37 +0900	[diff] [blame]	795	return base::vswprintf(buffer, buf_size, format, argptr);
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	796	}
				797
				798	// Templatized backend for StringPrintF/StringAppendF. This does not finalize
				799	// the va_list, the caller is expected to do that.
				800	template <class char_type>
				801	static void StringAppendVT(
				802	std::basic_string<char_type, std::char_traits<char_type> >* dst,
				803	const char_type* format,
				804	va_list ap) {
				805
				806	// First try with a small fixed size buffer.
mmentovai@google.com	38cabad	2008-08-13 10:17:18 +0900	[diff] [blame]	807	// This buffer size should be kept in sync with StringUtilTest.GrowBoundary
				808	// and StringUtilTest.StringPrintfBounds.
				809	char_type stack_buf[1024];
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	810
mmentovai@google.com	38cabad	2008-08-13 10:17:18 +0900	[diff] [blame]	811	va_list backup_ap;
				812	base::va_copy(backup_ap, ap);
				813
				814	#if !defined(OS_WIN)
				815	errno = 0;
				816	#endif
				817	int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, backup_ap);
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	818	va_end(backup_ap);
				819
mmentovai@google.com	38cabad	2008-08-13 10:17:18 +0900	[diff] [blame]	820	if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) {
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	821	// It fit.
				822	dst->append(stack_buf, result);
				823	return;
				824	}
				825
mmentovai@google.com	38cabad	2008-08-13 10:17:18 +0900	[diff] [blame]	826	// Repeatedly increase buffer size until it fits.
				827	int mem_length = arraysize(stack_buf);
				828	while (true) {
				829	if (result < 0) {
				830	#if !defined(OS_WIN)
				831	// On Windows, vsnprintfT always returns the number of characters in a
				832	// fully-formatted string, so if we reach this point, something else is
				833	// wrong and no amount of buffer-doubling is going to fix it.
				834	if (errno != 0 && errno != EOVERFLOW)
				835	#endif
				836	{
				837	// If an error other than overflow occurred, it's never going to work.
				838	DLOG(WARNING) << "Unable to printf the requested string due to error.";
				839	return;
				840	}
				841	// Try doubling the buffer size.
				842	mem_length *= 2;
				843	} else {
				844	// We need exactly "result + 1" characters.
				845	mem_length = result + 1;
				846	}
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	847
mmentovai@google.com	38cabad	2008-08-13 10:17:18 +0900	[diff] [blame]	848	if (mem_length > 32 * 1024 * 1024) {
				849	// That should be plenty, don't try anything larger. This protects
				850	// against huge allocations when using vsnprintfT implementations that
				851	// return -1 for reasons other than overflow without setting errno.
				852	DLOG(WARNING) << "Unable to printf the requested string due to size.";
				853	return;
				854	}
				855
				856	std::vector<char_type> mem_buf(mem_length);
				857
				858	// Restore the va_list before we use it again.
				859	base::va_copy(backup_ap, ap);
				860
				861	result = vsnprintfT(&mem_buf[0], mem_length, format, ap);
				862	va_end(backup_ap);
				863
				864	if ((result >= 0) && (result < mem_length)) {
				865	// It fit.
				866	dst->append(&mem_buf[0], result);
				867	return;
				868	}
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	869	}
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	870	}
				871
deanm@google.com	d649845	2008-08-13 20:09:33 +0900	[diff] [blame]	872	namespace {
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	873
deanm@google.com	d649845	2008-08-13 20:09:33 +0900	[diff] [blame]	874	template <typename STR, typename INT, typename UINT, bool NEG>
				875	struct IntToStringT {
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	876
deanm@google.com	d649845	2008-08-13 20:09:33 +0900	[diff] [blame]	877	// This is to avoid a compiler warning about unary minus on unsigned type.
				878	// For example, say you had the following code:
				879	// template <typename INT>
				880	// INT abs(INT value) { return value < 0 ? -value : value; }
				881	// Even though if INT is unsigned, it's impossible for value < 0, so the
				882	// unary minus will never be taken, the compiler will still generate a
				883	// warning. We do a little specialization dance...
				884	template <typename INT2, typename UINT2, bool NEG2>
				885	struct ToUnsignedT { };
				886
				887	template <typename INT2, typename UINT2>
				888	struct ToUnsignedT<INT2, UINT2, false> {
				889	static UINT2 ToUnsigned(INT2 value) {
				890	return static_cast<UINT2>(value);
				891	}
				892	};
				893
				894	template <typename INT2, typename UINT2>
				895	struct ToUnsignedT<INT2, UINT2, true> {
				896	static UINT2 ToUnsigned(INT2 value) {
				897	return static_cast<UINT2>(value < 0 ? -value : value);
				898	}
				899	};
				900
				901	static STR IntToString(INT value) {
				902	// log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
				903	// So round up to allocate 3 output characters per byte, plus 1 for '-'.
				904	const int kOutputBufSize = 3 * sizeof(INT) + 1;
				905
				906	// Allocate the whole string right away, we will right back to front, and
				907	// then return the substr of what we ended up using.
				908	STR outbuf(kOutputBufSize, 0);
				909
				910	bool is_neg = value < 0;
				911	// Even though is_neg will never be true when INT is parameterized as
				912	// unsigned, even the presence of the unary operation causes a warning.
				913	UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
				914
				915	for (typename STR::iterator it = outbuf.end();;) {
				916	--it;
				917	DCHECK(it != outbuf.begin());
				918	*it = static_cast<typename STR::value_type>((res % 10) + '0');
				919	res /= 10;
				920
				921	// We're done..
				922	if (res == 0) {
				923	if (is_neg) {
				924	--it;
				925	DCHECK(it != outbuf.begin());
				926	*it = static_cast<typename STR::value_type>('-');
				927	}
				928	return STR(it, outbuf.end());
				929	}
				930	}
				931	NOTREACHED();
				932	return STR();
				933	}
				934	};
				935
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	936	}
				937
				938	std::string IntToString(int value) {
deanm@google.com	d649845	2008-08-13 20:09:33 +0900	[diff] [blame]	939	return IntToStringT<std::string, int, unsigned int, true>::
				940	IntToString(value);
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	941	}
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	942	std::wstring IntToWString(int value) {
deanm@google.com	d649845	2008-08-13 20:09:33 +0900	[diff] [blame]	943	return IntToStringT<std::wstring, int, unsigned int, true>::
				944	IntToString(value);
				945	}
				946	std::string UintToString(unsigned int value) {
				947	return IntToStringT<std::string, unsigned int, unsigned int, false>::
				948	IntToString(value);
				949	}
				950	std::wstring UintToWString(unsigned int value) {
				951	return IntToStringT<std::wstring, unsigned int, unsigned int, false>::
				952	IntToString(value);
				953	}
				954	std::string Int64ToString(int64 value) {
				955	return IntToStringT<std::string, int64, uint64, true>::
				956	IntToString(value);
				957	}
				958	std::wstring Int64ToWString(int64 value) {
				959	return IntToStringT<std::wstring, int64, uint64, true>::
				960	IntToString(value);
				961	}
				962	std::string Uint64ToString(uint64 value) {
				963	return IntToStringT<std::string, uint64, uint64, false>::
				964	IntToString(value);
				965	}
				966	std::wstring Uint64ToWString(uint64 value) {
				967	return IntToStringT<std::wstring, uint64, uint64, false>::
				968	IntToString(value);
initial.commit	3f4a732	2008-07-27 06:49:38 +0900	[diff] [blame]	969	}
				970
				971	inline void StringAppendV(std::string* dst, const char* format, va_list ap) {
				972	StringAppendVT<char>(dst, format, ap);
				973	}
				974
				975	inline void StringAppendV(std::wstring* dst,
				976	const wchar_t* format,
				977	va_list ap) {
				978	StringAppendVT<wchar_t>(dst, format, ap);
				979	}
				980
				981	std::string StringPrintf(const char* format, ...) {
				982	va_list ap;
				983	va_start(ap, format);
				984	std::string result;
				985	StringAppendV(&result, format, ap);
				986	va_end(ap);
				987	return result;
				988	}
				989
				990	std::wstring StringPrintf(const wchar_t* format, ...) {
				991	va_list ap;
				992	va_start(ap, format);
				993	std::wstring result;
				994	StringAppendV(&result, format, ap);
				995	va_end(ap);
				996	return result;
				997	}
				998
				999	const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
				1000	va_list ap;
				1001	va_start(ap, format);
				1002	dst->clear();
				1003	StringAppendV(dst, format, ap);
				1004	va_end(ap);
				1005	return *dst;
				1006	}
				1007
				1008	const std::wstring& SStringPrintf(std::wstring* dst,
				1009	const wchar_t* format, ...) {
				1010	va_list ap;
				1011	va_start(ap, format);
				1012	dst->clear();
				1013	StringAppendV(dst, format, ap);
				1014	va_end(ap);
				1015	return *dst;
				1016	}
				1017
				1018	void StringAppendF(std::string* dst, const char* format, ...) {
				1019	va_list ap;
				1020	va_start(ap, format);
				1021	StringAppendV(dst, format, ap);
				1022	va_end(ap);
				1023	}
				1024
				1025	void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
				1026	va_list ap;
				1027	va_start(ap, format);
				1028	StringAppendV(dst, format, ap);
				1029	va_end(ap);
				1030	}
				1031
				1032	template<typename STR>
				1033	static void SplitStringT(const STR& str,
				1034	const typename STR::value_type s,
				1035	bool trim_whitespace,
				1036	std::vector<STR>* r) {
				1037	size_t last = 0;
				1038	size_t i;
				1039	size_t c = str.size();
				1040	for (i = 0; i <= c; ++i) {
				1041	if (i == c \|\| str[i] == s) {
				1042	size_t len = i - last;
				1043	STR tmp = str.substr(last, len);
				1044	if (trim_whitespace) {
				1045	STR t_tmp;
				1046	TrimWhitespace(tmp, TRIM_ALL, &t_tmp);
				1047	r->push_back(t_tmp);
				1048	} else {
				1049	r->push_back(tmp);
				1050	}
				1051	last = i + 1;
				1052	}
				1053	}
				1054	}
				1055
				1056	void SplitString(const std::wstring& str,
				1057	wchar_t s,
				1058	std::vector<std::wstring>* r) {
				1059	SplitStringT(str, s, true, r);
				1060	}
				1061
				1062	void SplitString(const std::string& str,
				1063	char s,
				1064	std::vector<std::string>* r) {
				1065	SplitStringT(str, s, true, r);
				1066	}
				1067
				1068	void SplitStringDontTrim(const std::wstring& str,
				1069	wchar_t s,
				1070	std::vector<std::wstring>* r) {
				1071	SplitStringT(str, s, false, r);
				1072	}
				1073
				1074	void SplitStringDontTrim(const std::string& str,
				1075	char s,
				1076	std::vector<std::string>* r) {
				1077	SplitStringT(str, s, false, r);
				1078	}
				1079
				1080	void SplitStringAlongWhitespace(const std::wstring& str,
				1081	std::vector<std::wstring>* result) {
				1082	const size_t length = str.length();
				1083	if (!length)
				1084	return;
				1085
				1086	bool last_was_ws = false;
				1087	size_t last_non_ws_start = 0;
				1088	for (size_t i = 0; i < length; ++i) {
				1089	switch(str[i]) {
				1090	// HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
				1091	case L' ':
				1092	case L'\t':
				1093	case L'\xA':
				1094	case L'\xB':
				1095	case L'\xC':
				1096	case L'\xD':
				1097	if (!last_was_ws) {
				1098	if (i > 0) {
				1099	result->push_back(
				1100	str.substr(last_non_ws_start, i - last_non_ws_start));
				1101	}
				1102	last_was_ws = true;
				1103	}
				1104	break;
				1105
				1106	default: // Not a space character.
				1107	if (last_was_ws) {
				1108	last_was_ws = false;
				1109	last_non_ws_start = i;
				1110	}
				1111	break;
				1112	}
				1113	}
				1114	if (!last_was_ws) {
				1115	result->push_back(
				1116	str.substr(last_non_ws_start, length - last_non_ws_start));
				1117	}
				1118	}
				1119
				1120	std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
				1121	const std::wstring& a,
				1122	size_t* offset) {
				1123	std::vector<size_t> offsets;
				1124	std::wstring result = ReplaceStringPlaceholders(format_string, a,
				1125	std::wstring(),
				1126	std::wstring(),
				1127	std::wstring(), &offsets);
				1128	DCHECK(offsets.size() == 1);
				1129	if (offset) {
				1130	*offset = offsets[0];
				1131	}
				1132	return result;
				1133	}
				1134
				1135	std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
				1136	const std::wstring& a,
				1137	const std::wstring& b,
				1138	std::vector<size_t>* offsets) {
				1139	return ReplaceStringPlaceholders(format_string, a, b, std::wstring(),
				1140	std::wstring(), offsets);
				1141	}
				1142
				1143	std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
				1144	const std::wstring& a,
				1145	const std::wstring& b,
				1146	const std::wstring& c,
				1147	std::vector<size_t>* offsets) {
				1148	return ReplaceStringPlaceholders(format_string, a, b, c, std::wstring(),
				1149	offsets);
				1150	}
				1151
				1152	std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
				1153	const std::wstring& a,
				1154	const std::wstring& b,
				1155	const std::wstring& c,
				1156	const std::wstring& d,
				1157	std::vector<size_t>* offsets) {
				1158	// We currently only support up to 4 place holders ($1 through $4), although
				1159	// it's easy enough to add more.
				1160	const std::wstring* subst_texts[] = { &a, &b, &c, &d };
				1161
				1162	std::wstring formatted;
				1163	formatted.reserve(format_string.length() + a.length() +
				1164	b.length() + c.length() + d.length());
				1165
				1166	std::vector<ReplacementOffset> r_offsets;
				1167
				1168	// Replace $$ with $ and $1-$4 with placeholder text if it exists.
				1169	for (std::wstring::const_iterator i = format_string.begin();
				1170	i != format_string.end(); ++i) {
				1171	if ('$' == *i) {
				1172	if (i + 1 != format_string.end()) {
				1173	++i;
				1174	DCHECK('$' == i \|\| ('1' <= i && *i <= '4')) <<
				1175	"Invalid placeholder: " << *i;
				1176	if ('$' == *i) {
				1177	formatted.push_back('$');
				1178	} else {
				1179	int index = *i - '1';
				1180	if (offsets) {
				1181	ReplacementOffset r_offset(index,
				1182	static_cast<int>(formatted.size()));
				1183	r_offsets.insert(std::lower_bound(r_offsets.begin(),
				1184	r_offsets.end(), r_offset,
				1185	&CompareParameter),
				1186	r_offset);
				1187	}
				1188	formatted.append(*subst_texts[index]);
				1189	}
				1190	}
				1191	} else {
				1192	formatted.push_back(*i);
				1193	}
				1194	}
				1195	if (offsets) {
				1196	for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
				1197	i != r_offsets.end(); ++i) {
				1198	offsets->push_back(i->offset);
				1199	}
				1200	}
				1201	return formatted;
				1202	}
				1203
				1204	template <class CHAR>
				1205	static bool IsWildcard(CHAR character) {
				1206	return character == '*' \|\| character == '?';
				1207	}
				1208
				1209	// Move the strings pointers to the point where they start to differ.
				1210	template <class CHAR>
				1211	static void EatSameChars(const CHAR pattern, const CHAR string) {
				1212	bool escaped = false;
				1213	while (pattern && string) {
				1214	if (!escaped && IsWildcard(**pattern)) {
				1215	// We don't want to match wildcard here, except if it's escaped.
				1216	return;
				1217	}
				1218
				1219	// Check if the escapement char is found. If so, skip it and move to the
				1220	// next character.
				1221	if (!escaped && **pattern == L'\\') {
				1222	escaped = true;
				1223	(*pattern)++;
				1224	continue;
				1225	}
				1226
				1227	// Check if the chars match, if so, increment the ptrs.
				1228	if (pattern == string) {
				1229	(*pattern)++;
				1230	(*string)++;
				1231	} else {
				1232	// Uh ho, it did not match, we are done. If the last char was an
				1233	// escapement, that means that it was an error to advance the ptr here,
				1234	// let's put it back where it was. This also mean that the MatchPattern
				1235	// function will return false because if we can't match an escape char
				1236	// here, then no one will.
				1237	if (escaped) {
				1238	(*pattern)--;
				1239	}
				1240	return;
				1241	}
				1242
				1243	escaped = false;
				1244	}
				1245	}
				1246
				1247	template <class CHAR>
				1248	static void EatWildcard(const CHAR** pattern) {
				1249	while(**pattern) {
				1250	if (!IsWildcard(**pattern))
				1251	return;
				1252	(*pattern)++;
				1253	}
				1254	}
				1255
				1256	template <class CHAR>
				1257	static bool MatchPatternT(const CHAR* eval, const CHAR* pattern) {
				1258	// Eat all the matching chars.
				1259	EatSameChars(&pattern, &eval);
				1260
				1261	// If the string is empty, then the pattern must be empty too, or contains
				1262	// only wildcards.
				1263	if (*eval == 0) {
				1264	EatWildcard(&pattern);
				1265	if (*pattern)
				1266	return false;
				1267	return true;
				1268	}
				1269
				1270	// Pattern is empty but not string, this is not a match.
				1271	if (*pattern == 0)
				1272	return false;
				1273
				1274	// If this is a question mark, then we need to compare the rest with
				1275	// the current string or the string with one character eaten.
				1276	if (pattern[0] == '?') {
				1277	if (MatchPatternT(eval, pattern + 1) \|\|
				1278	MatchPatternT(eval + 1, pattern + 1))
				1279	return true;
				1280	}
				1281
				1282	// This is a *, try to match all the possible substrings with the remainder
				1283	// of the pattern.
				1284	if (pattern[0] == '*') {
				1285	while (*eval) {
				1286	if (MatchPatternT(eval, pattern + 1))
				1287	return true;
				1288	eval++;
				1289	}
				1290
				1291	// We reached the end of the string, let see if the pattern contains only
				1292	// wildcards.
				1293	if (*eval == 0) {
				1294	EatWildcard(&pattern);
				1295	if (*pattern)
				1296	return false;
				1297	return true;
				1298	}
				1299	}
				1300
				1301	return false;
				1302	}
				1303
				1304	bool MatchPattern(const std::wstring& eval, const std::wstring& pattern) {
				1305	return MatchPatternT(eval.c_str(), pattern.c_str());
				1306	}
				1307
				1308	bool MatchPattern(const std::string& eval, const std::string& pattern) {
				1309	return MatchPatternT(eval.c_str(), pattern.c_str());
				1310	}
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	1311
				1312	// For the various ToInt conversions, there are no ToIntTraits classes to use
				1313	// because there's no such thing as strtoi. Use *ToLongTraits through a cast
				1314	// instead, requiring that long and int are compatible and equal-width. They
				1315	// are on our target platforms.
				1316
				1317	bool StringToInt(const std::string& input, int* output) {
				1318	DCHECK(sizeof(int) == sizeof(long));
				1319	return StringToNumber<StringToLongTraits>(input,
				1320	reinterpret_cast<long*>(output));
				1321	}
				1322
				1323	bool StringToInt(const std::wstring& input, int* output) {
				1324	DCHECK(sizeof(int) == sizeof(long));
				1325	return StringToNumber<WStringToLongTraits>(input,
				1326	reinterpret_cast<long*>(output));
				1327	}
				1328
				1329	bool StringToInt64(const std::string& input, int64* output) {
				1330	return StringToNumber<StringToInt64Traits>(input, output);
				1331	}
				1332
				1333	bool StringToInt64(const std::wstring& input, int64* output) {
				1334	return StringToNumber<WStringToInt64Traits>(input, output);
				1335	}
				1336
				1337	bool HexStringToInt(const std::string& input, int* output) {
				1338	DCHECK(sizeof(int) == sizeof(long));
				1339	return StringToNumber<HexStringToLongTraits>(input,
				1340	reinterpret_cast<long*>(output));
				1341	}
				1342
				1343	bool HexStringToInt(const std::wstring& input, int* output) {
				1344	DCHECK(sizeof(int) == sizeof(long));
				1345	return StringToNumber<HexWStringToLongTraits>(
				1346	input, reinterpret_cast<long*>(output));
				1347	}
				1348
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	1349	bool StringToDouble(const std::string& input, double* output) {
				1350	return StringToNumber<StringToDoubleTraits>(input, output);
				1351	}
				1352
				1353	bool StringToDouble(const std::wstring& input, double* output) {
				1354	return StringToNumber<WStringToDoubleTraits>(input, output);
				1355	}
				1356
mmentovai@google.com	9328568	2008-08-06 07:46:15 +0900	[diff] [blame]	1357	int StringToInt(const std::string& value) {
				1358	int result;
				1359	StringToInt(value, &result);
				1360	return result;
				1361	}
				1362
				1363	int StringToInt(const std::wstring& value) {
				1364	int result;
				1365	StringToInt(value, &result);
				1366	return result;
				1367	}
				1368
				1369	int64 StringToInt64(const std::string& value) {
				1370	int64 result;
				1371	StringToInt64(value, &result);
				1372	return result;
				1373	}
				1374
				1375	int64 StringToInt64(const std::wstring& value) {
				1376	int64 result;
				1377	StringToInt64(value, &result);
				1378	return result;
				1379	}
				1380
				1381	int HexStringToInt(const std::string& value) {
				1382	int result;
				1383	HexStringToInt(value, &result);
				1384	return result;
				1385	}
				1386
				1387	int HexStringToInt(const std::wstring& value) {
				1388	int result;
				1389	HexStringToInt(value, &result);
				1390	return result;
				1391	}
mmentovai@google.com	8dcf71c	2008-08-08 02:15:41 +0900	[diff] [blame]	1392
				1393	double StringToDouble(const std::string& value) {
				1394	double result;
				1395	StringToDouble(value, &result);
				1396	return result;
				1397	}
				1398
				1399	double StringToDouble(const std::wstring& value) {
				1400	double result;
				1401	StringToDouble(value, &result);
				1402	return result;
				1403	}
deanm@google.com	b533521	2008-08-13 23:33:40 +0900	[diff] [blame]	1404
				1405	// The following code is compatible with the OpenBSD lcpy interface. See:
				1406	// http://www.gratisoft.us/todd/papers/strlcpy.html
				1407	// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
				1408
				1409	namespace {
				1410
				1411	template <typename CHAR>
				1412	size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
				1413	for (size_t i = 0; i < dst_size; ++i) {
				1414	if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.
				1415	return i;
				1416	}
				1417
				1418	// We were left off at dst_size. We over copied 1 byte. Null terminate.
				1419	if (dst_size != 0)
				1420	dst[dst_size - 1] = 0;
				1421
				1422	// Count the rest of the \|src\|, and return it's length in characters.
				1423	while (src[dst_size]) ++dst_size;
				1424	return dst_size;
				1425	}
				1426
				1427	} // namespace
				1428
				1429	size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
				1430	return lcpyT<char>(dst, src, dst_size);
				1431	}
				1432	size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
				1433	return lcpyT<wchar_t>(dst, src, dst_size);
				1434	}
license.bot	f003cfe	2008-08-24 09:55:55 +0900	[diff] [blame^]	1435