Blame - src/google/protobuf/stubs/strutil.cc - platform/external/protobuf-javalite

blob: 7955d261130d984662e7680dbb8b7af49f573714 [file] [log] [blame]

temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1	// Protocol Buffers - Google's data interchange format
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	2	// Copyright 2008 Google Inc. All rights reserved.
Feng Xiao	e428862	2014-10-01 16:26:23 -0700	[diff] [blame]	3	// https://developers.google.com/protocol-buffers/
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	4	//
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are
				7	// met:
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	8	//
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	9	// * Redistributions of source code must retain the above copyright
				10	// notice, this list of conditions and the following disclaimer.
				11	// * Redistributions in binary form must reproduce the above
				12	// copyright notice, this list of conditions and the following disclaimer
				13	// in the documentation and/or other materials provided with the
				14	// distribution.
				15	// * Neither the name of Google Inc. nor the names of its
				16	// contributors may be used to endorse or promote products derived from
				17	// this software without specific prior written permission.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	18	//
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	30
				31	// from google3/strings/strutil.cc
				32
				33	#include <google/protobuf/stubs/strutil.h>
				34	#include <errno.h>
				35	#include <float.h> // FLT_DIG and DBL_DIG
				36	#include <limits>
				37	#include <limits.h>
kenton@google.com	25bc5cd	2008-12-04 20:34:50 +0000	[diff] [blame]	38	#include <stdio.h>
kenton@google.com	c91e1fe	2009-10-12 19:22:03 +0000	[diff] [blame]	39	#include <iterator>
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	40
				41	#ifdef _WIN32
				42	// MSVC has only _snprintf, not snprintf.
				43	//
				44	// MinGW has both snprintf and _snprintf, but they appear to be different
				45	// functions. The former is buggy. When invoked like so:
				46	// char buffer[32];
				47	// snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
				48	// it prints "1.23000e+10". This is plainly wrong: %g should never print
				49	// trailing zeros after the decimal point. For some reason this bug only
				50	// occurs with some input values, not all. In any case, _snprintf does the
				51	// right thing, so we use it.
				52	#define snprintf _snprintf
				53	#endif
				54
				55	namespace google {
				56	namespace protobuf {
				57
				58	inline bool IsNaN(double value) {
				59	// NaN is never equal to anything, even itself.
				60	return value != value;
				61	}
				62
kenton@google.com	a2a32c2	2008-11-14 17:29:32 +0000	[diff] [blame]	63	// These are defined as macros on some platforms. #undef them so that we can
				64	// redefine them.
				65	#undef isxdigit
				66	#undef isprint
				67
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	68	// The definitions of these in ctype.h change based on locale. Since our
				69	// string manipulation is all in relation to the protocol buffer and C++
				70	// languages, we always want to use the C locale. So, we re-define these
				71	// exactly as we want them.
// Locale-independent replacement for the ctype.h isxdigit(): returns true
// only for the ASCII characters 0-9, a-f and A-F.
inline bool isxdigit(char c) {
  return ('0' <= c && c <= '9') ||
         ('a' <= c && c <= 'f') ||
         ('A' <= c && c <= 'F');
}
				77
// Locale-independent replacement for the ctype.h isprint(): returns true
// for characters from space (0x20) through tilde (0x7E) inclusive.
inline bool isprint(char c) {
  const bool below_space = c < 0x20;
  const bool above_tilde = c > 0x7E;
  return !(below_space || above_tilde);
}
				81
				82	// ----------------------------------------------------------------------
				83	// StripString
				84	// Replaces any occurrence of the character 'remove' (or the characters
				85	// in 'remove') with the character 'replacewith'.
				86	// ----------------------------------------------------------------------
				87	void StripString(string* s, const char* remove, char replacewith) {
				88	const char * str_start = s->c_str();
				89	const char * str = str_start;
				90	for (str = strpbrk(str, remove);
				91	str != NULL;
				92	str = strpbrk(str + 1, remove)) {
				93	(*s)[str - str_start] = replacewith;
				94	}
				95	}
				96
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	97	void StripWhitespace(string* str) {
				98	int str_length = str->length();
				99
				100	// Strip off leading whitespace.
				101	int first = 0;
				102	while (first < str_length && ascii_isspace(str->at(first))) {
				103	++first;
				104	}
				105	// If entire string is white space.
				106	if (first == str_length) {
				107	str->clear();
				108	return;
				109	}
				110	if (first > 0) {
				111	str->erase(0, first);
				112	str_length -= first;
				113	}
				114
				115	// Strip off trailing whitespace.
				116	int last = str_length - 1;
				117	while (last >= 0 && ascii_isspace(str->at(last))) {
				118	--last;
				119	}
				120	if (last != (str_length - 1) && last >= 0) {
				121	str->erase(last + 1, string::npos);
				122	}
				123	}
				124
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	125	// ----------------------------------------------------------------------
				126	// StringReplace()
				127	// Replace the "old" pattern with the "new" pattern in a string,
				128	// and append the result to "res". If replace_all is false,
				129	// it only replaces the first instance of "old."
				130	// ----------------------------------------------------------------------
				131
				132	void StringReplace(const string& s, const string& oldsub,
				133	const string& newsub, bool replace_all,
				134	string* res) {
				135	if (oldsub.empty()) {
				136	res->append(s); // if empty, append the given string.
				137	return;
				138	}
				139
				140	string::size_type start_pos = 0;
				141	string::size_type pos;
				142	do {
				143	pos = s.find(oldsub, start_pos);
				144	if (pos == string::npos) {
				145	break;
				146	}
				147	res->append(s, start_pos, pos - start_pos);
				148	res->append(newsub);
				149	start_pos = pos + oldsub.size(); // start searching again after the "old"
				150	} while (replace_all);
				151	res->append(s, start_pos, s.length() - start_pos);
				152	}
				153
				154	// ----------------------------------------------------------------------
				155	// StringReplace()
				156	// Give me a string and two patterns "old" and "new", and I replace
				157	// the first instance of "old" in the string with "new", if it
				158	// exists. If "global" is true; call this repeatedly until it
				159	// fails. RETURN a new string, regardless of whether the replacement
				160	// happened or not.
				161	// ----------------------------------------------------------------------
				162
				163	string StringReplace(const string& s, const string& oldsub,
				164	const string& newsub, bool replace_all) {
				165	string ret;
				166	StringReplace(s, oldsub, newsub, replace_all, &ret);
				167	return ret;
				168	}
				169
				170	// ----------------------------------------------------------------------
				171	// SplitStringUsing()
				172	// Split a string using a character delimiter. Append the components
				173	// to 'result'.
				174	//
				175	// Note: For multi-character delimiters, this routine will split on ANY of
				176	// the characters in the string, not the entire string as a single delimiter.
				177	// ----------------------------------------------------------------------
				178	template <typename ITR>
				179	static inline
				180	void SplitStringToIteratorUsing(const string& full,
				181	const char* delim,
				182	ITR& result) {
				183	// Optimize the common case where delim is a single character.
				184	if (delim[0] != '\0' && delim[1] == '\0') {
				185	char c = delim[0];
				186	const char* p = full.data();
				187	const char* end = p + full.size();
				188	while (p != end) {
				189	if (*p == c) {
				190	++p;
				191	} else {
				192	const char* start = p;
				193	while (++p != end && *p != c);
				194	*result++ = string(start, p - start);
				195	}
				196	}
				197	return;
				198	}
				199
				200	string::size_type begin_index, end_index;
				201	begin_index = full.find_first_not_of(delim);
				202	while (begin_index != string::npos) {
				203	end_index = full.find_first_of(delim, begin_index);
				204	if (end_index == string::npos) {
				205	*result++ = full.substr(begin_index);
				206	return;
				207	}
				208	*result++ = full.substr(begin_index, (end_index - begin_index));
				209	begin_index = full.find_first_not_of(delim, end_index);
				210	}
				211	}
				212
				213	void SplitStringUsing(const string& full,
				214	const char* delim,
				215	vector<string>* result) {
				216	back_insert_iterator< vector<string> > it(*result);
				217	SplitStringToIteratorUsing(full, delim, it);
				218	}
				219
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	220	// Split a string using a character delimiter. Append the components
				221	// to 'result'. If there are consecutive delimiters, this function
				222	// will return corresponding empty strings. The string is split into
				223	// at most the specified number of pieces greedily. This means that the
				224	// last piece may possibly be split further. To split into as many pieces
				225	// as possible, specify 0 as the number of pieces.
				226	//
				227	// If "full" is the empty string, yields an empty string as the only value.
				228	//
				229	// If "pieces" is negative for some reason, it returns the whole string
				230	// ----------------------------------------------------------------------
				231	template <typename StringType, typename ITR>
				232	static inline
				233	void SplitStringToIteratorAllowEmpty(const StringType& full,
				234	const char* delim,
				235	int pieces,
				236	ITR& result) {
				237	string::size_type begin_index, end_index;
				238	begin_index = 0;
				239
				240	for (int i = 0; (i < pieces-1) \|\| (pieces == 0); i++) {
				241	end_index = full.find_first_of(delim, begin_index);
				242	if (end_index == string::npos) {
				243	*result++ = full.substr(begin_index);
				244	return;
				245	}
				246	*result++ = full.substr(begin_index, (end_index - begin_index));
				247	begin_index = end_index + 1;
				248	}
				249	*result++ = full.substr(begin_index);
				250	}
				251
				252	void SplitStringAllowEmpty(const string& full, const char* delim,
				253	vector<string>* result) {
				254	back_insert_iterator<vector<string> > it(*result);
				255	SplitStringToIteratorAllowEmpty(full, delim, 0, it);
				256	}
				257
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	258	// ----------------------------------------------------------------------
				259	// JoinStrings()
				260	// This merges a vector of string components with delim inserted
				261	// as separators between components.
				262	//
				263	// ----------------------------------------------------------------------
				264	template <class ITERATOR>
				265	static void JoinStringsIterator(const ITERATOR& start,
				266	const ITERATOR& end,
				267	const char* delim,
				268	string* result) {
				269	GOOGLE_CHECK(result != NULL);
				270	result->clear();
				271	int delim_length = strlen(delim);
				272
				273	// Precompute resulting length so we can reserve() memory in one shot.
				274	int length = 0;
				275	for (ITERATOR iter = start; iter != end; ++iter) {
				276	if (iter != start) {
				277	length += delim_length;
				278	}
				279	length += iter->size();
				280	}
				281	result->reserve(length);
				282
				283	// Now combine everything.
				284	for (ITERATOR iter = start; iter != end; ++iter) {
				285	if (iter != start) {
				286	result->append(delim, delim_length);
				287	}
				288	result->append(iter->data(), iter->size());
				289	}
				290	}
				291
				292	void JoinStrings(const vector<string>& components,
				293	const char* delim,
				294	string * result) {
				295	JoinStringsIterator(components.begin(), components.end(), delim, result);
				296	}
				297
				298	// ----------------------------------------------------------------------
				299	// UnescapeCEscapeSequences()
				300	// This does all the unescaping that C does: \ooo, \r, \n, etc
				301	// Returns length of resulting string.
				302	// The implementation of \x parses any positive number of hex digits,
				303	// but it is an error if the value requires more than 8 bits, and the
				304	// result is truncated to 8 bits.
				305	//
				306	// The second call stores its errors in a supplied string vector.
				307	// If the string vector pointer is NULL, it reports the errors with LOG().
				308	// ----------------------------------------------------------------------
				309
				310	#define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))
				311
				312	inline int hex_digit_to_int(char c) {
				313	/* Assume ASCII. */
				314	assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);
				315	assert(isxdigit(c));
				316	int x = static_cast<unsigned char>(c);
				317	if (x > '9') {
				318	x += 9;
				319	}
				320	return x & 0xf;
				321	}
				322
				323	// Protocol buffers doesn't ever care about errors, but I don't want to remove
				324	// the code.
				325	#define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
				326
				327	int UnescapeCEscapeSequences(const char* source, char* dest) {
				328	return UnescapeCEscapeSequences(source, dest, NULL);
				329	}
				330
				331	int UnescapeCEscapeSequences(const char* source, char* dest,
				332	vector<string> *errors) {
				333	GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";
				334
				335	char* d = dest;
				336	const char* p = source;
				337
				338	// Small optimization for case where source = dest and there's no escaping
				339	while ( p == d && p != '\0' && p != '\\' )
				340	p++, d++;
				341
				342	while (*p != '\0') {
				343	if (*p != '\\') {
				344	d++ = p++;
				345	} else {
				346	switch ( *++p ) { // skip past the '\\'
				347	case '\0':
				348	LOG_STRING(ERROR, errors) << "String cannot end with \\";
				349	*d = '\0';
				350	return d - dest; // we're done with p
				351	case 'a': *d++ = '\a'; break;
				352	case 'b': *d++ = '\b'; break;
				353	case 'f': *d++ = '\f'; break;
				354	case 'n': *d++ = '\n'; break;
				355	case 'r': *d++ = '\r'; break;
				356	case 't': *d++ = '\t'; break;
				357	case 'v': *d++ = '\v'; break;
				358	case '\\': *d++ = '\\'; break;
				359	case '?': *d++ = '\?'; break; // \? Who knew?
				360	case '\'': *d++ = '\''; break;
				361	case '"': *d++ = '\"'; break;
				362	case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits
				363	case '4': case '5': case '6': case '7': {
				364	char ch = *p - '0';
				365	if ( IS_OCTAL_DIGIT(p[1]) )
				366	ch = ch * 8 + *++p - '0';
				367	if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice
				368	ch = ch * 8 + *++p - '0'; // now points at last digit
				369	*d++ = ch;
				370	break;
				371	}
				372	case 'x': case 'X': {
				373	if (!isxdigit(p[1])) {
				374	if (p[1] == '\0') {
				375	LOG_STRING(ERROR, errors) << "String cannot end with \\x";
				376	} else {
				377	LOG_STRING(ERROR, errors) <<
				378	"\\x cannot be followed by non-hex digit: \\" << *p << p[1];
				379	}
				380	break;
				381	}
				382	unsigned int ch = 0;
				383	const char *hex_start = p;
				384	while (isxdigit(p[1])) // arbitrarily many hex digits
				385	ch = (ch << 4) + hex_digit_to_int(*++p);
				386	if (ch > 0xFF)
				387	LOG_STRING(ERROR, errors) << "Value of " <<
				388	"\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
				389	*d++ = ch;
				390	break;
				391	}
				392	#if 0 // TODO(kenton): Support \u and \U? Requires runetochar().
				393	case 'u': {
				394	// \uhhhh => convert 4 hex digits to UTF-8
				395	char32 rune = 0;
				396	const char *hex_start = p;
				397	for (int i = 0; i < 4; ++i) {
				398	if (isxdigit(p[1])) { // Look one char ahead.
				399	rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
				400	} else {
				401	LOG_STRING(ERROR, errors)
				402	<< "\\u must be followed by 4 hex digits: \\"
				403	<< string(hex_start, p+1-hex_start);
				404	break;
				405	}
				406	}
				407	d += runetochar(d, &rune);
				408	break;
				409	}
				410	case 'U': {
				411	// \Uhhhhhhhh => convert 8 hex digits to UTF-8
				412	char32 rune = 0;
				413	const char *hex_start = p;
				414	for (int i = 0; i < 8; ++i) {
				415	if (isxdigit(p[1])) { // Look one char ahead.
				416	// Don't change rune until we're sure this
				417	// is within the Unicode limit, but do advance p.
				418	char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
				419	if (newrune > 0x10FFFF) {
				420	LOG_STRING(ERROR, errors)
				421	<< "Value of \\"
				422	<< string(hex_start, p + 1 - hex_start)
				423	<< " exceeds Unicode limit (0x10FFFF)";
				424	break;
				425	} else {
				426	rune = newrune;
				427	}
				428	} else {
				429	LOG_STRING(ERROR, errors)
				430	<< "\\U must be followed by 8 hex digits: \\"
				431	<< string(hex_start, p+1-hex_start);
				432	break;
				433	}
				434	}
				435	d += runetochar(d, &rune);
				436	break;
				437	}
				438	#endif
				439	default:
				440	LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
				441	}
				442	p++; // read past letter we escaped
				443	}
				444	}
				445	*d = '\0';
				446	return d - dest;
				447	}
				448
				449	// ----------------------------------------------------------------------
				450	// UnescapeCEscapeString()
				451	// This does the same thing as UnescapeCEscapeSequences, but creates
				452	// a new string. The caller does not need to worry about allocating
				453	// a dest buffer. This should be used for non performance critical
				454	// tasks such as printing debug messages. It is safe for src and dest
				455	// to be the same.
				456	//
				457	// The second call stores its errors in a supplied string vector.
				458	// If the string vector pointer is NULL, it reports the errors with LOG().
				459	//
				460	// In the first and second calls, the length of dest is returned. In the
				461	// third call, the new string is returned.
				462	// ----------------------------------------------------------------------
				463	int UnescapeCEscapeString(const string& src, string* dest) {
				464	return UnescapeCEscapeString(src, dest, NULL);
				465	}
				466
				467	int UnescapeCEscapeString(const string& src, string* dest,
				468	vector<string> *errors) {
				469	scoped_array<char> unescaped(new char[src.size() + 1]);
				470	int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors);
				471	GOOGLE_CHECK(dest);
				472	dest->assign(unescaped.get(), len);
				473	return len;
				474	}
				475
				476	string UnescapeCEscapeString(const string& src) {
				477	scoped_array<char> unescaped(new char[src.size() + 1]);
				478	int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL);
				479	return string(unescaped.get(), len);
				480	}
				481
				482	// ----------------------------------------------------------------------
				483	// CEscapeString()
				484	// CHexEscapeString()
				485	// Copies 'src' to 'dest', escaping dangerous characters using
				486	// C-style escape sequences. This is very useful for preparing query
				487	// flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
				488	// hexadecimal rather than octal sequences.
				489	// Returns the number of bytes written to 'dest' (not including the \0)
				490	// or -1 if there was insufficient space.
				491	//
				492	// Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
				493	// ----------------------------------------------------------------------
// Escapes the 'src_len' bytes at 'src' into 'dest' (capacity 'dest_len'
// bytes, terminating NUL included) using C-style escape sequences.
//
//   use_hex    emit \xNN for non-printable bytes instead of octal \NNN.
//   utf8_safe  copy bytes >= 0x80 through unescaped.
//
// \n, \r, \t, ", ' and \ get two-character escapes; any other byte for which
// isprint() is false gets a four-character numeric escape. Returns the
// number of bytes written (not counting the NUL), or -1 if 'dest' is too
// small. On -1 the contents of 'dest' are unspecified, but nothing is ever
// written at or beyond dest + dest_len.
int CEscapeInternal(const char* src, int src_len, char* dest,
                    int dest_len, bool use_hex, bool utf8_safe) {
  static const char kHexDigits[] = "0123456789abcdef";
  const char* src_end = src + src_len;
  int used = 0;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    bool is_hex_escape = false;
    switch (*src) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default: {
        const unsigned char byte = static_cast<unsigned char>(*src);
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!utf8_safe || byte < 0x80) &&
            (!isprint(*src) ||
             (last_hex_escape && isxdigit(*src)))) {
          if (dest_len - used < 4)  // need space for 4 letter escape
            return -1;
          // Write the four characters by hand. sprintf() would also store a
          // terminating NUL as a fifth byte, which can fall outside 'dest'
          // when exactly four bytes remain.
          dest[used++] = '\\';
          if (use_hex) {
            dest[used++] = 'x';
            dest[used++] = kHexDigits[byte >> 4];
            dest[used++] = kHexDigits[byte & 0xf];
          } else {
            dest[used++] = static_cast<char>('0' + (byte >> 6));
            dest[used++] = static_cast<char>('0' + ((byte >> 3) & 7));
            dest[used++] = static_cast<char>('0' + (byte & 7));
          }
          is_hex_escape = use_hex;
        } else {
          dest[used++] = *src;
        }
        break;
      }
    }
    last_hex_escape = is_hex_escape;
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}
				538
				539	int CEscapeString(const char* src, int src_len, char* dest, int dest_len) {
kenton@google.com	fccb146	2009-12-18 02:11:36 +0000	[diff] [blame]	540	return CEscapeInternal(src, src_len, dest, dest_len, false, false);
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	541	}
				542
				543	// ----------------------------------------------------------------------
				544	// CEscape()
				545	// CHexEscape()
				546	// Copies 'src' to result, escaping dangerous characters using
				547	// C-style escape sequences. This is very useful for preparing query
				548	// flags. 'src' and 'dest' should not overlap. The 'Hex' version
				549	// uses hexadecimal rather than octal sequences.
				550	//
				551	// Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
				552	// ----------------------------------------------------------------------
				553	string CEscape(const string& src) {
				554	const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
				555	scoped_array<char> dest(new char[dest_length]);
				556	const int len = CEscapeInternal(src.data(), src.size(),
kenton@google.com	fccb146	2009-12-18 02:11:36 +0000	[diff] [blame]	557	dest.get(), dest_length, false, false);
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	558	GOOGLE_DCHECK_GE(len, 0);
				559	return string(dest.get(), len);
				560	}
				561
kenton@google.com	fccb146	2009-12-18 02:11:36 +0000	[diff] [blame]	562	namespace strings {
				563
				564	string Utf8SafeCEscape(const string& src) {
				565	const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
				566	scoped_array<char> dest(new char[dest_length]);
				567	const int len = CEscapeInternal(src.data(), src.size(),
				568	dest.get(), dest_length, false, true);
				569	GOOGLE_DCHECK_GE(len, 0);
				570	return string(dest.get(), len);
				571	}
				572
				573	string CHexEscape(const string& src) {
				574	const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
				575	scoped_array<char> dest(new char[dest_length]);
				576	const int len = CEscapeInternal(src.data(), src.size(),
				577	dest.get(), dest_length, true, false);
				578	GOOGLE_DCHECK_GE(len, 0);
				579	return string(dest.get(), len);
				580	}
				581
				582	} // namespace strings
				583
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	584	// ----------------------------------------------------------------------
				585	// strto32_adaptor()
				586	// strtou32_adaptor()
				587	// Implementation of strto[u]l replacements that have identical
				588	// overflow and underflow characteristics for both ILP-32 and LP-64
				589	// platforms, including errno preservation in error-free calls.
				590	// ----------------------------------------------------------------------
				591
				592	int32 strto32_adaptor(const char nptr, char *endptr, int base) {
				593	const int saved_errno = errno;
				594	errno = 0;
				595	const long result = strtol(nptr, endptr, base);
				596	if (errno == ERANGE && result == LONG_MIN) {
				597	return kint32min;
				598	} else if (errno == ERANGE && result == LONG_MAX) {
				599	return kint32max;
				600	} else if (errno == 0 && result < kint32min) {
				601	errno = ERANGE;
				602	return kint32min;
				603	} else if (errno == 0 && result > kint32max) {
				604	errno = ERANGE;
				605	return kint32max;
				606	}
				607	if (errno == 0)
				608	errno = saved_errno;
				609	return static_cast<int32>(result);
				610	}
				611
				612	uint32 strtou32_adaptor(const char nptr, char *endptr, int base) {
				613	const int saved_errno = errno;
				614	errno = 0;
				615	const unsigned long result = strtoul(nptr, endptr, base);
				616	if (errno == ERANGE && result == ULONG_MAX) {
				617	return kuint32max;
				618	} else if (errno == 0 && result > kuint32max) {
				619	errno = ERANGE;
				620	return kuint32max;
				621	}
				622	if (errno == 0)
				623	errno = saved_errno;
				624	return static_cast<uint32>(result);
				625	}
				626
// Strips surrounding spaces and one optional leading '+' or '-' from *text.
//
// On success returns true, sets *negative_ptr to whether the sign was '-',
// and replaces *text with what remains after the sign. Returns false, and
// leaves *text untouched, when nothing remains after removing the spaces or
// after removing the sign. Only the space character ' ' is treated as
// whitespace.
inline bool safe_parse_sign(string* text  /*inout*/,
                            bool* negative_ptr  /*output*/) {
  const char* start = text->data();
  const char* end = start + text->size();

  // Consume whitespace.
  while (start < end && (start[0] == ' ')) {
    ++start;
  }
  while (start < end && (end[-1] == ' ')) {
    --end;
  }
  if (start >= end) {
    return false;
  }

  // Consume sign.
  *negative_ptr = (start[0] == '-');
  if (*negative_ptr || start[0] == '+') {
    ++start;
    if (start >= end) {
      return false;
    }
  }
  *text = text->substr(start - text->data(), end - start);
  return true;
}
				654
				655	inline bool safe_parse_positive_int(
				656	string text, int32* value_p) {
				657	int base = 10;
				658	int32 value = 0;
				659	const int32 vmax = std::numeric_limits<int32>::max();
				660	assert(vmax > 0);
				661	assert(vmax >= base);
				662	const int32 vmax_over_base = vmax / base;
				663	const char* start = text.data();
				664	const char* end = start + text.size();
				665	// loop over digits
				666	for (; start < end; ++start) {
				667	unsigned char c = static_cast<unsigned char>(start[0]);
				668	int digit = c - '0';
				669	if (digit >= base \|\| digit < 0) {
				670	*value_p = value;
				671	return false;
				672	}
				673	if (value > vmax_over_base) {
				674	*value_p = vmax;
				675	return false;
				676	}
				677	value *= base;
				678	if (value > vmax - digit) {
				679	*value_p = vmax;
				680	return false;
				681	}
				682	value += digit;
				683	}
				684	*value_p = value;
				685	return true;
				686	}
				687
				688	inline bool safe_parse_negative_int(
				689	string text, int32* value_p) {
				690	int base = 10;
				691	int32 value = 0;
				692	const int32 vmin = std::numeric_limits<int32>::min();
				693	assert(vmin < 0);
				694	assert(vmin <= 0 - base);
				695	int32 vmin_over_base = vmin / base;
				696	// 2003 c++ standard [expr.mul]
				697	// "... the sign of the remainder is implementation-defined."
				698	// Although (vmin/base)*base + vmin%base is always vmin.
				699	// 2011 c++ standard tightens the spec but we cannot rely on it.
				700	if (vmin % base > 0) {
				701	vmin_over_base += 1;
				702	}
				703	const char* start = text.data();
				704	const char* end = start + text.size();
				705	// loop over digits
				706	for (; start < end; ++start) {
				707	unsigned char c = static_cast<unsigned char>(start[0]);
				708	int digit = c - '0';
				709	if (digit >= base \|\| digit < 0) {
				710	*value_p = value;
				711	return false;
				712	}
				713	if (value < vmin_over_base) {
				714	*value_p = vmin;
				715	return false;
				716	}
				717	value *= base;
				718	if (value < vmin + digit) {
				719	*value_p = vmin;
				720	return false;
				721	}
				722	value -= digit;
				723	}
				724	*value_p = value;
				725	return true;
				726	}
				727
				728	bool safe_int(string text, int32* value_p) {
				729	*value_p = 0;
				730	bool negative;
				731	if (!safe_parse_sign(&text, &negative)) {
				732	return false;
				733	}
				734	if (!negative) {
				735	return safe_parse_positive_int(text, value_p);
				736	} else {
				737	return safe_parse_negative_int(text, value_p);
				738	}
				739	}
				740
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	741	// ----------------------------------------------------------------------
				742	// FastIntToBuffer()
				743	// FastInt64ToBuffer()
				744	// FastHexToBuffer()
				745	// FastHex64ToBuffer()
				746	// FastHex32ToBuffer()
				747	// ----------------------------------------------------------------------
				748
				749	// Offset into buffer where FastInt64ToBuffer places the end of string
				750	// null character. Also used by FastInt64ToBufferLeft.
				751	static const int kFastInt64ToBufferOffset = 21;
				752
				753	char FastInt64ToBuffer(int64 i, char buffer) {
				754	// We could collapse the positive and negative sections, but that
				755	// would be slightly slower for positive numbers...
				756	// 22 bytes is enough to store -2**64, -18446744073709551616.
				757	char* p = buffer + kFastInt64ToBufferOffset;
				758	*p-- = '\0';
				759	if (i >= 0) {
				760	do {
				761	*p-- = '0' + i % 10;
				762	i /= 10;
				763	} while (i > 0);
				764	return p + 1;
				765	} else {
				766	// On different platforms, % and / have different behaviors for
				767	// negative numbers, so we need to jump through hoops to make sure
				768	// we don't divide negative numbers.
				769	if (i > -10) {
				770	i = -i;
				771	*p-- = '0' + i;
				772	*p = '-';
				773	return p;
				774	} else {
				775	// Make sure we aren't at MIN_INT, in which case we can't say i = -i
				776	i = i + 10;
				777	i = -i;
				778	*p-- = '0' + i % 10;
				779	// Undo what we did a moment ago
				780	i = i / 10 + 1;
				781	do {
				782	*p-- = '0' + i % 10;
				783	i /= 10;
				784	} while (i > 0);
				785	*p = '-';
				786	return p;
				787	}
				788	}
				789	}
				790
				791	// Offset into buffer where FastInt32ToBuffer places the end of string
				792	// null character. Also used by FastInt32ToBufferLeft
				793	static const int kFastInt32ToBufferOffset = 11;
				794
				795	// Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the
				796	// compiler to generate 32 bit arithmetic instructions. It's much faster, at
				797	// least with 32 bit binaries.
				798	char FastInt32ToBuffer(int32 i, char buffer) {
				799	// We could collapse the positive and negative sections, but that
				800	// would be slightly slower for positive numbers...
				801	// 12 bytes is enough to store -2**32, -4294967296.
				802	char* p = buffer + kFastInt32ToBufferOffset;
				803	*p-- = '\0';
				804	if (i >= 0) {
				805	do {
				806	*p-- = '0' + i % 10;
				807	i /= 10;
				808	} while (i > 0);
				809	return p + 1;
				810	} else {
				811	// On different platforms, % and / have different behaviors for
				812	// negative numbers, so we need to jump through hoops to make sure
				813	// we don't divide negative numbers.
				814	if (i > -10) {
				815	i = -i;
				816	*p-- = '0' + i;
				817	*p = '-';
				818	return p;
				819	} else {
				820	// Make sure we aren't at MIN_INT, in which case we can't say i = -i
				821	i = i + 10;
				822	i = -i;
				823	*p-- = '0' + i % 10;
				824	// Undo what we did a moment ago
				825	i = i / 10 + 1;
				826	do {
				827	*p-- = '0' + i % 10;
				828	i /= 10;
				829	} while (i > 0);
				830	*p = '-';
				831	return p;
				832	}
				833	}
				834	}
				835
				836	char FastHexToBuffer(int i, char buffer) {
				837	GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
				838
				839	static const char *hexdigits = "0123456789abcdef";
				840	char *p = buffer + 21;
				841	*p-- = '\0';
				842	do {
				843	*p-- = hexdigits[i & 15]; // mod by 16
				844	i >>= 4; // divide by 16
				845	} while (i > 0);
				846	return p + 1;
				847	}
				848
				849	char InternalFastHexToBuffer(uint64 value, char buffer, int num_byte) {
				850	static const char *hexdigits = "0123456789abcdef";
				851	buffer[num_byte] = '\0';
				852	for (int i = num_byte - 1; i >= 0; i--) {
liujisi@google.com	cb6dd4e	2011-07-05 21:05:40 +0000	[diff] [blame]	853	#ifdef _M_X64
				854	// MSVC x64 platform has a bug optimizing the uint32(value) in the #else
				855	// block. Given that the uint32 cast was to improve performance on 32-bit
				856	// platforms, we use 64-bit '&' directly.
				857	buffer[i] = hexdigits[value & 0xf];
				858	#else
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	859	buffer[i] = hexdigits[uint32(value) & 0xf];
liujisi@google.com	cb6dd4e	2011-07-05 21:05:40 +0000	[diff] [blame]	860	#endif
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	861	value >>= 4;
				862	}
				863	return buffer;
				864	}
				865
				866	char FastHex64ToBuffer(uint64 value, char buffer) {
				867	return InternalFastHexToBuffer(value, buffer, 16);
				868	}
				869
				870	char FastHex32ToBuffer(uint32 value, char buffer) {
				871	return InternalFastHexToBuffer(value, buffer, 8);
				872	}
				873
				874	static inline char* PlaceNum(char* p, int num, char prev_sep) {
				875	*p-- = '0' + num % 10;
				876	*p-- = '0' + num / 10;
				877	*p-- = prev_sep;
				878	return p;
				879	}
				880
				881	// ----------------------------------------------------------------------
				882	// FastInt32ToBufferLeft()
				883	// FastUInt32ToBufferLeft()
				884	// FastInt64ToBufferLeft()
				885	// FastUInt64ToBufferLeft()
				886	//
				887	// Like the Fast*ToBuffer() functions above, these are intended for speed.
				888	// Unlike the Fast*ToBuffer() functions, however, these functions write
				889	// their output to the beginning of the buffer (hence the name, as the
				890	// output is left-aligned). The caller is responsible for ensuring that
				891	// the buffer has enough space to hold the output.
				892	//
				893	// Returns a pointer to the end of the string (i.e. the null character
				894	// terminating the string).
				895	// ----------------------------------------------------------------------
				896
				897	static const char two_ASCII_digits[100][2] = {
				898	{'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
				899	{'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
				900	{'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
				901	{'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
				902	{'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
				903	{'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
				904	{'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
				905	{'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
				906	{'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
				907	{'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
				908	{'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
				909	{'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
				910	{'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
				911	{'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
				912	{'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
				913	{'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
				914	{'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
				915	{'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
				916	{'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
				917	{'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
				918	};
				919
				920	char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
				921	int digits;
				922	const char *ASCII_digits = NULL;
				923	// The idea of this implementation is to trim the number of divides to as few
				924	// as possible by using multiplication and subtraction rather than mod (%),
				925	// and by outputting two digits at a time rather than one.
				926	// The huge-number case is first, in the hopes that the compiler will output
				927	// that case in one branch-free block of code, and only output conditional
				928	// branches into it from below.
				929	if (u >= 1000000000) { // >= 1,000,000,000
				930	digits = u / 100000000; // 100,000,000
				931	ASCII_digits = two_ASCII_digits[digits];
				932	buffer[0] = ASCII_digits[0];
				933	buffer[1] = ASCII_digits[1];
				934	buffer += 2;
				935	sublt100_000_000:
				936	u -= digits * 100000000; // 100,000,000
				937	lt100_000_000:
				938	digits = u / 1000000; // 1,000,000
				939	ASCII_digits = two_ASCII_digits[digits];
				940	buffer[0] = ASCII_digits[0];
				941	buffer[1] = ASCII_digits[1];
				942	buffer += 2;
				943	sublt1_000_000:
				944	u -= digits * 1000000; // 1,000,000
				945	lt1_000_000:
				946	digits = u / 10000; // 10,000
				947	ASCII_digits = two_ASCII_digits[digits];
				948	buffer[0] = ASCII_digits[0];
				949	buffer[1] = ASCII_digits[1];
				950	buffer += 2;
				951	sublt10_000:
				952	u -= digits * 10000; // 10,000
				953	lt10_000:
				954	digits = u / 100;
				955	ASCII_digits = two_ASCII_digits[digits];
				956	buffer[0] = ASCII_digits[0];
				957	buffer[1] = ASCII_digits[1];
				958	buffer += 2;
				959	sublt100:
				960	u -= digits * 100;
				961	lt100:
				962	digits = u;
				963	ASCII_digits = two_ASCII_digits[digits];
				964	buffer[0] = ASCII_digits[0];
				965	buffer[1] = ASCII_digits[1];
				966	buffer += 2;
				967	done:
				968	*buffer = 0;
				969	return buffer;
				970	}
				971
				972	if (u < 100) {
				973	digits = u;
				974	if (u >= 10) goto lt100;
				975	*buffer++ = '0' + digits;
				976	goto done;
				977	}
				978	if (u < 10000) { // 10,000
				979	if (u >= 1000) goto lt10_000;
				980	digits = u / 100;
				981	*buffer++ = '0' + digits;
				982	goto sublt100;
				983	}
				984	if (u < 1000000) { // 1,000,000
				985	if (u >= 100000) goto lt1_000_000;
				986	digits = u / 10000; // 10,000
				987	*buffer++ = '0' + digits;
				988	goto sublt10_000;
				989	}
				990	if (u < 100000000) { // 100,000,000
				991	if (u >= 10000000) goto lt100_000_000;
				992	digits = u / 1000000; // 1,000,000
				993	*buffer++ = '0' + digits;
				994	goto sublt1_000_000;
				995	}
				996	// we already know that u < 1,000,000,000
				997	digits = u / 100000000; // 100,000,000
				998	*buffer++ = '0' + digits;
				999	goto sublt100_000_000;
				1000	}
				1001
				1002	char* FastInt32ToBufferLeft(int32 i, char* buffer) {
				1003	uint32 u = i;
				1004	if (i < 0) {
				1005	*buffer++ = '-';
				1006	u = -i;
				1007	}
				1008	return FastUInt32ToBufferLeft(u, buffer);
				1009	}
				1010
				1011	char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
				1012	int digits;
				1013	const char *ASCII_digits = NULL;
				1014
				1015	uint32 u = static_cast<uint32>(u64);
				1016	if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
				1017
				1018	uint64 top_11_digits = u64 / 1000000000;
				1019	buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
				1020	u = u64 - (top_11_digits * 1000000000);
				1021
				1022	digits = u / 10000000; // 10,000,000
				1023	GOOGLE_DCHECK_LT(digits, 100);
				1024	ASCII_digits = two_ASCII_digits[digits];
				1025	buffer[0] = ASCII_digits[0];
				1026	buffer[1] = ASCII_digits[1];
				1027	buffer += 2;
				1028	u -= digits * 10000000; // 10,000,000
				1029	digits = u / 100000; // 100,000
				1030	ASCII_digits = two_ASCII_digits[digits];
				1031	buffer[0] = ASCII_digits[0];
				1032	buffer[1] = ASCII_digits[1];
				1033	buffer += 2;
				1034	u -= digits * 100000; // 100,000
				1035	digits = u / 1000; // 1,000
				1036	ASCII_digits = two_ASCII_digits[digits];
				1037	buffer[0] = ASCII_digits[0];
				1038	buffer[1] = ASCII_digits[1];
				1039	buffer += 2;
				1040	u -= digits * 1000; // 1,000
				1041	digits = u / 10;
				1042	ASCII_digits = two_ASCII_digits[digits];
				1043	buffer[0] = ASCII_digits[0];
				1044	buffer[1] = ASCII_digits[1];
				1045	buffer += 2;
				1046	u -= digits * 10;
				1047	digits = u;
				1048	*buffer++ = '0' + digits;
				1049	*buffer = 0;
				1050	return buffer;
				1051	}
				1052
				1053	char* FastInt64ToBufferLeft(int64 i, char* buffer) {
				1054	uint64 u = i;
				1055	if (i < 0) {
				1056	*buffer++ = '-';
				1057	u = -i;
				1058	}
				1059	return FastUInt64ToBufferLeft(u, buffer);
				1060	}
				1061
				1062	// ----------------------------------------------------------------------
				1063	// SimpleItoa()
				1064	// Description: converts an integer to a string.
				1065	//
				1066	// Return value: string
				1067	// ----------------------------------------------------------------------
				1068
				1069	string SimpleItoa(int i) {
				1070	char buffer[kFastToBufferSize];
				1071	return (sizeof(i) == 4) ?
				1072	FastInt32ToBuffer(i, buffer) :
				1073	FastInt64ToBuffer(i, buffer);
				1074	}
				1075
				1076	string SimpleItoa(unsigned int i) {
				1077	char buffer[kFastToBufferSize];
				1078	return string(buffer, (sizeof(i) == 4) ?
				1079	FastUInt32ToBufferLeft(i, buffer) :
				1080	FastUInt64ToBufferLeft(i, buffer));
				1081	}
				1082
				1083	string SimpleItoa(long i) {
				1084	char buffer[kFastToBufferSize];
				1085	return (sizeof(i) == 4) ?
				1086	FastInt32ToBuffer(i, buffer) :
				1087	FastInt64ToBuffer(i, buffer);
				1088	}
				1089
				1090	string SimpleItoa(unsigned long i) {
				1091	char buffer[kFastToBufferSize];
				1092	return string(buffer, (sizeof(i) == 4) ?
				1093	FastUInt32ToBufferLeft(i, buffer) :
				1094	FastUInt64ToBufferLeft(i, buffer));
				1095	}
				1096
				1097	string SimpleItoa(long long i) {
				1098	char buffer[kFastToBufferSize];
				1099	return (sizeof(i) == 4) ?
				1100	FastInt32ToBuffer(i, buffer) :
				1101	FastInt64ToBuffer(i, buffer);
				1102	}
				1103
				1104	string SimpleItoa(unsigned long long i) {
				1105	char buffer[kFastToBufferSize];
				1106	return string(buffer, (sizeof(i) == 4) ?
				1107	FastUInt32ToBufferLeft(i, buffer) :
				1108	FastUInt64ToBufferLeft(i, buffer));
				1109	}
				1110
				1111	// ----------------------------------------------------------------------
				1112	// SimpleDtoa()
				1113	// SimpleFtoa()
				1114	// DoubleToBuffer()
				1115	// FloatToBuffer()
				1116	// We want to print the value without losing precision, but we also do
				1117	// not want to print more digits than necessary. This turns out to be
				1118	// trickier than it sounds. Numbers like 0.2 cannot be represented
				1119	// exactly in binary. If we print 0.2 with a very large precision,
				1120	// e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
				1121	// On the other hand, if we set the precision too low, we lose
				1122	// significant digits when printing numbers that actually need them.
				1123	// It turns out there is no precision value that does the right thing
				1124	// for all numbers.
				1125	//
				1126	// Our strategy is to first try printing with a precision that is never
				1127	// over-precise, then parse the result with strtod() to see if it
				1128	// matches. If not, we print again with a precision that will always
				1129	// give a precise result, but may use more digits than necessary.
				1130	//
				1131	// An arguably better strategy would be to use the algorithm described
				1132	// in "How to Print Floating-Point Numbers Accurately" by Steele &
				1133	// White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
				1134	// however, that the following implementation is about as fast as
				1135	// DMG's code. Furthermore, DMG's code locks mutexes, which means it
				1136	// will not scale well on multi-core machines. DMG's code is slightly
				1137	// more accurate (in that it will never use more digits than
				1138	// necessary), but this is probably irrelevant for most users.
				1139	//
				1140	// Rob Pike and Ken Thompson also have an implementation of dtoa() in
				1141	// third_party/fmt/fltfmt.cc. Their implementation is similar to this
				1142	// one in that it makes guesses and then uses strtod() to check them.
				1143	// Their implementation is faster because they use their own code to
				1144	// generate the digits in the first place rather than use snprintf(),
				1145	// thus avoiding format string parsing overhead. However, this makes
				1146	// it considerably more complicated than the following implementation,
				1147	// and it is embedded in a larger library. If speed turns out to be
				1148	// an issue, we could re-implement this in terms of their
				1149	// implementation.
				1150	// ----------------------------------------------------------------------
				1151
				1152	string SimpleDtoa(double value) {
				1153	char buffer[kDoubleToBufferSize];
				1154	return DoubleToBuffer(value, buffer);
				1155	}
				1156
				1157	string SimpleFtoa(float value) {
				1158	char buffer[kFloatToBufferSize];
				1159	return FloatToBuffer(value, buffer);
				1160	}
				1161
				1162	static inline bool IsValidFloatChar(char c) {
				1163	return ('0' <= c && c <= '9') \|\|
				1164	c == 'e' \|\| c == 'E' \|\|
				1165	c == '+' \|\| c == '-';
				1166	}
				1167
				1168	void DelocalizeRadix(char* buffer) {
				1169	// Fast check: if the buffer has a normal decimal point, assume no
				1170	// translation is needed.
				1171	if (strchr(buffer, '.') != NULL) return;
				1172
				1173	// Find the first unknown character.
				1174	while (IsValidFloatChar(*buffer)) ++buffer;
				1175
				1176	if (*buffer == '\0') {
				1177	// No radix character found.
				1178	return;
				1179	}
				1180
				1181	// We are now pointing at the locale-specific radix character. Replace it
				1182	// with '.'.
				1183	*buffer = '.';
				1184	++buffer;
				1185
				1186	if (!IsValidFloatChar(buffer) && buffer != '\0') {
				1187	// It appears the radix was a multi-byte character. We need to remove the
				1188	// extra bytes.
				1189	char* target = buffer;
				1190	do { ++buffer; } while (!IsValidFloatChar(buffer) && buffer != '\0');
				1191	memmove(target, buffer, strlen(buffer) + 1);
				1192	}
				1193	}
				1194
				1195	char* DoubleToBuffer(double value, char* buffer) {
				1196	// DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
				1197	// platforms these days. Just in case some system exists where DBL_DIG
				1198	// is significantly larger -- and risks overflowing our buffer -- we have
				1199	// this assert.
				1200	GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
				1201
				1202	if (value == numeric_limits<double>::infinity()) {
				1203	strcpy(buffer, "inf");
				1204	return buffer;
				1205	} else if (value == -numeric_limits<double>::infinity()) {
				1206	strcpy(buffer, "-inf");
				1207	return buffer;
				1208	} else if (IsNaN(value)) {
				1209	strcpy(buffer, "nan");
				1210	return buffer;
				1211	}
				1212
				1213	int snprintf_result =
				1214	snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
				1215
				1216	// The snprintf should never overflow because the buffer is significantly
				1217	// larger than the precision we asked for.
				1218	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
				1219
				1220	// We need to make parsed_value volatile in order to force the compiler to
				1221	// write it out to the stack. Otherwise, it may keep the value in a
				1222	// register, and if it does that, it may keep it as a long double instead
				1223	// of a double. This long double may have extra bits that make it compare
				1224	// unequal to "value" even though it would be exactly equal if it were
				1225	// truncated to a double.
				1226	volatile double parsed_value = strtod(buffer, NULL);
				1227	if (parsed_value != value) {
				1228	int snprintf_result =
				1229	snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
				1230
				1231	// Should never overflow; see above.
				1232	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
				1233	}
				1234
				1235	DelocalizeRadix(buffer);
				1236	return buffer;
				1237	}
				1238
				1239	bool safe_strtof(const char* str, float* value) {
				1240	char* endptr;
				1241	errno = 0; // errno only gets set on errors
kenton@google.com	3aa7a0d	2009-08-17 20:34:29 +0000	[diff] [blame]	1242	#if defined(_WIN32) \|\| defined (__hpux) // has no strtof()
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1243	*value = strtod(str, &endptr);
				1244	#else
				1245	*value = strtof(str, &endptr);
				1246	#endif
				1247	return str != 0 && endptr == 0 && errno == 0;
				1248	}
				1249
				1250	char* FloatToBuffer(float value, char* buffer) {
				1251	// FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
				1252	// platforms these days. Just in case some system exists where FLT_DIG
				1253	// is significantly larger -- and risks overflowing our buffer -- we have
				1254	// this assert.
				1255	GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
				1256
				1257	if (value == numeric_limits<double>::infinity()) {
				1258	strcpy(buffer, "inf");
				1259	return buffer;
				1260	} else if (value == -numeric_limits<double>::infinity()) {
				1261	strcpy(buffer, "-inf");
				1262	return buffer;
				1263	} else if (IsNaN(value)) {
				1264	strcpy(buffer, "nan");
				1265	return buffer;
				1266	}
				1267
				1268	int snprintf_result =
				1269	snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
				1270
				1271	// The snprintf should never overflow because the buffer is significantly
				1272	// larger than the precision we asked for.
				1273	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
				1274
				1275	float parsed_value;
				1276	if (!safe_strtof(buffer, &parsed_value) \|\| parsed_value != value) {
				1277	int snprintf_result =
				1278	snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value);
				1279
				1280	// Should never overflow; see above.
				1281	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
				1282	}
				1283
				1284	DelocalizeRadix(buffer);
				1285	return buffer;
				1286	}
				1287
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	1288	string ToHex(uint64 num) {
				1289	if (num == 0) {
				1290	return string("0");
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1291	}
				1292
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	1293	// Compute hex bytes in reverse order, writing to the back of the
				1294	// buffer.
				1295	char buf[16]; // No more than 16 hex digits needed.
				1296	char* bufptr = buf + 16;
				1297	static const char kHexChars[] = "0123456789abcdef";
				1298	while (num != 0) {
				1299	*--bufptr = kHexChars[num & 0xf];
				1300	num >>= 4;
				1301	}
				1302
				1303	return string(bufptr, buf + 16 - bufptr);
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1304	}
				1305
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame^]	1306	namespace strings {
				1307
				1308	AlphaNum::AlphaNum(strings::Hex hex) {
				1309	char *const end = &digits[kFastToBufferSize];
				1310	char *writer = end;
				1311	uint64 value = hex.value;
				1312	uint64 width = hex.spec;
				1313	// We accomplish minimum width by OR'ing in 0x10000 to the user's value,
				1314	// where 0x10000 is the smallest hex number that is as wide as the user
				1315	// asked for.
				1316	uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) \| value;
				1317	static const char hexdigits[] = "0123456789abcdef";
				1318	do {
				1319	*--writer = hexdigits[value & 0xF];
				1320	value >>= 4;
				1321	mask >>= 4;
				1322	} while (mask != 0);
				1323	piece_data_ = writer;
				1324	piece_size_ = end - writer;
				1325	}
				1326
				1327	} // namespace strings
				1328
				1329	// ----------------------------------------------------------------------
				1330	// StrCat()
				1331	// This merges the given strings or integers, with no delimiter. This
				1332	// is designed to be the fastest possible way to construct a string out
				1333	// of a mix of raw C strings, C++ strings, and integer values.
				1334	// ----------------------------------------------------------------------
				1335
				1336	// Append is merely a version of memcpy that returns the address of the byte
				1337	// after the area just overwritten. It comes in multiple flavors to minimize
				1338	// call overhead.
				1339	static char Append1(char out, const AlphaNum &x) {
				1340	memcpy(out, x.data(), x.size());
				1341	return out + x.size();
				1342	}
				1343
				1344	static char Append2(char out, const AlphaNum &x1, const AlphaNum &x2) {
				1345	memcpy(out, x1.data(), x1.size());
				1346	out += x1.size();
				1347
				1348	memcpy(out, x2.data(), x2.size());
				1349	return out + x2.size();
				1350	}
				1351
				1352	static char Append4(char out,
				1353	const AlphaNum &x1, const AlphaNum &x2,
				1354	const AlphaNum &x3, const AlphaNum &x4) {
				1355	memcpy(out, x1.data(), x1.size());
				1356	out += x1.size();
				1357
				1358	memcpy(out, x2.data(), x2.size());
				1359	out += x2.size();
				1360
				1361	memcpy(out, x3.data(), x3.size());
				1362	out += x3.size();
				1363
				1364	memcpy(out, x4.data(), x4.size());
				1365	return out + x4.size();
				1366	}
				1367
				1368	string StrCat(const AlphaNum &a, const AlphaNum &b) {
				1369	string result;
				1370	result.resize(a.size() + b.size());
				1371	char const begin = &result.begin();
				1372	char *out = Append2(begin, a, b);
				1373	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1374	return result;
				1375	}
				1376
				1377	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
				1378	string result;
				1379	result.resize(a.size() + b.size() + c.size());
				1380	char const begin = &result.begin();
				1381	char *out = Append2(begin, a, b);
				1382	out = Append1(out, c);
				1383	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1384	return result;
				1385	}
				1386
				1387	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1388	const AlphaNum &d) {
				1389	string result;
				1390	result.resize(a.size() + b.size() + c.size() + d.size());
				1391	char const begin = &result.begin();
				1392	char *out = Append4(begin, a, b, c, d);
				1393	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1394	return result;
				1395	}
				1396
				1397	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1398	const AlphaNum &d, const AlphaNum &e) {
				1399	string result;
				1400	result.resize(a.size() + b.size() + c.size() + d.size() + e.size());
				1401	char const begin = &result.begin();
				1402	char *out = Append4(begin, a, b, c, d);
				1403	out = Append1(out, e);
				1404	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1405	return result;
				1406	}
				1407
				1408	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1409	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {
				1410	string result;
				1411	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1412	f.size());
				1413	char const begin = &result.begin();
				1414	char *out = Append4(begin, a, b, c, d);
				1415	out = Append2(out, e, f);
				1416	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1417	return result;
				1418	}
				1419
				1420	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1421	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
				1422	const AlphaNum &g) {
				1423	string result;
				1424	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1425	f.size() + g.size());
				1426	char const begin = &result.begin();
				1427	char *out = Append4(begin, a, b, c, d);
				1428	out = Append2(out, e, f);
				1429	out = Append1(out, g);
				1430	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1431	return result;
				1432	}
				1433
				1434	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1435	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
				1436	const AlphaNum &g, const AlphaNum &h) {
				1437	string result;
				1438	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1439	f.size() + g.size() + h.size());
				1440	char const begin = &result.begin();
				1441	char *out = Append4(begin, a, b, c, d);
				1442	out = Append4(out, e, f, g, h);
				1443	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1444	return result;
				1445	}
				1446
				1447	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1448	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
				1449	const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {
				1450	string result;
				1451	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1452	f.size() + g.size() + h.size() + i.size());
				1453	char const begin = &result.begin();
				1454	char *out = Append4(begin, a, b, c, d);
				1455	out = Append4(out, e, f, g, h);
				1456	out = Append1(out, i);
				1457	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1458	return result;
				1459	}
				1460
				1461	// It's possible to call StrAppend with a char * pointer that is partway into
				1462	// the string we're appending to. However the results of this are random.
				1463	// Therefore, check for this in debug mode. Use unsigned math so we only have
				1464	// to do one comparison.
				1465	#define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \
				1466	GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \
				1467	uintptr_t((dest).size()))
				1468
				1469	void StrAppend(string *result, const AlphaNum &a) {
				1470	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1471	result->append(a.data(), a.size());
				1472	}
				1473
				1474	void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {
				1475	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1476	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
				1477	string::size_type old_size = result->size();
				1478	result->resize(old_size + a.size() + b.size());
				1479	char const begin = &result->begin();
				1480	char *out = Append2(begin + old_size, a, b);
				1481	GOOGLE_DCHECK_EQ(out, begin + result->size());
				1482	}
				1483
				1484	void StrAppend(string *result,
				1485	const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
				1486	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1487	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
				1488	GOOGLE_DCHECK_NO_OVERLAP(*result, c);
				1489	string::size_type old_size = result->size();
				1490	result->resize(old_size + a.size() + b.size() + c.size());
				1491	char const begin = &result->begin();
				1492	char *out = Append2(begin + old_size, a, b);
				1493	out = Append1(out, c);
				1494	GOOGLE_DCHECK_EQ(out, begin + result->size());
				1495	}
				1496
				1497	void StrAppend(string *result,
				1498	const AlphaNum &a, const AlphaNum &b,
				1499	const AlphaNum &c, const AlphaNum &d) {
				1500	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1501	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
				1502	GOOGLE_DCHECK_NO_OVERLAP(*result, c);
				1503	GOOGLE_DCHECK_NO_OVERLAP(*result, d);
				1504	string::size_type old_size = result->size();
				1505	result->resize(old_size + a.size() + b.size() + c.size() + d.size());
				1506	char const begin = &result->begin();
				1507	char *out = Append4(begin + old_size, a, b, c, d);
				1508	GOOGLE_DCHECK_EQ(out, begin + result->size());
				1509	}
				1510
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	1511	int GlobalReplaceSubstring(const string& substring,
				1512	const string& replacement,
				1513	string* s) {
				1514	GOOGLE_CHECK(s != NULL);
				1515	if (s->empty() \|\| substring.empty())
				1516	return 0;
				1517	string tmp;
				1518	int num_replacements = 0;
				1519	int pos = 0;
				1520	for (int match_pos = s->find(substring.data(), pos, substring.length());
				1521	match_pos != string::npos;
				1522	pos = match_pos + substring.length(),
				1523	match_pos = s->find(substring.data(), pos, substring.length())) {
				1524	++num_replacements;
				1525	// Append the original content before the match.
				1526	tmp.append(*s, pos, match_pos - pos);
				1527	// Append the replacement for the match.
				1528	tmp.append(replacement.begin(), replacement.end());
				1529	}
				1530	// Append the content after the last match. If no replacements were made, the
				1531	// original string is left untouched.
				1532	if (num_replacements > 0) {
				1533	tmp.append(*s, pos, s->length() - pos);
				1534	s->swap(tmp);
				1535	}
				1536	return num_replacements;
				1537	}
				1538
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1539	} // namespace protobuf
				1540	} // namespace google