Blame - src/google/protobuf/stubs/strutil.h - platform/external/protobuf-javalite

blob: 27d475754ae735ef7205a8e0b48601baaaca607b [file] [log] [blame]

temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1	// Protocol Buffers - Google's data interchange format
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	2	// Copyright 2008 Google Inc. All rights reserved.
Feng Xiao	e428862	2014-10-01 16:26:23 -0700	[diff] [blame]	3	// https://developers.google.com/protocol-buffers/
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	4	//
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are
				7	// met:
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	8	//
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	9	// * Redistributions of source code must retain the above copyright
				10	// notice, this list of conditions and the following disclaimer.
				11	// * Redistributions in binary form must reproduce the above
				12	// copyright notice, this list of conditions and the following disclaimer
				13	// in the documentation and/or other materials provided with the
				14	// distribution.
				15	// * Neither the name of Google Inc. nor the names of its
				16	// contributors may be used to endorse or promote products derived from
				17	// this software without specific prior written permission.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	18	//
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	30
				31	// from google3/strings/strutil.h
				32
				33	#ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__
				34	#define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__
				35
kenton@google.com	3aa7a0d	2009-08-17 20:34:29 +0000	[diff] [blame]	36	#include <stdlib.h>
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	37	#include <vector>
				38	#include <google/protobuf/stubs/common.h>
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	39	#include <google/protobuf/stubs/stringpiece.h>
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	40
				41	namespace google {
				42	namespace protobuf {
				43
				44	#ifdef _MSC_VER
				45	#define strtoll _strtoi64
				46	#define strtoull _strtoui64
kenton@google.com	a2a32c2	2008-11-14 17:29:32 +0000	[diff] [blame]	47	#elif defined(__DECCXX) && defined(__osf__)
				48	// HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit.
				49	#define strtoll strtol
				50	#define strtoull strtoul
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	51	#endif
				52
				53	// ----------------------------------------------------------------------
				54	// ascii_isalnum()
				55	// Check if an ASCII character is alphanumeric. We can't use ctype's
				56	// isalnum() because it is affected by locale. This function is applied
				57	// to identifiers in the protocol buffer language, not to natural-language
				58	// strings, so locale should not be taken into account.
				59	// ascii_isdigit()
				60	// Like above, but only accepts digits.
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	61	// ascii_isspace()
				62	// Check if the character is a space character.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	63	// ----------------------------------------------------------------------
				64
				65	inline bool ascii_isalnum(char c) {
				66	return ('a' <= c && c <= 'z') \|\|
				67	('A' <= c && c <= 'Z') \|\|
				68	('0' <= c && c <= '9');
				69	}
				70
				71	inline bool ascii_isdigit(char c) {
				72	return ('0' <= c && c <= '9');
				73	}
				74
// Returns true if 'c' is one of the six ASCII whitespace characters:
// space, horizontal tab, newline, vertical tab, form feed or carriage return.
// Locale-independent.
inline bool ascii_isspace(char c) {
  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' ||
         c == '\r';
}
				79
				80	inline bool ascii_isupper(char c) {
				81	return c >= 'A' && c <= 'Z';
				82	}
				83
				84	inline bool ascii_islower(char c) {
				85	return c >= 'a' && c <= 'z';
				86	}
				87
				88	inline char ascii_toupper(char c) {
				89	return ascii_islower(c) ? c - ('a' - 'A') : c;
				90	}
				91
				92	inline char ascii_tolower(char c) {
				93	return ascii_isupper(c) ? c + ('a' - 'A') : c;
				94	}
				95
				96	inline int hex_digit_to_int(char c) {
				97	/* Assume ASCII. */
				98	int x = static_cast<unsigned char>(c);
				99	if (x > '9') {
				100	x += 9;
				101	}
				102	return x & 0xf;
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	103	}
				104
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	105	// ----------------------------------------------------------------------
				106	// HasPrefixString()
				107	// Check if a string begins with a given prefix.
				108	// StripPrefixString()
				109	// Given a string and a putative prefix, returns the string minus the
				110	// prefix string if the prefix matches, otherwise the original
				111	// string.
				112	// ----------------------------------------------------------------------
				113	inline bool HasPrefixString(const string& str,
				114	const string& prefix) {
				115	return str.size() >= prefix.size() &&
				116	str.compare(0, prefix.size(), prefix) == 0;
				117	}
				118
				119	inline string StripPrefixString(const string& str, const string& prefix) {
				120	if (HasPrefixString(str, prefix)) {
				121	return str.substr(prefix.size());
				122	} else {
				123	return str;
				124	}
				125	}
				126
				127	// ----------------------------------------------------------------------
				128	// HasSuffixString()
				129	// Return true if str ends in suffix.
				130	// StripSuffixString()
				131	// Given a string and a putative suffix, returns the string minus the
				132	// suffix string if the suffix matches, otherwise the original
				133	// string.
				134	// ----------------------------------------------------------------------
				135	inline bool HasSuffixString(const string& str,
				136	const string& suffix) {
				137	return str.size() >= suffix.size() &&
				138	str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
				139	}
				140
				141	inline string StripSuffixString(const string& str, const string& suffix) {
				142	if (HasSuffixString(str, suffix)) {
				143	return str.substr(0, str.size() - suffix.size());
				144	} else {
				145	return str;
				146	}
				147	}
				148
				149	// ----------------------------------------------------------------------
				150	// StripString
				151	// Replaces any occurrence of the character 'remove' (or the characters
				152	// in 'remove') with the character 'replacewith'.
				153	// Good for keeping html characters or protocol characters (\t) out
				154	// of places where they might cause a problem.
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	155	// StripWhitespace
				156	// Removes whitespaces from both ends of the given string.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	157	// ----------------------------------------------------------------------
				158	LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove,
				159	char replacewith);
				160
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	161	LIBPROTOBUF_EXPORT void StripWhitespace(string* s);
				162
				163
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	164	// ----------------------------------------------------------------------
				165	// LowerString()
				166	// UpperString()
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	167	// ToUpper()
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	168	// Convert the characters in "s" to lowercase or uppercase. ASCII-only:
				169	// these functions intentionally ignore locale because they are applied to
				170	// identifiers used in the Protocol Buffer language, not to natural-language
				171	// strings.
				172	// ----------------------------------------------------------------------
				173
				174	inline void LowerString(string * s) {
				175	string::iterator end = s->end();
				176	for (string::iterator i = s->begin(); i != end; ++i) {
				177	// tolower() changes based on locale. We don't want this!
				178	if ('A' <= i && i <= 'Z') *i += 'a' - 'A';
				179	}
				180	}
				181
				182	inline void UpperString(string * s) {
				183	string::iterator end = s->end();
				184	for (string::iterator i = s->begin(); i != end; ++i) {
				185	// toupper() changes based on locale. We don't want this!
				186	if ('a' <= i && i <= 'z') *i += 'A' - 'a';
				187	}
				188	}
				189
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	190	inline string ToUpper(const string& s) {
				191	string out = s;
				192	UpperString(&out);
				193	return out;
				194	}
				195
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	196	// ----------------------------------------------------------------------
				197	// StringReplace()
				198	// Give me a string and two patterns "old" and "new", and I replace
				199	// the first instance of "old" in the string with "new", if it
				200	// exists. RETURN a new string, regardless of whether the replacement
				201	// happened or not.
				202	// ----------------------------------------------------------------------
				203
				204	LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub,
				205	const string& newsub, bool replace_all);
				206
				207	// ----------------------------------------------------------------------
				208	// SplitStringUsing()
				209	// Split a string using a character delimiter. Append the components
				210	// to 'result'. If there are consecutive delimiters, this function skips
				211	// over all of them.
				212	// ----------------------------------------------------------------------
				213	LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim,
				214	vector<string>* res);
				215
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	216	// Split a string using one or more byte delimiters, presented
				217	// as a nul-terminated c string. Append the components to 'result'.
				218	// If there are consecutive delimiters, this function will return
				219	// corresponding empty strings. If you want to drop the empty
				220	// strings, try SplitStringUsing().
				221	//
				222	// If "full" is the empty string, yields an empty string as the only value.
				223	// ----------------------------------------------------------------------
				224	LIBPROTOBUF_EXPORT void SplitStringAllowEmpty(const string& full,
				225	const char* delim,
				226	vector<string>* result);
				227
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	228	// ----------------------------------------------------------------------
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	229	// Split()
				230	// Split a string using a character delimiter.
				231	// ----------------------------------------------------------------------
				232	inline vector<string> Split(
				233	const string& full, const char* delim, bool skip_empty = true) {
				234	vector<string> result;
				235	if (skip_empty) {
				236	SplitStringUsing(full, delim, &result);
				237	} else {
				238	SplitStringAllowEmpty(full, delim, &result);
				239	}
				240	return result;
				241	}
				242
				243	// ----------------------------------------------------------------------
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	244	// JoinStrings()
				245	// These methods concatenate a vector of strings into a C++ string, using
				246	// the C-string "delim" as a separator between components. There are two
				247	// flavors of the function, one flavor returns the concatenated string,
				248	// another takes a pointer to the target string. In the latter case the
				249	// target string is cleared and overwritten.
				250	// ----------------------------------------------------------------------
				251	LIBPROTOBUF_EXPORT void JoinStrings(const vector<string>& components,
				252	const char* delim, string* result);
				253
				254	inline string JoinStrings(const vector<string>& components,
				255	const char* delim) {
				256	string result;
				257	JoinStrings(components, delim, &result);
				258	return result;
				259	}
				260
				261	// ----------------------------------------------------------------------
				262	// UnescapeCEscapeSequences()
				263	// Copies "source" to "dest", rewriting C-style escape sequences
				264	// -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
				265	// equivalents. "dest" must be sufficiently large to hold all
				266	// the characters in the rewritten string (i.e. at least as large
				267	// as strlen(source) + 1 should be safe, since the replacements
				268	// are always shorter than the original escaped sequences). It's
				269	// safe for source and dest to be the same. RETURNS the length
				270	// of dest.
				271	//
				272	// It allows hex sequences \xhh, or generally \xhhhhh with an
				273	// arbitrary number of hex digits, but all of them together must
				274	// specify a value of a single byte (e.g. \x0045 is equivalent
				275	// to \x45, and \x1234 is erroneous).
				276	//
				277	// It also allows escape sequences of the form \uhhhh (exactly four
				278	// hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
				279	// hex digits, upper or lower case) to specify a Unicode code
				280	// point. The dest array will contain the UTF8-encoded version of
				281	// that code-point (e.g., if source contains \u2019, then dest will
kenton@google.com	2036374	2010-02-10 00:13:33 +0000	[diff] [blame]	282	// contain the three bytes 0xE2, 0x80, and 0x99).
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	283	//
				284	// Errors: In the first form of the call, errors are reported with
				285	// LOG(ERROR). The same is true for the second form of the call if
				286	// the pointer to the string vector is NULL; otherwise, error
				287	// messages are stored in the vector. In either case, the effect on
				288	// the dest array is not defined, but rest of the source will be
				289	// processed.
				290	// ----------------------------------------------------------------------
				291
				292	LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest);
				293	LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest,
				294	vector<string> *errors);
				295
				296	// ----------------------------------------------------------------------
				297	// UnescapeCEscapeString()
				298	// This does the same thing as UnescapeCEscapeSequences, but creates
				299	// a new string. The caller does not need to worry about allocating
				300	// a dest buffer. This should be used for non performance critical
				301	// tasks such as printing debug messages. It is safe for src and dest
				302	// to be the same.
				303	//
				304	// The second call stores its errors in a supplied string vector.
				305	// If the string vector pointer is NULL, it reports the errors with LOG().
				306	//
// In the first and second calls, the length of dest is returned. In the
// third call, the new string is returned.
				309	// ----------------------------------------------------------------------
				310
				311	LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest);
				312	LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest,
				313	vector<string> *errors);
				314	LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src);
				315
				316	// ----------------------------------------------------------------------
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	317	// CEscape()
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	318	// Escapes 'src' using C-style escape sequences and returns the resulting
				319	// string.
				320	//
				321	// Escaped chars: \n, \r, \t, ", ', \, and !isprint().
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	322	// ----------------------------------------------------------------------
				323	LIBPROTOBUF_EXPORT string CEscape(const string& src);
				324
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	325	// ----------------------------------------------------------------------
				326	// CEscapeAndAppend()
				327	// Escapes 'src' using C-style escape sequences, and appends the escaped
				328	// string to 'dest'.
				329	// ----------------------------------------------------------------------
				330	LIBPROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, string* dest);
				331
kenton@google.com	fccb146	2009-12-18 02:11:36 +0000	[diff] [blame]	332	namespace strings {
				333	// Like CEscape() but does not escape bytes with the upper bit set.
				334	LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src);
				335
				336	// Like CEscape() but uses hex (\x) escapes instead of octals.
				337	LIBPROTOBUF_EXPORT string CHexEscape(const string& src);
				338	} // namespace strings
				339
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	340	// ----------------------------------------------------------------------
				341	// strto32()
				342	// strtou32()
				343	// strto64()
				344	// strtou64()
				345	// Architecture-neutral plug compatible replacements for strtol() and
				346	// strtoul(). Long's have different lengths on ILP-32 and LP-64
				347	// platforms, so using these is safer, from the point of view of
				348	// overflow behavior, than using the standard libc functions.
				349	// ----------------------------------------------------------------------
				350	LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char nptr, char *endptr,
				351	int base);
				352	LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char nptr, char *endptr,
				353	int base);
				354
				355	inline int32 strto32(const char nptr, char *endptr, int base) {
				356	if (sizeof(int32) == sizeof(long))
				357	return strtol(nptr, endptr, base);
				358	else
				359	return strto32_adaptor(nptr, endptr, base);
				360	}
				361
				362	inline uint32 strtou32(const char nptr, char *endptr, int base) {
				363	if (sizeof(uint32) == sizeof(unsigned long))
				364	return strtoul(nptr, endptr, base);
				365	else
				366	return strtou32_adaptor(nptr, endptr, base);
				367	}
				368
				369	// For now, long long is 64-bit on all the platforms we care about, so these
				370	// functions can simply pass the call to strto[u]ll.
				371	inline int64 strto64(const char nptr, char *endptr, int base) {
				372	GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long),
				373	sizeof_int64_is_not_sizeof_long_long);
				374	return strtoll(nptr, endptr, base);
				375	}
				376
				377	inline uint64 strtou64(const char nptr, char *endptr, int base) {
				378	GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long),
				379	sizeof_uint64_is_not_sizeof_long_long);
				380	return strtoull(nptr, endptr, base);
				381	}
				382
				383	// ----------------------------------------------------------------------
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	384	// safe_strtob()
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	385	// safe_strto32()
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	386	// safe_strtou32()
				387	// safe_strto64()
				388	// safe_strtou64()
				389	// safe_strtof()
				390	// safe_strtod()
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	391	// ----------------------------------------------------------------------
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	392	LIBPROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value);
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	393
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	394	LIBPROTOBUF_EXPORT bool safe_strto32(const string& str, int32* value);
				395	LIBPROTOBUF_EXPORT bool safe_strtou32(const string& str, uint32* value);
				396	inline bool safe_strto32(const char* str, int32* value) {
				397	return safe_strto32(string(str), value);
				398	}
				399	inline bool safe_strto32(StringPiece str, int32* value) {
				400	return safe_strto32(str.ToString(), value);
				401	}
				402	inline bool safe_strtou32(const char* str, uint32* value) {
				403	return safe_strtou32(string(str), value);
				404	}
				405	inline bool safe_strtou32(StringPiece str, uint32* value) {
				406	return safe_strtou32(str.ToString(), value);
				407	}
				408
				409	LIBPROTOBUF_EXPORT bool safe_strto64(const string& str, int64* value);
				410	LIBPROTOBUF_EXPORT bool safe_strtou64(const string& str, uint64* value);
				411	inline bool safe_strto64(const char* str, int64* value) {
				412	return safe_strto64(string(str), value);
				413	}
				414	inline bool safe_strto64(StringPiece str, int64* value) {
				415	return safe_strto64(str.ToString(), value);
				416	}
				417	inline bool safe_strtou64(const char* str, uint64* value) {
				418	return safe_strtou64(string(str), value);
				419	}
				420	inline bool safe_strtou64(StringPiece str, uint64* value) {
				421	return safe_strtou64(str.ToString(), value);
				422	}
				423
				424	LIBPROTOBUF_EXPORT bool safe_strtof(const char* str, float* value);
				425	LIBPROTOBUF_EXPORT bool safe_strtod(const char* str, double* value);
				426	inline bool safe_strtof(const string& str, float* value) {
				427	return safe_strtof(str.c_str(), value);
				428	}
				429	inline bool safe_strtod(const string& str, double* value) {
				430	return safe_strtod(str.c_str(), value);
				431	}
				432	inline bool safe_strtof(StringPiece str, float* value) {
				433	return safe_strtof(str.ToString(), value);
				434	}
				435	inline bool safe_strtod(StringPiece str, double* value) {
				436	return safe_strtod(str.ToString(), value);
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	437	}
				438
				439	// ----------------------------------------------------------------------
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	440	// FastIntToBuffer()
				441	// FastHexToBuffer()
				442	// FastHex64ToBuffer()
				443	// FastHex32ToBuffer()
				444	// FastTimeToBuffer()
				445	// These are intended for speed. FastIntToBuffer() assumes the
				446	// integer is non-negative. FastHexToBuffer() puts output in
				447	// hex rather than decimal. FastTimeToBuffer() puts the output
				448	// into RFC822 format.
				449	//
				450	// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
				451	// padded to exactly 16 bytes (plus one byte for '\0')
				452	//
				453	// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
				454	// padded to exactly 8 bytes (plus one byte for '\0')
				455	//
				456	// All functions take the output buffer as an arg.
				457	// They all return a pointer to the beginning of the output,
				458	// which may not be the beginning of the input buffer.
				459	// ----------------------------------------------------------------------
				460
				461	// Suggested buffer size for FastToBuffer functions. Also works with
				462	// DoubleToBuffer() and FloatToBuffer().
				463	static const int kFastToBufferSize = 32;
				464
				465	LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer);
				466	LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer);
				467	char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below
				468	char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below
				469	LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer);
				470	LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer);
				471	LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer);
				472
				473	// at least 22 bytes long
				474	inline char* FastIntToBuffer(int i, char* buffer) {
				475	return (sizeof(i) == 4 ?
				476	FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer));
				477	}
				478	inline char* FastUIntToBuffer(unsigned int i, char* buffer) {
				479	return (sizeof(i) == 4 ?
				480	FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer));
				481	}
				482	inline char* FastLongToBuffer(long i, char* buffer) {
				483	return (sizeof(i) == 4 ?
				484	FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer));
				485	}
				486	inline char* FastULongToBuffer(unsigned long i, char* buffer) {
				487	return (sizeof(i) == 4 ?
				488	FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer));
				489	}
				490
				491	// ----------------------------------------------------------------------
				492	// FastInt32ToBufferLeft()
				493	// FastUInt32ToBufferLeft()
				494	// FastInt64ToBufferLeft()
				495	// FastUInt64ToBufferLeft()
				496	//
				497	// Like the Fast*ToBuffer() functions above, these are intended for speed.
				498	// Unlike the Fast*ToBuffer() functions, however, these functions write
				499	// their output to the beginning of the buffer (hence the name, as the
				500	// output is left-aligned). The caller is responsible for ensuring that
				501	// the buffer has enough space to hold the output.
				502	//
				503	// Returns a pointer to the end of the string (i.e. the null character
				504	// terminating the string).
				505	// ----------------------------------------------------------------------
				506
				507	LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer);
				508	LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer);
				509	LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer);
				510	LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer);
				511
				512	// Just define these in terms of the above.
				513	inline char* FastUInt32ToBuffer(uint32 i, char* buffer) {
				514	FastUInt32ToBufferLeft(i, buffer);
				515	return buffer;
				516	}
				517	inline char* FastUInt64ToBuffer(uint64 i, char* buffer) {
				518	FastUInt64ToBufferLeft(i, buffer);
				519	return buffer;
				520	}
				521
// Converts a bool to the string "true" or "false".
inline string SimpleBtoa(bool value) {
  return value ? "true" : "false";
}
				525
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	526	// ----------------------------------------------------------------------
				527	// SimpleItoa()
				528	// Description: converts an integer to a string.
				529	//
				530	// Return value: string
				531	// ----------------------------------------------------------------------
				532	LIBPROTOBUF_EXPORT string SimpleItoa(int i);
				533	LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i);
				534	LIBPROTOBUF_EXPORT string SimpleItoa(long i);
				535	LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i);
				536	LIBPROTOBUF_EXPORT string SimpleItoa(long long i);
				537	LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i);
				538
				539	// ----------------------------------------------------------------------
				540	// SimpleDtoa()
				541	// SimpleFtoa()
				542	// DoubleToBuffer()
				543	// FloatToBuffer()
				544	// Description: converts a double or float to a string which, if
				545	// passed to NoLocaleStrtod(), will produce the exact same original double
				546	// (except in case of NaN; all NaNs are considered the same value).
				547	// We try to keep the string short but it's not guaranteed to be as
				548	// short as possible.
				549	//
				550	// DoubleToBuffer() and FloatToBuffer() write the text to the given
				551	// buffer and return it. The buffer must be at least
				552	// kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
				553	// bytes for floats. kFastToBufferSize is also guaranteed to be large
				554	// enough to hold either.
				555	//
				556	// Return value: string
				557	// ----------------------------------------------------------------------
				558	LIBPROTOBUF_EXPORT string SimpleDtoa(double value);
				559	LIBPROTOBUF_EXPORT string SimpleFtoa(float value);
				560
				561	LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer);
				562	LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer);
				563
				564	// In practice, doubles should never need more than 24 bytes and floats
				565	// should never need more than 14 (including null terminators), but we
				566	// overestimate to be safe.
				567	static const int kDoubleToBufferSize = 32;
				568	static const int kFloatToBufferSize = 24;
				569
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	570	namespace strings {
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	571
// Padding specification carried by Hex.  Enumerators are numbered
// consecutively from NO_PAD == 1, so ZERO_PAD_N has the numeric value N.
enum PadSpec {
  NO_PAD = 1,
  ZERO_PAD_2,
  ZERO_PAD_3,
  ZERO_PAD_4,
  ZERO_PAD_5,
  ZERO_PAD_6,
  ZERO_PAD_7,
  ZERO_PAD_8,
  ZERO_PAD_9,
  ZERO_PAD_10,
  ZERO_PAD_11,
  ZERO_PAD_12,
  ZERO_PAD_13,
  ZERO_PAD_14,
  ZERO_PAD_15,
  ZERO_PAD_16,
};
				590
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	591	struct Hex {
				592	uint64 value;
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	593	enum PadSpec spec;
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	594	template <class Int>
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	595	explicit Hex(Int v, PadSpec s = NO_PAD)
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	596	: spec(s) {
				597	// Prevent sign-extension by casting integers to
				598	// their unsigned counterparts.
				599	#ifdef LANG_CXX11
				600	static_assert(
				601	sizeof(v) == 1 \|\| sizeof(v) == 2 \|\| sizeof(v) == 4 \|\| sizeof(v) == 8,
				602	"Unknown integer type");
				603	#endif
				604	value = sizeof(v) == 1 ? static_cast<uint8>(v)
				605	: sizeof(v) == 2 ? static_cast<uint16>(v)
				606	: sizeof(v) == 4 ? static_cast<uint32>(v)
				607	: static_cast<uint64>(v);
				608	}
				609	};
				610
Bo Yang	cf603a9	2015-05-24 22:28:04 -0700	[diff] [blame]	611	struct LIBPROTOBUF_EXPORT AlphaNum {
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	612	const char *piece_data_; // move these to string_ref eventually
				613	size_t piece_size_; // move these to string_ref eventually
				614
				615	char digits[kFastToBufferSize];
				616
				617	// No bool ctor -- bools convert to an integral type.
				618	// A bool ctor would also convert incoming pointers (bletch).
				619
				620	AlphaNum(int32 i32)
				621	: piece_data_(digits),
				622	piece_size_(FastInt32ToBufferLeft(i32, digits) - &digits[0]) {}
				623	AlphaNum(uint32 u32)
				624	: piece_data_(digits),
				625	piece_size_(FastUInt32ToBufferLeft(u32, digits) - &digits[0]) {}
				626	AlphaNum(int64 i64)
				627	: piece_data_(digits),
				628	piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0]) {}
				629	AlphaNum(uint64 u64)
				630	: piece_data_(digits),
				631	piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {}
				632
				633	AlphaNum(float f)
				634	: piece_data_(digits), piece_size_(strlen(FloatToBuffer(f, digits))) {}
				635	AlphaNum(double f)
				636	: piece_data_(digits), piece_size_(strlen(DoubleToBuffer(f, digits))) {}
				637
				638	AlphaNum(Hex hex);
				639
				640	AlphaNum(const char* c_str)
				641	: piece_data_(c_str), piece_size_(strlen(c_str)) {}
				642	// TODO: Add a string_ref constructor, eventually
				643	// AlphaNum(const StringPiece &pc) : piece(pc) {}
				644
				645	AlphaNum(const string& str)
				646	: piece_data_(str.data()), piece_size_(str.size()) {}
				647
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	648	AlphaNum(StringPiece str)
				649	: piece_data_(str.data()), piece_size_(str.size()) {}
				650
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	651	size_t size() const { return piece_size_; }
				652	const char *data() const { return piece_data_; }
				653
				654	private:
				655	// Use ":" not ':'
				656	AlphaNum(char c); // NOLINT(runtime/explicit)
				657
				658	// Disallow copy and assign.
				659	AlphaNum(const AlphaNum&);
				660	void operator=(const AlphaNum&);
				661	};
				662
				663	} // namespace strings
				664
				665	using strings::AlphaNum;
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	666
				667	// ----------------------------------------------------------------------
				668	// StrCat()
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	669	// This merges the given strings or numbers, with no delimiter. This
				670	// is designed to be the fastest possible way to construct a string out
				671	// of a mix of raw C strings, strings, bool values,
				672	// and numeric values.
				673	//
				674	// Don't use this for user-visible strings. The localization process
				675	// works poorly on strings built up out of fragments.
				676	//
				677	// For clarity and performance, don't use StrCat when appending to a
				678	// string. In particular, avoid using any of these (anti-)patterns:
				679	// str.append(StrCat(...))
				680	// str += StrCat(...)
				681	// str = StrCat(str, ...)
				682	// where the last is the worst, with the potential to change a loop
				683	// from a linear time operation with O(1) dynamic allocations into a
				684	// quadratic time operation with O(n) dynamic allocations. StrAppend
				685	// is a better choice than any of the above, subject to the restriction
				686	// of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may
				687	// be a reference into str.
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	688	// ----------------------------------------------------------------------
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	689
Bo Yang	cf603a9	2015-05-24 22:28:04 -0700	[diff] [blame]	690	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b);
				691	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
				692	const AlphaNum& c);
				693	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
				694	const AlphaNum& c, const AlphaNum& d);
				695	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
				696	const AlphaNum& c, const AlphaNum& d,
				697	const AlphaNum& e);
				698	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
				699	const AlphaNum& c, const AlphaNum& d,
				700	const AlphaNum& e, const AlphaNum& f);
				701	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
				702	const AlphaNum& c, const AlphaNum& d,
				703	const AlphaNum& e, const AlphaNum& f,
				704	const AlphaNum& g);
				705	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
				706	const AlphaNum& c, const AlphaNum& d,
				707	const AlphaNum& e, const AlphaNum& f,
				708	const AlphaNum& g, const AlphaNum& h);
				709	LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
				710	const AlphaNum& c, const AlphaNum& d,
				711	const AlphaNum& e, const AlphaNum& f,
				712	const AlphaNum& g, const AlphaNum& h,
				713	const AlphaNum& i);
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	714
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	715	inline string StrCat(const AlphaNum& a) { return string(a.data(), a.size()); }
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	716
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	717	// ----------------------------------------------------------------------
				718	// StrAppend()
				719	// Same as above, but adds the output to the given string.
				720	// WARNING: For speed, StrAppend does not try to check each of its input
				721	// arguments to be sure that they are not a subset of the string being
				722	// appended to. That is, while this will work:
				723	//
				724	// string s = "foo";
				725	// s += s;
				726	//
				727	// This will not (necessarily) work:
				728	//
				729	// string s = "foo";
				730	// StrAppend(&s, s);
				731	//
				732	// Note: while StrCat supports concatenating up to 9 arguments, StrAppend
				733	// is currently limited to 4. That's rarely an issue except when
				734	// automatically transforming StrCat to StrAppend, and can easily be
				735	// worked around as consecutive calls to StrAppend are quite efficient.
				736	// ----------------------------------------------------------------------
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	737
Bo Yang	cf603a9	2015-05-24 22:28:04 -0700	[diff] [blame]	738	LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a);
				739	LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a,
				740	const AlphaNum& b);
				741	LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a,
				742	const AlphaNum& b, const AlphaNum& c);
				743	LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a,
				744	const AlphaNum& b, const AlphaNum& c,
				745	const AlphaNum& d);
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	746
				747	// ----------------------------------------------------------------------
				748	// Join()
				749	// These methods concatenate a range of components into a C++ string, using
				750	// the C-string "delim" as a separator between components.
				751	// ----------------------------------------------------------------------
				752	template <typename Iterator>
				753	void Join(Iterator start, Iterator end,
				754	const char* delim, string* result) {
				755	for (Iterator it = start; it != end; ++it) {
				756	if (it != start) {
				757	result->append(delim);
				758	}
Jisi Liu	885b612	2015-02-28 14:51:22 -0800	[diff] [blame]	759	StrAppend(result, *it);
jieluo@google.com	4de8f55	2014-07-18 00:47:59 +0000	[diff] [blame]	760	}
				761	}
				762
				763	template <typename Range>
				764	string Join(const Range& components,
				765	const char* delim) {
				766	string result;
				767	Join(components.begin(), components.end(), delim, &result);
				768	return result;
				769	}
				770
				771	// ----------------------------------------------------------------------
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	772	// ToHex()
				773	// Return a lower-case hex string representation of the given integer.
				774	// ----------------------------------------------------------------------
				775	LIBPROTOBUF_EXPORT string ToHex(uint64 num);
				776
				777	// ----------------------------------------------------------------------
Feng Xiao	6ef984a	2014-11-10 17:34:54 -0800	[diff] [blame]	778	// GlobalReplaceSubstring()
				779	// Replaces all instances of a substring in a string. Does nothing
				780	// if 'substring' is empty. Returns the number of replacements.
				781	//
				782	// NOTE: The string pieces must not overlap s.
				783	// ----------------------------------------------------------------------
				784	LIBPROTOBUF_EXPORT int GlobalReplaceSubstring(const string& substring,
				785	const string& replacement,
				786	string* s);
				787
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	788	// ----------------------------------------------------------------------
				789	// Base64Unescape()
				790	// Converts "src" which is encoded in Base64 to its binary equivalent and
				791	// writes it to "dest". If src contains invalid characters, dest is cleared
				792	// and the function returns false. Returns true on success.
				793	// ----------------------------------------------------------------------
				794	LIBPROTOBUF_EXPORT bool Base64Unescape(StringPiece src, string* dest);
				795
				796	// ----------------------------------------------------------------------
				797	// WebSafeBase64Unescape()
				798	// This is a variation of Base64Unescape which uses '-' instead of '+', and
				799	// '_' instead of '/'. src is not null-terminated; pass its length in slen.
				800	// Ideally slen < szdest, but szdest is honored either way.
				801	// RETURNS the length of dest, or -1 if src contains invalid chars.
				802
				803	// The variation that stores into a string clears the string first, and
				804	// returns false (with dest empty) if src contains invalid chars; for
				805	// this version src and dest must be different strings.
				806	// ----------------------------------------------------------------------
				807	LIBPROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen,
				808	char* dest, int szdest);
				809	LIBPROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, string* dest);
				810
				811	// Return the length to use for the output buffer given to the base64 escape
				812	// routines. Make sure to use the same value for do_padding in both.
				813	// This function may return incorrect results if given input_len values that
				814	// are extremely high, which should happen rarely.
				815	LIBPROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len,
				816	bool do_padding);
				817	// Use this version when calling Base64Escape without a do_padding arg.
				818	LIBPROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len);
				819
				820	// ----------------------------------------------------------------------
				821	// Base64Escape()
				822	// WebSafeBase64Escape()
				823	// Encode "src" to "dest" using base64 encoding.
				824	// src is not null-terminated; pass its length in slen.
				825	// 'dest' should have at least CalculateBase64EscapedLen() length.
				826	// RETURNS the length of dest.
				827	// The WebSafe variation uses '-' instead of '+' and '_' instead of '/'
				828	// so that we can place the output in URLs or cookies without having
				829	// to escape it. It also has an extra parameter "do_padding",
				830	// which when set to false will prevent padding with "=".
				831	// ----------------------------------------------------------------------
				832	LIBPROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen,
				833	char* dest, int szdest);
				834	LIBPROTOBUF_EXPORT int WebSafeBase64Escape(
				835	const unsigned char* src, int slen, char* dest,
				836	int szdest, bool do_padding);
				837	// Encode src into dest with padding.
				838	LIBPROTOBUF_EXPORT void Base64Escape(StringPiece src, string* dest);
				839	// Encode src into dest web-safely without padding.
				840	LIBPROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, string* dest);
				841	// Encode src into dest web-safely with padding.
				842	LIBPROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src,
				843	string* dest);
				844
				845	LIBPROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc,
				846	string* dest, bool do_padding);
				847	LIBPROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc,
				848	string* dest, bool do_padding);
				849
				850	static const int UTFmax = 4;
				851	// ----------------------------------------------------------------------
				852	// EncodeAsUTF8Char()
				853	// Helper to append a Unicode code point to a string as UTF8, without bringing
				854	// in any external dependencies. The output buffer must be at least 4 bytes
				855	// large.
				856	// ----------------------------------------------------------------------
				857	LIBPROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output);
				858
				859	// ----------------------------------------------------------------------
				860	// UTF8FirstLetterNumBytes()
				861	// Length of the first UTF-8 character.
				862	// ----------------------------------------------------------------------
				863	LIBPROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len);
				864
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	865	} // namespace protobuf
				866	} // namespace google
				867
				868	#endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__