blob: 7955d261130d984662e7680dbb8b7af49f573714 [file] [log] [blame]
temporal40ee5512008-07-10 02:12:20 +00001// Protocol Buffers - Google's data interchange format
kenton@google.com24bf56f2008-09-24 20:31:01 +00002// Copyright 2008 Google Inc. All rights reserved.
Feng Xiaoe4288622014-10-01 16:26:23 -07003// https://developers.google.com/protocol-buffers/
temporal40ee5512008-07-10 02:12:20 +00004//
kenton@google.com24bf56f2008-09-24 20:31:01 +00005// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
temporal40ee5512008-07-10 02:12:20 +00008//
kenton@google.com24bf56f2008-09-24 20:31:01 +00009// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
temporal40ee5512008-07-10 02:12:20 +000018//
kenton@google.com24bf56f2008-09-24 20:31:01 +000019// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal40ee5512008-07-10 02:12:20 +000030
31// from google3/strings/strutil.cc
32
33#include <google/protobuf/stubs/strutil.h>
34#include <errno.h>
35#include <float.h> // FLT_DIG and DBL_DIG
36#include <limits>
37#include <limits.h>
kenton@google.com25bc5cd2008-12-04 20:34:50 +000038#include <stdio.h>
kenton@google.comc91e1fe2009-10-12 19:22:03 +000039#include <iterator>
temporal40ee5512008-07-10 02:12:20 +000040
41#ifdef _WIN32
42// MSVC has only _snprintf, not snprintf.
43//
44// MinGW has both snprintf and _snprintf, but they appear to be different
45// functions. The former is buggy. When invoked like so:
46// char buffer[32];
47// snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
48// it prints "1.23000e+10". This is plainly wrong: %g should never print
49// trailing zeros after the decimal point. For some reason this bug only
50// occurs with some input values, not all. In any case, _snprintf does the
51// right thing, so we use it.
52#define snprintf _snprintf
53#endif
54
55namespace google {
56namespace protobuf {
57
// Returns true when |value| is a NaN.  A NaN is the only double that does not
// compare equal to itself, so a failed self-comparison identifies it.
inline bool IsNaN(double value) {
  const bool equals_itself = (value == value);
  return !equals_itself;
}
62
kenton@google.coma2a32c22008-11-14 17:29:32 +000063// These are defined as macros on some platforms. #undef them so that we can
64// redefine them.
65#undef isxdigit
66#undef isprint
67
temporal40ee5512008-07-10 02:12:20 +000068// The definitions of these in ctype.h change based on locale. Since our
69// string manipulation is all in relation to the protocol buffer and C++
70// languages, we always want to use the C locale. So, we re-define these
71// exactly as we want them.
// Locale-independent check for an ASCII hexadecimal digit: 0-9, a-f or A-F.
inline bool isxdigit(char c) {
  if (c >= '0' && c <= '9') return true;
  if (c >= 'a' && c <= 'f') return true;
  return c >= 'A' && c <= 'F';
}
77
// Locale-independent check for a printable ASCII character, i.e. one in the
// closed range 0x20 (space) through 0x7E ('~').
inline bool isprint(char c) {
  const bool below_space = c < 0x20;
  const bool above_tilde = c > 0x7E;
  return !(below_space || above_tilde);
}
81
82// ----------------------------------------------------------------------
83// StripString
84// Replaces any occurrence of the character 'remove' (or the characters
85// in 'remove') with the character 'replacewith'.
86// ----------------------------------------------------------------------
// Overwrites, in place, every character of |*s| that appears in the
// NUL-terminated set |remove| with |replacewith|.
//
// The search runs over the string's full length, so characters that follow an
// embedded '\0' in |*s| are processed too (a strpbrk() scan over c_str() would
// stop at the first embedded NUL).
void StripString(std::string* s, const char* remove, char replacewith) {
  std::string::size_type pos = s->find_first_of(remove);
  while (pos != std::string::npos) {
    (*s)[pos] = replacewith;
    pos = s->find_first_of(remove, pos + 1);
  }
}
96
Feng Xiao6ef984a2014-11-10 17:34:54 -080097void StripWhitespace(string* str) {
98 int str_length = str->length();
99
100 // Strip off leading whitespace.
101 int first = 0;
102 while (first < str_length && ascii_isspace(str->at(first))) {
103 ++first;
104 }
105 // If entire string is white space.
106 if (first == str_length) {
107 str->clear();
108 return;
109 }
110 if (first > 0) {
111 str->erase(0, first);
112 str_length -= first;
113 }
114
115 // Strip off trailing whitespace.
116 int last = str_length - 1;
117 while (last >= 0 && ascii_isspace(str->at(last))) {
118 --last;
119 }
120 if (last != (str_length - 1) && last >= 0) {
121 str->erase(last + 1, string::npos);
122 }
123}
124
temporal40ee5512008-07-10 02:12:20 +0000125// ----------------------------------------------------------------------
126// StringReplace()
127// Replace the "old" pattern with the "new" pattern in a string,
128// and append the result to "res". If replace_all is false,
129// it only replaces the first instance of "old."
130// ----------------------------------------------------------------------
131
// Appends to |*res| a copy of |s| in which |oldsub| is replaced by |newsub|.
// Only the first match is replaced unless |replace_all| is true.  When
// |oldsub| is empty, |s| is appended unchanged.
void StringReplace(const std::string& s, const std::string& oldsub,
                   const std::string& newsub, bool replace_all,
                   std::string* res) {
  if (oldsub.empty()) {
    res->append(s);
    return;
  }

  std::string::size_type cursor = 0;  // First byte of |s| not yet copied.
  for (;;) {
    const std::string::size_type match = s.find(oldsub, cursor);
    if (match == std::string::npos) {
      break;
    }
    // Copy the text between the previous match and this one, then the
    // replacement, and resume searching right after the matched text.
    res->append(s, cursor, match - cursor);
    res->append(newsub);
    cursor = match + oldsub.size();
    if (!replace_all) {
      break;
    }
  }

  // Whatever follows the last replacement is copied verbatim.
  res->append(s, cursor, std::string::npos);
}
153
154// ----------------------------------------------------------------------
155// StringReplace()
156// Give me a string and two patterns "old" and "new", and I replace
157// the first instance of "old" in the string with "new", if it
158// exists. If "global" is true; call this repeatedly until it
159// fails. RETURN a new string, regardless of whether the replacement
160// happened or not.
161// ----------------------------------------------------------------------
162
163string StringReplace(const string& s, const string& oldsub,
164 const string& newsub, bool replace_all) {
165 string ret;
166 StringReplace(s, oldsub, newsub, replace_all, &ret);
167 return ret;
168}
169
170// ----------------------------------------------------------------------
171// SplitStringUsing()
172// Split a string using a character delimiter. Append the components
173// to 'result'.
174//
175// Note: For multi-character delimiters, this routine will split on *ANY* of
176// the characters in the string, not the entire string as a single delimiter.
177// ----------------------------------------------------------------------
// Splits |full| on any of the characters in |delim| and writes each non-empty
// piece through the output iterator |result|.  Runs of delimiters are treated
// as one separator, so no empty pieces are produced.
template <typename ITR>
static inline
void SplitStringToIteratorUsing(const std::string& full,
                                const char* delim,
                                ITR& result) {
  // Fast path for the common single-character delimiter.
  if (delim[0] != '\0' && delim[1] == '\0') {
    const char separator = delim[0];
    const char* cursor = full.data();
    const char* const stop = cursor + full.size();
    while (cursor != stop) {
      if (*cursor == separator) {
        ++cursor;
        continue;
      }
      // |cursor| is on the first character of a token; scan to its end.
      const char* const token = cursor;
      do {
        ++cursor;
      } while (cursor != stop && *cursor != separator);
      *result++ = std::string(token, cursor - token);
    }
    return;
  }

  // General case: |delim| is a set of separator characters.
  std::string::size_type token_begin = full.find_first_not_of(delim);
  while (token_begin != std::string::npos) {
    const std::string::size_type token_end =
        full.find_first_of(delim, token_begin);
    if (token_end == std::string::npos) {
      // The final token runs to the end of the string.
      *result++ = full.substr(token_begin);
      break;
    }
    *result++ = full.substr(token_begin, token_end - token_begin);
    token_begin = full.find_first_not_of(delim, token_end);
  }
}
212
213void SplitStringUsing(const string& full,
214 const char* delim,
215 vector<string>* result) {
216 back_insert_iterator< vector<string> > it(*result);
217 SplitStringToIteratorUsing(full, delim, it);
218}
219
xiaofeng@google.comb55a20f2012-09-22 02:40:50 +0000220// Split a string using a character delimiter. Append the components
221// to 'result'. If there are consecutive delimiters, this function
222// will return corresponding empty strings. The string is split into
223// at most the specified number of pieces greedily. This means that the
224// last piece may possibly be split further. To split into as many pieces
225// as possible, specify 0 as the number of pieces.
226//
227// If "full" is the empty string, yields an empty string as the only value.
228//
229// If "pieces" is negative for some reason, it returns the whole string
230// ----------------------------------------------------------------------
// Splits |full| on any character of |delim|, writing every piece -- empty ones
// included -- through |result|.  At most |pieces| pieces are produced, the
// last one receiving the unsplit remainder; |pieces| == 0 means no limit.  A
// negative |pieces| (or 1) yields the whole string as a single piece.
template <typename StringType, typename ITR>
static inline
void SplitStringToIteratorAllowEmpty(const StringType& full,
                                     const char* delim,
                                     int pieces,
                                     ITR& result) {
  std::string::size_type piece_begin = 0;
  int emitted = 0;

  // Cut off pieces while another cut is allowed and a delimiter remains.
  while (pieces == 0 || emitted < pieces - 1) {
    const std::string::size_type cut = full.find_first_of(delim, piece_begin);
    if (cut == std::string::npos) {
      break;
    }
    *result++ = full.substr(piece_begin, cut - piece_begin);
    piece_begin = cut + 1;
    ++emitted;
  }

  // The remainder (possibly empty) is always emitted as the final piece.
  *result++ = full.substr(piece_begin);
}
251
252void SplitStringAllowEmpty(const string& full, const char* delim,
253 vector<string>* result) {
254 back_insert_iterator<vector<string> > it(*result);
255 SplitStringToIteratorAllowEmpty(full, delim, 0, it);
256}
257
temporal40ee5512008-07-10 02:12:20 +0000258// ----------------------------------------------------------------------
259// JoinStrings()
260// This merges a vector of string components with delim inserted
// as separators between components.
262//
263// ----------------------------------------------------------------------
264template <class ITERATOR>
265static void JoinStringsIterator(const ITERATOR& start,
266 const ITERATOR& end,
267 const char* delim,
268 string* result) {
269 GOOGLE_CHECK(result != NULL);
270 result->clear();
271 int delim_length = strlen(delim);
272
273 // Precompute resulting length so we can reserve() memory in one shot.
274 int length = 0;
275 for (ITERATOR iter = start; iter != end; ++iter) {
276 if (iter != start) {
277 length += delim_length;
278 }
279 length += iter->size();
280 }
281 result->reserve(length);
282
283 // Now combine everything.
284 for (ITERATOR iter = start; iter != end; ++iter) {
285 if (iter != start) {
286 result->append(delim, delim_length);
287 }
288 result->append(iter->data(), iter->size());
289 }
290}
291
292void JoinStrings(const vector<string>& components,
293 const char* delim,
294 string * result) {
295 JoinStringsIterator(components.begin(), components.end(), delim, result);
296}
297
298// ----------------------------------------------------------------------
299// UnescapeCEscapeSequences()
300// This does all the unescaping that C does: \ooo, \r, \n, etc
301// Returns length of resulting string.
302// The implementation of \x parses any positive number of hex digits,
303// but it is an error if the value requires more than 8 bits, and the
304// result is truncated to 8 bits.
305//
306// The second call stores its errors in a supplied string vector.
307// If the string vector pointer is NULL, it reports the errors with LOG().
308// ----------------------------------------------------------------------
309
// True when (c) is one of the ASCII octal digits '0'..'7'.  The argument is
// evaluated twice, so it must not have side effects.
#define IS_OCTAL_DIGIT(c) (('0' <= (c)) && ((c) <= '7'))
311
// Converts one ASCII hexadecimal digit to its numeric value (0..15).  |c| must
// satisfy isxdigit(); this is only checked by assert().
inline int hex_digit_to_int(char c) {
  /* Assume ASCII. */
  assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);
  assert(isxdigit(c));
  const int code = static_cast<unsigned char>(c);
  // Decimal digits already carry their value in the low nibble.  Letters have
  // a low nibble of 1..6, so adding 9 moves it to 10..15.
  const int shifted = (code > '9') ? code + 9 : code;
  return shifted & 0xf;
}
322
// Protocol buffers never reports unescaping errors, but the reporting call
// sites are kept rather than deleted.  The condition handed to GOOGLE_LOG_IF
// is the constant false and the VECTOR argument is unused, so presumably
// nothing streamed into LOG_STRING is ever emitted.
#define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
326
327int UnescapeCEscapeSequences(const char* source, char* dest) {
328 return UnescapeCEscapeSequences(source, dest, NULL);
329}
330
331int UnescapeCEscapeSequences(const char* source, char* dest,
332 vector<string> *errors) {
333 GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";
334
335 char* d = dest;
336 const char* p = source;
337
338 // Small optimization for case where source = dest and there's no escaping
339 while ( p == d && *p != '\0' && *p != '\\' )
340 p++, d++;
341
342 while (*p != '\0') {
343 if (*p != '\\') {
344 *d++ = *p++;
345 } else {
346 switch ( *++p ) { // skip past the '\\'
347 case '\0':
348 LOG_STRING(ERROR, errors) << "String cannot end with \\";
349 *d = '\0';
350 return d - dest; // we're done with p
351 case 'a': *d++ = '\a'; break;
352 case 'b': *d++ = '\b'; break;
353 case 'f': *d++ = '\f'; break;
354 case 'n': *d++ = '\n'; break;
355 case 'r': *d++ = '\r'; break;
356 case 't': *d++ = '\t'; break;
357 case 'v': *d++ = '\v'; break;
358 case '\\': *d++ = '\\'; break;
359 case '?': *d++ = '\?'; break; // \? Who knew?
360 case '\'': *d++ = '\''; break;
361 case '"': *d++ = '\"'; break;
362 case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits
363 case '4': case '5': case '6': case '7': {
364 char ch = *p - '0';
365 if ( IS_OCTAL_DIGIT(p[1]) )
366 ch = ch * 8 + *++p - '0';
367 if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice
368 ch = ch * 8 + *++p - '0'; // now points at last digit
369 *d++ = ch;
370 break;
371 }
372 case 'x': case 'X': {
373 if (!isxdigit(p[1])) {
374 if (p[1] == '\0') {
375 LOG_STRING(ERROR, errors) << "String cannot end with \\x";
376 } else {
377 LOG_STRING(ERROR, errors) <<
378 "\\x cannot be followed by non-hex digit: \\" << *p << p[1];
379 }
380 break;
381 }
382 unsigned int ch = 0;
383 const char *hex_start = p;
384 while (isxdigit(p[1])) // arbitrarily many hex digits
385 ch = (ch << 4) + hex_digit_to_int(*++p);
386 if (ch > 0xFF)
387 LOG_STRING(ERROR, errors) << "Value of " <<
388 "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
389 *d++ = ch;
390 break;
391 }
392#if 0 // TODO(kenton): Support \u and \U? Requires runetochar().
393 case 'u': {
394 // \uhhhh => convert 4 hex digits to UTF-8
395 char32 rune = 0;
396 const char *hex_start = p;
397 for (int i = 0; i < 4; ++i) {
398 if (isxdigit(p[1])) { // Look one char ahead.
399 rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
400 } else {
401 LOG_STRING(ERROR, errors)
402 << "\\u must be followed by 4 hex digits: \\"
403 << string(hex_start, p+1-hex_start);
404 break;
405 }
406 }
407 d += runetochar(d, &rune);
408 break;
409 }
410 case 'U': {
411 // \Uhhhhhhhh => convert 8 hex digits to UTF-8
412 char32 rune = 0;
413 const char *hex_start = p;
414 for (int i = 0; i < 8; ++i) {
415 if (isxdigit(p[1])) { // Look one char ahead.
416 // Don't change rune until we're sure this
417 // is within the Unicode limit, but do advance p.
418 char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
419 if (newrune > 0x10FFFF) {
420 LOG_STRING(ERROR, errors)
421 << "Value of \\"
422 << string(hex_start, p + 1 - hex_start)
423 << " exceeds Unicode limit (0x10FFFF)";
424 break;
425 } else {
426 rune = newrune;
427 }
428 } else {
429 LOG_STRING(ERROR, errors)
430 << "\\U must be followed by 8 hex digits: \\"
431 << string(hex_start, p+1-hex_start);
432 break;
433 }
434 }
435 d += runetochar(d, &rune);
436 break;
437 }
438#endif
439 default:
440 LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
441 }
442 p++; // read past letter we escaped
443 }
444 }
445 *d = '\0';
446 return d - dest;
447}
448
449// ----------------------------------------------------------------------
450// UnescapeCEscapeString()
451// This does the same thing as UnescapeCEscapeSequences, but creates
452// a new string. The caller does not need to worry about allocating
453// a dest buffer. This should be used for non performance critical
454// tasks such as printing debug messages. It is safe for src and dest
455// to be the same.
456//
457// The second call stores its errors in a supplied string vector.
458// If the string vector pointer is NULL, it reports the errors with LOG().
459//
// In the first and second calls, the length of dest is returned. In
// the third call, the new string is returned.
462// ----------------------------------------------------------------------
463int UnescapeCEscapeString(const string& src, string* dest) {
464 return UnescapeCEscapeString(src, dest, NULL);
465}
466
467int UnescapeCEscapeString(const string& src, string* dest,
468 vector<string> *errors) {
469 scoped_array<char> unescaped(new char[src.size() + 1]);
470 int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors);
471 GOOGLE_CHECK(dest);
472 dest->assign(unescaped.get(), len);
473 return len;
474}
475
476string UnescapeCEscapeString(const string& src) {
477 scoped_array<char> unescaped(new char[src.size() + 1]);
478 int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL);
479 return string(unescaped.get(), len);
480}
481
482// ----------------------------------------------------------------------
483// CEscapeString()
484// CHexEscapeString()
485// Copies 'src' to 'dest', escaping dangerous characters using
486// C-style escape sequences. This is very useful for preparing query
487// flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
488// hexadecimal rather than octal sequences.
489// Returns the number of bytes written to 'dest' (not including the \0)
490// or -1 if there was insufficient space.
491//
492// Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
493// ----------------------------------------------------------------------
// Copies |src_len| bytes from |src| to |dest|, escaping \n, \r, \t, ", ', \
// and every byte for which isprint() is false.  Returns the number of bytes
// written (not counting the terminating '\0' that is also stored), or -1 if
// |dest_len| bytes are not enough.
//
//   use_hex   - emit non-printable bytes as \xNN instead of octal \NNN.
//   utf8_safe - copy bytes >= 0x80 through unescaped.
//
// The numeric escapes are written digit by digit instead of through sprintf():
// sprintf() stores a terminating NUL after the four escape characters, i.e.
// five bytes, while only four bytes of space are checked, so it could write
// one byte past the end of |dest|.
int CEscapeInternal(const char* src, int src_len, char* dest,
                    int dest_len, bool use_hex, bool utf8_safe) {
  static const char kHexDigits[] = "0123456789abcdef";
  const char* src_end = src + src_len;
  int used = 0;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    bool is_hex_escape = false;
    switch (*src) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default: {
        const unsigned char byte = static_cast<unsigned char>(*src);
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        const bool needs_escape =
            (!utf8_safe || byte < 0x80) &&
            (!isprint(*src) || (last_hex_escape && isxdigit(*src)));
        if (!needs_escape) {
          dest[used++] = *src;
          break;
        }
        if (dest_len - used < 4)  // need space for 4 letter escape
          return -1;
        dest[used++] = '\\';
        if (use_hex) {
          // Same text as "\\x%02x": two lower-case hex digits.
          dest[used++] = 'x';
          dest[used++] = kHexDigits[byte >> 4];
          dest[used++] = kHexDigits[byte & 0xF];
        } else {
          // Same text as "\\%03o": three octal digits.
          dest[used++] = static_cast<char>('0' + (byte >> 6));
          dest[used++] = static_cast<char>('0' + ((byte >> 3) & 7));
          dest[used++] = static_cast<char>('0' + (byte & 7));
        }
        is_hex_escape = use_hex;
        break;
      }
    }
    last_hex_escape = is_hex_escape;
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}
538
539int CEscapeString(const char* src, int src_len, char* dest, int dest_len) {
kenton@google.comfccb1462009-12-18 02:11:36 +0000540 return CEscapeInternal(src, src_len, dest, dest_len, false, false);
temporal40ee5512008-07-10 02:12:20 +0000541}
542
543// ----------------------------------------------------------------------
544// CEscape()
545// CHexEscape()
546// Copies 'src' to result, escaping dangerous characters using
547// C-style escape sequences. This is very useful for preparing query
// flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
// hexadecimal rather than octal sequences.
550//
551// Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
552// ----------------------------------------------------------------------
553string CEscape(const string& src) {
554 const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
555 scoped_array<char> dest(new char[dest_length]);
556 const int len = CEscapeInternal(src.data(), src.size(),
kenton@google.comfccb1462009-12-18 02:11:36 +0000557 dest.get(), dest_length, false, false);
temporal40ee5512008-07-10 02:12:20 +0000558 GOOGLE_DCHECK_GE(len, 0);
559 return string(dest.get(), len);
560}
561
kenton@google.comfccb1462009-12-18 02:11:36 +0000562namespace strings {
563
564string Utf8SafeCEscape(const string& src) {
565 const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
566 scoped_array<char> dest(new char[dest_length]);
567 const int len = CEscapeInternal(src.data(), src.size(),
568 dest.get(), dest_length, false, true);
569 GOOGLE_DCHECK_GE(len, 0);
570 return string(dest.get(), len);
571}
572
573string CHexEscape(const string& src) {
574 const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
575 scoped_array<char> dest(new char[dest_length]);
576 const int len = CEscapeInternal(src.data(), src.size(),
577 dest.get(), dest_length, true, false);
578 GOOGLE_DCHECK_GE(len, 0);
579 return string(dest.get(), len);
580}
581
582} // namespace strings
583
temporal40ee5512008-07-10 02:12:20 +0000584// ----------------------------------------------------------------------
585// strto32_adaptor()
586// strtou32_adaptor()
587// Implementation of strto[u]l replacements that have identical
588// overflow and underflow characteristics for both ILP-32 and LP-64
589// platforms, including errno preservation in error-free calls.
590// ----------------------------------------------------------------------
591
592int32 strto32_adaptor(const char *nptr, char **endptr, int base) {
593 const int saved_errno = errno;
594 errno = 0;
595 const long result = strtol(nptr, endptr, base);
596 if (errno == ERANGE && result == LONG_MIN) {
597 return kint32min;
598 } else if (errno == ERANGE && result == LONG_MAX) {
599 return kint32max;
600 } else if (errno == 0 && result < kint32min) {
601 errno = ERANGE;
602 return kint32min;
603 } else if (errno == 0 && result > kint32max) {
604 errno = ERANGE;
605 return kint32max;
606 }
607 if (errno == 0)
608 errno = saved_errno;
609 return static_cast<int32>(result);
610}
611
612uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) {
613 const int saved_errno = errno;
614 errno = 0;
615 const unsigned long result = strtoul(nptr, endptr, base);
616 if (errno == ERANGE && result == ULONG_MAX) {
617 return kuint32max;
618 } else if (errno == 0 && result > kuint32max) {
619 errno = ERANGE;
620 return kuint32max;
621 }
622 if (errno == 0)
623 errno = saved_errno;
624 return static_cast<uint32>(result);
625}
626
// Strips surrounding spaces and one optional leading '+' or '-' from |*text|.
//
// On success returns true, stores in |*negative_ptr| whether the sign was
// '-', and replaces |*text| with what remains.  Returns false, leaving
// |*text| untouched, when nothing but spaces is present or nothing follows
// the sign; in the latter case |*negative_ptr| has already been written.
inline bool safe_parse_sign(std::string* text  /*inout*/,
                            bool* negative_ptr  /*output*/) {
  const std::string& input = *text;
  std::string::size_type first = 0;
  std::string::size_type limit = input.size();

  // Consume whitespace (only the space character) at both ends.
  while (first < limit && input[first] == ' ') {
    ++first;
  }
  while (first < limit && input[limit - 1] == ' ') {
    --limit;
  }
  if (first >= limit) {
    return false;
  }

  // Consume sign.
  const char lead = input[first];
  *negative_ptr = (lead == '-');
  if (lead == '-' || lead == '+') {
    ++first;
    if (first >= limit) {
      return false;
    }
  }

  *text = text->substr(first, limit - first);
  return true;
}
654
655inline bool safe_parse_positive_int(
656 string text, int32* value_p) {
657 int base = 10;
658 int32 value = 0;
659 const int32 vmax = std::numeric_limits<int32>::max();
660 assert(vmax > 0);
661 assert(vmax >= base);
662 const int32 vmax_over_base = vmax / base;
663 const char* start = text.data();
664 const char* end = start + text.size();
665 // loop over digits
666 for (; start < end; ++start) {
667 unsigned char c = static_cast<unsigned char>(start[0]);
668 int digit = c - '0';
669 if (digit >= base || digit < 0) {
670 *value_p = value;
671 return false;
672 }
673 if (value > vmax_over_base) {
674 *value_p = vmax;
675 return false;
676 }
677 value *= base;
678 if (value > vmax - digit) {
679 *value_p = vmax;
680 return false;
681 }
682 value += digit;
683 }
684 *value_p = value;
685 return true;
686}
687
688inline bool safe_parse_negative_int(
689 string text, int32* value_p) {
690 int base = 10;
691 int32 value = 0;
692 const int32 vmin = std::numeric_limits<int32>::min();
693 assert(vmin < 0);
694 assert(vmin <= 0 - base);
695 int32 vmin_over_base = vmin / base;
696 // 2003 c++ standard [expr.mul]
697 // "... the sign of the remainder is implementation-defined."
698 // Although (vmin/base)*base + vmin%base is always vmin.
699 // 2011 c++ standard tightens the spec but we cannot rely on it.
700 if (vmin % base > 0) {
701 vmin_over_base += 1;
702 }
703 const char* start = text.data();
704 const char* end = start + text.size();
705 // loop over digits
706 for (; start < end; ++start) {
707 unsigned char c = static_cast<unsigned char>(start[0]);
708 int digit = c - '0';
709 if (digit >= base || digit < 0) {
710 *value_p = value;
711 return false;
712 }
713 if (value < vmin_over_base) {
714 *value_p = vmin;
715 return false;
716 }
717 value *= base;
718 if (value < vmin + digit) {
719 *value_p = vmin;
720 return false;
721 }
722 value -= digit;
723 }
724 *value_p = value;
725 return true;
726}
727
728bool safe_int(string text, int32* value_p) {
729 *value_p = 0;
730 bool negative;
731 if (!safe_parse_sign(&text, &negative)) {
732 return false;
733 }
734 if (!negative) {
735 return safe_parse_positive_int(text, value_p);
736 } else {
737 return safe_parse_negative_int(text, value_p);
738 }
739}
740
temporal40ee5512008-07-10 02:12:20 +0000741// ----------------------------------------------------------------------
742// FastIntToBuffer()
743// FastInt64ToBuffer()
744// FastHexToBuffer()
745// FastHex64ToBuffer()
746// FastHex32ToBuffer()
747// ----------------------------------------------------------------------
748
749// Offset into buffer where FastInt64ToBuffer places the end of string
750// null character. Also used by FastInt64ToBufferLeft.
751static const int kFastInt64ToBufferOffset = 21;
752
753char *FastInt64ToBuffer(int64 i, char* buffer) {
754 // We could collapse the positive and negative sections, but that
755 // would be slightly slower for positive numbers...
756 // 22 bytes is enough to store -2**64, -18446744073709551616.
757 char* p = buffer + kFastInt64ToBufferOffset;
758 *p-- = '\0';
759 if (i >= 0) {
760 do {
761 *p-- = '0' + i % 10;
762 i /= 10;
763 } while (i > 0);
764 return p + 1;
765 } else {
766 // On different platforms, % and / have different behaviors for
767 // negative numbers, so we need to jump through hoops to make sure
768 // we don't divide negative numbers.
769 if (i > -10) {
770 i = -i;
771 *p-- = '0' + i;
772 *p = '-';
773 return p;
774 } else {
775 // Make sure we aren't at MIN_INT, in which case we can't say i = -i
776 i = i + 10;
777 i = -i;
778 *p-- = '0' + i % 10;
779 // Undo what we did a moment ago
780 i = i / 10 + 1;
781 do {
782 *p-- = '0' + i % 10;
783 i /= 10;
784 } while (i > 0);
785 *p = '-';
786 return p;
787 }
788 }
789}
790
791// Offset into buffer where FastInt32ToBuffer places the end of string
792// null character. Also used by FastInt32ToBufferLeft
793static const int kFastInt32ToBufferOffset = 11;
794
795// Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the
796// compiler to generate 32 bit arithmetic instructions. It's much faster, at
797// least with 32 bit binaries.
798char *FastInt32ToBuffer(int32 i, char* buffer) {
799 // We could collapse the positive and negative sections, but that
800 // would be slightly slower for positive numbers...
801 // 12 bytes is enough to store -2**32, -4294967296.
802 char* p = buffer + kFastInt32ToBufferOffset;
803 *p-- = '\0';
804 if (i >= 0) {
805 do {
806 *p-- = '0' + i % 10;
807 i /= 10;
808 } while (i > 0);
809 return p + 1;
810 } else {
811 // On different platforms, % and / have different behaviors for
812 // negative numbers, so we need to jump through hoops to make sure
813 // we don't divide negative numbers.
814 if (i > -10) {
815 i = -i;
816 *p-- = '0' + i;
817 *p = '-';
818 return p;
819 } else {
820 // Make sure we aren't at MIN_INT, in which case we can't say i = -i
821 i = i + 10;
822 i = -i;
823 *p-- = '0' + i % 10;
824 // Undo what we did a moment ago
825 i = i / 10 + 1;
826 do {
827 *p-- = '0' + i % 10;
828 i /= 10;
829 } while (i > 0);
830 *p = '-';
831 return p;
832 }
833 }
834}
835
836char *FastHexToBuffer(int i, char* buffer) {
837 GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
838
839 static const char *hexdigits = "0123456789abcdef";
840 char *p = buffer + 21;
841 *p-- = '\0';
842 do {
843 *p-- = hexdigits[i & 15]; // mod by 16
844 i >>= 4; // divide by 16
845 } while (i > 0);
846 return p + 1;
847}
848
849char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
850 static const char *hexdigits = "0123456789abcdef";
851 buffer[num_byte] = '\0';
852 for (int i = num_byte - 1; i >= 0; i--) {
liujisi@google.comcb6dd4e2011-07-05 21:05:40 +0000853#ifdef _M_X64
854 // MSVC x64 platform has a bug optimizing the uint32(value) in the #else
855 // block. Given that the uint32 cast was to improve performance on 32-bit
856 // platforms, we use 64-bit '&' directly.
857 buffer[i] = hexdigits[value & 0xf];
858#else
temporal40ee5512008-07-10 02:12:20 +0000859 buffer[i] = hexdigits[uint32(value) & 0xf];
liujisi@google.comcb6dd4e2011-07-05 21:05:40 +0000860#endif
temporal40ee5512008-07-10 02:12:20 +0000861 value >>= 4;
862 }
863 return buffer;
864}
865
866char *FastHex64ToBuffer(uint64 value, char* buffer) {
867 return InternalFastHexToBuffer(value, buffer, 16);
868}
869
870char *FastHex32ToBuffer(uint32 value, char* buffer) {
871 return InternalFastHexToBuffer(value, buffer, 8);
872}
873
// Writes, backwards from |p|, the ones digit of |num|, then its tens digit,
// then |prev_sep|, and returns the position just before the separator.
// |num| is expected to be in the range 0..99.
static inline char* PlaceNum(char* p, int num, char prev_sep) {
  const int tens = num / 10;
  const int ones = num % 10;
  p[0] = '0' + ones;
  p[-1] = '0' + tens;
  p[-2] = prev_sep;
  return p - 3;
}
880
881// ----------------------------------------------------------------------
882// FastInt32ToBufferLeft()
883// FastUInt32ToBufferLeft()
884// FastInt64ToBufferLeft()
885// FastUInt64ToBufferLeft()
886//
887// Like the Fast*ToBuffer() functions above, these are intended for speed.
888// Unlike the Fast*ToBuffer() functions, however, these functions write
889// their output to the beginning of the buffer (hence the name, as the
890// output is left-aligned). The caller is responsible for ensuring that
891// the buffer has enough space to hold the output.
892//
893// Returns a pointer to the end of the string (i.e. the null character
894// terminating the string).
895// ----------------------------------------------------------------------
896
// Lookup table: entry n (0..99) holds the two ASCII characters of n written
// as a zero-padded two-digit decimal number.  Each row below covers one value
// of the tens digit.
static const char two_ASCII_digits[100][2] = {
  {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
  {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
  {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
  {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
  {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
  {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
  {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
  {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
  {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
  {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
};
919
920char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
921 int digits;
922 const char *ASCII_digits = NULL;
923 // The idea of this implementation is to trim the number of divides to as few
924 // as possible by using multiplication and subtraction rather than mod (%),
925 // and by outputting two digits at a time rather than one.
926 // The huge-number case is first, in the hopes that the compiler will output
927 // that case in one branch-free block of code, and only output conditional
928 // branches into it from below.
929 if (u >= 1000000000) { // >= 1,000,000,000
930 digits = u / 100000000; // 100,000,000
931 ASCII_digits = two_ASCII_digits[digits];
932 buffer[0] = ASCII_digits[0];
933 buffer[1] = ASCII_digits[1];
934 buffer += 2;
935sublt100_000_000:
936 u -= digits * 100000000; // 100,000,000
937lt100_000_000:
938 digits = u / 1000000; // 1,000,000
939 ASCII_digits = two_ASCII_digits[digits];
940 buffer[0] = ASCII_digits[0];
941 buffer[1] = ASCII_digits[1];
942 buffer += 2;
943sublt1_000_000:
944 u -= digits * 1000000; // 1,000,000
945lt1_000_000:
946 digits = u / 10000; // 10,000
947 ASCII_digits = two_ASCII_digits[digits];
948 buffer[0] = ASCII_digits[0];
949 buffer[1] = ASCII_digits[1];
950 buffer += 2;
951sublt10_000:
952 u -= digits * 10000; // 10,000
953lt10_000:
954 digits = u / 100;
955 ASCII_digits = two_ASCII_digits[digits];
956 buffer[0] = ASCII_digits[0];
957 buffer[1] = ASCII_digits[1];
958 buffer += 2;
959sublt100:
960 u -= digits * 100;
961lt100:
962 digits = u;
963 ASCII_digits = two_ASCII_digits[digits];
964 buffer[0] = ASCII_digits[0];
965 buffer[1] = ASCII_digits[1];
966 buffer += 2;
967done:
968 *buffer = 0;
969 return buffer;
970 }
971
972 if (u < 100) {
973 digits = u;
974 if (u >= 10) goto lt100;
975 *buffer++ = '0' + digits;
976 goto done;
977 }
978 if (u < 10000) { // 10,000
979 if (u >= 1000) goto lt10_000;
980 digits = u / 100;
981 *buffer++ = '0' + digits;
982 goto sublt100;
983 }
984 if (u < 1000000) { // 1,000,000
985 if (u >= 100000) goto lt1_000_000;
986 digits = u / 10000; // 10,000
987 *buffer++ = '0' + digits;
988 goto sublt10_000;
989 }
990 if (u < 100000000) { // 100,000,000
991 if (u >= 10000000) goto lt100_000_000;
992 digits = u / 1000000; // 1,000,000
993 *buffer++ = '0' + digits;
994 goto sublt1_000_000;
995 }
996 // we already know that u < 1,000,000,000
997 digits = u / 100000000; // 100,000,000
998 *buffer++ = '0' + digits;
999 goto sublt100_000_000;
1000}
1001
1002char* FastInt32ToBufferLeft(int32 i, char* buffer) {
1003 uint32 u = i;
1004 if (i < 0) {
1005 *buffer++ = '-';
1006 u = -i;
1007 }
1008 return FastUInt32ToBufferLeft(u, buffer);
1009}
1010
1011char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
1012 int digits;
1013 const char *ASCII_digits = NULL;
1014
1015 uint32 u = static_cast<uint32>(u64);
1016 if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
1017
1018 uint64 top_11_digits = u64 / 1000000000;
1019 buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
1020 u = u64 - (top_11_digits * 1000000000);
1021
1022 digits = u / 10000000; // 10,000,000
1023 GOOGLE_DCHECK_LT(digits, 100);
1024 ASCII_digits = two_ASCII_digits[digits];
1025 buffer[0] = ASCII_digits[0];
1026 buffer[1] = ASCII_digits[1];
1027 buffer += 2;
1028 u -= digits * 10000000; // 10,000,000
1029 digits = u / 100000; // 100,000
1030 ASCII_digits = two_ASCII_digits[digits];
1031 buffer[0] = ASCII_digits[0];
1032 buffer[1] = ASCII_digits[1];
1033 buffer += 2;
1034 u -= digits * 100000; // 100,000
1035 digits = u / 1000; // 1,000
1036 ASCII_digits = two_ASCII_digits[digits];
1037 buffer[0] = ASCII_digits[0];
1038 buffer[1] = ASCII_digits[1];
1039 buffer += 2;
1040 u -= digits * 1000; // 1,000
1041 digits = u / 10;
1042 ASCII_digits = two_ASCII_digits[digits];
1043 buffer[0] = ASCII_digits[0];
1044 buffer[1] = ASCII_digits[1];
1045 buffer += 2;
1046 u -= digits * 10;
1047 digits = u;
1048 *buffer++ = '0' + digits;
1049 *buffer = 0;
1050 return buffer;
1051}
1052
1053char* FastInt64ToBufferLeft(int64 i, char* buffer) {
1054 uint64 u = i;
1055 if (i < 0) {
1056 *buffer++ = '-';
1057 u = -i;
1058 }
1059 return FastUInt64ToBufferLeft(u, buffer);
1060}
1061
// ----------------------------------------------------------------------
// SimpleItoa()
//    Description: converts an integer to a string.
//
//    Return value: string
// ----------------------------------------------------------------------
1068
1069string SimpleItoa(int i) {
1070 char buffer[kFastToBufferSize];
1071 return (sizeof(i) == 4) ?
1072 FastInt32ToBuffer(i, buffer) :
1073 FastInt64ToBuffer(i, buffer);
1074}
1075
1076string SimpleItoa(unsigned int i) {
1077 char buffer[kFastToBufferSize];
1078 return string(buffer, (sizeof(i) == 4) ?
1079 FastUInt32ToBufferLeft(i, buffer) :
1080 FastUInt64ToBufferLeft(i, buffer));
1081}
1082
1083string SimpleItoa(long i) {
1084 char buffer[kFastToBufferSize];
1085 return (sizeof(i) == 4) ?
1086 FastInt32ToBuffer(i, buffer) :
1087 FastInt64ToBuffer(i, buffer);
1088}
1089
1090string SimpleItoa(unsigned long i) {
1091 char buffer[kFastToBufferSize];
1092 return string(buffer, (sizeof(i) == 4) ?
1093 FastUInt32ToBufferLeft(i, buffer) :
1094 FastUInt64ToBufferLeft(i, buffer));
1095}
1096
1097string SimpleItoa(long long i) {
1098 char buffer[kFastToBufferSize];
1099 return (sizeof(i) == 4) ?
1100 FastInt32ToBuffer(i, buffer) :
1101 FastInt64ToBuffer(i, buffer);
1102}
1103
1104string SimpleItoa(unsigned long long i) {
1105 char buffer[kFastToBufferSize];
1106 return string(buffer, (sizeof(i) == 4) ?
1107 FastUInt32ToBufferLeft(i, buffer) :
1108 FastUInt64ToBufferLeft(i, buffer));
1109}
1110
// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
//    We want to print the value without losing precision, but we also do
//    not want to print more digits than necessary.  This turns out to be
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
//    exactly in binary.  If we print 0.2 with a very large precision,
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
//    On the other hand, if we set the precision too low, we lose
//    significant digits when printing numbers that actually need them.
//    It turns out there is no precision value that does the right thing
//    for all numbers.
//
//    Our strategy is to first try printing with a precision that is never
//    over-precise, then parse the result with strtod() to see if it
//    matches.  If not, we print again with a precision that will always
//    give a precise result, but may use more digits than necessary.
//
//    An arguably better strategy would be to use the algorithm described
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
//    however, that the following implementation is about as fast as
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
//    will not scale well on multi-core machines.  DMG's code is slightly
//    more accurate (in that it will never use more digits than
//    necessary), but this is probably irrelevant for most users.
//
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
//    one in that it makes guesses and then uses strtod() to check them.
//    Their implementation is faster because they use their own code to
//    generate the digits in the first place rather than use snprintf(),
//    thus avoiding format string parsing overhead.  However, this makes
//    it considerably more complicated than the following implementation,
//    and it is embedded in a larger library.  If speed turns out to be
//    an issue, we could re-implement this in terms of their
//    implementation.
// ----------------------------------------------------------------------
1151
1152string SimpleDtoa(double value) {
1153 char buffer[kDoubleToBufferSize];
1154 return DoubleToBuffer(value, buffer);
1155}
1156
1157string SimpleFtoa(float value) {
1158 char buffer[kFloatToBufferSize];
1159 return FloatToBuffer(value, buffer);
1160}
1161
// Returns true for the characters that can appear in printed floating-point
// text regardless of locale: decimal digits, the exponent markers 'e' / 'E',
// and the signs '+' / '-'.  Everything else (including the radix character
// and NUL) yields false.
static inline bool IsValidFloatChar(char c) {
  switch (c) {
    case 'e':
    case 'E':
    case '+':
    case '-':
      return true;
    default:
      return c >= '0' && c <= '9';
  }
}
1167
1168void DelocalizeRadix(char* buffer) {
1169 // Fast check: if the buffer has a normal decimal point, assume no
1170 // translation is needed.
1171 if (strchr(buffer, '.') != NULL) return;
1172
1173 // Find the first unknown character.
1174 while (IsValidFloatChar(*buffer)) ++buffer;
1175
1176 if (*buffer == '\0') {
1177 // No radix character found.
1178 return;
1179 }
1180
1181 // We are now pointing at the locale-specific radix character. Replace it
1182 // with '.'.
1183 *buffer = '.';
1184 ++buffer;
1185
1186 if (!IsValidFloatChar(*buffer) && *buffer != '\0') {
1187 // It appears the radix was a multi-byte character. We need to remove the
1188 // extra bytes.
1189 char* target = buffer;
1190 do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0');
1191 memmove(target, buffer, strlen(buffer) + 1);
1192 }
1193}
1194
1195char* DoubleToBuffer(double value, char* buffer) {
1196 // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1197 // platforms these days. Just in case some system exists where DBL_DIG
1198 // is significantly larger -- and risks overflowing our buffer -- we have
1199 // this assert.
1200 GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1201
1202 if (value == numeric_limits<double>::infinity()) {
1203 strcpy(buffer, "inf");
1204 return buffer;
1205 } else if (value == -numeric_limits<double>::infinity()) {
1206 strcpy(buffer, "-inf");
1207 return buffer;
1208 } else if (IsNaN(value)) {
1209 strcpy(buffer, "nan");
1210 return buffer;
1211 }
1212
1213 int snprintf_result =
1214 snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1215
1216 // The snprintf should never overflow because the buffer is significantly
1217 // larger than the precision we asked for.
1218 GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1219
1220 // We need to make parsed_value volatile in order to force the compiler to
1221 // write it out to the stack. Otherwise, it may keep the value in a
1222 // register, and if it does that, it may keep it as a long double instead
1223 // of a double. This long double may have extra bits that make it compare
1224 // unequal to "value" even though it would be exactly equal if it were
1225 // truncated to a double.
1226 volatile double parsed_value = strtod(buffer, NULL);
1227 if (parsed_value != value) {
1228 int snprintf_result =
1229 snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
1230
1231 // Should never overflow; see above.
1232 GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1233 }
1234
1235 DelocalizeRadix(buffer);
1236 return buffer;
1237}
1238
// Parses `str` as a floating-point number and stores the result in `*value`
// (which is written even when parsing fails).  Returns true only if `str` is
// non-empty, the entire string was consumed, and the conversion left errno
// at zero.
bool safe_strtof(const char* str, float* value) {
  char* parse_end = NULL;
  errno = 0;  // errno only gets set on errors
#if defined(_WIN32) || defined (__hpux)  // has no strtof()
  *value = strtod(str, &parse_end);
#else
  *value = strtof(str, &parse_end);
#endif
  if (*str == '\0') return false;        // empty input
  if (*parse_end != '\0') return false;  // trailing unparsed characters
  return errno == 0;
}
1249
1250char* FloatToBuffer(float value, char* buffer) {
1251 // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1252 // platforms these days. Just in case some system exists where FLT_DIG
1253 // is significantly larger -- and risks overflowing our buffer -- we have
1254 // this assert.
1255 GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1256
1257 if (value == numeric_limits<double>::infinity()) {
1258 strcpy(buffer, "inf");
1259 return buffer;
1260 } else if (value == -numeric_limits<double>::infinity()) {
1261 strcpy(buffer, "-inf");
1262 return buffer;
1263 } else if (IsNaN(value)) {
1264 strcpy(buffer, "nan");
1265 return buffer;
1266 }
1267
1268 int snprintf_result =
1269 snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1270
1271 // The snprintf should never overflow because the buffer is significantly
1272 // larger than the precision we asked for.
1273 GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1274
1275 float parsed_value;
1276 if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1277 int snprintf_result =
1278 snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value);
1279
1280 // Should never overflow; see above.
1281 GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1282 }
1283
1284 DelocalizeRadix(buffer);
1285 return buffer;
1286}
1287
// Returns `num` as lowercase hexadecimal text with no prefix and no leading
// zeros; zero is rendered as "0".
string ToHex(uint64 num) {
  static const char kHexChars[] = "0123456789abcdef";
  if (num == 0) {
    return string("0");
  }

  // A 64-bit value never needs more than 16 hex digits.  Digits are produced
  // least-significant first, so the scratch area is filled from its end
  // towards its start.
  const int kMaxDigits = 16;
  char scratch[kMaxDigits];
  int start = kMaxDigits;
  do {
    scratch[--start] = kHexChars[num & 0xf];
    num >>= 4;
  } while (num != 0);

  return string(scratch + start, kMaxDigits - start);
}
1305
Jisi Liu885b6122015-02-28 14:51:22 -08001306namespace strings {
1307
1308AlphaNum::AlphaNum(strings::Hex hex) {
1309 char *const end = &digits[kFastToBufferSize];
1310 char *writer = end;
1311 uint64 value = hex.value;
1312 uint64 width = hex.spec;
1313 // We accomplish minimum width by OR'ing in 0x10000 to the user's value,
1314 // where 0x10000 is the smallest hex number that is as wide as the user
1315 // asked for.
1316 uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value;
1317 static const char hexdigits[] = "0123456789abcdef";
1318 do {
1319 *--writer = hexdigits[value & 0xF];
1320 value >>= 4;
1321 mask >>= 4;
1322 } while (mask != 0);
1323 piece_data_ = writer;
1324 piece_size_ = end - writer;
1325}
1326
1327} // namespace strings
1328
// ----------------------------------------------------------------------
// StrCat()
//    This merges the given strings or integers, with no delimiter.  This
//    is designed to be the fastest possible way to construct a string out
//    of a mix of raw C strings, C++ strings, and integer values.
// ----------------------------------------------------------------------
1335
1336// Append is merely a version of memcpy that returns the address of the byte
1337// after the area just overwritten. It comes in multiple flavors to minimize
1338// call overhead.
1339static char *Append1(char *out, const AlphaNum &x) {
1340 memcpy(out, x.data(), x.size());
1341 return out + x.size();
1342}
1343
1344static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) {
1345 memcpy(out, x1.data(), x1.size());
1346 out += x1.size();
1347
1348 memcpy(out, x2.data(), x2.size());
1349 return out + x2.size();
1350}
1351
1352static char *Append4(char *out,
1353 const AlphaNum &x1, const AlphaNum &x2,
1354 const AlphaNum &x3, const AlphaNum &x4) {
1355 memcpy(out, x1.data(), x1.size());
1356 out += x1.size();
1357
1358 memcpy(out, x2.data(), x2.size());
1359 out += x2.size();
1360
1361 memcpy(out, x3.data(), x3.size());
1362 out += x3.size();
1363
1364 memcpy(out, x4.data(), x4.size());
1365 return out + x4.size();
1366}
1367
1368string StrCat(const AlphaNum &a, const AlphaNum &b) {
1369 string result;
1370 result.resize(a.size() + b.size());
1371 char *const begin = &*result.begin();
1372 char *out = Append2(begin, a, b);
1373 GOOGLE_DCHECK_EQ(out, begin + result.size());
1374 return result;
1375}
1376
1377string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
1378 string result;
1379 result.resize(a.size() + b.size() + c.size());
1380 char *const begin = &*result.begin();
1381 char *out = Append2(begin, a, b);
1382 out = Append1(out, c);
1383 GOOGLE_DCHECK_EQ(out, begin + result.size());
1384 return result;
1385}
1386
1387string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1388 const AlphaNum &d) {
1389 string result;
1390 result.resize(a.size() + b.size() + c.size() + d.size());
1391 char *const begin = &*result.begin();
1392 char *out = Append4(begin, a, b, c, d);
1393 GOOGLE_DCHECK_EQ(out, begin + result.size());
1394 return result;
1395}
1396
1397string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1398 const AlphaNum &d, const AlphaNum &e) {
1399 string result;
1400 result.resize(a.size() + b.size() + c.size() + d.size() + e.size());
1401 char *const begin = &*result.begin();
1402 char *out = Append4(begin, a, b, c, d);
1403 out = Append1(out, e);
1404 GOOGLE_DCHECK_EQ(out, begin + result.size());
1405 return result;
1406}
1407
1408string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1409 const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {
1410 string result;
1411 result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1412 f.size());
1413 char *const begin = &*result.begin();
1414 char *out = Append4(begin, a, b, c, d);
1415 out = Append2(out, e, f);
1416 GOOGLE_DCHECK_EQ(out, begin + result.size());
1417 return result;
1418}
1419
1420string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1421 const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1422 const AlphaNum &g) {
1423 string result;
1424 result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1425 f.size() + g.size());
1426 char *const begin = &*result.begin();
1427 char *out = Append4(begin, a, b, c, d);
1428 out = Append2(out, e, f);
1429 out = Append1(out, g);
1430 GOOGLE_DCHECK_EQ(out, begin + result.size());
1431 return result;
1432}
1433
1434string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1435 const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1436 const AlphaNum &g, const AlphaNum &h) {
1437 string result;
1438 result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1439 f.size() + g.size() + h.size());
1440 char *const begin = &*result.begin();
1441 char *out = Append4(begin, a, b, c, d);
1442 out = Append4(out, e, f, g, h);
1443 GOOGLE_DCHECK_EQ(out, begin + result.size());
1444 return result;
1445}
1446
1447string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1448 const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1449 const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {
1450 string result;
1451 result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1452 f.size() + g.size() + h.size() + i.size());
1453 char *const begin = &*result.begin();
1454 char *out = Append4(begin, a, b, c, d);
1455 out = Append4(out, e, f, g, h);
1456 out = Append1(out, i);
1457 GOOGLE_DCHECK_EQ(out, begin + result.size());
1458 return result;
1459}
1460
// StrAppend can be handed a piece whose data pointer lies partway into the
// very string being appended to, but the outcome of doing so is
// unpredictable, so debug builds check for it.  The pointer difference is
// compared as an unsigned value: a source located before the destination
// wraps around to a very large number, so one greater-than comparison
// against the destination's size covers both directions.
#define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \
    GOOGLE_DCHECK_GT(static_cast<uintptr_t>((src).data() - (dest).data()), \
                     static_cast<uintptr_t>((dest).size()))
1468
1469void StrAppend(string *result, const AlphaNum &a) {
1470 GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1471 result->append(a.data(), a.size());
1472}
1473
1474void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {
1475 GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1476 GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1477 string::size_type old_size = result->size();
1478 result->resize(old_size + a.size() + b.size());
1479 char *const begin = &*result->begin();
1480 char *out = Append2(begin + old_size, a, b);
1481 GOOGLE_DCHECK_EQ(out, begin + result->size());
1482}
1483
1484void StrAppend(string *result,
1485 const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
1486 GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1487 GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1488 GOOGLE_DCHECK_NO_OVERLAP(*result, c);
1489 string::size_type old_size = result->size();
1490 result->resize(old_size + a.size() + b.size() + c.size());
1491 char *const begin = &*result->begin();
1492 char *out = Append2(begin + old_size, a, b);
1493 out = Append1(out, c);
1494 GOOGLE_DCHECK_EQ(out, begin + result->size());
1495}
1496
1497void StrAppend(string *result,
1498 const AlphaNum &a, const AlphaNum &b,
1499 const AlphaNum &c, const AlphaNum &d) {
1500 GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1501 GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1502 GOOGLE_DCHECK_NO_OVERLAP(*result, c);
1503 GOOGLE_DCHECK_NO_OVERLAP(*result, d);
1504 string::size_type old_size = result->size();
1505 result->resize(old_size + a.size() + b.size() + c.size() + d.size());
1506 char *const begin = &*result->begin();
1507 char *out = Append4(begin + old_size, a, b, c, d);
1508 GOOGLE_DCHECK_EQ(out, begin + result->size());
1509}
1510
Feng Xiao6ef984a2014-11-10 17:34:54 -08001511int GlobalReplaceSubstring(const string& substring,
1512 const string& replacement,
1513 string* s) {
1514 GOOGLE_CHECK(s != NULL);
1515 if (s->empty() || substring.empty())
1516 return 0;
1517 string tmp;
1518 int num_replacements = 0;
1519 int pos = 0;
1520 for (int match_pos = s->find(substring.data(), pos, substring.length());
1521 match_pos != string::npos;
1522 pos = match_pos + substring.length(),
1523 match_pos = s->find(substring.data(), pos, substring.length())) {
1524 ++num_replacements;
1525 // Append the original content before the match.
1526 tmp.append(*s, pos, match_pos - pos);
1527 // Append the replacement for the match.
1528 tmp.append(replacement.begin(), replacement.end());
1529 }
1530 // Append the content after the last match. If no replacements were made, the
1531 // original string is left untouched.
1532 if (num_replacements > 0) {
1533 tmp.append(*s, pos, s->length() - pos);
1534 s->swap(tmp);
1535 }
1536 return num_replacements;
1537}
1538
temporal40ee5512008-07-10 02:12:20 +00001539} // namespace protobuf
1540} // namespace google