temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 1 | // Protocol Buffers - Google's data interchange format |
kenton@google.com | 24bf56f | 2008-09-24 20:31:01 +0000 | [diff] [blame] | 2 | // Copyright 2008 Google Inc. All rights reserved. |
Feng Xiao | e428862 | 2014-10-01 16:26:23 -0700 | [diff] [blame] | 3 | // https://developers.google.com/protocol-buffers/ |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 4 | // |
kenton@google.com | 24bf56f | 2008-09-24 20:31:01 +0000 | [diff] [blame] | 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are |
| 7 | // met: |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 8 | // |
kenton@google.com | 24bf56f | 2008-09-24 20:31:01 +0000 | [diff] [blame] | 9 | // * Redistributions of source code must retain the above copyright |
| 10 | // notice, this list of conditions and the following disclaimer. |
| 11 | // * Redistributions in binary form must reproduce the above |
| 12 | // copyright notice, this list of conditions and the following disclaimer |
| 13 | // in the documentation and/or other materials provided with the |
| 14 | // distribution. |
| 15 | // * Neither the name of Google Inc. nor the names of its |
| 16 | // contributors may be used to endorse or promote products derived from |
| 17 | // this software without specific prior written permission. |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 18 | // |
kenton@google.com | 24bf56f | 2008-09-24 20:31:01 +0000 | [diff] [blame] | 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 30 | |
| 31 | // from google3/strings/strutil.cc |
| 32 | |
| 33 | #include <google/protobuf/stubs/strutil.h> |
| 34 | #include <errno.h> |
| 35 | #include <float.h> // FLT_DIG and DBL_DIG |
| 36 | #include <limits> |
| 37 | #include <limits.h> |
kenton@google.com | 25bc5cd | 2008-12-04 20:34:50 +0000 | [diff] [blame] | 38 | #include <stdio.h> |
kenton@google.com | c91e1fe | 2009-10-12 19:22:03 +0000 | [diff] [blame] | 39 | #include <iterator> |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 40 | |
| 41 | #ifdef _WIN32 |
| 42 | // MSVC has only _snprintf, not snprintf. |
| 43 | // |
| 44 | // MinGW has both snprintf and _snprintf, but they appear to be different |
| 45 | // functions. The former is buggy. When invoked like so: |
| 46 | // char buffer[32]; |
| 47 | // snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f); |
| 48 | // it prints "1.23000e+10". This is plainly wrong: %g should never print |
| 49 | // trailing zeros after the decimal point. For some reason this bug only |
| 50 | // occurs with some input values, not all. In any case, _snprintf does the |
| 51 | // right thing, so we use it. |
| 52 | #define snprintf _snprintf |
| 53 | #endif |
| 54 | |
| 55 | namespace google { |
| 56 | namespace protobuf { |
| 57 | |
// Returns true iff |value| is a NaN.
inline bool IsNaN(double value) {
  // A NaN is the only double that does not compare equal to itself.
  return !(value == value);
}
| 62 | |
kenton@google.com | a2a32c2 | 2008-11-14 17:29:32 +0000 | [diff] [blame] | 63 | // These are defined as macros on some platforms. #undef them so that we can |
| 64 | // redefine them. |
| 65 | #undef isxdigit |
| 66 | #undef isprint |
| 67 | |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 68 | // The definitions of these in ctype.h change based on locale. Since our |
| 69 | // string manipulation is all in relation to the protocol buffer and C++ |
| 70 | // languages, we always want to use the C locale. So, we re-define these |
| 71 | // exactly as we want them. |
// Locale-independent hex digit test: accepts only '0'-'9', 'a'-'f', 'A'-'F'.
inline bool isxdigit(char c) {
  if (c >= '0' && c <= '9') return true;
  if (c >= 'a' && c <= 'f') return true;
  return c >= 'A' && c <= 'F';
}
| 77 | |
// Locale-independent printable test: accepts only 0x20 (space) through
// 0x7E ('~'), inclusive.
inline bool isprint(char c) {
  return !(c < 0x20 || c > 0x7E);
}
| 81 | |
| 82 | // ---------------------------------------------------------------------- |
| 83 | // StripString |
| 84 | // Replaces any occurrence of the character 'remove' (or the characters |
| 85 | // in 'remove') with the character 'replacewith'. |
| 86 | // ---------------------------------------------------------------------- |
void StripString(std::string* s, const char* remove, char replacewith) {
  // Walk the string by index using std::string's own search instead of
  // strpbrk() on c_str():
  //  * strpbrk() stops at the first '\0', so characters after an embedded
  //    NUL in *s were never replaced;
  //  * no raw pointer into the string's buffer is held across the
  //    non-const operator[] call that performs the write.
  // 'remove' is still interpreted as a NUL-terminated set of characters.
  for (std::string::size_type pos = s->find_first_of(remove);
       pos != std::string::npos;
       pos = s->find_first_of(remove, pos + 1)) {
    (*s)[pos] = replacewith;
  }
}
| 96 | |
Feng Xiao | 6ef984a | 2014-11-10 17:34:54 -0800 | [diff] [blame] | 97 | void StripWhitespace(string* str) { |
| 98 | int str_length = str->length(); |
| 99 | |
| 100 | // Strip off leading whitespace. |
| 101 | int first = 0; |
| 102 | while (first < str_length && ascii_isspace(str->at(first))) { |
| 103 | ++first; |
| 104 | } |
| 105 | // If entire string is white space. |
| 106 | if (first == str_length) { |
| 107 | str->clear(); |
| 108 | return; |
| 109 | } |
| 110 | if (first > 0) { |
| 111 | str->erase(0, first); |
| 112 | str_length -= first; |
| 113 | } |
| 114 | |
| 115 | // Strip off trailing whitespace. |
| 116 | int last = str_length - 1; |
| 117 | while (last >= 0 && ascii_isspace(str->at(last))) { |
| 118 | --last; |
| 119 | } |
| 120 | if (last != (str_length - 1) && last >= 0) { |
| 121 | str->erase(last + 1, string::npos); |
| 122 | } |
| 123 | } |
| 124 | |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 125 | // ---------------------------------------------------------------------- |
| 126 | // StringReplace() |
| 127 | // Replace the "old" pattern with the "new" pattern in a string, |
| 128 | // and append the result to "res". If replace_all is false, |
| 129 | // it only replaces the first instance of "old." |
| 130 | // ---------------------------------------------------------------------- |
| 131 | |
void StringReplace(const std::string& s, const std::string& oldsub,
                   const std::string& newsub, bool replace_all,
                   std::string* res) {
  // An empty pattern can never match; pass the input through untouched.
  if (oldsub.empty()) {
    res->append(s);
    return;
  }

  // 'cursor' marks the first character of 's' not yet copied to 'res'.
  std::string::size_type cursor = 0;
  for (;;) {
    const std::string::size_type hit = s.find(oldsub, cursor);
    if (hit == std::string::npos) {
      break;
    }
    // Copy the text preceding the match, then the replacement.
    res->append(s, cursor, hit - cursor);
    res->append(newsub);
    cursor = hit + oldsub.size();  // resume searching after the match
    if (!replace_all) {
      break;  // only the first occurrence was requested
    }
  }
  // Copy whatever follows the last match (or all of 's' if none matched).
  res->append(s, cursor, s.length() - cursor);
}
| 153 | |
| 154 | // ---------------------------------------------------------------------- |
| 155 | // StringReplace() |
| 156 | // Give me a string and two patterns "old" and "new", and I replace |
| 157 | // the first instance of "old" in the string with "new", if it |
| 158 | // exists. If "global" is true; call this repeatedly until it |
| 159 | // fails. RETURN a new string, regardless of whether the replacement |
| 160 | // happened or not. |
| 161 | // ---------------------------------------------------------------------- |
| 162 | |
| 163 | string StringReplace(const string& s, const string& oldsub, |
| 164 | const string& newsub, bool replace_all) { |
| 165 | string ret; |
| 166 | StringReplace(s, oldsub, newsub, replace_all, &ret); |
| 167 | return ret; |
| 168 | } |
| 169 | |
| 170 | // ---------------------------------------------------------------------- |
| 171 | // SplitStringUsing() |
| 172 | // Split a string using a character delimiter. Append the components |
| 173 | // to 'result'. |
| 174 | // |
| 175 | // Note: For multi-character delimiters, this routine will split on *ANY* of |
| 176 | // the characters in the string, not the entire string as a single delimiter. |
| 177 | // ---------------------------------------------------------------------- |
// Writes each non-empty token of 'full' to the output iterator 'result'.
// Every character in 'delim' acts as a separator; runs of separators and
// leading/trailing separators produce no tokens.
template <typename ITR>
static inline
void SplitStringToIteratorUsing(const std::string& full,
                                const char* delim,
                                ITR& result) {
  // Fast path for the common case of exactly one delimiter character.
  if (delim[0] != '\0' && delim[1] == '\0') {
    const char separator = delim[0];
    const std::string::size_type size = full.size();
    std::string::size_type pos = 0;
    while (pos < size) {
      if (full[pos] == separator) {
        ++pos;  // skip separators between tokens
        continue;
      }
      // 'pos' is the first character of a token; find where it ends.
      std::string::size_type token_end = full.find(separator, pos + 1);
      if (token_end == std::string::npos) {
        token_end = size;
      }
      *result++ = full.substr(pos, token_end - pos);
      pos = token_end;
    }
    return;
  }

  // General case: any character of 'delim' separates tokens.
  std::string::size_type token_begin = full.find_first_not_of(delim);
  while (token_begin != std::string::npos) {
    const std::string::size_type token_end =
        full.find_first_of(delim, token_begin);
    if (token_end == std::string::npos) {
      // Last token runs to the end of the input.
      *result++ = full.substr(token_begin);
      break;
    }
    *result++ = full.substr(token_begin, token_end - token_begin);
    token_begin = full.find_first_not_of(delim, token_end);
  }
}
| 212 | |
| 213 | void SplitStringUsing(const string& full, |
| 214 | const char* delim, |
| 215 | vector<string>* result) { |
| 216 | back_insert_iterator< vector<string> > it(*result); |
| 217 | SplitStringToIteratorUsing(full, delim, it); |
| 218 | } |
| 219 | |
xiaofeng@google.com | b55a20f | 2012-09-22 02:40:50 +0000 | [diff] [blame] | 220 | // Split a string using a character delimiter. Append the components |
| 221 | // to 'result'. If there are consecutive delimiters, this function |
| 222 | // will return corresponding empty strings. The string is split into |
| 223 | // at most the specified number of pieces greedily. This means that the |
| 224 | // last piece may possibly be split further. To split into as many pieces |
| 225 | // as possible, specify 0 as the number of pieces. |
| 226 | // |
| 227 | // If "full" is the empty string, yields an empty string as the only value. |
| 228 | // |
| 229 | // If "pieces" is negative for some reason, it returns the whole string |
| 230 | // ---------------------------------------------------------------------- |
// Writes the pieces of 'full' to the output iterator 'result', splitting on
// any character in 'delim' and keeping empty pieces. At most 'pieces' pieces
// are produced (the last one holds the unsplit remainder); pieces == 0 means
// no limit.
template <typename StringType, typename ITR>
static inline
void SplitStringToIteratorAllowEmpty(const StringType& full,
                                     const char* delim,
                                     int pieces,
                                     ITR& result) {
  std::string::size_type piece_begin = 0;
  int emitted = 0;

  // Cut off pieces while more cuts are allowed and a delimiter remains.
  while (pieces == 0 || emitted < pieces - 1) {
    const std::string::size_type cut = full.find_first_of(delim, piece_begin);
    if (cut == std::string::npos) {
      break;
    }
    *result++ = full.substr(piece_begin, cut - piece_begin);
    piece_begin = cut + 1;
    ++emitted;
  }

  // Whatever is left (possibly empty, possibly still containing delimiters)
  // becomes the final piece.
  *result++ = full.substr(piece_begin);
}
| 251 | |
| 252 | void SplitStringAllowEmpty(const string& full, const char* delim, |
| 253 | vector<string>* result) { |
| 254 | back_insert_iterator<vector<string> > it(*result); |
| 255 | SplitStringToIteratorAllowEmpty(full, delim, 0, it); |
| 256 | } |
| 257 | |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 258 | // ---------------------------------------------------------------------- |
| 259 | // JoinStrings() |
| 260 | // This merges a vector of string components with delim inserted |
//    as separators between components.
| 262 | // |
| 263 | // ---------------------------------------------------------------------- |
| 264 | template <class ITERATOR> |
| 265 | static void JoinStringsIterator(const ITERATOR& start, |
| 266 | const ITERATOR& end, |
| 267 | const char* delim, |
| 268 | string* result) { |
| 269 | GOOGLE_CHECK(result != NULL); |
| 270 | result->clear(); |
| 271 | int delim_length = strlen(delim); |
| 272 | |
| 273 | // Precompute resulting length so we can reserve() memory in one shot. |
| 274 | int length = 0; |
| 275 | for (ITERATOR iter = start; iter != end; ++iter) { |
| 276 | if (iter != start) { |
| 277 | length += delim_length; |
| 278 | } |
| 279 | length += iter->size(); |
| 280 | } |
| 281 | result->reserve(length); |
| 282 | |
| 283 | // Now combine everything. |
| 284 | for (ITERATOR iter = start; iter != end; ++iter) { |
| 285 | if (iter != start) { |
| 286 | result->append(delim, delim_length); |
| 287 | } |
| 288 | result->append(iter->data(), iter->size()); |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | void JoinStrings(const vector<string>& components, |
| 293 | const char* delim, |
| 294 | string * result) { |
| 295 | JoinStringsIterator(components.begin(), components.end(), delim, result); |
| 296 | } |
| 297 | |
| 298 | // ---------------------------------------------------------------------- |
| 299 | // UnescapeCEscapeSequences() |
| 300 | // This does all the unescaping that C does: \ooo, \r, \n, etc |
| 301 | // Returns length of resulting string. |
| 302 | // The implementation of \x parses any positive number of hex digits, |
| 303 | // but it is an error if the value requires more than 8 bits, and the |
| 304 | // result is truncated to 8 bits. |
| 305 | // |
| 306 | // The second call stores its errors in a supplied string vector. |
| 307 | // If the string vector pointer is NULL, it reports the errors with LOG(). |
| 308 | // ---------------------------------------------------------------------- |
| 309 | |
| 310 | #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7')) |
| 311 | |
// Converts a single hexadecimal digit character to its value (0-15).
// The caller must pass a valid hex digit; this is asserted.
inline int hex_digit_to_int(char c) {
  /* Assume ASCII. */
  assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);
  assert(isxdigit(c));
  const int code = static_cast<unsigned char>(c);
  // In ASCII the low nibble of '0'-'9' is already the digit value, while for
  // letters it is 1-6; adding 9 first turns that into 10-15.
  const int adjusted = (code > '9') ? code + 9 : code;
  return adjusted & 0xf;
}
| 322 | |
| 323 | // Protocol buffers doesn't ever care about errors, but I don't want to remove |
| 324 | // the code. |
| 325 | #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false) |
| 326 | |
| 327 | int UnescapeCEscapeSequences(const char* source, char* dest) { |
| 328 | return UnescapeCEscapeSequences(source, dest, NULL); |
| 329 | } |
| 330 | |
| 331 | int UnescapeCEscapeSequences(const char* source, char* dest, |
| 332 | vector<string> *errors) { |
| 333 | GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented."; |
| 334 | |
| 335 | char* d = dest; |
| 336 | const char* p = source; |
| 337 | |
| 338 | // Small optimization for case where source = dest and there's no escaping |
| 339 | while ( p == d && *p != '\0' && *p != '\\' ) |
| 340 | p++, d++; |
| 341 | |
| 342 | while (*p != '\0') { |
| 343 | if (*p != '\\') { |
| 344 | *d++ = *p++; |
| 345 | } else { |
| 346 | switch ( *++p ) { // skip past the '\\' |
| 347 | case '\0': |
| 348 | LOG_STRING(ERROR, errors) << "String cannot end with \\"; |
| 349 | *d = '\0'; |
| 350 | return d - dest; // we're done with p |
| 351 | case 'a': *d++ = '\a'; break; |
| 352 | case 'b': *d++ = '\b'; break; |
| 353 | case 'f': *d++ = '\f'; break; |
| 354 | case 'n': *d++ = '\n'; break; |
| 355 | case 'r': *d++ = '\r'; break; |
| 356 | case 't': *d++ = '\t'; break; |
| 357 | case 'v': *d++ = '\v'; break; |
| 358 | case '\\': *d++ = '\\'; break; |
| 359 | case '?': *d++ = '\?'; break; // \? Who knew? |
| 360 | case '\'': *d++ = '\''; break; |
| 361 | case '"': *d++ = '\"'; break; |
| 362 | case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits |
| 363 | case '4': case '5': case '6': case '7': { |
| 364 | char ch = *p - '0'; |
| 365 | if ( IS_OCTAL_DIGIT(p[1]) ) |
| 366 | ch = ch * 8 + *++p - '0'; |
| 367 | if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice |
| 368 | ch = ch * 8 + *++p - '0'; // now points at last digit |
| 369 | *d++ = ch; |
| 370 | break; |
| 371 | } |
| 372 | case 'x': case 'X': { |
| 373 | if (!isxdigit(p[1])) { |
| 374 | if (p[1] == '\0') { |
| 375 | LOG_STRING(ERROR, errors) << "String cannot end with \\x"; |
| 376 | } else { |
| 377 | LOG_STRING(ERROR, errors) << |
| 378 | "\\x cannot be followed by non-hex digit: \\" << *p << p[1]; |
| 379 | } |
| 380 | break; |
| 381 | } |
| 382 | unsigned int ch = 0; |
| 383 | const char *hex_start = p; |
| 384 | while (isxdigit(p[1])) // arbitrarily many hex digits |
| 385 | ch = (ch << 4) + hex_digit_to_int(*++p); |
| 386 | if (ch > 0xFF) |
| 387 | LOG_STRING(ERROR, errors) << "Value of " << |
| 388 | "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits"; |
| 389 | *d++ = ch; |
| 390 | break; |
| 391 | } |
| 392 | #if 0 // TODO(kenton): Support \u and \U? Requires runetochar(). |
| 393 | case 'u': { |
| 394 | // \uhhhh => convert 4 hex digits to UTF-8 |
| 395 | char32 rune = 0; |
| 396 | const char *hex_start = p; |
| 397 | for (int i = 0; i < 4; ++i) { |
| 398 | if (isxdigit(p[1])) { // Look one char ahead. |
| 399 | rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. |
| 400 | } else { |
| 401 | LOG_STRING(ERROR, errors) |
| 402 | << "\\u must be followed by 4 hex digits: \\" |
| 403 | << string(hex_start, p+1-hex_start); |
| 404 | break; |
| 405 | } |
| 406 | } |
| 407 | d += runetochar(d, &rune); |
| 408 | break; |
| 409 | } |
| 410 | case 'U': { |
| 411 | // \Uhhhhhhhh => convert 8 hex digits to UTF-8 |
| 412 | char32 rune = 0; |
| 413 | const char *hex_start = p; |
| 414 | for (int i = 0; i < 8; ++i) { |
| 415 | if (isxdigit(p[1])) { // Look one char ahead. |
| 416 | // Don't change rune until we're sure this |
| 417 | // is within the Unicode limit, but do advance p. |
| 418 | char32 newrune = (rune << 4) + hex_digit_to_int(*++p); |
| 419 | if (newrune > 0x10FFFF) { |
| 420 | LOG_STRING(ERROR, errors) |
| 421 | << "Value of \\" |
| 422 | << string(hex_start, p + 1 - hex_start) |
| 423 | << " exceeds Unicode limit (0x10FFFF)"; |
| 424 | break; |
| 425 | } else { |
| 426 | rune = newrune; |
| 427 | } |
| 428 | } else { |
| 429 | LOG_STRING(ERROR, errors) |
| 430 | << "\\U must be followed by 8 hex digits: \\" |
| 431 | << string(hex_start, p+1-hex_start); |
| 432 | break; |
| 433 | } |
| 434 | } |
| 435 | d += runetochar(d, &rune); |
| 436 | break; |
| 437 | } |
| 438 | #endif |
| 439 | default: |
| 440 | LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p; |
| 441 | } |
| 442 | p++; // read past letter we escaped |
| 443 | } |
| 444 | } |
| 445 | *d = '\0'; |
| 446 | return d - dest; |
| 447 | } |
| 448 | |
| 449 | // ---------------------------------------------------------------------- |
| 450 | // UnescapeCEscapeString() |
| 451 | // This does the same thing as UnescapeCEscapeSequences, but creates |
| 452 | // a new string. The caller does not need to worry about allocating |
| 453 | // a dest buffer. This should be used for non performance critical |
| 454 | // tasks such as printing debug messages. It is safe for src and dest |
| 455 | // to be the same. |
| 456 | // |
| 457 | // The second call stores its errors in a supplied string vector. |
| 458 | // If the string vector pointer is NULL, it reports the errors with LOG(). |
| 459 | // |
//    In the first and second calls, the length of dest is returned. In
//    the third call, the new string is returned.
| 462 | // ---------------------------------------------------------------------- |
| 463 | int UnescapeCEscapeString(const string& src, string* dest) { |
| 464 | return UnescapeCEscapeString(src, dest, NULL); |
| 465 | } |
| 466 | |
| 467 | int UnescapeCEscapeString(const string& src, string* dest, |
| 468 | vector<string> *errors) { |
| 469 | scoped_array<char> unescaped(new char[src.size() + 1]); |
| 470 | int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors); |
| 471 | GOOGLE_CHECK(dest); |
| 472 | dest->assign(unescaped.get(), len); |
| 473 | return len; |
| 474 | } |
| 475 | |
| 476 | string UnescapeCEscapeString(const string& src) { |
| 477 | scoped_array<char> unescaped(new char[src.size() + 1]); |
| 478 | int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL); |
| 479 | return string(unescaped.get(), len); |
| 480 | } |
| 481 | |
| 482 | // ---------------------------------------------------------------------- |
| 483 | // CEscapeString() |
| 484 | // CHexEscapeString() |
| 485 | // Copies 'src' to 'dest', escaping dangerous characters using |
| 486 | // C-style escape sequences. This is very useful for preparing query |
| 487 | // flags. 'src' and 'dest' should not overlap. The 'Hex' version uses |
| 488 | // hexadecimal rather than octal sequences. |
| 489 | // Returns the number of bytes written to 'dest' (not including the \0) |
| 490 | // or -1 if there was insufficient space. |
| 491 | // |
| 492 | // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. |
| 493 | // ---------------------------------------------------------------------- |
// Escapes 'src_len' bytes of 'src' into 'dest' (capacity 'dest_len' bytes,
// including room for the trailing '\0').
//   use_hex   - emit \xNN instead of \NNN (octal) for numeric escapes.
//   utf8_safe - copy bytes >= 0x80 through unescaped.
// Returns the number of bytes written, not counting the '\0', or -1 if
// 'dest' is too small. Never writes outside dest[0, dest_len).
int CEscapeInternal(const char* src, int src_len, char* dest,
                    int dest_len, bool use_hex, bool utf8_safe) {
  static const char kHexDigits[] = "0123456789abcdef";

  const char* src_end = src + src_len;
  int used = 0;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    bool is_hex_escape = false;
    switch (*src) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default: {
        const unsigned char byte = static_cast<unsigned char>(*src);
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!utf8_safe || byte < 0x80) &&
            (!isprint(*src) ||
             (last_hex_escape && isxdigit(*src)))) {
          if (dest_len - used < 4)  // need space for 4 letter escape
            return -1;
          // Emit the four characters by hand. The previous sprintf() call
          // also stored a terminating '\0' as a fifth byte, which landed one
          // past the end of 'dest' when exactly four bytes remained.
          dest[used++] = '\\';
          if (use_hex) {
            dest[used++] = 'x';
            dest[used++] = kHexDigits[byte >> 4];
            dest[used++] = kHexDigits[byte & 0xf];
          } else {
            dest[used++] = static_cast<char>('0' + (byte >> 6));
            dest[used++] = static_cast<char>('0' + ((byte >> 3) & 7));
            dest[used++] = static_cast<char>('0' + (byte & 7));
          }
          is_hex_escape = use_hex;
        } else {
          dest[used++] = *src;
        }
        break;
      }
    }
    last_hex_escape = is_hex_escape;
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}
| 538 | |
| 539 | int CEscapeString(const char* src, int src_len, char* dest, int dest_len) { |
kenton@google.com | fccb146 | 2009-12-18 02:11:36 +0000 | [diff] [blame] | 540 | return CEscapeInternal(src, src_len, dest, dest_len, false, false); |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 541 | } |
| 542 | |
| 543 | // ---------------------------------------------------------------------- |
| 544 | // CEscape() |
| 545 | // CHexEscape() |
| 546 | // Copies 'src' to result, escaping dangerous characters using |
| 547 | // C-style escape sequences. This is very useful for preparing query |
//    flags.  'src' and 'dest' should not overlap. The 'Hex' version uses
//    hexadecimal rather than octal sequences.
| 550 | // |
| 551 | // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. |
| 552 | // ---------------------------------------------------------------------- |
| 553 | string CEscape(const string& src) { |
| 554 | const int dest_length = src.size() * 4 + 1; // Maximum possible expansion |
| 555 | scoped_array<char> dest(new char[dest_length]); |
| 556 | const int len = CEscapeInternal(src.data(), src.size(), |
kenton@google.com | fccb146 | 2009-12-18 02:11:36 +0000 | [diff] [blame] | 557 | dest.get(), dest_length, false, false); |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 558 | GOOGLE_DCHECK_GE(len, 0); |
| 559 | return string(dest.get(), len); |
| 560 | } |
| 561 | |
kenton@google.com | fccb146 | 2009-12-18 02:11:36 +0000 | [diff] [blame] | 562 | namespace strings { |
| 563 | |
| 564 | string Utf8SafeCEscape(const string& src) { |
| 565 | const int dest_length = src.size() * 4 + 1; // Maximum possible expansion |
| 566 | scoped_array<char> dest(new char[dest_length]); |
| 567 | const int len = CEscapeInternal(src.data(), src.size(), |
| 568 | dest.get(), dest_length, false, true); |
| 569 | GOOGLE_DCHECK_GE(len, 0); |
| 570 | return string(dest.get(), len); |
| 571 | } |
| 572 | |
| 573 | string CHexEscape(const string& src) { |
| 574 | const int dest_length = src.size() * 4 + 1; // Maximum possible expansion |
| 575 | scoped_array<char> dest(new char[dest_length]); |
| 576 | const int len = CEscapeInternal(src.data(), src.size(), |
| 577 | dest.get(), dest_length, true, false); |
| 578 | GOOGLE_DCHECK_GE(len, 0); |
| 579 | return string(dest.get(), len); |
| 580 | } |
| 581 | |
| 582 | } // namespace strings |
| 583 | |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 584 | // ---------------------------------------------------------------------- |
| 585 | // strto32_adaptor() |
| 586 | // strtou32_adaptor() |
| 587 | // Implementation of strto[u]l replacements that have identical |
| 588 | // overflow and underflow characteristics for both ILP-32 and LP-64 |
| 589 | // platforms, including errno preservation in error-free calls. |
| 590 | // ---------------------------------------------------------------------- |
| 591 | |
| 592 | int32 strto32_adaptor(const char *nptr, char **endptr, int base) { |
| 593 | const int saved_errno = errno; |
| 594 | errno = 0; |
| 595 | const long result = strtol(nptr, endptr, base); |
| 596 | if (errno == ERANGE && result == LONG_MIN) { |
| 597 | return kint32min; |
| 598 | } else if (errno == ERANGE && result == LONG_MAX) { |
| 599 | return kint32max; |
| 600 | } else if (errno == 0 && result < kint32min) { |
| 601 | errno = ERANGE; |
| 602 | return kint32min; |
| 603 | } else if (errno == 0 && result > kint32max) { |
| 604 | errno = ERANGE; |
| 605 | return kint32max; |
| 606 | } |
| 607 | if (errno == 0) |
| 608 | errno = saved_errno; |
| 609 | return static_cast<int32>(result); |
| 610 | } |
| 611 | |
| 612 | uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) { |
| 613 | const int saved_errno = errno; |
| 614 | errno = 0; |
| 615 | const unsigned long result = strtoul(nptr, endptr, base); |
| 616 | if (errno == ERANGE && result == ULONG_MAX) { |
| 617 | return kuint32max; |
| 618 | } else if (errno == 0 && result > kuint32max) { |
| 619 | errno = ERANGE; |
| 620 | return kuint32max; |
| 621 | } |
| 622 | if (errno == 0) |
| 623 | errno = saved_errno; |
| 624 | return static_cast<uint32>(result); |
| 625 | } |
| 626 | |
// Trims surrounding spaces from *text and strips one optional leading '+' or
// '-'. On success *text holds the remaining characters, *negative_ptr says
// whether a '-' was present, and true is returned. Returns false, leaving
// *text unchanged, if nothing remains after trimming or after the sign.
inline bool safe_parse_sign(std::string* text  /*inout*/,
                            bool* negative_ptr  /*output*/) {
  const std::string& input = *text;
  std::string::size_type first = 0;
  std::string::size_type limit = input.size();

  // Consume whitespace (only the space character) at both ends.
  while (first < limit && input[first] == ' ') {
    ++first;
  }
  while (first < limit && input[limit - 1] == ' ') {
    --limit;
  }
  if (first >= limit) {
    return false;
  }

  // Consume sign.
  const char lead = input[first];
  *negative_ptr = (lead == '-');
  if (lead == '-' || lead == '+') {
    ++first;
    if (first >= limit) {
      return false;  // a sign with nothing after it
    }
  }

  *text = input.substr(first, limit - first);
  return true;
}
| 654 | |
| 655 | inline bool safe_parse_positive_int( |
| 656 | string text, int32* value_p) { |
| 657 | int base = 10; |
| 658 | int32 value = 0; |
| 659 | const int32 vmax = std::numeric_limits<int32>::max(); |
| 660 | assert(vmax > 0); |
| 661 | assert(vmax >= base); |
| 662 | const int32 vmax_over_base = vmax / base; |
| 663 | const char* start = text.data(); |
| 664 | const char* end = start + text.size(); |
| 665 | // loop over digits |
| 666 | for (; start < end; ++start) { |
| 667 | unsigned char c = static_cast<unsigned char>(start[0]); |
| 668 | int digit = c - '0'; |
| 669 | if (digit >= base || digit < 0) { |
| 670 | *value_p = value; |
| 671 | return false; |
| 672 | } |
| 673 | if (value > vmax_over_base) { |
| 674 | *value_p = vmax; |
| 675 | return false; |
| 676 | } |
| 677 | value *= base; |
| 678 | if (value > vmax - digit) { |
| 679 | *value_p = vmax; |
| 680 | return false; |
| 681 | } |
| 682 | value += digit; |
| 683 | } |
| 684 | *value_p = value; |
| 685 | return true; |
| 686 | } |
| 687 | |
| 688 | inline bool safe_parse_negative_int( |
| 689 | string text, int32* value_p) { |
| 690 | int base = 10; |
| 691 | int32 value = 0; |
| 692 | const int32 vmin = std::numeric_limits<int32>::min(); |
| 693 | assert(vmin < 0); |
| 694 | assert(vmin <= 0 - base); |
| 695 | int32 vmin_over_base = vmin / base; |
| 696 | // 2003 c++ standard [expr.mul] |
| 697 | // "... the sign of the remainder is implementation-defined." |
| 698 | // Although (vmin/base)*base + vmin%base is always vmin. |
| 699 | // 2011 c++ standard tightens the spec but we cannot rely on it. |
| 700 | if (vmin % base > 0) { |
| 701 | vmin_over_base += 1; |
| 702 | } |
| 703 | const char* start = text.data(); |
| 704 | const char* end = start + text.size(); |
| 705 | // loop over digits |
| 706 | for (; start < end; ++start) { |
| 707 | unsigned char c = static_cast<unsigned char>(start[0]); |
| 708 | int digit = c - '0'; |
| 709 | if (digit >= base || digit < 0) { |
| 710 | *value_p = value; |
| 711 | return false; |
| 712 | } |
| 713 | if (value < vmin_over_base) { |
| 714 | *value_p = vmin; |
| 715 | return false; |
| 716 | } |
| 717 | value *= base; |
| 718 | if (value < vmin + digit) { |
| 719 | *value_p = vmin; |
| 720 | return false; |
| 721 | } |
| 722 | value -= digit; |
| 723 | } |
| 724 | *value_p = value; |
| 725 | return true; |
| 726 | } |
| 727 | |
| 728 | bool safe_int(string text, int32* value_p) { |
| 729 | *value_p = 0; |
| 730 | bool negative; |
| 731 | if (!safe_parse_sign(&text, &negative)) { |
| 732 | return false; |
| 733 | } |
| 734 | if (!negative) { |
| 735 | return safe_parse_positive_int(text, value_p); |
| 736 | } else { |
| 737 | return safe_parse_negative_int(text, value_p); |
| 738 | } |
| 739 | } |
| 740 | |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 741 | // ---------------------------------------------------------------------- |
| 742 | // FastIntToBuffer() |
| 743 | // FastInt64ToBuffer() |
| 744 | // FastHexToBuffer() |
| 745 | // FastHex64ToBuffer() |
| 746 | // FastHex32ToBuffer() |
| 747 | // ---------------------------------------------------------------------- |
| 748 | |
// Offset into buffer where FastInt64ToBuffer places the end of string
// null character.  Also used by FastInt64ToBufferLeft.
static const int kFastInt64ToBufferOffset = 21;

// Writes the decimal form of |i| right-aligned in |buffer|, with the
// terminating null at buffer[kFastInt64ToBufferOffset], and returns a pointer
// to the first character written (a digit, or '-' for negative values).
// |buffer| must therefore hold at least kFastInt64ToBufferOffset + 1 bytes.
char *FastInt64ToBuffer(int64 i, char* buffer) {
  char* cursor = buffer + kFastInt64ToBufferOffset;
  *cursor-- = '\0';

  // The non-negative case is kept separate (rather than merged with the
  // negative one) so that it stays as cheap as possible.
  if (i >= 0) {
    do {
      *cursor-- = static_cast<char>('0' + i % 10);
      i /= 10;
    } while (i > 0);
    return cursor + 1;
  }

  // % and / on negative operands behave differently across platforms, so
  // only non-negative values are ever divided below.
  if (i > -10) {
    // Single digit: negation cannot overflow here.
    *cursor-- = static_cast<char>('0' + (-i));
    *cursor = '-';
    return cursor;
  }

  // i <= -10.  Negating i directly would overflow for the minimum value, so
  // shift it by 10 first; that leaves the last digit unchanged.
  int64 magnitude = -(i + 10);
  *cursor-- = static_cast<char>('0' + magnitude % 10);
  // Dropping the last digit and adding 1 undoes the shift by 10.
  magnitude = magnitude / 10 + 1;
  do {
    *cursor-- = static_cast<char>('0' + magnitude % 10);
    magnitude /= 10;
  } while (magnitude > 0);
  *cursor = '-';
  return cursor;
}
| 790 | |
// Offset into buffer where FastInt32ToBuffer places the end of string
// null character.  Also used by FastInt32ToBufferLeft
static const int kFastInt32ToBufferOffset = 11;

// 32-bit twin of FastInt64ToBuffer.  The duplication is deliberate: it lets
// the compiler emit 32-bit arithmetic, which is much faster, at least in
// 32-bit binaries.
//
// Writes the decimal form of |i| right-aligned in |buffer|, with the
// terminating null at buffer[kFastInt32ToBufferOffset], and returns a pointer
// to the first character written (a digit, or '-' for negative values).
// |buffer| must therefore hold at least kFastInt32ToBufferOffset + 1 bytes.
char *FastInt32ToBuffer(int32 i, char* buffer) {
  char* cursor = buffer + kFastInt32ToBufferOffset;
  *cursor-- = '\0';

  // The non-negative case is kept separate (rather than merged with the
  // negative one) so that it stays as cheap as possible.
  if (i >= 0) {
    do {
      *cursor-- = static_cast<char>('0' + i % 10);
      i /= 10;
    } while (i > 0);
    return cursor + 1;
  }

  // % and / on negative operands behave differently across platforms, so
  // only non-negative values are ever divided below.
  if (i > -10) {
    // Single digit: negation cannot overflow here.
    *cursor-- = static_cast<char>('0' + (-i));
    *cursor = '-';
    return cursor;
  }

  // i <= -10.  Negating i directly would overflow for the minimum value, so
  // shift it by 10 first; that leaves the last digit unchanged.
  int32 magnitude = -(i + 10);
  *cursor-- = static_cast<char>('0' + magnitude % 10);
  // Dropping the last digit and adding 1 undoes the shift by 10.
  magnitude = magnitude / 10 + 1;
  do {
    *cursor-- = static_cast<char>('0' + magnitude % 10);
    magnitude /= 10;
  } while (magnitude > 0);
  *cursor = '-';
  return cursor;
}
| 835 | |
| 836 | char *FastHexToBuffer(int i, char* buffer) { |
| 837 | GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i; |
| 838 | |
| 839 | static const char *hexdigits = "0123456789abcdef"; |
| 840 | char *p = buffer + 21; |
| 841 | *p-- = '\0'; |
| 842 | do { |
| 843 | *p-- = hexdigits[i & 15]; // mod by 16 |
| 844 | i >>= 4; // divide by 16 |
| 845 | } while (i > 0); |
| 846 | return p + 1; |
| 847 | } |
| 848 | |
// Writes the low |num_byte| hex digits of |value| (lowercase, zero-padded) to
// buffer[0 .. num_byte-1], null-terminates at buffer[num_byte], and returns
// |buffer|.  Despite its name, |num_byte| counts output characters.
char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
  static const char kHexDigits[] = "0123456789abcdef";
  buffer[num_byte] = '\0';
  // Fill from the last position backward, consuming one nibble per slot.
  int slot = num_byte;
  while (slot > 0) {
    --slot;
#ifdef _M_X64
    // MSVC x64 platform has a bug optimizing the uint32(value) in the #else
    // block. Given that the uint32 cast was to improve performance on 32-bit
    // platforms, we use 64-bit '&' directly.
    buffer[slot] = kHexDigits[value & 0xf];
#else
    buffer[slot] = kHexDigits[uint32(value) & 0xf];
#endif
    value >>= 4;
  }
  return buffer;
}
| 865 | |
| 866 | char *FastHex64ToBuffer(uint64 value, char* buffer) { |
| 867 | return InternalFastHexToBuffer(value, buffer, 16); |
| 868 | } |
| 869 | |
| 870 | char *FastHex32ToBuffer(uint32 value, char* buffer) { |
| 871 | return InternalFastHexToBuffer(value, buffer, 8); |
| 872 | } |
| 873 | |
// Writes a two-digit number backward from |p|: the ones digit at p[0], the
// tens digit at p[-1] and |prev_sep| at p[-2].  Returns the next free slot to
// the left (p - 3).  |num| is expected to be in [0, 99]; nothing checks it.
static inline char* PlaceNum(char* p, int num, char prev_sep) {
  p[0] = '0' + num % 10;
  p[-1] = '0' + num / 10;
  p[-2] = prev_sep;
  return p - 3;
}
| 880 | |
| 881 | // ---------------------------------------------------------------------- |
| 882 | // FastInt32ToBufferLeft() |
| 883 | // FastUInt32ToBufferLeft() |
| 884 | // FastInt64ToBufferLeft() |
| 885 | // FastUInt64ToBufferLeft() |
| 886 | // |
| 887 | // Like the Fast*ToBuffer() functions above, these are intended for speed. |
| 888 | // Unlike the Fast*ToBuffer() functions, however, these functions write |
| 889 | // their output to the beginning of the buffer (hence the name, as the |
| 890 | // output is left-aligned). The caller is responsible for ensuring that |
| 891 | // the buffer has enough space to hold the output. |
| 892 | // |
| 893 | // Returns a pointer to the end of the string (i.e. the null character |
| 894 | // terminating the string). |
| 895 | // ---------------------------------------------------------------------- |
| 896 | |
// two_ASCII_digits[n] holds the two characters of n printed as a zero-padded
// two-digit decimal number, for n in [0, 99].  One row per tens digit.
static const char two_ASCII_digits[100][2] = {
  {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
  {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
  {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
  {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
  {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
  {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
  {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
  {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
  {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
  {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
};

// Writes the decimal form of |u| at the start of |buffer|, null-terminates
// it, and returns a pointer to that terminating null.
//
// To keep the number of divides low, the value is emitted two digits at a
// time via two_ASCII_digits, every division is by a compile-time constant,
// and remainders are obtained by multiply-and-subtract instead of %.
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
  // Split off the leading group (one or two digits) and record how many
  // full two-digit groups follow it.
  uint32 lead;
  int trailing_pairs;
  if (u >= 100000000) {         // 100,000,000
    lead = u / 100000000;
    u -= lead * 100000000;
    trailing_pairs = 4;
  } else if (u >= 1000000) {    // 1,000,000
    lead = u / 1000000;
    u -= lead * 1000000;
    trailing_pairs = 3;
  } else if (u >= 10000) {      // 10,000
    lead = u / 10000;
    u -= lead * 10000;
    trailing_pairs = 2;
  } else if (u >= 100) {
    lead = u / 100;
    u -= lead * 100;
    trailing_pairs = 1;
  } else {
    lead = u;
    trailing_pairs = 0;
  }

  // The leading group is the only one printed without zero padding.
  if (lead >= 10) {
    buffer[0] = two_ASCII_digits[lead][0];
    buffer[1] = two_ASCII_digits[lead][1];
    buffer += 2;
  } else {
    *buffer++ = '0' + lead;
  }

  // Emit the remaining groups, most significant first.  Each case peels one
  // pair off |u| and deliberately falls through to the next smaller one.
  uint32 pair;
  switch (trailing_pairs) {
    case 4:
      pair = u / 1000000;       // 1,000,000
      buffer[0] = two_ASCII_digits[pair][0];
      buffer[1] = two_ASCII_digits[pair][1];
      buffer += 2;
      u -= pair * 1000000;
      // fall through
    case 3:
      pair = u / 10000;         // 10,000
      buffer[0] = two_ASCII_digits[pair][0];
      buffer[1] = two_ASCII_digits[pair][1];
      buffer += 2;
      u -= pair * 10000;
      // fall through
    case 2:
      pair = u / 100;
      buffer[0] = two_ASCII_digits[pair][0];
      buffer[1] = two_ASCII_digits[pair][1];
      buffer += 2;
      u -= pair * 100;
      // fall through
    case 1:
      // What is left of |u| is the final pair.
      buffer[0] = two_ASCII_digits[u][0];
      buffer[1] = two_ASCII_digits[u][1];
      buffer += 2;
      break;
    default:
      break;
  }

  *buffer = 0;
  return buffer;
}
| 1001 | |
| 1002 | char* FastInt32ToBufferLeft(int32 i, char* buffer) { |
| 1003 | uint32 u = i; |
| 1004 | if (i < 0) { |
| 1005 | *buffer++ = '-'; |
| 1006 | u = -i; |
| 1007 | } |
| 1008 | return FastUInt32ToBufferLeft(u, buffer); |
| 1009 | } |
| 1010 | |
| 1011 | char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { |
| 1012 | int digits; |
| 1013 | const char *ASCII_digits = NULL; |
| 1014 | |
| 1015 | uint32 u = static_cast<uint32>(u64); |
| 1016 | if (u == u64) return FastUInt32ToBufferLeft(u, buffer); |
| 1017 | |
| 1018 | uint64 top_11_digits = u64 / 1000000000; |
| 1019 | buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); |
| 1020 | u = u64 - (top_11_digits * 1000000000); |
| 1021 | |
| 1022 | digits = u / 10000000; // 10,000,000 |
| 1023 | GOOGLE_DCHECK_LT(digits, 100); |
| 1024 | ASCII_digits = two_ASCII_digits[digits]; |
| 1025 | buffer[0] = ASCII_digits[0]; |
| 1026 | buffer[1] = ASCII_digits[1]; |
| 1027 | buffer += 2; |
| 1028 | u -= digits * 10000000; // 10,000,000 |
| 1029 | digits = u / 100000; // 100,000 |
| 1030 | ASCII_digits = two_ASCII_digits[digits]; |
| 1031 | buffer[0] = ASCII_digits[0]; |
| 1032 | buffer[1] = ASCII_digits[1]; |
| 1033 | buffer += 2; |
| 1034 | u -= digits * 100000; // 100,000 |
| 1035 | digits = u / 1000; // 1,000 |
| 1036 | ASCII_digits = two_ASCII_digits[digits]; |
| 1037 | buffer[0] = ASCII_digits[0]; |
| 1038 | buffer[1] = ASCII_digits[1]; |
| 1039 | buffer += 2; |
| 1040 | u -= digits * 1000; // 1,000 |
| 1041 | digits = u / 10; |
| 1042 | ASCII_digits = two_ASCII_digits[digits]; |
| 1043 | buffer[0] = ASCII_digits[0]; |
| 1044 | buffer[1] = ASCII_digits[1]; |
| 1045 | buffer += 2; |
| 1046 | u -= digits * 10; |
| 1047 | digits = u; |
| 1048 | *buffer++ = '0' + digits; |
| 1049 | *buffer = 0; |
| 1050 | return buffer; |
| 1051 | } |
| 1052 | |
| 1053 | char* FastInt64ToBufferLeft(int64 i, char* buffer) { |
| 1054 | uint64 u = i; |
| 1055 | if (i < 0) { |
| 1056 | *buffer++ = '-'; |
| 1057 | u = -i; |
| 1058 | } |
| 1059 | return FastUInt64ToBufferLeft(u, buffer); |
| 1060 | } |
| 1061 | |
| 1062 | // ---------------------------------------------------------------------- |
| 1063 | // SimpleItoa() |
| 1064 | // Description: converts an integer to a string. |
| 1065 | // |
| 1066 | // Return value: string |
| 1067 | // ---------------------------------------------------------------------- |
| 1068 | |
| 1069 | string SimpleItoa(int i) { |
| 1070 | char buffer[kFastToBufferSize]; |
| 1071 | return (sizeof(i) == 4) ? |
| 1072 | FastInt32ToBuffer(i, buffer) : |
| 1073 | FastInt64ToBuffer(i, buffer); |
| 1074 | } |
| 1075 | |
| 1076 | string SimpleItoa(unsigned int i) { |
| 1077 | char buffer[kFastToBufferSize]; |
| 1078 | return string(buffer, (sizeof(i) == 4) ? |
| 1079 | FastUInt32ToBufferLeft(i, buffer) : |
| 1080 | FastUInt64ToBufferLeft(i, buffer)); |
| 1081 | } |
| 1082 | |
| 1083 | string SimpleItoa(long i) { |
| 1084 | char buffer[kFastToBufferSize]; |
| 1085 | return (sizeof(i) == 4) ? |
| 1086 | FastInt32ToBuffer(i, buffer) : |
| 1087 | FastInt64ToBuffer(i, buffer); |
| 1088 | } |
| 1089 | |
| 1090 | string SimpleItoa(unsigned long i) { |
| 1091 | char buffer[kFastToBufferSize]; |
| 1092 | return string(buffer, (sizeof(i) == 4) ? |
| 1093 | FastUInt32ToBufferLeft(i, buffer) : |
| 1094 | FastUInt64ToBufferLeft(i, buffer)); |
| 1095 | } |
| 1096 | |
| 1097 | string SimpleItoa(long long i) { |
| 1098 | char buffer[kFastToBufferSize]; |
| 1099 | return (sizeof(i) == 4) ? |
| 1100 | FastInt32ToBuffer(i, buffer) : |
| 1101 | FastInt64ToBuffer(i, buffer); |
| 1102 | } |
| 1103 | |
| 1104 | string SimpleItoa(unsigned long long i) { |
| 1105 | char buffer[kFastToBufferSize]; |
| 1106 | return string(buffer, (sizeof(i) == 4) ? |
| 1107 | FastUInt32ToBufferLeft(i, buffer) : |
| 1108 | FastUInt64ToBufferLeft(i, buffer)); |
| 1109 | } |
| 1110 | |
| 1111 | // ---------------------------------------------------------------------- |
| 1112 | // SimpleDtoa() |
| 1113 | // SimpleFtoa() |
| 1114 | // DoubleToBuffer() |
| 1115 | // FloatToBuffer() |
| 1116 | // We want to print the value without losing precision, but we also do |
| 1117 | // not want to print more digits than necessary. This turns out to be |
| 1118 | // trickier than it sounds. Numbers like 0.2 cannot be represented |
| 1119 | // exactly in binary. If we print 0.2 with a very large precision, |
| 1120 | // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". |
| 1121 | // On the other hand, if we set the precision too low, we lose |
| 1122 | // significant digits when printing numbers that actually need them. |
| 1123 | // It turns out there is no precision value that does the right thing |
| 1124 | // for all numbers. |
| 1125 | // |
| 1126 | // Our strategy is to first try printing with a precision that is never |
| 1127 | // over-precise, then parse the result with strtod() to see if it |
| 1128 | // matches. If not, we print again with a precision that will always |
| 1129 | // give a precise result, but may use more digits than necessary. |
| 1130 | // |
| 1131 | // An arguably better strategy would be to use the algorithm described |
| 1132 | // in "How to Print Floating-Point Numbers Accurately" by Steele & |
| 1133 | // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, |
| 1134 | // however, that the following implementation is about as fast as |
| 1135 | // DMG's code. Furthermore, DMG's code locks mutexes, which means it |
| 1136 | // will not scale well on multi-core machines. DMG's code is slightly |
| 1137 | // more accurate (in that it will never use more digits than |
| 1138 | // necessary), but this is probably irrelevant for most users. |
| 1139 | // |
| 1140 | // Rob Pike and Ken Thompson also have an implementation of dtoa() in |
| 1141 | // third_party/fmt/fltfmt.cc. Their implementation is similar to this |
| 1142 | // one in that it makes guesses and then uses strtod() to check them. |
| 1143 | // Their implementation is faster because they use their own code to |
| 1144 | // generate the digits in the first place rather than use snprintf(), |
| 1145 | // thus avoiding format string parsing overhead. However, this makes |
| 1146 | // it considerably more complicated than the following implementation, |
| 1147 | // and it is embedded in a larger library. If speed turns out to be |
| 1148 | // an issue, we could re-implement this in terms of their |
| 1149 | // implementation. |
| 1150 | // ---------------------------------------------------------------------- |
| 1151 | |
| 1152 | string SimpleDtoa(double value) { |
| 1153 | char buffer[kDoubleToBufferSize]; |
| 1154 | return DoubleToBuffer(value, buffer); |
| 1155 | } |
| 1156 | |
| 1157 | string SimpleFtoa(float value) { |
| 1158 | char buffer[kFloatToBufferSize]; |
| 1159 | return FloatToBuffer(value, buffer); |
| 1160 | } |
| 1161 | |
// Returns true if |c| is a decimal digit, an exponent marker ('e'/'E') or a
// sign ('+'/'-').  The radix character is deliberately excluded so that
// DelocalizeRadix() can locate it.
static inline bool IsValidFloatChar(char c) {
  switch (c) {
    case 'e':
    case 'E':
    case '+':
    case '-':
      return true;
    default:
      return c >= '0' && c <= '9';
  }
}
| 1167 | |
| 1168 | void DelocalizeRadix(char* buffer) { |
| 1169 | // Fast check: if the buffer has a normal decimal point, assume no |
| 1170 | // translation is needed. |
| 1171 | if (strchr(buffer, '.') != NULL) return; |
| 1172 | |
| 1173 | // Find the first unknown character. |
| 1174 | while (IsValidFloatChar(*buffer)) ++buffer; |
| 1175 | |
| 1176 | if (*buffer == '\0') { |
| 1177 | // No radix character found. |
| 1178 | return; |
| 1179 | } |
| 1180 | |
| 1181 | // We are now pointing at the locale-specific radix character. Replace it |
| 1182 | // with '.'. |
| 1183 | *buffer = '.'; |
| 1184 | ++buffer; |
| 1185 | |
| 1186 | if (!IsValidFloatChar(*buffer) && *buffer != '\0') { |
| 1187 | // It appears the radix was a multi-byte character. We need to remove the |
| 1188 | // extra bytes. |
| 1189 | char* target = buffer; |
| 1190 | do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0'); |
| 1191 | memmove(target, buffer, strlen(buffer) + 1); |
| 1192 | } |
| 1193 | } |
| 1194 | |
| 1195 | char* DoubleToBuffer(double value, char* buffer) { |
| 1196 | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
| 1197 | // platforms these days. Just in case some system exists where DBL_DIG |
| 1198 | // is significantly larger -- and risks overflowing our buffer -- we have |
| 1199 | // this assert. |
| 1200 | GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); |
| 1201 | |
| 1202 | if (value == numeric_limits<double>::infinity()) { |
| 1203 | strcpy(buffer, "inf"); |
| 1204 | return buffer; |
| 1205 | } else if (value == -numeric_limits<double>::infinity()) { |
| 1206 | strcpy(buffer, "-inf"); |
| 1207 | return buffer; |
| 1208 | } else if (IsNaN(value)) { |
| 1209 | strcpy(buffer, "nan"); |
| 1210 | return buffer; |
| 1211 | } |
| 1212 | |
| 1213 | int snprintf_result = |
| 1214 | snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); |
| 1215 | |
| 1216 | // The snprintf should never overflow because the buffer is significantly |
| 1217 | // larger than the precision we asked for. |
| 1218 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
| 1219 | |
| 1220 | // We need to make parsed_value volatile in order to force the compiler to |
| 1221 | // write it out to the stack. Otherwise, it may keep the value in a |
| 1222 | // register, and if it does that, it may keep it as a long double instead |
| 1223 | // of a double. This long double may have extra bits that make it compare |
| 1224 | // unequal to "value" even though it would be exactly equal if it were |
| 1225 | // truncated to a double. |
| 1226 | volatile double parsed_value = strtod(buffer, NULL); |
| 1227 | if (parsed_value != value) { |
| 1228 | int snprintf_result = |
| 1229 | snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); |
| 1230 | |
| 1231 | // Should never overflow; see above. |
| 1232 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
| 1233 | } |
| 1234 | |
| 1235 | DelocalizeRadix(buffer); |
| 1236 | return buffer; |
| 1237 | } |
| 1238 | |
// Parses |str| as a float into *value.  Returns true only if |str| is
// non-empty, was consumed in its entirety, and the conversion did not set
// errno.  *value is always overwritten with whatever the C library returned,
// even on failure.
bool safe_strtof(const char* str, float* value) {
  char* parse_end;
  errno = 0;  // errno only gets set on errors
#if defined(_WIN32) || defined (__hpux)  // has no strtof()
  *value = strtod(str, &parse_end);
#else
  *value = strtof(str, &parse_end);
#endif
  if (*str == 0) return false;        // empty input
  if (*parse_end != 0) return false;  // trailing unparsed characters
  return errno == 0;
}
| 1249 | |
| 1250 | char* FloatToBuffer(float value, char* buffer) { |
| 1251 | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
| 1252 | // platforms these days. Just in case some system exists where FLT_DIG |
| 1253 | // is significantly larger -- and risks overflowing our buffer -- we have |
| 1254 | // this assert. |
| 1255 | GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); |
| 1256 | |
| 1257 | if (value == numeric_limits<double>::infinity()) { |
| 1258 | strcpy(buffer, "inf"); |
| 1259 | return buffer; |
| 1260 | } else if (value == -numeric_limits<double>::infinity()) { |
| 1261 | strcpy(buffer, "-inf"); |
| 1262 | return buffer; |
| 1263 | } else if (IsNaN(value)) { |
| 1264 | strcpy(buffer, "nan"); |
| 1265 | return buffer; |
| 1266 | } |
| 1267 | |
| 1268 | int snprintf_result = |
| 1269 | snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); |
| 1270 | |
| 1271 | // The snprintf should never overflow because the buffer is significantly |
| 1272 | // larger than the precision we asked for. |
| 1273 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
| 1274 | |
| 1275 | float parsed_value; |
| 1276 | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
| 1277 | int snprintf_result = |
| 1278 | snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value); |
| 1279 | |
| 1280 | // Should never overflow; see above. |
| 1281 | GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
| 1282 | } |
| 1283 | |
| 1284 | DelocalizeRadix(buffer); |
| 1285 | return buffer; |
| 1286 | } |
| 1287 | |
// Returns |num| as lowercase hexadecimal with no prefix and no leading
// zeros; zero is rendered as "0".
string ToHex(uint64 num) {
  static const char kHexChars[] = "0123456789abcdef";
  if (num == 0) {
    return string("0");
  }

  // Fill a scratch buffer from the back, least significant nibble first.
  const int kMaxDigits = 16;  // No more than 16 hex digits needed.
  char buf[kMaxDigits];
  int first = kMaxDigits;
  do {
    buf[--first] = kHexChars[num & 0xf];
    num >>= 4;
  } while (num != 0);

  return string(buf + first, kMaxDigits - first);
}
| 1305 | |
Jisi Liu | 885b612 | 2015-02-28 14:51:22 -0800 | [diff] [blame^] | 1306 | namespace strings { |
| 1307 | |
| 1308 | AlphaNum::AlphaNum(strings::Hex hex) { |
| 1309 | char *const end = &digits[kFastToBufferSize]; |
| 1310 | char *writer = end; |
| 1311 | uint64 value = hex.value; |
| 1312 | uint64 width = hex.spec; |
| 1313 | // We accomplish minimum width by OR'ing in 0x10000 to the user's value, |
| 1314 | // where 0x10000 is the smallest hex number that is as wide as the user |
| 1315 | // asked for. |
| 1316 | uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value; |
| 1317 | static const char hexdigits[] = "0123456789abcdef"; |
| 1318 | do { |
| 1319 | *--writer = hexdigits[value & 0xF]; |
| 1320 | value >>= 4; |
| 1321 | mask >>= 4; |
| 1322 | } while (mask != 0); |
| 1323 | piece_data_ = writer; |
| 1324 | piece_size_ = end - writer; |
| 1325 | } |
| 1326 | |
| 1327 | } // namespace strings |
| 1328 | |
| 1329 | // ---------------------------------------------------------------------- |
| 1330 | // StrCat() |
| 1331 | // This merges the given strings or integers, with no delimiter. This |
| 1332 | // is designed to be the fastest possible way to construct a string out |
| 1333 | // of a mix of raw C strings, C++ strings, and integer values. |
| 1334 | // ---------------------------------------------------------------------- |
| 1335 | |
| 1336 | // Append is merely a version of memcpy that returns the address of the byte |
| 1337 | // after the area just overwritten. It comes in multiple flavors to minimize |
| 1338 | // call overhead. |
| 1339 | static char *Append1(char *out, const AlphaNum &x) { |
| 1340 | memcpy(out, x.data(), x.size()); |
| 1341 | return out + x.size(); |
| 1342 | } |
| 1343 | |
| 1344 | static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) { |
| 1345 | memcpy(out, x1.data(), x1.size()); |
| 1346 | out += x1.size(); |
| 1347 | |
| 1348 | memcpy(out, x2.data(), x2.size()); |
| 1349 | return out + x2.size(); |
| 1350 | } |
| 1351 | |
| 1352 | static char *Append4(char *out, |
| 1353 | const AlphaNum &x1, const AlphaNum &x2, |
| 1354 | const AlphaNum &x3, const AlphaNum &x4) { |
| 1355 | memcpy(out, x1.data(), x1.size()); |
| 1356 | out += x1.size(); |
| 1357 | |
| 1358 | memcpy(out, x2.data(), x2.size()); |
| 1359 | out += x2.size(); |
| 1360 | |
| 1361 | memcpy(out, x3.data(), x3.size()); |
| 1362 | out += x3.size(); |
| 1363 | |
| 1364 | memcpy(out, x4.data(), x4.size()); |
| 1365 | return out + x4.size(); |
| 1366 | } |
| 1367 | |
| 1368 | string StrCat(const AlphaNum &a, const AlphaNum &b) { |
| 1369 | string result; |
| 1370 | result.resize(a.size() + b.size()); |
| 1371 | char *const begin = &*result.begin(); |
| 1372 | char *out = Append2(begin, a, b); |
| 1373 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1374 | return result; |
| 1375 | } |
| 1376 | |
| 1377 | string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { |
| 1378 | string result; |
| 1379 | result.resize(a.size() + b.size() + c.size()); |
| 1380 | char *const begin = &*result.begin(); |
| 1381 | char *out = Append2(begin, a, b); |
| 1382 | out = Append1(out, c); |
| 1383 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1384 | return result; |
| 1385 | } |
| 1386 | |
| 1387 | string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| 1388 | const AlphaNum &d) { |
| 1389 | string result; |
| 1390 | result.resize(a.size() + b.size() + c.size() + d.size()); |
| 1391 | char *const begin = &*result.begin(); |
| 1392 | char *out = Append4(begin, a, b, c, d); |
| 1393 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1394 | return result; |
| 1395 | } |
| 1396 | |
| 1397 | string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| 1398 | const AlphaNum &d, const AlphaNum &e) { |
| 1399 | string result; |
| 1400 | result.resize(a.size() + b.size() + c.size() + d.size() + e.size()); |
| 1401 | char *const begin = &*result.begin(); |
| 1402 | char *out = Append4(begin, a, b, c, d); |
| 1403 | out = Append1(out, e); |
| 1404 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1405 | return result; |
| 1406 | } |
| 1407 | |
| 1408 | string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| 1409 | const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) { |
| 1410 | string result; |
| 1411 | result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| 1412 | f.size()); |
| 1413 | char *const begin = &*result.begin(); |
| 1414 | char *out = Append4(begin, a, b, c, d); |
| 1415 | out = Append2(out, e, f); |
| 1416 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1417 | return result; |
| 1418 | } |
| 1419 | |
| 1420 | string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| 1421 | const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, |
| 1422 | const AlphaNum &g) { |
| 1423 | string result; |
| 1424 | result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| 1425 | f.size() + g.size()); |
| 1426 | char *const begin = &*result.begin(); |
| 1427 | char *out = Append4(begin, a, b, c, d); |
| 1428 | out = Append2(out, e, f); |
| 1429 | out = Append1(out, g); |
| 1430 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1431 | return result; |
| 1432 | } |
| 1433 | |
| 1434 | string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| 1435 | const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, |
| 1436 | const AlphaNum &g, const AlphaNum &h) { |
| 1437 | string result; |
| 1438 | result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| 1439 | f.size() + g.size() + h.size()); |
| 1440 | char *const begin = &*result.begin(); |
| 1441 | char *out = Append4(begin, a, b, c, d); |
| 1442 | out = Append4(out, e, f, g, h); |
| 1443 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1444 | return result; |
| 1445 | } |
| 1446 | |
| 1447 | string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, |
| 1448 | const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, |
| 1449 | const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) { |
| 1450 | string result; |
| 1451 | result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + |
| 1452 | f.size() + g.size() + h.size() + i.size()); |
| 1453 | char *const begin = &*result.begin(); |
| 1454 | char *out = Append4(begin, a, b, c, d); |
| 1455 | out = Append4(out, e, f, g, h); |
| 1456 | out = Append1(out, i); |
| 1457 | GOOGLE_DCHECK_EQ(out, begin + result.size()); |
| 1458 | return result; |
| 1459 | } |
| 1460 | |
// StrAppend can be called with a char * that points partway into the very
// string being appended to, in which case the outcome is unpredictable.
// Debug builds therefore check for it.  The pointer difference is compared
// as an unsigned value so that a source located before the destination wraps
// to a huge number and a single comparison covers both directions.
#define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \
    GOOGLE_DCHECK_GT(static_cast<uintptr_t>((src).data() - (dest).data()), \
                     static_cast<uintptr_t>((dest).size()))
| 1468 | |
| 1469 | void StrAppend(string *result, const AlphaNum &a) { |
| 1470 | GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| 1471 | result->append(a.data(), a.size()); |
| 1472 | } |
| 1473 | |
| 1474 | void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) { |
| 1475 | GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| 1476 | GOOGLE_DCHECK_NO_OVERLAP(*result, b); |
| 1477 | string::size_type old_size = result->size(); |
| 1478 | result->resize(old_size + a.size() + b.size()); |
| 1479 | char *const begin = &*result->begin(); |
| 1480 | char *out = Append2(begin + old_size, a, b); |
| 1481 | GOOGLE_DCHECK_EQ(out, begin + result->size()); |
| 1482 | } |
| 1483 | |
| 1484 | void StrAppend(string *result, |
| 1485 | const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { |
| 1486 | GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| 1487 | GOOGLE_DCHECK_NO_OVERLAP(*result, b); |
| 1488 | GOOGLE_DCHECK_NO_OVERLAP(*result, c); |
| 1489 | string::size_type old_size = result->size(); |
| 1490 | result->resize(old_size + a.size() + b.size() + c.size()); |
| 1491 | char *const begin = &*result->begin(); |
| 1492 | char *out = Append2(begin + old_size, a, b); |
| 1493 | out = Append1(out, c); |
| 1494 | GOOGLE_DCHECK_EQ(out, begin + result->size()); |
| 1495 | } |
| 1496 | |
| 1497 | void StrAppend(string *result, |
| 1498 | const AlphaNum &a, const AlphaNum &b, |
| 1499 | const AlphaNum &c, const AlphaNum &d) { |
| 1500 | GOOGLE_DCHECK_NO_OVERLAP(*result, a); |
| 1501 | GOOGLE_DCHECK_NO_OVERLAP(*result, b); |
| 1502 | GOOGLE_DCHECK_NO_OVERLAP(*result, c); |
| 1503 | GOOGLE_DCHECK_NO_OVERLAP(*result, d); |
| 1504 | string::size_type old_size = result->size(); |
| 1505 | result->resize(old_size + a.size() + b.size() + c.size() + d.size()); |
| 1506 | char *const begin = &*result->begin(); |
| 1507 | char *out = Append4(begin + old_size, a, b, c, d); |
| 1508 | GOOGLE_DCHECK_EQ(out, begin + result->size()); |
| 1509 | } |
| 1510 | |
// Replaces every non-overlapping occurrence of |substring| in |*s| with
// |replacement|, scanning left to right, and returns the number of
// replacements performed. Returns 0 and leaves |*s| untouched when |*s| or
// |substring| is empty, or when |substring| does not occur. |s| must not be
// NULL. The output is assembled in a temporary and swapped in at the end, so
// |*s| is only read while matching.
int GlobalReplaceSubstring(const string& substring,
                           const string& replacement,
                           string* s) {
  GOOGLE_CHECK(s != NULL);
  if (s->empty() || substring.empty())
    return 0;
  string tmp;
  int num_replacements = 0;
  // Offsets use string::size_type: find() returns that type and signals "no
  // match" with string::npos, which a plain int can neither hold nor compare
  // against without an implicit conversion.
  string::size_type pos = 0;
  string::size_type match_pos =
      s->find(substring.data(), pos, substring.length());
  while (match_pos != string::npos) {
    ++num_replacements;
    // Append the original content before the match.
    tmp.append(*s, pos, match_pos - pos);
    // Append the replacement for the match.
    tmp.append(replacement.begin(), replacement.end());
    // Resume searching just past the matched text so matches never overlap.
    pos = match_pos + substring.length();
    match_pos = s->find(substring.data(), pos, substring.length());
  }
  // Append the content after the last match. If no replacements were made, the
  // original string is left untouched.
  if (num_replacements > 0) {
    tmp.append(*s, pos, s->length() - pos);
    s->swap(tmp);
  }
  return num_replacements;
}
| 1538 | |
temporal | 40ee551 | 2008-07-10 02:12:20 +0000 | [diff] [blame] | 1539 | } // namespace protobuf |
| 1540 | } // namespace google |