license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 1 | // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 4 | // |
| 5 | // This file defines utility functions for working with strings. |
| 6 | |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 7 | #ifndef BASE_STRING_UTIL_H_ |
| 8 | #define BASE_STRING_UTIL_H_ |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 9 | |
| 10 | #include <string> |
| 11 | #include <vector> |
evanm@google.com | b8a8f37 | 2008-08-08 04:26:37 +0900 | [diff] [blame] | 12 | #include <stdarg.h> // va_list |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 13 | |
| 14 | #include "base/basictypes.h" |
brettw@google.com | fa49905 | 2008-08-08 05:27:57 +0900 | [diff] [blame] | 15 | #include "base/string16.h" |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 16 | |
evanm@google.com | b8a8f37 | 2008-08-08 04:26:37 +0900 | [diff] [blame] | 17 | // Safe standard library wrappers for all platforms. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 18 | |
evanm@google.com | b8a8f37 | 2008-08-08 04:26:37 +0900 | [diff] [blame] | 19 | namespace base { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 20 | |
evanm@google.com | b8a8f37 | 2008-08-08 04:26:37 +0900 | [diff] [blame] | 21 | // C standard-library functions like "strncasecmp" and "snprintf" that aren't |
| 22 | // cross-platform are provided as "base::strncasecmp", and their prototypes |
| 23 | // are listed below. These functions are then implemented as inline calls |
| 24 | // to the platform-specific equivalents in the platform-specific headers. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 25 | |
// Compare up to count characters of s1 and s2 without regard to case using
// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
// s2 > s1 according to a lexicographic comparison.
int strncasecmp(const char* s1, const char* s2, size_t count);

// Wrapper for vsnprintf that always null-terminates and always returns the
// number of characters that would be in an untruncated formatted
// string, even when truncation occurs.
int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments);

// vswprintf always null-terminates, but when truncation occurs, it will either
// return -1 or the number of characters that would be in an untruncated
// formatted string.  The actual return value depends on the underlying
// C library's vswprintf implementation, so callers have to be prepared for
// either result.
int vswprintf(wchar_t* buffer, size_t size,
              const wchar_t* format, va_list arguments);
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 42 | |
| 43 | // Some of these implementations need to be inlined. |
| 44 | |
evanm@google.com | b8a8f37 | 2008-08-08 04:26:37 +0900 | [diff] [blame] | 45 | inline int snprintf(char* buffer, size_t size, const char* format, ...) { |
| 46 | va_list arguments; |
| 47 | va_start(arguments, format); |
| 48 | int result = vsnprintf(buffer, size, format, arguments); |
| 49 | va_end(arguments); |
| 50 | return result; |
| 51 | } |
| 52 | |
| 53 | inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { |
| 54 | va_list arguments; |
| 55 | va_start(arguments, format); |
| 56 | int result = vswprintf(buffer, size, format, arguments); |
| 57 | va_end(arguments); |
| 58 | return result; |
| 59 | } |
| 60 | |
// BSD-style safe and consistent string copy functions.
// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
// long as |dst_size| is not 0.  Returns the length of |src| in characters.
// If the return value is >= |dst_size|, then the output was truncated.
// NOTE: All sizes are in number of characters, NOT in bytes.
size_t strlcpy(char* dst, const char* src, size_t dst_size);
size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
| 69 | |
// Scan a wprintf format string to determine whether it's portable across a
// variety of systems.  This function only checks that the conversion
// specifiers used by the format string are supported and have the same meaning
// on a variety of systems.  It doesn't check for other errors that might occur
// within a format string.
//
// Nonportable conversion specifiers for wprintf are:
//  - 's' and 'c' without an 'l' length modifier.  %s and %c operate on char
//    data on all systems except Windows, which treats them as wchar_t data.
//    Use %ls and %lc for wchar_t data instead.
//  - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
//    which treats them as char data.  Use %ls and %lc for wchar_t data
//    instead.
//  - 'F', which is not identified by Windows wprintf documentation.
//  - 'D', 'O', and 'U', which are deprecated and not available on all systems.
//    Use %ld, %lo, and %lu instead.
//
// Note that there is no portable conversion specifier for char data when
// working with wprintf.
//
// This function is intended to be called from base::vswprintf.
bool IsWprintfFormatPortable(const wchar_t* format);
| 92 | |
evanm@google.com | b8a8f37 | 2008-08-08 04:26:37 +0900 | [diff] [blame] | 93 | } // namespace base |
| 94 | |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 95 | #if defined(OS_WIN) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 96 | #include "base/string_util_win.h" |
evanm@google.com | b8a8f37 | 2008-08-08 04:26:37 +0900 | [diff] [blame] | 97 | #elif defined(OS_POSIX) |
| 98 | #include "base/string_util_posix.h" |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 99 | #else |
| 100 | #error Define string operations appropriately for your platform |
| 101 | #endif |
| 102 | |
// Old names for the above string functions, kept for compatibility.  Each
// macro simply forwards to its base:: replacement and is defined exactly once.
// TODO(evanm): excise all references to these old names.
#define StrNCaseCmp base::strncasecmp
#define SWPrintF base::swprintf
#define VSNPrintF base::vsnprintf
#define SNPrintF base::snprintf
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 110 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 111 | |
// Returns a reference to a globally unique empty string that functions can
// return.  Use this to avoid static construction of strings, not to replace
// any and all uses of "std::string()" as nicer-looking sugar.
// These functions are threadsafe.
const std::string& EmptyString();
const std::wstring& EmptyWString();
| 118 | |
// Null-terminated lists of the characters treated as whitespace, in wide and
// ASCII form.  The definitions live outside this header.
extern const wchar_t kWhitespaceWide[];
extern const char kWhitespaceASCII[];

// Names of codepages (charsets) understood by icu.
extern const char* const kCodepageUTF8;
| 124 | |
// Removes characters in trim_chars from the beginning and end of input and
// stores the result in |output|.
// NOTE: Safe to use the same variable for both input and output.
// NOTE(review): the meaning of the bool result is not documented here;
// presumably it reports whether anything was trimmed -- confirm against the
// implementation.
bool TrimString(const std::wstring& input,
                const wchar_t trim_chars[],
                std::wstring* output);
bool TrimString(const std::string& input,
                const char trim_chars[],
                std::string* output);
| 133 | |
// Trims any whitespace from either end of the input string.  Returns where
// whitespace was found.  The non-wide version of this function only looks for
// ASCII whitespace; UTF-8 code-points are not searched for (use the wide
// version instead).
// NOTE: Safe to use the same variable for both input and output.
enum TrimPositions {
  TRIM_NONE     = 0,
  TRIM_LEADING  = 1 << 0,
  TRIM_TRAILING = 1 << 1,
  // Bitwise combination of the two ends.
  TRIM_ALL      = TRIM_LEADING | TRIM_TRAILING,
};
TrimPositions TrimWhitespace(const std::wstring& input,
                             TrimPositions positions,
                             std::wstring* output);
TrimPositions TrimWhitespace(const std::string& input,
                             TrimPositions positions,
                             std::string* output);
| 151 | |
// Searches for CR or LF characters.  Removes all contiguous whitespace
// strings that contain them.  This is useful when trying to deal with text
// copied from terminals.
// Returns |text|, with the following three transformations:
// (1) Leading and trailing whitespace is trimmed.
// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
//     sequences containing a CR or LF are trimmed.
// (3) All other whitespace sequences are converted to single spaces.
std::wstring CollapseWhitespace(const std::wstring& text,
                                bool trim_sequences_with_line_breaks);
| 162 | |
// These convert between ASCII (7-bit) and UTF16 strings.
// NOTE(review): the behavior for characters outside the 7-bit ASCII range is
// not specified here -- confirm against the implementation before relying on
// it.
std::string WideToASCII(const std::wstring& wide);
std::wstring ASCIIToWide(const std::string& ascii);
| 166 | |
// These convert between UTF-8, -16, and -32 strings.  They are potentially
// slow, so avoid unnecessary conversions.  The low-level versions (taking a
// pointer, a length and an output string) return a boolean indicating whether
// the conversion was 100% valid.  When it was not, they still do the best they
// can and put the result in the output buffer.  The versions that return
// strings ignore this error and just return the best conversion possible.
bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);
std::string WideToUTF8(const std::wstring& wide);
bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);
std::wstring UTF8ToWide(const std::string& utf8);
| 177 | |
brettw@google.com | fa49905 | 2008-08-08 05:27:57 +0900 | [diff] [blame] | 178 | bool WideToUTF16(const wchar_t* src, size_t src_len, std::string16* output); |
| 179 | std::string16 WideToUTF16(const std::wstring& wide); |
| 180 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output); |
| 181 | std::wstring UTF16ToWide(const std::string16& utf8); |
| 182 | |
// Defines the error handling modes of WideToCodepage and CodepageToWide.
// The class itself only provides a scope for the Type enum.
class OnStringUtilConversionError {
 public:
  enum Type {
    // The function will return failure. The output buffer will be empty.
    FAIL,

    // The offending characters are skipped and the conversion will proceed as
    // if they did not exist.
    SKIP,
  };

 private:
  // Declared private so that code outside the class cannot construct an
  // instance.
  OnStringUtilConversionError();
};
| 198 | |
// Converts between wide strings and the encoding specified.  If the
// encoding doesn't exist or the encoding fails (when on_error is FAIL),
// returns false.  |codepage_name| names the encoding (see kCodepageUTF8 for
// an example of such a name) and the converted text is stored through the
// last parameter.
bool WideToCodepage(const std::wstring& wide,
                    const char* codepage_name,
                    OnStringUtilConversionError::Type on_error,
                    std::string* encoded);
bool CodepageToWide(const std::string& encoded,
                    const char* codepage_name,
                    OnStringUtilConversionError::Type on_error,
                    std::wstring* wide);
| 210 | |
// Converts the given wide string to the corresponding Latin1.  This will fail
// (return false) if any characters are more than 255.
// NOTE(review): the contents of |latin1| after a failed conversion are not
// specified here -- confirm against the implementation.
bool WideToLatin1(const std::wstring& wide, std::string* latin1);
| 214 | |
// Returns true if the specified string matches the criteria.  How can a wide
// string be 8-bit or UTF8?  It contains only characters that are < 256 (in the
// first case) or characters that use only 8-bits and whose 8-bit
// representation looks like a UTF-8 string (the second case).
bool IsString8Bit(const std::wstring& str);
bool IsStringUTF8(const char* str);
bool IsStringWideUTF8(const wchar_t* str);
bool IsStringASCII(const std::wstring& str);
bool IsStringASCII(const std::string& str);
| 224 | |
// ASCII-only lower-casing.  The standard library's tolower is locale
// sensitive, so it is deliberately not used here: only 'A'..'Z' are mapped to
// 'a'..'z', every other value is returned untouched.
template <class Char> inline Char ToLowerASCII(Char c) {
  if (c < 'A' || c > 'Z')
    return c;
  return c + ('a' - 'A');
}
| 230 | |
// Lower-cases the given string in place, one element at a time, using
// ToLowerASCII.  This version takes a pointer to clearly differentiate it
// from the non-pointer variant.
template <class str> inline void StringToLowerASCII(str* s) {
  typename str::iterator cursor = s->begin();
  while (cursor != s->end()) {
    *cursor = ToLowerASCII(*cursor);
    ++cursor;
  }
}
| 237 | |
// Returns a lower-cased copy of |s|; the argument itself is left unmodified.
// Intended for std::string and std::wstring.
template <class str> inline str StringToLowerASCII(const str& s) {
  str lowered(s);
  StringToLowerASCII(&lowered);  // In-place pointer variant does the work.
  return lowered;
}
| 244 | |
// Compare the lower-case form of the given string against the given ASCII
// string.  This is useful for checking whether an input string matches some
// token, and it is optimized to avoid intermediate string copies.  This API is
// borrowed from the equivalent APIs in Mozilla.
bool LowerCaseEqualsASCII(const std::string& a, const char* b);
bool LowerCaseEqualsASCII(const std::wstring& a, const char* b);

// Same thing, but with string iterators (or a pointer range) instead.
bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
                          std::string::const_iterator a_end,
                          const char* b);
bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
                          std::wstring::const_iterator a_end,
                          const char* b);
bool LowerCaseEqualsASCII(const char* a_begin,
                          const char* a_end,
                          const char* b);
bool LowerCaseEqualsASCII(const wchar_t* a_begin,
                          const wchar_t* a_end,
                          const char* b);
| 265 | |
// Returns true if str starts with search, or false otherwise.
// |case_sensitive| selects whether the comparison distinguishes case.
// This only works on ASCII strings.
bool StartsWithASCII(const std::string& str,
                     const std::string& search,
                     bool case_sensitive);
| 271 | |
// Locale-independent ASCII character classification (the C library versions
// will change based on locale).
//
// True for space, horizontal tab, carriage return and line feed.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  if (c == ' ' || c == '\t')
    return true;
  return c == '\r' || c == '\n';
}
// True for the ASCII letters 'A'..'Z' and 'a'..'z', independent of locale.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  const bool is_upper = (c >= 'A') && (c <= 'Z');
  const bool is_lower = (c >= 'a') && (c <= 'z');
  return is_upper || is_lower;
}
// True for the ASCII decimal digits '0'..'9', independent of locale.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  const bool below_zero = c < '0';
  const bool above_nine = c > '9';
  return !(below_zero || above_nine);
}
| 286 | |
| 287 | // Returns true if it's a whitespace character. |
| 288 | inline bool IsWhitespace(wchar_t c) { |
| 289 | return wcschr(kWhitespaceWide, c) != NULL; |
| 290 | } |
| 291 | |
// Units in which a byte count can be displayed; see GetByteDisplayUnits,
// FormatBytes and FormatSpeed.
// TODO(mpcomplete): Decide if we should change these names to KIBI, etc,
// or if we should actually use metric units, or leave as is.
enum DataUnits {
  DATA_UNITS_BYTE = 0,
  DATA_UNITS_KILOBYTE,
  DATA_UNITS_MEGABYTE,
  DATA_UNITS_GIGABYTE,
};
| 300 | |
// Return the unit type that is appropriate for displaying the amount of bytes
// passed in.
DataUnits GetByteDisplayUnits(int64 bytes);
| 304 | |
// Return a byte string in human-readable format, displayed in the units
// specified by 'units', with an optional unit suffix.
// Ex: FormatBytes(512, DATA_UNITS_KILOBYTE, true) => "0.5 KB"
// Ex: FormatBytes(10*1024, DATA_UNITS_MEGABYTE, false) => "0.1"
std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units);

// As above, but with "/s" units.
// Ex: FormatSpeed(512, DATA_UNITS_KILOBYTE, true) => "0.5 KB/s"
// Ex: FormatSpeed(10*1024, DATA_UNITS_MEGABYTE, false) => "0.1"
std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units);
| 315 | |
// Return a number formatted with separators according to the user's locale.
// Ex: FormatNumber(1234567) => 1,234,567
std::wstring FormatNumber(int64 number);
| 319 | |
// Starting at |start_offset| (usually 0), look through |str| and replace all
// instances of |find_this| with |replace_with|.  |str| is modified in place.
//
// This does entire substrings; use std::replace in <algorithm> for single
// characters, for example:
//   std::replace(str.begin(), str.end(), 'a', 'b');
void ReplaceSubstringsAfterOffset(std::wstring* str,
                                  std::wstring::size_type start_offset,
                                  const std::wstring& find_this,
                                  const std::wstring& replace_with);
void ReplaceSubstringsAfterOffset(std::string* str,
                                  std::string::size_type start_offset,
                                  const std::string& find_this,
                                  const std::string& replace_with);
| 334 | |
// Specialized string-conversion functions: one narrow and one wide variant
// for each of int, unsigned int, int64 and uint64.
std::string IntToString(int value);
std::wstring IntToWString(int value);
std::string UintToString(unsigned int value);
std::wstring UintToWString(unsigned int value);
std::string Int64ToString(int64 value);
std::wstring Int64ToWString(int64 value);
std::string Uint64ToString(uint64 value);
std::wstring Uint64ToWString(uint64 value);
mmentovai@google.com | 9328568 | 2008-08-06 07:46:15 +0900 | [diff] [blame] | 344 | |
// Perform a best-effort conversion of the input string to a numeric type,
// setting |*output| to the result of the conversion.  Returns true for
// "perfect" conversions; returns false in the following cases:
//  - Overflow/underflow.  |*output| will be set to the maximum value supported
//    by the data type.
//  - Trailing characters in the string after parsing the number.  |*output|
//    will be set to the value of the number that was parsed.
//  - No characters parseable as a number at the beginning of the string.
//    |*output| will be set to 0.
//  - Empty string.  |*output| will be set to 0.
bool StringToInt(const std::string& input, int* output);
bool StringToInt(const std::wstring& input, int* output);
bool StringToInt64(const std::string& input, int64* output);
bool StringToInt64(const std::wstring& input, int64* output);
bool HexStringToInt(const std::string& input, int* output);
bool HexStringToInt(const std::wstring& input, int* output);
| 361 | |
// For floating-point conversions, only conversions of input strings in decimal
// form are defined to work.  Behavior with strings representing floating-point
// numbers in hexadecimal, and strings representing non-finite values (such
// as NaN and inf) is undefined.  Otherwise, these behave the same as the
// integral variants above.
bool StringToDouble(const std::string& input, double* output);
bool StringToDouble(const std::wstring& input, double* output);
| 369 | |
// Convenience forms of the above, when the caller is uninterested in the
// boolean return value.  These return only the |*output| value from the
// above conversions: a best-effort conversion when possible, otherwise, 0.
int StringToInt(const std::string& value);
int StringToInt(const std::wstring& value);
int64 StringToInt64(const std::string& value);
int64 StringToInt64(const std::wstring& value);
int HexStringToInt(const std::string& value);
int HexStringToInt(const std::wstring& value);
double StringToDouble(const std::string& value);
double StringToDouble(const std::wstring& value);
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 381 | |
// Return a C++ string given printf-like input.
std::string StringPrintf(const char* format, ...);
std::wstring StringPrintf(const wchar_t* format, ...);

// Store result into a supplied string and return it.
const std::string& SStringPrintf(std::string* dst, const char* format, ...);
const std::wstring& SStringPrintf(std::wstring* dst,
                                  const wchar_t* format, ...);

// Append result to a supplied string.
void StringAppendF(std::string* dst, const char* format, ...);
void StringAppendF(std::wstring* dst, const wchar_t* format, ...);

// Lower-level routine that takes a va_list and appends to a specified
// string.  All other routines are just convenience wrappers around it.
void StringAppendV(std::string* dst, const char* format, va_list ap);
void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap);
| 399 | |
// This is mpcomplete's pattern for saving a string copy when dealing with
// a function that writes results into a wchar_t[] and wanting the result to
// end up in a std::wstring.  It ensures that the std::wstring's internal
// buffer has enough room to store the characters to be written into it, and
// sets its .length() attribute to the right value.
//
// |length_including_null| is the number of characters the caller intends to
// write, counting the terminating null.  The string's length becomes
// |length_including_null| - 1 and a pointer to its first character is
// returned.  A value of 0 is treated like 1 (an empty string) rather than
// letting the subtraction wrap around.
//
// The reserve() call allocates the memory required to hold the string
// plus a terminating null.  This is done because resize() isn't
// guaranteed to reserve space for the null.  The resize() call is
// simply the only way to change the string's 'length' member.
//
// XXX-performance: the call to wide.resize() takes linear time, since it fills
// the string's buffer with nulls.  I call it to change the length of the
// string (needed because writing directly to the buffer doesn't do this).
// Perhaps there's a constant-time way to change the string's length.
template <class char_type>
inline char_type* WriteInto(
    std::basic_string<char_type, std::char_traits<char_type>,
                      std::allocator<char_type> >* str,
    size_t length_including_null) {
  // Guard the subtraction: with an unsigned 0 it would wrap to the largest
  // size_t value and resize() would be asked for an impossible length.
  const size_t length =
      length_including_null > 0 ? length_including_null - 1 : 0;
  str->reserve(length_including_null);
  str->resize(length);
  return &((*str)[0]);
}
| 424 | |
| 425 | //----------------------------------------------------------------------------- |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 426 | |
// Function objects to aid in comparing/searching strings.

// Equality predicate: two characters are considered equal when the C
// library's tolower() maps them to the same value.
template<typename Char> struct CaseInsensitiveCompare {
 public:
  bool operator()(Char x, Char y) const {
    const int lowered_x = tolower(x);
    const int lowered_y = tolower(y);
    return lowered_x == lowered_y;
  }
};
| 435 | |
// Equality predicate: two characters are considered equal when ToLowerASCII
// maps them to the same value.
template<typename Char> struct CaseInsensitiveCompareASCII {
 public:
  bool operator()(Char x, Char y) const {
    const Char folded_x = ToLowerASCII(x);
    const Char folded_y = ToLowerASCII(y);
    return folded_x == folded_y;
  }
};
| 442 | |
| 443 | //----------------------------------------------------------------------------- |
| 444 | |
// Splits |str| into a vector of strings delimited by |s|.  Appends the results
// into |r| as they appear.  If several instances of |s| are contiguous, or if
// |str| begins with or ends with |s|, then an empty string is inserted.
//
// Every substring is trimmed of any leading or trailing white space.
void SplitString(const std::wstring& str,
                 wchar_t s,
                 std::vector<std::wstring>* r);
void SplitString(const std::string& str,
                 char s,
                 std::vector<std::string>* r);

// The same as SplitString, but don't trim white space.
void SplitStringDontTrim(const std::wstring& str,
                         wchar_t s,
                         std::vector<std::wstring>* r);
void SplitStringDontTrim(const std::string& str,
                         char s,
                         std::vector<std::string>* r);
| 464 | |
// WARNING: this uses whitespace as defined by the HTML5 spec.  If you need
// a function similar to this but want to trim all types of whitespace, then
// factor this out into a function that takes a string containing the characters
// that are treated as whitespace.
//
// Splits the string along whitespace (where whitespace is the five space
// characters defined by HTML 5).  Each contiguous block of non-whitespace
// characters is added to |result|.
void SplitStringAlongWhitespace(const std::wstring& str,
                                std::vector<std::wstring>* result);
| 475 | |
// Replace the numbered placeholders ($1, $2, $3, ...) in the format string
// with the supplied replacement strings |a|, |b|, ... respectively
// (presumably $4 maps to |d| in the four-argument form -- confirm against the
// implementation).  Additionally, $$ is replaced by $.  The offset/offsets
// parameter here can be NULL.
// NOTE(review): what is stored through |offset|/|offsets| is not described
// here; presumably the positions of the substitutions in the returned string.
std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
                                       const std::wstring& a,
                                       size_t* offset);

std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
                                       const std::wstring& a,
                                       const std::wstring& b,
                                       std::vector<size_t>* offsets);

std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
                                       const std::wstring& a,
                                       const std::wstring& b,
                                       const std::wstring& c,
                                       std::vector<size_t>* offsets);

std::wstring ReplaceStringPlaceholders(const std::wstring& format_string,
                                       const std::wstring& a,
                                       const std::wstring& b,
                                       const std::wstring& c,
                                       const std::wstring& d,
                                       std::vector<size_t>* offsets);
| 500 | |
// Returns true if the string passed in matches the pattern.  The pattern
// string can contain wildcards like * and ?.
// The backslash character (\) is an escape character for * and ?.
// TODO(iyengar) This function may not work correctly for CJK strings as
// it does individual character matches.
bool MatchPattern(const std::wstring& string, const std::wstring& pattern);
bool MatchPattern(const std::string& string, const std::string& pattern);
| 508 | |
brettw@google.com | e3c034a | 2008-08-08 03:31:40 +0900 | [diff] [blame] | 509 | #endif // BASE_STRING_UTIL_H_ |
license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 510 | |