blob: 5a45f275e497633d6140539fa3872385750f808e [file] [log] [blame]
license.botf003cfe2008-08-24 09:55:55 +09001// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
mmentovai@google.com93285682008-08-06 07:46:15 +09004
initial.commit3f4a7322008-07-27 06:49:38 +09005#include "base/string_util.h"
6
tc@google.com866a31a2008-11-13 08:19:30 +09007#include "build/build_config.h"
8
mmentovai@google.com93285682008-08-06 07:46:15 +09009#include <ctype.h>
10#include <errno.h>
initial.commit3f4a7322008-07-27 06:49:38 +090011#include <math.h>
12#include <stdarg.h>
13#include <stdio.h>
mmentovai@google.com93285682008-08-06 07:46:15 +090014#include <stdlib.h>
initial.commit3f4a7322008-07-27 06:49:38 +090015#include <string.h>
16#include <time.h>
mmentovai@google.com93285682008-08-06 07:46:15 +090017#include <wchar.h>
18#include <wctype.h>
19
20#include <algorithm>
initial.commit3f4a7322008-07-27 06:49:38 +090021#include <vector>
22
23#include "base/basictypes.h"
24#include "base/logging.h"
initial.commit3f4a7322008-07-27 06:49:38 +090025#include "base/singleton.h"
tony@chromium.org7523d992008-11-15 10:40:31 +090026#include "base/third_party/dmg_fp/dmg_fp.h"
initial.commit3f4a7322008-07-27 06:49:38 +090027
28namespace {
29
brettw@chromium.org84620612009-03-04 14:29:27 +090030// Force the singleton used by Empty[W]String[16] to be a unique type. This
31// prevents other code that might accidentally use Singleton<string> from
32// getting our internal one.
33struct EmptyStrings {
34 EmptyStrings() {}
35 const std::string s;
36 const std::wstring ws;
37 const string16 s16;
38};
39
// Records where a replaced parameter landed in the output string. Used by
// ReplaceStringPlaceholders.
struct ReplacementOffset {
  ReplacementOffset(uintptr_t parameter_index, size_t start_offset)
      : parameter(parameter_index),
        offset(start_offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};
53
54static bool CompareParameter(const ReplacementOffset& elem1,
55 const ReplacementOffset& elem2) {
56 return elem1.parameter < elem2.parameter;
57}
58
// Generalized string-to-number conversion.
//
// StringToNumberTraits must provide:
//  - string_type: typedef for the STL string type used as input.
//  - value_type: typedef for the target numeric type.
//  - convert_func: static function that dispatches to an appropriate
//    strtol-like function and returns a value_type.
//  - valid_func: static function that inspects |input| and returns whether it
//    is in proper form. It exists to reject input that convert_func tolerates
//    but that must make StringToNumber return false. For strtol-like
//    functions, valid_func should check for leading whitespace.
//
// |*output| is always written with whatever convert_func returned, even when
// the function returns false.
template<typename StringToNumberTraits>
bool StringToNumber(const typename StringToNumberTraits::string_type& input,
                    typename StringToNumberTraits::value_type* output) {
  typedef StringToNumberTraits traits;
  typedef typename traits::string_type::value_type char_type;

  errno = 0;  // Thread-safe? It is on at least Mac, Linux, and Windows.
  char_type* parse_end = NULL;
  *output = traits::convert_func(input.c_str(), &parse_end);

  // ERANGE (or any other error) means overflow or underflow.
  if (errno != 0)
    return false;

  // Nothing to parse.
  if (input.empty())
    return false;

  // The parser must have stopped exactly at the end of the string. Comparing
  // against the string's stated length (not the first NUL) also catches
  // embedded NUL characters, trailing garbage, and input that does not begin
  // with a parseable number.
  if (input.c_str() + input.length() != parse_end)
    return false;

  // Finally, let the traits reject input that is not in preferred form.
  return traits::valid_func(input);
}
96
deanm@chromium.org57612e02009-07-29 02:34:43 +090097static int strtoi(const char *nptr, char **endptr, int base) {
98 long res = strtol(nptr, endptr, base);
99#if __LP64__
100 // Long is 64-bits, we have to handle under/overflow ourselves.
101 if (res > kint32max) {
102 res = kint32max;
103 errno = ERANGE;
104 } else if (res < kint32min) {
105 res = kint32min;
106 errno = ERANGE;
107 }
108#endif
109 return static_cast<int>(res);
110}
111
112static unsigned int strtoui(const char *nptr, char **endptr, int base) {
113 unsigned long res = strtoul(nptr, endptr, base);
114#if __LP64__
115 // Long is 64-bits, we have to handle under/overflow ourselves. Test to see
116 // if the result can fit into 32-bits (as signed or unsigned).
117 if (static_cast<int>(static_cast<long>(res)) != static_cast<long>(res) &&
118 static_cast<unsigned int>(res) != res) {
119 res = kuint32max;
120 errno = ERANGE;
121 }
122#endif
123 return static_cast<unsigned int>(res);
124}
125
126class StringToIntTraits {
mmentovai@google.com93285682008-08-06 07:46:15 +0900127 public:
128 typedef std::string string_type;
deanm@chromium.org57612e02009-07-29 02:34:43 +0900129 typedef int value_type;
mmentovai@google.com93285682008-08-06 07:46:15 +0900130 static const int kBase = 10;
131 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900132 string_type::value_type** endptr) {
deanm@chromium.org57612e02009-07-29 02:34:43 +0900133 return strtoi(str, endptr, kBase);
mmentovai@google.com93285682008-08-06 07:46:15 +0900134 }
135 static inline bool valid_func(const string_type& str) {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900136 return !str.empty() && !isspace(str[0]);
mmentovai@google.com93285682008-08-06 07:46:15 +0900137 }
138};
139
deanm@chromium.org57612e02009-07-29 02:34:43 +0900140class String16ToIntTraits {
mmentovai@google.com93285682008-08-06 07:46:15 +0900141 public:
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900142 typedef string16 string_type;
deanm@chromium.org57612e02009-07-29 02:34:43 +0900143 typedef int value_type;
mmentovai@google.com93285682008-08-06 07:46:15 +0900144 static const int kBase = 10;
145 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900146 string_type::value_type** endptr) {
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900147#if defined(WCHAR_T_IS_UTF16)
mmentovai@google.com93285682008-08-06 07:46:15 +0900148 return wcstol(str, endptr, kBase);
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900149#elif defined(WCHAR_T_IS_UTF32)
150 std::string ascii_string = UTF16ToASCII(string16(str));
151 char* ascii_end = NULL;
deanm@chromium.org57612e02009-07-29 02:34:43 +0900152 value_type ret = strtoi(ascii_string.c_str(), &ascii_end, kBase);
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900153 if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
154 *endptr =
155 const_cast<string_type::value_type*>(str) + ascii_string.length();
156 }
157 return ret;
158#endif
mmentovai@google.com93285682008-08-06 07:46:15 +0900159 }
160 static inline bool valid_func(const string_type& str) {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900161 return !str.empty() && !iswspace(str[0]);
mmentovai@google.com93285682008-08-06 07:46:15 +0900162 }
163};
164
165class StringToInt64Traits {
166 public:
167 typedef std::string string_type;
168 typedef int64 value_type;
169 static const int kBase = 10;
170 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900171 string_type::value_type** endptr) {
mmentovai@google.com93285682008-08-06 07:46:15 +0900172#ifdef OS_WIN
173 return _strtoi64(str, endptr, kBase);
174#else // assume OS_POSIX
175 return strtoll(str, endptr, kBase);
176#endif
177 }
178 static inline bool valid_func(const string_type& str) {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900179 return !str.empty() && !isspace(str[0]);
mmentovai@google.com93285682008-08-06 07:46:15 +0900180 }
181};
182
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900183class String16ToInt64Traits {
mmentovai@google.com93285682008-08-06 07:46:15 +0900184 public:
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900185 typedef string16 string_type;
mmentovai@google.com93285682008-08-06 07:46:15 +0900186 typedef int64 value_type;
187 static const int kBase = 10;
188 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900189 string_type::value_type** endptr) {
mmentovai@google.com93285682008-08-06 07:46:15 +0900190#ifdef OS_WIN
191 return _wcstoi64(str, endptr, kBase);
192#else // assume OS_POSIX
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900193 std::string ascii_string = UTF16ToASCII(string16(str));
194 char* ascii_end = NULL;
195 value_type ret = strtoll(ascii_string.c_str(), &ascii_end, kBase);
196 if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
197 *endptr =
198 const_cast<string_type::value_type*>(str) + ascii_string.length();
199 }
200 return ret;
mmentovai@google.com93285682008-08-06 07:46:15 +0900201#endif
202 }
203 static inline bool valid_func(const string_type& str) {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900204 return !str.empty() && !iswspace(str[0]);
mmentovai@google.com93285682008-08-06 07:46:15 +0900205 }
206};
207
208// For the HexString variants, use the unsigned variants like strtoul for
209// convert_func so that input like "0x80000000" doesn't result in an overflow.
210
deanm@chromium.org57612e02009-07-29 02:34:43 +0900211class HexStringToIntTraits {
mmentovai@google.com93285682008-08-06 07:46:15 +0900212 public:
213 typedef std::string string_type;
deanm@chromium.org57612e02009-07-29 02:34:43 +0900214 typedef int value_type;
mmentovai@google.com93285682008-08-06 07:46:15 +0900215 static const int kBase = 16;
216 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900217 string_type::value_type** endptr) {
deanm@chromium.org57612e02009-07-29 02:34:43 +0900218 return strtoui(str, endptr, kBase);
mmentovai@google.com93285682008-08-06 07:46:15 +0900219 }
220 static inline bool valid_func(const string_type& str) {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900221 return !str.empty() && !isspace(str[0]);
mmentovai@google.com93285682008-08-06 07:46:15 +0900222 }
223};
224
deanm@chromium.org57612e02009-07-29 02:34:43 +0900225class HexString16ToIntTraits {
mmentovai@google.com93285682008-08-06 07:46:15 +0900226 public:
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900227 typedef string16 string_type;
deanm@chromium.org57612e02009-07-29 02:34:43 +0900228 typedef int value_type;
mmentovai@google.com93285682008-08-06 07:46:15 +0900229 static const int kBase = 16;
230 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900231 string_type::value_type** endptr) {
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900232#if defined(WCHAR_T_IS_UTF16)
mmentovai@google.com93285682008-08-06 07:46:15 +0900233 return wcstoul(str, endptr, kBase);
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900234#elif defined(WCHAR_T_IS_UTF32)
235 std::string ascii_string = UTF16ToASCII(string16(str));
236 char* ascii_end = NULL;
deanm@chromium.org57612e02009-07-29 02:34:43 +0900237 value_type ret = strtoui(ascii_string.c_str(), &ascii_end, kBase);
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900238 if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
239 *endptr =
240 const_cast<string_type::value_type*>(str) + ascii_string.length();
241 }
242 return ret;
243#endif
mmentovai@google.com93285682008-08-06 07:46:15 +0900244 }
245 static inline bool valid_func(const string_type& str) {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900246 return !str.empty() && !iswspace(str[0]);
247 }
248};
249
250class StringToDoubleTraits {
251 public:
252 typedef std::string string_type;
253 typedef double value_type;
254 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900255 string_type::value_type** endptr) {
256 return dmg_fp::strtod(str, endptr);
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900257 }
258 static inline bool valid_func(const string_type& str) {
259 return !str.empty() && !isspace(str[0]);
260 }
261};
262
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900263class String16ToDoubleTraits {
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900264 public:
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900265 typedef string16 string_type;
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900266 typedef double value_type;
267 static inline value_type convert_func(const string_type::value_type* str,
tc@google.comfbc20492008-11-18 09:14:28 +0900268 string_type::value_type** endptr) {
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900269 // Because dmg_fp::strtod does not like char16, we convert it to ASCII.
270 // In theory, this should be safe, but it's possible that 16-bit chars
tc@google.comfbc20492008-11-18 09:14:28 +0900271 // might get ignored by accident causing something to be parsed when it
272 // shouldn't.
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900273 std::string ascii_string = UTF16ToASCII(string16(str));
tc@google.comfbc20492008-11-18 09:14:28 +0900274 char* ascii_end = NULL;
275 value_type ret = dmg_fp::strtod(ascii_string.c_str(), &ascii_end);
276 if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
277 // Put endptr at end of input string, so it's not recognized as an error.
dsh@google.com3b0f5f22009-02-28 10:01:50 +0900278 *endptr =
279 const_cast<string_type::value_type*>(str) + ascii_string.length();
tc@google.com866a31a2008-11-13 08:19:30 +0900280 }
tc@google.comfbc20492008-11-18 09:14:28 +0900281
282 return ret;
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +0900283 }
284 static inline bool valid_func(const string_type& str) {
285 return !str.empty() && !iswspace(str[0]);
mmentovai@google.com93285682008-08-06 07:46:15 +0900286 }
287};
288
initial.commit3f4a7322008-07-27 06:49:38 +0900289} // namespace
290
291
mmentovai@google.com8ae0c2c2008-08-14 10:25:32 +0900292namespace base {
293
// Returns true if |format| contains no wprintf conversion that this function
// treats as non-portable: %s and %c without an 'l' modifier, and %S, %C, %F,
// %D, %O and %U. A format string that ends in the middle of a specification
// is reported as portable, since it is equally broken on all platforms.
bool IsWprintfFormatPortable(const wchar_t* format) {
  // Characters that terminate a conversion specification.
  static const wchar_t kConversions[] = L"diouxXeEfgGaAcspn%";

  const wchar_t* cursor = format;
  while (*cursor != '\0') {
    if (*cursor != '%') {
      ++cursor;
      continue;
    }

    // |cursor| sits on a '%': consume the specification that follows it.
    bool has_l_modifier = false;
    for (;;) {
      const wchar_t ch = *++cursor;
      if (ch == '\0') {
        // The string ended mid-specification; nothing unportable was found.
        return true;
      }

      if (ch == 'l') {
        // 'l' is the only thing that can save the 's' and 'c' specifiers.
        has_l_modifier = true;
      } else {
        switch (ch) {
          case 's':
          case 'c':
            if (!has_l_modifier)
              return false;  // Not portable.
            break;
          case 'S':
          case 'C':
          case 'F':
          case 'D':
          case 'O':
          case 'U':
            return false;  // Not portable.
          default:
            break;
        }
      }

      if (wcschr(kConversions, ch)) {
        // Portable; go back to scanning the rest of the format string.
        break;
      }
    }
    ++cursor;  // Step past the conversion character.
  }

  return true;
}
330
tc@google.com866a31a2008-11-13 08:19:30 +0900331
mmentovai@google.com8ae0c2c2008-08-14 10:25:32 +0900332} // namespace base
333
334
initial.commit3f4a7322008-07-27 06:49:38 +0900335const std::string& EmptyString() {
brettw@chromium.org84620612009-03-04 14:29:27 +0900336 return Singleton<EmptyStrings>::get()->s;
initial.commit3f4a7322008-07-27 06:49:38 +0900337}
338
339const std::wstring& EmptyWString() {
brettw@chromium.org84620612009-03-04 14:29:27 +0900340 return Singleton<EmptyStrings>::get()->ws;
341}
342
343const string16& EmptyString16() {
344 return Singleton<EmptyStrings>::get()->s16;
initial.commit3f4a7322008-07-27 06:49:38 +0900345}
346
// Comma-separated list of the code points this file treats as whitespace,
// followed by a terminating 0 so that an array initialized from it can be
// used as a NUL-terminated character set.
// NOTE(review): U+200C (Zero Width Non-Joiner) is not part of the Unicode
// White_Space property -- confirm that treating it as whitespace is intended.
#define WHITESPACE_UNICODE \
  0x0009, 0x000A, 0x000B, /* <control-0009> to <control-000D> */ \
  0x000C, 0x000D, \
  0x0020, /* Space */ \
  0x0085, /* <control-0085> */ \
  0x00A0, /* No-Break Space */ \
  0x1680, /* Ogham Space Mark */ \
  0x180E, /* Mongolian Vowel Separator */ \
  0x2000, 0x2001, 0x2002, 0x2003, /* En Quad to Hair Space */ \
  0x2004, 0x2005, 0x2006, 0x2007, \
  0x2008, 0x2009, 0x200A, \
  0x200C, /* Zero Width Non-Joiner */ \
  0x2028, /* Line Separator */ \
  0x2029, /* Paragraph Separator */ \
  0x202F, /* Narrow No-Break Space */ \
  0x205F, /* Medium Mathematical Space */ \
  0x3000, /* Ideographic Space */ \
  0
darin@chromium.org00f3a0b2009-10-15 05:31:49 +0900376
377const wchar_t kWhitespaceWide[] = {
378 WHITESPACE_UNICODE
379};
380const char16 kWhitespaceUTF16[] = {
381 WHITESPACE_UNICODE
initial.commit3f4a7322008-07-27 06:49:38 +0900382};
// NUL-terminated set of the ASCII whitespace characters: 0x09 through 0x0D
// (tab, line feed, vertical tab, form feed, carriage return) and 0x20 (space).
const char kWhitespaceASCII[] = {
  '\t', '\n', '\v', '\f', '\r',  // <control-0009> to <control-000D>
  ' ',                           // Space
  '\0'
};
initial.commit3f4a7322008-07-27 06:49:38 +0900392
393template<typename STR>
394TrimPositions TrimStringT(const STR& input,
395 const typename STR::value_type trim_chars[],
396 TrimPositions positions,
397 STR* output) {
398 // Find the edges of leading/trailing whitespace as desired.
399 const typename STR::size_type last_char = input.length() - 1;
400 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
401 input.find_first_not_of(trim_chars) : 0;
402 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
403 input.find_last_not_of(trim_chars) : last_char;
404
405 // When the string was all whitespace, report that we stripped off whitespace
406 // from whichever position the caller was interested in. For empty input, we
407 // stripped no whitespace, but we still need to clear |output|.
408 if (input.empty() ||
409 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
410 bool input_was_empty = input.empty(); // in case output == &input
411 output->clear();
412 return input_was_empty ? TRIM_NONE : positions;
413 }
414
415 // Trim the whitespace.
416 *output =
417 input.substr(first_good_char, last_good_char - first_good_char + 1);
418
419 // Return where we trimmed from.
420 return static_cast<TrimPositions>(
421 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
422 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
423}
424
425bool TrimString(const std::wstring& input,
evanm@google.com6dacc032008-08-21 08:41:20 +0900426 const wchar_t trim_chars[],
initial.commit3f4a7322008-07-27 06:49:38 +0900427 std::wstring* output) {
428 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
429}
430
darin@chromium.org00f3a0b2009-10-15 05:31:49 +0900431#if !defined(WCHAR_T_IS_UTF16)
432bool TrimString(const string16& input,
433 const char16 trim_chars[],
434 string16* output) {
435 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
436}
437#endif
438
initial.commit3f4a7322008-07-27 06:49:38 +0900439bool TrimString(const std::string& input,
evanm@google.com6dacc032008-08-21 08:41:20 +0900440 const char trim_chars[],
initial.commit3f4a7322008-07-27 06:49:38 +0900441 std::string* output) {
442 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
443}
444
445TrimPositions TrimWhitespace(const std::wstring& input,
446 TrimPositions positions,
447 std::wstring* output) {
448 return TrimStringT(input, kWhitespaceWide, positions, output);
449}
450
darin@chromium.org00f3a0b2009-10-15 05:31:49 +0900451#if !defined(WCHAR_T_IS_UTF16)
452TrimPositions TrimWhitespace(const string16& input,
453 TrimPositions positions,
454 string16* output) {
455 return TrimStringT(input, kWhitespaceUTF16, positions, output);
456}
457#endif
458
hbono@chromium.org51f8eb32009-03-05 12:41:51 +0900459TrimPositions TrimWhitespaceASCII(const std::string& input,
460 TrimPositions positions,
461 std::string* output) {
462 return TrimStringT(input, kWhitespaceASCII, positions, output);
463}
464
465// This function is only for backward-compatibility.
466// To be removed when all callers are updated.
initial.commit3f4a7322008-07-27 06:49:38 +0900467TrimPositions TrimWhitespace(const std::string& input,
468 TrimPositions positions,
469 std::string* output) {
hbono@chromium.org51f8eb32009-03-05 12:41:51 +0900470 return TrimWhitespaceASCII(input, positions, output);
initial.commit3f4a7322008-07-27 06:49:38 +0900471}
472
// Returns a copy of |text| in which leading and trailing whitespace is
// removed and every interior run of whitespace is replaced by a single space.
// If |trim_sequences_with_line_breaks| is true, interior whitespace runs that
// contain a CR or LF are removed entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  typedef typename STR::size_type size_type;
  typedef typename STR::value_type char_type;

  // The result can never be longer than the input; it is shrunk at the end.
  STR result;
  result.resize(text.size());

  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
  // will trim any leading whitespace.
  bool in_whitespace = true;
  bool already_trimmed = true;

  // Unsigned size_type rather than int, so the index cannot overflow on very
  // long strings. It is only decremented right after a space has been written
  // for the current whitespace run, so it never wraps below zero.
  size_type chars_written = 0;
  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    if (IsWhitespace(*i)) {
      if (!in_whitespace) {
        // Reduce all whitespace sequences to a single space.
        in_whitespace = true;
        result[chars_written++] = static_cast<char_type>(' ');
      }
      if (trim_sequences_with_line_breaks && !already_trimmed &&
          ((*i == '\n') || (*i == '\r'))) {
        // Whitespace sequences containing CR or LF are eliminated entirely.
        already_trimmed = true;
        --chars_written;
      }
    } else {
      // Non-whitespace characters are copied straight across.
      in_whitespace = false;
      already_trimmed = false;
      result[chars_written++] = *i;
    }
  }

  if (in_whitespace && !already_trimmed) {
    // Any trailing whitespace is eliminated.
    --chars_written;
  }

  result.resize(chars_written);
  return result;
}
514
rafaelw@chromium.org75189cd2009-06-05 08:10:39 +0900515std::wstring CollapseWhitespace(const std::wstring& text,
516 bool trim_sequences_with_line_breaks) {
517 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
518}
519
darin@chromium.org00f3a0b2009-10-15 05:31:49 +0900520#if !defined(WCHAR_T_IS_UTF16)
521string16 CollapseWhitespace(const string16& text,
522 bool trim_sequences_with_line_breaks) {
523 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
524}
525#endif
526
rafaelw@chromium.org75189cd2009-06-05 08:10:39 +0900527std::string CollapseWhitespaceASCII(const std::string& text,
528 bool trim_sequences_with_line_breaks) {
529 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
530}
531
initial.commit3f4a7322008-07-27 06:49:38 +0900532std::string WideToASCII(const std::wstring& wide) {
phajdan.jr@chromium.org198cbc82009-08-21 07:44:02 +0900533 DCHECK(IsStringASCII(wide)) << wide;
initial.commit3f4a7322008-07-27 06:49:38 +0900534 return std::string(wide.begin(), wide.end());
535}
536
tony@chromium.orgb84e9bd2009-09-11 06:08:39 +0900537std::wstring ASCIIToWide(const base::StringPiece& ascii) {
phajdan.jr@chromium.org198cbc82009-08-21 07:44:02 +0900538 DCHECK(IsStringASCII(ascii)) << ascii;
initial.commit3f4a7322008-07-27 06:49:38 +0900539 return std::wstring(ascii.begin(), ascii.end());
540}
541
kuchhal@chromium.org38ab3002009-02-27 08:18:17 +0900542std::string UTF16ToASCII(const string16& utf16) {
phajdan.jr@chromium.org198cbc82009-08-21 07:44:02 +0900543 DCHECK(IsStringASCII(utf16)) << utf16;
kuchhal@chromium.org38ab3002009-02-27 08:18:17 +0900544 return std::string(utf16.begin(), utf16.end());
545}
546
tony@chromium.orgb84e9bd2009-09-11 06:08:39 +0900547string16 ASCIIToUTF16(const base::StringPiece& ascii) {
phajdan.jr@chromium.org198cbc82009-08-21 07:44:02 +0900548 DCHECK(IsStringASCII(ascii)) << ascii;
kuchhal@chromium.org38ab3002009-02-27 08:18:17 +0900549 return string16(ascii.begin(), ascii.end());
550}
551
// Converts |wide| to Latin-1. Latin1 is just the low range of Unicode, so each
// character can be copied directly.
//
// Returns true and stores the converted string in |*latin1| if every
// character is in the range [0, 255]. Otherwise returns false and leaves
// |*latin1| empty.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // wchar_t is signed on some platforms. Compare as unsigned so that a
    // negative value is rejected rather than silently truncated to a byte.
    if (static_cast<unsigned long>(wide[i]) > 255)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  latin1->swap(output);
  return true;
}
565
// Returns true if every character of |str| is in the range [0, 255], i.e. the
// string can be represented with 8-bit characters.
bool IsString8Bit(const std::wstring& str) {
  for (size_t i = 0; i < str.length(); i++) {
    // wchar_t is signed on some platforms. Compare as unsigned so that a
    // negative value is not mistaken for an 8-bit character.
    if (static_cast<unsigned long>(str[i]) > 255)
      return false;
  }
  return true;
}
573
kuchhal@chromium.org38ab3002009-02-27 08:18:17 +0900574template<class STR>
575static bool DoIsStringASCII(const STR& str) {
initial.commit3f4a7322008-07-27 06:49:38 +0900576 for (size_t i = 0; i < str.length(); i++) {
kuchhal@chromium.org38ab3002009-02-27 08:18:17 +0900577 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
578 if (c > 0x7F)
initial.commit3f4a7322008-07-27 06:49:38 +0900579 return false;
580 }
581 return true;
582}
583
kuchhal@chromium.org38ab3002009-02-27 08:18:17 +0900584bool IsStringASCII(const std::wstring& str) {
585 return DoIsStringASCII(str);
586}
587
588#if !defined(WCHAR_T_IS_UTF16)
589bool IsStringASCII(const string16& str) {
590 return DoIsStringASCII(str);
591}
592#endif
593
tony@chromium.orgb84e9bd2009-09-11 06:08:39 +0900594bool IsStringASCII(const base::StringPiece& str) {
kuchhal@chromium.org38ab3002009-02-27 08:18:17 +0900595 return DoIsStringASCII(str);
initial.commit3f4a7322008-07-27 06:49:38 +0900596}
597
598// Helper functions that determine whether the given character begins a
599// UTF-8 sequence of bytes with the given length. A character satisfies
600// "IsInUTF8Sequence" if it is anything but the first byte in a multi-byte
601// character.
// True if |c| has the bit pattern 110xxxxx, the lead byte of a 2-byte UTF-8
// sequence.
static inline bool IsBegin2ByteUTF8(int c) {
  const int kPrefixMask = 0xE0;
  const int kPrefixBits = 0xC0;
  return (c & kPrefixMask) == kPrefixBits;
}
// True if |c| has the bit pattern 1110xxxx, the lead byte of a 3-byte UTF-8
// sequence.
static inline bool IsBegin3ByteUTF8(int c) {
  const int kPrefixMask = 0xF0;
  const int kPrefixBits = 0xE0;
  return (c & kPrefixMask) == kPrefixBits;
}
// True if |c| has the bit pattern 11110xxx, the lead byte of a 4-byte UTF-8
// sequence.
static inline bool IsBegin4ByteUTF8(int c) {
  const int kPrefixMask = 0xF8;
  const int kPrefixBits = 0xF0;
  return (c & kPrefixMask) == kPrefixBits;
}
// True if |c| has the bit pattern 10xxxxxx, i.e. it is a continuation byte of
// a multi-byte UTF-8 sequence rather than its first byte.
static inline bool IsInUTF8Sequence(int c) {
  const int kPrefixMask = 0xC0;
  const int kPrefixBits = 0x80;
  return (c & kPrefixMask) == kPrefixBits;
}
614
615// This function was copied from Mozilla, with modifications. The original code
616// was 'IsUTF8' in xpcom/string/src/nsReadableUtils.cpp. The license block for
617// this function is:
618// This function subject to the Mozilla Public License Version
619// 1.1 (the "License"); you may not use this code except in compliance with
620// the License. You may obtain a copy of the License at
621// http://www.mozilla.org/MPL/
622//
623// Software distributed under the License is distributed on an "AS IS" basis,
624// WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
625// for the specific language governing rights and limitations under the
626// License.
627//
628// The Original Code is mozilla.org code.
629//
630// The Initial Developer of the Original Code is
631// Netscape Communications Corporation.
632// Portions created by the Initial Developer are Copyright (C) 2000
633// the Initial Developer. All Rights Reserved.
634//
635// Contributor(s):
636// Scott Collins <scc@mozilla.org> (original author)
637//
638// This is a template so that it can be run on wide and 8-bit strings. We want
639// to run it on wide strings when we have input that we think may have
640// originally been UTF-8, but has been converted to wide characters because
641// that's what we (and Windows) use internally.
642template<typename CHAR>
gregoryd@google.com3734a872009-11-07 08:24:09 +0900643static bool IsStringUTF8T(const CHAR* str, size_t length) {
initial.commit3f4a7322008-07-27 06:49:38 +0900644 bool overlong = false;
645 bool surrogate = false;
646 bool nonchar = false;
647
648 // overlong byte upper bound
649 typename ToUnsigned<CHAR>::Unsigned olupper = 0;
650
651 // surrogate byte lower bound
652 typename ToUnsigned<CHAR>::Unsigned slower = 0;
653
654 // incremented when inside a multi-byte char to indicate how many bytes
655 // are left in the sequence
656 int positions_left = 0;
657
gregoryd@google.com3734a872009-11-07 08:24:09 +0900658 for (uintptr_t i = 0; i < length; i++) {
initial.commit3f4a7322008-07-27 06:49:38 +0900659 // This whole function assume an unsigned value so force its conversion to
660 // an unsigned value.
661 typename ToUnsigned<CHAR>::Unsigned c = str[i];
662 if (c < 0x80)
663 continue; // ASCII
664
665 if (c <= 0xC1) {
666 // [80-BF] where not expected, [C0-C1] for overlong
667 return false;
668 } else if (IsBegin2ByteUTF8(c)) {
669 positions_left = 1;
670 } else if (IsBegin3ByteUTF8(c)) {
671 positions_left = 2;
672 if (c == 0xE0) {
673 // to exclude E0[80-9F][80-BF]
674 overlong = true;
675 olupper = 0x9F;
676 } else if (c == 0xED) {
677 // ED[A0-BF][80-BF]: surrogate codepoint
678 surrogate = true;
679 slower = 0xA0;
680 } else if (c == 0xEF) {
681 // EF BF [BE-BF] : non-character
jungshik@google.com37790f32008-09-26 06:42:00 +0900682 // TODO(jungshik): EF B7 [90-AF] should be checked as well.
initial.commit3f4a7322008-07-27 06:49:38 +0900683 nonchar = true;
684 }
685 } else if (c <= 0xF4) {
686 positions_left = 3;
687 nonchar = true;
688 if (c == 0xF0) {
689 // to exclude F0[80-8F][80-BF]{2}
690 overlong = true;
691 olupper = 0x8F;
692 } else if (c == 0xF4) {
693 // to exclude F4[90-BF][80-BF]
694 // actually not surrogates but codepoints beyond 0x10FFFF
695 surrogate = true;
696 slower = 0x90;
697 }
698 } else {
699 return false;
700 }
701
702 // eat the rest of this multi-byte character
703 while (positions_left) {
704 positions_left--;
705 i++;
706 c = str[i];
707 if (!c)
708 return false; // end of string but not end of character sequence
709
710 // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
dkegel@google.comfc934f12008-08-22 05:29:49 +0900711 if (nonchar && ((!positions_left && c < 0xBE) ||
712 (positions_left == 1 && c != 0xBF) ||
713 (positions_left == 2 && 0x0F != (0x0F & c) ))) {
initial.commit3f4a7322008-07-27 06:49:38 +0900714 nonchar = false;
715 }
dkegel@google.comfc934f12008-08-22 05:29:49 +0900716 if (!IsInUTF8Sequence(c) || (overlong && c <= olupper) ||
717 (surrogate && slower <= c) || (nonchar && !positions_left) ) {
initial.commit3f4a7322008-07-27 06:49:38 +0900718 return false;
719 }
720 overlong = surrogate = false;
721 }
722 }
723 return true;
724}
725
jungshik@google.com37790f32008-09-26 06:42:00 +0900726bool IsStringUTF8(const std::string& str) {
727 return IsStringUTF8T(str.data(), str.length());
initial.commit3f4a7322008-07-27 06:49:38 +0900728}
729
jungshik@google.com37790f32008-09-26 06:42:00 +0900730bool IsStringWideUTF8(const std::wstring& str) {
731 return IsStringUTF8T(str.data(), str.length());
initial.commit3f4a7322008-07-27 06:49:38 +0900732}
733
// Returns true if the range [a_begin, a_end), after ASCII lower-casing each
// element, is equal to the NUL-terminated string |b|. |b| itself is compared
// as-is, so it is expected to be lower-case already.
template<typename Iter>
static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
                                          Iter a_end,
                                          const char* b) {
  Iter cursor = a_begin;
  while (cursor != a_end) {
    // |b| ran out first: the range is longer than |b|.
    if (*b == 0)
      return false;
    if (ToLowerASCII(*cursor) != *b)
      return false;
    ++cursor;
    ++b;
  }
  // Equal only if |b| was consumed completely as well.
  return *b == 0;
}
744
745// Front-ends for LowerCaseEqualsASCII.
746bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
747 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
748}
749
750bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
751 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
752}
753
darin@chromium.org82899522009-10-16 08:22:56 +0900754#if !defined(WCHAR_T_IS_UTF16)
755bool LowerCaseEqualsASCII(const string16& a, const char* b) {
756 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
757}
758#endif
759
initial.commit3f4a7322008-07-27 06:49:38 +0900760bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
761 std::string::const_iterator a_end,
762 const char* b) {
763 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
764}
765
766bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
767 std::wstring::const_iterator a_end,
768 const char* b) {
769 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
770}
darin@chromium.org82899522009-10-16 08:22:56 +0900771
772#if !defined(WCHAR_T_IS_UTF16)
773bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
774 string16::const_iterator a_end,
775 const char* b) {
776 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
777}
778#endif
779
initial.commit3f4a7322008-07-27 06:49:38 +0900780bool LowerCaseEqualsASCII(const char* a_begin,
781 const char* a_end,
782 const char* b) {
783 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
784}
darin@chromium.org82899522009-10-16 08:22:56 +0900785
initial.commit3f4a7322008-07-27 06:49:38 +0900786bool LowerCaseEqualsASCII(const wchar_t* a_begin,
787 const wchar_t* a_end,
788 const char* b) {
789 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
790}
791
darin@chromium.org82899522009-10-16 08:22:56 +0900792#if !defined(WCHAR_T_IS_UTF16)
793bool LowerCaseEqualsASCII(const char16* a_begin,
794 const char16* a_end,
795 const char* b) {
796 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
797}
798#endif
799
tony@chromium.orgb84e9bd2009-09-11 06:08:39 +0900800bool EqualsASCII(const string16& a, const base::StringPiece& b) {
estade@chromium.orgf311bc92009-05-14 10:05:27 +0900801 if (a.length() != b.length())
802 return false;
803 return std::equal(b.begin(), b.end(), a.begin());
804}
805
initial.commit3f4a7322008-07-27 06:49:38 +0900806bool StartsWithASCII(const std::string& str,
807 const std::string& search,
808 bool case_sensitive) {
809 if (case_sensitive)
810 return str.compare(0, search.length(), search) == 0;
811 else
evanm@google.comb8a8f372008-08-08 04:26:37 +0900812 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
initial.commit3f4a7322008-07-27 06:49:38 +0900813}
814
darin@chromium.org82899522009-10-16 08:22:56 +0900815template <typename STR>
816bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
jcampan@chromium.org8bf9c5a2008-11-06 04:28:08 +0900817 if (case_sensitive)
818 return str.compare(0, search.length(), search) == 0;
819 else {
820 if (search.size() > str.size())
821 return false;
822 return std::equal(search.begin(), search.end(), str.begin(),
darin@chromium.org82899522009-10-16 08:22:56 +0900823 CaseInsensitiveCompare<typename STR::value_type>());
jcampan@chromium.org8bf9c5a2008-11-06 04:28:08 +0900824 }
825}
826
darin@chromium.org82899522009-10-16 08:22:56 +0900827bool StartsWith(const std::wstring& str, const std::wstring& search,
828 bool case_sensitive) {
829 return StartsWithT(str, search, case_sensitive);
830}
831
832#if !defined(WCHAR_T_IS_UTF16)
833bool StartsWith(const string16& str, const string16& search,
834 bool case_sensitive) {
835 return StartsWithT(str, search, case_sensitive);
836}
837#endif
838
839template <typename STR>
840bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
841 typename STR::size_type str_length = str.length();
842 typename STR::size_type search_length = search.length();
stuartmorgan@google.com21f185c2009-09-05 04:12:37 +0900843 if (search_length > str_length)
844 return false;
845 if (case_sensitive) {
846 return str.compare(str_length - search_length, search_length, search) == 0;
847 } else {
848 return std::equal(search.begin(), search.end(),
849 str.begin() + (str_length - search_length),
darin@chromium.org82899522009-10-16 08:22:56 +0900850 CaseInsensitiveCompare<typename STR::value_type>());
stuartmorgan@google.com21f185c2009-09-05 04:12:37 +0900851 }
852}
853
aa@chromium.org51add8d2009-11-04 11:15:20 +0900854bool EndsWith(const std::string& str, const std::string& search,
855 bool case_sensitive) {
856 return EndsWithT(str, search, case_sensitive);
857}
858
darin@chromium.org82899522009-10-16 08:22:56 +0900859bool EndsWith(const std::wstring& str, const std::wstring& search,
860 bool case_sensitive) {
861 return EndsWithT(str, search, case_sensitive);
862}
863
864#if !defined(WCHAR_T_IS_UTF16)
865bool EndsWith(const string16& str, const string16& search,
866 bool case_sensitive) {
867 return EndsWithT(str, search, case_sensitive);
868}
869#endif
870
initial.commit3f4a7322008-07-27 06:49:38 +0900871DataUnits GetByteDisplayUnits(int64 bytes) {
872 // The byte thresholds at which we display amounts. A byte count is displayed
873 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
874 // This must match the DataUnits enum.
875 static const int64 kUnitThresholds[] = {
876 0, // DATA_UNITS_BYTE,
877 3*1024, // DATA_UNITS_KILOBYTE,
878 2*1024*1024, // DATA_UNITS_MEGABYTE,
879 1024*1024*1024 // DATA_UNITS_GIGABYTE,
880 };
881
882 if (bytes < 0) {
883 NOTREACHED() << "Negative bytes value";
884 return DATA_UNITS_BYTE;
885 }
886
887 int unit_index = arraysize(kUnitThresholds);
888 while (--unit_index > 0) {
889 if (bytes >= kUnitThresholds[unit_index])
890 break;
891 }
892
893 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIGABYTE);
894 return DataUnits(unit_index);
895}
896
// TODO(mpcomplete): deal with locale
// Suffixes for plain byte quantities. Entries are looked up by DataUnits
// value, so their order must match the DataUnits enum.
static const wchar_t* const kByteStrings[] = { L"B", L"kB", L"MB", L"GB" };

// Suffixes for transfer rates, listed in the same unit order as kByteStrings.
static const wchar_t* const kSpeedStrings[] = {
  L"B/s", L"kB/s", L"MB/s", L"GB/s"
};
912
913std::wstring FormatBytesInternal(int64 bytes,
914 DataUnits units,
915 bool show_units,
916 const wchar_t* const* suffix) {
917 if (bytes < 0) {
918 NOTREACHED() << "Negative bytes value";
919 return std::wstring();
920 }
921
922 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIGABYTE);
923
924 // Put the quantity in the right units.
925 double unit_amount = static_cast<double>(bytes);
926 for (int i = 0; i < units; ++i)
927 unit_amount /= 1024.0;
928
929 wchar_t tmp[64];
930 // If the first decimal digit is 0, don't show it.
931 double int_part;
932 double fractional_part = modf(unit_amount, &int_part);
933 modf(fractional_part * 10, &int_part);
evanm@google.comb8a8f372008-08-08 04:26:37 +0900934 if (int_part == 0) {
935 base::swprintf(tmp, arraysize(tmp),
936 L"%lld", static_cast<int64>(unit_amount));
937 } else {
938 base::swprintf(tmp, arraysize(tmp), L"%.1lf", unit_amount);
939 }
initial.commit3f4a7322008-07-27 06:49:38 +0900940
941 std::wstring ret(tmp);
942 if (show_units) {
943 ret += L" ";
944 ret += suffix[units];
945 }
946
947 return ret;
948}
949
950std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units) {
951 return FormatBytesInternal(bytes, units, show_units, kByteStrings);
952}
953
954std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
955 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
956}
957
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. The search resumes after the inserted text, so
// replacement text that itself contains |find_this| is not re-scanned.
//
// Does nothing when |start_offset| is npos or not inside the string, or when
// |find_this| is empty (which is also flagged with a DCHECK as a caller
// error).
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty pattern matches at every offset. With |replace_all| the loop
  // below would then never run out of matches and would keep growing the
  // string, so bail out explicitly instead of relying on the DCHECK alone.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
       offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Skip over what was just inserted before searching again.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
977
dsh@google.com7d9dbcf2009-03-03 10:21:09 +0900978void ReplaceFirstSubstringAfterOffset(string16* str,
979 string16::size_type start_offset,
980 const string16& find_this,
981 const string16& replace_with) {
aa@chromium.org103c9982009-01-22 08:47:54 +0900982 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
983 false); // replace first instance
984}
985
986void ReplaceFirstSubstringAfterOffset(std::string* str,
987 std::string::size_type start_offset,
988 const std::string& find_this,
989 const std::string& replace_with) {
990 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
991 false); // replace first instance
992}
993
dsh@google.com7d9dbcf2009-03-03 10:21:09 +0900994void ReplaceSubstringsAfterOffset(string16* str,
995 string16::size_type start_offset,
996 const string16& find_this,
997 const string16& replace_with) {
aa@chromium.org103c9982009-01-22 08:47:54 +0900998 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
999 true); // replace all instances
initial.commit3f4a7322008-07-27 06:49:38 +09001000}
1001
1002void ReplaceSubstringsAfterOffset(std::string* str,
1003 std::string::size_type start_offset,
1004 const std::string& find_this,
1005 const std::string& replace_with) {
aa@chromium.org103c9982009-01-22 08:47:54 +09001006 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
1007 true); // replace all instances
initial.commit3f4a7322008-07-27 06:49:38 +09001008}
1009
1010// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
1011// is the size of the buffer. These return the number of characters in the
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001012// formatted string excluding the NUL terminator. If the buffer is not
1013// large enough to accommodate the formatted string without truncation, they
1014// return the number of characters that would be in the fully-formatted string
1015// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
initial.commit3f4a7322008-07-27 06:49:38 +09001016inline int vsnprintfT(char* buffer,
1017 size_t buf_size,
1018 const char* format,
1019 va_list argptr) {
evanm@google.comb8a8f372008-08-08 04:26:37 +09001020 return base::vsnprintf(buffer, buf_size, format, argptr);
initial.commit3f4a7322008-07-27 06:49:38 +09001021}
1022
1023inline int vsnprintfT(wchar_t* buffer,
1024 size_t buf_size,
1025 const wchar_t* format,
1026 va_list argptr) {
evanm@google.comb8a8f372008-08-08 04:26:37 +09001027 return base::vswprintf(buffer, buf_size, format, argptr);
initial.commit3f4a7322008-07-27 06:49:38 +09001028}
1029
1030// Templatized backend for StringPrintF/StringAppendF. This does not finalize
1031// the va_list, the caller is expected to do that.
mark@chromium.orgeecb0b82009-03-26 01:33:29 +09001032template <class StringType>
1033static void StringAppendVT(StringType* dst,
1034 const typename StringType::value_type* format,
1035 va_list ap) {
initial.commit3f4a7322008-07-27 06:49:38 +09001036 // First try with a small fixed size buffer.
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001037 // This buffer size should be kept in sync with StringUtilTest.GrowBoundary
1038 // and StringUtilTest.StringPrintfBounds.
mark@chromium.orgeecb0b82009-03-26 01:33:29 +09001039 typename StringType::value_type stack_buf[1024];
initial.commit3f4a7322008-07-27 06:49:38 +09001040
deanm@chromium.orga780eb42009-08-25 05:28:49 +09001041 va_list ap_copy;
1042 GG_VA_COPY(ap_copy, ap);
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001043
1044#if !defined(OS_WIN)
1045 errno = 0;
1046#endif
deanm@chromium.orga780eb42009-08-25 05:28:49 +09001047 int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, ap_copy);
1048 va_end(ap_copy);
initial.commit3f4a7322008-07-27 06:49:38 +09001049
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001050 if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) {
initial.commit3f4a7322008-07-27 06:49:38 +09001051 // It fit.
1052 dst->append(stack_buf, result);
1053 return;
1054 }
1055
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001056 // Repeatedly increase buffer size until it fits.
1057 int mem_length = arraysize(stack_buf);
1058 while (true) {
1059 if (result < 0) {
1060#if !defined(OS_WIN)
1061 // On Windows, vsnprintfT always returns the number of characters in a
1062 // fully-formatted string, so if we reach this point, something else is
1063 // wrong and no amount of buffer-doubling is going to fix it.
1064 if (errno != 0 && errno != EOVERFLOW)
1065#endif
1066 {
1067 // If an error other than overflow occurred, it's never going to work.
1068 DLOG(WARNING) << "Unable to printf the requested string due to error.";
1069 return;
1070 }
1071 // Try doubling the buffer size.
1072 mem_length *= 2;
1073 } else {
1074 // We need exactly "result + 1" characters.
1075 mem_length = result + 1;
1076 }
initial.commit3f4a7322008-07-27 06:49:38 +09001077
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001078 if (mem_length > 32 * 1024 * 1024) {
1079 // That should be plenty, don't try anything larger. This protects
1080 // against huge allocations when using vsnprintfT implementations that
1081 // return -1 for reasons other than overflow without setting errno.
1082 DLOG(WARNING) << "Unable to printf the requested string due to size.";
1083 return;
1084 }
1085
mark@chromium.orgeecb0b82009-03-26 01:33:29 +09001086 std::vector<typename StringType::value_type> mem_buf(mem_length);
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001087
deanm@chromium.orga780eb42009-08-25 05:28:49 +09001088 // NOTE: You can only use a va_list once. Since we're in a while loop, we
1089 // need to make a new copy each time so we don't use up the original.
1090 GG_VA_COPY(ap_copy, ap);
1091 result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
1092 va_end(ap_copy);
mmentovai@google.com38cabad2008-08-13 10:17:18 +09001093
1094 if ((result >= 0) && (result < mem_length)) {
1095 // It fit.
1096 dst->append(&mem_buf[0], result);
1097 return;
1098 }
initial.commit3f4a7322008-07-27 06:49:38 +09001099 }
initial.commit3f4a7322008-07-27 06:49:38 +09001100}
1101
deanm@google.comd6498452008-08-13 20:09:33 +09001102namespace {
initial.commit3f4a7322008-07-27 06:49:38 +09001103
deanm@google.comd6498452008-08-13 20:09:33 +09001104template <typename STR, typename INT, typename UINT, bool NEG>
1105struct IntToStringT {
initial.commit3f4a7322008-07-27 06:49:38 +09001106
deanm@google.comd6498452008-08-13 20:09:33 +09001107 // This is to avoid a compiler warning about unary minus on unsigned type.
1108 // For example, say you had the following code:
1109 // template <typename INT>
1110 // INT abs(INT value) { return value < 0 ? -value : value; }
1111 // Even though if INT is unsigned, it's impossible for value < 0, so the
1112 // unary minus will never be taken, the compiler will still generate a
1113 // warning. We do a little specialization dance...
1114 template <typename INT2, typename UINT2, bool NEG2>
1115 struct ToUnsignedT { };
1116
1117 template <typename INT2, typename UINT2>
1118 struct ToUnsignedT<INT2, UINT2, false> {
1119 static UINT2 ToUnsigned(INT2 value) {
1120 return static_cast<UINT2>(value);
1121 }
1122 };
1123
1124 template <typename INT2, typename UINT2>
1125 struct ToUnsignedT<INT2, UINT2, true> {
1126 static UINT2 ToUnsigned(INT2 value) {
1127 return static_cast<UINT2>(value < 0 ? -value : value);
1128 }
1129 };
1130
1131 static STR IntToString(INT value) {
1132 // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
1133 // So round up to allocate 3 output characters per byte, plus 1 for '-'.
1134 const int kOutputBufSize = 3 * sizeof(INT) + 1;
1135
1136 // Allocate the whole string right away, we will right back to front, and
1137 // then return the substr of what we ended up using.
1138 STR outbuf(kOutputBufSize, 0);
1139
1140 bool is_neg = value < 0;
1141 // Even though is_neg will never be true when INT is parameterized as
1142 // unsigned, even the presence of the unary operation causes a warning.
1143 UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
1144
1145 for (typename STR::iterator it = outbuf.end();;) {
1146 --it;
1147 DCHECK(it != outbuf.begin());
1148 *it = static_cast<typename STR::value_type>((res % 10) + '0');
1149 res /= 10;
1150
1151 // We're done..
1152 if (res == 0) {
1153 if (is_neg) {
1154 --it;
1155 DCHECK(it != outbuf.begin());
1156 *it = static_cast<typename STR::value_type>('-');
1157 }
1158 return STR(it, outbuf.end());
1159 }
1160 }
1161 NOTREACHED();
1162 return STR();
1163 }
1164};
1165
initial.commit3f4a7322008-07-27 06:49:38 +09001166}
1167
1168std::string IntToString(int value) {
deanm@google.comd6498452008-08-13 20:09:33 +09001169 return IntToStringT<std::string, int, unsigned int, true>::
1170 IntToString(value);
initial.commit3f4a7322008-07-27 06:49:38 +09001171}
initial.commit3f4a7322008-07-27 06:49:38 +09001172std::wstring IntToWString(int value) {
deanm@google.comd6498452008-08-13 20:09:33 +09001173 return IntToStringT<std::wstring, int, unsigned int, true>::
1174 IntToString(value);
1175}
brettw@chromium.org71865012009-05-15 00:53:13 +09001176string16 IntToString16(int value) {
1177 return IntToStringT<string16, int, unsigned int, true>::
1178 IntToString(value);
1179}
deanm@google.comd6498452008-08-13 20:09:33 +09001180std::string UintToString(unsigned int value) {
1181 return IntToStringT<std::string, unsigned int, unsigned int, false>::
1182 IntToString(value);
1183}
1184std::wstring UintToWString(unsigned int value) {
1185 return IntToStringT<std::wstring, unsigned int, unsigned int, false>::
1186 IntToString(value);
1187}
erg@google.come915b7f2009-06-11 08:54:20 +09001188string16 UintToString16(unsigned int value) {
1189 return IntToStringT<string16, unsigned int, unsigned int, false>::
1190 IntToString(value);
1191}
deanm@google.comd6498452008-08-13 20:09:33 +09001192std::string Int64ToString(int64 value) {
1193 return IntToStringT<std::string, int64, uint64, true>::
1194 IntToString(value);
1195}
1196std::wstring Int64ToWString(int64 value) {
1197 return IntToStringT<std::wstring, int64, uint64, true>::
1198 IntToString(value);
1199}
1200std::string Uint64ToString(uint64 value) {
1201 return IntToStringT<std::string, uint64, uint64, false>::
1202 IntToString(value);
1203}
1204std::wstring Uint64ToWString(uint64 value) {
1205 return IntToStringT<std::wstring, uint64, uint64, false>::
1206 IntToString(value);
initial.commit3f4a7322008-07-27 06:49:38 +09001207}
1208
tc@google.comfbc20492008-11-18 09:14:28 +09001209std::string DoubleToString(double value) {
1210 // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
1211 char buffer[32];
1212 dmg_fp::g_fmt(buffer, value);
1213 return std::string(buffer);
1214}
1215
1216std::wstring DoubleToWString(double value) {
1217 return ASCIIToWide(DoubleToString(value));
1218}
1219
tc@google.com14b39252009-01-13 05:40:28 +09001220void StringAppendV(std::string* dst, const char* format, va_list ap) {
mark@chromium.orgeecb0b82009-03-26 01:33:29 +09001221 StringAppendVT(dst, format, ap);
initial.commit3f4a7322008-07-27 06:49:38 +09001222}
1223
mark@chromium.orgeecb0b82009-03-26 01:33:29 +09001224void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
1225 StringAppendVT(dst, format, ap);
initial.commit3f4a7322008-07-27 06:49:38 +09001226}
1227
1228std::string StringPrintf(const char* format, ...) {
1229 va_list ap;
1230 va_start(ap, format);
1231 std::string result;
1232 StringAppendV(&result, format, ap);
1233 va_end(ap);
1234 return result;
1235}
1236
1237std::wstring StringPrintf(const wchar_t* format, ...) {
1238 va_list ap;
1239 va_start(ap, format);
1240 std::wstring result;
1241 StringAppendV(&result, format, ap);
1242 va_end(ap);
1243 return result;
1244}
1245
1246const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
1247 va_list ap;
1248 va_start(ap, format);
1249 dst->clear();
1250 StringAppendV(dst, format, ap);
1251 va_end(ap);
1252 return *dst;
1253}
1254
1255const std::wstring& SStringPrintf(std::wstring* dst,
1256 const wchar_t* format, ...) {
1257 va_list ap;
1258 va_start(ap, format);
1259 dst->clear();
1260 StringAppendV(dst, format, ap);
1261 va_end(ap);
1262 return *dst;
1263}
1264
1265void StringAppendF(std::string* dst, const char* format, ...) {
1266 va_list ap;
1267 va_start(ap, format);
1268 StringAppendV(dst, format, ap);
1269 va_end(ap);
1270}
1271
1272void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
1273 va_list ap;
1274 va_start(ap, format);
1275 StringAppendV(dst, format, ap);
1276 va_end(ap);
1277}
1278
1279template<typename STR>
1280static void SplitStringT(const STR& str,
1281 const typename STR::value_type s,
1282 bool trim_whitespace,
1283 std::vector<STR>* r) {
1284 size_t last = 0;
1285 size_t i;
1286 size_t c = str.size();
1287 for (i = 0; i <= c; ++i) {
1288 if (i == c || str[i] == s) {
1289 size_t len = i - last;
1290 STR tmp = str.substr(last, len);
1291 if (trim_whitespace) {
1292 STR t_tmp;
1293 TrimWhitespace(tmp, TRIM_ALL, &t_tmp);
1294 r->push_back(t_tmp);
1295 } else {
1296 r->push_back(tmp);
1297 }
1298 last = i + 1;
1299 }
1300 }
1301}
1302
1303void SplitString(const std::wstring& str,
1304 wchar_t s,
1305 std::vector<std::wstring>* r) {
1306 SplitStringT(str, s, true, r);
1307}
1308
darin@chromium.orgbca1e7c2009-10-20 04:36:53 +09001309#if !defined(WCHAR_T_IS_UTF16)
1310void SplitString(const string16& str,
1311 char16 s,
1312 std::vector<string16>* r) {
1313 SplitStringT(str, s, true, r);
1314}
1315#endif
1316
initial.commit3f4a7322008-07-27 06:49:38 +09001317void SplitString(const std::string& str,
1318 char s,
1319 std::vector<std::string>* r) {
1320 SplitStringT(str, s, true, r);
1321}
1322
1323void SplitStringDontTrim(const std::wstring& str,
1324 wchar_t s,
1325 std::vector<std::wstring>* r) {
1326 SplitStringT(str, s, false, r);
1327}
1328
darin@chromium.orgbca1e7c2009-10-20 04:36:53 +09001329#if !defined(WCHAR_T_IS_UTF16)
1330void SplitStringDontTrim(const string16& str,
1331 char16 s,
1332 std::vector<string16>* r) {
1333 SplitStringT(str, s, false, r);
1334}
1335#endif
1336
initial.commit3f4a7322008-07-27 06:49:38 +09001337void SplitStringDontTrim(const std::string& str,
1338 char s,
1339 std::vector<std::string>* r) {
1340 SplitStringT(str, s, false, r);
1341}
1342
// Concatenates |parts| in order, placing |sep| between consecutive elements.
// An empty |parts| yields an empty string; empty elements still get their
// separators.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  STR joined;
  for (typename std::vector<STR>::size_type i = 0; i < parts.size(); ++i) {
    // Every element except the first is preceded by the separator.
    if (i != 0)
      joined += sep;
    joined += parts[i];
  }
  return joined;
}
1359
1360std::string JoinString(const std::vector<std::string>& parts, char sep) {
1361 return JoinStringT(parts, sep);
1362}
1363
darin@chromium.orgbca1e7c2009-10-20 04:36:53 +09001364#if !defined(WCHAR_T_IS_UTF16)
1365string16 JoinString(const std::vector<string16>& parts, char sep) {
1366 return JoinStringT(parts, sep);
1367}
1368#endif
1369
aa@chromium.orgfb2d7a92009-02-02 13:09:58 +09001370std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t sep) {
1371 return JoinStringT(parts, sep);
1372}
1373
// Appends every maximal run of non-whitespace characters in |str| to
// |*result|, in order. Whitespace is the HTML 5 set: space, tab, LF, line
// tab, FF and CR. Leading, trailing and repeated whitespace produce no empty
// entries; an empty or all-whitespace input appends nothing.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  if (str.empty())
    return;

  const size_t length = str.length();
  // The scan starts out "inside" a (so far empty) token beginning at 0.
  bool in_token = true;
  size_t token_begin = 0;
  for (size_t pos = 0; pos < length; ++pos) {
    const typename STR::value_type ch = str[pos];
    const bool is_space = ch == L' ' || ch == L'\t' || ch == L'\xA' ||
                          ch == L'\xB' || ch == L'\xC' || ch == L'\xD';
    if (is_space) {
      // Whitespace closes the current token, unless nothing was collected
      // because the string itself starts with whitespace.
      if (in_token && pos > token_begin)
        result->push_back(str.substr(token_begin, pos - token_begin));
      in_token = false;
    } else if (!in_token) {
      // First non-space character after a run of whitespace.
      in_token = true;
      token_begin = pos;
    }
  }

  // Flush the token that runs up to the end of the string.
  if (in_token)
    result->push_back(str.substr(token_begin, length - token_begin));
}
1413
vandebo@chromium.org6595d1b2009-10-09 08:50:43 +09001414void SplitStringAlongWhitespace(const std::wstring& str,
1415 std::vector<std::wstring>* result) {
1416 SplitStringAlongWhitespaceT(str, result);
1417}
1418
darin@chromium.orgbca1e7c2009-10-20 04:36:53 +09001419#if !defined(WCHAR_T_IS_UTF16)
1420void SplitStringAlongWhitespace(const string16& str,
1421 std::vector<string16>* result) {
1422 SplitStringAlongWhitespaceT(str, result);
1423}
1424#endif
1425
vandebo@chromium.org6595d1b2009-10-09 08:50:43 +09001426void SplitStringAlongWhitespace(const std::string& str,
1427 std::vector<std::string>* result) {
1428 SplitStringAlongWhitespaceT(str, result);
1429}
1430
tony@chromium.org5feb5bb2009-10-30 04:48:54 +09001431template<class FormatStringType, class OutStringType>
1432OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
1433 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
gregoryd@google.com3734a872009-11-07 08:24:09 +09001434 size_t substitutions = subst.size();
glen@chromium.org7a664f42009-05-16 07:40:57 +09001435 DCHECK(substitutions < 10);
1436
gregoryd@google.com3734a872009-11-07 08:24:09 +09001437 size_t sub_length = 0;
tony@chromium.org5feb5bb2009-10-30 04:48:54 +09001438 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
1439 iter != subst.end(); ++iter) {
glen@chromium.org7a664f42009-05-16 07:40:57 +09001440 sub_length += (*iter).length();
initial.commit3f4a7322008-07-27 06:49:38 +09001441 }
initial.commit3f4a7322008-07-27 06:49:38 +09001442
tony@chromium.org5feb5bb2009-10-30 04:48:54 +09001443 OutStringType formatted;
glen@chromium.org7a664f42009-05-16 07:40:57 +09001444 formatted.reserve(format_string.length() + sub_length);
initial.commit3f4a7322008-07-27 06:49:38 +09001445
1446 std::vector<ReplacementOffset> r_offsets;
tony@chromium.org5feb5bb2009-10-30 04:48:54 +09001447 for (typename FormatStringType::const_iterator i = format_string.begin();
initial.commit3f4a7322008-07-27 06:49:38 +09001448 i != format_string.end(); ++i) {
1449 if ('$' == *i) {
1450 if (i + 1 != format_string.end()) {
1451 ++i;
glen@chromium.org7a664f42009-05-16 07:40:57 +09001452 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
initial.commit3f4a7322008-07-27 06:49:38 +09001453 if ('$' == *i) {
1454 formatted.push_back('$');
1455 } else {
gregoryd@google.com3734a872009-11-07 08:24:09 +09001456 uintptr_t index = *i - '1';
initial.commit3f4a7322008-07-27 06:49:38 +09001457 if (offsets) {
1458 ReplacementOffset r_offset(index,
glen@chromium.org7a664f42009-05-16 07:40:57 +09001459 static_cast<int>(formatted.size()));
initial.commit3f4a7322008-07-27 06:49:38 +09001460 r_offsets.insert(std::lower_bound(r_offsets.begin(),
glen@chromium.org7a664f42009-05-16 07:40:57 +09001461 r_offsets.end(), r_offset,
1462 &CompareParameter),
1463 r_offset);
initial.commit3f4a7322008-07-27 06:49:38 +09001464 }
glen@chromium.org7a664f42009-05-16 07:40:57 +09001465 if (index < substitutions)
1466 formatted.append(subst.at(index));
initial.commit3f4a7322008-07-27 06:49:38 +09001467 }
1468 }
1469 } else {
1470 formatted.push_back(*i);
1471 }
1472 }
1473 if (offsets) {
1474 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
glen@chromium.org7a664f42009-05-16 07:40:57 +09001475 i != r_offsets.end(); ++i) {
initial.commit3f4a7322008-07-27 06:49:38 +09001476 offsets->push_back(i->offset);
1477 }
1478 }
1479 return formatted;
1480}
1481
glen@chromium.org7a664f42009-05-16 07:40:57 +09001482string16 ReplaceStringPlaceholders(const string16& format_string,
tony@chromium.org8ec6de42009-09-30 06:32:01 +09001483 const std::vector<string16>& subst,
1484 std::vector<size_t>* offsets) {
1485 return DoReplaceStringPlaceholders(format_string, subst, offsets);
1486}
1487
tony@chromium.org5feb5bb2009-10-30 04:48:54 +09001488std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
tony@chromium.org8ec6de42009-09-30 06:32:01 +09001489 const std::vector<std::string>& subst,
1490 std::vector<size_t>* offsets) {
1491 return DoReplaceStringPlaceholders(format_string, subst, offsets);
1492}
1493
1494string16 ReplaceStringPlaceholders(const string16& format_string,
glen@chromium.org7a664f42009-05-16 07:40:57 +09001495 const string16& a,
1496 size_t* offset) {
1497 std::vector<size_t> offsets;
1498 std::vector<string16> subst;
1499 subst.push_back(a);
1500 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
1501
1502 DCHECK(offsets.size() == 1);
1503 if (offset) {
1504 *offset = offsets[0];
1505 }
1506 return result;
1507}
1508
// Returns true if |character| is one of the pattern wildcards, '*' or '?'.
template <class CHAR>
static bool IsWildcard(CHAR character) {
  switch (character) {
    case '*':
    case '?':
      return true;
    default:
      return false;
  }
}
1513
// Advances |*pattern| and |*string| past their common leading characters,
// stopping where they start to differ, at an unescaped wildcard in the
// pattern, or at the end of either one. A backslash in the pattern escapes
// the character after it, so an escaped wildcard is compared literally.
template <class CHAR>
static void EatSameChars(const CHAR** pattern, const CHAR** string) {
  bool after_escape = false;
  while (**pattern != 0 && **string != 0) {
    if (!after_escape) {
      // Unescaped wildcards are not matched here.
      if (IsWildcard(**pattern))
        return;

      // An escape character: step over it and treat the next pattern
      // character as a literal.
      if (**pattern == L'\\') {
        after_escape = true;
        ++*pattern;
        continue;
      }
    }

    if (**pattern != **string) {
      // Mismatch, so we are done. If the escape character had just been
      // skipped, that advance was premature: move the pattern back onto it.
      // This also makes MatchPattern fail, since nothing else can match the
      // escape character either.
      if (after_escape)
        --*pattern;
      return;
    }

    // The characters agree; consume one from each side.
    ++*pattern;
    ++*string;
    after_escape = false;
  }
}
1551
// Advances |*pattern| past any run of consecutive wildcard characters.
template <class CHAR>
static void EatWildcard(const CHAR** pattern) {
  while (**pattern && IsWildcard(**pattern))
    ++*pattern;
}
1560
// Returns true if the NUL-terminated string |eval| matches |pattern|. In the
// pattern, '?' matches zero or one character and '*' matches any number of
// characters; literal runs are consumed by EatSameChars. Implemented
// recursively.
template <class CHAR>
static bool MatchPatternT(const CHAR* eval, const CHAR* pattern) {
  // Consume the literal prefix both sides have in common.
  EatSameChars(&pattern, &eval);

  // The string is used up: what is left of the pattern must be empty or
  // consist only of wildcards.
  if (*eval == 0) {
    EatWildcard(&pattern);
    return *pattern == 0;
  }

  // String left over but no pattern to match it against.
  if (*pattern == 0)
    return false;

  const CHAR* const rest = pattern + 1;

  // '?': try the remainder of the pattern against the string as it is, then
  // against the string with one character eaten.
  if (*pattern == '?')
    return MatchPatternT(eval, rest) || MatchPatternT(eval + 1, rest);

  // Anything other than '*' at this point is a literal mismatch.
  if (*pattern != '*')
    return false;

  // '*': try the remainder of the pattern against every suffix of the string.
  for (; *eval; ++eval) {
    if (MatchPatternT(eval, rest))
      return true;
  }

  // The string ran out; succeed only if the pattern holds nothing but
  // wildcards from here on.
  EatWildcard(&pattern);
  return *pattern == 0;
}
1608
1609bool MatchPattern(const std::wstring& eval, const std::wstring& pattern) {
1610 return MatchPatternT(eval.c_str(), pattern.c_str());
1611}
1612
1613bool MatchPattern(const std::string& eval, const std::string& pattern) {
1614 return MatchPatternT(eval.c_str(), pattern.c_str());
1615}
mmentovai@google.com93285682008-08-06 07:46:15 +09001616
mmentovai@google.com93285682008-08-06 07:46:15 +09001617bool StringToInt(const std::string& input, int* output) {
deanm@chromium.org57612e02009-07-29 02:34:43 +09001618 return StringToNumber<StringToIntTraits>(input, output);
mmentovai@google.com93285682008-08-06 07:46:15 +09001619}
1620
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001621bool StringToInt(const string16& input, int* output) {
deanm@chromium.org57612e02009-07-29 02:34:43 +09001622 return StringToNumber<String16ToIntTraits>(input, output);
mmentovai@google.com93285682008-08-06 07:46:15 +09001623}
1624
1625bool StringToInt64(const std::string& input, int64* output) {
tc@google.comfbc20492008-11-18 09:14:28 +09001626 return StringToNumber<StringToInt64Traits>(input, output);
mmentovai@google.com93285682008-08-06 07:46:15 +09001627}
1628
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001629bool StringToInt64(const string16& input, int64* output) {
1630 return StringToNumber<String16ToInt64Traits>(input, output);
mmentovai@google.com93285682008-08-06 07:46:15 +09001631}
1632
1633bool HexStringToInt(const std::string& input, int* output) {
deanm@chromium.org57612e02009-07-29 02:34:43 +09001634 return StringToNumber<HexStringToIntTraits>(input, output);
mmentovai@google.com93285682008-08-06 07:46:15 +09001635}
1636
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001637bool HexStringToInt(const string16& input, int* output) {
deanm@chromium.org57612e02009-07-29 02:34:43 +09001638 return StringToNumber<HexString16ToIntTraits>(input, output);
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +09001639}
1640
erikkay@google.com47287092009-01-28 05:23:10 +09001641namespace {
1642
erikkay@google.com200cc932009-01-28 01:11:15 +09001643template<class CHAR>
1644bool HexDigitToIntT(const CHAR digit, uint8* val) {
1645 if (digit >= '0' && digit <= '9')
1646 *val = digit - '0';
1647 else if (digit >= 'a' && digit <= 'f')
1648 *val = 10 + digit - 'a';
1649 else if (digit >= 'A' && digit <= 'F')
1650 *val = 10 + digit - 'A';
1651 else
1652 return false;
1653 return true;
1654}
1655
1656template<typename STR>
1657bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
1658 DCHECK(output->size() == 0);
gregoryd@google.com3734a872009-11-07 08:24:09 +09001659 size_t count = input.size();
erikkay@google.com200cc932009-01-28 01:11:15 +09001660 if (count == 0 || (count % 2) != 0)
1661 return false;
gregoryd@google.com3734a872009-11-07 08:24:09 +09001662 for (uintptr_t i = 0; i < count / 2; ++i) {
erikkay@google.com200cc932009-01-28 01:11:15 +09001663 uint8 msb = 0; // most significant 4 bits
1664 uint8 lsb = 0; // least significant 4 bits
1665 if (!HexDigitToIntT(input[i * 2], &msb) ||
1666 !HexDigitToIntT(input[i * 2 + 1], &lsb))
1667 return false;
1668 output->push_back((msb << 4) | lsb);
1669 }
1670 return true;
1671}
1672
erikkay@google.com47287092009-01-28 05:23:10 +09001673} // namespace
1674
erikkay@google.com200cc932009-01-28 01:11:15 +09001675bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
1676 return HexStringToBytesT(input, output);
1677}
1678
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001679bool HexStringToBytes(const string16& input, std::vector<uint8>* output) {
erikkay@google.com200cc932009-01-28 01:11:15 +09001680 return HexStringToBytesT(input, output);
1681}
1682
mmentovai@google.com93285682008-08-06 07:46:15 +09001683int StringToInt(const std::string& value) {
1684 int result;
1685 StringToInt(value, &result);
1686 return result;
1687}
1688
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001689int StringToInt(const string16& value) {
mmentovai@google.com93285682008-08-06 07:46:15 +09001690 int result;
1691 StringToInt(value, &result);
1692 return result;
1693}
1694
1695int64 StringToInt64(const std::string& value) {
1696 int64 result;
1697 StringToInt64(value, &result);
1698 return result;
1699}
1700
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001701int64 StringToInt64(const string16& value) {
mmentovai@google.com93285682008-08-06 07:46:15 +09001702 int64 result;
1703 StringToInt64(value, &result);
1704 return result;
1705}
1706
1707int HexStringToInt(const std::string& value) {
1708 int result;
1709 HexStringToInt(value, &result);
1710 return result;
1711}
1712
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001713int HexStringToInt(const string16& value) {
mmentovai@google.com93285682008-08-06 07:46:15 +09001714 int result;
1715 HexStringToInt(value, &result);
1716 return result;
1717}
mmentovai@google.com8dcf71c2008-08-08 02:15:41 +09001718
tc@google.comfbc20492008-11-18 09:14:28 +09001719bool StringToDouble(const std::string& input, double* output) {
1720 return StringToNumber<StringToDoubleTraits>(input, output);
1721}
1722
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001723bool StringToDouble(const string16& input, double* output) {
1724 return StringToNumber<String16ToDoubleTraits>(input, output);
tc@google.comfbc20492008-11-18 09:14:28 +09001725}
1726
1727double StringToDouble(const std::string& value) {
1728 double result;
1729 StringToDouble(value, &result);
1730 return result;
1731}
1732
dsh@google.com3b0f5f22009-02-28 10:01:50 +09001733double StringToDouble(const string16& value) {
tc@google.comfbc20492008-11-18 09:14:28 +09001734 double result;
1735 StringToDouble(value, &result);
1736 return result;
1737}
1738
// The following code is compatible with the OpenBSD strlcpy/wcslcpy
// interface. See:
//   http://www.gratisoft.us/todd/papers/strlcpy.html
//   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1742
namespace {

// Copies the NUL-terminated string |src| into |dst|, which has room for
// |dst_size| characters. When |dst_size| is non-zero the result is always
// NUL-terminated, truncating |src| if it does not fit; when |dst_size| is zero
// |dst| is not written at all. Returns the length of |src| in characters, not
// counting the terminator, so a return value >= |dst_size| means truncation.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t pos = 0;
  while (pos < dst_size) {
    const CHAR current = src[pos];
    dst[pos] = current;
    if (current == 0)  // The terminator was reached and copied; all done.
      return pos;
    ++pos;
  }

  // The buffer filled up before the terminator was seen, so the last slot
  // holds a source character. Overwrite it to terminate the truncated copy.
  if (dst_size > 0)
    dst[dst_size - 1] = 0;

  // Measure the part of |src| that did not fit and report its full length in
  // characters.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}

}  // namespace
1762
1763size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1764 return lcpyT<char>(dst, src, dst_size);
1765}
1766size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1767 return lcpyT<wchar_t>(dst, src, dst_size);
1768}
license.botf003cfe2008-08-24 09:55:55 +09001769
jcampan@chromium.orgbf29e602008-10-11 03:50:32 +09001770bool ElideString(const std::wstring& input, int max_len, std::wstring* output) {
1771 DCHECK(max_len >= 0);
1772 if (static_cast<int>(input.length()) <= max_len) {
1773 output->assign(input);
1774 return false;
1775 }
maruel@chromium.org8fe7adc2009-03-04 00:01:12 +09001776
jcampan@chromium.orgbf29e602008-10-11 03:50:32 +09001777 switch (max_len) {
1778 case 0:
1779 output->clear();
1780 break;
1781 case 1:
1782 output->assign(input.substr(0, 1));
1783 break;
1784 case 2:
1785 output->assign(input.substr(0, 2));
1786 break;
1787 case 3:
1788 output->assign(input.substr(0, 1) + L"." +
1789 input.substr(input.length() - 1));
1790 break;
1791 case 4:
1792 output->assign(input.substr(0, 1) + L".." +
1793 input.substr(input.length() - 1));
1794 break;
1795 default: {
1796 int rstr_len = (max_len - 3) / 2;
1797 int lstr_len = rstr_len + ((max_len - 3) % 2);
1798 output->assign(input.substr(0, lstr_len) + L"..." +
1799 input.substr(input.length() - rstr_len));
1800 break;
1801 }
1802 }
1803
1804 return true;
1805}
tommi@chromium.orga2f7e792009-01-22 09:10:08 +09001806
// Returns the uppercase hexadecimal representation of the |size| bytes
// starting at |bytes|. Each byte becomes two characters, high nibble first, so
// the result is exactly |size| * 2 characters long.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  // View the buffer as unsigned bytes so each nibble is a plain table index.
  const unsigned char* const data = static_cast<const unsigned char*>(bytes);

  std::string encoded;
  encoded.reserve(size * 2);
  for (size_t index = 0; index < size; ++index) {
    const unsigned char byte = data[index];
    encoded.push_back(kHexChars[byte >> 4]);
    encoded.push_back(kHexChars[byte & 0x0f]);
  }
  return encoded;
}