blob: efd9843c469e0f81cc22b3fbacaf7d631580bf17 [file] [log] [blame]
/*
 *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "rtc_base/stringencode.h"
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000012
13#include <stdio.h>
14#include <stdlib.h>
15
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020016#include "rtc_base/checks.h"
17#include "rtc_base/stringutils.h"
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000018
19namespace rtc {
20
/////////////////////////////////////////////////////////////////////////////
// String Encoding Utilities
/////////////////////////////////////////////////////////////////////////////
24
25size_t escape(char * buffer, size_t buflen,
26 const char * source, size_t srclen,
27 const char * illegal, char escape) {
henrikg91d6ede2015-09-17 00:24:34 -070028 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000029 if (buflen <= 0)
30 return 0;
31
32 size_t srcpos = 0, bufpos = 0;
33 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
34 char ch = source[srcpos++];
35 if ((ch == escape) || ::strchr(illegal, ch)) {
36 if (bufpos + 2 >= buflen)
37 break;
38 buffer[bufpos++] = escape;
39 }
40 buffer[bufpos++] = ch;
41 }
42
43 buffer[bufpos] = '\0';
44 return bufpos;
45}
46
47size_t unescape(char * buffer, size_t buflen,
48 const char * source, size_t srclen,
49 char escape) {
henrikg91d6ede2015-09-17 00:24:34 -070050 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000051 if (buflen <= 0)
52 return 0;
53
54 size_t srcpos = 0, bufpos = 0;
55 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
56 char ch = source[srcpos++];
57 if ((ch == escape) && (srcpos < srclen)) {
58 ch = source[srcpos++];
59 }
60 buffer[bufpos++] = ch;
61 }
62 buffer[bufpos] = '\0';
63 return bufpos;
64}
65
66size_t encode(char * buffer, size_t buflen,
67 const char * source, size_t srclen,
68 const char * illegal, char escape) {
henrikg91d6ede2015-09-17 00:24:34 -070069 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000070 if (buflen <= 0)
71 return 0;
72
73 size_t srcpos = 0, bufpos = 0;
74 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
75 char ch = source[srcpos++];
76 if ((ch != escape) && !::strchr(illegal, ch)) {
77 buffer[bufpos++] = ch;
78 } else if (bufpos + 3 >= buflen) {
79 break;
80 } else {
81 buffer[bufpos+0] = escape;
82 buffer[bufpos+1] = hex_encode((static_cast<unsigned char>(ch) >> 4) & 0xF);
83 buffer[bufpos+2] = hex_encode((static_cast<unsigned char>(ch) ) & 0xF);
84 bufpos += 3;
85 }
86 }
87 buffer[bufpos] = '\0';
88 return bufpos;
89}
90
91size_t decode(char * buffer, size_t buflen,
92 const char * source, size_t srclen,
93 char escape) {
94 if (buflen <= 0)
95 return 0;
96
97 unsigned char h1, h2;
98 size_t srcpos = 0, bufpos = 0;
99 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
100 char ch = source[srcpos++];
101 if ((ch == escape)
102 && (srcpos + 1 < srclen)
103 && hex_decode(source[srcpos], &h1)
104 && hex_decode(source[srcpos+1], &h2)) {
105 buffer[bufpos++] = (h1 << 4) | h2;
106 srcpos += 2;
107 } else {
108 buffer[bufpos++] = ch;
109 }
110 }
111 buffer[bufpos] = '\0';
112 return bufpos;
113}
114
115const char* unsafe_filename_characters() {
116 // It might be better to have a single specification which is the union of
117 // all operating systems, unless one system is overly restrictive.
118#if defined(WEBRTC_WIN)
119 return "\\/:*?\"<>|";
andrew@webrtc.org6ae5a6d2014-09-16 01:03:29 +0000120#else // !WEBRTC_WIN
henrikg91d6ede2015-09-17 00:24:34 -0700121 // TODO(grunell): Should this never be reached?
nisseeb4ca4e2017-01-12 02:24:27 -0800122 RTC_NOTREACHED();
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000123 return "";
124#endif // !WEBRTC_WIN
125}
126
// Bit flags stored in ASCII_CLASS. XML_UNSAFE and HTML_UNSAFE share one bit.
const unsigned char URL_UNSAFE  = 0x1;  // 0-32 "#$%&+,/:;<=>?@[\]^`{|} 127
const unsigned char XML_UNSAFE  = 0x2;  // "&'<>
const unsigned char HTML_UNSAFE = 0x2;  // "&'<>

// Classification of each 7-bit ASCII code, indexed by character value.
// Each entry is a combination of the flags above (0 = safe everywhere).
const unsigned char ASCII_CLASS[128] = {
  // 0x00 - 0x0F (control characters)
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0x10 - 0x1F (control characters)
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0x20 - 0x2F: space ! " # $ % & ' ( ) * + , - . /
  1,0,3,1,1,1,3,2,0,0,0,1,1,0,0,1,
  // 0x30 - 0x3F: 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
  0,0,0,0,0,0,0,0,0,0,1,1,3,1,3,1,
  // 0x40 - 0x4F: @ A B C D E F G H I J K L M N O
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  // 0x50 - 0x5F: P Q R S T U V W X Y Z [ (backslash) ] ^ _
  0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,
  // 0x60 - 0x6F: ` a b c d e f g h i j k l m n o
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  // 0x70 - 0x7F: p q r s t u v w x y z { | } ~ DEL
  0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,
};
141
142size_t url_encode(char * buffer, size_t buflen,
143 const char * source, size_t srclen) {
deadbeef37f5ecf2017-02-27 14:06:41 -0800144 if (nullptr == buffer)
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000145 return srclen * 3 + 1;
146 if (buflen <= 0)
147 return 0;
148
149 size_t srcpos = 0, bufpos = 0;
150 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
151 unsigned char ch = source[srcpos++];
152 if ((ch < 128) && (ASCII_CLASS[ch] & URL_UNSAFE)) {
153 if (bufpos + 3 >= buflen) {
154 break;
155 }
156 buffer[bufpos+0] = '%';
157 buffer[bufpos+1] = hex_encode((ch >> 4) & 0xF);
158 buffer[bufpos+2] = hex_encode((ch ) & 0xF);
159 bufpos += 3;
160 } else {
161 buffer[bufpos++] = ch;
162 }
163 }
164 buffer[bufpos] = '\0';
165 return bufpos;
166}
167
168size_t url_decode(char * buffer, size_t buflen,
169 const char * source, size_t srclen) {
deadbeef37f5ecf2017-02-27 14:06:41 -0800170 if (nullptr == buffer)
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000171 return srclen + 1;
172 if (buflen <= 0)
173 return 0;
174
175 unsigned char h1, h2;
176 size_t srcpos = 0, bufpos = 0;
177 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
178 unsigned char ch = source[srcpos++];
179 if (ch == '+') {
180 buffer[bufpos++] = ' ';
181 } else if ((ch == '%')
182 && (srcpos + 1 < srclen)
183 && hex_decode(source[srcpos], &h1)
184 && hex_decode(source[srcpos+1], &h2))
185 {
186 buffer[bufpos++] = (h1 << 4) | h2;
187 srcpos += 2;
188 } else {
189 buffer[bufpos++] = ch;
190 }
191 }
192 buffer[bufpos] = '\0';
193 return bufpos;
194}
195
// Decodes the single UTF-8 sequence that starts at |source|.
// |srclen| is the number of readable bytes. On success the decoded value is
// stored in |*value| and the number of bytes consumed (1-4) is returned.
// Returns 0, leaving |*value| untouched, when |srclen| is 0, when the
// sequence is truncated, when a trailer byte is not of the form 10xxxxxx, or
// when the lead byte does not announce a 1-4 byte sequence.
// Overlong encodings are not rejected.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value) {
  // Nothing to decode; do not touch source[0].
  if (srclen == 0) {
    return 0;
  }
  const unsigned char* s = reinterpret_cast<const unsigned char*>(source);
  if ((s[0] & 0x80) == 0x00) {                    // Check s[0] == 0xxxxxxx
    *value = s[0];
    return 1;
  }
  if ((srclen < 2) || ((s[1] & 0xC0) != 0x80)) {  // Check s[1] != 10xxxxxx
    return 0;
  }
  // Accumulate the trailer byte values in value16, and combine it with the
  // relevant bits from s[0], once we've determined the sequence length.
  unsigned long value16 = (s[1] & 0x3F);
  if ((s[0] & 0xE0) == 0xC0) {                    // Check s[0] == 110xxxxx
    *value = ((s[0] & 0x1F) << 6) | value16;
    return 2;
  }
  if ((srclen < 3) || ((s[2] & 0xC0) != 0x80)) {  // Check s[2] != 10xxxxxx
    return 0;
  }
  value16 = (value16 << 6) | (s[2] & 0x3F);
  if ((s[0] & 0xF0) == 0xE0) {                    // Check s[0] == 1110xxxx
    *value = ((s[0] & 0x0F) << 12) | value16;
    return 3;
  }
  if ((srclen < 4) || ((s[3] & 0xC0) != 0x80)) {  // Check s[3] != 10xxxxxx
    return 0;
  }
  value16 = (value16 << 6) | (s[3] & 0x3F);
  if ((s[0] & 0xF8) == 0xF0) {                    // Check s[0] == 11110xxx
    *value = ((s[0] & 0x07) << 18) | value16;
    return 4;
  }
  return 0;
}
230
// Writes |value| to |buffer| as a UTF-8 sequence of 1-4 bytes (no NUL
// terminator is added). Returns the number of bytes written, or 0 when
// |value| is larger than 0x1FFFFF or the sequence does not fit in |buflen|.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value) {
  // Pick the sequence length from the magnitude of the value.
  size_t length = 0;
  if (value <= 0x7F) {
    length = 1;
  } else if (value <= 0x7FF) {
    length = 2;
  } else if (value <= 0xFFFF) {
    length = 3;
  } else if (value <= 0x1FFFFF) {
    length = 4;
  }
  if ((length == 0) || (buflen < length)) {
    return 0;
  }

  if (length == 1) {
    buffer[0] = static_cast<unsigned char>(value);
    return 1;
  }

  // Lead byte: length marker plus the most significant payload bits.
  static const unsigned char kLeadMarker[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };
  buffer[0] = kLeadMarker[length] |
              static_cast<unsigned char>(value >> (6 * (length - 1)));
  // Trailer bytes: 10xxxxxx, six payload bits each, most significant first.
  for (size_t i = 1; i < length; ++i) {
    const size_t shift = 6 * (length - 1 - i);
    buffer[i] = 0x80 | static_cast<unsigned char>((value >> shift) & 0x3F);
  }
  return length;
}
256
257size_t html_encode(char * buffer, size_t buflen,
258 const char * source, size_t srclen) {
henrikg91d6ede2015-09-17 00:24:34 -0700259 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000260 if (buflen <= 0)
261 return 0;
262
263 size_t srcpos = 0, bufpos = 0;
264 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
265 unsigned char ch = source[srcpos];
266 if (ch < 128) {
267 srcpos += 1;
268 if (ASCII_CLASS[ch] & HTML_UNSAFE) {
269 const char * escseq = 0;
270 size_t esclen = 0;
271 switch (ch) {
272 case '<': escseq = "&lt;"; esclen = 4; break;
273 case '>': escseq = "&gt;"; esclen = 4; break;
274 case '\'': escseq = "&#39;"; esclen = 5; break;
275 case '\"': escseq = "&quot;"; esclen = 6; break;
276 case '&': escseq = "&amp;"; esclen = 5; break;
nisseeb4ca4e2017-01-12 02:24:27 -0800277 default: RTC_NOTREACHED();
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000278 }
279 if (bufpos + esclen >= buflen) {
280 break;
281 }
282 memcpy(buffer + bufpos, escseq, esclen);
283 bufpos += esclen;
284 } else {
285 buffer[bufpos++] = ch;
286 }
287 } else {
288 // Largest value is 0x1FFFFF => &#2097151; (10 characters)
andrew@webrtc.org6ae5a6d2014-09-16 01:03:29 +0000289 const size_t kEscseqSize = 11;
290 char escseq[kEscseqSize];
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000291 unsigned long val;
292 if (size_t vallen = utf8_decode(&source[srcpos], srclen - srcpos, &val)) {
293 srcpos += vallen;
294 } else {
295 // Not a valid utf8 sequence, just use the raw character.
296 val = static_cast<unsigned char>(source[srcpos++]);
297 }
andrew@webrtc.org6ae5a6d2014-09-16 01:03:29 +0000298 size_t esclen = sprintfn(escseq, kEscseqSize, "&#%lu;", val);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000299 if (bufpos + esclen >= buflen) {
300 break;
301 }
302 memcpy(buffer + bufpos, escseq, esclen);
303 bufpos += esclen;
304 }
305 }
306 buffer[bufpos] = '\0';
307 return bufpos;
308}
309
310size_t html_decode(char * buffer, size_t buflen,
311 const char * source, size_t srclen) {
henrikg91d6ede2015-09-17 00:24:34 -0700312 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000313 return xml_decode(buffer, buflen, source, srclen);
314}
315
316size_t xml_encode(char * buffer, size_t buflen,
317 const char * source, size_t srclen) {
henrikg91d6ede2015-09-17 00:24:34 -0700318 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000319 if (buflen <= 0)
320 return 0;
321
322 size_t srcpos = 0, bufpos = 0;
323 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
324 unsigned char ch = source[srcpos++];
325 if ((ch < 128) && (ASCII_CLASS[ch] & XML_UNSAFE)) {
326 const char * escseq = 0;
327 size_t esclen = 0;
328 switch (ch) {
329 case '<': escseq = "&lt;"; esclen = 4; break;
330 case '>': escseq = "&gt;"; esclen = 4; break;
331 case '\'': escseq = "&apos;"; esclen = 6; break;
332 case '\"': escseq = "&quot;"; esclen = 6; break;
333 case '&': escseq = "&amp;"; esclen = 5; break;
nisseeb4ca4e2017-01-12 02:24:27 -0800334 default: RTC_NOTREACHED();
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000335 }
336 if (bufpos + esclen >= buflen) {
337 break;
338 }
339 memcpy(buffer + bufpos, escseq, esclen);
340 bufpos += esclen;
341 } else {
342 buffer[bufpos++] = ch;
343 }
344 }
345 buffer[bufpos] = '\0';
346 return bufpos;
347}
348
349size_t xml_decode(char * buffer, size_t buflen,
350 const char * source, size_t srclen) {
henrikg91d6ede2015-09-17 00:24:34 -0700351 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000352 if (buflen <= 0)
353 return 0;
354
355 size_t srcpos = 0, bufpos = 0;
356 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
357 unsigned char ch = source[srcpos++];
358 if (ch != '&') {
359 buffer[bufpos++] = ch;
360 } else if ((srcpos + 2 < srclen)
361 && (memcmp(source + srcpos, "lt;", 3) == 0)) {
362 buffer[bufpos++] = '<';
363 srcpos += 3;
364 } else if ((srcpos + 2 < srclen)
365 && (memcmp(source + srcpos, "gt;", 3) == 0)) {
366 buffer[bufpos++] = '>';
367 srcpos += 3;
368 } else if ((srcpos + 4 < srclen)
369 && (memcmp(source + srcpos, "apos;", 5) == 0)) {
370 buffer[bufpos++] = '\'';
371 srcpos += 5;
372 } else if ((srcpos + 4 < srclen)
373 && (memcmp(source + srcpos, "quot;", 5) == 0)) {
374 buffer[bufpos++] = '\"';
375 srcpos += 5;
376 } else if ((srcpos + 3 < srclen)
377 && (memcmp(source + srcpos, "amp;", 4) == 0)) {
378 buffer[bufpos++] = '&';
379 srcpos += 4;
380 } else if ((srcpos < srclen) && (source[srcpos] == '#')) {
381 int int_base = 10;
382 if ((srcpos + 1 < srclen) && (source[srcpos+1] == 'x')) {
383 int_base = 16;
384 srcpos += 1;
385 }
386 char * ptr;
henrikg91d6ede2015-09-17 00:24:34 -0700387 // TODO(grunell): Fix hack (ptr may go past end of data)
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000388 unsigned long val = strtoul(source + srcpos + 1, &ptr, int_base);
389 if ((static_cast<size_t>(ptr - source) < srclen) && (*ptr == ';')) {
390 srcpos = ptr - source + 1;
391 } else {
392 // Not a valid escape sequence.
393 break;
394 }
395 if (size_t esclen = utf8_encode(buffer + bufpos, buflen - bufpos, val)) {
396 bufpos += esclen;
397 } else {
398 // Not enough room to encode the character, or illegal character
399 break;
400 }
401 } else {
402 // Unrecognized escape sequence.
403 break;
404 }
405 }
406 buffer[bufpos] = '\0';
407 return bufpos;
408}
409
410static const char HEX[] = "0123456789abcdef";
411
412char hex_encode(unsigned char val) {
henrikg91d6ede2015-09-17 00:24:34 -0700413 RTC_DCHECK_LT(val, 16);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000414 return (val < 16) ? HEX[val] : '!';
415}
416
// Converts the hex digit |ch| ('0'-'9', 'A'-'F' or 'a'-'f') to its numeric
// value. On success stores the value (0-15) in |*val| and returns true.
// Returns false and leaves |*val| untouched for any other character.
bool hex_decode(char ch, unsigned char* val) {
  if ((ch >= '0') && (ch <= '9')) {
    *val = ch - '0';
  } else if ((ch >= 'A') && (ch <= 'F')) {
    *val = (ch - 'A') + 10;
  } else if ((ch >= 'a') && (ch <= 'f')) {
    *val = (ch - 'a') + 10;
  } else {
    return false;
  }
  return true;
}
429
430size_t hex_encode(char* buffer, size_t buflen,
431 const char* csource, size_t srclen) {
432 return hex_encode_with_delimiter(buffer, buflen, csource, srclen, 0);
433}
434
435size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
436 const char* csource, size_t srclen,
437 char delimiter) {
henrikg91d6ede2015-09-17 00:24:34 -0700438 RTC_DCHECK(buffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000439 if (buflen == 0)
440 return 0;
441
442 // Init and check bounds.
443 const unsigned char* bsource =
444 reinterpret_cast<const unsigned char*>(csource);
445 size_t srcpos = 0, bufpos = 0;
446 size_t needed = delimiter ? (srclen * 3) : (srclen * 2 + 1);
447 if (buflen < needed)
448 return 0;
449
450 while (srcpos < srclen) {
451 unsigned char ch = bsource[srcpos++];
452 buffer[bufpos ] = hex_encode((ch >> 4) & 0xF);
453 buffer[bufpos+1] = hex_encode((ch ) & 0xF);
454 bufpos += 2;
455
456 // Don't write a delimiter after the last byte.
457 if (delimiter && (srcpos < srclen)) {
458 buffer[bufpos] = delimiter;
459 ++bufpos;
460 }
461 }
462
463 // Null terminate.
464 buffer[bufpos] = '\0';
465 return bufpos;
466}
467
Peter Thatcher1cf6f812015-05-15 10:40:45 -0700468std::string hex_encode(const std::string& str) {
469 return hex_encode(str.c_str(), str.size());
470}
471
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000472std::string hex_encode(const char* source, size_t srclen) {
473 return hex_encode_with_delimiter(source, srclen, 0);
474}
475
476std::string hex_encode_with_delimiter(const char* source, size_t srclen,
477 char delimiter) {
478 const size_t kBufferSize = srclen * 3;
479 char* buffer = STACK_ARRAY(char, kBufferSize);
480 size_t length = hex_encode_with_delimiter(buffer, kBufferSize,
481 source, srclen, delimiter);
henrikg91d6ede2015-09-17 00:24:34 -0700482 RTC_DCHECK(srclen == 0 || length > 0);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000483 return std::string(buffer, length);
484}
485
486size_t hex_decode(char * cbuffer, size_t buflen,
487 const char * source, size_t srclen) {
488 return hex_decode_with_delimiter(cbuffer, buflen, source, srclen, 0);
489}
490
491size_t hex_decode_with_delimiter(char* cbuffer, size_t buflen,
492 const char* source, size_t srclen,
493 char delimiter) {
henrikg91d6ede2015-09-17 00:24:34 -0700494 RTC_DCHECK(cbuffer); // TODO(grunell): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000495 if (buflen == 0)
496 return 0;
497
498 // Init and bounds check.
499 unsigned char* bbuffer = reinterpret_cast<unsigned char*>(cbuffer);
500 size_t srcpos = 0, bufpos = 0;
501 size_t needed = (delimiter) ? (srclen + 1) / 3 : srclen / 2;
502 if (buflen < needed)
503 return 0;
504
505 while (srcpos < srclen) {
506 if ((srclen - srcpos) < 2) {
507 // This means we have an odd number of bytes.
508 return 0;
509 }
510
511 unsigned char h1, h2;
512 if (!hex_decode(source[srcpos], &h1) ||
513 !hex_decode(source[srcpos + 1], &h2))
514 return 0;
515
516 bbuffer[bufpos++] = (h1 << 4) | h2;
517 srcpos += 2;
518
519 // Remove the delimiter if needed.
520 if (delimiter && (srclen - srcpos) > 1) {
521 if (source[srcpos] != delimiter)
522 return 0;
523 ++srcpos;
524 }
525 }
526
527 return bufpos;
528}
529
530size_t hex_decode(char* buffer, size_t buflen, const std::string& source) {
531 return hex_decode_with_delimiter(buffer, buflen, source, 0);
532}
533size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
534 const std::string& source, char delimiter) {
535 return hex_decode_with_delimiter(buffer, buflen,
536 source.c_str(), source.length(), delimiter);
537}
538
539size_t transform(std::string& value, size_t maxlen, const std::string& source,
540 Transform t) {
541 char* buffer = STACK_ARRAY(char, maxlen + 1);
542 size_t length = t(buffer, maxlen + 1, source.data(), source.length());
543 value.assign(buffer, length);
544 return length;
545}
546
547std::string s_transform(const std::string& source, Transform t) {
548 // Ask transformation function to approximate the destination size (returns upper bound)
deadbeef37f5ecf2017-02-27 14:06:41 -0800549 size_t maxlen = t(nullptr, 0, source.data(), source.length());
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000550 char * buffer = STACK_ARRAY(char, maxlen);
551 size_t len = t(buffer, maxlen, source.data(), source.length());
552 std::string result(buffer, len);
553 return result;
554}
555
// Splits |source| at every |delimiter| and stores the non-empty pieces in
// |fields| (which is cleared first). Empty pieces produced by leading,
// trailing or consecutive delimiters are dropped.
// Returns the number of fields stored.
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields) {
  fields->clear();
  size_t token_start = 0;
  while (token_start < source.length()) {
    size_t token_end = source.find(delimiter, token_start);
    if (token_end == std::string::npos) {
      // Last piece runs to the end of the string.
      token_end = source.length();
    }
    if (token_end > token_start) {
      fields->push_back(source.substr(token_start, token_end - token_start));
    }
    token_start = token_end + 1;
  }
  return fields->size();
}
573
// Splits |source| at every |delimiter| and stores all pieces, including
// empty ones, in |fields| (which is cleared first). The result therefore
// always holds one more field than there are delimiters in |source|.
// Returns the number of fields stored.
size_t tokenize_with_empty_tokens(const std::string& source,
                                  char delimiter,
                                  std::vector<std::string>* fields) {
  fields->clear();
  size_t token_start = 0;
  for (size_t pos = source.find(delimiter);
       pos != std::string::npos;
       pos = source.find(delimiter, token_start)) {
    fields->push_back(source.substr(token_start, pos - token_start));
    token_start = pos + 1;
  }
  // Whatever follows the last delimiter (possibly nothing).
  fields->push_back(source.substr(token_start));
  return fields->size();
}
588
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000589size_t tokenize_append(const std::string& source, char delimiter,
590 std::vector<std::string>* fields) {
591 if (!fields) return 0;
592
593 std::vector<std::string> new_fields;
594 tokenize(source, delimiter, &new_fields);
595 fields->insert(fields->end(), new_fields.begin(), new_fields.end());
596 return fields->size();
597}
598
599size_t tokenize(const std::string& source, char delimiter, char start_mark,
600 char end_mark, std::vector<std::string>* fields) {
601 if (!fields) return 0;
602 fields->clear();
603
604 std::string remain_source = source;
605 while (!remain_source.empty()) {
606 size_t start_pos = remain_source.find(start_mark);
607 if (std::string::npos == start_pos) break;
608 std::string pre_mark;
609 if (start_pos > 0) {
610 pre_mark = remain_source.substr(0, start_pos - 1);
611 }
612
613 ++start_pos;
614 size_t end_pos = remain_source.find(end_mark, start_pos);
615 if (std::string::npos == end_pos) break;
616
617 // We have found the matching marks. First tokenize the pre-mask. Then add
618 // the marked part as a single field. Finally, loop back for the post-mark.
619 tokenize_append(pre_mark, delimiter, fields);
620 fields->push_back(remain_source.substr(start_pos, end_pos - start_pos));
621 remain_source = remain_source.substr(end_pos + 1);
622 }
623
624 return tokenize_append(remain_source, delimiter, fields);
625}
626
// Splits |source| at the first run of |delimiter| characters.
// On success stores the text before the run in |*token|, the text after the
// run in |*rest| (empty when the run reaches the end of |source|), and
// returns true. Returns false, leaving both outputs untouched, when
// |source| contains no |delimiter|.
bool tokenize_first(const std::string& source,
                    const char delimiter,
                    std::string* token,
                    std::string* rest) {
  // Find the first delimiter
  const size_t left_pos = source.find(delimiter);
  if (left_pos == std::string::npos) {
    return false;
  }

  // Skip any additional occurrences of the delimiter. The search is bounded
  // by the string length, so it is safe even when |delimiter| is '\0'.
  size_t right_pos = source.find_first_not_of(delimiter, left_pos + 1);
  if (right_pos == std::string::npos) {
    right_pos = source.length();
  }

  *token = source.substr(0, left_pos);
  *rest = source.substr(right_pos);
  return true;
}
647
// Concatenates the strings in |source|, placing |delimiter| between
// consecutive elements. Returns an empty string for an empty vector.
std::string join(const std::vector<std::string>& source, char delimiter) {
  std::string joined;
  if (source.empty()) {
    return joined;
  }

  // Pre-allocate: all pieces plus one delimiter between each pair.
  size_t total_length = source.size() - 1;
  for (const std::string& piece : source) {
    total_length += piece.length();
  }
  joined.reserve(total_length);

  joined += source.front();
  for (size_t i = 1; i < source.size(); ++i) {
    joined += delimiter;
    joined += source[i];
  }
  return joined;
}
669
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000670size_t split(const std::string& source, char delimiter,
671 std::vector<std::string>* fields) {
henrikg91d6ede2015-09-17 00:24:34 -0700672 RTC_DCHECK(fields);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000673 fields->clear();
674 size_t last = 0;
675 for (size_t i = 0; i < source.length(); ++i) {
676 if (source[i] == delimiter) {
677 fields->push_back(source.substr(last, i - last));
678 last = i + 1;
679 }
680 }
681 fields->push_back(source.substr(last, source.length() - last));
682 return fields->size();
683}
684
// Returns '_' for characters that should not appear in a file name: any
// character that compares below 32, and each of < > : " / \ | * ?.
// Every other character is returned unchanged.
char make_char_safe_for_filename(char c) {
  if (c < 32)
    return '_';

  static const char kReserved[] = "<>:\"/\\|*?";
  for (const char* reserved = kReserved; *reserved != '\0'; ++reserved) {
    if (*reserved == c)
      return '_';
  }
  return c;
}
705
706/*
707void sprintf(std::string& value, size_t maxlen, const char * format, ...) {
708 char * buffer = STACK_ARRAY(char, maxlen + 1);
709 va_list args;
710 va_start(args, format);
711 value.assign(buffer, vsprintfn(buffer, maxlen + 1, format, args));
712 va_end(args);
713}
714*/
715
716/////////////////////////////////////////////////////////////////////////////
717
718} // namespace rtc