blob: 8e7c6d7c95524ae0b5370b947325bb621a1d0eb4 [file] [log] [blame]
henrike@webrtc.orgf0488722014-05-13 18:00:26 +00001/*
2 * Copyright 2004 The WebRTC Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "rtc_base/stringencode.h"
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000012
13#include <stdio.h>
14#include <stdlib.h>
15
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020016#include "rtc_base/checks.h"
17#include "rtc_base/stringutils.h"
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000018
19namespace rtc {
20
21/////////////////////////////////////////////////////////////////////////////
22// String Encoding Utilities
23/////////////////////////////////////////////////////////////////////////////
24
Yves Gerey665174f2018-06-19 15:03:05 +020025size_t url_decode(char* buffer,
26 size_t buflen,
27 const char* source,
28 size_t srclen) {
deadbeef37f5ecf2017-02-27 14:06:41 -080029 if (nullptr == buffer)
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000030 return srclen + 1;
31 if (buflen <= 0)
32 return 0;
33
34 unsigned char h1, h2;
35 size_t srcpos = 0, bufpos = 0;
36 while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
37 unsigned char ch = source[srcpos++];
38 if (ch == '+') {
39 buffer[bufpos++] = ' ';
Yves Gerey665174f2018-06-19 15:03:05 +020040 } else if ((ch == '%') && (srcpos + 1 < srclen) &&
41 hex_decode(source[srcpos], &h1) &&
42 hex_decode(source[srcpos + 1], &h2)) {
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000043 buffer[bufpos++] = (h1 << 4) | h2;
44 srcpos += 2;
45 } else {
46 buffer[bufpos++] = ch;
47 }
48 }
49 buffer[bufpos] = '\0';
50 return bufpos;
51}
52
// Decodes a single UTF-8 sequence from the start of |source|.
//
// |source| - input bytes; at most |srclen| of them are examined.
// |srclen| - number of readable bytes at |source|.
// |value|  - receives the decoded value on success; left untouched on failure.
//
// Returns the number of bytes consumed (1 to 4), or 0 when the input is empty,
// is truncated, contains a byte that is not a 10xxxxxx continuation where one
// is required, or starts with a byte that is not a 1-4 byte lead. Overlong
// encodings and surrogate values are not rejected.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value) {
  // Nothing to decode; do not touch source[0].
  if (srclen == 0) {
    return 0;
  }
  const unsigned char* s = reinterpret_cast<const unsigned char*>(source);
  if ((s[0] & 0x80) == 0x00) {  // Check s[0] == 0xxxxxxx
    *value = s[0];
    return 1;
  }
  if ((srclen < 2) || ((s[1] & 0xC0) != 0x80)) {  // Check s[1] != 10xxxxxx
    return 0;
  }
  // Accumulate the payload bits of the trailing bytes, and combine them with
  // the relevant bits from s[0] once the sequence length is known.
  unsigned long trailer_bits = (s[1] & 0x3F);
  if ((s[0] & 0xE0) == 0xC0) {  // Check s[0] == 110xxxxx
    *value = (static_cast<unsigned long>(s[0] & 0x1F) << 6) | trailer_bits;
    return 2;
  }
  if ((srclen < 3) || ((s[2] & 0xC0) != 0x80)) {  // Check s[2] != 10xxxxxx
    return 0;
  }
  trailer_bits = (trailer_bits << 6) | (s[2] & 0x3F);
  if ((s[0] & 0xF0) == 0xE0) {  // Check s[0] == 1110xxxx
    *value = (static_cast<unsigned long>(s[0] & 0x0F) << 12) | trailer_bits;
    return 3;
  }
  if ((srclen < 4) || ((s[3] & 0xC0) != 0x80)) {  // Check s[3] != 10xxxxxx
    return 0;
  }
  trailer_bits = (trailer_bits << 6) | (s[3] & 0x3F);
  if ((s[0] & 0xF8) == 0xF0) {  // Check s[0] == 11110xxx
    *value = (static_cast<unsigned long>(s[0] & 0x07) << 18) | trailer_bits;
    return 4;
  }
  return 0;
}
87
// Writes |value| to |buffer| as a UTF-8 sequence of 1 to 4 bytes.
//
// Returns the number of bytes written, or 0 if |value| is above 0x1FFFFF or
// |buflen| is smaller than the sequence needs. No terminator is written and
// |buffer| is left untouched when 0 is returned.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value) {
  // Pick the sequence length from the magnitude of the value.
  size_t length;
  if (value <= 0x7F) {
    length = 1;
  } else if (value <= 0x7FF) {
    length = 2;
  } else if (value <= 0xFFFF) {
    length = 3;
  } else if (value <= 0x1FFFFF) {
    length = 4;
  } else {
    return 0;
  }
  if (buflen < length) {
    return 0;
  }

  // Lead-byte marker bits for each sequence length (index 0 is unused).
  static const unsigned char kLeadMarker[] = {0x00, 0x00, 0xC0, 0xE0, 0xF0};

  // Fill the continuation bytes from the end, six payload bits at a time;
  // whatever is left over belongs in the lead byte.
  unsigned long remaining = value;
  for (size_t i = length - 1; i > 0; --i) {
    buffer[i] = static_cast<char>(0x80 | (remaining & 0x3F));
    remaining >>= 6;
  }
  buffer[0] = static_cast<char>(kLeadMarker[length] | remaining);
  return length;
}
113
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000114static const char HEX[] = "0123456789abcdef";
115
116char hex_encode(unsigned char val) {
henrikg91d6ede2015-09-17 00:24:34 -0700117 RTC_DCHECK_LT(val, 16);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000118 return (val < 16) ? HEX[val] : '!';
119}
120
// Converts one hexadecimal digit to its numeric value.
//
// |ch|  - character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are accepted.
// |val| - receives the value (0..15) on success; left untouched on failure.
//
// Returns true if |ch| is a hexadecimal digit, false otherwise.
bool hex_decode(char ch, unsigned char* val) {
  if ((ch >= '0') && (ch <= '9')) {
    *val = ch - '0';
  } else if ((ch >= 'A') && (ch <= 'F')) {
    // Only A-F are digits; accepting letters beyond F would yield values
    // above 15 that overflow into the neighbouring nibble in callers.
    *val = (ch - 'A') + 10;
  } else if ((ch >= 'a') && (ch <= 'f')) {
    *val = (ch - 'a') + 10;
  } else {
    return false;
  }
  return true;
}
133
Yves Gerey665174f2018-06-19 15:03:05 +0200134size_t hex_encode(char* buffer,
135 size_t buflen,
136 const char* csource,
137 size_t srclen) {
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000138 return hex_encode_with_delimiter(buffer, buflen, csource, srclen, 0);
139}
140
Yves Gerey665174f2018-06-19 15:03:05 +0200141size_t hex_encode_with_delimiter(char* buffer,
142 size_t buflen,
143 const char* csource,
144 size_t srclen,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000145 char delimiter) {
Henrik Grunell84879882018-03-23 15:33:03 +0100146 RTC_DCHECK(buffer); // TODO(kwiberg): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000147 if (buflen == 0)
148 return 0;
149
150 // Init and check bounds.
151 const unsigned char* bsource =
152 reinterpret_cast<const unsigned char*>(csource);
153 size_t srcpos = 0, bufpos = 0;
154 size_t needed = delimiter ? (srclen * 3) : (srclen * 2 + 1);
155 if (buflen < needed)
156 return 0;
157
158 while (srcpos < srclen) {
159 unsigned char ch = bsource[srcpos++];
Yves Gerey665174f2018-06-19 15:03:05 +0200160 buffer[bufpos] = hex_encode((ch >> 4) & 0xF);
161 buffer[bufpos + 1] = hex_encode((ch)&0xF);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000162 bufpos += 2;
163
164 // Don't write a delimiter after the last byte.
165 if (delimiter && (srcpos < srclen)) {
166 buffer[bufpos] = delimiter;
167 ++bufpos;
168 }
169 }
170
171 // Null terminate.
172 buffer[bufpos] = '\0';
173 return bufpos;
174}
175
Peter Thatcher1cf6f812015-05-15 10:40:45 -0700176std::string hex_encode(const std::string& str) {
177 return hex_encode(str.c_str(), str.size());
178}
179
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000180std::string hex_encode(const char* source, size_t srclen) {
181 return hex_encode_with_delimiter(source, srclen, 0);
182}
183
Yves Gerey665174f2018-06-19 15:03:05 +0200184std::string hex_encode_with_delimiter(const char* source,
185 size_t srclen,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000186 char delimiter) {
187 const size_t kBufferSize = srclen * 3;
188 char* buffer = STACK_ARRAY(char, kBufferSize);
Yves Gerey665174f2018-06-19 15:03:05 +0200189 size_t length =
190 hex_encode_with_delimiter(buffer, kBufferSize, source, srclen, delimiter);
henrikg91d6ede2015-09-17 00:24:34 -0700191 RTC_DCHECK(srclen == 0 || length > 0);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000192 return std::string(buffer, length);
193}
194
Yves Gerey665174f2018-06-19 15:03:05 +0200195size_t hex_decode(char* cbuffer,
196 size_t buflen,
197 const char* source,
198 size_t srclen) {
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000199 return hex_decode_with_delimiter(cbuffer, buflen, source, srclen, 0);
200}
201
Yves Gerey665174f2018-06-19 15:03:05 +0200202size_t hex_decode_with_delimiter(char* cbuffer,
203 size_t buflen,
204 const char* source,
205 size_t srclen,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000206 char delimiter) {
Henrik Grunell84879882018-03-23 15:33:03 +0100207 RTC_DCHECK(cbuffer); // TODO(kwiberg): estimate output size
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000208 if (buflen == 0)
209 return 0;
210
211 // Init and bounds check.
212 unsigned char* bbuffer = reinterpret_cast<unsigned char*>(cbuffer);
213 size_t srcpos = 0, bufpos = 0;
214 size_t needed = (delimiter) ? (srclen + 1) / 3 : srclen / 2;
215 if (buflen < needed)
216 return 0;
217
218 while (srcpos < srclen) {
219 if ((srclen - srcpos) < 2) {
220 // This means we have an odd number of bytes.
221 return 0;
222 }
223
224 unsigned char h1, h2;
225 if (!hex_decode(source[srcpos], &h1) ||
226 !hex_decode(source[srcpos + 1], &h2))
227 return 0;
228
229 bbuffer[bufpos++] = (h1 << 4) | h2;
230 srcpos += 2;
231
232 // Remove the delimiter if needed.
233 if (delimiter && (srclen - srcpos) > 1) {
234 if (source[srcpos] != delimiter)
235 return 0;
236 ++srcpos;
237 }
238 }
239
240 return bufpos;
241}
242
243size_t hex_decode(char* buffer, size_t buflen, const std::string& source) {
244 return hex_decode_with_delimiter(buffer, buflen, source, 0);
245}
Yves Gerey665174f2018-06-19 15:03:05 +0200246size_t hex_decode_with_delimiter(char* buffer,
247 size_t buflen,
248 const std::string& source,
249 char delimiter) {
250 return hex_decode_with_delimiter(buffer, buflen, source.c_str(),
251 source.length(), delimiter);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000252}
253
Yves Gerey665174f2018-06-19 15:03:05 +0200254size_t transform(std::string& value,
255 size_t maxlen,
256 const std::string& source,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000257 Transform t) {
258 char* buffer = STACK_ARRAY(char, maxlen + 1);
259 size_t length = t(buffer, maxlen + 1, source.data(), source.length());
260 value.assign(buffer, length);
261 return length;
262}
263
264std::string s_transform(const std::string& source, Transform t) {
Yves Gerey665174f2018-06-19 15:03:05 +0200265 // Ask transformation function to approximate the destination size (returns
266 // upper bound)
deadbeef37f5ecf2017-02-27 14:06:41 -0800267 size_t maxlen = t(nullptr, 0, source.data(), source.length());
Yves Gerey665174f2018-06-19 15:03:05 +0200268 char* buffer = STACK_ARRAY(char, maxlen);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000269 size_t len = t(buffer, maxlen, source.data(), source.length());
270 std::string result(buffer, len);
271 return result;
272}
273
// Splits |source| at every |delimiter| and stores the non-empty pieces in
// |fields|, replacing its previous contents. Consecutive, leading and
// trailing delimiters produce no empty tokens.
//
// Returns the number of tokens stored.
size_t tokenize(const std::string& source,
                char delimiter,
                std::vector<std::string>* fields) {
  fields->clear();
  std::string::size_type begin = 0;
  while (begin < source.length()) {
    // The token runs up to the next delimiter, or to the end of the input.
    std::string::size_type end = source.find(delimiter, begin);
    if (end == std::string::npos) {
      end = source.length();
    }
    if (end > begin) {
      fields->push_back(source.substr(begin, end - begin));
    }
    begin = end + 1;
  }
  return fields->size();
}
292
// Splits |source| at every |delimiter| and stores all pieces, including empty
// ones, in |fields|, replacing its previous contents. The result always has
// one more entry than |source| has delimiters, so an empty |source| yields a
// single empty token.
//
// Returns the number of tokens stored.
size_t tokenize_with_empty_tokens(const std::string& source,
                                  char delimiter,
                                  std::vector<std::string>* fields) {
  fields->clear();
  std::string::size_type begin = 0;
  for (std::string::size_type hit = source.find(delimiter);
       hit != std::string::npos; hit = source.find(delimiter, begin)) {
    fields->push_back(source.substr(begin, hit - begin));
    begin = hit + 1;
  }
  // Whatever follows the last delimiter (possibly nothing) is the last token.
  fields->push_back(source.substr(begin));
  return fields->size();
}
307
Yves Gerey665174f2018-06-19 15:03:05 +0200308size_t tokenize_append(const std::string& source,
309 char delimiter,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000310 std::vector<std::string>* fields) {
Yves Gerey665174f2018-06-19 15:03:05 +0200311 if (!fields)
312 return 0;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000313
314 std::vector<std::string> new_fields;
315 tokenize(source, delimiter, &new_fields);
316 fields->insert(fields->end(), new_fields.begin(), new_fields.end());
317 return fields->size();
318}
319
Yves Gerey665174f2018-06-19 15:03:05 +0200320size_t tokenize(const std::string& source,
321 char delimiter,
322 char start_mark,
323 char end_mark,
324 std::vector<std::string>* fields) {
325 if (!fields)
326 return 0;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000327 fields->clear();
328
329 std::string remain_source = source;
330 while (!remain_source.empty()) {
331 size_t start_pos = remain_source.find(start_mark);
Yves Gerey665174f2018-06-19 15:03:05 +0200332 if (std::string::npos == start_pos)
333 break;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000334 std::string pre_mark;
335 if (start_pos > 0) {
336 pre_mark = remain_source.substr(0, start_pos - 1);
337 }
338
339 ++start_pos;
340 size_t end_pos = remain_source.find(end_mark, start_pos);
Yves Gerey665174f2018-06-19 15:03:05 +0200341 if (std::string::npos == end_pos)
342 break;
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000343
344 // We have found the matching marks. First tokenize the pre-mask. Then add
345 // the marked part as a single field. Finally, loop back for the post-mark.
346 tokenize_append(pre_mark, delimiter, fields);
347 fields->push_back(remain_source.substr(start_pos, end_pos - start_pos));
348 remain_source = remain_source.substr(end_pos + 1);
349 }
350
351 return tokenize_append(remain_source, delimiter, fields);
352}
353
// Splits |source| at the first run of |delimiter| characters.
//
// |token| - receives everything before the first delimiter.
// |rest|  - receives everything after the run of consecutive delimiters that
//           starts there (possibly empty).
//
// Returns false, leaving |token| and |rest| untouched, if |source| does not
// contain |delimiter|. |token| and |rest| may refer to the same string as
// |source|.
bool tokenize_first(const std::string& source,
                    const char delimiter,
                    std::string* token,
                    std::string* rest) {
  // Find the first delimiter
  const size_t left_pos = source.find(delimiter);
  if (left_pos == std::string::npos) {
    return false;
  }

  // Look for additional occurrences of delimiter, without running past the
  // end of the string (which would happen for a '\0' delimiter otherwise).
  size_t right_pos = left_pos + 1;
  while (right_pos < source.size() && source[right_pos] == delimiter) {
    ++right_pos;
  }

  // Extract both parts before writing either output, so that an output that
  // aliases |source| cannot disturb the other extraction.
  std::string head = source.substr(0, left_pos);
  std::string tail = source.substr(right_pos);
  token->swap(head);
  rest->swap(tail);
  return true;
}
374
// Concatenates the strings in |source|, placing |delimiter| between each
// adjacent pair. Returns an empty string when |source| is empty.
std::string join(const std::vector<std::string>& source, char delimiter) {
  if (source.empty()) {
    return std::string();
  }

  // Work out the final length up front so the result is allocated once: all
  // the pieces plus one delimiter between each pair.
  size_t total_length = source.size() - 1;
  for (const std::string& piece : source) {
    total_length += piece.length();
  }

  std::string joined_string;
  joined_string.reserve(total_length);
  bool first = true;
  for (const std::string& piece : source) {
    if (!first) {
      joined_string += delimiter;
    }
    joined_string += piece;
    first = false;
  }
  return joined_string;
}
396
Yves Gerey665174f2018-06-19 15:03:05 +0200397size_t split(const std::string& source,
398 char delimiter,
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000399 std::vector<std::string>* fields) {
henrikg91d6ede2015-09-17 00:24:34 -0700400 RTC_DCHECK(fields);
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000401 fields->clear();
402 size_t last = 0;
403 for (size_t i = 0; i < source.length(); ++i) {
404 if (source[i] == delimiter) {
405 fields->push_back(source.substr(last, i - last));
406 last = i + 1;
407 }
408 }
409 fields->push_back(source.substr(last, source.length() - last));
410 return fields->size();
411}
412
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000413} // namespace rtc