/*
 *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef RTC_BASE_STRINGENCODE_H_
12#define RTC_BASE_STRINGENCODE_H_
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000013
Henrik Kjellanderec78f1c2017-06-29 07:52:50 +020014#include <sstream>
15#include <string>
16#include <vector>
henrike@webrtc.orgf0488722014-05-13 18:00:26 +000017
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020018#include "rtc_base/checks.h"
Henrik Kjellanderec78f1c2017-06-29 07:52:50 +020019
20namespace rtc {
21
//////////////////////////////////////////////////////////////////////
// String Encoding Utilities
//////////////////////////////////////////////////////////////////////
25
// Converts an unsigned value to its UTF-8 representation. Returns the length
// of the encoded string, or 0 if the encoding is longer than buflen - 1.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);

// Decodes the UTF-8 encoded value pointed to by |source|. Returns the number
// of bytes used by the encoding, or 0 if the encoding is invalid.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
32
// Escaping prefixes illegal characters with the escape character. Compact,
// but illegal characters still appear in the string.
// |illegal| lists the characters to be escaped and |escape| is the prefix
// character.
// NOTE(review): the meaning of the size_t return value is not stated here;
// presumably the length of the output - confirm against the implementation.
size_t escape(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);

// Reverses escape().
// Note: in-place unescaping (buffer == source) is allowed.
size_t unescape(char* buffer, size_t buflen,
                const char* source, size_t srclen,
                char escape);

// Encoding replaces illegal characters with the escape character and 2 hex
// chars, so it's a little less compact than escape, but completely removes
// illegal characters. Note that hex digits should not be used as illegal
// characters.
size_t encode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);

// Reverses encode().
// Note: in-place decoding (buffer == source) is allowed.
size_t decode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              char escape);
54
// Returns a list of characters that may be unsafe for use in the name of a
// file, suitable for passing as the |illegal| parameter of escape() or
// encode().
const char* unsafe_filename_characters();
58
// url_encode is an encode operation with a predefined set of illegal
// characters and escape character (for use in URLs).
size_t url_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t html_decode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);

// xml_encode makes data suitable for inside xml attributes and values.
size_t xml_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t xml_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
80
// Converts an unsigned value from 0 to 15 to the hex character equivalent...
char hex_encode(unsigned char val);
// ...and vice-versa.
// NOTE(review): presumably returns false when |ch| is not a hex digit -
// confirm against the implementation.
bool hex_decode(char ch, unsigned char* val);

// hex_encode shows the hex representation of binary data in ascii.
size_t hex_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// hex_encode, but separate each byte representation with a delimiter.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short, we return 0.
size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
                                 const char* source, size_t srclen,
                                 char delimiter);

// Helper functions for hex_encode that return the result as a std::string.
std::string hex_encode(const std::string& str);
std::string hex_encode(const char* source, size_t srclen);
std::string hex_encode_with_delimiter(const char* source, size_t srclen,
                                      char delimiter);

// hex_decode converts ascii hex to binary.
size_t hex_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// hex_decode, assuming that there is a delimiter between every byte
// pair.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short or the data is invalid, we return 0.
size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
                                 const char* source, size_t srclen,
                                 char delimiter);

// Helper functions for hex_decode that take the input as a std::string.
size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
                                 const std::string& source, char delimiter);
119
// Apply any suitable string transform (including the ones above) to an STL
// string. Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
// A Transform has the same shape as url_encode() and friends: it reads
// |srclen| bytes from |source| and writes into |buffer| of size |buflen|.
typedef size_t (*Transform)(char* buffer, size_t buflen,
                            const char* source, size_t srclen);
size_t transform(std::string& value, size_t maxlen, const std::string& source,
                 Transform t);

// Return the result of applying transform t to source.
std::string s_transform(const std::string& source, Transform t);
130
131// Convenience wrappers.
132inline std::string s_url_encode(const std::string& source) {
133 return s_transform(source, url_encode);
134}
135inline std::string s_url_decode(const std::string& source) {
136 return s_transform(source, url_decode);
137}
138
// Joins the source vector of strings into a single string, with each
// field in source being separated by delimiter. No trailing delimiter is
// added.
std::string join(const std::vector<std::string>& source, char delimiter);
142
// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter creating empty fields.
// NOTE(review): the return value is presumably the number of fields - confirm
// against the implementation.
size_t split(const std::string& source, char delimiter,
             std::vector<std::string>* fields);

// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter ignored. Trailing delimiter ignored.
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields);

// Tokenize, including the empty tokens.
size_t tokenize_with_empty_tokens(const std::string& source,
                                  char delimiter,
                                  std::vector<std::string>* fields);

// Tokenize and append the tokens to fields. Return the new size of fields.
size_t tokenize_append(const std::string& source, char delimiter,
                       std::vector<std::string>* fields);

// Splits the source string into multiple fields separated by delimiter, with
// duplicates of delimiter ignored. Trailing delimiter ignored. A substring in
// between the start_mark and the end_mark is treated as a single field. Return
// the size of fields. For example, if source is "filename
// \"/Library/Application Support/media content.txt\"", delimiter is ' ', and
// the start_mark and end_mark are '"', this method returns two fields:
// "filename" and "/Library/Application Support/media content.txt".
size_t tokenize(const std::string& source, char delimiter, char start_mark,
                char end_mark, std::vector<std::string>* fields);

// Extract the first token from source as separated by delimiter, with
// duplicates of delimiter ignored. Return false if the delimiter could not be
// found, otherwise return true.
bool tokenize_first(const std::string& source,
                    const char delimiter,
                    std::string* token,
                    std::string* rest);
179
// Safe sprintf to std::string
// void sprintf(std::string& value, size_t maxlen, const char * format, ...)
//     PRINTF_FORMAT(3);
183
184// Convert arbitrary values to/from a string.
185
186template <class T>
187static bool ToString(const T &t, std::string* s) {
188 RTC_DCHECK(s);
189 std::ostringstream oss;
190 oss << std::boolalpha << t;
191 *s = oss.str();
192 return !oss.fail();
193}
194
// Parses |s| into |*t| with a single operator>> extraction from a
// std::istringstream. std::boolalpha is set, so bool values are read as
// words rather than as 0/1. |t| is checked with RTC_DCHECK. Returns false if
// the extraction fails; characters left over after a successful extraction
// are not treated as an error. On failure |*t| may have been modified by the
// stream.
template <class T>
static bool FromString(const std::string& s, T* t) {
  RTC_DCHECK(t);
  std::istringstream iss(s);
  iss >> std::boolalpha >> *t;
  return !iss.fail();
}
202
// Inline versions of the string conversion routines.

// Returns |val| formatted by the two-argument ToString(). The success flag of
// that call is discarded, so a formatting failure is not reported to the
// caller.
template <typename T>
static inline std::string ToString(const T& val) {
  std::string str;
  ToString(val, &str);
  return str;
}
209
// Returns |str| parsed as a T by the two-argument FromString(). The success
// flag of that call is discarded, so a parse failure is not reported to the
// caller.
template <typename T>
static inline T FromString(const std::string& str) {
  // Value-initialize: when the stream cannot even start an extraction (e.g.
  // |str| is empty) the target is left untouched, and returning a
  // default-initialized scalar would read an indeterminate value.
  T val{};
  FromString(str, &val);
  return val;
}
214
// Returns |str| parsed as a T, or |defaultValue| if the parse fails.
template <typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  // A failed stream extraction may overwrite the target (numeric extraction
  // stores 0 on failure since C++11), so the default has to be returned
  // explicitly rather than relying on |val| being left intact.
  return FromString(str, &val) ? val : defaultValue;
}
219
// Simple function to strip out characters which shouldn't be used in
// filenames.
// NOTE(review): what an unsafe character is replaced with is not visible from
// this header - confirm against the implementation.
char make_char_safe_for_filename(char c);
223
//////////////////////////////////////////////////////////////////////
225
226} // namespace rtc
henrike@webrtc.orgf0488722014-05-13 18:00:26 +0000227
#endif  // RTC_BASE_STRINGENCODE_H_