blob: 872dfd47e71c16e4b9936e8446a4913ab8771bfd [file] [log] [blame]
henrike@webrtc.org0e118e72013-07-10 00:45:36 +00001/*
2 * libjingle
3 * Copyright 2004, Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#ifndef TALK_BASE_STRINGENCODE_H_
29#define TALK_BASE_STRINGENCODE_H_
30
31#include <string>
32#include <sstream>
33#include <vector>
34
35#include "talk/base/common.h"
36
37namespace talk_base {
38
39//////////////////////////////////////////////////////////////////////
40// String Encoding Utilities
41//////////////////////////////////////////////////////////////////////
42
43// Convert an unsigned value to its utf8 representation. Returns the length
44// of the encoded string, or 0 if the encoding is longer than buflen - 1.
45size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
46// Decode the utf8 encoded value pointed to by source. Returns the number of
47// bytes used by the encoding, or 0 if the encoding is invalid.
48size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
49
50// Escaping prefixes illegal characters with the escape character. Compact, but
51// illegal characters still appear in the string.
52size_t escape(char * buffer, size_t buflen,
53 const char * source, size_t srclen,
54 const char * illegal, char escape);
55// Note: in-place unescaping (buffer == source) is allowed.
56size_t unescape(char * buffer, size_t buflen,
57 const char * source, size_t srclen,
58 char escape);
59
60// Encoding replaces illegal characters with the escape character and 2 hex
61// chars, so it's a little less compact than escape, but completely removes
62// illegal characters. Note that hex digits should not be used as illegal
63// characters.
64size_t encode(char * buffer, size_t buflen,
65 const char * source, size_t srclen,
66 const char * illegal, char escape);
67// Note: in-place decoding (buffer == source) is allowed.
68size_t decode(char * buffer, size_t buflen,
69 const char * source, size_t srclen,
70 char escape);
71
72// Returns a list of characters that may be unsafe for use in the name of a
73// file, suitable for passing as the 'illegal' argument of escape or encode.
74const char* unsafe_filename_characters();
75
76// url_encode is an encode operation with a predefined set of illegal characters
77// and escape character (for use in URLs, obviously).
78size_t url_encode(char * buffer, size_t buflen,
79 const char * source, size_t srclen);
80// Note: in-place decoding (buffer == source) is allowed.
81size_t url_decode(char * buffer, size_t buflen,
82 const char * source, size_t srclen);
83
84// html_encode prevents data embedded in html from containing markup.
85size_t html_encode(char * buffer, size_t buflen,
86 const char * source, size_t srclen);
87// Note: in-place decoding (buffer == source) is allowed.
88size_t html_decode(char * buffer, size_t buflen,
89 const char * source, size_t srclen);
90
91// xml_encode makes data suitable for inside xml attributes and values.
92size_t xml_encode(char * buffer, size_t buflen,
93 const char * source, size_t srclen);
94// Note: in-place decoding (buffer == source) is allowed.
95size_t xml_decode(char * buffer, size_t buflen,
96 const char * source, size_t srclen);
97
98// Convert an unsigned value from 0 to 15 to the hex character equivalent...
99char hex_encode(unsigned char val);
100// ...and vice-versa.
101bool hex_decode(char ch, unsigned char* val);
102
103// hex_encode shows the hex representation of binary data in ascii.
104size_t hex_encode(char* buffer, size_t buflen,
105 const char* source, size_t srclen);
106
107// hex_encode, but separate each byte representation with a delimiter.
108// |delimiter| == 0 means no delimiter.
109// If the buffer is too short, we return 0.
110size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
111 const char* source, size_t srclen,
112 char delimiter);
113
114// Helper functions for hex_encode.
115std::string hex_encode(const char* source, size_t srclen);
116std::string hex_encode_with_delimiter(const char* source, size_t srclen,
117 char delimiter);
118
119// hex_decode converts ascii hex to binary.
120size_t hex_decode(char* buffer, size_t buflen,
121 const char* source, size_t srclen);
122
123// hex_decode, assuming that there is a delimiter between every byte
124// pair.
125// |delimiter| == 0 means no delimiter.
126// If the buffer is too short or the data is invalid, we return 0.
127size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
128 const char* source, size_t srclen,
129 char delimiter);
130
131// Helper functions for hex_decode.
132size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
133size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
134 const std::string& source, char delimiter);
135
136// Apply any suitable string transform (including the ones above) to an STL
137// string. Stack-allocated temporary space is used for the transformation,
138// so value and source may refer to the same string.
139typedef size_t (*Transform)(char * buffer, size_t buflen,
140 const char * source, size_t srclen);
141size_t transform(std::string& value, size_t maxlen, const std::string& source,
142 Transform t);
143
144// Return the result of applying transform t to source.
145std::string s_transform(const std::string& source, Transform t);
146
147// Convenience wrappers.
148inline std::string s_url_encode(const std::string& source) {
149 return s_transform(source, url_encode);
150}
151inline std::string s_url_decode(const std::string& source) {
152 return s_transform(source, url_decode);
153}
154
155// Splits the source string into multiple fields separated by delimiter,
156// with duplicates of delimiter creating empty fields.
157size_t split(const std::string& source, char delimiter,
158 std::vector<std::string>* fields);
159
160// Splits the source string into multiple fields separated by delimiter,
161// with duplicates of delimiter ignored. Trailing delimiter ignored.
162size_t tokenize(const std::string& source, char delimiter,
163 std::vector<std::string>* fields);
164
165// Tokenize and append the tokens to fields. Return the new size of fields.
166size_t tokenize_append(const std::string& source, char delimiter,
167 std::vector<std::string>* fields);
168
169// Splits the source string into multiple fields separated by delimiter, with
170// duplicates of delimiter ignored. Trailing delimiter ignored. A substring in
171// between the start_mark and the end_mark is treated as a single field. Return
172// the size of fields. For example, if source is "filename
173// \"/Library/Application Support/media content.txt\"", delimiter is ' ', and
174// the start_mark and end_mark are '"', this method returns two fields:
175// "filename" and "/Library/Application Support/media content.txt".
176size_t tokenize(const std::string& source, char delimiter, char start_mark,
177 char end_mark, std::vector<std::string>* fields);
178
179// Safe sprintf to std::string
180//void sprintf(std::string& value, size_t maxlen, const char * format, ...)
181// PRINTF_FORMAT(3);
182
183// Convert arbitrary values to/from a string.
184
185template <class T>
186static bool ToString(const T &t, std::string* s) {
187 ASSERT(NULL != s);
188 std::ostringstream oss;
189 oss << std::boolalpha << t;
190 *s = oss.str();
191 return !oss.fail();
192}
193
// Parses the text in |s| into |*t| with operator>> on a temporary
// std::istringstream. std::boolalpha is set on the stream, so bool values
// are read as "true"/"false". |t| must not be NULL.
// Returns true unless the extraction left the stream in a failed state. Only
// fail() is consulted, so unparsed characters after the value are not treated
// as an error. On failure |*t| holds whatever T's operator>> left there.
template <class T>
static bool FromString(const std::string& s, T* t) {
  ASSERT(t != NULL);
  std::istringstream input(s);
  input >> std::boolalpha >> *t;
  if (input.fail()) {
    return false;
  }
  return true;
}
201
202// Inline versions of the string conversion routines.
203
// Convenience form of ToString that returns the text by value. The success
// flag of the two-argument ToString is discarded; whatever that call stored
// in the local string is returned.
template<typename T>
static inline std::string ToString(const T& val) {
  std::string text;
  ToString(val, &text);
  return text;
}
208
// Parses |str| as a T and returns the result by value. T must be
// default-constructible. The success flag of the two-argument FromString is
// discarded, so a failed parse cannot be told apart from a parsed value here;
// call the two-argument FromString directly when that distinction matters.
template<typename T>
static inline T FromString(const std::string& str) {
  // Value-initialize so built-in types start out as zero/false instead of
  // holding an indeterminate value when extraction fails before anything is
  // stored (for example when |str| is empty).
  T val = T();
  FromString(str, &val);
  return val;
}
213
// Parses |str| as a T and returns the parsed value, or |defaultValue| if the
// two-argument FromString reports that parsing failed.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  // A failed stream extraction may overwrite its target (since C++11 the
  // numeric extractors store 0 or a clamped limit on failure), so seeding
  // |val| with the default is not enough: fall back to it explicitly.
  if (!FromString(str, &val)) {
    return defaultValue;
  }
  return val;
}
218
219// Simple function to strip out characters which shouldn't be
220// used in filenames.
221char make_char_safe_for_filename(char c);
222
223//////////////////////////////////////////////////////////////////////
224
225} // namespace talk_base
226
227#endif // TALK_BASE_STRINGENCODE_H_