henrike@webrtc.org | 0e118e7 | 2013-07-10 00:45:36 +0000 | [diff] [blame] | 1 | /* |
| 2 | * libjingle |
| 3 | * Copyright 2004, Google Inc. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions are met: |
| 7 | * |
| 8 | * 1. Redistributions of source code must retain the above copyright notice, |
| 9 | * this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, |
| 11 | * this list of conditions and the following disclaimer in the documentation |
| 12 | * and/or other materials provided with the distribution. |
| 13 | * 3. The name of the author may not be used to endorse or promote products |
| 14 | * derived from this software without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED |
| 17 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
| 18 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO |
| 19 | * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 20 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
| 22 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
| 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
| 24 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| 25 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
| 28 | #ifndef TALK_BASE_STRINGENCODE_H_ |
| 29 | #define TALK_BASE_STRINGENCODE_H_ |
| 30 | |
| 31 | #include <string> |
| 32 | #include <sstream> |
| 33 | #include <vector> |
| 34 | |
| 35 | #include "talk/base/common.h" |
| 36 | |
| 37 | namespace talk_base { |
| 38 | |
| 39 | ////////////////////////////////////////////////////////////////////// |
| 40 | // String Encoding Utilities |
| 41 | ////////////////////////////////////////////////////////////////////// |
| 42 | |
| 43 | // Convert an unsigned value to its utf8 representation. Returns the length |
| 44 | // of the encoded string, or 0 if the encoding is longer than buflen - 1. |
| 45 | size_t utf8_encode(char* buffer, size_t buflen, unsigned long value); |
| 46 | // Decode the utf8 encoded value pointed to by source. Returns the number of |
| 47 | // bytes used by the encoding, or 0 if the encoding is invalid. |
| 48 | size_t utf8_decode(const char* source, size_t srclen, unsigned long* value); |
| 49 | |
| 50 | // Escaping prefixes illegal characters with the escape character. Compact, but |
| 51 | // illegal characters still appear in the string. |
| 52 | size_t escape(char * buffer, size_t buflen, |
| 53 | const char * source, size_t srclen, |
| 54 | const char * illegal, char escape); |
| 55 | // Note: in-place unescaping (buffer == source) is allowed. |
| 56 | size_t unescape(char * buffer, size_t buflen, |
| 57 | const char * source, size_t srclen, |
| 58 | char escape); |
| 59 | |
| 60 | // Encoding replaces illegal characters with the escape character and 2 hex |
| 61 | // chars, so it's a little less compact than escape, but completely removes |
| 62 | // illegal characters. Note that hex digits should not be used as illegal |
| 63 | // characters. |
| 64 | size_t encode(char * buffer, size_t buflen, |
| 65 | const char * source, size_t srclen, |
| 66 | const char * illegal, char escape); |
| 67 | // Note: in-place decoding (buffer == source) is allowed. |
| 68 | size_t decode(char * buffer, size_t buflen, |
| 69 | const char * source, size_t srclen, |
| 70 | char escape); |
| 71 | |
| 72 | // Returns a list of characters that may be unsafe for use in the name of a |
| 73 | // file, suitable for passing to the 'illegal' parameter of escape or encode. |
| 74 | const char* unsafe_filename_characters(); |
| 75 | |
| 76 | // url_encode is an encode operation with a predefined set of illegal characters |
| 77 | // and escape character (for use in URLs, obviously). |
| 78 | size_t url_encode(char * buffer, size_t buflen, |
| 79 | const char * source, size_t srclen); |
| 80 | // Note: in-place decoding (buffer == source) is allowed. |
| 81 | size_t url_decode(char * buffer, size_t buflen, |
| 82 | const char * source, size_t srclen); |
| 83 | |
| 84 | // html_encode prevents data embedded in html from containing markup. |
| 85 | size_t html_encode(char * buffer, size_t buflen, |
| 86 | const char * source, size_t srclen); |
| 87 | // Note: in-place decoding (buffer == source) is allowed. |
| 88 | size_t html_decode(char * buffer, size_t buflen, |
| 89 | const char * source, size_t srclen); |
| 90 | |
| 91 | // xml_encode makes data suitable for inside xml attributes and values. |
| 92 | size_t xml_encode(char * buffer, size_t buflen, |
| 93 | const char * source, size_t srclen); |
| 94 | // Note: in-place decoding (buffer == source) is allowed. |
| 95 | size_t xml_decode(char * buffer, size_t buflen, |
| 96 | const char * source, size_t srclen); |
| 97 | |
| 98 | // Convert an unsigned value from 0 to 15 to the hex character equivalent... |
| 99 | char hex_encode(unsigned char val); |
| 100 | // ...and vice-versa. |
| 101 | bool hex_decode(char ch, unsigned char* val); |
| 102 | |
| 103 | // hex_encode shows the hex representation of binary data in ascii. |
| 104 | size_t hex_encode(char* buffer, size_t buflen, |
| 105 | const char* source, size_t srclen); |
| 106 | |
| 107 | // hex_encode, but separate each byte representation with a delimiter. |
| 108 | // |delimiter| == 0 means no delimiter |
| 109 | // If the buffer is too short, we return 0 |
| 110 | size_t hex_encode_with_delimiter(char* buffer, size_t buflen, |
| 111 | const char* source, size_t srclen, |
| 112 | char delimiter); |
| 113 | |
| 114 | // Helper functions for hex_encode. |
| 115 | std::string hex_encode(const char* source, size_t srclen); |
| 116 | std::string hex_encode_with_delimiter(const char* source, size_t srclen, |
| 117 | char delimiter); |
| 118 | |
| 119 | // hex_decode converts ascii hex to binary. |
| 120 | size_t hex_decode(char* buffer, size_t buflen, |
| 121 | const char* source, size_t srclen); |
| 122 | |
| 123 | // hex_decode, assuming that there is a delimiter between every byte |
| 124 | // pair. |
| 125 | // |delimiter| == 0 means no delimiter |
| 126 | // If the buffer is too short or the data is invalid, we return 0. |
| 127 | size_t hex_decode_with_delimiter(char* buffer, size_t buflen, |
| 128 | const char* source, size_t srclen, |
| 129 | char delimiter); |
| 130 | |
| 131 | // Helper functions for hex_decode. |
| 132 | size_t hex_decode(char* buffer, size_t buflen, const std::string& source); |
| 133 | size_t hex_decode_with_delimiter(char* buffer, size_t buflen, |
| 134 | const std::string& source, char delimiter); |
| 135 | |
| 136 | // Apply any suitable string transform (including the ones above) to an STL |
| 137 | // string. Stack-allocated temporary space is used for the transformation, |
| 138 | // so value and source may refer to the same string. |
| 139 | typedef size_t (*Transform)(char * buffer, size_t buflen, |
| 140 | const char * source, size_t srclen); |
| 141 | size_t transform(std::string& value, size_t maxlen, const std::string& source, |
| 142 | Transform t); |
| 143 | |
| 144 | // Return the result of applying transform t to source. |
| 145 | std::string s_transform(const std::string& source, Transform t); |
| 146 | |
| 147 | // Convenience wrappers. |
| 148 | inline std::string s_url_encode(const std::string& source) { |
| 149 | return s_transform(source, url_encode); |
| 150 | } |
| 151 | inline std::string s_url_decode(const std::string& source) { |
| 152 | return s_transform(source, url_decode); |
| 153 | } |
| 154 | |
| 155 | // Splits the source string into multiple fields separated by delimiter, |
| 156 | // with duplicates of delimiter creating empty fields. |
| 157 | size_t split(const std::string& source, char delimiter, |
| 158 | std::vector<std::string>* fields); |
| 159 | |
| 160 | // Splits the source string into multiple fields separated by delimiter, |
| 161 | // with duplicates of delimiter ignored. Trailing delimiter ignored. |
| 162 | size_t tokenize(const std::string& source, char delimiter, |
| 163 | std::vector<std::string>* fields); |
| 164 | |
| 165 | // Tokenize and append the tokens to fields. Return the new size of fields. |
| 166 | size_t tokenize_append(const std::string& source, char delimiter, |
| 167 | std::vector<std::string>* fields); |
| 168 | |
| 169 | // Splits the source string into multiple fields separated by delimiter, with |
| 170 | // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in |
| 171 | // between the start_mark and the end_mark is treated as a single field. Return |
| 172 | // the size of fields. For example, if source is "filename |
| 173 | // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and |
| 174 | // the start_mark and end_mark are '"', this method returns two fields: |
| 175 | // "filename" and "/Library/Application Support/media content.txt". |
| 176 | size_t tokenize(const std::string& source, char delimiter, char start_mark, |
| 177 | char end_mark, std::vector<std::string>* fields); |
| 178 | |
| 179 | // Safe sprintf to std::string |
| 180 | //void sprintf(std::string& value, size_t maxlen, const char * format, ...) |
| 181 | // PRINTF_FORMAT(3); |
| 182 | |
| 183 | // Convert arbitrary values to/from a string. |
| 184 | |
| 185 | template <class T> |
| 186 | static bool ToString(const T &t, std::string* s) { |
| 187 | ASSERT(NULL != s); |
| 188 | std::ostringstream oss; |
| 189 | oss << std::boolalpha << t; |
| 190 | *s = oss.str(); |
| 191 | return !oss.fail(); |
| 192 | } |
| 193 | |
// Parses a single value from |s| into |*t| using T's stream extraction
// operator. Boolean values are expected as words ("true"/"false").
// |t| must not be NULL. Returns false if the extraction failed, true
// otherwise. Only the one extraction is checked, so characters left over after
// the parsed value are not treated as an error. After a failed parse |*t|
// holds whatever the extraction operator left there.
template <class T>
static bool FromString(const std::string& s, T* t) {
  ASSERT(NULL != t);
  std::istringstream stream(s);
  stream >> std::boolalpha;
  stream >> *t;
  if (stream.fail()) {
    return false;
  }
  return true;
}
| 201 | |
| 202 | // Inline versions of the string conversion routines. |
| 203 | |
// Returns the string form of |val| by value. This forwards to the
// pointer-taking ToString overload and discards its success flag, so a
// formatting failure is not reported to the caller.
template<typename T>
static inline std::string ToString(const T& val) {
  std::string text;
  ToString(val, &text);
  return text;
}
| 208 | |
// Parses |str| into a T and returns it by value.
//
// The result is value-initialized up front (0 / false / NULL for built-in
// types) so that a failed parse never returns an indeterminate value: when
// the stream has nothing to extract (e.g. an empty string), the extraction
// operator does not write to its target at all.
//
// The success flag of the pointer-taking overload is discarded; callers that
// need to detect a failed parse should call that overload directly.
template<typename T>
static inline T FromString(const std::string& str) {
  T val = T();
  FromString(str, &val);
  return val;
}
| 213 | |
// Parses |str| into a T, returning |defaultValue| when the parse fails.
//
// The result of the pointer-taking overload is checked explicitly rather than
// relying on the target being left untouched: since C++11 a failed numeric
// extraction stores zero in its target, which would otherwise silently
// replace the caller's default.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  if (!FromString(str, &val)) {
    return defaultValue;
  }
  return val;
}
| 218 | |
| 219 | // simple function to strip out characters which shouldn't be |
| 220 | // used in filenames |
| 221 | char make_char_safe_for_filename(char c); |
| 222 | |
| 223 | ////////////////////////////////////////////////////////////////////// |
| 224 | |
| 225 | } // namespace talk_base |
| 226 | |
| 227 | #endif // TALK_BASE_STRINGENCODE_H_ |