Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 1 | <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>encoding</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css"> |
| 2 | .synopsis, .classsynopsis { |
| 3 | background: #eeeeee; |
| 4 | border: solid 1px #aaaaaa; |
| 5 | padding: 0.5em; |
| 6 | } |
| 7 | .programlisting { |
| 8 | background: #eeeeff; |
| 9 | border: solid 1px #aaaaff; |
| 10 | padding: 0.5em; |
| 11 | } |
| 12 | .variablelist { |
| 13 | padding: 4px; |
| 14 | margin-left: 3em; |
| 15 | } |
| 16 | .navigation { |
| 17 | background: #ffeeee; |
| 18 | border: solid 1px #ffaaaa; |
| 19 | margin-top: 0.5em; |
| 20 | margin-bottom: 0.5em; |
| 21 | } |
| 22 | .navigation a { |
| 23 | color: #770000; |
| 24 | } |
| 25 | .navigation a:visited { |
| 26 | color: #550000; |
| 27 | } |
| 28 | .navigation .title { |
| 29 | font-size: 200%; |
| 30 | } |
| 31 | </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-parserInternals.html" title="parserInternals"><link rel="next" href="libxml-hash.html" title="hash"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-parserInternals.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-hash.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-encoding"></a><div class="titlepage"></div><div class="refnamediv"><h2>encoding</h2><p>encoding — </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis"> |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 32 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 33 | |
| 34 | |
| 35 | enum <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a>; |
| 36 | int (<a href="libxml-encoding.html#xmlCharEncodingInputFunc">*xmlCharEncodingInputFunc</a>) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 37 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 38 | unsigned char *in, |
Daniel Veillard | edfb29b | 2000-03-14 19:59:05 +0000 | [diff] [blame] | 39 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 40 | int (<a href="libxml-encoding.html#xmlCharEncodingOutputFunc">*xmlCharEncodingOutputFunc</a>) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 41 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 42 | unsigned char *in, |
Daniel Veillard | edfb29b | 2000-03-14 19:59:05 +0000 | [diff] [blame] | 43 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 44 | struct <a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a>; |
| 45 | typedef <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a>; |
| 46 | void <a href="libxml-encoding.html#xmlInitCharEncodingHandlers">xmlInitCharEncodingHandlers</a> (void); |
| 47 | void <a href="libxml-encoding.html#xmlCleanupCharEncodingHandlers">xmlCleanupCharEncodingHandlers</a> (void); |
| 48 | void <a href="libxml-encoding.html#xmlRegisterCharEncodingHandler">xmlRegisterCharEncodingHandler</a> (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler); |
| 49 | <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlGetCharEncodingHandler">xmlGetCharEncodingHandler</a> |
| 50 | (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); |
| 51 | <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlFindCharEncodingHandler">xmlFindCharEncodingHandler</a> |
Daniel Veillard | edfb29b | 2000-03-14 19:59:05 +0000 | [diff] [blame] | 52 | (const char *name); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 53 | <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlNewCharEncodingHandler">xmlNewCharEncodingHandler</a> |
Daniel Veillard | 2ace195 | 2002-09-26 12:28:02 +0000 | [diff] [blame] | 54 | (const char *name, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 55 | <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, |
| 56 | <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output); |
| 57 | int <a href="libxml-encoding.html#xmlAddEncodingAlias">xmlAddEncodingAlias</a> (const char *name, |
Daniel Veillard | 3bff2b0 | 2000-10-01 20:33:47 +0000 | [diff] [blame] | 58 | const char *alias); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 59 | int <a href="libxml-encoding.html#xmlDelEncodingAlias">xmlDelEncodingAlias</a> (const char *alias); |
| 60 | const char* <a href="libxml-encoding.html#xmlGetEncodingAlias">xmlGetEncodingAlias</a> (const char *alias); |
| 61 | void <a href="libxml-encoding.html#xmlCleanupEncodingAliases">xmlCleanupEncodingAliases</a> (void); |
| 62 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlParseCharEncoding">xmlParseCharEncoding</a> (const char *name); |
| 63 | const char* <a href="libxml-encoding.html#xmlGetCharEncodingName">xmlGetCharEncodingName</a> (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); |
| 64 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlDetectCharEncoding">xmlDetectCharEncoding</a> (unsigned char *in, |
Daniel Veillard | 3bff2b0 | 2000-10-01 20:33:47 +0000 | [diff] [blame] | 65 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 66 | int <a href="libxml-encoding.html#xmlCharEncOutFunc">xmlCharEncOutFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 67 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 68 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); |
| 69 | int <a href="libxml-encoding.html#xmlCharEncInFunc">xmlCharEncInFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 70 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 71 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); |
| 72 | int <a href="libxml-encoding.html#xmlCharEncFirstLine">xmlCharEncFirstLine</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 73 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 74 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); |
| 75 | int <a href="libxml-encoding.html#xmlCharEncCloseFunc">xmlCharEncCloseFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler); |
| 76 | int <a href="libxml-encoding.html#UTF8Toisolat1">UTF8Toisolat1</a> (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 77 | int *outlen, |
| 78 | unsigned char *in, |
| 79 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 80 | int <a href="libxml-encoding.html#isolat1ToUTF8">isolat1ToUTF8</a> (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 81 | int *outlen, |
| 82 | unsigned char *in, |
| 83 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 84 | int <a href="libxml-encoding.html#xmlGetUTF8Char">xmlGetUTF8Char</a> (unsigned char *utf, |
Daniel Veillard | aec6356 | 2003-03-23 20:42:17 +0000 | [diff] [blame] | 85 | int *len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 86 | int <a href="libxml-encoding.html#xmlCheckUTF8">xmlCheckUTF8</a> (unsigned char *utf); |
| 87 | int <a href="libxml-encoding.html#xmlUTF8Strsize">xmlUTF8Strsize</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 88 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 89 | <a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strndup">xmlUTF8Strndup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 90 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 91 | <a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strpos">xmlUTF8Strpos</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 92 | int pos); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 93 | int <a href="libxml-encoding.html#xmlUTF8Strloc">xmlUTF8Strloc</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 94 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar); |
| 95 | <a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strsub">xmlUTF8Strsub</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 96 | int start, |
| 97 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 98 | int <a href="libxml-encoding.html#xmlUTF8Strlen">xmlUTF8Strlen</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf); |
| 99 | </pre></div><div class="refsect1" lang="en"><h2>Description</h2><p> |
| 100 | |
| 101 | </p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="xmlCharEncoding"></a>enum xmlCharEncoding</h3><pre class="programlisting">typedef enum { |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 102 | XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ |
| 103 | XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ |
| 104 | XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ |
| 105 | XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ |
| 106 | XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ |
| 107 | XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ |
| 108 | XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ |
| 109 | XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ |
| 110 | XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ |
| 111 | XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ |
| 112 | XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ |
| 113 | XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ |
| 114 | XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ |
| 115 | XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ |
| 116 | XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ |
| 117 | XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ |
| 118 | XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ |
| 119 | XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ |
| 120 | XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ |
| 121 | XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ |
| 122 | XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ |
| 123 | XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ |
Daniel Veillard | e46e20d | 2000-07-14 15:02:46 +0000 | [diff] [blame] | 124 | XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ |
| 125 | XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 126 | } xmlCharEncoding; |
| 127 | </pre><p> |
| 128 | Predefined values for some standard encodings. |
Daniel Veillard | 1927409 | 2002-03-25 16:48:03 +0000 | [diff] [blame] | 129 | Libxml doesn't do beforehand translation on UTF8, ISOLatinX. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 130 | It also supports UTF16 (LE and BE) by default. |
| 131 | </p><p> |
| 132 | Anything else would have to be translated to UTF8 before being |
Daniel Veillard | e7ead2d | 2001-08-22 23:44:09 +0000 | [diff] [blame] | 133 | given to the parser itself. The BOM for UTF16 and the encoding |
| 134 | declaration are looked at and a converter is looked for at that |
| 135 | point. If not found the parser stops here as asked by the XML REC. |
| 136 | A converter can be registered by the user using xmlRegisterCharEncodingHandler |
Daniel Veillard | cbaf399 | 2001-12-31 16:16:02 +0000 | [diff] [blame] | 137 | but the current form doesn't allow stateful transcoding (a serious |
Daniel Veillard | e7ead2d | 2001-08-22 23:44:09 +0000 | [diff] [blame] | 138 | problem agreed !). If iconv has been found it will be used |
| 139 | automatically and allow stateful transcoding, the simplest is then |
| 140 | to be sure to enable iconv and to provide iconv libs for the encoding |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 141 | support needed.</p><p> |
| 142 | |
| 143 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingInputFunc"></a>xmlCharEncodingInputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingInputFunc) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 144 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 145 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 146 | int *inlen);</pre><p> |
| 147 | Take a block of chars in the original encoding and try to convert |
| 148 | it to a UTF-8 block of chars out.</p><p> |
| 149 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 150 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the UTF-8 result |
| 151 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 152 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of chars in the original encoding |
| 153 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
| 154 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 by lack of space, or -2 |
| 155 | if the transcoding failed. |
| 156 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
| 157 | if the return value is positive, else unpredictable. |
| 158 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 159 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingOutputFunc"></a>xmlCharEncodingOutputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingOutputFunc) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 160 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 161 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 162 | int *inlen);</pre><p> |
| 163 | Take a block of UTF-8 chars in and try to convert it to another |
Daniel Veillard | e7ead2d | 2001-08-22 23:44:09 +0000 | [diff] [blame] | 164 | encoding. |
| 165 | Note: a first call designed to produce heading info is called with |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 166 | in = NULL. If stateful this should also initialize the encoder state.</p><p> |
| 167 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 168 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result |
| 169 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 170 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars |
| 171 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
| 172 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 by lack of space, or -2 |
| 173 | if the transcoding failed. |
| 174 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
| 175 | if the return value is positive, else unpredictable. |
| 176 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 177 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandler"></a>struct xmlCharEncodingHandler</h3><pre class="programlisting">struct xmlCharEncodingHandler { |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 178 | char *name; |
| 179 | xmlCharEncodingInputFunc input; |
| 180 | xmlCharEncodingOutputFunc output; |
| 181 | #ifdef LIBXML_ICONV_ENABLED |
| 182 | iconv_t iconv_in; |
| 183 | iconv_t iconv_out; |
| 184 | #endif /* LIBXML_ICONV_ENABLED */ |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 185 | }; |
| 186 | </pre><p> |
| 187 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 188 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandlerPtr"></a>xmlCharEncodingHandlerPtr</h3><pre class="programlisting">typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; |
| 189 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 190 | |
| 191 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlInitCharEncodingHandlers"></a>xmlInitCharEncodingHandlers ()</h3><pre class="programlisting">void xmlInitCharEncodingHandlers (void);</pre><p> |
| 192 | Initialize the char encoding support, it registers the default |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 193 | encodings supported. |
| 194 | NOTE: while public, this function usually doesn't need to be called |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 195 | in normal processing.</p><p> |
| 196 | |
| 197 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupCharEncodingHandlers"></a>xmlCleanupCharEncodingHandlers ()</h3><pre class="programlisting">void xmlCleanupCharEncodingHandlers (void);</pre><p> |
| 198 | Cleanup the memory allocated for the char encoding support, it |
| 199 | unregisters all the encoding handlers and the aliases.</p><p> |
| 200 | |
| 201 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlRegisterCharEncodingHandler"></a>xmlRegisterCharEncodingHandler ()</h3><pre class="programlisting">void xmlRegisterCharEncodingHandler (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);</pre><p> |
| 202 | Register the char encoding handler, surprising, isn't it ?</p><p> |
| 203 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 204 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> the xmlCharEncodingHandlerPtr handler block |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 205 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingHandler"></a>xmlGetCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlGetCharEncodingHandler |
| 206 | (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> |
| 207 | Search in the registered set the handler able to read/write that encoding.</p><p> |
| 208 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 209 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i> :</span></td><td> an xmlCharEncoding value. |
| 210 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 211 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlFindCharEncodingHandler"></a>xmlFindCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlFindCharEncodingHandler |
| 212 | (const char *name);</pre><p> |
| 213 | Search in the registered set the handler able to read/write that encoding.</p><p> |
| 214 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 215 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> a string describing the char encoding. |
| 216 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 217 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlNewCharEncodingHandler"></a>xmlNewCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlNewCharEncodingHandler |
Daniel Veillard | 2ace195 | 2002-09-26 12:28:02 +0000 | [diff] [blame] | 218 | (const char *name, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 219 | <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, |
| 220 | <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);</pre><p> |
| 221 | Creates and registers an xmlCharEncodingHandler.</p><p> |
| 222 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 223 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> the encoding name, in UTF-8 format (ASCII actually) |
| 224 | </td></tr><tr><td><span class="term"><i><tt>input</tt></i> :</span></td><td> the xmlCharEncodingInputFunc to read that encoding |
| 225 | </td></tr><tr><td><span class="term"><i><tt>output</tt></i> :</span></td><td> the xmlCharEncodingOutputFunc to write that encoding |
| 226 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the xmlCharEncodingHandlerPtr created (or NULL in case of error). |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 227 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlAddEncodingAlias"></a>xmlAddEncodingAlias ()</h3><pre class="programlisting">int xmlAddEncodingAlias (const char *name, |
| 228 | const char *alias);</pre><p> |
| 229 | Registers an alias <i><tt>alias</tt></i> for an encoding named <i><tt>name</tt></i>. An existing alias |
| 230 | will be overwritten.</p><p> |
| 231 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 232 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually) |
| 233 | </td></tr><tr><td><span class="term"><i><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) |
| 234 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 235 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDelEncodingAlias"></a>xmlDelEncodingAlias ()</h3><pre class="programlisting">int xmlDelEncodingAlias (const char *alias);</pre><p> |
| 236 | Unregisters an encoding alias <i><tt>alias</tt></i></p><p> |
| 237 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 238 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) |
| 239 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 240 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetEncodingAlias"></a>xmlGetEncodingAlias ()</h3><pre class="programlisting">const char* xmlGetEncodingAlias (const char *alias);</pre><p> |
| 241 | Lookup an encoding name for the given alias.</p><p> |
| 242 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 243 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) |
| 244 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>NULL if not found the original name otherwise |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 245 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupEncodingAliases"></a>xmlCleanupEncodingAliases ()</h3><pre class="programlisting">void xmlCleanupEncodingAliases (void);</pre><p> |
| 246 | Unregisters all aliases</p><p> |
| 247 | |
| 248 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlParseCharEncoding"></a>xmlParseCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlParseCharEncoding (const char *name);</pre><p> |
| 249 | Compare the string to the encoding schemes already known. Note |
Daniel Veillard | 3bff2b0 | 2000-10-01 20:33:47 +0000 | [diff] [blame] | 250 | that the comparison is case insensitive according to section |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 251 | [XML] 4.3.3 Character Encoding in Entities.</p><p> |
| 252 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 253 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually) |
| 254 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE |
| 255 | if not recognized. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 256 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingName"></a>xmlGetCharEncodingName ()</h3><pre class="programlisting">const char* xmlGetCharEncodingName (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> |
| 257 | The "canonical" name for XML encoding. |
| 258 | C.f. http://www.w3.org/TR/REC-xml#charencoding |
| 259 | Section 4.3.3 Character Encoding in Entities</p><p> |
| 260 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 261 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i> :</span></td><td> the encoding |
| 262 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the canonical name for the given encoding |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 263 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDetectCharEncoding"></a>xmlDetectCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlDetectCharEncoding (unsigned char *in, |
| 264 | int len);</pre><p> |
| 265 | Guess the encoding of the entity using the first bytes of the entity content |
| 266 | according to the non-normative appendix F of the XML-1.0 recommendation.</p><p> |
| 267 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 268 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to the first bytes of the XML entity, must be at least |
| 269 | 4 bytes long. |
| 270 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> the length of the buffer |
| 271 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 272 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncOutFunc"></a>xmlCharEncOutFunc ()</h3><pre class="programlisting">int xmlCharEncOutFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 273 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 274 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> |
| 275 | Generic front-end for the encoding handler output function. |
| 276 | A first call with <i><tt>in</tt></i> == NULL has to be made first to initiate the |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 277 | output in case of non-stateless encoding needing to initiate their |
| 278 | state or the output (like the BOM in UTF16). |
| 279 | In case of UTF8 sequence conversion errors for the given encoder, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 280 | the content will be automatically remapped to a CharRef sequence.</p><p> |
| 281 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 282 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
| 283 | </td></tr><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output. |
| 284 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input |
| 285 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or |
| 286 | -1 in case of general error, or |
| 287 | -2 if the transcoding fails (because *in is not a valid utf8 string or |
| 288 | the result of transformation can't fit into the encoding we want) |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 289 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncInFunc"></a>xmlCharEncInFunc ()</h3><pre class="programlisting">int xmlCharEncInFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 290 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 291 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> |
| 292 | Generic front-end for the encoding handler input function</p><p> |
| 293 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 294 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
| 295 | </td></tr><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output. |
| 296 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input |
| 297 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or |
| 298 | -1 in case of general error, or |
| 299 | -2 if the transcoding fails (because *in is not a valid utf8 string or |
| 300 | the result of transformation can't fit into the encoding we want) |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 301 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncFirstLine"></a>xmlCharEncFirstLine ()</h3><pre class="programlisting">int xmlCharEncFirstLine (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 302 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 303 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> |
| 304 | Front-end for the encoding handler input function, but handle only |
| 305 | the very first line, i.e. limit itself to 45 chars.</p><p> |
| 306 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 307 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
| 308 | </td></tr><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output. |
| 309 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input |
| 310 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or |
| 311 | -1 in case of general error, or |
| 312 | -2 if the transcoding fails (because *in is not a valid utf8 string or |
| 313 | the result of transformation can't fit into the encoding we want) |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 314 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncCloseFunc"></a>xmlCharEncCloseFunc ()</h3><pre class="programlisting">int xmlCharEncCloseFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);</pre><p> |
| 315 | Generic front-end for encoding handler close function</p><p> |
| 316 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 317 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
| 318 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 in case of error |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 319 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8Toisolat1"></a>UTF8Toisolat1 ()</h3><pre class="programlisting">int UTF8Toisolat1 (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 320 | int *outlen, |
| 321 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 322 | int *inlen);</pre><p> |
| 323 | Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 |
| 324 | block of chars out.</p><p> |
| 325 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 326 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result |
| 327 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 328 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars |
| 329 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
| 330 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise |
| 331 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
| 332 | as the return value is positive, else unpredictable. |
| 333 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 334 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="isolat1ToUTF8"></a>isolat1ToUTF8 ()</h3><pre class="programlisting">int isolat1ToUTF8 (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 335 | int *outlen, |
| 336 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 337 | int *inlen);</pre><p> |
| 338 | Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 |
| 339 | block of chars out.</p><p> |
| 340 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 341 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result |
| 342 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 343 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of ISO Latin 1 chars |
| 344 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
| 345 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 otherwise |
| 346 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
| 347 | as the return value is positive, else unpredictable. |
| 348 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 349 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetUTF8Char"></a>xmlGetUTF8Char ()</h3><pre class="programlisting">int xmlGetUTF8Char (unsigned char *utf, |
| 350 | int *len);</pre><p> |
| 351 | Read one UTF8 Char from <i><tt>utf</tt></i></p><p> |
| 352 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 353 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
| 354 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> a pointer to <i><tt>bytes</tt></i> len |
| 355 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the char value or -1 in case of error and update <i><tt>len</tt></i> with the |
| 356 | number of bytes used |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 357 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCheckUTF8"></a>xmlCheckUTF8 ()</h3><pre class="programlisting">int xmlCheckUTF8 (unsigned char *utf);</pre><p> |
| 358 | Checks <i><tt>utf</tt></i> for being valid utf-8. <i><tt>utf</tt></i> is assumed to be |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 359 | null-terminated. This function is not super-strict, as it will |
| 360 | allow longer utf-8 sequences than necessary. Note that Java is |
| 361 | capable of producing these sequences if provoked. Also note, this |
Daniel Veillard | cbaf399 | 2001-12-31 16:16:02 +0000 | [diff] [blame] | 362 | routine checks for the 4-byte maximum size, but does not check for |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 363 | 0x10ffff maximum value.</p><p> |
| 364 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 365 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> Pointer to putative utf-8 encoded string. |
| 366 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> true if <i><tt>utf</tt></i> is valid. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 367 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsize"></a>xmlUTF8Strsize ()</h3><pre class="programlisting">int xmlUTF8Strsize (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 368 | int len);</pre><p> |
| 369 | storage size of an UTF8 string</p><p> |
| 370 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 371 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
| 372 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> the number of characters in the array |
| 373 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the storage size of |
| 374 | the first <i><tt>len</tt></i> characters of <i><tt>utf</tt></i> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 375 | |
| 376 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strndup"></a>xmlUTF8Strndup ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strndup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 377 | int len);</pre><p> |
| 378 | a strndup for array of UTF8's</p><p> |
| 379 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 380 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> the input UTF8 * |
| 381 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> the len of <i><tt>utf</tt></i> (in chars) |
| 382 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a new UTF8 * or NULL |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 383 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strpos"></a>xmlUTF8Strpos ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strpos (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 384 | int pos);</pre><p> |
| 385 | a function to provide the equivalent of fetching a |
| 386 | character from a string array</p><p> |
| 387 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 388 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> the input UTF8 * |
| 389 | </td></tr><tr><td><span class="term"><i><tt>pos</tt></i> :</span></td><td> the position of the desired UTF8 char (in chars) |
| 390 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to the UTF8 character or NULL |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 391 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strloc"></a>xmlUTF8Strloc ()</h3><pre class="programlisting">int xmlUTF8Strloc (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 392 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);</pre><p> |
| 393 | a function to provide relative location of a UTF8 char</p><p> |
| 394 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 395 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> the input UTF8 * |
| 396 | </td></tr><tr><td><span class="term"><i><tt>utfchar</tt></i> :</span></td><td> the UTF8 character to be found |
| 397 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the relative character position of the desired char |
| 398 | or -1 if not found |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 399 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsub"></a>xmlUTF8Strsub ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strsub (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 400 | int start, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 401 | int len);</pre><p> |
| 402 | Note: positions are given in units of UTF-8 chars</p><p> |
| 403 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 404 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
| 405 | </td></tr><tr><td><span class="term"><i><tt>start</tt></i> :</span></td><td> relative pos of first char |
| 406 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> total number to copy |
| 407 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to a newly created string |
| 408 | or NULL if any problem |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 409 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strlen"></a>xmlUTF8Strlen ()</h3><pre class="programlisting">int xmlUTF8Strlen (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p> |
| 410 | Compute the length of a UTF8 string; it doesn't do a full UTF8 |
| 411 | check of the content of the string.</p><p> |
| 412 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 413 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
| 414 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of characters in the string or -1 in case of error |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 415 | </td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-parserInternals.html"><b>&lt;&lt; parserInternals</b></a></td><td align="right"><a accesskey="n" href="libxml-hash.html"><b>hash &gt;&gt;</b></a></td></tr></table></body></html> |