Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 1 | <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>encoding</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css"> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 2 | .synopsis, .classsynopsis { |
| 3 | background: #eeeeee; |
| 4 | border: solid 1px #aaaaaa; |
| 5 | padding: 0.5em; |
| 6 | } |
| 7 | .programlisting { |
| 8 | background: #eeeeff; |
| 9 | border: solid 1px #aaaaff; |
| 10 | padding: 0.5em; |
| 11 | } |
| 12 | .variablelist { |
| 13 | padding: 4px; |
| 14 | margin-left: 3em; |
| 15 | } |
| 16 | .navigation { |
| 17 | background: #ffeeee; |
| 18 | border: solid 1px #ffaaaa; |
| 19 | margin-top: 0.5em; |
| 20 | margin-bottom: 0.5em; |
| 21 | } |
| 22 | .navigation a { |
| 23 | color: #770000; |
| 24 | } |
| 25 | .navigation a:visited { |
| 26 | color: #550000; |
| 27 | } |
| 28 | .navigation .title { |
| 29 | font-size: 200%; |
| 30 | } |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 31 | </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-parserInternals.html" title="parserInternals"><link rel="next" href="libxml-hash.html" title="hash"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-parserInternals.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-hash.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-encoding"></a><div class="titlepage"></div><div class="refnamediv"><h2>encoding</h2><p>encoding — </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis"> |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 32 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 33 | |
| 34 | |
| 35 | enum <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a>; |
| 36 | int (<a href="libxml-encoding.html#xmlCharEncodingInputFunc">*xmlCharEncodingInputFunc</a>) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 37 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 38 | unsigned char *in, |
Daniel Veillard | edfb29b | 2000-03-14 19:59:05 +0000 | [diff] [blame] | 39 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 40 | int (<a href="libxml-encoding.html#xmlCharEncodingOutputFunc">*xmlCharEncodingOutputFunc</a>) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 41 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 42 | unsigned char *in, |
Daniel Veillard | edfb29b | 2000-03-14 19:59:05 +0000 | [diff] [blame] | 43 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 44 | struct <a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a>; |
| 45 | typedef <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a>; |
| 46 | void <a href="libxml-encoding.html#xmlInitCharEncodingHandlers">xmlInitCharEncodingHandlers</a> (void); |
| 47 | void <a href="libxml-encoding.html#xmlCleanupCharEncodingHandlers">xmlCleanupCharEncodingHandlers</a> (void); |
| 48 | void <a href="libxml-encoding.html#xmlRegisterCharEncodingHandler">xmlRegisterCharEncodingHandler</a> (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler); |
| 49 | <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlGetCharEncodingHandler">xmlGetCharEncodingHandler</a> |
| 50 | (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); |
| 51 | <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlFindCharEncodingHandler">xmlFindCharEncodingHandler</a> |
Daniel Veillard | edfb29b | 2000-03-14 19:59:05 +0000 | [diff] [blame] | 52 | (const char *name); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 53 | <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlNewCharEncodingHandler">xmlNewCharEncodingHandler</a> |
Daniel Veillard | 2ace195 | 2002-09-26 12:28:02 +0000 | [diff] [blame] | 54 | (const char *name, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 55 | <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, |
| 56 | <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output); |
| 57 | int <a href="libxml-encoding.html#xmlAddEncodingAlias">xmlAddEncodingAlias</a> (const char *name, |
Daniel Veillard | 3bff2b0 | 2000-10-01 20:33:47 +0000 | [diff] [blame] | 58 | const char *alias); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 59 | int <a href="libxml-encoding.html#xmlDelEncodingAlias">xmlDelEncodingAlias</a> (const char *alias); |
| 60 | const char* <a href="libxml-encoding.html#xmlGetEncodingAlias">xmlGetEncodingAlias</a> (const char *alias); |
| 61 | void <a href="libxml-encoding.html#xmlCleanupEncodingAliases">xmlCleanupEncodingAliases</a> (void); |
| 62 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlParseCharEncoding">xmlParseCharEncoding</a> (const char *name); |
| 63 | const char* <a href="libxml-encoding.html#xmlGetCharEncodingName">xmlGetCharEncodingName</a> (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); |
| 64 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlDetectCharEncoding">xmlDetectCharEncoding</a> (unsigned char *in, |
Daniel Veillard | 3bff2b0 | 2000-10-01 20:33:47 +0000 | [diff] [blame] | 65 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 66 | int <a href="libxml-encoding.html#xmlCharEncOutFunc">xmlCharEncOutFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 67 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 68 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); |
| 69 | int <a href="libxml-encoding.html#xmlCharEncInFunc">xmlCharEncInFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 70 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 71 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); |
| 72 | int <a href="libxml-encoding.html#xmlCharEncFirstLine">xmlCharEncFirstLine</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 73 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 74 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in); |
| 75 | int <a href="libxml-encoding.html#xmlCharEncCloseFunc">xmlCharEncCloseFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler); |
| 76 | int <a href="libxml-encoding.html#UTF8Toisolat1">UTF8Toisolat1</a> (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 77 | int *outlen, |
| 78 | unsigned char *in, |
| 79 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 80 | int <a href="libxml-encoding.html#isolat1ToUTF8">isolat1ToUTF8</a> (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 81 | int *outlen, |
| 82 | unsigned char *in, |
| 83 | int *inlen); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 84 | int <a href="libxml-encoding.html#xmlGetUTF8Char">xmlGetUTF8Char</a> (unsigned char *utf, |
Daniel Veillard | aec6356 | 2003-03-23 20:42:17 +0000 | [diff] [blame] | 85 | int *len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 86 | int <a href="libxml-encoding.html#xmlCheckUTF8">xmlCheckUTF8</a> (unsigned char *utf); |
| 87 | int <a href="libxml-encoding.html#xmlUTF8Strsize">xmlUTF8Strsize</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 88 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 89 | <a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strndup">xmlUTF8Strndup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 90 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 91 | <a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strpos">xmlUTF8Strpos</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 92 | int pos); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 93 | int <a href="libxml-encoding.html#xmlUTF8Strloc">xmlUTF8Strloc</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 94 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar); |
| 95 | <a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strsub">xmlUTF8Strsub</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 96 | int start, |
| 97 | int len); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 98 | int <a href="libxml-encoding.html#xmlUTF8Strlen">xmlUTF8Strlen</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf); |
Daniel Veillard | c758c22 | 2003-08-04 20:42:34 +0000 | [diff] [blame] | 99 | int <a href="libxml-encoding.html#xmlUTF8Size">xmlUTF8Size</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf); |
| 100 | int <a href="libxml-encoding.html#xmlUTF8Charcmp">xmlUTF8Charcmp</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1, |
| 101 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf2); |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 102 | </pre></div><div class="refsect1" lang="en"><h2>Description</h2><p> |
| 103 | |
| 104 | </p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="xmlCharEncoding"></a>enum xmlCharEncoding</h3><pre class="programlisting">typedef enum { |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 105 | XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ |
| 106 | XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ |
| 107 | XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ |
| 108 | XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ |
| 109 | XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ |
| 110 | XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ |
| 111 | XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ |
| 112 | XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ |
| 113 | XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ |
| 114 | XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ |
| 115 | XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ |
| 116 | XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ |
| 117 | XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ |
| 118 | XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ |
| 119 | XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ |
| 120 | XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ |
| 121 | XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ |
| 122 | XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ |
| 123 | XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ |
| 124 | XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ |
| 125 | XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ |
| 126 | XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ |
Daniel Veillard | e46e20d | 2000-07-14 15:02:46 +0000 | [diff] [blame] | 127 | XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ |
| 128 | XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 129 | } xmlCharEncoding; |
| 130 | </pre><p> |
| 131 | Predefined values for some standard encodings. |
Daniel Veillard | 1927409 | 2002-03-25 16:48:03 +0000 | [diff] [blame] | 132 | Libxml doesn't do beforehand translation on UTF8, ISOLatinX. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 133 | It also supports UTF16 (LE and BE) by default. |
| 134 | </p><p> |
| 135 | Anything else would have to be translated to UTF8 before being |
Daniel Veillard | e7ead2d | 2001-08-22 23:44:09 +0000 | [diff] [blame] | 136 | given to the parser itself. The BOM for UTF16 and the encoding |
| 137 | declaration are looked at and a converter is looked for at that |
| 138 | point. If not found the parser stops here as asked by the XML REC. |
| 139 | Converters can be registered by the user using xmlRegisterCharEncodingHandler |
Daniel Veillard | cbaf399 | 2001-12-31 16:16:02 +0000 | [diff] [blame] | 140 | but the current form doesn't allow stateful transcoding (a serious |
Daniel Veillard | e7ead2d | 2001-08-22 23:44:09 +0000 | [diff] [blame] | 141 | problem agreed !). If iconv has been found it will be used |
| 142 | automatically and allow stateful transcoding, the simplest is then |
| 143 | to be sure to enable iconv and to provide iconv libs for the encoding |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 144 | support needed.</p><p> |
| 145 | |
| 146 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingInputFunc"></a>xmlCharEncodingInputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingInputFunc) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 147 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 148 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 149 | int *inlen);</pre><p> |
| 150 | Take a block of chars in the original encoding and try to convert |
| 151 | it to an UTF-8 block of chars out.</p><p> |
| 152 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 153 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the UTF-8 result |
| 154 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 155 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of chars in the original encoding |
| 156 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 157 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 for lack of space, or -2 |
| 158 | if the transcoding failed. |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 159 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 160 | if the return value is positive, else unpredictable. |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 161 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 162 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingOutputFunc"></a>xmlCharEncodingOutputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingOutputFunc) (unsigned char *out, |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 163 | int *outlen, |
Daniel Veillard | aeea04f | 2000-01-25 19:27:27 +0000 | [diff] [blame] | 164 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 165 | int *inlen);</pre><p> |
| 166 | Take a block of UTF-8 chars in and try to convert it to another |
Daniel Veillard | e7ead2d | 2001-08-22 23:44:09 +0000 | [diff] [blame] | 167 | encoding. |
| 168 | Note: a first call designed to produce heading info is called with |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 169 | in = NULL. If stateful this should also initialize the encoder state.</p><p> |
| 170 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 171 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result |
| 172 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 173 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars |
| 174 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 175 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 for lack of space, or -2 |
| 176 | if the transcoding failed. |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 177 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 178 | if the return value is positive, else unpredictable. |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 179 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 180 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandler"></a>struct xmlCharEncodingHandler</h3><pre class="programlisting">struct xmlCharEncodingHandler { |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 181 | char *name; |
| 182 | xmlCharEncodingInputFunc input; |
| 183 | xmlCharEncodingOutputFunc output; |
| 184 | #ifdef LIBXML_ICONV_ENABLED |
| 185 | iconv_t iconv_in; |
| 186 | iconv_t iconv_out; |
| 187 | #endif /* LIBXML_ICONV_ENABLED */ |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 188 | }; |
| 189 | </pre><p> |
| 190 | |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 191 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandlerPtr"></a>xmlCharEncodingHandlerPtr</h3><pre class="programlisting">typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; |
| 192 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 193 | |
| 194 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlInitCharEncodingHandlers"></a>xmlInitCharEncodingHandlers ()</h3><pre class="programlisting">void xmlInitCharEncodingHandlers (void);</pre><p> |
| 195 | Initialize the char encoding support, it registers the default |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 196 | encodings supported. |
| 197 | NOTE: while public, this function usually doesn't need to be called |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 198 | in normal processing.</p><p> |
| 199 | |
| 200 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupCharEncodingHandlers"></a>xmlCleanupCharEncodingHandlers ()</h3><pre class="programlisting">void xmlCleanupCharEncodingHandlers (void);</pre><p> |
| 201 | Cleanup the memory allocated for the char encoding support, it |
| 202 | unregisters all the encoding handlers and the aliases.</p><p> |
| 203 | |
| 204 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlRegisterCharEncodingHandler"></a>xmlRegisterCharEncodingHandler ()</h3><pre class="programlisting">void xmlRegisterCharEncodingHandler (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);</pre><p> |
| 205 | Register the char encoding handler, surprising, isn't it ?</p><p> |
| 206 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 207 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> the xmlCharEncodingHandlerPtr handler block |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 208 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingHandler"></a>xmlGetCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlGetCharEncodingHandler |
| 209 | (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> |
| 210 | Search in the registered set the handler able to read/write that encoding.</p><p> |
| 211 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 212 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i> :</span></td><td> an xmlCharEncoding value. |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 213 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 214 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlFindCharEncodingHandler"></a>xmlFindCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlFindCharEncodingHandler |
| 215 | (const char *name);</pre><p> |
| 216 | Search in the registered set the handler able to read/write that encoding.</p><p> |
| 217 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 218 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> a string describing the char encoding. |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 219 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 220 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlNewCharEncodingHandler"></a>xmlNewCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlNewCharEncodingHandler |
Daniel Veillard | 2ace195 | 2002-09-26 12:28:02 +0000 | [diff] [blame] | 221 | (const char *name, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 222 | <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input, |
| 223 | <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);</pre><p> |
| 224 | Creates and registers an xmlCharEncodingHandler.</p><p> |
| 225 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 226 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> the encoding name, in UTF-8 format (ASCII actually) |
| 227 | </td></tr><tr><td><span class="term"><i><tt>input</tt></i> :</span></td><td> the xmlCharEncodingInputFunc to read that encoding |
| 228 | </td></tr><tr><td><span class="term"><i><tt>output</tt></i> :</span></td><td> the xmlCharEncodingOutputFunc to write that encoding |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 229 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the xmlCharEncodingHandlerPtr created (or NULL in case of error). |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 230 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlAddEncodingAlias"></a>xmlAddEncodingAlias ()</h3><pre class="programlisting">int xmlAddEncodingAlias (const char *name, |
| 231 | const char *alias);</pre><p> |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 232 | Registers an alias <i><tt>alias</tt></i> for an encoding named <i><tt>name</tt></i>. Existing alias |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 233 | will be overwritten.</p><p> |
| 234 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 235 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually) |
| 236 | </td></tr><tr><td><span class="term"><i><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 237 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 238 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDelEncodingAlias"></a>xmlDelEncodingAlias ()</h3><pre class="programlisting">int xmlDelEncodingAlias (const char *alias);</pre><p> |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 239 | Unregisters an encoding alias <i><tt>alias</tt></i></p><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 240 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 241 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 242 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 243 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetEncodingAlias"></a>xmlGetEncodingAlias ()</h3><pre class="programlisting">const char* xmlGetEncodingAlias (const char *alias);</pre><p> |
| 244 | Lookup an encoding name for the given alias.</p><p> |
| 245 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 246 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually) |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 247 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>NULL if not found, the original name otherwise |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 248 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupEncodingAliases"></a>xmlCleanupEncodingAliases ()</h3><pre class="programlisting">void xmlCleanupEncodingAliases (void);</pre><p> |
| 249 | Unregisters all aliases</p><p> |
| 250 | |
| 251 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlParseCharEncoding"></a>xmlParseCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlParseCharEncoding (const char *name);</pre><p> |
| 252 | Compare the string to the encoding schemes already known. Note |
Daniel Veillard | 3bff2b0 | 2000-10-01 20:33:47 +0000 | [diff] [blame] | 253 | that the comparison is case insensitive according to section |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 254 | [XML] 4.3.3 Character Encoding in Entities.</p><p> |
| 255 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 256 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually) |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 257 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE |
| 258 | if not recognized. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 259 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingName"></a>xmlGetCharEncodingName ()</h3><pre class="programlisting">const char* xmlGetCharEncodingName (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> |
Daniel Veillard | cfba2fe | 2003-08-15 00:33:43 +0000 | [diff] [blame] | 260 | The "canonical" name for XML encoding. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 261 | Cf. <a href="http://www.w3.org/TR/REC-xml#charencoding">http://www.w3.org/TR/REC-xml#charencoding</a> |
| 262 | Section 4.3.3 Character Encoding in Entities</p><p> |
| 263 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 264 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>enc</tt></i> :</span></td><td> the encoding |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 265 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the canonical name for the given encoding |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 266 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDetectCharEncoding"></a>xmlDetectCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlDetectCharEncoding (unsigned char *in, |
| 267 | int len);</pre><p> |
| 268 | Guess the encoding of the entity using the first bytes of the entity content |
| 269 | according to the non-normative appendix F of the XML-1.0 recommendation.</p><p> |
| 270 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 271 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to the first bytes of the XML entity, must be at least |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 272 | 4 bytes long. |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 273 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> the length of the buffer |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 274 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 275 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncOutFunc"></a>xmlCharEncOutFunc ()</h3><pre class="programlisting">int xmlCharEncOutFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 276 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 277 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> |
| 278 | Generic front-end for the encoding handler output function |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 279 | a first call with <i><tt>in</tt></i> == NULL has to be made first to initiate the |
Daniel Veillard | 3f6f7f6 | 2000-06-30 17:58:25 +0000 | [diff] [blame] | 280 | output in case of non-stateless encoding needing to initiate their |
| 281 | state or the output (like the BOM in UTF16). |
| 282 | In case of UTF8 sequence conversion errors for the given encoder, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 283 | the content will be automatically remapped to a CharRef sequence.</p><p> |
| 284 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 285 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
| 286 | </td></tr><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output. |
| 287 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 288 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or |
| 289 | -1 in case of general error, or |
| 290 | -2 if the transcoding fails (for *in is not valid utf8 string or |
| 291 | the result of transformation can't fit into the encoding we want) |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 292 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncInFunc"></a>xmlCharEncInFunc ()</h3><pre class="programlisting">int xmlCharEncInFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 293 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 294 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> |
| 295 | Generic front-end for the encoding handler input function</p><p> |
| 296 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 297 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
| 298 | </td></tr><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output. |
| 299 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 300 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or |
| 301 | -1 in case of general error, or |
| 302 | -2 if the transcoding fails (for *in is not valid utf8 string or |
| 303 | the result of transformation can't fit into the encoding we want) |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 304 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncFirstLine"></a>xmlCharEncFirstLine ()</h3><pre class="programlisting">int xmlCharEncFirstLine (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler, |
| 305 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out, |
| 306 | <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p> |
| 307 | Front-end for the encoding handler input function, but handle only |
| 308 | the very first line, i.e. limit itself to 45 chars.</p><p> |
| 309 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 310 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
| 311 | </td></tr><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output. |
| 312 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 313 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or |
| 314 | -1 in case of general error, or |
| 315 | -2 if the transcoding fails (for *in is not valid utf8 string or |
| 316 | the result of transformation can't fit into the encoding we want) |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 317 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncCloseFunc"></a>xmlCharEncCloseFunc ()</h3><pre class="programlisting">int xmlCharEncCloseFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);</pre><p> |
| 318 | Generic front-end for encoding handler close function</p><p> |
| 319 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 320 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 321 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 in case of error |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 322 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8Toisolat1"></a>UTF8Toisolat1 ()</h3><pre class="programlisting">int UTF8Toisolat1 (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 323 | int *outlen, |
| 324 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 325 | int *inlen);</pre><p> |
| 326 | Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 |
| 327 | block of chars out.</p><p> |
| 328 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 329 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result |
| 330 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 331 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars |
| 332 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 333 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 334 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 335 | if the return value is positive, else unpredictable. |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 336 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 337 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="isolat1ToUTF8"></a>isolat1ToUTF8 ()</h3><pre class="programlisting">int isolat1ToUTF8 (unsigned char *out, |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 338 | int *outlen, |
| 339 | unsigned char *in, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 340 | int *inlen);</pre><p> |
| 341 | Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 |
| 342 | block of chars out.</p><p> |
| 343 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 344 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result |
| 345 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> the length of <i><tt>out</tt></i> |
| 346 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> a pointer to an array of ISO Latin 1 chars |
| 347 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> the length of <i><tt>in</tt></i> |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 348 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 otherwise |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 349 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 350 | if the return value is positive, else unpredictable. |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 351 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 352 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetUTF8Char"></a>xmlGetUTF8Char ()</h3><pre class="programlisting">int xmlGetUTF8Char (unsigned char *utf, |
| 353 | int *len);</pre><p> |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 354 | Read one UTF8 Char from <i><tt>utf</tt></i></p><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 355 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 356 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
| 357 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> a pointer to <i><tt>bytes</tt></i> len |
| 358 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the char value or -1 in case of error and update <i><tt>len</tt></i> with the |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 359 | number of bytes used |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 360 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCheckUTF8"></a>xmlCheckUTF8 ()</h3><pre class="programlisting">int xmlCheckUTF8 (unsigned char *utf);</pre><p> |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 361 | Checks <i><tt>utf</tt></i> for being valid utf-8. <i><tt>utf</tt></i> is assumed to be |
Daniel Veillard | a41123c | 2001-04-22 19:31:20 +0000 | [diff] [blame] | 362 | null-terminated. This function is not super-strict, as it will |
| 363 | allow longer utf-8 sequences than necessary. Note that Java is |
| 364 | capable of producing these sequences if provoked. Also note, this |
Daniel Veillard | cbaf399 | 2001-12-31 16:16:02 +0000 | [diff] [blame] | 365 | routine checks for the 4-byte maximum size, but does not check for |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 366 | 0x10ffff maximum value.</p><p> |
| 367 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 368 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> Pointer to putative utf-8 encoded string. |
| 369 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> true if <i><tt>utf</tt></i> is valid. |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 370 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsize"></a>xmlUTF8Strsize ()</h3><pre class="programlisting">int xmlUTF8Strsize (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 371 | int len);</pre><p> |
| 372 | storage size of an UTF8 string</p><p> |
| 373 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 374 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
| 375 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> the number of characters in the array |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 376 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the storage size of |
| 377 | the first 'len' characters of ARRAY |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 378 | |
| 379 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strndup"></a>xmlUTF8Strndup ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strndup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 380 | int len);</pre><p> |
| 381 | a strndup for array of UTF8's</p><p> |
| 382 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 383 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> the input UTF8 * |
| 384 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> the len of <i><tt>utf</tt></i> (in chars) |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 385 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a new UTF8 * or NULL |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 386 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strpos"></a>xmlUTF8Strpos ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strpos (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 387 | int pos);</pre><p> |
| 388 | a function to provide the equivalent of fetching a |
| 389 | character from a string array</p><p> |
| 390 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 391 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> the input UTF8 * |
| 392 | </td></tr><tr><td><span class="term"><i><tt>pos</tt></i> :</span></td><td> the position of the desired UTF8 char (in chars) |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 393 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to the UTF8 character or NULL |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 394 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strloc"></a>xmlUTF8Strloc ()</h3><pre class="programlisting">int xmlUTF8Strloc (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
| 395 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);</pre><p> |
| 396 | a function to provide relative location of a UTF8 char</p><p> |
| 397 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 398 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> the input UTF8 * |
| 399 | </td></tr><tr><td><span class="term"><i><tt>utfchar</tt></i> :</span></td><td> the UTF8 character to be found |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 400 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the relative character position of the desired char |
| 401 | or -1 if not found |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 402 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsub"></a>xmlUTF8Strsub ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strsub (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf, |
Daniel Veillard | 4ec885a | 2001-06-17 10:31:07 +0000 | [diff] [blame] | 403 | int start, |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 404 | int len);</pre><p> |
| 405 | Note: positions are given in units of UTF-8 chars</p><p> |
| 406 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 407 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
| 408 | </td></tr><tr><td><span class="term"><i><tt>start</tt></i> :</span></td><td> relative pos of first char |
| 409 | </td></tr><tr><td><span class="term"><i><tt>len</tt></i> :</span></td><td> total number to copy |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 410 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to a newly created string |
| 411 | or NULL if any problem |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 412 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strlen"></a>xmlUTF8Strlen ()</h3><pre class="programlisting">int xmlUTF8Strlen (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p> |
| 413 | compute the length of an UTF8 string, it doesn't do a full UTF8 |
| 414 | checking of the content of the string.</p><p> |
| 415 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 416 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 417 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of characters in the string or -1 in case of error |
Daniel Veillard | c758c22 | 2003-08-04 20:42:34 +0000 | [diff] [blame] | 418 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Size"></a>xmlUTF8Size ()</h3><pre class="programlisting">int xmlUTF8Size (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p> |
William M. Brack | c6e0755 | 2003-08-16 12:44:47 +0000 | [diff] [blame] | 419 | calculates the internal size of a UTF8 character</p><p> |
Daniel Veillard | c758c22 | 2003-08-04 20:42:34 +0000 | [diff] [blame] | 420 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 421 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf</tt></i> :</span></td><td> pointer to the UTF8 character |
Daniel Veillard | c758c22 | 2003-08-04 20:42:34 +0000 | [diff] [blame] | 422 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes in the character, -1 on format error |
| 423 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Charcmp"></a>xmlUTF8Charcmp ()</h3><pre class="programlisting">int xmlUTF8Charcmp (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1, |
| 424 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf2);</pre><p> |
William M. Brack | c6e0755 | 2003-08-16 12:44:47 +0000 | [diff] [blame] | 425 | compares the two UCS4 values</p><p> |
Daniel Veillard | c758c22 | 2003-08-04 20:42:34 +0000 | [diff] [blame] | 426 | |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 427 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>utf1</tt></i> :</span></td><td> pointer to first UTF8 char |
| 428 | </td></tr><tr><td><span class="term"><i><tt>utf2</tt></i> :</span></td><td> pointer to second UTF8 char |
William M. Brack | c6e0755 | 2003-08-16 12:44:47 +0000 | [diff] [blame] | 429 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>result of the compare as with xmlStrncmp |
Daniel Veillard | 2fdbd32 | 2003-08-18 12:15:38 +0000 | [diff] [blame] | 430 | </td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-parserInternals.html"><b>&lt;&lt; parserInternals</b></a></td><td align="right"><a accesskey="n" href="libxml-hash.html"><b>hash &gt;&gt;</b></a></td></tr></table></body></html> |