blob: 7031a726877960a8b2dcd73947b090ac4746848c [file] [log] [blame]
William M. Brackc6e07552003-08-16 12:44:47 +00001<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>encoding</title><meta name="generator" content="DocBook XSL Stylesheets V1.60.1"><meta xmlns="http://www.w3.org/TR/xhtml1/transitional" name="generator" content="GTK-Doc V1.0 (XML mode)"></meta><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css">
Daniel Veillardd4330462003-04-29 12:40:16 +00002 .synopsis, .classsynopsis {
3 background: #eeeeee;
4 border: solid 1px #aaaaaa;
5 padding: 0.5em;
6 }
7 .programlisting {
8 background: #eeeeff;
9 border: solid 1px #aaaaff;
10 padding: 0.5em;
11 }
12 .variablelist {
13 padding: 4px;
14 margin-left: 3em;
15 }
16 .navigation {
17 background: #ffeeee;
18 border: solid 1px #ffaaaa;
19 margin-top: 0.5em;
20 margin-bottom: 0.5em;
21 }
22 .navigation a {
23 color: #770000;
24 }
25 .navigation a:visited {
26 color: #550000;
27 }
28 .navigation .title {
29 font-size: 200%;
30 }
William M. Brackc6e07552003-08-16 12:44:47 +000031 </style><link rel="home" href="index.html" title="[Insert name here] Reference Manual"><link rel="up" href="ch01.html" title="[Insert title here]"><link rel="previous" href="libxml-wsockcompat.html" title="wsockcompat"><link rel="next" href="libxml-xmlregexp.html" title="xmlregexp"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-wsockcompat.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="ch01.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">[Insert name here] Reference Manual</th><td><a accesskey="n" href="libxml-xmlregexp.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-encoding"></a><div class="titlepage"><div></div><div></div></div><div class="refnamediv"><h2><span class="refentrytitle">encoding</span></h2><p>encoding &#8212; </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis">
Daniel Veillardaeea04f2000-01-25 19:27:27 +000032
Daniel Veillardd4330462003-04-29 12:40:16 +000033
34
35enum <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a>;
36int (<a href="libxml-encoding.html#xmlCharEncodingInputFunc">*xmlCharEncodingInputFunc</a>) (unsigned char *out,
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000037 int *outlen,
Daniel Veillardaeea04f2000-01-25 19:27:27 +000038 unsigned char *in,
Daniel Veillardedfb29b2000-03-14 19:59:05 +000039 int *inlen);
Daniel Veillardd4330462003-04-29 12:40:16 +000040int (<a href="libxml-encoding.html#xmlCharEncodingOutputFunc">*xmlCharEncodingOutputFunc</a>) (unsigned char *out,
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000041 int *outlen,
Daniel Veillardaeea04f2000-01-25 19:27:27 +000042 unsigned char *in,
Daniel Veillardedfb29b2000-03-14 19:59:05 +000043 int *inlen);
Daniel Veillardd4330462003-04-29 12:40:16 +000044struct <a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a>;
45typedef <a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a>;
46void <a href="libxml-encoding.html#xmlInitCharEncodingHandlers">xmlInitCharEncodingHandlers</a> (void);
47void <a href="libxml-encoding.html#xmlCleanupCharEncodingHandlers">xmlCleanupCharEncodingHandlers</a> (void);
48void <a href="libxml-encoding.html#xmlRegisterCharEncodingHandler">xmlRegisterCharEncodingHandler</a> (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);
49<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlGetCharEncodingHandler">xmlGetCharEncodingHandler</a>
50 (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
51<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlFindCharEncodingHandler">xmlFindCharEncodingHandler</a>
Daniel Veillardedfb29b2000-03-14 19:59:05 +000052 (const char *name);
Daniel Veillardd4330462003-04-29 12:40:16 +000053<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> <a href="libxml-encoding.html#xmlNewCharEncodingHandler">xmlNewCharEncodingHandler</a>
Daniel Veillard2ace1952002-09-26 12:28:02 +000054 (const char *name,
Daniel Veillardd4330462003-04-29 12:40:16 +000055 <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input,
56 <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);
57int <a href="libxml-encoding.html#xmlAddEncodingAlias">xmlAddEncodingAlias</a> (const char *name,
Daniel Veillard3bff2b02000-10-01 20:33:47 +000058 const char *alias);
Daniel Veillardd4330462003-04-29 12:40:16 +000059int <a href="libxml-encoding.html#xmlDelEncodingAlias">xmlDelEncodingAlias</a> (const char *alias);
60const char* <a href="libxml-encoding.html#xmlGetEncodingAlias">xmlGetEncodingAlias</a> (const char *alias);
61void <a href="libxml-encoding.html#xmlCleanupEncodingAliases">xmlCleanupEncodingAliases</a> (void);
62<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlParseCharEncoding">xmlParseCharEncoding</a> (const char *name);
63const char* <a href="libxml-encoding.html#xmlGetCharEncodingName">xmlGetCharEncodingName</a> (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
64<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> <a href="libxml-encoding.html#xmlDetectCharEncoding">xmlDetectCharEncoding</a> (unsigned char *in,
Daniel Veillard3bff2b02000-10-01 20:33:47 +000065 int len);
Daniel Veillardd4330462003-04-29 12:40:16 +000066int <a href="libxml-encoding.html#xmlCharEncOutFunc">xmlCharEncOutFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
67 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
68 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);
69int <a href="libxml-encoding.html#xmlCharEncInFunc">xmlCharEncInFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
70 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
71 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);
72int <a href="libxml-encoding.html#xmlCharEncFirstLine">xmlCharEncFirstLine</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
73 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
74 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);
75int <a href="libxml-encoding.html#xmlCharEncCloseFunc">xmlCharEncCloseFunc</a> (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);
76int <a href="libxml-encoding.html#UTF8Toisolat1">UTF8Toisolat1</a> (unsigned char *out,
Daniel Veillarda41123c2001-04-22 19:31:20 +000077 int *outlen,
78 unsigned char *in,
79 int *inlen);
Daniel Veillardd4330462003-04-29 12:40:16 +000080int <a href="libxml-encoding.html#isolat1ToUTF8">isolat1ToUTF8</a> (unsigned char *out,
Daniel Veillarda41123c2001-04-22 19:31:20 +000081 int *outlen,
82 unsigned char *in,
83 int *inlen);
Daniel Veillardd4330462003-04-29 12:40:16 +000084int <a href="libxml-encoding.html#xmlGetUTF8Char">xmlGetUTF8Char</a> (unsigned char *utf,
Daniel Veillardaec63562003-03-23 20:42:17 +000085 int *len);
Daniel Veillardd4330462003-04-29 12:40:16 +000086int <a href="libxml-encoding.html#xmlCheckUTF8">xmlCheckUTF8</a> (unsigned char *utf);
87int <a href="libxml-encoding.html#xmlUTF8Strsize">xmlUTF8Strsize</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
Daniel Veillard4ec885a2001-06-17 10:31:07 +000088 int len);
Daniel Veillardd4330462003-04-29 12:40:16 +000089<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strndup">xmlUTF8Strndup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
Daniel Veillard4ec885a2001-06-17 10:31:07 +000090 int len);
Daniel Veillardd4330462003-04-29 12:40:16 +000091<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strpos">xmlUTF8Strpos</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
Daniel Veillard4ec885a2001-06-17 10:31:07 +000092 int pos);
Daniel Veillardd4330462003-04-29 12:40:16 +000093int <a href="libxml-encoding.html#xmlUTF8Strloc">xmlUTF8Strloc</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
94 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);
95<a href="libxml-tree.html#xmlChar">xmlChar</a>* <a href="libxml-encoding.html#xmlUTF8Strsub">xmlUTF8Strsub</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
Daniel Veillard4ec885a2001-06-17 10:31:07 +000096 int start,
97 int len);
Daniel Veillardd4330462003-04-29 12:40:16 +000098int <a href="libxml-encoding.html#xmlUTF8Strlen">xmlUTF8Strlen</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);
Daniel Veillardc758c222003-08-04 20:42:34 +000099int <a href="libxml-encoding.html#xmlUTF8Size">xmlUTF8Size</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);
100int <a href="libxml-encoding.html#xmlUTF8Charcmp">xmlUTF8Charcmp</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1,
101 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf2);
Daniel Veillardd4330462003-04-29 12:40:16 +0000102</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p>
103
104</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="xmlCharEncoding"></a>enum xmlCharEncoding</h3><pre class="programlisting">typedef enum {
Daniel Veillardaeea04f2000-01-25 19:27:27 +0000105 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
106 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */
107 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */
108 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */
109 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */
110 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */
111 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */
112 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */
113 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
114 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
115 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */
116 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */
117 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */
118 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */
119 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */
120 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */
121 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */
122 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */
123 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */
124 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */
125 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */
126 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
Daniel Veillarde46e20d2000-07-14 15:02:46 +0000127 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */
128 XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */
Daniel Veillardd4330462003-04-29 12:40:16 +0000129} xmlCharEncoding;
130</pre><p>
131Predefined values for some standard encodings.
Daniel Veillard19274092002-03-25 16:48:03 +0000132Libxml doesn't do beforehand translation on UTF8 or ISOLatinX.
Daniel Veillardd4330462003-04-29 12:40:16 +0000133It also supports UTF16 (LE and BE) by default.
134</p><p>
135Anything else would have to be translated to UTF8 before being
Daniel Veillarde7ead2d2001-08-22 23:44:09 +0000136given to the parser itself. The BOM for UTF16 and the encoding
137declaration are looked at and a converter is looked for at that
138point. If not found the parser stops here as asked by the XML REC.
139A converter can be registered by the user using xmlRegisterCharEncodingHandler
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000140but the current form doesn't allow stateful transcoding (a serious
Daniel Veillarde7ead2d2001-08-22 23:44:09 +0000141problem agreed !). If iconv has been found it will be used
142automatically and allow stateful transcoding, the simplest is then
143to be sure to enable iconv and to provide iconv libs for the encoding
Daniel Veillardd4330462003-04-29 12:40:16 +0000144support needed.</p><p>
145
146</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingInputFunc"></a>xmlCharEncodingInputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingInputFunc) (unsigned char *out,
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000147 int *outlen,
Daniel Veillardaeea04f2000-01-25 19:27:27 +0000148 unsigned char *in,
Daniel Veillardd4330462003-04-29 12:40:16 +0000149 int *inlen);</pre><p>
150Take a block of chars in the original encoding and try to convert
151it to a UTF-8 block of chars out.</p><p>
152
William M. Brackc6e07552003-08-16 12:44:47 +0000153</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the UTF-8 result
154</td></tr><tr><td><span class="term"><i class="parameter"><tt>outlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>out</tt></i>
155</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> a pointer to an array of chars in the original encoding
156</td></tr><tr><td><span class="term"><i class="parameter"><tt>inlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>in</tt></i>
Daniel Veillardd7cec922003-06-13 12:30:10 +0000157</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 for lack of space, or -2
158 if the transcoding failed.
William M. Brackc6e07552003-08-16 12:44:47 +0000159The value of <i class="parameter"><tt>inlen</tt></i> after return is the number of octets consumed
Daniel Veillardd7cec922003-06-13 12:30:10 +0000160 if the return value is positive, else unpredictable.
William M. Brackc6e07552003-08-16 12:44:47 +0000161The value of <i class="parameter"><tt>outlen</tt></i> after return is the number of octets produced.
Daniel Veillardd4330462003-04-29 12:40:16 +0000162</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingOutputFunc"></a>xmlCharEncodingOutputFunc ()</h3><pre class="programlisting">int (*xmlCharEncodingOutputFunc) (unsigned char *out,
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000163 int *outlen,
Daniel Veillardaeea04f2000-01-25 19:27:27 +0000164 unsigned char *in,
Daniel Veillardd4330462003-04-29 12:40:16 +0000165 int *inlen);</pre><p>
166Take a block of UTF-8 chars in and try to convert it to another
Daniel Veillarde7ead2d2001-08-22 23:44:09 +0000167encoding.
168Note: a first call designed to produce heading info is called with
Daniel Veillardd4330462003-04-29 12:40:16 +0000169in = NULL. If stateful this should also initialize the encoder state.</p><p>
170
William M. Brackc6e07552003-08-16 12:44:47 +0000171</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result
172</td></tr><tr><td><span class="term"><i class="parameter"><tt>outlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>out</tt></i>
173</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars
174</td></tr><tr><td><span class="term"><i class="parameter"><tt>inlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>in</tt></i>
Daniel Veillardd7cec922003-06-13 12:30:10 +0000175</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written, or -1 for lack of space, or -2
176 if the transcoding failed.
William M. Brackc6e07552003-08-16 12:44:47 +0000177The value of <i class="parameter"><tt>inlen</tt></i> after return is the number of octets consumed
Daniel Veillardd7cec922003-06-13 12:30:10 +0000178 if the return value is positive, else unpredictable.
William M. Brackc6e07552003-08-16 12:44:47 +0000179The value of <i class="parameter"><tt>outlen</tt></i> after return is the number of octets produced.
Daniel Veillardd4330462003-04-29 12:40:16 +0000180</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandler"></a>struct xmlCharEncodingHandler</h3><pre class="programlisting">struct xmlCharEncodingHandler {
William M. Brackc6e07552003-08-16 12:44:47 +0000181
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000182 char *name;
183 xmlCharEncodingInputFunc input;
184 xmlCharEncodingOutputFunc output;
185#ifdef LIBXML_ICONV_ENABLED
186 iconv_t iconv_in;
187 iconv_t iconv_out;
188#endif /* LIBXML_ICONV_ENABLED */
Daniel Veillardd4330462003-04-29 12:40:16 +0000189};
190</pre><p>
191
Daniel Veillardd7cec922003-06-13 12:30:10 +0000192</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncodingHandlerPtr"></a>xmlCharEncodingHandlerPtr</h3><pre class="programlisting">typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
193</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000194
195</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlInitCharEncodingHandlers"></a>xmlInitCharEncodingHandlers ()</h3><pre class="programlisting">void xmlInitCharEncodingHandlers (void);</pre><p>
196Initialize the char encoding support, it registers the default
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000197encoding supported.
198NOTE: while public, this function usually doesn't need to be called
Daniel Veillardd4330462003-04-29 12:40:16 +0000199 in normal processing.</p><p>
200
201</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupCharEncodingHandlers"></a>xmlCleanupCharEncodingHandlers ()</h3><pre class="programlisting">void xmlCleanupCharEncodingHandlers (void);</pre><p>
202Cleanup the memory allocated for the char encoding support, it
203unregisters all the encoding handlers and the aliases.</p><p>
204
205</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlRegisterCharEncodingHandler"></a>xmlRegisterCharEncodingHandler ()</h3><pre class="programlisting">void xmlRegisterCharEncodingHandler (<a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> handler);</pre><p>
206Register the char encoding handler, surprising, isn't it ?</p><p>
207
William M. Brackc6e07552003-08-16 12:44:47 +0000208</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>handler</tt></i> :</span></td><td> the xmlCharEncodingHandlerPtr handler block
Daniel Veillardd4330462003-04-29 12:40:16 +0000209</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingHandler"></a>xmlGetCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlGetCharEncodingHandler
210 (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
211Search in the registered set the handler able to read/write that encoding.</p><p>
212
William M. Brackc6e07552003-08-16 12:44:47 +0000213</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>enc</tt></i> :</span></td><td> an xmlCharEncoding value.
Daniel Veillardd7cec922003-06-13 12:30:10 +0000214</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found
Daniel Veillardd4330462003-04-29 12:40:16 +0000215</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlFindCharEncodingHandler"></a>xmlFindCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlFindCharEncodingHandler
216 (const char *name);</pre><p>
217Search in the registered set the handler able to read/write that encoding.</p><p>
218
William M. Brackc6e07552003-08-16 12:44:47 +0000219</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>name</tt></i> :</span></td><td> a string describing the char encoding.
Daniel Veillardd7cec922003-06-13 12:30:10 +0000220</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the handler or NULL if not found
Daniel Veillardd4330462003-04-29 12:40:16 +0000221</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlNewCharEncodingHandler"></a>xmlNewCharEncodingHandler ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncodingHandlerPtr">xmlCharEncodingHandlerPtr</a> xmlNewCharEncodingHandler
Daniel Veillard2ace1952002-09-26 12:28:02 +0000222 (const char *name,
Daniel Veillardd4330462003-04-29 12:40:16 +0000223 <a href="libxml-encoding.html#xmlCharEncodingInputFunc">xmlCharEncodingInputFunc</a> input,
224 <a href="libxml-encoding.html#xmlCharEncodingOutputFunc">xmlCharEncodingOutputFunc</a> output);</pre><p>
225Creates and registers an xmlCharEncodingHandler.</p><p>
226
William M. Brackc6e07552003-08-16 12:44:47 +0000227</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>name</tt></i> :</span></td><td> the encoding name, in UTF-8 format (ASCII actually)
228</td></tr><tr><td><span class="term"><i class="parameter"><tt>input</tt></i> :</span></td><td> the xmlCharEncodingInputFunc to read that encoding
229</td></tr><tr><td><span class="term"><i class="parameter"><tt>output</tt></i> :</span></td><td> the xmlCharEncodingOutputFunc to write that encoding
Daniel Veillardd7cec922003-06-13 12:30:10 +0000230</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the xmlCharEncodingHandlerPtr created (or NULL in case of error).
Daniel Veillardd4330462003-04-29 12:40:16 +0000231</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlAddEncodingAlias"></a>xmlAddEncodingAlias ()</h3><pre class="programlisting">int xmlAddEncodingAlias (const char *name,
232 const char *alias);</pre><p>
William M. Brackc6e07552003-08-16 12:44:47 +0000233Registers an alias <i class="parameter"><tt>alias</tt></i> for an encoding named <i class="parameter"><tt>name</tt></i>. An existing alias
Daniel Veillardd4330462003-04-29 12:40:16 +0000234will be overwritten.</p><p>
235
William M. Brackc6e07552003-08-16 12:44:47 +0000236</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>name</tt></i> :</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually)
237</td></tr><tr><td><span class="term"><i class="parameter"><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually)
Daniel Veillardd7cec922003-06-13 12:30:10 +0000238</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error
Daniel Veillardd4330462003-04-29 12:40:16 +0000239</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDelEncodingAlias"></a>xmlDelEncodingAlias ()</h3><pre class="programlisting">int xmlDelEncodingAlias (const char *alias);</pre><p>
William M. Brackc6e07552003-08-16 12:44:47 +0000240Unregisters an encoding alias <i class="parameter"><tt>alias</tt></i></p><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000241
William M. Brackc6e07552003-08-16 12:44:47 +0000242</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually)
Daniel Veillardd7cec922003-06-13 12:30:10 +0000243</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 in case of success, -1 in case of error
Daniel Veillardd4330462003-04-29 12:40:16 +0000244</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetEncodingAlias"></a>xmlGetEncodingAlias ()</h3><pre class="programlisting">const char* xmlGetEncodingAlias (const char *alias);</pre><p>
245Lookup an encoding name for the given alias.</p><p>
246
William M. Brackc6e07552003-08-16 12:44:47 +0000247</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>alias</tt></i> :</span></td><td> the alias name as parsed, in UTF-8 format (ASCII actually)
Daniel Veillardd7cec922003-06-13 12:30:10 +0000248</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>NULL if not found, otherwise the original name
Daniel Veillardd4330462003-04-29 12:40:16 +0000249</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCleanupEncodingAliases"></a>xmlCleanupEncodingAliases ()</h3><pre class="programlisting">void xmlCleanupEncodingAliases (void);</pre><p>
250Unregisters all aliases</p><p>
251
252</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlParseCharEncoding"></a>xmlParseCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlParseCharEncoding (const char *name);</pre><p>
253Compare the string to the encoding schemes already known. Note
Daniel Veillard3bff2b02000-10-01 20:33:47 +0000254that the comparison is case insensitive according to section
Daniel Veillardd4330462003-04-29 12:40:16 +0000255[XML] 4.3.3 Character Encoding in Entities.</p><p>
256
William M. Brackc6e07552003-08-16 12:44:47 +0000257</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>name</tt></i> :</span></td><td> the encoding name as parsed, in UTF-8 format (ASCII actually)
Daniel Veillardd7cec922003-06-13 12:30:10 +0000258</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
259if not recognized.
Daniel Veillardd4330462003-04-29 12:40:16 +0000260</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetCharEncodingName"></a>xmlGetCharEncodingName ()</h3><pre class="programlisting">const char* xmlGetCharEncodingName (<a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
Daniel Veillardcfba2fe2003-08-15 00:33:43 +0000261The "canonical" name for XML encoding.
Daniel Veillardd4330462003-04-29 12:40:16 +0000262C.f. <a href="http://www.w3.org/TR/REC-xml#charencoding">http://www.w3.org/TR/REC-xml#charencoding</a>
263Section 4.3.3 Character Encoding in Entities</p><p>
264
William M. Brackc6e07552003-08-16 12:44:47 +0000265</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>enc</tt></i> :</span></td><td> the encoding
Daniel Veillardd7cec922003-06-13 12:30:10 +0000266</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the canonical name for the given encoding
Daniel Veillardd4330462003-04-29 12:40:16 +0000267</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlDetectCharEncoding"></a>xmlDetectCharEncoding ()</h3><pre class="programlisting"><a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> xmlDetectCharEncoding (unsigned char *in,
268 int len);</pre><p>
269Guess the encoding of the entity using the first bytes of the entity content
270according to the non-normative appendix F of the XML-1.0 recommendation.</p><p>
271
William M. Brackc6e07552003-08-16 12:44:47 +0000272</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> a pointer to the first bytes of the XML entity, must be at least
Daniel Veillardd7cec922003-06-13 12:30:10 +0000273 4 bytes long.
William M. Brackc6e07552003-08-16 12:44:47 +0000274</td></tr><tr><td><span class="term"><i class="parameter"><tt>len</tt></i> :</span></td><td> the length of the buffer
Daniel Veillardd7cec922003-06-13 12:30:10 +0000275</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>one of the XML_CHAR_ENCODING_... values.
Daniel Veillardd4330462003-04-29 12:40:16 +0000276</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncOutFunc"></a>xmlCharEncOutFunc ()</h3><pre class="programlisting">int xmlCharEncOutFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
277 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
278 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p>
279Generic front-end for the encoding handler output function.
William M. Brackc6e07552003-08-16 12:44:47 +0000280A first call with <i class="parameter"><tt>in</tt></i> == NULL has to be made first to initiate the
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000281output in case of non-stateless encoding needing to initiate their
282state or the output (like the BOM in UTF16).
283In case of UTF8 sequence conversion errors for the given encoder,
Daniel Veillardd4330462003-04-29 12:40:16 +0000284the content will be automatically remapped to a CharRef sequence.</p><p>
285
William M. Brackc6e07552003-08-16 12:44:47 +0000286</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure
287</td></tr><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output.
288</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input
Daniel Veillardd7cec922003-06-13 12:30:10 +0000289</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written on success, or
290 -1 in case of general error, or
291 -2 if the transcoding fails (for *in is not a valid utf8 string or
292 the result of transformation can't fit into the encoding we want).
Daniel Veillardd4330462003-04-29 12:40:16 +0000293</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncInFunc"></a>xmlCharEncInFunc ()</h3><pre class="programlisting">int xmlCharEncInFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
294 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
295 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p>
296Generic front-end for the encoding handler input function</p><p>
297
William M. Brackc6e07552003-08-16 12:44:47 +0000298</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure
299</td></tr><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output.
300</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input
Daniel Veillardd7cec922003-06-13 12:30:10 +0000301</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or
302 -1 general error
303 -2 if the transcoding fails (for *in is not valid utf8 string or
304 the result of transformation can't fit into the encoding we want)
Daniel Veillardd4330462003-04-29 12:40:16 +0000305</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncFirstLine"></a>xmlCharEncFirstLine ()</h3><pre class="programlisting">int xmlCharEncFirstLine (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler,
306 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> out,
307 <a href="libxml-tree.html#xmlBufferPtr">xmlBufferPtr</a> in);</pre><p>
308Front-end for the encoding handler input function, but handle only
309the very first line, i.e. limit itself to 45 chars.</p><p>
310
William M. Brackc6e07552003-08-16 12:44:47 +0000311</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure
312</td></tr><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> an xmlBuffer for the output.
313</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> an xmlBuffer for the input
Daniel Veillardd7cec922003-06-13 12:30:10 +0000314</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes written if success, or
315 -1 general error
316 -2 if the transcoding fails (for *in is not valid utf8 string or
317 the result of transformation can't fit into the encoding we want)
Daniel Veillardd4330462003-04-29 12:40:16 +0000318</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCharEncCloseFunc"></a>xmlCharEncCloseFunc ()</h3><pre class="programlisting">int xmlCharEncCloseFunc (<a href="libxml-encoding.html#xmlCharEncodingHandler">xmlCharEncodingHandler</a> *handler);</pre><p>
319Generic front-end for encoding handler close function</p><p>
320
William M. Brackc6e07552003-08-16 12:44:47 +0000321</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>handler</tt></i> :</span></td><td> char encoding transformation data structure
Daniel Veillardd7cec922003-06-13 12:30:10 +0000322</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 in case of error
Daniel Veillardd4330462003-04-29 12:40:16 +0000323</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8Toisolat1"></a>UTF8Toisolat1 ()</h3><pre class="programlisting">int UTF8Toisolat1 (unsigned char *out,
Daniel Veillarda41123c2001-04-22 19:31:20 +0000324 int *outlen,
325 unsigned char *in,
Daniel Veillardd4330462003-04-29 12:40:16 +0000326 int *inlen);</pre><p>
327Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
328block of chars out.</p><p>
329
William M. Brackc6e07552003-08-16 12:44:47 +0000330</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result
331</td></tr><tr><td><span class="term"><i class="parameter"><tt>outlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>out</tt></i>
332</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars
333</td></tr><tr><td><span class="term"><i class="parameter"><tt>inlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>in</tt></i>
Daniel Veillardd7cec922003-06-13 12:30:10 +0000334</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise
William M. Brackc6e07552003-08-16 12:44:47 +0000335The value of <i class="parameter"><tt>inlen</tt></i> after return is the number of octets consumed
Daniel Veillardd7cec922003-06-13 12:30:10 +0000336 if the return value is positive, else unpredictable.
William M. Brackc6e07552003-08-16 12:44:47 +0000337The value of <i class="parameter"><tt>outlen</tt></i> after return is the number of octets consumed.
Daniel Veillardd4330462003-04-29 12:40:16 +0000338</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="isolat1ToUTF8"></a>isolat1ToUTF8 ()</h3><pre class="programlisting">int isolat1ToUTF8 (unsigned char *out,
Daniel Veillarda41123c2001-04-22 19:31:20 +0000339 int *outlen,
340 unsigned char *in,
Daniel Veillardd4330462003-04-29 12:40:16 +0000341 int *inlen);</pre><p>
342Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
343block of chars out.</p><p>
344
William M. Brackc6e07552003-08-16 12:44:47 +0000345</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result
346</td></tr><tr><td><span class="term"><i class="parameter"><tt>outlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>out</tt></i>
347</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> a pointer to an array of ISO Latin 1 chars
348</td></tr><tr><td><span class="term"><i class="parameter"><tt>inlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>in</tt></i>
Daniel Veillardd7cec922003-06-13 12:30:10 +0000349</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, or -1 otherwise
William M. Brackc6e07552003-08-16 12:44:47 +0000350The value of <i class="parameter"><tt>inlen</tt></i> after return is the number of octets consumed
Daniel Veillardd7cec922003-06-13 12:30:10 +0000351 if the return value is positive, else unpredictable.
William M. Brackc6e07552003-08-16 12:44:47 +0000352The value of <i class="parameter"><tt>outlen</tt></i> after return is the number of octets consumed.
Daniel Veillardd4330462003-04-29 12:40:16 +0000353</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlGetUTF8Char"></a>xmlGetUTF8Char ()</h3><pre class="programlisting">int xmlGetUTF8Char (unsigned char *utf,
354 int *len);</pre><p>
William M. Brackc6e07552003-08-16 12:44:47 +0000355Read one UTF8 Char from <i class="parameter"><tt>utf</tt></i></p><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000356
William M. Brackc6e07552003-08-16 12:44:47 +0000357</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes
358</td></tr><tr><td><span class="term"><i class="parameter"><tt>len</tt></i> :</span></td><td> a pointer to <i class="parameter"><tt>bytes</tt></i> len
359</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the char value or -1 in case of error and update <i class="parameter"><tt>len</tt></i> with the
Daniel Veillardd7cec922003-06-13 12:30:10 +0000360 number of bytes used
Daniel Veillardd4330462003-04-29 12:40:16 +0000361</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlCheckUTF8"></a>xmlCheckUTF8 ()</h3><pre class="programlisting">int xmlCheckUTF8 (unsigned char *utf);</pre><p>
William M. Brackc6e07552003-08-16 12:44:47 +0000362Checks <i class="parameter"><tt>utf</tt></i> for being valid utf-8. <i class="parameter"><tt>utf</tt></i> is assumed to be
Daniel Veillarda41123c2001-04-22 19:31:20 +0000363null-terminated. This function is not super-strict, as it will
364allow longer utf-8 sequences than necessary. Note that Java is
365capable of producing these sequences if provoked. Also note, this
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000366routine checks for the 4-byte maximum size, but does not check for
Daniel Veillardd4330462003-04-29 12:40:16 +00003670x10ffff maximum value.</p><p>
368
William M. Brackc6e07552003-08-16 12:44:47 +0000369</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> Pointer to putative utf-8 encoded string.
370</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> true if <i class="parameter"><tt>utf</tt></i> is valid.
Daniel Veillardd4330462003-04-29 12:40:16 +0000371</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsize"></a>xmlUTF8Strsize ()</h3><pre class="programlisting">int xmlUTF8Strsize (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
372 int len);</pre><p>
373storage size of an UTF8 string</p><p>
374
William M. Brackc6e07552003-08-16 12:44:47 +0000375</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes
376</td></tr><tr><td><span class="term"><i class="parameter"><tt>len</tt></i> :</span></td><td> the number of characters in the array
Daniel Veillardd7cec922003-06-13 12:30:10 +0000377</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the storage size of
378the first 'len' characters of ARRAY
Daniel Veillardd4330462003-04-29 12:40:16 +0000379
380</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strndup"></a>xmlUTF8Strndup ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strndup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
381 int len);</pre><p>
382a strndup for array of UTF8's</p><p>
383
William M. Brackc6e07552003-08-16 12:44:47 +0000384</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> the input UTF8 *
385</td></tr><tr><td><span class="term"><i class="parameter"><tt>len</tt></i> :</span></td><td> the len of <i class="parameter"><tt>utf</tt></i> (in chars)
Daniel Veillardd7cec922003-06-13 12:30:10 +0000386</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a new UTF8 * or NULL
Daniel Veillardd4330462003-04-29 12:40:16 +0000387</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strpos"></a>xmlUTF8Strpos ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strpos (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
388 int pos);</pre><p>
389a function to provide the equivalent of fetching a
390character from a string array</p><p>
391
William M. Brackc6e07552003-08-16 12:44:47 +0000392</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> the input UTF8 *
393</td></tr><tr><td><span class="term"><i class="parameter"><tt>pos</tt></i> :</span></td><td> the position of the desired UTF8 char (in chars)
Daniel Veillardd7cec922003-06-13 12:30:10 +0000394</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to the UTF8 character or NULL
Daniel Veillardd4330462003-04-29 12:40:16 +0000395</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strloc"></a>xmlUTF8Strloc ()</h3><pre class="programlisting">int xmlUTF8Strloc (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
396 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utfchar);</pre><p>
397a function to provide relative location of a UTF8 char</p><p>
398
William M. Brackc6e07552003-08-16 12:44:47 +0000399</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> the input UTF8 *
400</td></tr><tr><td><span class="term"><i class="parameter"><tt>utfchar</tt></i> :</span></td><td> the UTF8 character to be found
Daniel Veillardd7cec922003-06-13 12:30:10 +0000401</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the relative character position of the desired char
402or -1 if not found
Daniel Veillardd4330462003-04-29 12:40:16 +0000403</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strsub"></a>xmlUTF8Strsub ()</h3><pre class="programlisting"><a href="libxml-tree.html#xmlChar">xmlChar</a>* xmlUTF8Strsub (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf,
Daniel Veillard4ec885a2001-06-17 10:31:07 +0000404 int start,
Daniel Veillardd4330462003-04-29 12:40:16 +0000405 int len);</pre><p>
406Note: positions are given in units of UTF-8 chars</p><p>
407
William M. Brackc6e07552003-08-16 12:44:47 +0000408</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes
409</td></tr><tr><td><span class="term"><i class="parameter"><tt>start</tt></i> :</span></td><td> relative pos of first char
410</td></tr><tr><td><span class="term"><i class="parameter"><tt>len</tt></i> :</span></td><td> total number to copy
Daniel Veillardd7cec922003-06-13 12:30:10 +0000411</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>a pointer to a newly created string
412or NULL if any problem
Daniel Veillardd4330462003-04-29 12:40:16 +0000413</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Strlen"></a>xmlUTF8Strlen ()</h3><pre class="programlisting">int xmlUTF8Strlen (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p>
414compute the length of an UTF8 string, it doesn't do a full UTF8
415checking of the content of the string.</p><p>
416
William M. Brackc6e07552003-08-16 12:44:47 +0000417</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> a sequence of UTF-8 encoded bytes
Daniel Veillardd7cec922003-06-13 12:30:10 +0000418</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of characters in the string or -1 in case of error
Daniel Veillardc758c222003-08-04 20:42:34 +0000419</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Size"></a>xmlUTF8Size ()</h3><pre class="programlisting">int xmlUTF8Size (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf);</pre><p>
William M. Brackc6e07552003-08-16 12:44:47 +0000420calculates the internal size of a UTF8 character</p><p>
Daniel Veillardc758c222003-08-04 20:42:34 +0000421
William M. Brackc6e07552003-08-16 12:44:47 +0000422</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf</tt></i> :</span></td><td> pointer to the UTF8 character
Daniel Veillardc758c222003-08-04 20:42:34 +0000423</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the number of bytes in the character, -1 on format error
424</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="xmlUTF8Charcmp"></a>xmlUTF8Charcmp ()</h3><pre class="programlisting">int xmlUTF8Charcmp (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf1,
425 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *utf2);</pre><p>
William M. Brackc6e07552003-08-16 12:44:47 +0000426compares the two UCS4 values</p><p>
Daniel Veillardc758c222003-08-04 20:42:34 +0000427
William M. Brackc6e07552003-08-16 12:44:47 +0000428</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>utf1</tt></i> :</span></td><td> pointer to first UTF8 char
429</td></tr><tr><td><span class="term"><i class="parameter"><tt>utf2</tt></i> :</span></td><td> pointer to second UTF8 char
430</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>result of the compare as with xmlStrncmp
431</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-wsockcompat.html"><b>&lt;&lt; wsockcompat</b></a></td><td align="right"><a accesskey="n" href="libxml-xmlregexp.html"><b>xmlregexp &gt;&gt;</b></a></td></tr></table></body></html>