blob: 65ebf5fb4ecbd6d19684baab850fe4cd31fd09a5 [file] [log] [blame]
/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50
51
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
/*
 * xmlParserVersion:
 *
 * Version string of the library, initialised from the
 * LIBXML_VERSION_STRING macro this file was compiled with.
 */
const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
70 if ((myversion / 10000) != (version / 10000)) {
71 xmlGenericError(xmlGenericErrorContext,
72 "Fatal: program compiled against libxml %d using libxml %d\n",
73 (version / 10000), (myversion / 10000));
74 exit(1);
75 }
76 if ((myversion / 100) < (version / 100)) {
77 xmlGenericError(xmlGenericErrorContext,
78 "Warning: program compiled against libxml %d using older %d\n",
79 (version / 100), (myversion / 100));
80 }
81}
82
83
/*
 * xmlFeaturesList:
 *
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len: the length of the features name array (input/output)
 * @result: an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * When *@len is larger than the number of features it is lowered to that
 * number; the copied strings belong to the library and must not be
 * deallocated.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the total
 * number of features.  The total is also returned, without copying
 * anything, when @len or @result is NULL.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx = 0;

    /* no output location: the caller only gets the count */
    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    while (idx < wanted) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
155
156/*
157 * xmlGetFeature:
158 * @ctxt: an XML/HTML parser context
159 * @name: the feature name
160 * @result: location to store the result
161 *
162 * Read the current value of one feature of this parser instance
163 *
164 * Returns -1 in case or error, 0 otherwise
165 */
166int
167xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
168 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
169 return(-1);
170
171 if (!strcmp(name, "validate")) {
172 *((int *) result) = ctxt->validate;
173 } else if (!strcmp(name, "keep blanks")) {
174 *((int *) result) = ctxt->keepBlanks;
175 } else if (!strcmp(name, "disable SAX")) {
176 *((int *) result) = ctxt->disableSAX;
177 } else if (!strcmp(name, "fetch external entities")) {
178 *((int *) result) = ctxt->loadsubset;
179 } else if (!strcmp(name, "substitute entities")) {
180 *((int *) result) = ctxt->replaceEntities;
181 } else if (!strcmp(name, "gather line info")) {
182 *((int *) result) = ctxt->record_info;
183 } else if (!strcmp(name, "user data")) {
184 *((void **)result) = ctxt->userData;
185 } else if (!strcmp(name, "is html")) {
186 *((int *) result) = ctxt->html;
187 } else if (!strcmp(name, "is standalone")) {
188 *((int *) result) = ctxt->standalone;
189 } else if (!strcmp(name, "document")) {
190 *((xmlDocPtr *) result) = ctxt->myDoc;
191 } else if (!strcmp(name, "is well formed")) {
192 *((int *) result) = ctxt->wellFormed;
193 } else if (!strcmp(name, "is valid")) {
194 *((int *) result) = ctxt->valid;
195 } else if (!strcmp(name, "SAX block")) {
196 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
197 } else if (!strcmp(name, "SAX function internalSubset")) {
198 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
199 } else if (!strcmp(name, "SAX function isStandalone")) {
200 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
201 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
202 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
203 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
204 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
205 } else if (!strcmp(name, "SAX function resolveEntity")) {
206 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
207 } else if (!strcmp(name, "SAX function getEntity")) {
208 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
209 } else if (!strcmp(name, "SAX function entityDecl")) {
210 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
211 } else if (!strcmp(name, "SAX function notationDecl")) {
212 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
213 } else if (!strcmp(name, "SAX function attributeDecl")) {
214 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
215 } else if (!strcmp(name, "SAX function elementDecl")) {
216 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
217 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
218 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
219 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
220 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
221 } else if (!strcmp(name, "SAX function startDocument")) {
222 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
223 } else if (!strcmp(name, "SAX function endDocument")) {
224 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
225 } else if (!strcmp(name, "SAX function startElement")) {
226 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
227 } else if (!strcmp(name, "SAX function endElement")) {
228 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
229 } else if (!strcmp(name, "SAX function reference")) {
230 *((referenceSAXFunc *) result) = ctxt->sax->reference;
231 } else if (!strcmp(name, "SAX function characters")) {
232 *((charactersSAXFunc *) result) = ctxt->sax->characters;
233 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
234 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
235 } else if (!strcmp(name, "SAX function processingInstruction")) {
236 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
237 } else if (!strcmp(name, "SAX function comment")) {
238 *((commentSAXFunc *) result) = ctxt->sax->comment;
239 } else if (!strcmp(name, "SAX function warning")) {
240 *((warningSAXFunc *) result) = ctxt->sax->warning;
241 } else if (!strcmp(name, "SAX function error")) {
242 *((errorSAXFunc *) result) = ctxt->sax->error;
243 } else if (!strcmp(name, "SAX function fatalError")) {
244 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
245 } else if (!strcmp(name, "SAX function getParameterEntity")) {
246 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
247 } else if (!strcmp(name, "SAX function cdataBlock")) {
248 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
249 } else if (!strcmp(name, "SAX function externalSubset")) {
250 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
251 } else {
252 return(-1);
253 }
254 return(0);
255}
256
257/*
258 * xmlSetFeature:
259 * @ctxt: an XML/HTML parser context
260 * @name: the feature name
261 * @value: pointer to the location of the new value
262 *
263 * Change the current value of one feature of this parser instance
264 *
265 * Returns -1 in case or error, 0 otherwise
266 */
267int
268xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
269 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
270 return(-1);
271
272 if (!strcmp(name, "validate")) {
273 int newvalidate = *((int *) value);
274 if ((!ctxt->validate) && (newvalidate != 0)) {
275 if (ctxt->vctxt.warning == NULL)
276 ctxt->vctxt.warning = xmlParserValidityWarning;
277 if (ctxt->vctxt.error == NULL)
278 ctxt->vctxt.error = xmlParserValidityError;
279 /* Allocate the Node stack */
280 ctxt->vctxt.nodeTab = (xmlNodePtr *)
281 xmlMalloc(4 * sizeof(xmlNodePtr));
282 if (ctxt->vctxt.nodeTab == NULL) {
283 ctxt->vctxt.nodeMax = 0;
284 ctxt->validate = 0;
285 return(-1);
286 }
287 ctxt->vctxt.nodeNr = 0;
288 ctxt->vctxt.nodeMax = 4;
289 ctxt->vctxt.node = NULL;
290 }
291 ctxt->validate = newvalidate;
292 } else if (!strcmp(name, "keep blanks")) {
293 ctxt->keepBlanks = *((int *) value);
294 } else if (!strcmp(name, "disable SAX")) {
295 ctxt->disableSAX = *((int *) value);
296 } else if (!strcmp(name, "fetch external entities")) {
297 ctxt->loadsubset = *((int *) value);
298 } else if (!strcmp(name, "substitute entities")) {
299 ctxt->replaceEntities = *((int *) value);
300 } else if (!strcmp(name, "gather line info")) {
301 ctxt->record_info = *((int *) value);
302 } else if (!strcmp(name, "user data")) {
303 ctxt->userData = *((void **)value);
304 } else if (!strcmp(name, "is html")) {
305 ctxt->html = *((int *) value);
306 } else if (!strcmp(name, "is standalone")) {
307 ctxt->standalone = *((int *) value);
308 } else if (!strcmp(name, "document")) {
309 ctxt->myDoc = *((xmlDocPtr *) value);
310 } else if (!strcmp(name, "is well formed")) {
311 ctxt->wellFormed = *((int *) value);
312 } else if (!strcmp(name, "is valid")) {
313 ctxt->valid = *((int *) value);
314 } else if (!strcmp(name, "SAX block")) {
315 ctxt->sax = *((xmlSAXHandlerPtr *) value);
316 } else if (!strcmp(name, "SAX function internalSubset")) {
317 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function isStandalone")) {
319 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
321 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
323 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function resolveEntity")) {
325 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function getEntity")) {
327 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function entityDecl")) {
329 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function notationDecl")) {
331 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function attributeDecl")) {
333 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function elementDecl")) {
335 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
337 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
339 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function startDocument")) {
341 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function endDocument")) {
343 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function startElement")) {
345 ctxt->sax->startElement = *((startElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function endElement")) {
347 ctxt->sax->endElement = *((endElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function reference")) {
349 ctxt->sax->reference = *((referenceSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function characters")) {
351 ctxt->sax->characters = *((charactersSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
353 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function processingInstruction")) {
355 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function comment")) {
357 ctxt->sax->comment = *((commentSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function warning")) {
359 ctxt->sax->warning = *((warningSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function error")) {
361 ctxt->sax->error = *((errorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function fatalError")) {
363 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function getParameterEntity")) {
365 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
366 } else if (!strcmp(name, "SAX function cdataBlock")) {
367 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
368 } else if (!strcmp(name, "SAX function externalSubset")) {
369 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
370 } else {
371 return(-1);
372 }
373 return(0);
374}
375
376/************************************************************************
377 * *
378 * Some functions to avoid too large macros *
379 * *
380 ************************************************************************/
381
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE, and
 * FFFF.  Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF */
    if (c < 0x10000)
        return(0);
    return(c <= 0x10FFFF);
}
402
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
417
/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from the xmlBaseArray lookup
 * table.  Higher code points are compared against the explicit ranges,
 * which are split in three bands (below 0x0905, below 0x10A0, and the
 * rest) so that only the comparisons of one band are evaluated.
 * Negative values are rejected before the table is indexed.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

int
xmlIsBaseChar(int c) {
    /* a negative value would index before the start of xmlBaseArray */
    if (c < 0)
        return(0);

    if (c < 0x0100)
        return(xmlBaseArray[c]);

    if (c < 0x0905) {
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));
    }

    if (c < 0x10A0) {
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));
    }

    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
656
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The ranges of the production are kept in a table sorted by code
 * point, the scan stops as soon as @c lies before the current range.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const struct {
        int first;
        int last;
    } digitRanges[] = {
        { 0x0030, 0x0039 },
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 }
    };
    const int nbRanges = (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        if (c < digitRanges[idx].first)
            return(0);
        if (c <= digitRanges[idx].last)
            return(1);
    }
    return(0);
}
686
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges of the production are kept in a table sorted by code
 * point (a single code point is a range with identical bounds), the
 * scan stops as soon as @c lies before the current range.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    static const struct {
        int first;
        int last;
    } combiningRanges[] = {
        { 0x0300, 0x0345 },
        { 0x0360, 0x0361 },
        { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 },
        { 0x05A3, 0x05B9 },
        { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF },
        { 0x05C1, 0x05C2 },
        { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 },
        { 0x0670, 0x0670 },
        { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF },
        { 0x06E0, 0x06E4 },
        { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },
        { 0x0901, 0x0903 },
        { 0x093C, 0x093C },
        { 0x093E, 0x094C },
        { 0x094D, 0x094D },
        { 0x0951, 0x0954 },
        { 0x0962, 0x0963 },
        { 0x0981, 0x0983 },
        { 0x09BC, 0x09BC },
        { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF },
        { 0x09C0, 0x09C4 },
        { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD },
        { 0x09D7, 0x09D7 },
        { 0x09E2, 0x09E3 },
        { 0x0A02, 0x0A02 },
        { 0x0A3C, 0x0A3C },
        { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F },
        { 0x0A40, 0x0A42 },
        { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D },
        { 0x0A70, 0x0A71 },
        { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC },
        { 0x0ABE, 0x0AC5 },
        { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD },
        { 0x0B01, 0x0B03 },
        { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 },
        { 0x0B47, 0x0B48 },
        { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 },
        { 0x0B82, 0x0B83 },
        { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 },
        { 0x0BCA, 0x0BCD },
        { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 },
        { 0x0C3E, 0x0C44 },
        { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D },
        { 0x0C55, 0x0C56 },
        { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 },
        { 0x0CC6, 0x0CC8 },
        { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 },
        { 0x0D02, 0x0D03 },
        { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 },
        { 0x0D4A, 0x0D4D },
        { 0x0D57, 0x0D57 },
        { 0x0E31, 0x0E31 },
        { 0x0E34, 0x0E3A },
        { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 },
        { 0x0EB4, 0x0EB9 },
        { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD },
        { 0x0F18, 0x0F19 },
        { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 },
        { 0x0F39, 0x0F39 },
        { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F },
        { 0x0F71, 0x0F84 },
        { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 },
        { 0x0F97, 0x0F97 },
        { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 },
        { 0x0FB9, 0x0FB9 },
        { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 },
        { 0x302A, 0x302F },
        { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    const int nbRanges =
        (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0]));
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        if (c < combiningRanges[idx].first)
            return(0);
        if (c <= combiningRanges[idx].last)
            return(1);
    }
    return(0);
}
799
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE], 0x30FD is part of the range too */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
824
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * 0xF900-0xFA2D block is not part of it and is therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* nothing below 0x3007 can match, which covers negative values too */
    if (c < 0x3007)
        return(0);
    return(
        ((c >= 0x4E00) && (c <= 0x9FA5)) ||
        (c == 0x3007) ||
        ((c >= 0x3021) && (c <= 0x3029)));
}
842
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 * The two alternatives are tested with the IS_BASECHAR() and
 * IS_IDEOGRAPHIC() macros, in that order.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
856
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* #x20 | #xD | #xA */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
879
880/************************************************************************
881 * *
882 * Input handling functions for progressive parsing *
883 * *
884 ************************************************************************/
885
886/* #define DEBUG_INPUT */
887/* #define DEBUG_STACK */
888/* #define DEBUG_PUSH */
889
890
891/* we need to keep enough input to show errors in context */
892#define LINE_LEN 80
893
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to inspect
 *
 * Debug-only consistency check (compiled when DEBUG_INPUT is defined).
 * Reports through xmlGenericError() when in->base does not match the
 * underlying buffer content or when in->cur lies outside of
 * [base, base + use], then dumps the buffer bookkeeping values.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the pointer difference as a long:
     * squeezing them through (int) and %x/%d truncates the values on
     * platforms where pointers are wider than int.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
918
919
920/**
921 * xmlParserInputRead:
922 * @in: an XML parser input
923 * @len: an indicative size for the lookahead
924 *
925 * This function refresh the input for the parser. It doesn't try to
926 * preserve pointers to the input buffer, and discard already read data
927 *
928 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
929 * end of this entity
930 */
931int
932xmlParserInputRead(xmlParserInputPtr in, int len) {
933 int ret;
934 int used;
935 int index;
936
937#ifdef DEBUG_INPUT
938 xmlGenericError(xmlGenericErrorContext, "Read\n");
939#endif
940 if (in->buf == NULL) return(-1);
941 if (in->base == NULL) return(-1);
942 if (in->cur == NULL) return(-1);
943 if (in->buf->buffer == NULL) return(-1);
944 if (in->buf->readcallback == NULL) return(-1);
945
946 CHECK_BUFFER(in);
947
948 used = in->cur - in->buf->buffer->content;
949 ret = xmlBufferShrink(in->buf->buffer, used);
950 if (ret > 0) {
951 in->cur -= ret;
952 in->consumed += ret;
953 }
954 ret = xmlParserInputBufferRead(in->buf, len);
955 if (in->base != in->buf->buffer->content) {
956 /*
957 * the buffer has been realloced
958 */
959 index = in->cur - in->base;
960 in->base = in->buf->buffer->content;
961 in->cur = &in->buf->buffer->content[index];
962 }
963
964 CHECK_BUFFER(in);
965
966 return(ret);
967}
968
969/**
970 * xmlParserInputGrow:
971 * @in: an XML parser input
972 * @len: an indicative size for the lookahead
973 *
974 * This function increase the input for the parser. It tries to
975 * preserve pointers to the input buffer, and keep already read data
976 *
977 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
978 * end of this entity
979 */
980int
981xmlParserInputGrow(xmlParserInputPtr in, int len) {
982 int ret;
983 int index;
984
985#ifdef DEBUG_INPUT
986 xmlGenericError(xmlGenericErrorContext, "Grow\n");
987#endif
988 if (in->buf == NULL) return(-1);
989 if (in->base == NULL) return(-1);
990 if (in->cur == NULL) return(-1);
991 if (in->buf->buffer == NULL) return(-1);
992
993 CHECK_BUFFER(in);
994
995 index = in->cur - in->base;
996 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
997
998 CHECK_BUFFER(in);
999
1000 return(0);
1001 }
1002 if (in->buf->readcallback != NULL)
1003 ret = xmlParserInputBufferGrow(in->buf, len);
1004 else
1005 return(0);
1006
1007 /*
1008 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
1009 * block, but we use it really as an integer to do some
1010 * pointer arithmetic. Insure will raise it as a bug but in
1011 * that specific case, that's not !
1012 */
1013 if (in->base != in->buf->buffer->content) {
1014 /*
1015 * the buffer has been realloced
1016 */
1017 index = in->cur - in->base;
1018 in->base = in->buf->buffer->content;
1019 in->cur = &in->buf->buffer->content[index];
1020 }
1021
1022 CHECK_BUFFER(in);
1023
1024 return(ret);
1025}
1026
1027/**
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1030 *
1031 * This function removes used input for the parser.
1032 */
1033void
1034xmlParserInputShrink(xmlParserInputPtr in) {
1035 int used;
1036 int ret;
1037 int index;
1038
1039#ifdef DEBUG_INPUT
1040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1041#endif
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
1046
1047 CHECK_BUFFER(in);
1048
1049 used = in->cur - in->buf->buffer->content;
1050 /*
1051 * Do not shrink on large buffers whose only a tiny fraction
1052 * was consumned
1053 */
1054 if (in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1055 return;
1056 if (used > INPUT_CHUNK) {
1057 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1058 if (ret > 0) {
1059 in->cur -= ret;
1060 in->consumed += ret;
1061 }
1062 }
1063
1064 CHECK_BUFFER(in);
1065
1066 if (in->buf->buffer->use > INPUT_CHUNK) {
1067 return;
1068 }
1069 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1070 if (in->base != in->buf->buffer->content) {
1071 /*
1072 * the buffer has been realloced
1073 */
1074 index = in->cur - in->base;
1075 in->base = in->buf->buffer->content;
1076 in->cur = &in->buf->buffer->content[index];
1077 }
1078
1079 CHECK_BUFFER(in);
1080}
1081
1082/************************************************************************
1083 * *
1084 * UTF8 character input and related functions *
1085 * *
1086 ************************************************************************/
1087
1088/**
1089 * xmlNextChar:
1090 * @ctxt: the XML parser context
1091 *
1092 * Skip to the next char input char.
1093 */
1094
1095void
1096xmlNextChar(xmlParserCtxtPtr ctxt) {
1097 if (ctxt->instate == XML_PARSER_EOF)
1098 return;
1099
1100 /*
1101 * 2.11 End-of-Line Handling
1102 * the literal two-character sequence "#xD#xA" or a standalone
1103 * literal #xD, an XML processor must pass to the application
1104 * the single character #xA.
1105 */
1106 if (ctxt->token != 0) ctxt->token = 0;
1107 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1108 if ((*ctxt->input->cur == 0) &&
1109 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1110 (ctxt->instate != XML_PARSER_COMMENT)) {
1111 /*
1112 * If we are at the end of the current entity and
1113 * the context allows it, we pop consumed entities
1114 * automatically.
1115 * the auto closing should be blocked in other cases
1116 */
1117 xmlPopInput(ctxt);
1118 } else {
1119 if (*(ctxt->input->cur) == '\n') {
1120 ctxt->input->line++; ctxt->input->col = 1;
1121 } else ctxt->input->col++;
1122 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1123 /*
1124 * We are supposed to handle UTF8, check it's valid
1125 * From rfc2044: encoding of the Unicode values on UTF-8:
1126 *
1127 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128 * 0000 0000-0000 007F 0xxxxxxx
1129 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1131 *
1132 * Check for the 0x110000 limit too
1133 */
1134 const unsigned char *cur = ctxt->input->cur;
1135 unsigned char c;
1136
1137 c = *cur;
1138 if (c & 0x80) {
1139 if (cur[1] == 0)
1140 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1141 if ((cur[1] & 0xc0) != 0x80)
1142 goto encoding_error;
1143 if ((c & 0xe0) == 0xe0) {
1144 unsigned int val;
1145
1146 if (cur[2] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if ((cur[2] & 0xc0) != 0x80)
1149 goto encoding_error;
1150 if ((c & 0xf0) == 0xf0) {
1151 if (cur[3] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if (((c & 0xf8) != 0xf0) ||
1154 ((cur[3] & 0xc0) != 0x80))
1155 goto encoding_error;
1156 /* 4-byte code */
1157 ctxt->input->cur += 4;
1158 val = (cur[0] & 0x7) << 18;
1159 val |= (cur[1] & 0x3f) << 12;
1160 val |= (cur[2] & 0x3f) << 6;
1161 val |= cur[3] & 0x3f;
1162 } else {
1163 /* 3-byte code */
1164 ctxt->input->cur += 3;
1165 val = (cur[0] & 0xf) << 12;
1166 val |= (cur[1] & 0x3f) << 6;
1167 val |= cur[2] & 0x3f;
1168 }
1169 if (((val > 0xd7ff) && (val < 0xe000)) ||
1170 ((val > 0xfffd) && (val < 0x10000)) ||
1171 (val >= 0x110000)) {
1172 if ((ctxt->sax != NULL) &&
1173 (ctxt->sax->error != NULL))
1174 ctxt->sax->error(ctxt->userData,
1175 "Char 0x%X out of allowed range\n", val);
1176 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1177 ctxt->wellFormed = 0;
1178 ctxt->disableSAX = 1;
1179 }
1180 } else
1181 /* 2-byte code */
1182 ctxt->input->cur += 2;
1183 } else
1184 /* 1-byte code */
1185 ctxt->input->cur++;
1186 } else {
1187 /*
1188 * Assume it's a fixed lenght encoding (1) with
1189 * a compatibke encoding for the ASCII set, since
1190 * XML constructs only use < 128 chars
1191 */
1192 ctxt->input->cur++;
1193 }
1194 ctxt->nbChars++;
1195 if (*ctxt->input->cur == 0)
1196 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1197 }
1198 } else {
1199 ctxt->input->cur++;
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
1204 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1205 xmlParserHandlePEReference(ctxt);
1206 if ((*ctxt->input->cur == 0) &&
1207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1208 xmlPopInput(ctxt);
1209 return;
1210encoding_error:
1211 /*
1212 * If we detect an UTF8 error that probably mean that the
1213 * input encoding didn't get properly advertized in the
1214 * declaration header. Report the error and switch the encoding
1215 * to ISO-Latin-1 (if you don't like this policy, just declare the
1216 * encoding !)
1217 */
1218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1219 ctxt->sax->error(ctxt->userData,
1220 "Input is not proper UTF-8, indicate encoding !\n");
1221 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1222 ctxt->input->cur[0], ctxt->input->cur[1],
1223 ctxt->input->cur[2], ctxt->input->cur[3]);
1224 }
1225 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1226
1227 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1228 ctxt->input->cur++;
1229 return;
1230}
1231
1232/**
1233 * xmlCurrentChar:
1234 * @ctxt: the XML parser context
1235 * @len: pointer to the length of the char read
1236 *
1237 * The current char value, if using UTF-8 this may actaully span multiple
1238 * bytes in the input buffer. Implement the end of line normalization:
1239 * 2.11 End-of-Line Handling
1240 * Wherever an external parsed entity or the literal entity value
1241 * of an internal parsed entity contains either the literal two-character
1242 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1243 * must pass to the application the single character #xA.
1244 * This behavior can conveniently be produced by normalizing all
1245 * line breaks to #xA on input, before parsing.)
1246 *
1247 * Returns the current char value and its lenght
1248 */
1249
1250int
1251xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1252 if (ctxt->instate == XML_PARSER_EOF)
1253 return(0);
1254
1255 if (ctxt->token != 0) {
1256 *len = 0;
1257 return(ctxt->token);
1258 }
1259 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1260 *len = 1;
1261 return((int) *ctxt->input->cur);
1262 }
1263 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1264 /*
1265 * We are supposed to handle UTF8, check it's valid
1266 * From rfc2044: encoding of the Unicode values on UTF-8:
1267 *
1268 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1269 * 0000 0000-0000 007F 0xxxxxxx
1270 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1271 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1272 *
1273 * Check for the 0x110000 limit too
1274 */
1275 const unsigned char *cur = ctxt->input->cur;
1276 unsigned char c;
1277 unsigned int val;
1278
1279 c = *cur;
1280 if (c & 0x80) {
1281 if (cur[1] == 0)
1282 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1283 if ((cur[1] & 0xc0) != 0x80)
1284 goto encoding_error;
1285 if ((c & 0xe0) == 0xe0) {
1286
1287 if (cur[2] == 0)
1288 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1289 if ((cur[2] & 0xc0) != 0x80)
1290 goto encoding_error;
1291 if ((c & 0xf0) == 0xf0) {
1292 if (cur[3] == 0)
1293 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1294 if (((c & 0xf8) != 0xf0) ||
1295 ((cur[3] & 0xc0) != 0x80))
1296 goto encoding_error;
1297 /* 4-byte code */
1298 *len = 4;
1299 val = (cur[0] & 0x7) << 18;
1300 val |= (cur[1] & 0x3f) << 12;
1301 val |= (cur[2] & 0x3f) << 6;
1302 val |= cur[3] & 0x3f;
1303 } else {
1304 /* 3-byte code */
1305 *len = 3;
1306 val = (cur[0] & 0xf) << 12;
1307 val |= (cur[1] & 0x3f) << 6;
1308 val |= cur[2] & 0x3f;
1309 }
1310 } else {
1311 /* 2-byte code */
1312 *len = 2;
1313 val = (cur[0] & 0x1f) << 6;
1314 val |= cur[1] & 0x3f;
1315 }
1316 if (!IS_CHAR(val)) {
1317 if ((ctxt->sax != NULL) &&
1318 (ctxt->sax->error != NULL))
1319 ctxt->sax->error(ctxt->userData,
1320 "Char 0x%X out of allowed range\n", val);
1321 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1322 ctxt->wellFormed = 0;
1323 ctxt->disableSAX = 1;
1324 }
1325 return(val);
1326 } else {
1327 /* 1-byte code */
1328 *len = 1;
1329 if (*ctxt->input->cur == 0xD) {
1330 if (ctxt->input->cur[1] == 0xA) {
1331 ctxt->nbChars++;
1332 ctxt->input->cur++;
1333 }
1334 return(0xA);
1335 }
1336 return((int) *ctxt->input->cur);
1337 }
1338 }
1339 /*
1340 * Assume it's a fixed lenght encoding (1) with
1341 * a compatibke encoding for the ASCII set, since
1342 * XML constructs only use < 128 chars
1343 */
1344 *len = 1;
1345 if (*ctxt->input->cur == 0xD) {
1346 if (ctxt->input->cur[1] == 0xA) {
1347 ctxt->nbChars++;
1348 ctxt->input->cur++;
1349 }
1350 return(0xA);
1351 }
1352 return((int) *ctxt->input->cur);
1353encoding_error:
1354 /*
1355 * If we detect an UTF8 error that probably mean that the
1356 * input encoding didn't get properly advertized in the
1357 * declaration header. Report the error and switch the encoding
1358 * to ISO-Latin-1 (if you don't like this policy, just declare the
1359 * encoding !)
1360 */
1361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1362 ctxt->sax->error(ctxt->userData,
1363 "Input is not proper UTF-8, indicate encoding !\n");
1364 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1365 ctxt->input->cur[0], ctxt->input->cur[1],
1366 ctxt->input->cur[2], ctxt->input->cur[3]);
1367 }
1368 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1369
1370 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1371 *len = 1;
1372 return((int) *ctxt->input->cur);
1373}
1374
1375/**
1376 * xmlStringCurrentChar:
1377 * @ctxt: the XML parser context
1378 * @cur: pointer to the beginning of the char
1379 * @len: pointer to the length of the char read
1380 *
1381 * The current char value, if using UTF-8 this may actaully span multiple
1382 * bytes in the input buffer.
1383 *
1384 * Returns the current char value and its lenght
1385 */
1386
1387int
1388xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1389 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1390 /*
1391 * We are supposed to handle UTF8, check it's valid
1392 * From rfc2044: encoding of the Unicode values on UTF-8:
1393 *
1394 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1395 * 0000 0000-0000 007F 0xxxxxxx
1396 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1397 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1398 *
1399 * Check for the 0x110000 limit too
1400 */
1401 unsigned char c;
1402 unsigned int val;
1403
1404 c = *cur;
1405 if (c & 0x80) {
1406 if ((cur[1] & 0xc0) != 0x80)
1407 goto encoding_error;
1408 if ((c & 0xe0) == 0xe0) {
1409
1410 if ((cur[2] & 0xc0) != 0x80)
1411 goto encoding_error;
1412 if ((c & 0xf0) == 0xf0) {
1413 if (((c & 0xf8) != 0xf0) ||
1414 ((cur[3] & 0xc0) != 0x80))
1415 goto encoding_error;
1416 /* 4-byte code */
1417 *len = 4;
1418 val = (cur[0] & 0x7) << 18;
1419 val |= (cur[1] & 0x3f) << 12;
1420 val |= (cur[2] & 0x3f) << 6;
1421 val |= cur[3] & 0x3f;
1422 } else {
1423 /* 3-byte code */
1424 *len = 3;
1425 val = (cur[0] & 0xf) << 12;
1426 val |= (cur[1] & 0x3f) << 6;
1427 val |= cur[2] & 0x3f;
1428 }
1429 } else {
1430 /* 2-byte code */
1431 *len = 2;
1432 val = (cur[0] & 0x1f) << 6;
1433 val |= cur[2] & 0x3f;
1434 }
1435 if (!IS_CHAR(val)) {
1436 if ((ctxt->sax != NULL) &&
1437 (ctxt->sax->error != NULL))
1438 ctxt->sax->error(ctxt->userData,
1439 "Char 0x%X out of allowed range\n", val);
1440 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1441 ctxt->wellFormed = 0;
1442 ctxt->disableSAX = 1;
1443 }
1444 return(val);
1445 } else {
1446 /* 1-byte code */
1447 *len = 1;
1448 return((int) *cur);
1449 }
1450 }
1451 /*
1452 * Assume it's a fixed lenght encoding (1) with
1453 * a compatibke encoding for the ASCII set, since
1454 * XML constructs only use < 128 chars
1455 */
1456 *len = 1;
1457 return((int) *cur);
1458encoding_error:
1459 /*
1460 * If we detect an UTF8 error that probably mean that the
1461 * input encoding didn't get properly advertized in the
1462 * declaration header. Report the error and switch the encoding
1463 * to ISO-Latin-1 (if you don't like this policy, just declare the
1464 * encoding !)
1465 */
1466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1467 ctxt->sax->error(ctxt->userData,
1468 "Input is not proper UTF-8, indicate encoding !\n");
1469 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1470 ctxt->input->cur[0], ctxt->input->cur[1],
1471 ctxt->input->cur[2], ctxt->input->cur[3]);
1472 }
1473 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1474
1475 *len = 1;
1476 return((int) *cur);
1477}
1478
1479/**
1480 * xmlCopyChar:
1481 * @len: pointer to the length of the char read (or zero)
1482 * @array: pointer to an arry of xmlChar
1483 * @val: the char value
1484 *
1485 * append the char value in the array
1486 *
1487 * Returns the number of xmlChar written
1488 */
1489
1490int
1491xmlCopyChar(int len, xmlChar *out, int val) {
1492 /*
1493 * We are supposed to handle UTF8, check it's valid
1494 * From rfc2044: encoding of the Unicode values on UTF-8:
1495 *
1496 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1497 * 0000 0000-0000 007F 0xxxxxxx
1498 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1499 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1500 */
1501 if (len == 0) {
1502 if (val < 0) len = 0;
1503 else if (val < 0x80) len = 1;
1504 else if (val < 0x800) len = 2;
1505 else if (val < 0x10000) len = 3;
1506 else if (val < 0x110000) len = 4;
1507 if (len == 0) {
1508 xmlGenericError(xmlGenericErrorContext,
1509 "Internal error, xmlCopyChar 0x%X out of bound\n",
1510 val);
1511 return(0);
1512 }
1513 }
1514 if (len > 1) {
1515 int bits;
1516
1517 if (val < 0x80) { *out++= val; bits= -6; }
1518 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1519 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1520 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1521
1522 for ( ; bits >= 0; bits-= 6)
1523 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1524
1525 return(len);
1526 }
1527 *out = (xmlChar) val;
1528 return(1);
1529}
1530
1531/************************************************************************
1532 * *
1533 * Commodity functions to switch encodings *
1534 * *
1535 ************************************************************************/
1536
1537/**
1538 * xmlSwitchEncoding:
1539 * @ctxt: the parser context
1540 * @enc: the encoding value (number)
1541 *
1542 * change the input functions when discovering the character encoding
1543 * of a given entity.
1544 *
1545 * Returns 0 in case of success, -1 otherwise
1546 */
1547int
1548xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1549{
1550 xmlCharEncodingHandlerPtr handler;
1551
1552 switch (enc) {
1553 case XML_CHAR_ENCODING_ERROR:
1554 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1556 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1557 ctxt->wellFormed = 0;
1558 ctxt->disableSAX = 1;
1559 break;
1560 case XML_CHAR_ENCODING_NONE:
1561 /* let's assume it's UTF-8 without the XML decl */
1562 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1563 return(0);
1564 case XML_CHAR_ENCODING_UTF8:
1565 /* default encoding, no conversion should be needed */
1566 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1567 return(0);
1568 default:
1569 break;
1570 }
1571 handler = xmlGetCharEncodingHandler(enc);
1572 if (handler == NULL) {
1573 /*
1574 * Default handlers.
1575 */
1576 switch (enc) {
1577 case XML_CHAR_ENCODING_ERROR:
1578 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1580 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1581 ctxt->wellFormed = 0;
1582 ctxt->disableSAX = 1;
1583 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1584 break;
1585 case XML_CHAR_ENCODING_NONE:
1586 /* let's assume it's UTF-8 without the XML decl */
1587 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1588 return(0);
1589 case XML_CHAR_ENCODING_UTF8:
1590 case XML_CHAR_ENCODING_ASCII:
1591 /* default encoding, no conversion should be needed */
1592 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1593 return(0);
1594 case XML_CHAR_ENCODING_UTF16LE:
1595 break;
1596 case XML_CHAR_ENCODING_UTF16BE:
1597 break;
1598 case XML_CHAR_ENCODING_UCS4LE:
1599 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1601 ctxt->sax->error(ctxt->userData,
1602 "char encoding USC4 little endian not supported\n");
1603 break;
1604 case XML_CHAR_ENCODING_UCS4BE:
1605 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607 ctxt->sax->error(ctxt->userData,
1608 "char encoding USC4 big endian not supported\n");
1609 break;
1610 case XML_CHAR_ENCODING_EBCDIC:
1611 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1613 ctxt->sax->error(ctxt->userData,
1614 "char encoding EBCDIC not supported\n");
1615 break;
1616 case XML_CHAR_ENCODING_UCS4_2143:
1617 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1619 ctxt->sax->error(ctxt->userData,
1620 "char encoding UCS4 2143 not supported\n");
1621 break;
1622 case XML_CHAR_ENCODING_UCS4_3412:
1623 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1625 ctxt->sax->error(ctxt->userData,
1626 "char encoding UCS4 3412 not supported\n");
1627 break;
1628 case XML_CHAR_ENCODING_UCS2:
1629 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1631 ctxt->sax->error(ctxt->userData,
1632 "char encoding UCS2 not supported\n");
1633 break;
1634 case XML_CHAR_ENCODING_8859_1:
1635 case XML_CHAR_ENCODING_8859_2:
1636 case XML_CHAR_ENCODING_8859_3:
1637 case XML_CHAR_ENCODING_8859_4:
1638 case XML_CHAR_ENCODING_8859_5:
1639 case XML_CHAR_ENCODING_8859_6:
1640 case XML_CHAR_ENCODING_8859_7:
1641 case XML_CHAR_ENCODING_8859_8:
1642 case XML_CHAR_ENCODING_8859_9:
1643 /*
1644 * We used to keep the internal content in the
1645 * document encoding however this turns being unmaintainable
1646 * So xmlGetCharEncodingHandler() will return non-null
1647 * values for this now.
1648 */
1649 if ((ctxt->inputNr == 1) &&
1650 (ctxt->encoding == NULL) &&
1651 (ctxt->input->encoding != NULL)) {
1652 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1653 }
1654 ctxt->charset = enc;
1655 return(0);
1656 case XML_CHAR_ENCODING_2022_JP:
1657 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1659 ctxt->sax->error(ctxt->userData,
1660 "char encoding ISO-2022-JPnot supported\n");
1661 break;
1662 case XML_CHAR_ENCODING_SHIFT_JIS:
1663 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1665 ctxt->sax->error(ctxt->userData,
1666 "char encoding Shift_JIS not supported\n");
1667 break;
1668 case XML_CHAR_ENCODING_EUC_JP:
1669 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1671 ctxt->sax->error(ctxt->userData,
1672 "char encoding EUC-JPnot supported\n");
1673 break;
1674 }
1675 }
1676 if (handler == NULL)
1677 return(-1);
1678 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1679 return(xmlSwitchToEncoding(ctxt, handler));
1680}
1681
1682/**
1683 * xmlSwitchToEncoding:
1684 * @ctxt: the parser context
1685 * @handler: the encoding handler
1686 *
1687 * change the input functions when discovering the character encoding
1688 * of a given entity.
1689 *
1690 * Returns 0 in case of success, -1 otherwise
1691 */
1692int
1693xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1694{
1695 int nbchars;
1696
1697 if (handler != NULL) {
1698 if (ctxt->input != NULL) {
1699 if (ctxt->input->buf != NULL) {
1700 if (ctxt->input->buf->encoder != NULL) {
1701 if (ctxt->input->buf->encoder == handler)
1702 return(0);
1703 /*
1704 * Note: this is a bit dangerous, but that's what it
1705 * takes to use nearly compatible signature for different
1706 * encodings.
1707 */
1708 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1709 ctxt->input->buf->encoder = handler;
1710 return(0);
1711 }
1712 ctxt->input->buf->encoder = handler;
1713
1714 /*
1715 * Is there already some content down the pipe to convert ?
1716 */
1717 if ((ctxt->input->buf->buffer != NULL) &&
1718 (ctxt->input->buf->buffer->use > 0)) {
1719 int processed;
1720
1721 /*
1722 * Specific handling of the Byte Order Mark for
1723 * UTF-16
1724 */
1725 if ((handler->name != NULL) &&
1726 (!strcmp(handler->name, "UTF-16LE")) &&
1727 (ctxt->input->cur[0] == 0xFF) &&
1728 (ctxt->input->cur[1] == 0xFE)) {
1729 ctxt->input->cur += 2;
1730 }
1731 if ((handler->name != NULL) &&
1732 (!strcmp(handler->name, "UTF-16BE")) &&
1733 (ctxt->input->cur[0] == 0xFE) &&
1734 (ctxt->input->cur[1] == 0xFF)) {
1735 ctxt->input->cur += 2;
1736 }
1737
1738 /*
1739 * Shring the current input buffer.
1740 * Move it as the raw buffer and create a new input buffer
1741 */
1742 processed = ctxt->input->cur - ctxt->input->base;
1743 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1744 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1745 ctxt->input->buf->buffer = xmlBufferCreate();
1746
1747 if (ctxt->html) {
1748 /*
1749 * converst as much as possbile of the buffer
1750 */
1751 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1752 ctxt->input->buf->buffer,
1753 ctxt->input->buf->raw);
1754 } else {
1755 /*
1756 * convert just enough to get
1757 * '<?xml version="1.0" encoding="xxx"?>'
1758 * parsed with the autodetected encoding
1759 * into the parser reading buffer.
1760 */
1761 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1762 ctxt->input->buf->buffer,
1763 ctxt->input->buf->raw);
1764 }
1765 if (nbchars < 0) {
1766 xmlGenericError(xmlGenericErrorContext,
1767 "xmlSwitchToEncoding: encoder error\n");
1768 return(-1);
1769 }
1770 ctxt->input->base =
1771 ctxt->input->cur = ctxt->input->buf->buffer->content;
1772
1773 }
1774 return(0);
1775 } else {
1776 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1777 /*
1778 * When parsing a static memory array one must know the
1779 * size to be able to convert the buffer.
1780 */
1781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1782 ctxt->sax->error(ctxt->userData,
1783 "xmlSwitchEncoding : no input\n");
1784 return(-1);
1785 } else {
1786 int processed;
1787
1788 /*
1789 * Shring the current input buffer.
1790 * Move it as the raw buffer and create a new input buffer
1791 */
1792 processed = ctxt->input->cur - ctxt->input->base;
1793
1794 ctxt->input->buf->raw = xmlBufferCreate();
1795 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1796 ctxt->input->length - processed);
1797 ctxt->input->buf->buffer = xmlBufferCreate();
1798
1799 /*
1800 * convert as much as possible of the raw input
1801 * to the parser reading buffer.
1802 */
1803 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1804 ctxt->input->buf->buffer,
1805 ctxt->input->buf->raw);
1806 if (nbchars < 0) {
1807 xmlGenericError(xmlGenericErrorContext,
1808 "xmlSwitchToEncoding: encoder error\n");
1809 return(-1);
1810 }
1811
1812 /*
1813 * Conversion succeeded, get rid of the old buffer
1814 */
1815 if ((ctxt->input->free != NULL) &&
1816 (ctxt->input->base != NULL))
1817 ctxt->input->free((xmlChar *) ctxt->input->base);
1818 ctxt->input->base =
1819 ctxt->input->cur = ctxt->input->buf->buffer->content;
1820 }
1821 }
1822 } else {
1823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1824 ctxt->sax->error(ctxt->userData,
1825 "xmlSwitchEncoding : no input\n");
1826 return(-1);
1827 }
1828 /*
1829 * The parsing is now done in UTF8 natively
1830 */
1831 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1832 } else
1833 return(-1);
1834 return(0);
1835
1836}
1837
1838/************************************************************************
1839 * *
1840 * Commodity functions to handle entities processing *
1841 * *
1842 ************************************************************************/
1843
1844/**
1845 * xmlFreeInputStream:
1846 * @input: an xmlParserInputPtr
1847 *
1848 * Free up an input stream.
1849 */
1850void
1851xmlFreeInputStream(xmlParserInputPtr input) {
1852 if (input == NULL) return;
1853
1854 if (input->filename != NULL) xmlFree((char *) input->filename);
1855 if (input->directory != NULL) xmlFree((char *) input->directory);
1856 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1857 if (input->version != NULL) xmlFree((char *) input->version);
1858 if ((input->free != NULL) && (input->base != NULL))
1859 input->free((xmlChar *) input->base);
1860 if (input->buf != NULL)
1861 xmlFreeParserInputBuffer(input->buf);
1862 memset(input, -1, sizeof(xmlParserInput));
1863 xmlFree(input);
1864}
1865
1866/**
1867 * xmlNewInputStream:
1868 * @ctxt: an XML parser context
1869 *
1870 * Create a new input stream structure
1871 * Returns the new input stream or NULL
1872 */
1873xmlParserInputPtr
1874xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1875 xmlParserInputPtr input;
1876
1877 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1878 if (input == NULL) {
1879 if (ctxt != NULL) {
1880 ctxt->errNo = XML_ERR_NO_MEMORY;
1881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1882 ctxt->sax->error(ctxt->userData,
1883 "malloc: couldn't allocate a new input stream\n");
1884 ctxt->errNo = XML_ERR_NO_MEMORY;
1885 }
1886 return(NULL);
1887 }
1888 memset(input, 0, sizeof(xmlParserInput));
1889 input->line = 1;
1890 input->col = 1;
1891 input->standalone = -1;
1892 return(input);
1893}
1894
1895/**
1896 * xmlNewIOInputStream:
1897 * @ctxt: an XML parser context
1898 * @input: an I/O Input
1899 * @enc: the charset encoding if known
1900 *
1901 * Create a new input stream structure encapsulating the @input into
1902 * a stream suitable for the parser.
1903 *
1904 * Returns the new input stream or NULL
1905 */
1906xmlParserInputPtr
1907xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1908 xmlCharEncoding enc) {
1909 xmlParserInputPtr inputStream;
1910
1911 if (xmlParserDebugEntities)
1912 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1913 inputStream = xmlNewInputStream(ctxt);
1914 if (inputStream == NULL) {
1915 return(NULL);
1916 }
1917 inputStream->filename = NULL;
1918 inputStream->buf = input;
1919 inputStream->base = inputStream->buf->buffer->content;
1920 inputStream->cur = inputStream->buf->buffer->content;
1921 if (enc != XML_CHAR_ENCODING_NONE) {
1922 xmlSwitchEncoding(ctxt, enc);
1923 }
1924
1925 return(inputStream);
1926}
1927
1928/**
1929 * xmlNewEntityInputStream:
1930 * @ctxt: an XML parser context
1931 * @entity: an Entity pointer
1932 *
1933 * Create a new input stream based on an xmlEntityPtr
1934 *
1935 * Returns the new input stream or NULL
1936 */
1937xmlParserInputPtr
1938xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1939 xmlParserInputPtr input;
1940
1941 if (entity == NULL) {
1942 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1944 ctxt->sax->error(ctxt->userData,
1945 "internal: xmlNewEntityInputStream entity = NULL\n");
1946 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1947 return(NULL);
1948 }
1949 if (xmlParserDebugEntities)
1950 xmlGenericError(xmlGenericErrorContext,
1951 "new input from entity: %s\n", entity->name);
1952 if (entity->content == NULL) {
1953 switch (entity->etype) {
1954 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1955 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1957 ctxt->sax->error(ctxt->userData,
1958 "xmlNewEntityInputStream unparsed entity !\n");
1959 break;
1960 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1961 case XML_EXTERNAL_PARAMETER_ENTITY:
1962 return(xmlLoadExternalEntity((char *) entity->URI,
1963 (char *) entity->ExternalID, ctxt));
1964 case XML_INTERNAL_GENERAL_ENTITY:
1965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1966 ctxt->sax->error(ctxt->userData,
1967 "Internal entity %s without content !\n", entity->name);
1968 break;
1969 case XML_INTERNAL_PARAMETER_ENTITY:
1970 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1972 ctxt->sax->error(ctxt->userData,
1973 "Internal parameter entity %s without content !\n", entity->name);
1974 break;
1975 case XML_INTERNAL_PREDEFINED_ENTITY:
1976 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1978 ctxt->sax->error(ctxt->userData,
1979 "Predefined entity %s without content !\n", entity->name);
1980 break;
1981 }
1982 return(NULL);
1983 }
1984 input = xmlNewInputStream(ctxt);
1985 if (input == NULL) {
1986 return(NULL);
1987 }
1988 input->filename = (char *) entity->URI;
1989 input->base = entity->content;
1990 input->cur = entity->content;
1991 input->length = entity->length;
1992 return(input);
1993}
1994
1995/**
1996 * xmlNewStringInputStream:
1997 * @ctxt: an XML parser context
1998 * @buffer: an memory buffer
1999 *
2000 * Create a new input stream based on a memory buffer.
2001 * Returns the new input stream
2002 */
2003xmlParserInputPtr
2004xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2005 xmlParserInputPtr input;
2006
2007 if (buffer == NULL) {
2008 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2010 ctxt->sax->error(ctxt->userData,
2011 "internal: xmlNewStringInputStream string = NULL\n");
2012 return(NULL);
2013 }
2014 if (xmlParserDebugEntities)
2015 xmlGenericError(xmlGenericErrorContext,
2016 "new fixed input: %.30s\n", buffer);
2017 input = xmlNewInputStream(ctxt);
2018 if (input == NULL) {
2019 return(NULL);
2020 }
2021 input->base = buffer;
2022 input->cur = buffer;
2023 input->length = xmlStrlen(buffer);
2024 return(input);
2025}
2026
2027/**
2028 * xmlNewInputFromFile:
2029 * @ctxt: an XML parser context
2030 * @filename: the filename to use as entity
2031 *
2032 * Create a new input stream based on a file.
2033 *
2034 * Returns the new input stream or NULL in case of error
2035 */
2036xmlParserInputPtr
2037xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2038 xmlParserInputBufferPtr buf;
2039 xmlParserInputPtr inputStream;
2040 char *directory = NULL;
2041 xmlChar *URI = NULL;
2042
2043 if (xmlParserDebugEntities)
2044 xmlGenericError(xmlGenericErrorContext,
2045 "new input from file: %s\n", filename);
2046 if (ctxt == NULL) return(NULL);
2047 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2048 if (buf == NULL)
2049 return(NULL);
2050
2051 URI = xmlStrdup((xmlChar *) filename);
2052 directory = xmlParserGetDirectory((const char *) URI);
2053
2054 inputStream = xmlNewInputStream(ctxt);
2055 if (inputStream == NULL) {
2056 if (directory != NULL) xmlFree((char *) directory);
2057 if (URI != NULL) xmlFree((char *) URI);
2058 return(NULL);
2059 }
2060
2061 inputStream->filename = (const char *) URI;
2062 inputStream->directory = directory;
2063 inputStream->buf = buf;
2064
2065 inputStream->base = inputStream->buf->buffer->content;
2066 inputStream->cur = inputStream->buf->buffer->content;
2067 if ((ctxt->directory == NULL) && (directory != NULL))
2068 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2069 return(inputStream);
2070}
2071
2072/************************************************************************
2073 * *
2074 * Commodity functions to handle parser contexts *
2075 * *
2076 ************************************************************************/
2077
2078/**
2079 * xmlInitParserCtxt:
2080 * @ctxt: an XML parser context
2081 *
2082 * Initialize a parser context
2083 */
2084
2085void
2086xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2087{
2088 xmlSAXHandler *sax;
2089
2090 xmlDefaultSAXHandlerInit();
2091
2092 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2093 if (sax == NULL) {
2094 xmlGenericError(xmlGenericErrorContext,
2095 "xmlInitParserCtxt: out of memory\n");
2096 }
2097 else
2098 memset(sax, 0, sizeof(xmlSAXHandler));
2099
2100 /* Allocate the Input stack */
2101 ctxt->inputTab = (xmlParserInputPtr *)
2102 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2103 if (ctxt->inputTab == NULL) {
2104 xmlGenericError(xmlGenericErrorContext,
2105 "xmlInitParserCtxt: out of memory\n");
2106 ctxt->inputNr = 0;
2107 ctxt->inputMax = 0;
2108 ctxt->input = NULL;
2109 return;
2110 }
2111 ctxt->inputNr = 0;
2112 ctxt->inputMax = 5;
2113 ctxt->input = NULL;
2114
2115 ctxt->version = NULL;
2116 ctxt->encoding = NULL;
2117 ctxt->standalone = -1;
2118 ctxt->hasExternalSubset = 0;
2119 ctxt->hasPErefs = 0;
2120 ctxt->html = 0;
2121 ctxt->external = 0;
2122 ctxt->instate = XML_PARSER_START;
2123 ctxt->token = 0;
2124 ctxt->directory = NULL;
2125
2126 /* Allocate the Node stack */
2127 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2128 if (ctxt->nodeTab == NULL) {
2129 xmlGenericError(xmlGenericErrorContext,
2130 "xmlInitParserCtxt: out of memory\n");
2131 ctxt->nodeNr = 0;
2132 ctxt->nodeMax = 0;
2133 ctxt->node = NULL;
2134 ctxt->inputNr = 0;
2135 ctxt->inputMax = 0;
2136 ctxt->input = NULL;
2137 return;
2138 }
2139 ctxt->nodeNr = 0;
2140 ctxt->nodeMax = 10;
2141 ctxt->node = NULL;
2142
2143 /* Allocate the Name stack */
2144 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2145 if (ctxt->nameTab == NULL) {
2146 xmlGenericError(xmlGenericErrorContext,
2147 "xmlInitParserCtxt: out of memory\n");
2148 ctxt->nodeNr = 0;
2149 ctxt->nodeMax = 0;
2150 ctxt->node = NULL;
2151 ctxt->inputNr = 0;
2152 ctxt->inputMax = 0;
2153 ctxt->input = NULL;
2154 ctxt->nameNr = 0;
2155 ctxt->nameMax = 0;
2156 ctxt->name = NULL;
2157 return;
2158 }
2159 ctxt->nameNr = 0;
2160 ctxt->nameMax = 10;
2161 ctxt->name = NULL;
2162
2163 /* Allocate the space stack */
2164 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2165 if (ctxt->spaceTab == NULL) {
2166 xmlGenericError(xmlGenericErrorContext,
2167 "xmlInitParserCtxt: out of memory\n");
2168 ctxt->nodeNr = 0;
2169 ctxt->nodeMax = 0;
2170 ctxt->node = NULL;
2171 ctxt->inputNr = 0;
2172 ctxt->inputMax = 0;
2173 ctxt->input = NULL;
2174 ctxt->nameNr = 0;
2175 ctxt->nameMax = 0;
2176 ctxt->name = NULL;
2177 ctxt->spaceNr = 0;
2178 ctxt->spaceMax = 0;
2179 ctxt->space = NULL;
2180 return;
2181 }
2182 ctxt->spaceNr = 1;
2183 ctxt->spaceMax = 10;
2184 ctxt->spaceTab[0] = -1;
2185 ctxt->space = &ctxt->spaceTab[0];
2186
2187 if (sax == NULL) {
2188 ctxt->sax = &xmlDefaultSAXHandler;
2189 } else {
2190 ctxt->sax = sax;
2191 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2192 }
2193 ctxt->userData = ctxt;
2194 ctxt->myDoc = NULL;
2195 ctxt->wellFormed = 1;
2196 ctxt->valid = 1;
2197 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2198 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2199 ctxt->pedantic = xmlPedanticParserDefaultValue;
2200 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2201 ctxt->vctxt.userData = ctxt;
2202 if (ctxt->validate) {
2203 ctxt->vctxt.error = xmlParserValidityError;
2204 if (xmlGetWarningsDefaultValue == 0)
2205 ctxt->vctxt.warning = NULL;
2206 else
2207 ctxt->vctxt.warning = xmlParserValidityWarning;
2208 /* Allocate the Node stack */
2209 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2210 if (ctxt->vctxt.nodeTab == NULL) {
2211 xmlGenericError(xmlGenericErrorContext,
2212 "xmlInitParserCtxt: out of memory\n");
2213 ctxt->vctxt.nodeMax = 0;
2214 ctxt->validate = 0;
2215 ctxt->vctxt.error = NULL;
2216 ctxt->vctxt.warning = NULL;
2217 } else {
2218 ctxt->vctxt.nodeNr = 0;
2219 ctxt->vctxt.nodeMax = 4;
2220 ctxt->vctxt.node = NULL;
2221 }
2222 } else {
2223 ctxt->vctxt.error = NULL;
2224 ctxt->vctxt.warning = NULL;
2225 }
2226 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2227 ctxt->record_info = 0;
2228 ctxt->nbChars = 0;
2229 ctxt->checkIndex = 0;
2230 ctxt->inSubset = 0;
2231 ctxt->errNo = XML_ERR_OK;
2232 ctxt->depth = 0;
2233 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2234 xmlInitNodeInfoSeq(&ctxt->node_seq);
2235}
2236
2237/**
2238 * xmlFreeParserCtxt:
2239 * @ctxt: an XML parser context
2240 *
2241 * Free all the memory used by a parser context. However the parsed
2242 * document in ctxt->myDoc is not freed.
2243 */
2244
2245void
2246xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2247{
2248 xmlParserInputPtr input;
2249 xmlChar *oldname;
2250
2251 if (ctxt == NULL) return;
2252
2253 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2254 xmlFreeInputStream(input);
2255 }
2256 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2257 xmlFree(oldname);
2258 }
2259 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2260 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2261 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2262 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2263 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2264 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2265 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2266 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2267 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2268 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2269 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2270 xmlFree(ctxt->sax);
2271 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2272 xmlFree(ctxt);
2273}
2274
2275/**
2276 * xmlNewParserCtxt:
2277 *
2278 * Allocate and initialize a new parser context.
2279 *
2280 * Returns the xmlParserCtxtPtr or NULL
2281 */
2282
2283xmlParserCtxtPtr
2284xmlNewParserCtxt()
2285{
2286 xmlParserCtxtPtr ctxt;
2287
2288 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2289 if (ctxt == NULL) {
2290 xmlGenericError(xmlGenericErrorContext,
2291 "xmlNewParserCtxt : cannot allocate context\n");
2292 perror("malloc");
2293 return(NULL);
2294 }
2295 memset(ctxt, 0, sizeof(xmlParserCtxt));
2296 xmlInitParserCtxt(ctxt);
2297 return(ctxt);
2298}
2299
2300/************************************************************************
2301 * *
2302 * Handling of node informations *
2303 * *
2304 ************************************************************************/
2305
2306/**
2307 * xmlClearParserCtxt:
2308 * @ctxt: an XML parser context
2309 *
2310 * Clear (release owned resources) and reinitialize a parser context
2311 */
2312
2313void
2314xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2315{
2316 xmlClearNodeInfoSeq(&ctxt->node_seq);
2317 xmlInitParserCtxt(ctxt);
2318}
2319
2320/**
2321 * xmlParserFindNodeInfo:
2322 * @ctxt: an XML parser context
2323 * @node: an XML node within the tree
2324 *
2325 * Find the parser node info struct for a given node
2326 *
2327 * Returns an xmlParserNodeInfo block pointer or NULL
2328 */
2329const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2330 const xmlNode* node)
2331{
2332 unsigned long pos;
2333
2334 /* Find position where node should be at */
2335 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2336 if ( ctx->node_seq.buffer[pos].node == node )
2337 return &ctx->node_seq.buffer[pos];
2338 else
2339 return NULL;
2340}
2341
2342
2343/**
2344 * xmlInitNodeInfoSeq:
2345 * @seq: a node info sequence pointer
2346 *
2347 * -- Initialize (set to initial state) node info sequence
2348 */
2349void
2350xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2351{
2352 seq->length = 0;
2353 seq->maximum = 0;
2354 seq->buffer = NULL;
2355}
2356
2357/**
2358 * xmlClearNodeInfoSeq:
2359 * @seq: a node info sequence pointer
2360 *
2361 * -- Clear (release memory and reinitialize) node
2362 * info sequence
2363 */
2364void
2365xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2366{
2367 if ( seq->buffer != NULL )
2368 xmlFree(seq->buffer);
2369 xmlInitNodeInfoSeq(seq);
2370}
2371
2372
2373/**
2374 * xmlParserFindNodeInfoIndex:
2375 * @seq: a node info sequence pointer
2376 * @node: an XML node pointer
2377 *
2378 *
2379 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2380 * the given node is or should be at in a sorted sequence
2381 *
2382 * Returns a long indicating the position of the record
2383 */
2384unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2385 const xmlNode* node)
2386{
2387 unsigned long upper, lower, middle;
2388 int found = 0;
2389
2390 /* Do a binary search for the key */
2391 lower = 1;
2392 upper = seq->length;
2393 middle = 0;
2394 while ( lower <= upper && !found) {
2395 middle = lower + (upper - lower) / 2;
2396 if ( node == seq->buffer[middle - 1].node )
2397 found = 1;
2398 else if ( node < seq->buffer[middle - 1].node )
2399 upper = middle - 1;
2400 else
2401 lower = middle + 1;
2402 }
2403
2404 /* Return position */
2405 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2406 return middle;
2407 else
2408 return middle - 1;
2409}
2410
2411
2412/**
2413 * xmlParserAddNodeInfo:
2414 * @ctxt: an XML parser context
2415 * @info: a node info sequence pointer
2416 *
2417 * Insert node info record into the sorted sequence
2418 */
2419void
2420xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2421 const xmlParserNodeInfo* info)
2422{
2423 unsigned long pos;
2424 static unsigned int block_size = 5;
2425
2426 /* Find pos and check to see if node is already in the sequence */
2427 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2428 if ( pos < ctxt->node_seq.length
2429 && ctxt->node_seq.buffer[pos].node == info->node ) {
2430 ctxt->node_seq.buffer[pos] = *info;
2431 }
2432
2433 /* Otherwise, we need to add new node to buffer */
2434 else {
2435 /* Expand buffer by 5 if needed */
2436 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2437 xmlParserNodeInfo* tmp_buffer;
2438 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2439 *(ctxt->node_seq.maximum + block_size));
2440
2441 if ( ctxt->node_seq.buffer == NULL )
2442 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2443 else
2444 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2445
2446 if ( tmp_buffer == NULL ) {
2447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2449 ctxt->errNo = XML_ERR_NO_MEMORY;
2450 return;
2451 }
2452 ctxt->node_seq.buffer = tmp_buffer;
2453 ctxt->node_seq.maximum += block_size;
2454 }
2455
2456 /* If position is not at end, move elements out of the way */
2457 if ( pos != ctxt->node_seq.length ) {
2458 unsigned long i;
2459
2460 for ( i = ctxt->node_seq.length; i > pos; i-- )
2461 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2462 }
2463
2464 /* Copy element and increase length */
2465 ctxt->node_seq.buffer[pos] = *info;
2466 ctxt->node_seq.length++;
2467 }
2468}
2469
2470/************************************************************************
2471 * *
2472 * Deprecated functions kept for compatibility *
2473 * *
2474 ************************************************************************/
2475
2476/*
2477 * xmlCheckLanguageID
2478 * @lang: pointer to the string value
2479 *
2480 * Checks that the value conforms to the LanguageID production:
2481 *
2482 * NOTE: this is somewhat deprecated, those productions were removed from
2483 * the XML Second edition.
2484 *
2485 * [33] LanguageID ::= Langcode ('-' Subcode)*
2486 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2487 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2488 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2489 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2490 * [38] Subcode ::= ([a-z] | [A-Z])+
2491 *
2492 * Returns 1 if correct 0 otherwise
2493 **/
2494int
2495xmlCheckLanguageID(const xmlChar *lang) {
2496 const xmlChar *cur = lang;
2497
2498 if (cur == NULL)
2499 return(0);
2500 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2501 ((cur[0] == 'I') && (cur[1] == '-'))) {
2502 /*
2503 * IANA code
2504 */
2505 cur += 2;
2506 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2507 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2508 cur++;
2509 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2510 ((cur[0] == 'X') && (cur[1] == '-'))) {
2511 /*
2512 * User code
2513 */
2514 cur += 2;
2515 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2516 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2517 cur++;
2518 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2519 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2520 /*
2521 * ISO639
2522 */
2523 cur++;
2524 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2525 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2526 cur++;
2527 else
2528 return(0);
2529 } else
2530 return(0);
2531 while (cur[0] != 0) { /* non input consuming */
2532 if (cur[0] != '-')
2533 return(0);
2534 cur++;
2535 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2536 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2537 cur++;
2538 else
2539 return(0);
2540 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2541 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2542 cur++;
2543 }
2544 return(1);
2545}
2546
2547/**
2548 * xmlDecodeEntities:
2549 * @ctxt: the parser context
2550 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2551 * @len: the len to decode (in bytes !), -1 for no size limit
2552 * @end: an end marker xmlChar, 0 if none
2553 * @end2: an end marker xmlChar, 0 if none
2554 * @end3: an end marker xmlChar, 0 if none
2555 *
2556 * This function is deprecated, we now always process entities content
2557 * through xmlStringDecodeEntities
2558 *
2559 * TODO: remove it in next major release.
2560 *
2561 * [67] Reference ::= EntityRef | CharRef
2562 *
2563 * [69] PEReference ::= '%' Name ';'
2564 *
2565 * Returns A newly allocated string with the substitution done. The caller
2566 * must deallocate it !
2567 */
2568xmlChar *
2569xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2570 xmlChar end, xmlChar end2, xmlChar end3) {
2571#if 0
2572 xmlChar *buffer = NULL;
2573 unsigned int buffer_size = 0;
2574 unsigned int nbchars = 0;
2575
2576 xmlChar *current = NULL;
2577 xmlEntityPtr ent;
2578 unsigned int max = (unsigned int) len;
2579 int c,l;
2580#endif
2581
2582 static int deprecated = 0;
2583 if (!deprecated) {
2584 xmlGenericError(xmlGenericErrorContext,
2585 "xmlDecodeEntities() deprecated function reached\n");
2586 deprecated = 1;
2587 }
2588
2589#if 0
2590 if (ctxt->depth > 40) {
2591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2592 ctxt->sax->error(ctxt->userData,
2593 "Detected entity reference loop\n");
2594 ctxt->wellFormed = 0;
2595 ctxt->disableSAX = 1;
2596 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2597 return(NULL);
2598 }
2599
2600 /*
2601 * allocate a translation buffer.
2602 */
2603 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2604 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2605 if (buffer == NULL) {
2606 perror("xmlDecodeEntities: malloc failed");
2607 return(NULL);
2608 }
2609
2610 /*
2611 * Ok loop until we reach one of the ending char or a size limit.
2612 */
2613 GROW;
2614 c = CUR_CHAR(l);
2615 while ((nbchars < max) && (c != end) && /* NOTUSED */
2616 (c != end2) && (c != end3)) {
2617 GROW;
2618 if (c == 0) break;
2619 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2620 int val = xmlParseCharRef(ctxt);
2621 COPY_BUF(0,buffer,nbchars,val);
2622 NEXTL(l);
2623 } else if ((c == '&') && (ctxt->token != '&') &&
2624 (what & XML_SUBSTITUTE_REF)) {
2625 if (xmlParserDebugEntities)
2626 xmlGenericError(xmlGenericErrorContext,
2627 "decoding Entity Reference\n");
2628 ent = xmlParseEntityRef(ctxt);
2629 if ((ent != NULL) &&
2630 (ctxt->replaceEntities != 0)) {
2631 current = ent->content;
2632 while (*current != 0) { /* non input consuming loop */
2633 buffer[nbchars++] = *current++;
2634 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2635 growBuffer(buffer);
2636 }
2637 }
2638 } else if (ent != NULL) {
2639 const xmlChar *cur = ent->name;
2640
2641 buffer[nbchars++] = '&';
2642 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2643 growBuffer(buffer);
2644 }
2645 while (*cur != 0) { /* non input consuming loop */
2646 buffer[nbchars++] = *cur++;
2647 }
2648 buffer[nbchars++] = ';';
2649 }
2650 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2651 /*
2652 * a PEReference induce to switch the entity flow,
2653 * we break here to flush the current set of chars
2654 * parsed if any. We will be called back later.
2655 */
2656 if (xmlParserDebugEntities)
2657 xmlGenericError(xmlGenericErrorContext,
2658 "decoding PE Reference\n");
2659 if (nbchars != 0) break;
2660
2661 xmlParsePEReference(ctxt);
2662
2663 /*
2664 * Pop-up of finished entities.
2665 */
2666 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2667 xmlPopInput(ctxt);
2668
2669 break;
2670 } else {
2671 COPY_BUF(l,buffer,nbchars,c);
2672 NEXTL(l);
2673 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2674 growBuffer(buffer);
2675 }
2676 }
2677 c = CUR_CHAR(l);
2678 }
2679 buffer[nbchars++] = 0;
2680 return(buffer);
2681#endif
2682 return(NULL);
2683}
2684
2685/**
2686 * xmlNamespaceParseNCName:
2687 * @ctxt: an XML parser context
2688 *
2689 * parse an XML namespace name.
2690 *
2691 * TODO: this seems not in use anymore, the namespace handling is done on
2692 * top of the SAX interfaces, i.e. not on raw input.
2693 *
2694 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2695 *
2696 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2697 * CombiningChar | Extender
2698 *
2699 * Returns the namespace name or NULL
2700 */
2701
2702xmlChar *
2703xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2704#if 0
2705 xmlChar buf[XML_MAX_NAMELEN + 5];
2706 int len = 0, l;
2707 int cur = CUR_CHAR(l);
2708#endif
2709
2710 static int deprecated = 0;
2711 if (!deprecated) {
2712 xmlGenericError(xmlGenericErrorContext,
2713 "xmlNamespaceParseNCName() deprecated function reached\n");
2714 deprecated = 1;
2715 }
2716
2717#if 0
2718 /* load first the value of the char !!! */
2719 GROW;
2720 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2721
2722xmlGenericError(xmlGenericErrorContext,
2723 "xmlNamespaceParseNCName: reached loop 3\n");
2724 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2725 (cur == '.') || (cur == '-') ||
2726 (cur == '_') ||
2727 (IS_COMBINING(cur)) ||
2728 (IS_EXTENDER(cur))) {
2729 COPY_BUF(l,buf,len,cur);
2730 NEXTL(l);
2731 cur = CUR_CHAR(l);
2732 if (len >= XML_MAX_NAMELEN) {
2733 xmlGenericError(xmlGenericErrorContext,
2734 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2735 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2736 (cur == '.') || (cur == '-') ||
2737 (cur == '_') ||
2738 (IS_COMBINING(cur)) ||
2739 (IS_EXTENDER(cur))) {
2740 NEXTL(l);
2741 cur = CUR_CHAR(l);
2742 }
2743 break;
2744 }
2745 }
2746 return(xmlStrndup(buf, len));
2747#endif
2748 return(NULL);
2749}
2750
2751/**
2752 * xmlNamespaceParseQName:
2753 * @ctxt: an XML parser context
2754 * @prefix: a xmlChar **
2755 *
2756 * TODO: this seems not in use anymore, the namespace handling is done on
2757 * top of the SAX interfaces, i.e. not on raw input.
2758 *
2759 * parse an XML qualified name
2760 *
2761 * [NS 5] QName ::= (Prefix ':')? LocalPart
2762 *
2763 * [NS 6] Prefix ::= NCName
2764 *
2765 * [NS 7] LocalPart ::= NCName
2766 *
2767 * Returns the local part, and prefix is updated
2768 * to get the Prefix if any.
2769 */
2770
2771xmlChar *
2772xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2773
2774 static int deprecated = 0;
2775 if (!deprecated) {
2776 xmlGenericError(xmlGenericErrorContext,
2777 "xmlNamespaceParseQName() deprecated function reached\n");
2778 deprecated = 1;
2779 }
2780
2781#if 0
2782 xmlChar *ret = NULL;
2783
2784 *prefix = NULL;
2785 ret = xmlNamespaceParseNCName(ctxt);
2786 if (RAW == ':') {
2787 *prefix = ret;
2788 NEXT;
2789 ret = xmlNamespaceParseNCName(ctxt);
2790 }
2791
2792 return(ret);
2793#endif
2794 return(NULL);
2795}
2796
2797/**
2798 * xmlNamespaceParseNSDef:
2799 * @ctxt: an XML parser context
2800 *
2801 * parse a namespace prefix declaration
2802 *
2803 * TODO: this seems not in use anymore, the namespace handling is done on
2804 * top of the SAX interfaces, i.e. not on raw input.
2805 *
2806 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2807 *
2808 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2809 *
2810 * Returns the namespace name
2811 */
2812
2813xmlChar *
2814xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2815 static int deprecated = 0;
2816 if (!deprecated) {
2817 xmlGenericError(xmlGenericErrorContext,
2818 "xmlNamespaceParseNSDef() deprecated function reached\n");
2819 deprecated = 1;
2820 }
2821 return(NULL);
2822#if 0
2823 xmlChar *name = NULL;
2824
2825 if ((RAW == 'x') && (NXT(1) == 'm') &&
2826 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2827 (NXT(4) == 's')) {
2828 SKIP(5);
2829 if (RAW == ':') {
2830 NEXT;
2831 name = xmlNamespaceParseNCName(ctxt);
2832 }
2833 }
2834 return(name);
2835#endif
2836}
2837
2838/**
2839 * xmlParseQuotedString:
2840 * @ctxt: an XML parser context
2841 *
2842 * Parse and return a string between quotes or doublequotes
2843 *
2844 * TODO: Deprecated, to be removed at next drop of binary compatibility
2845 *
2846 * Returns the string parser or NULL.
2847 */
2848xmlChar *
2849xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2850 static int deprecated = 0;
2851 if (!deprecated) {
2852 xmlGenericError(xmlGenericErrorContext,
2853 "xmlParseQuotedString() deprecated function reached\n");
2854 deprecated = 1;
2855 }
2856 return(NULL);
2857
2858#if 0
2859 xmlChar *buf = NULL;
2860 int len = 0,l;
2861 int size = XML_PARSER_BUFFER_SIZE;
2862 int c;
2863
2864 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2865 if (buf == NULL) {
2866 xmlGenericError(xmlGenericErrorContext,
2867 "malloc of %d byte failed\n", size);
2868 return(NULL);
2869 }
2870xmlGenericError(xmlGenericErrorContext,
2871 "xmlParseQuotedString: reached loop 4\n");
2872 if (RAW == '"') {
2873 NEXT;
2874 c = CUR_CHAR(l);
2875 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2876 if (len + 5 >= size) {
2877 size *= 2;
2878 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2879 if (buf == NULL) {
2880 xmlGenericError(xmlGenericErrorContext,
2881 "realloc of %d byte failed\n", size);
2882 return(NULL);
2883 }
2884 }
2885 COPY_BUF(l,buf,len,c);
2886 NEXTL(l);
2887 c = CUR_CHAR(l);
2888 }
2889 if (c != '"') {
2890 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2892 ctxt->sax->error(ctxt->userData,
2893 "String not closed \"%.50s\"\n", buf);
2894 ctxt->wellFormed = 0;
2895 ctxt->disableSAX = 1;
2896 } else {
2897 NEXT;
2898 }
2899 } else if (RAW == '\''){
2900 NEXT;
2901 c = CUR;
2902 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2903 if (len + 1 >= size) {
2904 size *= 2;
2905 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2906 if (buf == NULL) {
2907 xmlGenericError(xmlGenericErrorContext,
2908 "realloc of %d byte failed\n", size);
2909 return(NULL);
2910 }
2911 }
2912 buf[len++] = c;
2913 NEXT;
2914 c = CUR;
2915 }
2916 if (RAW != '\'') {
2917 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2919 ctxt->sax->error(ctxt->userData,
2920 "String not closed \"%.50s\"\n", buf);
2921 ctxt->wellFormed = 0;
2922 ctxt->disableSAX = 1;
2923 } else {
2924 NEXT;
2925 }
2926 }
2927 return(buf);
2928#endif
2929}
2930
2931/**
2932 * xmlParseNamespace:
2933 * @ctxt: an XML parser context
2934 *
2935 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2936 *
2937 * This is what the older xml-name Working Draft specified, a bunch of
2938 * other stuff may still rely on it, so support is still here as
2939 * if it was declared on the root of the Tree:-(
2940 *
2941 * TODO: remove from library
2942 *
2943 * To be removed at next drop of binary compatibility
2944 */
2945
2946void
2947xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2948 static int deprecated = 0;
2949 if (!deprecated) {
2950 xmlGenericError(xmlGenericErrorContext,
2951 "xmlParseNamespace() deprecated function reached\n");
2952 deprecated = 1;
2953 }
2954
2955#if 0
2956 xmlChar *href = NULL;
2957 xmlChar *prefix = NULL;
2958 int garbage = 0;
2959
2960 /*
2961 * We just skipped "namespace" or "xml:namespace"
2962 */
2963 SKIP_BLANKS;
2964
2965xmlGenericError(xmlGenericErrorContext,
2966 "xmlParseNamespace: reached loop 5\n");
2967 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2968 /*
2969 * We can have "ns" or "prefix" attributes
2970 * Old encoding as 'href' or 'AS' attributes is still supported
2971 */
2972 if ((RAW == 'n') && (NXT(1) == 's')) {
2973 garbage = 0;
2974 SKIP(2);
2975 SKIP_BLANKS;
2976
2977 if (RAW != '=') continue;
2978 NEXT;
2979 SKIP_BLANKS;
2980
2981 href = xmlParseQuotedString(ctxt);
2982 SKIP_BLANKS;
2983 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2984 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2985 garbage = 0;
2986 SKIP(4);
2987 SKIP_BLANKS;
2988
2989 if (RAW != '=') continue;
2990 NEXT;
2991 SKIP_BLANKS;
2992
2993 href = xmlParseQuotedString(ctxt);
2994 SKIP_BLANKS;
2995 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2996 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2997 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2998 garbage = 0;
2999 SKIP(6);
3000 SKIP_BLANKS;
3001
3002 if (RAW != '=') continue;
3003 NEXT;
3004 SKIP_BLANKS;
3005
3006 prefix = xmlParseQuotedString(ctxt);
3007 SKIP_BLANKS;
3008 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3009 garbage = 0;
3010 SKIP(2);
3011 SKIP_BLANKS;
3012
3013 if (RAW != '=') continue;
3014 NEXT;
3015 SKIP_BLANKS;
3016
3017 prefix = xmlParseQuotedString(ctxt);
3018 SKIP_BLANKS;
3019 } else if ((RAW == '?') && (NXT(1) == '>')) {
3020 garbage = 0;
3021 NEXT;
3022 } else {
3023 /*
3024 * Found garbage when parsing the namespace
3025 */
3026 if (!garbage) {
3027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3028 ctxt->sax->error(ctxt->userData,
3029 "xmlParseNamespace found garbage\n");
3030 }
3031 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3032 ctxt->wellFormed = 0;
3033 ctxt->disableSAX = 1;
3034 NEXT;
3035 }
3036 }
3037
3038 MOVETO_ENDTAG(CUR_PTR);
3039 NEXT;
3040
3041 /*
3042 * Register the DTD.
3043 if (href != NULL)
3044 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3045 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3046 */
3047
3048 if (prefix != NULL) xmlFree(prefix);
3049 if (href != NULL) xmlFree(href);
3050#endif
3051}
3052
3053/**
3054 * xmlScanName:
3055 * @ctxt: an XML parser context
3056 *
3057 * Trickery: parse an XML name but without consuming the input flow
3058 * Needed for rollback cases. Used only when parsing entities references.
3059 *
3060 * TODO: seems deprecated now, only used in the default part of
3061 * xmlParserHandleReference
3062 *
3063 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3064 * CombiningChar | Extender
3065 *
3066 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3067 *
3068 * [6] Names ::= Name (S Name)*
3069 *
3070 * Returns the Name parsed or NULL
3071 */
3072
3073xmlChar *
3074xmlScanName(xmlParserCtxtPtr ctxt) {
3075 static int deprecated = 0;
3076 if (!deprecated) {
3077 xmlGenericError(xmlGenericErrorContext,
3078 "xmlScanName() deprecated function reached\n");
3079 deprecated = 1;
3080 }
3081 return(NULL);
3082
3083#if 0
3084 xmlChar buf[XML_MAX_NAMELEN];
3085 int len = 0;
3086
3087 GROW;
3088 if (!IS_LETTER(RAW) && (RAW != '_') &&
3089 (RAW != ':')) {
3090 return(NULL);
3091 }
3092
3093
3094 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3095 (NXT(len) == '.') || (NXT(len) == '-') ||
3096 (NXT(len) == '_') || (NXT(len) == ':') ||
3097 (IS_COMBINING(NXT(len))) ||
3098 (IS_EXTENDER(NXT(len)))) {
3099 GROW;
3100 buf[len] = NXT(len);
3101 len++;
3102 if (len >= XML_MAX_NAMELEN) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3105 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3106 (IS_DIGIT(NXT(len))) ||
3107 (NXT(len) == '.') || (NXT(len) == '-') ||
3108 (NXT(len) == '_') || (NXT(len) == ':') ||
3109 (IS_COMBINING(NXT(len))) ||
3110 (IS_EXTENDER(NXT(len))))
3111 len++;
3112 break;
3113 }
3114 }
3115 return(xmlStrndup(buf, len));
3116#endif
3117}
3118
3119/**
3120 * xmlParserHandleReference:
3121 * @ctxt: the parser context
3122 *
3123 * TODO: Remove, now deprecated ... the test is done directly in the
3124 * content parsing
3125 * routines.
3126 *
3127 * [67] Reference ::= EntityRef | CharRef
3128 *
3129 * [68] EntityRef ::= '&' Name ';'
3130 *
3131 * [ WFC: Entity Declared ]
3132 * the Name given in the entity reference must match that in an entity
3133 * declaration, except that well-formed documents need not declare any
3134 * of the following entities: amp, lt, gt, apos, quot.
3135 *
3136 * [ WFC: Parsed Entity ]
3137 * An entity reference must not contain the name of an unparsed entity
3138 *
3139 * [66] CharRef ::= '&#' [0-9]+ ';' |
3140 * '&#x' [0-9a-fA-F]+ ';'
3141 *
3142 * A PEReference may have been detectect in the current input stream
3143 * the handling is done accordingly to
3144 * http://www.w3.org/TR/REC-xml#entproc
3145 */
3146void
3147xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3148 static int deprecated = 0;
3149 if (!deprecated) {
3150 xmlGenericError(xmlGenericErrorContext,
3151 "xmlParserHandleReference() deprecated function reached\n");
3152 deprecated = 1;
3153 }
3154
3155#if 0
3156 xmlParserInputPtr input;
3157 xmlChar *name;
3158 xmlEntityPtr ent = NULL;
3159
3160 if (ctxt->token != 0) {
3161 return;
3162 }
3163 if (RAW != '&') return;
3164 GROW;
3165 if ((RAW == '&') && (NXT(1) == '#')) {
3166 switch(ctxt->instate) {
3167 case XML_PARSER_ENTITY_DECL:
3168 case XML_PARSER_PI:
3169 case XML_PARSER_CDATA_SECTION:
3170 case XML_PARSER_COMMENT:
3171 case XML_PARSER_SYSTEM_LITERAL:
3172 /* we just ignore it there */
3173 return;
3174 case XML_PARSER_START_TAG:
3175 return;
3176 case XML_PARSER_END_TAG:
3177 return;
3178 case XML_PARSER_EOF:
3179 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3181 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3182 ctxt->wellFormed = 0;
3183 ctxt->disableSAX = 1;
3184 return;
3185 case XML_PARSER_PROLOG:
3186 case XML_PARSER_START:
3187 case XML_PARSER_MISC:
3188 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3190 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3191 ctxt->wellFormed = 0;
3192 ctxt->disableSAX = 1;
3193 return;
3194 case XML_PARSER_EPILOG:
3195 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3197 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3198 ctxt->wellFormed = 0;
3199 ctxt->disableSAX = 1;
3200 return;
3201 case XML_PARSER_DTD:
3202 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3204 ctxt->sax->error(ctxt->userData,
3205 "CharRef are forbiden in DTDs!\n");
3206 ctxt->wellFormed = 0;
3207 ctxt->disableSAX = 1;
3208 return;
3209 case XML_PARSER_ENTITY_VALUE:
3210 /*
3211 * NOTE: in the case of entity values, we don't do the
3212 * substitution here since we need the literal
3213 * entity value to be able to save the internal
3214 * subset of the document.
3215 * This will be handled by xmlStringDecodeEntities
3216 */
3217 return;
3218 case XML_PARSER_CONTENT:
3219 return;
3220 case XML_PARSER_ATTRIBUTE_VALUE:
3221 /* ctxt->token = xmlParseCharRef(ctxt); */
3222 return;
3223 case XML_PARSER_IGNORE:
3224 return;
3225 }
3226 return;
3227 }
3228
3229 switch(ctxt->instate) {
3230 case XML_PARSER_CDATA_SECTION:
3231 return;
3232 case XML_PARSER_PI:
3233 case XML_PARSER_COMMENT:
3234 case XML_PARSER_SYSTEM_LITERAL:
3235 case XML_PARSER_CONTENT:
3236 return;
3237 case XML_PARSER_START_TAG:
3238 return;
3239 case XML_PARSER_END_TAG:
3240 return;
3241 case XML_PARSER_EOF:
3242 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3244 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3245 ctxt->wellFormed = 0;
3246 ctxt->disableSAX = 1;
3247 return;
3248 case XML_PARSER_PROLOG:
3249 case XML_PARSER_START:
3250 case XML_PARSER_MISC:
3251 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3254 ctxt->wellFormed = 0;
3255 ctxt->disableSAX = 1;
3256 return;
3257 case XML_PARSER_EPILOG:
3258 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3260 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3261 ctxt->wellFormed = 0;
3262 ctxt->disableSAX = 1;
3263 return;
3264 case XML_PARSER_ENTITY_VALUE:
3265 /*
3266 * NOTE: in the case of entity values, we don't do the
3267 * substitution here since we need the literal
3268 * entity value to be able to save the internal
3269 * subset of the document.
3270 * This will be handled by xmlStringDecodeEntities
3271 */
3272 return;
3273 case XML_PARSER_ATTRIBUTE_VALUE:
3274 /*
3275 * NOTE: in the case of attributes values, we don't do the
3276 * substitution here unless we are in a mode where
3277 * the parser is explicitely asked to substitute
3278 * entities. The SAX callback is called with values
3279 * without entity substitution.
3280 * This will then be handled by xmlStringDecodeEntities
3281 */
3282 return;
3283 case XML_PARSER_ENTITY_DECL:
3284 /*
3285 * we just ignore it there
3286 * the substitution will be done once the entity is referenced
3287 */
3288 return;
3289 case XML_PARSER_DTD:
3290 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3292 ctxt->sax->error(ctxt->userData,
3293 "Entity references are forbiden in DTDs!\n");
3294 ctxt->wellFormed = 0;
3295 ctxt->disableSAX = 1;
3296 return;
3297 case XML_PARSER_IGNORE:
3298 return;
3299 }
3300
3301/* TODO: this seems not reached anymore .... Verify ... */
3302xmlGenericError(xmlGenericErrorContext,
3303 "Reached deprecated section in xmlParserHandleReference()\n");
3304xmlGenericError(xmlGenericErrorContext,
3305 "Please forward the document to Daniel.Veillard@w3.org\n");
3306xmlGenericError(xmlGenericErrorContext,
3307 "indicating the version: %s, thanks !\n", xmlParserVersion);
3308 NEXT;
3309 name = xmlScanName(ctxt);
3310 if (name == NULL) {
3311 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3313 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3314 ctxt->wellFormed = 0;
3315 ctxt->disableSAX = 1;
3316 ctxt->token = '&';
3317 return;
3318 }
3319 if (NXT(xmlStrlen(name)) != ';') {
3320 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3322 ctxt->sax->error(ctxt->userData,
3323 "Entity reference: ';' expected\n");
3324 ctxt->wellFormed = 0;
3325 ctxt->disableSAX = 1;
3326 ctxt->token = '&';
3327 xmlFree(name);
3328 return;
3329 }
3330 SKIP(xmlStrlen(name) + 1);
3331 if (ctxt->sax != NULL) {
3332 if (ctxt->sax->getEntity != NULL)
3333 ent = ctxt->sax->getEntity(ctxt->userData, name);
3334 }
3335
3336 /*
3337 * [ WFC: Entity Declared ]
3338 * the Name given in the entity reference must match that in an entity
3339 * declaration, except that well-formed documents need not declare any
3340 * of the following entities: amp, lt, gt, apos, quot.
3341 */
3342 if (ent == NULL)
3343 ent = xmlGetPredefinedEntity(name);
3344 if (ent == NULL) {
3345 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3347 ctxt->sax->error(ctxt->userData,
3348 "Entity reference: entity %s not declared\n",
3349 name);
3350 ctxt->wellFormed = 0;
3351 ctxt->disableSAX = 1;
3352 xmlFree(name);
3353 return;
3354 }
3355
3356 /*
3357 * [ WFC: Parsed Entity ]
3358 * An entity reference must not contain the name of an unparsed entity
3359 */
3360 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3361 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3363 ctxt->sax->error(ctxt->userData,
3364 "Entity reference to unparsed entity %s\n", name);
3365 ctxt->wellFormed = 0;
3366 ctxt->disableSAX = 1;
3367 }
3368
3369 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3370 ctxt->token = ent->content[0];
3371 xmlFree(name);
3372 return;
3373 }
3374 input = xmlNewEntityInputStream(ctxt, ent);
3375 xmlPushInput(ctxt, input);
3376 xmlFree(name);
3377#endif
3378 return;
3379}
3380
3381/**
3382 * xmlHandleEntity:
3383 * @ctxt: an XML parser context
3384 * @entity: an XML entity pointer.
3385 *
3386 * Default handling of defined entities, when should we define a new input
3387 * stream ? When do we just handle that as a set of chars ?
3388 *
3389 * OBSOLETE: to be removed at some point.
3390 */
3391
3392void
3393xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3394 static int deprecated = 0;
3395 if (!deprecated) {
3396 xmlGenericError(xmlGenericErrorContext,
3397 "xmlHandleEntity() deprecated function reached\n");
3398 deprecated = 1;
3399 }
3400
3401#if 0
3402 int len;
3403 xmlParserInputPtr input;
3404
3405 if (entity->content == NULL) {
3406 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3409 entity->name);
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 return;
3413 }
3414 len = xmlStrlen(entity->content);
3415 if (len <= 2) goto handle_as_char;
3416
3417 /*
3418 * Redefine its content as an input stream.
3419 */
3420 input = xmlNewEntityInputStream(ctxt, entity);
3421 xmlPushInput(ctxt, input);
3422 return;
3423
3424handle_as_char:
3425 /*
3426 * Just handle the content as a set of chars.
3427 */
3428 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3429 (ctxt->sax->characters != NULL))
3430 ctxt->sax->characters(ctxt->userData, entity->content, len);
3431#endif
3432}
3433
3434/**
3435 * xmlNewGlobalNs:
3436 * @doc: the document carrying the namespace
3437 * @href: the URI associated
3438 * @prefix: the prefix for the namespace
3439 *
3440 * Creation of a Namespace, the old way using PI and without scoping
3441 * DEPRECATED !!!
3442 * It now create a namespace on the root element of the document if found.
3443 * Returns NULL this functionnality had been removed
3444 */
3445xmlNsPtr
3446xmlNewGlobalNs(xmlDocPtr doc, const xmlChar *href, const xmlChar *prefix) {
3447 static int deprecated = 0;
3448 if (!deprecated) {
3449 xmlGenericError(xmlGenericErrorContext,
3450 "xmlNewGlobalNs() deprecated function reached\n");
3451 deprecated = 1;
3452 }
3453 return(NULL);
3454#if 0
3455 xmlNodePtr root;
3456
3457 xmlNsPtr cur;
3458
3459 root = xmlDocGetRootElement(doc);
3460 if (root != NULL)
3461 return(xmlNewNs(root, href, prefix));
3462
3463 /*
3464 * if there is no root element yet, create an old Namespace type
3465 * and it will be moved to the root at save time.
3466 */
3467 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3468 if (cur == NULL) {
3469 xmlGenericError(xmlGenericErrorContext,
3470 "xmlNewGlobalNs : malloc failed\n");
3471 return(NULL);
3472 }
3473 memset(cur, 0, sizeof(xmlNs));
3474 cur->type = XML_GLOBAL_NAMESPACE;
3475
3476 if (href != NULL)
3477 cur->href = xmlStrdup(href);
3478 if (prefix != NULL)
3479 cur->prefix = xmlStrdup(prefix);
3480
3481 /*
3482 * Add it at the end to preserve parsing order ...
3483 */
3484 if (doc != NULL) {
3485 if (doc->oldNs == NULL) {
3486 doc->oldNs = cur;
3487 } else {
3488 xmlNsPtr prev = doc->oldNs;
3489
3490 while (prev->next != NULL) prev = prev->next;
3491 prev->next = cur;
3492 }
3493 }
3494
3495 return(NULL);
3496#endif
3497}
3498
3499/**
3500 * xmlUpgradeOldNs:
3501 * @doc: a document pointer
3502 *
3503 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3504 * DEPRECATED
3505 */
3506void
3507xmlUpgradeOldNs(xmlDocPtr doc) {
3508 static int deprecated = 0;
3509 if (!deprecated) {
3510 xmlGenericError(xmlGenericErrorContext,
3511 "xmlNewGlobalNs() deprecated function reached\n");
3512 deprecated = 1;
3513 }
3514#if 0
3515 xmlNsPtr cur;
3516
3517 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3518 if (doc->children == NULL) {
3519#ifdef DEBUG_TREE
3520 xmlGenericError(xmlGenericErrorContext,
3521 "xmlUpgradeOldNs: failed no root !\n");
3522#endif
3523 return;
3524 }
3525
3526 cur = doc->oldNs;
3527 while (cur->next != NULL) {
3528 cur->type = XML_LOCAL_NAMESPACE;
3529 cur = cur->next;
3530 }
3531 cur->type = XML_LOCAL_NAMESPACE;
3532 cur->next = doc->children->nsDef;
3533 doc->children->nsDef = doc->oldNs;
3534 doc->oldNs = NULL;
3535#endif
3536}
3537