/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50
51
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
70 if ((myversion / 10000) != (version / 10000)) {
71 xmlGenericError(xmlGenericErrorContext,
72 "Fatal: program compiled against libxml %d using libxml %d\n",
73 (version / 10000), (myversion / 10000));
74 exit(1);
75 }
76 if ((myversion / 100) < (version / 100)) {
77 xmlGenericError(xmlGenericErrorContext,
78 "Warning: program compiled against libxml %d using older %d\n",
79 (version / 100), (myversion / 100));
80 }
81}
82
83
/*
 * Names of the parser features, in the order they are reported by
 * xmlGetFeaturesList().
 */
const char *xmlFeaturesList[] = {
    /* parser options */
    "validate", "load subset", "keep blanks", "disable SAX",
    "fetch external entities", "substitute entities", "gather line info",

    /* parser state */
    "user data", "is html", "is standalone", "stop parser",
    "document", "is well formed", "is valid",

    /* the SAX handler block itself */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};
128
129/*
130 * xmlGetFeaturesList:
131 * @len: the length of the features name array (input/output)
132 * @result: an array of string to be filled with the features name.
133 *
134 * Copy at most *@len feature names into the @result array
135 *
136 * Returns -1 in case or error, or the total number of features,
137 * len is updated with the number of strings copied,
138 * strings must not be deallocated
139 */
140int
141xmlGetFeaturesList(int *len, const char **result) {
142 int ret, i;
143
144 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
145 if ((len == NULL) || (result == NULL))
146 return(ret);
147 if ((*len < 0) || (*len >= 1000))
148 return(-1);
149 if (*len > ret)
150 *len = ret;
151 for (i = 0;i < *len;i++)
152 result[i] = xmlFeaturesList[i];
153 return(ret);
154}
155
156/*
157 * xmlGetFeature:
158 * @ctxt: an XML/HTML parser context
159 * @name: the feature name
160 * @result: location to store the result
161 *
162 * Read the current value of one feature of this parser instance
163 *
164 * Returns -1 in case or error, 0 otherwise
165 */
166int
167xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
168 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
169 return(-1);
170
171 if (!strcmp(name, "validate")) {
172 *((int *) result) = ctxt->validate;
173 } else if (!strcmp(name, "keep blanks")) {
174 *((int *) result) = ctxt->keepBlanks;
175 } else if (!strcmp(name, "disable SAX")) {
176 *((int *) result) = ctxt->disableSAX;
177 } else if (!strcmp(name, "fetch external entities")) {
178 *((int *) result) = ctxt->loadsubset;
179 } else if (!strcmp(name, "substitute entities")) {
180 *((int *) result) = ctxt->replaceEntities;
181 } else if (!strcmp(name, "gather line info")) {
182 *((int *) result) = ctxt->record_info;
183 } else if (!strcmp(name, "user data")) {
184 *((void **)result) = ctxt->userData;
185 } else if (!strcmp(name, "is html")) {
186 *((int *) result) = ctxt->html;
187 } else if (!strcmp(name, "is standalone")) {
188 *((int *) result) = ctxt->standalone;
189 } else if (!strcmp(name, "document")) {
190 *((xmlDocPtr *) result) = ctxt->myDoc;
191 } else if (!strcmp(name, "is well formed")) {
192 *((int *) result) = ctxt->wellFormed;
193 } else if (!strcmp(name, "is valid")) {
194 *((int *) result) = ctxt->valid;
195 } else if (!strcmp(name, "SAX block")) {
196 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
197 } else if (!strcmp(name, "SAX function internalSubset")) {
198 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
199 } else if (!strcmp(name, "SAX function isStandalone")) {
200 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
201 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
202 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
203 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
204 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
205 } else if (!strcmp(name, "SAX function resolveEntity")) {
206 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
207 } else if (!strcmp(name, "SAX function getEntity")) {
208 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
209 } else if (!strcmp(name, "SAX function entityDecl")) {
210 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
211 } else if (!strcmp(name, "SAX function notationDecl")) {
212 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
213 } else if (!strcmp(name, "SAX function attributeDecl")) {
214 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
215 } else if (!strcmp(name, "SAX function elementDecl")) {
216 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
217 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
218 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
219 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
220 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
221 } else if (!strcmp(name, "SAX function startDocument")) {
222 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
223 } else if (!strcmp(name, "SAX function endDocument")) {
224 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
225 } else if (!strcmp(name, "SAX function startElement")) {
226 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
227 } else if (!strcmp(name, "SAX function endElement")) {
228 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
229 } else if (!strcmp(name, "SAX function reference")) {
230 *((referenceSAXFunc *) result) = ctxt->sax->reference;
231 } else if (!strcmp(name, "SAX function characters")) {
232 *((charactersSAXFunc *) result) = ctxt->sax->characters;
233 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
234 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
235 } else if (!strcmp(name, "SAX function processingInstruction")) {
236 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
237 } else if (!strcmp(name, "SAX function comment")) {
238 *((commentSAXFunc *) result) = ctxt->sax->comment;
239 } else if (!strcmp(name, "SAX function warning")) {
240 *((warningSAXFunc *) result) = ctxt->sax->warning;
241 } else if (!strcmp(name, "SAX function error")) {
242 *((errorSAXFunc *) result) = ctxt->sax->error;
243 } else if (!strcmp(name, "SAX function fatalError")) {
244 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
245 } else if (!strcmp(name, "SAX function getParameterEntity")) {
246 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
247 } else if (!strcmp(name, "SAX function cdataBlock")) {
248 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
249 } else if (!strcmp(name, "SAX function externalSubset")) {
250 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
251 } else {
252 return(-1);
253 }
254 return(0);
255}
256
257/*
258 * xmlSetFeature:
259 * @ctxt: an XML/HTML parser context
260 * @name: the feature name
261 * @value: pointer to the location of the new value
262 *
263 * Change the current value of one feature of this parser instance
264 *
265 * Returns -1 in case or error, 0 otherwise
266 */
267int
268xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
269 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
270 return(-1);
271
272 if (!strcmp(name, "validate")) {
273 int newvalidate = *((int *) value);
274 if ((!ctxt->validate) && (newvalidate != 0)) {
275 if (ctxt->vctxt.warning == NULL)
276 ctxt->vctxt.warning = xmlParserValidityWarning;
277 if (ctxt->vctxt.error == NULL)
278 ctxt->vctxt.error = xmlParserValidityError;
279 /* Allocate the Node stack */
280 ctxt->vctxt.nodeTab = (xmlNodePtr *)
281 xmlMalloc(4 * sizeof(xmlNodePtr));
282 if (ctxt->vctxt.nodeTab == NULL) {
283 ctxt->vctxt.nodeMax = 0;
284 ctxt->validate = 0;
285 return(-1);
286 }
287 ctxt->vctxt.nodeNr = 0;
288 ctxt->vctxt.nodeMax = 4;
289 ctxt->vctxt.node = NULL;
290 }
291 ctxt->validate = newvalidate;
292 } else if (!strcmp(name, "keep blanks")) {
293 ctxt->keepBlanks = *((int *) value);
294 } else if (!strcmp(name, "disable SAX")) {
295 ctxt->disableSAX = *((int *) value);
296 } else if (!strcmp(name, "fetch external entities")) {
297 ctxt->loadsubset = *((int *) value);
298 } else if (!strcmp(name, "substitute entities")) {
299 ctxt->replaceEntities = *((int *) value);
300 } else if (!strcmp(name, "gather line info")) {
301 ctxt->record_info = *((int *) value);
302 } else if (!strcmp(name, "user data")) {
303 ctxt->userData = *((void **)value);
304 } else if (!strcmp(name, "is html")) {
305 ctxt->html = *((int *) value);
306 } else if (!strcmp(name, "is standalone")) {
307 ctxt->standalone = *((int *) value);
308 } else if (!strcmp(name, "document")) {
309 ctxt->myDoc = *((xmlDocPtr *) value);
310 } else if (!strcmp(name, "is well formed")) {
311 ctxt->wellFormed = *((int *) value);
312 } else if (!strcmp(name, "is valid")) {
313 ctxt->valid = *((int *) value);
314 } else if (!strcmp(name, "SAX block")) {
315 ctxt->sax = *((xmlSAXHandlerPtr *) value);
316 } else if (!strcmp(name, "SAX function internalSubset")) {
317 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function isStandalone")) {
319 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
321 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
323 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function resolveEntity")) {
325 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function getEntity")) {
327 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function entityDecl")) {
329 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function notationDecl")) {
331 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function attributeDecl")) {
333 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function elementDecl")) {
335 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
337 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
339 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function startDocument")) {
341 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function endDocument")) {
343 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function startElement")) {
345 ctxt->sax->startElement = *((startElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function endElement")) {
347 ctxt->sax->endElement = *((endElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function reference")) {
349 ctxt->sax->reference = *((referenceSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function characters")) {
351 ctxt->sax->characters = *((charactersSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
353 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function processingInstruction")) {
355 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function comment")) {
357 ctxt->sax->comment = *((commentSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function warning")) {
359 ctxt->sax->warning = *((warningSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function error")) {
361 ctxt->sax->error = *((errorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function fatalError")) {
363 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function getParameterEntity")) {
365 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
366 } else if (!strcmp(name, "SAX function cdataBlock")) {
367 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
368 } else if (!strcmp(name, "SAX function externalSubset")) {
369 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
370 } else {
371 return(-1);
372 }
373 return(0);
374}
375
376/************************************************************************
377 * *
378 * Some functions to avoid too large macros *
379 * *
380 ************************************************************************/
381
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF are excluded, then the supplementary planes */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
402
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:    /* space */
        case 0x09:    /* tab */
        case 0x0A:    /* line feed */
        case 0x0D:    /* carriage return */
            return(1);
        default:
            return(0);
    }
}
417
/*
 * Lookup table for the code points 0x00 - 0xFF: 1 when the code point is
 * a BaseChar, 0 otherwise. Only used by xmlIsBaseChar().
 */
static const int xmlBaseArray[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are answered from xmlBaseArray; the others are
 * looked up in a table of inclusive [lo, hi] ranges sorted in ascending
 * order, which allows the scan to stop at the first range starting above
 * @c. Negative values are rejected up front: they are not characters and
 * must not be used as an index into xmlBaseArray.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsBaseChar(int c) {
    /* inclusive ranges for code points >= 0x0100, ascending order */
    static const struct { unsigned short lo, hi; } ranges[] = {
        {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
        {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
        {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
        {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
        {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
        {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
        {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
        {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
        {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
        {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
        {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},

        {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
        {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
        {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
        {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
        {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
        {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
        {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
        {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
        {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
        {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
        {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
        {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
        {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
        {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
        {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
        {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
        {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
        {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
        {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
        {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
        {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
        {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
        {0x0F40, 0x0F47}, {0x0F49, 0x0F69},

        {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103},
        {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110B, 0x110C}, {0x110E, 0x1112},
        {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
        {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
        {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
        {0x1169, 0x1169}, {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
        {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB}, {0x11AE, 0x11AF},
        {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2}, {0x11EB, 0x11EB},
        {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
        {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
        {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
        {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
        {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
        {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
        {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
        {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3}
    };
    unsigned int i;

    /* a negative value must never be used to index xmlBaseArray */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        /* ranges are sorted: nothing further can match */
        if (c < (int) ranges[i].lo)
            break;
        if (c <= (int) ranges[i].hi)
            return(1);
    }
    return(0);
}
656
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The inclusive ranges of the production are kept in a table sorted in
 * ascending order, so the scan stops at the first range starting above @c.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsDigit(int c) {
    static const struct { unsigned short first, last; } digits[] = {
        {0x0030, 0x0039},
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF},
        {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    unsigned int k;

    for (k = 0; k < sizeof(digits) / sizeof(digits[0]); k++) {
        if (c < (int) digits[k].first)
            return(0);
        if (c <= (int) digits[k].last)
            return(1);
    }
    return(0);
}
686
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The inclusive ranges of the production are kept in a table sorted in
 * ascending order, so the scan stops at the first range starting above @c.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsCombining(int c) {
    static const struct { unsigned short first, last; } marks[] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
        {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
        {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},

        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
        {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},

        {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F},
        {0x0A40, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
        {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43},
        {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
        {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
        {0x0C55, 0x0C56}, {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
        {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},

        {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
        {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
        {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
        {0x0F97, 0x0F97}, {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
        {0x309A, 0x309A}
    };
    unsigned int k;

    for (k = 0; k < sizeof(marks) / sizeof(marks[0]); k++) {
        if (c < (int) marks[k].first)
            return(0);
        if (c <= (int) marks[k].last)
            return(1);
    }
    return(0);
}
799
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE], including 0x30FD in the middle of the range */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
824
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; the
 * 0xF900 - 0xFA2D block is not part of it and is therefore rejected.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast exit: nothing below 0x3007 belongs to the production */
    if (c < 0x0100)
        return(0);
    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
842
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 * using the IS_BASECHAR() and IS_IDEOGRAPHIC() tests.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
856
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* #x20 | #xD | #xA */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
879
880/************************************************************************
881 * *
882 * Input handling functions for progressive parsing *
883 * *
884 ************************************************************************/
885
886/* #define DEBUG_INPUT */
887/* #define DEBUG_STACK */
888/* #define DEBUG_PUSH */
889
890
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the base/cur pointers of @in
 * are inconsistent with the underlying buffer, then dumps the current
 * buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p (casting them to int truncates them
     * where pointers are wider than int) and every remaining argument
     * is cast to the exact type its conversion specifier expects.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
918
919
920/**
921 * xmlParserInputRead:
922 * @in: an XML parser input
923 * @len: an indicative size for the lookahead
924 *
925 * This function refresh the input for the parser. It doesn't try to
926 * preserve pointers to the input buffer, and discard already read data
927 *
928 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
929 * end of this entity
930 */
931int
932xmlParserInputRead(xmlParserInputPtr in, int len) {
933 int ret;
934 int used;
935 int index;
936
937#ifdef DEBUG_INPUT
938 xmlGenericError(xmlGenericErrorContext, "Read\n");
939#endif
940 if (in->buf == NULL) return(-1);
941 if (in->base == NULL) return(-1);
942 if (in->cur == NULL) return(-1);
943 if (in->buf->buffer == NULL) return(-1);
944 if (in->buf->readcallback == NULL) return(-1);
945
946 CHECK_BUFFER(in);
947
948 used = in->cur - in->buf->buffer->content;
949 ret = xmlBufferShrink(in->buf->buffer, used);
950 if (ret > 0) {
951 in->cur -= ret;
952 in->consumed += ret;
953 }
954 ret = xmlParserInputBufferRead(in->buf, len);
955 if (in->base != in->buf->buffer->content) {
956 /*
957 * the buffer has been realloced
958 */
959 index = in->cur - in->base;
960 in->base = in->buf->buffer->content;
961 in->cur = &in->buf->buffer->content[index];
962 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000963 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000964
965 CHECK_BUFFER(in);
966
967 return(ret);
968}
969
970/**
971 * xmlParserInputGrow:
972 * @in: an XML parser input
973 * @len: an indicative size for the lookahead
974 *
975 * This function increase the input for the parser. It tries to
976 * preserve pointers to the input buffer, and keep already read data
977 *
978 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
979 * end of this entity
980 */
981int
982xmlParserInputGrow(xmlParserInputPtr in, int len) {
983 int ret;
984 int index;
985
986#ifdef DEBUG_INPUT
987 xmlGenericError(xmlGenericErrorContext, "Grow\n");
988#endif
989 if (in->buf == NULL) return(-1);
990 if (in->base == NULL) return(-1);
991 if (in->cur == NULL) return(-1);
992 if (in->buf->buffer == NULL) return(-1);
993
994 CHECK_BUFFER(in);
995
996 index = in->cur - in->base;
997 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
998
999 CHECK_BUFFER(in);
1000
1001 return(0);
1002 }
1003 if (in->buf->readcallback != NULL)
1004 ret = xmlParserInputBufferGrow(in->buf, len);
1005 else
1006 return(0);
1007
1008 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001009 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001010 * block, but we use it really as an integer to do some
1011 * pointer arithmetic. Insure will raise it as a bug but in
1012 * that specific case, that's not !
1013 */
1014 if (in->base != in->buf->buffer->content) {
1015 /*
1016 * the buffer has been realloced
1017 */
1018 index = in->cur - in->base;
1019 in->base = in->buf->buffer->content;
1020 in->cur = &in->buf->buffer->content[index];
1021 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001022 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001023
1024 CHECK_BUFFER(in);
1025
1026 return(ret);
1027}
1028
1029/**
1030 * xmlParserInputShrink:
1031 * @in: an XML parser input
1032 *
1033 * This function removes used input for the parser.
1034 */
1035void
1036xmlParserInputShrink(xmlParserInputPtr in) {
1037 int used;
1038 int ret;
1039 int index;
1040
1041#ifdef DEBUG_INPUT
1042 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1043#endif
1044 if (in->buf == NULL) return;
1045 if (in->base == NULL) return;
1046 if (in->cur == NULL) return;
1047 if (in->buf->buffer == NULL) return;
1048
1049 CHECK_BUFFER(in);
1050
1051 used = in->cur - in->buf->buffer->content;
1052 /*
1053 * Do not shrink on large buffers whose only a tiny fraction
1054 * was consumned
1055 */
1056 if (in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1057 return;
1058 if (used > INPUT_CHUNK) {
1059 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1060 if (ret > 0) {
1061 in->cur -= ret;
1062 in->consumed += ret;
1063 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001064 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001065 }
1066
1067 CHECK_BUFFER(in);
1068
1069 if (in->buf->buffer->use > INPUT_CHUNK) {
1070 return;
1071 }
1072 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1073 if (in->base != in->buf->buffer->content) {
1074 /*
1075 * the buffer has been realloced
1076 */
1077 index = in->cur - in->base;
1078 in->base = in->buf->buffer->content;
1079 in->cur = &in->buf->buffer->content[index];
1080 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001081 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001082
1083 CHECK_BUFFER(in);
1084}
1085
1086/************************************************************************
1087 * *
1088 * UTF8 character input and related functions *
1089 * *
1090 ************************************************************************/
1091
1092/**
1093 * xmlNextChar:
1094 * @ctxt: the XML parser context
1095 *
1096 * Skip to the next char input char.
1097 */
1098
1099void
1100xmlNextChar(xmlParserCtxtPtr ctxt) {
1101 if (ctxt->instate == XML_PARSER_EOF)
1102 return;
1103
1104 /*
1105 * 2.11 End-of-Line Handling
1106 * the literal two-character sequence "#xD#xA" or a standalone
1107 * literal #xD, an XML processor must pass to the application
1108 * the single character #xA.
1109 */
1110 if (ctxt->token != 0) ctxt->token = 0;
1111 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1112 if ((*ctxt->input->cur == 0) &&
1113 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1114 (ctxt->instate != XML_PARSER_COMMENT)) {
1115 /*
1116 * If we are at the end of the current entity and
1117 * the context allows it, we pop consumed entities
1118 * automatically.
1119 * the auto closing should be blocked in other cases
1120 */
1121 xmlPopInput(ctxt);
1122 } else {
1123 if (*(ctxt->input->cur) == '\n') {
1124 ctxt->input->line++; ctxt->input->col = 1;
1125 } else ctxt->input->col++;
1126 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1127 /*
1128 * We are supposed to handle UTF8, check it's valid
1129 * From rfc2044: encoding of the Unicode values on UTF-8:
1130 *
1131 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1132 * 0000 0000-0000 007F 0xxxxxxx
1133 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1134 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1135 *
1136 * Check for the 0x110000 limit too
1137 */
1138 const unsigned char *cur = ctxt->input->cur;
1139 unsigned char c;
1140
1141 c = *cur;
1142 if (c & 0x80) {
1143 if (cur[1] == 0)
1144 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1145 if ((cur[1] & 0xc0) != 0x80)
1146 goto encoding_error;
1147 if ((c & 0xe0) == 0xe0) {
1148 unsigned int val;
1149
1150 if (cur[2] == 0)
1151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1152 if ((cur[2] & 0xc0) != 0x80)
1153 goto encoding_error;
1154 if ((c & 0xf0) == 0xf0) {
1155 if (cur[3] == 0)
1156 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1157 if (((c & 0xf8) != 0xf0) ||
1158 ((cur[3] & 0xc0) != 0x80))
1159 goto encoding_error;
1160 /* 4-byte code */
1161 ctxt->input->cur += 4;
1162 val = (cur[0] & 0x7) << 18;
1163 val |= (cur[1] & 0x3f) << 12;
1164 val |= (cur[2] & 0x3f) << 6;
1165 val |= cur[3] & 0x3f;
1166 } else {
1167 /* 3-byte code */
1168 ctxt->input->cur += 3;
1169 val = (cur[0] & 0xf) << 12;
1170 val |= (cur[1] & 0x3f) << 6;
1171 val |= cur[2] & 0x3f;
1172 }
1173 if (((val > 0xd7ff) && (val < 0xe000)) ||
1174 ((val > 0xfffd) && (val < 0x10000)) ||
1175 (val >= 0x110000)) {
1176 if ((ctxt->sax != NULL) &&
1177 (ctxt->sax->error != NULL))
1178 ctxt->sax->error(ctxt->userData,
1179 "Char 0x%X out of allowed range\n", val);
1180 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1181 ctxt->wellFormed = 0;
1182 ctxt->disableSAX = 1;
1183 }
1184 } else
1185 /* 2-byte code */
1186 ctxt->input->cur += 2;
1187 } else
1188 /* 1-byte code */
1189 ctxt->input->cur++;
1190 } else {
1191 /*
1192 * Assume it's a fixed lenght encoding (1) with
1193 * a compatibke encoding for the ASCII set, since
1194 * XML constructs only use < 128 chars
1195 */
1196 ctxt->input->cur++;
1197 }
1198 ctxt->nbChars++;
1199 if (*ctxt->input->cur == 0)
1200 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1201 }
1202 } else {
1203 ctxt->input->cur++;
1204 ctxt->nbChars++;
1205 if (*ctxt->input->cur == 0)
1206 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1207 }
1208 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1209 xmlParserHandlePEReference(ctxt);
1210 if ((*ctxt->input->cur == 0) &&
1211 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1212 xmlPopInput(ctxt);
1213 return;
1214encoding_error:
1215 /*
1216 * If we detect an UTF8 error that probably mean that the
1217 * input encoding didn't get properly advertized in the
1218 * declaration header. Report the error and switch the encoding
1219 * to ISO-Latin-1 (if you don't like this policy, just declare the
1220 * encoding !)
1221 */
1222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1223 ctxt->sax->error(ctxt->userData,
1224 "Input is not proper UTF-8, indicate encoding !\n");
1225 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1226 ctxt->input->cur[0], ctxt->input->cur[1],
1227 ctxt->input->cur[2], ctxt->input->cur[3]);
1228 }
1229 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1230
1231 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1232 ctxt->input->cur++;
1233 return;
1234}
1235
1236/**
1237 * xmlCurrentChar:
1238 * @ctxt: the XML parser context
1239 * @len: pointer to the length of the char read
1240 *
1241 * The current char value, if using UTF-8 this may actaully span multiple
1242 * bytes in the input buffer. Implement the end of line normalization:
1243 * 2.11 End-of-Line Handling
1244 * Wherever an external parsed entity or the literal entity value
1245 * of an internal parsed entity contains either the literal two-character
1246 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1247 * must pass to the application the single character #xA.
1248 * This behavior can conveniently be produced by normalizing all
1249 * line breaks to #xA on input, before parsing.)
1250 *
1251 * Returns the current char value and its lenght
1252 */
1253
1254int
1255xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1256 if (ctxt->instate == XML_PARSER_EOF)
1257 return(0);
1258
1259 if (ctxt->token != 0) {
1260 *len = 0;
1261 return(ctxt->token);
1262 }
1263 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1264 *len = 1;
1265 return((int) *ctxt->input->cur);
1266 }
1267 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1268 /*
1269 * We are supposed to handle UTF8, check it's valid
1270 * From rfc2044: encoding of the Unicode values on UTF-8:
1271 *
1272 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1273 * 0000 0000-0000 007F 0xxxxxxx
1274 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1275 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1276 *
1277 * Check for the 0x110000 limit too
1278 */
1279 const unsigned char *cur = ctxt->input->cur;
1280 unsigned char c;
1281 unsigned int val;
1282
1283 c = *cur;
1284 if (c & 0x80) {
1285 if (cur[1] == 0)
1286 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1287 if ((cur[1] & 0xc0) != 0x80)
1288 goto encoding_error;
1289 if ((c & 0xe0) == 0xe0) {
1290
1291 if (cur[2] == 0)
1292 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1293 if ((cur[2] & 0xc0) != 0x80)
1294 goto encoding_error;
1295 if ((c & 0xf0) == 0xf0) {
1296 if (cur[3] == 0)
1297 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1298 if (((c & 0xf8) != 0xf0) ||
1299 ((cur[3] & 0xc0) != 0x80))
1300 goto encoding_error;
1301 /* 4-byte code */
1302 *len = 4;
1303 val = (cur[0] & 0x7) << 18;
1304 val |= (cur[1] & 0x3f) << 12;
1305 val |= (cur[2] & 0x3f) << 6;
1306 val |= cur[3] & 0x3f;
1307 } else {
1308 /* 3-byte code */
1309 *len = 3;
1310 val = (cur[0] & 0xf) << 12;
1311 val |= (cur[1] & 0x3f) << 6;
1312 val |= cur[2] & 0x3f;
1313 }
1314 } else {
1315 /* 2-byte code */
1316 *len = 2;
1317 val = (cur[0] & 0x1f) << 6;
1318 val |= cur[1] & 0x3f;
1319 }
1320 if (!IS_CHAR(val)) {
1321 if ((ctxt->sax != NULL) &&
1322 (ctxt->sax->error != NULL))
1323 ctxt->sax->error(ctxt->userData,
1324 "Char 0x%X out of allowed range\n", val);
1325 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1326 ctxt->wellFormed = 0;
1327 ctxt->disableSAX = 1;
1328 }
1329 return(val);
1330 } else {
1331 /* 1-byte code */
1332 *len = 1;
1333 if (*ctxt->input->cur == 0xD) {
1334 if (ctxt->input->cur[1] == 0xA) {
1335 ctxt->nbChars++;
1336 ctxt->input->cur++;
1337 }
1338 return(0xA);
1339 }
1340 return((int) *ctxt->input->cur);
1341 }
1342 }
1343 /*
1344 * Assume it's a fixed lenght encoding (1) with
1345 * a compatibke encoding for the ASCII set, since
1346 * XML constructs only use < 128 chars
1347 */
1348 *len = 1;
1349 if (*ctxt->input->cur == 0xD) {
1350 if (ctxt->input->cur[1] == 0xA) {
1351 ctxt->nbChars++;
1352 ctxt->input->cur++;
1353 }
1354 return(0xA);
1355 }
1356 return((int) *ctxt->input->cur);
1357encoding_error:
1358 /*
1359 * If we detect an UTF8 error that probably mean that the
1360 * input encoding didn't get properly advertized in the
1361 * declaration header. Report the error and switch the encoding
1362 * to ISO-Latin-1 (if you don't like this policy, just declare the
1363 * encoding !)
1364 */
1365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1366 ctxt->sax->error(ctxt->userData,
1367 "Input is not proper UTF-8, indicate encoding !\n");
1368 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1369 ctxt->input->cur[0], ctxt->input->cur[1],
1370 ctxt->input->cur[2], ctxt->input->cur[3]);
1371 }
1372 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1373
1374 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1375 *len = 1;
1376 return((int) *ctxt->input->cur);
1377}
1378
1379/**
1380 * xmlStringCurrentChar:
1381 * @ctxt: the XML parser context
1382 * @cur: pointer to the beginning of the char
1383 * @len: pointer to the length of the char read
1384 *
1385 * The current char value, if using UTF-8 this may actaully span multiple
1386 * bytes in the input buffer.
1387 *
1388 * Returns the current char value and its lenght
1389 */
1390
1391int
1392xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1393 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1394 /*
1395 * We are supposed to handle UTF8, check it's valid
1396 * From rfc2044: encoding of the Unicode values on UTF-8:
1397 *
1398 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1399 * 0000 0000-0000 007F 0xxxxxxx
1400 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1401 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1402 *
1403 * Check for the 0x110000 limit too
1404 */
1405 unsigned char c;
1406 unsigned int val;
1407
1408 c = *cur;
1409 if (c & 0x80) {
1410 if ((cur[1] & 0xc0) != 0x80)
1411 goto encoding_error;
1412 if ((c & 0xe0) == 0xe0) {
1413
1414 if ((cur[2] & 0xc0) != 0x80)
1415 goto encoding_error;
1416 if ((c & 0xf0) == 0xf0) {
1417 if (((c & 0xf8) != 0xf0) ||
1418 ((cur[3] & 0xc0) != 0x80))
1419 goto encoding_error;
1420 /* 4-byte code */
1421 *len = 4;
1422 val = (cur[0] & 0x7) << 18;
1423 val |= (cur[1] & 0x3f) << 12;
1424 val |= (cur[2] & 0x3f) << 6;
1425 val |= cur[3] & 0x3f;
1426 } else {
1427 /* 3-byte code */
1428 *len = 3;
1429 val = (cur[0] & 0xf) << 12;
1430 val |= (cur[1] & 0x3f) << 6;
1431 val |= cur[2] & 0x3f;
1432 }
1433 } else {
1434 /* 2-byte code */
1435 *len = 2;
1436 val = (cur[0] & 0x1f) << 6;
1437 val |= cur[2] & 0x3f;
1438 }
1439 if (!IS_CHAR(val)) {
1440 if ((ctxt->sax != NULL) &&
1441 (ctxt->sax->error != NULL))
1442 ctxt->sax->error(ctxt->userData,
1443 "Char 0x%X out of allowed range\n", val);
1444 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1445 ctxt->wellFormed = 0;
1446 ctxt->disableSAX = 1;
1447 }
1448 return(val);
1449 } else {
1450 /* 1-byte code */
1451 *len = 1;
1452 return((int) *cur);
1453 }
1454 }
1455 /*
1456 * Assume it's a fixed lenght encoding (1) with
1457 * a compatibke encoding for the ASCII set, since
1458 * XML constructs only use < 128 chars
1459 */
1460 *len = 1;
1461 return((int) *cur);
1462encoding_error:
1463 /*
1464 * If we detect an UTF8 error that probably mean that the
1465 * input encoding didn't get properly advertized in the
1466 * declaration header. Report the error and switch the encoding
1467 * to ISO-Latin-1 (if you don't like this policy, just declare the
1468 * encoding !)
1469 */
1470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1471 ctxt->sax->error(ctxt->userData,
1472 "Input is not proper UTF-8, indicate encoding !\n");
1473 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1474 ctxt->input->cur[0], ctxt->input->cur[1],
1475 ctxt->input->cur[2], ctxt->input->cur[3]);
1476 }
1477 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1478
1479 *len = 1;
1480 return((int) *cur);
1481}
1482
1483/**
1484 * xmlCopyChar:
1485 * @len: pointer to the length of the char read (or zero)
1486 * @array: pointer to an arry of xmlChar
1487 * @val: the char value
1488 *
1489 * append the char value in the array
1490 *
1491 * Returns the number of xmlChar written
1492 */
1493
1494int
1495xmlCopyChar(int len, xmlChar *out, int val) {
1496 /*
1497 * We are supposed to handle UTF8, check it's valid
1498 * From rfc2044: encoding of the Unicode values on UTF-8:
1499 *
1500 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1501 * 0000 0000-0000 007F 0xxxxxxx
1502 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1503 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1504 */
1505 if (len == 0) {
1506 if (val < 0) len = 0;
1507 else if (val < 0x80) len = 1;
1508 else if (val < 0x800) len = 2;
1509 else if (val < 0x10000) len = 3;
1510 else if (val < 0x110000) len = 4;
1511 if (len == 0) {
1512 xmlGenericError(xmlGenericErrorContext,
1513 "Internal error, xmlCopyChar 0x%X out of bound\n",
1514 val);
1515 return(0);
1516 }
1517 }
1518 if (len > 1) {
1519 int bits;
1520
1521 if (val < 0x80) { *out++= val; bits= -6; }
1522 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1523 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1524 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1525
1526 for ( ; bits >= 0; bits-= 6)
1527 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1528
1529 return(len);
1530 }
1531 *out = (xmlChar) val;
1532 return(1);
1533}
1534
1535/************************************************************************
1536 * *
1537 * Commodity functions to switch encodings *
1538 * *
1539 ************************************************************************/
1540
1541/**
1542 * xmlSwitchEncoding:
1543 * @ctxt: the parser context
1544 * @enc: the encoding value (number)
1545 *
1546 * change the input functions when discovering the character encoding
1547 * of a given entity.
1548 *
1549 * Returns 0 in case of success, -1 otherwise
1550 */
1551int
1552xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1553{
1554 xmlCharEncodingHandlerPtr handler;
1555
1556 switch (enc) {
1557 case XML_CHAR_ENCODING_ERROR:
1558 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1561 ctxt->wellFormed = 0;
1562 ctxt->disableSAX = 1;
1563 break;
1564 case XML_CHAR_ENCODING_NONE:
1565 /* let's assume it's UTF-8 without the XML decl */
1566 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1567 return(0);
1568 case XML_CHAR_ENCODING_UTF8:
1569 /* default encoding, no conversion should be needed */
1570 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1571 return(0);
1572 default:
1573 break;
1574 }
1575 handler = xmlGetCharEncodingHandler(enc);
1576 if (handler == NULL) {
1577 /*
1578 * Default handlers.
1579 */
1580 switch (enc) {
1581 case XML_CHAR_ENCODING_ERROR:
1582 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1584 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1585 ctxt->wellFormed = 0;
1586 ctxt->disableSAX = 1;
1587 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1588 break;
1589 case XML_CHAR_ENCODING_NONE:
1590 /* let's assume it's UTF-8 without the XML decl */
1591 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1592 return(0);
1593 case XML_CHAR_ENCODING_UTF8:
1594 case XML_CHAR_ENCODING_ASCII:
1595 /* default encoding, no conversion should be needed */
1596 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1597 return(0);
1598 case XML_CHAR_ENCODING_UTF16LE:
1599 break;
1600 case XML_CHAR_ENCODING_UTF16BE:
1601 break;
1602 case XML_CHAR_ENCODING_UCS4LE:
1603 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "char encoding USC4 little endian not supported\n");
1607 break;
1608 case XML_CHAR_ENCODING_UCS4BE:
1609 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1611 ctxt->sax->error(ctxt->userData,
1612 "char encoding USC4 big endian not supported\n");
1613 break;
1614 case XML_CHAR_ENCODING_EBCDIC:
1615 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "char encoding EBCDIC not supported\n");
1619 break;
1620 case XML_CHAR_ENCODING_UCS4_2143:
1621 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1623 ctxt->sax->error(ctxt->userData,
1624 "char encoding UCS4 2143 not supported\n");
1625 break;
1626 case XML_CHAR_ENCODING_UCS4_3412:
1627 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1629 ctxt->sax->error(ctxt->userData,
1630 "char encoding UCS4 3412 not supported\n");
1631 break;
1632 case XML_CHAR_ENCODING_UCS2:
1633 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1635 ctxt->sax->error(ctxt->userData,
1636 "char encoding UCS2 not supported\n");
1637 break;
1638 case XML_CHAR_ENCODING_8859_1:
1639 case XML_CHAR_ENCODING_8859_2:
1640 case XML_CHAR_ENCODING_8859_3:
1641 case XML_CHAR_ENCODING_8859_4:
1642 case XML_CHAR_ENCODING_8859_5:
1643 case XML_CHAR_ENCODING_8859_6:
1644 case XML_CHAR_ENCODING_8859_7:
1645 case XML_CHAR_ENCODING_8859_8:
1646 case XML_CHAR_ENCODING_8859_9:
1647 /*
1648 * We used to keep the internal content in the
1649 * document encoding however this turns being unmaintainable
1650 * So xmlGetCharEncodingHandler() will return non-null
1651 * values for this now.
1652 */
1653 if ((ctxt->inputNr == 1) &&
1654 (ctxt->encoding == NULL) &&
1655 (ctxt->input->encoding != NULL)) {
1656 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1657 }
1658 ctxt->charset = enc;
1659 return(0);
1660 case XML_CHAR_ENCODING_2022_JP:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding ISO-2022-JPnot supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_SHIFT_JIS:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding Shift_JIS not supported\n");
1671 break;
1672 case XML_CHAR_ENCODING_EUC_JP:
1673 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt->userData,
1676 "char encoding EUC-JPnot supported\n");
1677 break;
1678 }
1679 }
1680 if (handler == NULL)
1681 return(-1);
1682 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1683 return(xmlSwitchToEncoding(ctxt, handler));
1684}
1685
1686/**
1687 * xmlSwitchToEncoding:
1688 * @ctxt: the parser context
1689 * @handler: the encoding handler
1690 *
1691 * change the input functions when discovering the character encoding
1692 * of a given entity.
1693 *
1694 * Returns 0 in case of success, -1 otherwise
1695 */
1696int
1697xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1698{
1699 int nbchars;
1700
1701 if (handler != NULL) {
1702 if (ctxt->input != NULL) {
1703 if (ctxt->input->buf != NULL) {
1704 if (ctxt->input->buf->encoder != NULL) {
1705 if (ctxt->input->buf->encoder == handler)
1706 return(0);
1707 /*
1708 * Note: this is a bit dangerous, but that's what it
1709 * takes to use nearly compatible signature for different
1710 * encodings.
1711 */
1712 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1713 ctxt->input->buf->encoder = handler;
1714 return(0);
1715 }
1716 ctxt->input->buf->encoder = handler;
1717
1718 /*
1719 * Is there already some content down the pipe to convert ?
1720 */
1721 if ((ctxt->input->buf->buffer != NULL) &&
1722 (ctxt->input->buf->buffer->use > 0)) {
1723 int processed;
1724
1725 /*
1726 * Specific handling of the Byte Order Mark for
1727 * UTF-16
1728 */
1729 if ((handler->name != NULL) &&
1730 (!strcmp(handler->name, "UTF-16LE")) &&
1731 (ctxt->input->cur[0] == 0xFF) &&
1732 (ctxt->input->cur[1] == 0xFE)) {
1733 ctxt->input->cur += 2;
1734 }
1735 if ((handler->name != NULL) &&
1736 (!strcmp(handler->name, "UTF-16BE")) &&
1737 (ctxt->input->cur[0] == 0xFE) &&
1738 (ctxt->input->cur[1] == 0xFF)) {
1739 ctxt->input->cur += 2;
1740 }
1741
1742 /*
1743 * Shring the current input buffer.
1744 * Move it as the raw buffer and create a new input buffer
1745 */
1746 processed = ctxt->input->cur - ctxt->input->base;
1747 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1748 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1749 ctxt->input->buf->buffer = xmlBufferCreate();
1750
1751 if (ctxt->html) {
1752 /*
1753 * converst as much as possbile of the buffer
1754 */
1755 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1756 ctxt->input->buf->buffer,
1757 ctxt->input->buf->raw);
1758 } else {
1759 /*
1760 * convert just enough to get
1761 * '<?xml version="1.0" encoding="xxx"?>'
1762 * parsed with the autodetected encoding
1763 * into the parser reading buffer.
1764 */
1765 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1766 ctxt->input->buf->buffer,
1767 ctxt->input->buf->raw);
1768 }
1769 if (nbchars < 0) {
1770 xmlGenericError(xmlGenericErrorContext,
1771 "xmlSwitchToEncoding: encoder error\n");
1772 return(-1);
1773 }
1774 ctxt->input->base =
1775 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001776 ctxt->input->end =
1777 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001778
1779 }
1780 return(0);
1781 } else {
1782 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1783 /*
1784 * When parsing a static memory array one must know the
1785 * size to be able to convert the buffer.
1786 */
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
1789 "xmlSwitchEncoding : no input\n");
1790 return(-1);
1791 } else {
1792 int processed;
1793
1794 /*
1795 * Shring the current input buffer.
1796 * Move it as the raw buffer and create a new input buffer
1797 */
1798 processed = ctxt->input->cur - ctxt->input->base;
1799
1800 ctxt->input->buf->raw = xmlBufferCreate();
1801 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1802 ctxt->input->length - processed);
1803 ctxt->input->buf->buffer = xmlBufferCreate();
1804
1805 /*
1806 * convert as much as possible of the raw input
1807 * to the parser reading buffer.
1808 */
1809 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1810 ctxt->input->buf->buffer,
1811 ctxt->input->buf->raw);
1812 if (nbchars < 0) {
1813 xmlGenericError(xmlGenericErrorContext,
1814 "xmlSwitchToEncoding: encoder error\n");
1815 return(-1);
1816 }
1817
1818 /*
1819 * Conversion succeeded, get rid of the old buffer
1820 */
1821 if ((ctxt->input->free != NULL) &&
1822 (ctxt->input->base != NULL))
1823 ctxt->input->free((xmlChar *) ctxt->input->base);
1824 ctxt->input->base =
1825 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001826 ctxt->input->end =
1827 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001828 }
1829 }
1830 } else {
1831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1832 ctxt->sax->error(ctxt->userData,
1833 "xmlSwitchEncoding : no input\n");
1834 return(-1);
1835 }
1836 /*
1837 * The parsing is now done in UTF8 natively
1838 */
1839 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1840 } else
1841 return(-1);
1842 return(0);
1843
1844}
1845
1846/************************************************************************
1847 * *
1848 * Commodity functions to handle entities processing *
1849 * *
1850 ************************************************************************/
1851
1852/**
1853 * xmlFreeInputStream:
1854 * @input: an xmlParserInputPtr
1855 *
1856 * Free up an input stream.
1857 */
1858void
1859xmlFreeInputStream(xmlParserInputPtr input) {
1860 if (input == NULL) return;
1861
1862 if (input->filename != NULL) xmlFree((char *) input->filename);
1863 if (input->directory != NULL) xmlFree((char *) input->directory);
1864 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1865 if (input->version != NULL) xmlFree((char *) input->version);
1866 if ((input->free != NULL) && (input->base != NULL))
1867 input->free((xmlChar *) input->base);
1868 if (input->buf != NULL)
1869 xmlFreeParserInputBuffer(input->buf);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001870 MEM_CLEANUP(input, sizeof(xmlParserInput));
Owen Taylor3473f882001-02-23 17:55:21 +00001871 xmlFree(input);
1872}
1873
1874/**
1875 * xmlNewInputStream:
1876 * @ctxt: an XML parser context
1877 *
1878 * Create a new input stream structure
1879 * Returns the new input stream or NULL
1880 */
1881xmlParserInputPtr
1882xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1883 xmlParserInputPtr input;
1884
1885 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1886 if (input == NULL) {
1887 if (ctxt != NULL) {
1888 ctxt->errNo = XML_ERR_NO_MEMORY;
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "malloc: couldn't allocate a new input stream\n");
1892 ctxt->errNo = XML_ERR_NO_MEMORY;
1893 }
1894 return(NULL);
1895 }
1896 memset(input, 0, sizeof(xmlParserInput));
1897 input->line = 1;
1898 input->col = 1;
1899 input->standalone = -1;
1900 return(input);
1901}
1902
1903/**
1904 * xmlNewIOInputStream:
1905 * @ctxt: an XML parser context
1906 * @input: an I/O Input
1907 * @enc: the charset encoding if known
1908 *
1909 * Create a new input stream structure encapsulating the @input into
1910 * a stream suitable for the parser.
1911 *
1912 * Returns the new input stream or NULL
1913 */
1914xmlParserInputPtr
1915xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1916 xmlCharEncoding enc) {
1917 xmlParserInputPtr inputStream;
1918
1919 if (xmlParserDebugEntities)
1920 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1921 inputStream = xmlNewInputStream(ctxt);
1922 if (inputStream == NULL) {
1923 return(NULL);
1924 }
1925 inputStream->filename = NULL;
1926 inputStream->buf = input;
1927 inputStream->base = inputStream->buf->buffer->content;
1928 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001929 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001930 if (enc != XML_CHAR_ENCODING_NONE) {
1931 xmlSwitchEncoding(ctxt, enc);
1932 }
1933
1934 return(inputStream);
1935}
1936
1937/**
1938 * xmlNewEntityInputStream:
1939 * @ctxt: an XML parser context
1940 * @entity: an Entity pointer
1941 *
1942 * Create a new input stream based on an xmlEntityPtr
1943 *
1944 * Returns the new input stream or NULL
1945 */
1946xmlParserInputPtr
1947xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1948 xmlParserInputPtr input;
1949
1950 if (entity == NULL) {
1951 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData,
1954 "internal: xmlNewEntityInputStream entity = NULL\n");
1955 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1956 return(NULL);
1957 }
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext,
1960 "new input from entity: %s\n", entity->name);
1961 if (entity->content == NULL) {
1962 switch (entity->etype) {
1963 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1964 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1966 ctxt->sax->error(ctxt->userData,
1967 "xmlNewEntityInputStream unparsed entity !\n");
1968 break;
1969 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1970 case XML_EXTERNAL_PARAMETER_ENTITY:
1971 return(xmlLoadExternalEntity((char *) entity->URI,
1972 (char *) entity->ExternalID, ctxt));
1973 case XML_INTERNAL_GENERAL_ENTITY:
1974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975 ctxt->sax->error(ctxt->userData,
1976 "Internal entity %s without content !\n", entity->name);
1977 break;
1978 case XML_INTERNAL_PARAMETER_ENTITY:
1979 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981 ctxt->sax->error(ctxt->userData,
1982 "Internal parameter entity %s without content !\n", entity->name);
1983 break;
1984 case XML_INTERNAL_PREDEFINED_ENTITY:
1985 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "Predefined entity %s without content !\n", entity->name);
1989 break;
1990 }
1991 return(NULL);
1992 }
1993 input = xmlNewInputStream(ctxt);
1994 if (input == NULL) {
1995 return(NULL);
1996 }
1997 input->filename = (char *) entity->URI;
1998 input->base = entity->content;
1999 input->cur = entity->content;
2000 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002001 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002002 return(input);
2003}
2004
2005/**
2006 * xmlNewStringInputStream:
2007 * @ctxt: an XML parser context
2008 * @buffer: an memory buffer
2009 *
2010 * Create a new input stream based on a memory buffer.
2011 * Returns the new input stream
2012 */
2013xmlParserInputPtr
2014xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2015 xmlParserInputPtr input;
2016
2017 if (buffer == NULL) {
2018 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData,
2021 "internal: xmlNewStringInputStream string = NULL\n");
2022 return(NULL);
2023 }
2024 if (xmlParserDebugEntities)
2025 xmlGenericError(xmlGenericErrorContext,
2026 "new fixed input: %.30s\n", buffer);
2027 input = xmlNewInputStream(ctxt);
2028 if (input == NULL) {
2029 return(NULL);
2030 }
2031 input->base = buffer;
2032 input->cur = buffer;
2033 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002034 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002035 return(input);
2036}
2037
2038/**
2039 * xmlNewInputFromFile:
2040 * @ctxt: an XML parser context
2041 * @filename: the filename to use as entity
2042 *
2043 * Create a new input stream based on a file.
2044 *
2045 * Returns the new input stream or NULL in case of error
2046 */
2047xmlParserInputPtr
2048xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2049 xmlParserInputBufferPtr buf;
2050 xmlParserInputPtr inputStream;
2051 char *directory = NULL;
2052 xmlChar *URI = NULL;
2053
2054 if (xmlParserDebugEntities)
2055 xmlGenericError(xmlGenericErrorContext,
2056 "new input from file: %s\n", filename);
2057 if (ctxt == NULL) return(NULL);
2058 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2059 if (buf == NULL)
2060 return(NULL);
2061
2062 URI = xmlStrdup((xmlChar *) filename);
2063 directory = xmlParserGetDirectory((const char *) URI);
2064
2065 inputStream = xmlNewInputStream(ctxt);
2066 if (inputStream == NULL) {
2067 if (directory != NULL) xmlFree((char *) directory);
2068 if (URI != NULL) xmlFree((char *) URI);
2069 return(NULL);
2070 }
2071
2072 inputStream->filename = (const char *) URI;
2073 inputStream->directory = directory;
2074 inputStream->buf = buf;
2075
2076 inputStream->base = inputStream->buf->buffer->content;
2077 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002078 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002079 if ((ctxt->directory == NULL) && (directory != NULL))
2080 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2081 return(inputStream);
2082}
2083
2084/************************************************************************
2085 * *
2086 * Commodity functions to handle parser contexts *
2087 * *
2088 ************************************************************************/
2089
2090/**
2091 * xmlInitParserCtxt:
2092 * @ctxt: an XML parser context
2093 *
2094 * Initialize a parser context
2095 */
2096
2097void
2098xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2099{
2100 xmlSAXHandler *sax;
2101
2102 xmlDefaultSAXHandlerInit();
2103
2104 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2105 if (sax == NULL) {
2106 xmlGenericError(xmlGenericErrorContext,
2107 "xmlInitParserCtxt: out of memory\n");
2108 }
2109 else
2110 memset(sax, 0, sizeof(xmlSAXHandler));
2111
2112 /* Allocate the Input stack */
2113 ctxt->inputTab = (xmlParserInputPtr *)
2114 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2115 if (ctxt->inputTab == NULL) {
2116 xmlGenericError(xmlGenericErrorContext,
2117 "xmlInitParserCtxt: out of memory\n");
2118 ctxt->inputNr = 0;
2119 ctxt->inputMax = 0;
2120 ctxt->input = NULL;
2121 return;
2122 }
2123 ctxt->inputNr = 0;
2124 ctxt->inputMax = 5;
2125 ctxt->input = NULL;
2126
2127 ctxt->version = NULL;
2128 ctxt->encoding = NULL;
2129 ctxt->standalone = -1;
2130 ctxt->hasExternalSubset = 0;
2131 ctxt->hasPErefs = 0;
2132 ctxt->html = 0;
2133 ctxt->external = 0;
2134 ctxt->instate = XML_PARSER_START;
2135 ctxt->token = 0;
2136 ctxt->directory = NULL;
2137
2138 /* Allocate the Node stack */
2139 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2140 if (ctxt->nodeTab == NULL) {
2141 xmlGenericError(xmlGenericErrorContext,
2142 "xmlInitParserCtxt: out of memory\n");
2143 ctxt->nodeNr = 0;
2144 ctxt->nodeMax = 0;
2145 ctxt->node = NULL;
2146 ctxt->inputNr = 0;
2147 ctxt->inputMax = 0;
2148 ctxt->input = NULL;
2149 return;
2150 }
2151 ctxt->nodeNr = 0;
2152 ctxt->nodeMax = 10;
2153 ctxt->node = NULL;
2154
2155 /* Allocate the Name stack */
2156 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2157 if (ctxt->nameTab == NULL) {
2158 xmlGenericError(xmlGenericErrorContext,
2159 "xmlInitParserCtxt: out of memory\n");
2160 ctxt->nodeNr = 0;
2161 ctxt->nodeMax = 0;
2162 ctxt->node = NULL;
2163 ctxt->inputNr = 0;
2164 ctxt->inputMax = 0;
2165 ctxt->input = NULL;
2166 ctxt->nameNr = 0;
2167 ctxt->nameMax = 0;
2168 ctxt->name = NULL;
2169 return;
2170 }
2171 ctxt->nameNr = 0;
2172 ctxt->nameMax = 10;
2173 ctxt->name = NULL;
2174
2175 /* Allocate the space stack */
2176 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2177 if (ctxt->spaceTab == NULL) {
2178 xmlGenericError(xmlGenericErrorContext,
2179 "xmlInitParserCtxt: out of memory\n");
2180 ctxt->nodeNr = 0;
2181 ctxt->nodeMax = 0;
2182 ctxt->node = NULL;
2183 ctxt->inputNr = 0;
2184 ctxt->inputMax = 0;
2185 ctxt->input = NULL;
2186 ctxt->nameNr = 0;
2187 ctxt->nameMax = 0;
2188 ctxt->name = NULL;
2189 ctxt->spaceNr = 0;
2190 ctxt->spaceMax = 0;
2191 ctxt->space = NULL;
2192 return;
2193 }
2194 ctxt->spaceNr = 1;
2195 ctxt->spaceMax = 10;
2196 ctxt->spaceTab[0] = -1;
2197 ctxt->space = &ctxt->spaceTab[0];
2198
Daniel Veillard14be0a12001-03-03 18:50:55 +00002199 ctxt->sax = sax;
2200 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202 ctxt->userData = ctxt;
2203 ctxt->myDoc = NULL;
2204 ctxt->wellFormed = 1;
2205 ctxt->valid = 1;
2206 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2207 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2208 ctxt->pedantic = xmlPedanticParserDefaultValue;
2209 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2210 ctxt->vctxt.userData = ctxt;
2211 if (ctxt->validate) {
2212 ctxt->vctxt.error = xmlParserValidityError;
2213 if (xmlGetWarningsDefaultValue == 0)
2214 ctxt->vctxt.warning = NULL;
2215 else
2216 ctxt->vctxt.warning = xmlParserValidityWarning;
2217 /* Allocate the Node stack */
2218 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2219 if (ctxt->vctxt.nodeTab == NULL) {
2220 xmlGenericError(xmlGenericErrorContext,
2221 "xmlInitParserCtxt: out of memory\n");
2222 ctxt->vctxt.nodeMax = 0;
2223 ctxt->validate = 0;
2224 ctxt->vctxt.error = NULL;
2225 ctxt->vctxt.warning = NULL;
2226 } else {
2227 ctxt->vctxt.nodeNr = 0;
2228 ctxt->vctxt.nodeMax = 4;
2229 ctxt->vctxt.node = NULL;
2230 }
2231 } else {
2232 ctxt->vctxt.error = NULL;
2233 ctxt->vctxt.warning = NULL;
2234 }
2235 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2236 ctxt->record_info = 0;
2237 ctxt->nbChars = 0;
2238 ctxt->checkIndex = 0;
2239 ctxt->inSubset = 0;
2240 ctxt->errNo = XML_ERR_OK;
2241 ctxt->depth = 0;
2242 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2243 xmlInitNodeInfoSeq(&ctxt->node_seq);
2244}
2245
2246/**
2247 * xmlFreeParserCtxt:
2248 * @ctxt: an XML parser context
2249 *
2250 * Free all the memory used by a parser context. However the parsed
2251 * document in ctxt->myDoc is not freed.
2252 */
2253
2254void
2255xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2256{
2257 xmlParserInputPtr input;
2258 xmlChar *oldname;
2259
2260 if (ctxt == NULL) return;
2261
2262 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2263 xmlFreeInputStream(input);
2264 }
2265 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2266 xmlFree(oldname);
2267 }
2268 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2269 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2270 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2271 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2272 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2273 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2274 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2275 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2276 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2277 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2278 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2279 xmlFree(ctxt->sax);
2280 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2281 xmlFree(ctxt);
2282}
2283
2284/**
2285 * xmlNewParserCtxt:
2286 *
2287 * Allocate and initialize a new parser context.
2288 *
2289 * Returns the xmlParserCtxtPtr or NULL
2290 */
2291
2292xmlParserCtxtPtr
2293xmlNewParserCtxt()
2294{
2295 xmlParserCtxtPtr ctxt;
2296
2297 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2298 if (ctxt == NULL) {
2299 xmlGenericError(xmlGenericErrorContext,
2300 "xmlNewParserCtxt : cannot allocate context\n");
2301 perror("malloc");
2302 return(NULL);
2303 }
2304 memset(ctxt, 0, sizeof(xmlParserCtxt));
2305 xmlInitParserCtxt(ctxt);
2306 return(ctxt);
2307}
2308
2309/************************************************************************
2310 * *
2311 * Handling of node informations *
2312 * *
2313 ************************************************************************/
2314
2315/**
2316 * xmlClearParserCtxt:
2317 * @ctxt: an XML parser context
2318 *
2319 * Clear (release owned resources) and reinitialize a parser context
2320 */
2321
2322void
2323xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2324{
2325 xmlClearNodeInfoSeq(&ctxt->node_seq);
2326 xmlInitParserCtxt(ctxt);
2327}
2328
2329/**
2330 * xmlParserFindNodeInfo:
2331 * @ctxt: an XML parser context
2332 * @node: an XML node within the tree
2333 *
2334 * Find the parser node info struct for a given node
2335 *
2336 * Returns an xmlParserNodeInfo block pointer or NULL
2337 */
2338const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2339 const xmlNode* node)
2340{
2341 unsigned long pos;
2342
2343 /* Find position where node should be at */
2344 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2345 if ( ctx->node_seq.buffer[pos].node == node )
2346 return &ctx->node_seq.buffer[pos];
2347 else
2348 return NULL;
2349}
2350
2351
2352/**
2353 * xmlInitNodeInfoSeq:
2354 * @seq: a node info sequence pointer
2355 *
2356 * -- Initialize (set to initial state) node info sequence
2357 */
2358void
2359xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2360{
2361 seq->length = 0;
2362 seq->maximum = 0;
2363 seq->buffer = NULL;
2364}
2365
2366/**
2367 * xmlClearNodeInfoSeq:
2368 * @seq: a node info sequence pointer
2369 *
2370 * -- Clear (release memory and reinitialize) node
2371 * info sequence
2372 */
2373void
2374xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2375{
2376 if ( seq->buffer != NULL )
2377 xmlFree(seq->buffer);
2378 xmlInitNodeInfoSeq(seq);
2379}
2380
2381
2382/**
2383 * xmlParserFindNodeInfoIndex:
2384 * @seq: a node info sequence pointer
2385 * @node: an XML node pointer
2386 *
2387 *
2388 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2389 * the given node is or should be at in a sorted sequence
2390 *
2391 * Returns a long indicating the position of the record
2392 */
2393unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2394 const xmlNode* node)
2395{
2396 unsigned long upper, lower, middle;
2397 int found = 0;
2398
2399 /* Do a binary search for the key */
2400 lower = 1;
2401 upper = seq->length;
2402 middle = 0;
2403 while ( lower <= upper && !found) {
2404 middle = lower + (upper - lower) / 2;
2405 if ( node == seq->buffer[middle - 1].node )
2406 found = 1;
2407 else if ( node < seq->buffer[middle - 1].node )
2408 upper = middle - 1;
2409 else
2410 lower = middle + 1;
2411 }
2412
2413 /* Return position */
2414 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2415 return middle;
2416 else
2417 return middle - 1;
2418}
2419
2420
2421/**
2422 * xmlParserAddNodeInfo:
2423 * @ctxt: an XML parser context
2424 * @info: a node info sequence pointer
2425 *
2426 * Insert node info record into the sorted sequence
2427 */
2428void
2429xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2430 const xmlParserNodeInfo* info)
2431{
2432 unsigned long pos;
2433 static unsigned int block_size = 5;
2434
2435 /* Find pos and check to see if node is already in the sequence */
2436 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2437 if ( pos < ctxt->node_seq.length
2438 && ctxt->node_seq.buffer[pos].node == info->node ) {
2439 ctxt->node_seq.buffer[pos] = *info;
2440 }
2441
2442 /* Otherwise, we need to add new node to buffer */
2443 else {
2444 /* Expand buffer by 5 if needed */
2445 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2446 xmlParserNodeInfo* tmp_buffer;
2447 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2448 *(ctxt->node_seq.maximum + block_size));
2449
2450 if ( ctxt->node_seq.buffer == NULL )
2451 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2452 else
2453 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2454
2455 if ( tmp_buffer == NULL ) {
2456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2457 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2458 ctxt->errNo = XML_ERR_NO_MEMORY;
2459 return;
2460 }
2461 ctxt->node_seq.buffer = tmp_buffer;
2462 ctxt->node_seq.maximum += block_size;
2463 }
2464
2465 /* If position is not at end, move elements out of the way */
2466 if ( pos != ctxt->node_seq.length ) {
2467 unsigned long i;
2468
2469 for ( i = ctxt->node_seq.length; i > pos; i-- )
2470 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2471 }
2472
2473 /* Copy element and increase length */
2474 ctxt->node_seq.buffer[pos] = *info;
2475 ctxt->node_seq.length++;
2476 }
2477}
2478
2479/************************************************************************
2480 * *
2481 * Deprecated functions kept for compatibility *
2482 * *
2483 ************************************************************************/
2484
2485/*
2486 * xmlCheckLanguageID
2487 * @lang: pointer to the string value
2488 *
2489 * Checks that the value conforms to the LanguageID production:
2490 *
2491 * NOTE: this is somewhat deprecated, those productions were removed from
2492 * the XML Second edition.
2493 *
2494 * [33] LanguageID ::= Langcode ('-' Subcode)*
2495 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2496 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2497 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2498 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2499 * [38] Subcode ::= ([a-z] | [A-Z])+
2500 *
2501 * Returns 1 if correct 0 otherwise
2502 **/
2503int
2504xmlCheckLanguageID(const xmlChar *lang) {
2505 const xmlChar *cur = lang;
2506
2507 if (cur == NULL)
2508 return(0);
2509 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2510 ((cur[0] == 'I') && (cur[1] == '-'))) {
2511 /*
2512 * IANA code
2513 */
2514 cur += 2;
2515 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2516 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2517 cur++;
2518 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2519 ((cur[0] == 'X') && (cur[1] == '-'))) {
2520 /*
2521 * User code
2522 */
2523 cur += 2;
2524 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2525 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2526 cur++;
2527 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2528 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2529 /*
2530 * ISO639
2531 */
2532 cur++;
2533 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2534 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2535 cur++;
2536 else
2537 return(0);
2538 } else
2539 return(0);
2540 while (cur[0] != 0) { /* non input consuming */
2541 if (cur[0] != '-')
2542 return(0);
2543 cur++;
2544 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2545 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2546 cur++;
2547 else
2548 return(0);
2549 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2550 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2551 cur++;
2552 }
2553 return(1);
2554}
2555
2556/**
2557 * xmlDecodeEntities:
2558 * @ctxt: the parser context
2559 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2560 * @len: the len to decode (in bytes !), -1 for no size limit
2561 * @end: an end marker xmlChar, 0 if none
2562 * @end2: an end marker xmlChar, 0 if none
2563 * @end3: an end marker xmlChar, 0 if none
2564 *
2565 * This function is deprecated, we now always process entities content
2566 * through xmlStringDecodeEntities
2567 *
2568 * TODO: remove it in next major release.
2569 *
2570 * [67] Reference ::= EntityRef | CharRef
2571 *
2572 * [69] PEReference ::= '%' Name ';'
2573 *
2574 * Returns A newly allocated string with the substitution done. The caller
2575 * must deallocate it !
2576 */
2577xmlChar *
2578xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2579 xmlChar end, xmlChar end2, xmlChar end3) {
2580#if 0
2581 xmlChar *buffer = NULL;
2582 unsigned int buffer_size = 0;
2583 unsigned int nbchars = 0;
2584
2585 xmlChar *current = NULL;
2586 xmlEntityPtr ent;
2587 unsigned int max = (unsigned int) len;
2588 int c,l;
2589#endif
2590
2591 static int deprecated = 0;
2592 if (!deprecated) {
2593 xmlGenericError(xmlGenericErrorContext,
2594 "xmlDecodeEntities() deprecated function reached\n");
2595 deprecated = 1;
2596 }
2597
2598#if 0
2599 if (ctxt->depth > 40) {
2600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2601 ctxt->sax->error(ctxt->userData,
2602 "Detected entity reference loop\n");
2603 ctxt->wellFormed = 0;
2604 ctxt->disableSAX = 1;
2605 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2606 return(NULL);
2607 }
2608
2609 /*
2610 * allocate a translation buffer.
2611 */
2612 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2613 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2614 if (buffer == NULL) {
2615 perror("xmlDecodeEntities: malloc failed");
2616 return(NULL);
2617 }
2618
2619 /*
2620 * Ok loop until we reach one of the ending char or a size limit.
2621 */
2622 GROW;
2623 c = CUR_CHAR(l);
2624 while ((nbchars < max) && (c != end) && /* NOTUSED */
2625 (c != end2) && (c != end3)) {
2626 GROW;
2627 if (c == 0) break;
2628 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2629 int val = xmlParseCharRef(ctxt);
2630 COPY_BUF(0,buffer,nbchars,val);
2631 NEXTL(l);
2632 } else if ((c == '&') && (ctxt->token != '&') &&
2633 (what & XML_SUBSTITUTE_REF)) {
2634 if (xmlParserDebugEntities)
2635 xmlGenericError(xmlGenericErrorContext,
2636 "decoding Entity Reference\n");
2637 ent = xmlParseEntityRef(ctxt);
2638 if ((ent != NULL) &&
2639 (ctxt->replaceEntities != 0)) {
2640 current = ent->content;
2641 while (*current != 0) { /* non input consuming loop */
2642 buffer[nbchars++] = *current++;
2643 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2644 growBuffer(buffer);
2645 }
2646 }
2647 } else if (ent != NULL) {
2648 const xmlChar *cur = ent->name;
2649
2650 buffer[nbchars++] = '&';
2651 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2652 growBuffer(buffer);
2653 }
2654 while (*cur != 0) { /* non input consuming loop */
2655 buffer[nbchars++] = *cur++;
2656 }
2657 buffer[nbchars++] = ';';
2658 }
2659 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2660 /*
2661 * a PEReference induce to switch the entity flow,
2662 * we break here to flush the current set of chars
2663 * parsed if any. We will be called back later.
2664 */
2665 if (xmlParserDebugEntities)
2666 xmlGenericError(xmlGenericErrorContext,
2667 "decoding PE Reference\n");
2668 if (nbchars != 0) break;
2669
2670 xmlParsePEReference(ctxt);
2671
2672 /*
2673 * Pop-up of finished entities.
2674 */
2675 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2676 xmlPopInput(ctxt);
2677
2678 break;
2679 } else {
2680 COPY_BUF(l,buffer,nbchars,c);
2681 NEXTL(l);
2682 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2683 growBuffer(buffer);
2684 }
2685 }
2686 c = CUR_CHAR(l);
2687 }
2688 buffer[nbchars++] = 0;
2689 return(buffer);
2690#endif
2691 return(NULL);
2692}
2693
2694/**
2695 * xmlNamespaceParseNCName:
2696 * @ctxt: an XML parser context
2697 *
2698 * parse an XML namespace name.
2699 *
2700 * TODO: this seems not in use anymore, the namespace handling is done on
2701 * top of the SAX interfaces, i.e. not on raw input.
2702 *
2703 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2704 *
2705 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2706 * CombiningChar | Extender
2707 *
2708 * Returns the namespace name or NULL
2709 */
2710
2711xmlChar *
2712xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2713#if 0
2714 xmlChar buf[XML_MAX_NAMELEN + 5];
2715 int len = 0, l;
2716 int cur = CUR_CHAR(l);
2717#endif
2718
2719 static int deprecated = 0;
2720 if (!deprecated) {
2721 xmlGenericError(xmlGenericErrorContext,
2722 "xmlNamespaceParseNCName() deprecated function reached\n");
2723 deprecated = 1;
2724 }
2725
2726#if 0
2727 /* load first the value of the char !!! */
2728 GROW;
2729 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2730
2731xmlGenericError(xmlGenericErrorContext,
2732 "xmlNamespaceParseNCName: reached loop 3\n");
2733 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2734 (cur == '.') || (cur == '-') ||
2735 (cur == '_') ||
2736 (IS_COMBINING(cur)) ||
2737 (IS_EXTENDER(cur))) {
2738 COPY_BUF(l,buf,len,cur);
2739 NEXTL(l);
2740 cur = CUR_CHAR(l);
2741 if (len >= XML_MAX_NAMELEN) {
2742 xmlGenericError(xmlGenericErrorContext,
2743 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2744 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2745 (cur == '.') || (cur == '-') ||
2746 (cur == '_') ||
2747 (IS_COMBINING(cur)) ||
2748 (IS_EXTENDER(cur))) {
2749 NEXTL(l);
2750 cur = CUR_CHAR(l);
2751 }
2752 break;
2753 }
2754 }
2755 return(xmlStrndup(buf, len));
2756#endif
2757 return(NULL);
2758}
2759
2760/**
2761 * xmlNamespaceParseQName:
2762 * @ctxt: an XML parser context
2763 * @prefix: a xmlChar **
2764 *
2765 * TODO: this seems not in use anymore, the namespace handling is done on
2766 * top of the SAX interfaces, i.e. not on raw input.
2767 *
2768 * parse an XML qualified name
2769 *
2770 * [NS 5] QName ::= (Prefix ':')? LocalPart
2771 *
2772 * [NS 6] Prefix ::= NCName
2773 *
2774 * [NS 7] LocalPart ::= NCName
2775 *
2776 * Returns the local part, and prefix is updated
2777 * to get the Prefix if any.
2778 */
2779
2780xmlChar *
2781xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2782
2783 static int deprecated = 0;
2784 if (!deprecated) {
2785 xmlGenericError(xmlGenericErrorContext,
2786 "xmlNamespaceParseQName() deprecated function reached\n");
2787 deprecated = 1;
2788 }
2789
2790#if 0
2791 xmlChar *ret = NULL;
2792
2793 *prefix = NULL;
2794 ret = xmlNamespaceParseNCName(ctxt);
2795 if (RAW == ':') {
2796 *prefix = ret;
2797 NEXT;
2798 ret = xmlNamespaceParseNCName(ctxt);
2799 }
2800
2801 return(ret);
2802#endif
2803 return(NULL);
2804}
2805
2806/**
2807 * xmlNamespaceParseNSDef:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse a namespace prefix declaration
2811 *
2812 * TODO: this seems not in use anymore, the namespace handling is done on
2813 * top of the SAX interfaces, i.e. not on raw input.
2814 *
2815 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2816 *
2817 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2818 *
2819 * Returns the namespace name
2820 */
2821
2822xmlChar *
2823xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2824 static int deprecated = 0;
2825 if (!deprecated) {
2826 xmlGenericError(xmlGenericErrorContext,
2827 "xmlNamespaceParseNSDef() deprecated function reached\n");
2828 deprecated = 1;
2829 }
2830 return(NULL);
2831#if 0
2832 xmlChar *name = NULL;
2833
2834 if ((RAW == 'x') && (NXT(1) == 'm') &&
2835 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2836 (NXT(4) == 's')) {
2837 SKIP(5);
2838 if (RAW == ':') {
2839 NEXT;
2840 name = xmlNamespaceParseNCName(ctxt);
2841 }
2842 }
2843 return(name);
2844#endif
2845}
2846
2847/**
2848 * xmlParseQuotedString:
2849 * @ctxt: an XML parser context
2850 *
2851 * Parse and return a string between quotes or doublequotes
2852 *
2853 * TODO: Deprecated, to be removed at next drop of binary compatibility
2854 *
2855 * Returns the string parser or NULL.
2856 */
2857xmlChar *
2858xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2859 static int deprecated = 0;
2860 if (!deprecated) {
2861 xmlGenericError(xmlGenericErrorContext,
2862 "xmlParseQuotedString() deprecated function reached\n");
2863 deprecated = 1;
2864 }
2865 return(NULL);
2866
2867#if 0
2868 xmlChar *buf = NULL;
2869 int len = 0,l;
2870 int size = XML_PARSER_BUFFER_SIZE;
2871 int c;
2872
2873 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2874 if (buf == NULL) {
2875 xmlGenericError(xmlGenericErrorContext,
2876 "malloc of %d byte failed\n", size);
2877 return(NULL);
2878 }
2879xmlGenericError(xmlGenericErrorContext,
2880 "xmlParseQuotedString: reached loop 4\n");
2881 if (RAW == '"') {
2882 NEXT;
2883 c = CUR_CHAR(l);
2884 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2885 if (len + 5 >= size) {
2886 size *= 2;
2887 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2888 if (buf == NULL) {
2889 xmlGenericError(xmlGenericErrorContext,
2890 "realloc of %d byte failed\n", size);
2891 return(NULL);
2892 }
2893 }
2894 COPY_BUF(l,buf,len,c);
2895 NEXTL(l);
2896 c = CUR_CHAR(l);
2897 }
2898 if (c != '"') {
2899 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2901 ctxt->sax->error(ctxt->userData,
2902 "String not closed \"%.50s\"\n", buf);
2903 ctxt->wellFormed = 0;
2904 ctxt->disableSAX = 1;
2905 } else {
2906 NEXT;
2907 }
2908 } else if (RAW == '\''){
2909 NEXT;
2910 c = CUR;
2911 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2912 if (len + 1 >= size) {
2913 size *= 2;
2914 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2915 if (buf == NULL) {
2916 xmlGenericError(xmlGenericErrorContext,
2917 "realloc of %d byte failed\n", size);
2918 return(NULL);
2919 }
2920 }
2921 buf[len++] = c;
2922 NEXT;
2923 c = CUR;
2924 }
2925 if (RAW != '\'') {
2926 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928 ctxt->sax->error(ctxt->userData,
2929 "String not closed \"%.50s\"\n", buf);
2930 ctxt->wellFormed = 0;
2931 ctxt->disableSAX = 1;
2932 } else {
2933 NEXT;
2934 }
2935 }
2936 return(buf);
2937#endif
2938}
2939
2940/**
2941 * xmlParseNamespace:
2942 * @ctxt: an XML parser context
2943 *
2944 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2945 *
2946 * This is what the older xml-name Working Draft specified, a bunch of
2947 * other stuff may still rely on it, so support is still here as
2948 * if it was declared on the root of the Tree:-(
2949 *
2950 * TODO: remove from library
2951 *
2952 * To be removed at next drop of binary compatibility
2953 */
2954
2955void
2956xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2957 static int deprecated = 0;
2958 if (!deprecated) {
2959 xmlGenericError(xmlGenericErrorContext,
2960 "xmlParseNamespace() deprecated function reached\n");
2961 deprecated = 1;
2962 }
2963
2964#if 0
2965 xmlChar *href = NULL;
2966 xmlChar *prefix = NULL;
2967 int garbage = 0;
2968
2969 /*
2970 * We just skipped "namespace" or "xml:namespace"
2971 */
2972 SKIP_BLANKS;
2973
2974xmlGenericError(xmlGenericErrorContext,
2975 "xmlParseNamespace: reached loop 5\n");
2976 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2977 /*
2978 * We can have "ns" or "prefix" attributes
2979 * Old encoding as 'href' or 'AS' attributes is still supported
2980 */
2981 if ((RAW == 'n') && (NXT(1) == 's')) {
2982 garbage = 0;
2983 SKIP(2);
2984 SKIP_BLANKS;
2985
2986 if (RAW != '=') continue;
2987 NEXT;
2988 SKIP_BLANKS;
2989
2990 href = xmlParseQuotedString(ctxt);
2991 SKIP_BLANKS;
2992 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2993 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2994 garbage = 0;
2995 SKIP(4);
2996 SKIP_BLANKS;
2997
2998 if (RAW != '=') continue;
2999 NEXT;
3000 SKIP_BLANKS;
3001
3002 href = xmlParseQuotedString(ctxt);
3003 SKIP_BLANKS;
3004 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3005 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3006 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3007 garbage = 0;
3008 SKIP(6);
3009 SKIP_BLANKS;
3010
3011 if (RAW != '=') continue;
3012 NEXT;
3013 SKIP_BLANKS;
3014
3015 prefix = xmlParseQuotedString(ctxt);
3016 SKIP_BLANKS;
3017 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3018 garbage = 0;
3019 SKIP(2);
3020 SKIP_BLANKS;
3021
3022 if (RAW != '=') continue;
3023 NEXT;
3024 SKIP_BLANKS;
3025
3026 prefix = xmlParseQuotedString(ctxt);
3027 SKIP_BLANKS;
3028 } else if ((RAW == '?') && (NXT(1) == '>')) {
3029 garbage = 0;
3030 NEXT;
3031 } else {
3032 /*
3033 * Found garbage when parsing the namespace
3034 */
3035 if (!garbage) {
3036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3037 ctxt->sax->error(ctxt->userData,
3038 "xmlParseNamespace found garbage\n");
3039 }
3040 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3041 ctxt->wellFormed = 0;
3042 ctxt->disableSAX = 1;
3043 NEXT;
3044 }
3045 }
3046
3047 MOVETO_ENDTAG(CUR_PTR);
3048 NEXT;
3049
3050 /*
3051 * Register the DTD.
3052 if (href != NULL)
3053 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3054 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3055 */
3056
3057 if (prefix != NULL) xmlFree(prefix);
3058 if (href != NULL) xmlFree(href);
3059#endif
3060}
3061
3062/**
3063 * xmlScanName:
3064 * @ctxt: an XML parser context
3065 *
3066 * Trickery: parse an XML name but without consuming the input flow
3067 * Needed for rollback cases. Used only when parsing entities references.
3068 *
3069 * TODO: seems deprecated now, only used in the default part of
3070 * xmlParserHandleReference
3071 *
3072 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3073 * CombiningChar | Extender
3074 *
3075 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3076 *
3077 * [6] Names ::= Name (S Name)*
3078 *
3079 * Returns the Name parsed or NULL
3080 */
3081
3082xmlChar *
3083xmlScanName(xmlParserCtxtPtr ctxt) {
3084 static int deprecated = 0;
3085 if (!deprecated) {
3086 xmlGenericError(xmlGenericErrorContext,
3087 "xmlScanName() deprecated function reached\n");
3088 deprecated = 1;
3089 }
3090 return(NULL);
3091
3092#if 0
3093 xmlChar buf[XML_MAX_NAMELEN];
3094 int len = 0;
3095
3096 GROW;
3097 if (!IS_LETTER(RAW) && (RAW != '_') &&
3098 (RAW != ':')) {
3099 return(NULL);
3100 }
3101
3102
3103 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3104 (NXT(len) == '.') || (NXT(len) == '-') ||
3105 (NXT(len) == '_') || (NXT(len) == ':') ||
3106 (IS_COMBINING(NXT(len))) ||
3107 (IS_EXTENDER(NXT(len)))) {
3108 GROW;
3109 buf[len] = NXT(len);
3110 len++;
3111 if (len >= XML_MAX_NAMELEN) {
3112 xmlGenericError(xmlGenericErrorContext,
3113 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3114 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3115 (IS_DIGIT(NXT(len))) ||
3116 (NXT(len) == '.') || (NXT(len) == '-') ||
3117 (NXT(len) == '_') || (NXT(len) == ':') ||
3118 (IS_COMBINING(NXT(len))) ||
3119 (IS_EXTENDER(NXT(len))))
3120 len++;
3121 break;
3122 }
3123 }
3124 return(xmlStrndup(buf, len));
3125#endif
3126}
3127
3128/**
3129 * xmlParserHandleReference:
3130 * @ctxt: the parser context
3131 *
3132 * TODO: Remove, now deprecated ... the test is done directly in the
3133 * content parsing
3134 * routines.
3135 *
3136 * [67] Reference ::= EntityRef | CharRef
3137 *
3138 * [68] EntityRef ::= '&' Name ';'
3139 *
3140 * [ WFC: Entity Declared ]
3141 * the Name given in the entity reference must match that in an entity
3142 * declaration, except that well-formed documents need not declare any
3143 * of the following entities: amp, lt, gt, apos, quot.
3144 *
3145 * [ WFC: Parsed Entity ]
3146 * An entity reference must not contain the name of an unparsed entity
3147 *
3148 * [66] CharRef ::= '&#' [0-9]+ ';' |
3149 * '&#x' [0-9a-fA-F]+ ';'
3150 *
3151 * A PEReference may have been detectect in the current input stream
3152 * the handling is done accordingly to
3153 * http://www.w3.org/TR/REC-xml#entproc
3154 */
3155void
3156xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3157 static int deprecated = 0;
3158 if (!deprecated) {
3159 xmlGenericError(xmlGenericErrorContext,
3160 "xmlParserHandleReference() deprecated function reached\n");
3161 deprecated = 1;
3162 }
3163
3164#if 0
3165 xmlParserInputPtr input;
3166 xmlChar *name;
3167 xmlEntityPtr ent = NULL;
3168
3169 if (ctxt->token != 0) {
3170 return;
3171 }
3172 if (RAW != '&') return;
3173 GROW;
3174 if ((RAW == '&') && (NXT(1) == '#')) {
3175 switch(ctxt->instate) {
3176 case XML_PARSER_ENTITY_DECL:
3177 case XML_PARSER_PI:
3178 case XML_PARSER_CDATA_SECTION:
3179 case XML_PARSER_COMMENT:
3180 case XML_PARSER_SYSTEM_LITERAL:
3181 /* we just ignore it there */
3182 return;
3183 case XML_PARSER_START_TAG:
3184 return;
3185 case XML_PARSER_END_TAG:
3186 return;
3187 case XML_PARSER_EOF:
3188 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3190 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3191 ctxt->wellFormed = 0;
3192 ctxt->disableSAX = 1;
3193 return;
3194 case XML_PARSER_PROLOG:
3195 case XML_PARSER_START:
3196 case XML_PARSER_MISC:
3197 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3199 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3200 ctxt->wellFormed = 0;
3201 ctxt->disableSAX = 1;
3202 return;
3203 case XML_PARSER_EPILOG:
3204 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3207 ctxt->wellFormed = 0;
3208 ctxt->disableSAX = 1;
3209 return;
3210 case XML_PARSER_DTD:
3211 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3213 ctxt->sax->error(ctxt->userData,
3214 "CharRef are forbiden in DTDs!\n");
3215 ctxt->wellFormed = 0;
3216 ctxt->disableSAX = 1;
3217 return;
3218 case XML_PARSER_ENTITY_VALUE:
3219 /*
3220 * NOTE: in the case of entity values, we don't do the
3221 * substitution here since we need the literal
3222 * entity value to be able to save the internal
3223 * subset of the document.
3224 * This will be handled by xmlStringDecodeEntities
3225 */
3226 return;
3227 case XML_PARSER_CONTENT:
3228 return;
3229 case XML_PARSER_ATTRIBUTE_VALUE:
3230 /* ctxt->token = xmlParseCharRef(ctxt); */
3231 return;
3232 case XML_PARSER_IGNORE:
3233 return;
3234 }
3235 return;
3236 }
3237
3238 switch(ctxt->instate) {
3239 case XML_PARSER_CDATA_SECTION:
3240 return;
3241 case XML_PARSER_PI:
3242 case XML_PARSER_COMMENT:
3243 case XML_PARSER_SYSTEM_LITERAL:
3244 case XML_PARSER_CONTENT:
3245 return;
3246 case XML_PARSER_START_TAG:
3247 return;
3248 case XML_PARSER_END_TAG:
3249 return;
3250 case XML_PARSER_EOF:
3251 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3254 ctxt->wellFormed = 0;
3255 ctxt->disableSAX = 1;
3256 return;
3257 case XML_PARSER_PROLOG:
3258 case XML_PARSER_START:
3259 case XML_PARSER_MISC:
3260 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3262 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3263 ctxt->wellFormed = 0;
3264 ctxt->disableSAX = 1;
3265 return;
3266 case XML_PARSER_EPILOG:
3267 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3269 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3270 ctxt->wellFormed = 0;
3271 ctxt->disableSAX = 1;
3272 return;
3273 case XML_PARSER_ENTITY_VALUE:
3274 /*
3275 * NOTE: in the case of entity values, we don't do the
3276 * substitution here since we need the literal
3277 * entity value to be able to save the internal
3278 * subset of the document.
3279 * This will be handled by xmlStringDecodeEntities
3280 */
3281 return;
3282 case XML_PARSER_ATTRIBUTE_VALUE:
3283 /*
3284 * NOTE: in the case of attributes values, we don't do the
3285 * substitution here unless we are in a mode where
3286 * the parser is explicitely asked to substitute
3287 * entities. The SAX callback is called with values
3288 * without entity substitution.
3289 * This will then be handled by xmlStringDecodeEntities
3290 */
3291 return;
3292 case XML_PARSER_ENTITY_DECL:
3293 /*
3294 * we just ignore it there
3295 * the substitution will be done once the entity is referenced
3296 */
3297 return;
3298 case XML_PARSER_DTD:
3299 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "Entity references are forbiden in DTDs!\n");
3303 ctxt->wellFormed = 0;
3304 ctxt->disableSAX = 1;
3305 return;
3306 case XML_PARSER_IGNORE:
3307 return;
3308 }
3309
3310/* TODO: this seems not reached anymore .... Verify ... */
3311xmlGenericError(xmlGenericErrorContext,
3312 "Reached deprecated section in xmlParserHandleReference()\n");
3313xmlGenericError(xmlGenericErrorContext,
3314 "Please forward the document to Daniel.Veillard@w3.org\n");
3315xmlGenericError(xmlGenericErrorContext,
3316 "indicating the version: %s, thanks !\n", xmlParserVersion);
3317 NEXT;
3318 name = xmlScanName(ctxt);
3319 if (name == NULL) {
3320 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3322 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3323 ctxt->wellFormed = 0;
3324 ctxt->disableSAX = 1;
3325 ctxt->token = '&';
3326 return;
3327 }
3328 if (NXT(xmlStrlen(name)) != ';') {
3329 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData,
3332 "Entity reference: ';' expected\n");
3333 ctxt->wellFormed = 0;
3334 ctxt->disableSAX = 1;
3335 ctxt->token = '&';
3336 xmlFree(name);
3337 return;
3338 }
3339 SKIP(xmlStrlen(name) + 1);
3340 if (ctxt->sax != NULL) {
3341 if (ctxt->sax->getEntity != NULL)
3342 ent = ctxt->sax->getEntity(ctxt->userData, name);
3343 }
3344
3345 /*
3346 * [ WFC: Entity Declared ]
3347 * the Name given in the entity reference must match that in an entity
3348 * declaration, except that well-formed documents need not declare any
3349 * of the following entities: amp, lt, gt, apos, quot.
3350 */
3351 if (ent == NULL)
3352 ent = xmlGetPredefinedEntity(name);
3353 if (ent == NULL) {
3354 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356 ctxt->sax->error(ctxt->userData,
3357 "Entity reference: entity %s not declared\n",
3358 name);
3359 ctxt->wellFormed = 0;
3360 ctxt->disableSAX = 1;
3361 xmlFree(name);
3362 return;
3363 }
3364
3365 /*
3366 * [ WFC: Parsed Entity ]
3367 * An entity reference must not contain the name of an unparsed entity
3368 */
3369 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3370 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3372 ctxt->sax->error(ctxt->userData,
3373 "Entity reference to unparsed entity %s\n", name);
3374 ctxt->wellFormed = 0;
3375 ctxt->disableSAX = 1;
3376 }
3377
3378 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3379 ctxt->token = ent->content[0];
3380 xmlFree(name);
3381 return;
3382 }
3383 input = xmlNewEntityInputStream(ctxt, ent);
3384 xmlPushInput(ctxt, input);
3385 xmlFree(name);
3386#endif
3387 return;
3388}
3389
3390/**
3391 * xmlHandleEntity:
3392 * @ctxt: an XML parser context
3393 * @entity: an XML entity pointer.
3394 *
3395 * Default handling of defined entities, when should we define a new input
3396 * stream ? When do we just handle that as a set of chars ?
3397 *
3398 * OBSOLETE: to be removed at some point.
3399 */
3400
3401void
3402xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3403 static int deprecated = 0;
3404 if (!deprecated) {
3405 xmlGenericError(xmlGenericErrorContext,
3406 "xmlHandleEntity() deprecated function reached\n");
3407 deprecated = 1;
3408 }
3409
3410#if 0
3411 int len;
3412 xmlParserInputPtr input;
3413
3414 if (entity->content == NULL) {
3415 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3418 entity->name);
3419 ctxt->wellFormed = 0;
3420 ctxt->disableSAX = 1;
3421 return;
3422 }
3423 len = xmlStrlen(entity->content);
3424 if (len <= 2) goto handle_as_char;
3425
3426 /*
3427 * Redefine its content as an input stream.
3428 */
3429 input = xmlNewEntityInputStream(ctxt, entity);
3430 xmlPushInput(ctxt, input);
3431 return;
3432
3433handle_as_char:
3434 /*
3435 * Just handle the content as a set of chars.
3436 */
3437 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3438 (ctxt->sax->characters != NULL))
3439 ctxt->sax->characters(ctxt->userData, entity->content, len);
3440#endif
3441}
3442
3443/**
3444 * xmlNewGlobalNs:
3445 * @doc: the document carrying the namespace
3446 * @href: the URI associated
3447 * @prefix: the prefix for the namespace
3448 *
3449 * Creation of a Namespace, the old way using PI and without scoping
3450 * DEPRECATED !!!
3451 * It now create a namespace on the root element of the document if found.
3452 * Returns NULL this functionnality had been removed
3453 */
3454xmlNsPtr
3455xmlNewGlobalNs(xmlDocPtr doc, const xmlChar *href, const xmlChar *prefix) {
3456 static int deprecated = 0;
3457 if (!deprecated) {
3458 xmlGenericError(xmlGenericErrorContext,
3459 "xmlNewGlobalNs() deprecated function reached\n");
3460 deprecated = 1;
3461 }
3462 return(NULL);
3463#if 0
3464 xmlNodePtr root;
3465
3466 xmlNsPtr cur;
3467
3468 root = xmlDocGetRootElement(doc);
3469 if (root != NULL)
3470 return(xmlNewNs(root, href, prefix));
3471
3472 /*
3473 * if there is no root element yet, create an old Namespace type
3474 * and it will be moved to the root at save time.
3475 */
3476 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3477 if (cur == NULL) {
3478 xmlGenericError(xmlGenericErrorContext,
3479 "xmlNewGlobalNs : malloc failed\n");
3480 return(NULL);
3481 }
3482 memset(cur, 0, sizeof(xmlNs));
3483 cur->type = XML_GLOBAL_NAMESPACE;
3484
3485 if (href != NULL)
3486 cur->href = xmlStrdup(href);
3487 if (prefix != NULL)
3488 cur->prefix = xmlStrdup(prefix);
3489
3490 /*
3491 * Add it at the end to preserve parsing order ...
3492 */
3493 if (doc != NULL) {
3494 if (doc->oldNs == NULL) {
3495 doc->oldNs = cur;
3496 } else {
3497 xmlNsPtr prev = doc->oldNs;
3498
3499 while (prev->next != NULL) prev = prev->next;
3500 prev->next = cur;
3501 }
3502 }
3503
3504 return(NULL);
3505#endif
3506}
3507
3508/**
3509 * xmlUpgradeOldNs:
3510 * @doc: a document pointer
3511 *
3512 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3513 * DEPRECATED
3514 */
3515void
3516xmlUpgradeOldNs(xmlDocPtr doc) {
3517 static int deprecated = 0;
3518 if (!deprecated) {
3519 xmlGenericError(xmlGenericErrorContext,
3520 "xmlNewGlobalNs() deprecated function reached\n");
3521 deprecated = 1;
3522 }
3523#if 0
3524 xmlNsPtr cur;
3525
3526 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3527 if (doc->children == NULL) {
3528#ifdef DEBUG_TREE
3529 xmlGenericError(xmlGenericErrorContext,
3530 "xmlUpgradeOldNs: failed no root !\n");
3531#endif
3532 return;
3533 }
3534
3535 cur = doc->oldNs;
3536 while (cur->next != NULL) {
3537 cur->type = XML_LOCAL_NAMESPACE;
3538 cur = cur->next;
3539 }
3540 cur->type = XML_LOCAL_NAMESPACE;
3541 cur->next = doc->children->nsDef;
3542 doc->children->nsDef = doc->oldNs;
3543 doc->oldNs = NULL;
3544#endif
3545}
3546