blob: e271e3882030b2d147bd9c6be2cc03c16dfa2c4a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * Daniel.Veillard@w3.org
8 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50
51
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
70 if ((myversion / 10000) != (version / 10000)) {
71 xmlGenericError(xmlGenericErrorContext,
72 "Fatal: program compiled against libxml %d using libxml %d\n",
73 (version / 10000), (myversion / 10000));
74 exit(1);
75 }
76 if ((myversion / 100) < (version / 100)) {
77 xmlGenericError(xmlGenericErrorContext,
78 "Warning: program compiled against libxml %d using older %d\n",
79 (version / 100), (myversion / 100));
80 }
81}
82
83
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
const char *xmlFeaturesList[] = {
    /* parser options */
    "validate", "load subset", "keep blanks", "disable SAX",
    "fetch external entities", "substitute entities", "gather line info",

    /* parser state */
    "user data", "is html", "is standalone", "stop parser",
    "document", "is well formed", "is valid",

    /* the SAX handler block as a whole */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset", "SAX function isStandalone",
    "SAX function hasInternalSubset", "SAX function hasExternalSubset",
    "SAX function resolveEntity", "SAX function getEntity",
    "SAX function entityDecl", "SAX function notationDecl",
    "SAX function attributeDecl", "SAX function elementDecl",
    "SAX function unparsedEntityDecl", "SAX function setDocumentLocator",
    "SAX function startDocument", "SAX function endDocument",
    "SAX function startElement", "SAX function endElement",
    "SAX function reference", "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment", "SAX function warning",
    "SAX function error", "SAX function fatalError",
    "SAX function getParameterEntity", "SAX function cdataBlock",
    "SAX function externalSubset",
};
128
129/*
130 * xmlGetFeaturesList:
131 * @len: the length of the features name array (input/output)
132 * @result: an array of string to be filled with the features name.
133 *
134 * Copy at most *@len feature names into the @result array
135 *
136 * Returns -1 in case or error, or the total number of features,
137 * len is updated with the number of strings copied,
138 * strings must not be deallocated
139 */
140int
141xmlGetFeaturesList(int *len, const char **result) {
142 int ret, i;
143
144 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
145 if ((len == NULL) || (result == NULL))
146 return(ret);
147 if ((*len < 0) || (*len >= 1000))
148 return(-1);
149 if (*len > ret)
150 *len = ret;
151 for (i = 0;i < *len;i++)
152 result[i] = xmlFeaturesList[i];
153 return(ret);
154}
155
156/*
157 * xmlGetFeature:
158 * @ctxt: an XML/HTML parser context
159 * @name: the feature name
160 * @result: location to store the result
161 *
162 * Read the current value of one feature of this parser instance
163 *
164 * Returns -1 in case or error, 0 otherwise
165 */
166int
167xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
168 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
169 return(-1);
170
171 if (!strcmp(name, "validate")) {
172 *((int *) result) = ctxt->validate;
173 } else if (!strcmp(name, "keep blanks")) {
174 *((int *) result) = ctxt->keepBlanks;
175 } else if (!strcmp(name, "disable SAX")) {
176 *((int *) result) = ctxt->disableSAX;
177 } else if (!strcmp(name, "fetch external entities")) {
178 *((int *) result) = ctxt->loadsubset;
179 } else if (!strcmp(name, "substitute entities")) {
180 *((int *) result) = ctxt->replaceEntities;
181 } else if (!strcmp(name, "gather line info")) {
182 *((int *) result) = ctxt->record_info;
183 } else if (!strcmp(name, "user data")) {
184 *((void **)result) = ctxt->userData;
185 } else if (!strcmp(name, "is html")) {
186 *((int *) result) = ctxt->html;
187 } else if (!strcmp(name, "is standalone")) {
188 *((int *) result) = ctxt->standalone;
189 } else if (!strcmp(name, "document")) {
190 *((xmlDocPtr *) result) = ctxt->myDoc;
191 } else if (!strcmp(name, "is well formed")) {
192 *((int *) result) = ctxt->wellFormed;
193 } else if (!strcmp(name, "is valid")) {
194 *((int *) result) = ctxt->valid;
195 } else if (!strcmp(name, "SAX block")) {
196 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
197 } else if (!strcmp(name, "SAX function internalSubset")) {
198 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
199 } else if (!strcmp(name, "SAX function isStandalone")) {
200 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
201 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
202 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
203 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
204 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
205 } else if (!strcmp(name, "SAX function resolveEntity")) {
206 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
207 } else if (!strcmp(name, "SAX function getEntity")) {
208 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
209 } else if (!strcmp(name, "SAX function entityDecl")) {
210 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
211 } else if (!strcmp(name, "SAX function notationDecl")) {
212 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
213 } else if (!strcmp(name, "SAX function attributeDecl")) {
214 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
215 } else if (!strcmp(name, "SAX function elementDecl")) {
216 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
217 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
218 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
219 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
220 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
221 } else if (!strcmp(name, "SAX function startDocument")) {
222 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
223 } else if (!strcmp(name, "SAX function endDocument")) {
224 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
225 } else if (!strcmp(name, "SAX function startElement")) {
226 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
227 } else if (!strcmp(name, "SAX function endElement")) {
228 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
229 } else if (!strcmp(name, "SAX function reference")) {
230 *((referenceSAXFunc *) result) = ctxt->sax->reference;
231 } else if (!strcmp(name, "SAX function characters")) {
232 *((charactersSAXFunc *) result) = ctxt->sax->characters;
233 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
234 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
235 } else if (!strcmp(name, "SAX function processingInstruction")) {
236 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
237 } else if (!strcmp(name, "SAX function comment")) {
238 *((commentSAXFunc *) result) = ctxt->sax->comment;
239 } else if (!strcmp(name, "SAX function warning")) {
240 *((warningSAXFunc *) result) = ctxt->sax->warning;
241 } else if (!strcmp(name, "SAX function error")) {
242 *((errorSAXFunc *) result) = ctxt->sax->error;
243 } else if (!strcmp(name, "SAX function fatalError")) {
244 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
245 } else if (!strcmp(name, "SAX function getParameterEntity")) {
246 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
247 } else if (!strcmp(name, "SAX function cdataBlock")) {
248 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
249 } else if (!strcmp(name, "SAX function externalSubset")) {
250 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
251 } else {
252 return(-1);
253 }
254 return(0);
255}
256
257/*
258 * xmlSetFeature:
259 * @ctxt: an XML/HTML parser context
260 * @name: the feature name
261 * @value: pointer to the location of the new value
262 *
263 * Change the current value of one feature of this parser instance
264 *
265 * Returns -1 in case or error, 0 otherwise
266 */
267int
268xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
269 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
270 return(-1);
271
272 if (!strcmp(name, "validate")) {
273 int newvalidate = *((int *) value);
274 if ((!ctxt->validate) && (newvalidate != 0)) {
275 if (ctxt->vctxt.warning == NULL)
276 ctxt->vctxt.warning = xmlParserValidityWarning;
277 if (ctxt->vctxt.error == NULL)
278 ctxt->vctxt.error = xmlParserValidityError;
279 /* Allocate the Node stack */
280 ctxt->vctxt.nodeTab = (xmlNodePtr *)
281 xmlMalloc(4 * sizeof(xmlNodePtr));
282 if (ctxt->vctxt.nodeTab == NULL) {
283 ctxt->vctxt.nodeMax = 0;
284 ctxt->validate = 0;
285 return(-1);
286 }
287 ctxt->vctxt.nodeNr = 0;
288 ctxt->vctxt.nodeMax = 4;
289 ctxt->vctxt.node = NULL;
290 }
291 ctxt->validate = newvalidate;
292 } else if (!strcmp(name, "keep blanks")) {
293 ctxt->keepBlanks = *((int *) value);
294 } else if (!strcmp(name, "disable SAX")) {
295 ctxt->disableSAX = *((int *) value);
296 } else if (!strcmp(name, "fetch external entities")) {
297 ctxt->loadsubset = *((int *) value);
298 } else if (!strcmp(name, "substitute entities")) {
299 ctxt->replaceEntities = *((int *) value);
300 } else if (!strcmp(name, "gather line info")) {
301 ctxt->record_info = *((int *) value);
302 } else if (!strcmp(name, "user data")) {
303 ctxt->userData = *((void **)value);
304 } else if (!strcmp(name, "is html")) {
305 ctxt->html = *((int *) value);
306 } else if (!strcmp(name, "is standalone")) {
307 ctxt->standalone = *((int *) value);
308 } else if (!strcmp(name, "document")) {
309 ctxt->myDoc = *((xmlDocPtr *) value);
310 } else if (!strcmp(name, "is well formed")) {
311 ctxt->wellFormed = *((int *) value);
312 } else if (!strcmp(name, "is valid")) {
313 ctxt->valid = *((int *) value);
314 } else if (!strcmp(name, "SAX block")) {
315 ctxt->sax = *((xmlSAXHandlerPtr *) value);
316 } else if (!strcmp(name, "SAX function internalSubset")) {
317 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function isStandalone")) {
319 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
321 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
323 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function resolveEntity")) {
325 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function getEntity")) {
327 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function entityDecl")) {
329 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function notationDecl")) {
331 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function attributeDecl")) {
333 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function elementDecl")) {
335 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
337 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
339 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function startDocument")) {
341 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function endDocument")) {
343 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function startElement")) {
345 ctxt->sax->startElement = *((startElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function endElement")) {
347 ctxt->sax->endElement = *((endElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function reference")) {
349 ctxt->sax->reference = *((referenceSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function characters")) {
351 ctxt->sax->characters = *((charactersSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
353 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function processingInstruction")) {
355 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function comment")) {
357 ctxt->sax->comment = *((commentSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function warning")) {
359 ctxt->sax->warning = *((warningSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function error")) {
361 ctxt->sax->error = *((errorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function fatalError")) {
363 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function getParameterEntity")) {
365 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
366 } else if (!strcmp(name, "SAX function cdataBlock")) {
367 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
368 } else if (!strcmp(name, "SAX function externalSubset")) {
369 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
370 } else {
371 return(-1);
372 }
373 return(0);
374}
375
376/************************************************************************
377 * *
378 * Some functions to avoid too large macros *
379 * *
380 ************************************************************************/
381
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF */
    if (c < 0x10000)
        return(0);
    return(c <= 0x10FFFF);
}
402
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
417
/*
 * Direct lookup table for the code points 0x00-0xFF: a 1 marks a
 * BaseChar ([A-Z], [a-z], 0xC0-0xD6, 0xD8-0xF6, 0xF8-0xFF).
 */
static int xmlBaseArray[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive {first, last} BaseChar ranges for the code points from
 * 0x0100 upward. The entries are sorted and disjoint, which is what
 * the binary search in xmlIsBaseChar() relies on.
 */
static const int xmlBaseCharRangeTab[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},

    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69},

    {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103},
    {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110B, 0x110C}, {0x110E, 0x1112},
    {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
    {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
    {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
    {0x1169, 0x1169}, {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
    {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB}, {0x11AE, 0x11AF},
    {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2}, {0x11EB, 0x11EB},
    {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
    {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
    {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
    {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
    {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
    {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
    {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
    {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3},
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, the others
 * by a binary search in xmlBaseCharRangeTab. Negative values are
 * rejected up front so they can never index xmlBaseArray.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low;
    int high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRangeTab) /
                  sizeof(xmlBaseCharRangeTab[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRangeTab[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRangeTab[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
656
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive {first, last} ranges above ASCII, in ascending order */
    static const int digitRanges[][2] = {
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F},
        {0x09E6, 0x09EF}, {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF},
        {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
        {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29},
    };
    unsigned int i;

    /* ASCII digits first */
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);
    if (c < 0x0660)
        return(0);

    for (i = 0; i < sizeof(digitRanges) / sizeof(digitRanges[0]); i++) {
        if (c < digitRanges[i][0])
            break;      /* sorted table: no later range can match */
        if (c <= digitRanges[i][1])
            return(1);
    }
    return(0);
}
686
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges are kept in a sorted table which is binary searched.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive {first, last} ranges, sorted and disjoint */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED},

        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C},
        {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
        {0x0981, 0x0983}, {0x09BC, 0x09BC}, {0x09BE, 0x09BE},
        {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},

        {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E},
        {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42}, {0x0A47, 0x0A48},
        {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
        {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C},
        {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
        {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
        {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48},
        {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C82, 0x0C83},
        {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
        {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},

        {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
        {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35},
        {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
        {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B},
        {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC},
        {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
        {0x309A, 0x309A},
    };
    int low = 0;
    int high = (int) (sizeof(combiningRanges) /
                      sizeof(combiningRanges[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < combiningRanges[mid][0])
            high = mid - 1;
        else if (c > combiningRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
799
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE], all three code points of the range */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
824
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Note: this implementation additionally accepts the range
 * 0xF900-0xFA2D, which is not part of the production quoted above.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* nothing below 0x0100 is ideographic */
    if (c < 0x0100)
        return(0);
    if (c == 0x3007)
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);
    if ((c >= 0x4e00) && (c <= 0x9fa5))
        return(1);
    return((c >= 0xf900) && (c <= 0xfa2d));
}
842
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
856
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* alphanumeric ranges first */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* then the allowed white space and punctuation characters */
    switch (c) {
    case 0x20: case 0x0D: case 0x0A:
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
879
880/************************************************************************
881 * *
882 * Input handling functions for progressive parsing *
883 * *
884 ************************************************************************/
885
886/* #define DEBUG_INPUT */
887/* #define DEBUG_STACK */
888/* #define DEBUG_PUSH */
889
890
891/* we need to keep enough input to show errors in context */
892#define LINE_LEN 80
893
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the base/cur pointers of @in
 * are not consistent with its underlying buffer, then dumps the
 * pointers and counters of the buffer.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p (casting them to int truncates on
     * platforms where pointers are wider than int) and the integer
     * arguments are cast so that they match the %d conversions.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
918
919
920/**
921 * xmlParserInputRead:
922 * @in: an XML parser input
923 * @len: an indicative size for the lookahead
924 *
925 * This function refresh the input for the parser. It doesn't try to
926 * preserve pointers to the input buffer, and discard already read data
927 *
928 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
929 * end of this entity
930 */
931int
932xmlParserInputRead(xmlParserInputPtr in, int len) {
933 int ret;
934 int used;
935 int index;
936
937#ifdef DEBUG_INPUT
938 xmlGenericError(xmlGenericErrorContext, "Read\n");
939#endif
940 if (in->buf == NULL) return(-1);
941 if (in->base == NULL) return(-1);
942 if (in->cur == NULL) return(-1);
943 if (in->buf->buffer == NULL) return(-1);
944 if (in->buf->readcallback == NULL) return(-1);
945
946 CHECK_BUFFER(in);
947
948 used = in->cur - in->buf->buffer->content;
949 ret = xmlBufferShrink(in->buf->buffer, used);
950 if (ret > 0) {
951 in->cur -= ret;
952 in->consumed += ret;
953 }
954 ret = xmlParserInputBufferRead(in->buf, len);
955 if (in->base != in->buf->buffer->content) {
956 /*
957 * the buffer has been realloced
958 */
959 index = in->cur - in->base;
960 in->base = in->buf->buffer->content;
961 in->cur = &in->buf->buffer->content[index];
962 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000963 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000964
965 CHECK_BUFFER(in);
966
967 return(ret);
968}
969
970/**
971 * xmlParserInputGrow:
972 * @in: an XML parser input
973 * @len: an indicative size for the lookahead
974 *
975 * This function increase the input for the parser. It tries to
976 * preserve pointers to the input buffer, and keep already read data
977 *
978 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
979 * end of this entity
980 */
981int
982xmlParserInputGrow(xmlParserInputPtr in, int len) {
983 int ret;
984 int index;
985
986#ifdef DEBUG_INPUT
987 xmlGenericError(xmlGenericErrorContext, "Grow\n");
988#endif
989 if (in->buf == NULL) return(-1);
990 if (in->base == NULL) return(-1);
991 if (in->cur == NULL) return(-1);
992 if (in->buf->buffer == NULL) return(-1);
993
994 CHECK_BUFFER(in);
995
996 index = in->cur - in->base;
997 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
998
999 CHECK_BUFFER(in);
1000
1001 return(0);
1002 }
1003 if (in->buf->readcallback != NULL)
1004 ret = xmlParserInputBufferGrow(in->buf, len);
1005 else
1006 return(0);
1007
1008 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001009 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001010 * block, but we use it really as an integer to do some
1011 * pointer arithmetic. Insure will raise it as a bug but in
1012 * that specific case, that's not !
1013 */
1014 if (in->base != in->buf->buffer->content) {
1015 /*
1016 * the buffer has been realloced
1017 */
1018 index = in->cur - in->base;
1019 in->base = in->buf->buffer->content;
1020 in->cur = &in->buf->buffer->content[index];
1021 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001022 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001023
1024 CHECK_BUFFER(in);
1025
1026 return(ret);
1027}
1028
1029/**
1030 * xmlParserInputShrink:
1031 * @in: an XML parser input
1032 *
1033 * This function removes used input for the parser.
1034 */
1035void
1036xmlParserInputShrink(xmlParserInputPtr in) {
1037 int used;
1038 int ret;
1039 int index;
1040
1041#ifdef DEBUG_INPUT
1042 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1043#endif
1044 if (in->buf == NULL) return;
1045 if (in->base == NULL) return;
1046 if (in->cur == NULL) return;
1047 if (in->buf->buffer == NULL) return;
1048
1049 CHECK_BUFFER(in);
1050
1051 used = in->cur - in->buf->buffer->content;
1052 /*
1053 * Do not shrink on large buffers whose only a tiny fraction
1054 * was consumned
1055 */
1056 if (in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1057 return;
1058 if (used > INPUT_CHUNK) {
1059 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1060 if (ret > 0) {
1061 in->cur -= ret;
1062 in->consumed += ret;
1063 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001064 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001065 }
1066
1067 CHECK_BUFFER(in);
1068
1069 if (in->buf->buffer->use > INPUT_CHUNK) {
1070 return;
1071 }
1072 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1073 if (in->base != in->buf->buffer->content) {
1074 /*
1075 * the buffer has been realloced
1076 */
1077 index = in->cur - in->base;
1078 in->base = in->buf->buffer->content;
1079 in->cur = &in->buf->buffer->content[index];
1080 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001081 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001082
1083 CHECK_BUFFER(in);
1084}
1085
1086/************************************************************************
1087 * *
1088 * UTF8 character input and related functions *
1089 * *
1090 ************************************************************************/
1091
1092/**
1093 * xmlNextChar:
1094 * @ctxt: the XML parser context
1095 *
1096 * Skip to the next char input char.
1097 */
1098
1099void
1100xmlNextChar(xmlParserCtxtPtr ctxt) {
1101 if (ctxt->instate == XML_PARSER_EOF)
1102 return;
1103
1104 /*
1105 * 2.11 End-of-Line Handling
1106 * the literal two-character sequence "#xD#xA" or a standalone
1107 * literal #xD, an XML processor must pass to the application
1108 * the single character #xA.
1109 */
1110 if (ctxt->token != 0) ctxt->token = 0;
1111 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1112 if ((*ctxt->input->cur == 0) &&
1113 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1114 (ctxt->instate != XML_PARSER_COMMENT)) {
1115 /*
1116 * If we are at the end of the current entity and
1117 * the context allows it, we pop consumed entities
1118 * automatically.
1119 * the auto closing should be blocked in other cases
1120 */
1121 xmlPopInput(ctxt);
1122 } else {
1123 if (*(ctxt->input->cur) == '\n') {
1124 ctxt->input->line++; ctxt->input->col = 1;
1125 } else ctxt->input->col++;
1126 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1127 /*
1128 * We are supposed to handle UTF8, check it's valid
1129 * From rfc2044: encoding of the Unicode values on UTF-8:
1130 *
1131 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1132 * 0000 0000-0000 007F 0xxxxxxx
1133 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1134 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1135 *
1136 * Check for the 0x110000 limit too
1137 */
1138 const unsigned char *cur = ctxt->input->cur;
1139 unsigned char c;
1140
1141 c = *cur;
1142 if (c & 0x80) {
1143 if (cur[1] == 0)
1144 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1145 if ((cur[1] & 0xc0) != 0x80)
1146 goto encoding_error;
1147 if ((c & 0xe0) == 0xe0) {
1148 unsigned int val;
1149
1150 if (cur[2] == 0)
1151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1152 if ((cur[2] & 0xc0) != 0x80)
1153 goto encoding_error;
1154 if ((c & 0xf0) == 0xf0) {
1155 if (cur[3] == 0)
1156 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1157 if (((c & 0xf8) != 0xf0) ||
1158 ((cur[3] & 0xc0) != 0x80))
1159 goto encoding_error;
1160 /* 4-byte code */
1161 ctxt->input->cur += 4;
1162 val = (cur[0] & 0x7) << 18;
1163 val |= (cur[1] & 0x3f) << 12;
1164 val |= (cur[2] & 0x3f) << 6;
1165 val |= cur[3] & 0x3f;
1166 } else {
1167 /* 3-byte code */
1168 ctxt->input->cur += 3;
1169 val = (cur[0] & 0xf) << 12;
1170 val |= (cur[1] & 0x3f) << 6;
1171 val |= cur[2] & 0x3f;
1172 }
1173 if (((val > 0xd7ff) && (val < 0xe000)) ||
1174 ((val > 0xfffd) && (val < 0x10000)) ||
1175 (val >= 0x110000)) {
1176 if ((ctxt->sax != NULL) &&
1177 (ctxt->sax->error != NULL))
1178 ctxt->sax->error(ctxt->userData,
1179 "Char 0x%X out of allowed range\n", val);
1180 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1181 ctxt->wellFormed = 0;
1182 ctxt->disableSAX = 1;
1183 }
1184 } else
1185 /* 2-byte code */
1186 ctxt->input->cur += 2;
1187 } else
1188 /* 1-byte code */
1189 ctxt->input->cur++;
1190 } else {
1191 /*
1192 * Assume it's a fixed lenght encoding (1) with
1193 * a compatibke encoding for the ASCII set, since
1194 * XML constructs only use < 128 chars
1195 */
1196 ctxt->input->cur++;
1197 }
1198 ctxt->nbChars++;
1199 if (*ctxt->input->cur == 0)
1200 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1201 }
1202 } else {
1203 ctxt->input->cur++;
1204 ctxt->nbChars++;
1205 if (*ctxt->input->cur == 0)
1206 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1207 }
1208 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1209 xmlParserHandlePEReference(ctxt);
1210 if ((*ctxt->input->cur == 0) &&
1211 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1212 xmlPopInput(ctxt);
1213 return;
1214encoding_error:
1215 /*
1216 * If we detect an UTF8 error that probably mean that the
1217 * input encoding didn't get properly advertized in the
1218 * declaration header. Report the error and switch the encoding
1219 * to ISO-Latin-1 (if you don't like this policy, just declare the
1220 * encoding !)
1221 */
1222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1223 ctxt->sax->error(ctxt->userData,
1224 "Input is not proper UTF-8, indicate encoding !\n");
1225 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1226 ctxt->input->cur[0], ctxt->input->cur[1],
1227 ctxt->input->cur[2], ctxt->input->cur[3]);
1228 }
1229 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1230
1231 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1232 ctxt->input->cur++;
1233 return;
1234}
1235
1236/**
1237 * xmlCurrentChar:
1238 * @ctxt: the XML parser context
1239 * @len: pointer to the length of the char read
1240 *
1241 * The current char value, if using UTF-8 this may actaully span multiple
1242 * bytes in the input buffer. Implement the end of line normalization:
1243 * 2.11 End-of-Line Handling
1244 * Wherever an external parsed entity or the literal entity value
1245 * of an internal parsed entity contains either the literal two-character
1246 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1247 * must pass to the application the single character #xA.
1248 * This behavior can conveniently be produced by normalizing all
1249 * line breaks to #xA on input, before parsing.)
1250 *
1251 * Returns the current char value and its lenght
1252 */
1253
1254int
1255xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1256 if (ctxt->instate == XML_PARSER_EOF)
1257 return(0);
1258
1259 if (ctxt->token != 0) {
1260 *len = 0;
1261 return(ctxt->token);
1262 }
1263 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1264 *len = 1;
1265 return((int) *ctxt->input->cur);
1266 }
1267 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1268 /*
1269 * We are supposed to handle UTF8, check it's valid
1270 * From rfc2044: encoding of the Unicode values on UTF-8:
1271 *
1272 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1273 * 0000 0000-0000 007F 0xxxxxxx
1274 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1275 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1276 *
1277 * Check for the 0x110000 limit too
1278 */
1279 const unsigned char *cur = ctxt->input->cur;
1280 unsigned char c;
1281 unsigned int val;
1282
1283 c = *cur;
1284 if (c & 0x80) {
1285 if (cur[1] == 0)
1286 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1287 if ((cur[1] & 0xc0) != 0x80)
1288 goto encoding_error;
1289 if ((c & 0xe0) == 0xe0) {
1290
1291 if (cur[2] == 0)
1292 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1293 if ((cur[2] & 0xc0) != 0x80)
1294 goto encoding_error;
1295 if ((c & 0xf0) == 0xf0) {
1296 if (cur[3] == 0)
1297 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1298 if (((c & 0xf8) != 0xf0) ||
1299 ((cur[3] & 0xc0) != 0x80))
1300 goto encoding_error;
1301 /* 4-byte code */
1302 *len = 4;
1303 val = (cur[0] & 0x7) << 18;
1304 val |= (cur[1] & 0x3f) << 12;
1305 val |= (cur[2] & 0x3f) << 6;
1306 val |= cur[3] & 0x3f;
1307 } else {
1308 /* 3-byte code */
1309 *len = 3;
1310 val = (cur[0] & 0xf) << 12;
1311 val |= (cur[1] & 0x3f) << 6;
1312 val |= cur[2] & 0x3f;
1313 }
1314 } else {
1315 /* 2-byte code */
1316 *len = 2;
1317 val = (cur[0] & 0x1f) << 6;
1318 val |= cur[1] & 0x3f;
1319 }
1320 if (!IS_CHAR(val)) {
1321 if ((ctxt->sax != NULL) &&
1322 (ctxt->sax->error != NULL))
1323 ctxt->sax->error(ctxt->userData,
1324 "Char 0x%X out of allowed range\n", val);
1325 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1326 ctxt->wellFormed = 0;
1327 ctxt->disableSAX = 1;
1328 }
1329 return(val);
1330 } else {
1331 /* 1-byte code */
1332 *len = 1;
1333 if (*ctxt->input->cur == 0xD) {
1334 if (ctxt->input->cur[1] == 0xA) {
1335 ctxt->nbChars++;
1336 ctxt->input->cur++;
1337 }
1338 return(0xA);
1339 }
1340 return((int) *ctxt->input->cur);
1341 }
1342 }
1343 /*
1344 * Assume it's a fixed lenght encoding (1) with
1345 * a compatibke encoding for the ASCII set, since
1346 * XML constructs only use < 128 chars
1347 */
1348 *len = 1;
1349 if (*ctxt->input->cur == 0xD) {
1350 if (ctxt->input->cur[1] == 0xA) {
1351 ctxt->nbChars++;
1352 ctxt->input->cur++;
1353 }
1354 return(0xA);
1355 }
1356 return((int) *ctxt->input->cur);
1357encoding_error:
1358 /*
1359 * If we detect an UTF8 error that probably mean that the
1360 * input encoding didn't get properly advertized in the
1361 * declaration header. Report the error and switch the encoding
1362 * to ISO-Latin-1 (if you don't like this policy, just declare the
1363 * encoding !)
1364 */
1365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1366 ctxt->sax->error(ctxt->userData,
1367 "Input is not proper UTF-8, indicate encoding !\n");
1368 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1369 ctxt->input->cur[0], ctxt->input->cur[1],
1370 ctxt->input->cur[2], ctxt->input->cur[3]);
1371 }
1372 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1373
1374 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1375 *len = 1;
1376 return((int) *ctxt->input->cur);
1377}
1378
1379/**
1380 * xmlStringCurrentChar:
1381 * @ctxt: the XML parser context
1382 * @cur: pointer to the beginning of the char
1383 * @len: pointer to the length of the char read
1384 *
1385 * The current char value, if using UTF-8 this may actaully span multiple
1386 * bytes in the input buffer.
1387 *
1388 * Returns the current char value and its lenght
1389 */
1390
1391int
1392xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1393 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1394 /*
1395 * We are supposed to handle UTF8, check it's valid
1396 * From rfc2044: encoding of the Unicode values on UTF-8:
1397 *
1398 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1399 * 0000 0000-0000 007F 0xxxxxxx
1400 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1401 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1402 *
1403 * Check for the 0x110000 limit too
1404 */
1405 unsigned char c;
1406 unsigned int val;
1407
1408 c = *cur;
1409 if (c & 0x80) {
1410 if ((cur[1] & 0xc0) != 0x80)
1411 goto encoding_error;
1412 if ((c & 0xe0) == 0xe0) {
1413
1414 if ((cur[2] & 0xc0) != 0x80)
1415 goto encoding_error;
1416 if ((c & 0xf0) == 0xf0) {
1417 if (((c & 0xf8) != 0xf0) ||
1418 ((cur[3] & 0xc0) != 0x80))
1419 goto encoding_error;
1420 /* 4-byte code */
1421 *len = 4;
1422 val = (cur[0] & 0x7) << 18;
1423 val |= (cur[1] & 0x3f) << 12;
1424 val |= (cur[2] & 0x3f) << 6;
1425 val |= cur[3] & 0x3f;
1426 } else {
1427 /* 3-byte code */
1428 *len = 3;
1429 val = (cur[0] & 0xf) << 12;
1430 val |= (cur[1] & 0x3f) << 6;
1431 val |= cur[2] & 0x3f;
1432 }
1433 } else {
1434 /* 2-byte code */
1435 *len = 2;
1436 val = (cur[0] & 0x1f) << 6;
1437 val |= cur[2] & 0x3f;
1438 }
1439 if (!IS_CHAR(val)) {
1440 if ((ctxt->sax != NULL) &&
1441 (ctxt->sax->error != NULL))
1442 ctxt->sax->error(ctxt->userData,
1443 "Char 0x%X out of allowed range\n", val);
1444 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1445 ctxt->wellFormed = 0;
1446 ctxt->disableSAX = 1;
1447 }
1448 return(val);
1449 } else {
1450 /* 1-byte code */
1451 *len = 1;
1452 return((int) *cur);
1453 }
1454 }
1455 /*
1456 * Assume it's a fixed lenght encoding (1) with
1457 * a compatibke encoding for the ASCII set, since
1458 * XML constructs only use < 128 chars
1459 */
1460 *len = 1;
1461 return((int) *cur);
1462encoding_error:
1463 /*
1464 * If we detect an UTF8 error that probably mean that the
1465 * input encoding didn't get properly advertized in the
1466 * declaration header. Report the error and switch the encoding
1467 * to ISO-Latin-1 (if you don't like this policy, just declare the
1468 * encoding !)
1469 */
1470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1471 ctxt->sax->error(ctxt->userData,
1472 "Input is not proper UTF-8, indicate encoding !\n");
1473 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1474 ctxt->input->cur[0], ctxt->input->cur[1],
1475 ctxt->input->cur[2], ctxt->input->cur[3]);
1476 }
1477 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1478
1479 *len = 1;
1480 return((int) *cur);
1481}
1482
1483/**
1484 * xmlCopyChar:
1485 * @len: pointer to the length of the char read (or zero)
1486 * @array: pointer to an arry of xmlChar
1487 * @val: the char value
1488 *
1489 * append the char value in the array
1490 *
1491 * Returns the number of xmlChar written
1492 */
1493
1494int
1495xmlCopyChar(int len, xmlChar *out, int val) {
1496 /*
1497 * We are supposed to handle UTF8, check it's valid
1498 * From rfc2044: encoding of the Unicode values on UTF-8:
1499 *
1500 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1501 * 0000 0000-0000 007F 0xxxxxxx
1502 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1503 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1504 */
1505 if (len == 0) {
1506 if (val < 0) len = 0;
1507 else if (val < 0x80) len = 1;
1508 else if (val < 0x800) len = 2;
1509 else if (val < 0x10000) len = 3;
1510 else if (val < 0x110000) len = 4;
1511 if (len == 0) {
1512 xmlGenericError(xmlGenericErrorContext,
1513 "Internal error, xmlCopyChar 0x%X out of bound\n",
1514 val);
1515 return(0);
1516 }
1517 }
1518 if (len > 1) {
1519 int bits;
1520
1521 if (val < 0x80) { *out++= val; bits= -6; }
1522 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1523 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1524 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1525
1526 for ( ; bits >= 0; bits-= 6)
1527 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1528
1529 return(len);
1530 }
1531 *out = (xmlChar) val;
1532 return(1);
1533}
1534
1535/************************************************************************
1536 * *
1537 * Commodity functions to switch encodings *
1538 * *
1539 ************************************************************************/
1540
1541/**
1542 * xmlSwitchEncoding:
1543 * @ctxt: the parser context
1544 * @enc: the encoding value (number)
1545 *
1546 * change the input functions when discovering the character encoding
1547 * of a given entity.
1548 *
1549 * Returns 0 in case of success, -1 otherwise
1550 */
1551int
1552xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1553{
1554 xmlCharEncodingHandlerPtr handler;
1555
1556 switch (enc) {
1557 case XML_CHAR_ENCODING_ERROR:
1558 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1561 ctxt->wellFormed = 0;
1562 ctxt->disableSAX = 1;
1563 break;
1564 case XML_CHAR_ENCODING_NONE:
1565 /* let's assume it's UTF-8 without the XML decl */
1566 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1567 return(0);
1568 case XML_CHAR_ENCODING_UTF8:
1569 /* default encoding, no conversion should be needed */
1570 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1571 return(0);
1572 default:
1573 break;
1574 }
1575 handler = xmlGetCharEncodingHandler(enc);
1576 if (handler == NULL) {
1577 /*
1578 * Default handlers.
1579 */
1580 switch (enc) {
1581 case XML_CHAR_ENCODING_ERROR:
1582 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1584 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1585 ctxt->wellFormed = 0;
1586 ctxt->disableSAX = 1;
1587 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1588 break;
1589 case XML_CHAR_ENCODING_NONE:
1590 /* let's assume it's UTF-8 without the XML decl */
1591 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1592 return(0);
1593 case XML_CHAR_ENCODING_UTF8:
1594 case XML_CHAR_ENCODING_ASCII:
1595 /* default encoding, no conversion should be needed */
1596 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1597 return(0);
1598 case XML_CHAR_ENCODING_UTF16LE:
1599 break;
1600 case XML_CHAR_ENCODING_UTF16BE:
1601 break;
1602 case XML_CHAR_ENCODING_UCS4LE:
1603 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "char encoding USC4 little endian not supported\n");
1607 break;
1608 case XML_CHAR_ENCODING_UCS4BE:
1609 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1611 ctxt->sax->error(ctxt->userData,
1612 "char encoding USC4 big endian not supported\n");
1613 break;
1614 case XML_CHAR_ENCODING_EBCDIC:
1615 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "char encoding EBCDIC not supported\n");
1619 break;
1620 case XML_CHAR_ENCODING_UCS4_2143:
1621 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1623 ctxt->sax->error(ctxt->userData,
1624 "char encoding UCS4 2143 not supported\n");
1625 break;
1626 case XML_CHAR_ENCODING_UCS4_3412:
1627 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1629 ctxt->sax->error(ctxt->userData,
1630 "char encoding UCS4 3412 not supported\n");
1631 break;
1632 case XML_CHAR_ENCODING_UCS2:
1633 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1635 ctxt->sax->error(ctxt->userData,
1636 "char encoding UCS2 not supported\n");
1637 break;
1638 case XML_CHAR_ENCODING_8859_1:
1639 case XML_CHAR_ENCODING_8859_2:
1640 case XML_CHAR_ENCODING_8859_3:
1641 case XML_CHAR_ENCODING_8859_4:
1642 case XML_CHAR_ENCODING_8859_5:
1643 case XML_CHAR_ENCODING_8859_6:
1644 case XML_CHAR_ENCODING_8859_7:
1645 case XML_CHAR_ENCODING_8859_8:
1646 case XML_CHAR_ENCODING_8859_9:
1647 /*
1648 * We used to keep the internal content in the
1649 * document encoding however this turns being unmaintainable
1650 * So xmlGetCharEncodingHandler() will return non-null
1651 * values for this now.
1652 */
1653 if ((ctxt->inputNr == 1) &&
1654 (ctxt->encoding == NULL) &&
1655 (ctxt->input->encoding != NULL)) {
1656 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1657 }
1658 ctxt->charset = enc;
1659 return(0);
1660 case XML_CHAR_ENCODING_2022_JP:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding ISO-2022-JPnot supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_SHIFT_JIS:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding Shift_JIS not supported\n");
1671 break;
1672 case XML_CHAR_ENCODING_EUC_JP:
1673 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt->userData,
1676 "char encoding EUC-JPnot supported\n");
1677 break;
1678 }
1679 }
1680 if (handler == NULL)
1681 return(-1);
1682 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1683 return(xmlSwitchToEncoding(ctxt, handler));
1684}
1685
1686/**
1687 * xmlSwitchToEncoding:
1688 * @ctxt: the parser context
1689 * @handler: the encoding handler
1690 *
1691 * change the input functions when discovering the character encoding
1692 * of a given entity.
1693 *
1694 * Returns 0 in case of success, -1 otherwise
1695 */
1696int
1697xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1698{
1699 int nbchars;
1700
1701 if (handler != NULL) {
1702 if (ctxt->input != NULL) {
1703 if (ctxt->input->buf != NULL) {
1704 if (ctxt->input->buf->encoder != NULL) {
1705 if (ctxt->input->buf->encoder == handler)
1706 return(0);
1707 /*
1708 * Note: this is a bit dangerous, but that's what it
1709 * takes to use nearly compatible signature for different
1710 * encodings.
1711 */
1712 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1713 ctxt->input->buf->encoder = handler;
1714 return(0);
1715 }
1716 ctxt->input->buf->encoder = handler;
1717
1718 /*
1719 * Is there already some content down the pipe to convert ?
1720 */
1721 if ((ctxt->input->buf->buffer != NULL) &&
1722 (ctxt->input->buf->buffer->use > 0)) {
1723 int processed;
1724
1725 /*
1726 * Specific handling of the Byte Order Mark for
1727 * UTF-16
1728 */
1729 if ((handler->name != NULL) &&
1730 (!strcmp(handler->name, "UTF-16LE")) &&
1731 (ctxt->input->cur[0] == 0xFF) &&
1732 (ctxt->input->cur[1] == 0xFE)) {
1733 ctxt->input->cur += 2;
1734 }
1735 if ((handler->name != NULL) &&
1736 (!strcmp(handler->name, "UTF-16BE")) &&
1737 (ctxt->input->cur[0] == 0xFE) &&
1738 (ctxt->input->cur[1] == 0xFF)) {
1739 ctxt->input->cur += 2;
1740 }
1741
1742 /*
1743 * Shring the current input buffer.
1744 * Move it as the raw buffer and create a new input buffer
1745 */
1746 processed = ctxt->input->cur - ctxt->input->base;
1747 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1748 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1749 ctxt->input->buf->buffer = xmlBufferCreate();
1750
1751 if (ctxt->html) {
1752 /*
1753 * converst as much as possbile of the buffer
1754 */
1755 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1756 ctxt->input->buf->buffer,
1757 ctxt->input->buf->raw);
1758 } else {
1759 /*
1760 * convert just enough to get
1761 * '<?xml version="1.0" encoding="xxx"?>'
1762 * parsed with the autodetected encoding
1763 * into the parser reading buffer.
1764 */
1765 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1766 ctxt->input->buf->buffer,
1767 ctxt->input->buf->raw);
1768 }
1769 if (nbchars < 0) {
1770 xmlGenericError(xmlGenericErrorContext,
1771 "xmlSwitchToEncoding: encoder error\n");
1772 return(-1);
1773 }
1774 ctxt->input->base =
1775 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001776 ctxt->input->end =
1777 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001778
1779 }
1780 return(0);
1781 } else {
1782 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1783 /*
1784 * When parsing a static memory array one must know the
1785 * size to be able to convert the buffer.
1786 */
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
1789 "xmlSwitchEncoding : no input\n");
1790 return(-1);
1791 } else {
1792 int processed;
1793
1794 /*
1795 * Shring the current input buffer.
1796 * Move it as the raw buffer and create a new input buffer
1797 */
1798 processed = ctxt->input->cur - ctxt->input->base;
1799
1800 ctxt->input->buf->raw = xmlBufferCreate();
1801 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1802 ctxt->input->length - processed);
1803 ctxt->input->buf->buffer = xmlBufferCreate();
1804
1805 /*
1806 * convert as much as possible of the raw input
1807 * to the parser reading buffer.
1808 */
1809 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1810 ctxt->input->buf->buffer,
1811 ctxt->input->buf->raw);
1812 if (nbchars < 0) {
1813 xmlGenericError(xmlGenericErrorContext,
1814 "xmlSwitchToEncoding: encoder error\n");
1815 return(-1);
1816 }
1817
1818 /*
1819 * Conversion succeeded, get rid of the old buffer
1820 */
1821 if ((ctxt->input->free != NULL) &&
1822 (ctxt->input->base != NULL))
1823 ctxt->input->free((xmlChar *) ctxt->input->base);
1824 ctxt->input->base =
1825 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001826 ctxt->input->end =
1827 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001828 }
1829 }
1830 } else {
1831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1832 ctxt->sax->error(ctxt->userData,
1833 "xmlSwitchEncoding : no input\n");
1834 return(-1);
1835 }
1836 /*
1837 * The parsing is now done in UTF8 natively
1838 */
1839 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1840 } else
1841 return(-1);
1842 return(0);
1843
1844}
1845
1846/************************************************************************
1847 * *
1848 * Commodity functions to handle entities processing *
1849 * *
1850 ************************************************************************/
1851
1852/**
1853 * xmlFreeInputStream:
1854 * @input: an xmlParserInputPtr
1855 *
1856 * Free up an input stream.
1857 */
1858void
1859xmlFreeInputStream(xmlParserInputPtr input) {
1860 if (input == NULL) return;
1861
1862 if (input->filename != NULL) xmlFree((char *) input->filename);
1863 if (input->directory != NULL) xmlFree((char *) input->directory);
1864 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1865 if (input->version != NULL) xmlFree((char *) input->version);
1866 if ((input->free != NULL) && (input->base != NULL))
1867 input->free((xmlChar *) input->base);
1868 if (input->buf != NULL)
1869 xmlFreeParserInputBuffer(input->buf);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001870 MEM_CLEANUP(input, sizeof(xmlParserInput));
Owen Taylor3473f882001-02-23 17:55:21 +00001871 xmlFree(input);
1872}
1873
1874/**
1875 * xmlNewInputStream:
1876 * @ctxt: an XML parser context
1877 *
1878 * Create a new input stream structure
1879 * Returns the new input stream or NULL
1880 */
1881xmlParserInputPtr
1882xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1883 xmlParserInputPtr input;
1884
1885 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1886 if (input == NULL) {
1887 if (ctxt != NULL) {
1888 ctxt->errNo = XML_ERR_NO_MEMORY;
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "malloc: couldn't allocate a new input stream\n");
1892 ctxt->errNo = XML_ERR_NO_MEMORY;
1893 }
1894 return(NULL);
1895 }
1896 memset(input, 0, sizeof(xmlParserInput));
1897 input->line = 1;
1898 input->col = 1;
1899 input->standalone = -1;
1900 return(input);
1901}
1902
1903/**
1904 * xmlNewIOInputStream:
1905 * @ctxt: an XML parser context
1906 * @input: an I/O Input
1907 * @enc: the charset encoding if known
1908 *
1909 * Create a new input stream structure encapsulating the @input into
1910 * a stream suitable for the parser.
1911 *
1912 * Returns the new input stream or NULL
1913 */
1914xmlParserInputPtr
1915xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1916 xmlCharEncoding enc) {
1917 xmlParserInputPtr inputStream;
1918
1919 if (xmlParserDebugEntities)
1920 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1921 inputStream = xmlNewInputStream(ctxt);
1922 if (inputStream == NULL) {
1923 return(NULL);
1924 }
1925 inputStream->filename = NULL;
1926 inputStream->buf = input;
1927 inputStream->base = inputStream->buf->buffer->content;
1928 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001929 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001930 if (enc != XML_CHAR_ENCODING_NONE) {
1931 xmlSwitchEncoding(ctxt, enc);
1932 }
1933
1934 return(inputStream);
1935}
1936
1937/**
1938 * xmlNewEntityInputStream:
1939 * @ctxt: an XML parser context
1940 * @entity: an Entity pointer
1941 *
1942 * Create a new input stream based on an xmlEntityPtr
1943 *
1944 * Returns the new input stream or NULL
1945 */
1946xmlParserInputPtr
1947xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1948 xmlParserInputPtr input;
1949
1950 if (entity == NULL) {
1951 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData,
1954 "internal: xmlNewEntityInputStream entity = NULL\n");
1955 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1956 return(NULL);
1957 }
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext,
1960 "new input from entity: %s\n", entity->name);
1961 if (entity->content == NULL) {
1962 switch (entity->etype) {
1963 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1964 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1966 ctxt->sax->error(ctxt->userData,
1967 "xmlNewEntityInputStream unparsed entity !\n");
1968 break;
1969 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1970 case XML_EXTERNAL_PARAMETER_ENTITY:
1971 return(xmlLoadExternalEntity((char *) entity->URI,
1972 (char *) entity->ExternalID, ctxt));
1973 case XML_INTERNAL_GENERAL_ENTITY:
1974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975 ctxt->sax->error(ctxt->userData,
1976 "Internal entity %s without content !\n", entity->name);
1977 break;
1978 case XML_INTERNAL_PARAMETER_ENTITY:
1979 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981 ctxt->sax->error(ctxt->userData,
1982 "Internal parameter entity %s without content !\n", entity->name);
1983 break;
1984 case XML_INTERNAL_PREDEFINED_ENTITY:
1985 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "Predefined entity %s without content !\n", entity->name);
1989 break;
1990 }
1991 return(NULL);
1992 }
1993 input = xmlNewInputStream(ctxt);
1994 if (input == NULL) {
1995 return(NULL);
1996 }
1997 input->filename = (char *) entity->URI;
1998 input->base = entity->content;
1999 input->cur = entity->content;
2000 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002001 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002002 return(input);
2003}
2004
2005/**
2006 * xmlNewStringInputStream:
2007 * @ctxt: an XML parser context
2008 * @buffer: an memory buffer
2009 *
2010 * Create a new input stream based on a memory buffer.
2011 * Returns the new input stream
2012 */
2013xmlParserInputPtr
2014xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2015 xmlParserInputPtr input;
2016
2017 if (buffer == NULL) {
2018 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData,
2021 "internal: xmlNewStringInputStream string = NULL\n");
2022 return(NULL);
2023 }
2024 if (xmlParserDebugEntities)
2025 xmlGenericError(xmlGenericErrorContext,
2026 "new fixed input: %.30s\n", buffer);
2027 input = xmlNewInputStream(ctxt);
2028 if (input == NULL) {
2029 return(NULL);
2030 }
2031 input->base = buffer;
2032 input->cur = buffer;
2033 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002034 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002035 return(input);
2036}
2037
2038/**
2039 * xmlNewInputFromFile:
2040 * @ctxt: an XML parser context
2041 * @filename: the filename to use as entity
2042 *
2043 * Create a new input stream based on a file.
2044 *
2045 * Returns the new input stream or NULL in case of error
2046 */
2047xmlParserInputPtr
2048xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2049 xmlParserInputBufferPtr buf;
2050 xmlParserInputPtr inputStream;
2051 char *directory = NULL;
2052 xmlChar *URI = NULL;
2053
2054 if (xmlParserDebugEntities)
2055 xmlGenericError(xmlGenericErrorContext,
2056 "new input from file: %s\n", filename);
2057 if (ctxt == NULL) return(NULL);
2058 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2059 if (buf == NULL)
2060 return(NULL);
2061
2062 URI = xmlStrdup((xmlChar *) filename);
2063 directory = xmlParserGetDirectory((const char *) URI);
2064
2065 inputStream = xmlNewInputStream(ctxt);
2066 if (inputStream == NULL) {
2067 if (directory != NULL) xmlFree((char *) directory);
2068 if (URI != NULL) xmlFree((char *) URI);
2069 return(NULL);
2070 }
2071
2072 inputStream->filename = (const char *) URI;
2073 inputStream->directory = directory;
2074 inputStream->buf = buf;
2075
2076 inputStream->base = inputStream->buf->buffer->content;
2077 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002078 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002079 if ((ctxt->directory == NULL) && (directory != NULL))
2080 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2081 return(inputStream);
2082}
2083
2084/************************************************************************
2085 * *
2086 * Commodity functions to handle parser contexts *
2087 * *
2088 ************************************************************************/
2089
2090/**
2091 * xmlInitParserCtxt:
2092 * @ctxt: an XML parser context
2093 *
2094 * Initialize a parser context
2095 */
2096
2097void
2098xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2099{
2100 xmlSAXHandler *sax;
2101
2102 xmlDefaultSAXHandlerInit();
2103
2104 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2105 if (sax == NULL) {
2106 xmlGenericError(xmlGenericErrorContext,
2107 "xmlInitParserCtxt: out of memory\n");
2108 }
2109 else
2110 memset(sax, 0, sizeof(xmlSAXHandler));
2111
2112 /* Allocate the Input stack */
2113 ctxt->inputTab = (xmlParserInputPtr *)
2114 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2115 if (ctxt->inputTab == NULL) {
2116 xmlGenericError(xmlGenericErrorContext,
2117 "xmlInitParserCtxt: out of memory\n");
2118 ctxt->inputNr = 0;
2119 ctxt->inputMax = 0;
2120 ctxt->input = NULL;
2121 return;
2122 }
2123 ctxt->inputNr = 0;
2124 ctxt->inputMax = 5;
2125 ctxt->input = NULL;
2126
2127 ctxt->version = NULL;
2128 ctxt->encoding = NULL;
2129 ctxt->standalone = -1;
2130 ctxt->hasExternalSubset = 0;
2131 ctxt->hasPErefs = 0;
2132 ctxt->html = 0;
2133 ctxt->external = 0;
2134 ctxt->instate = XML_PARSER_START;
2135 ctxt->token = 0;
2136 ctxt->directory = NULL;
2137
2138 /* Allocate the Node stack */
2139 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2140 if (ctxt->nodeTab == NULL) {
2141 xmlGenericError(xmlGenericErrorContext,
2142 "xmlInitParserCtxt: out of memory\n");
2143 ctxt->nodeNr = 0;
2144 ctxt->nodeMax = 0;
2145 ctxt->node = NULL;
2146 ctxt->inputNr = 0;
2147 ctxt->inputMax = 0;
2148 ctxt->input = NULL;
2149 return;
2150 }
2151 ctxt->nodeNr = 0;
2152 ctxt->nodeMax = 10;
2153 ctxt->node = NULL;
2154
2155 /* Allocate the Name stack */
2156 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2157 if (ctxt->nameTab == NULL) {
2158 xmlGenericError(xmlGenericErrorContext,
2159 "xmlInitParserCtxt: out of memory\n");
2160 ctxt->nodeNr = 0;
2161 ctxt->nodeMax = 0;
2162 ctxt->node = NULL;
2163 ctxt->inputNr = 0;
2164 ctxt->inputMax = 0;
2165 ctxt->input = NULL;
2166 ctxt->nameNr = 0;
2167 ctxt->nameMax = 0;
2168 ctxt->name = NULL;
2169 return;
2170 }
2171 ctxt->nameNr = 0;
2172 ctxt->nameMax = 10;
2173 ctxt->name = NULL;
2174
2175 /* Allocate the space stack */
2176 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2177 if (ctxt->spaceTab == NULL) {
2178 xmlGenericError(xmlGenericErrorContext,
2179 "xmlInitParserCtxt: out of memory\n");
2180 ctxt->nodeNr = 0;
2181 ctxt->nodeMax = 0;
2182 ctxt->node = NULL;
2183 ctxt->inputNr = 0;
2184 ctxt->inputMax = 0;
2185 ctxt->input = NULL;
2186 ctxt->nameNr = 0;
2187 ctxt->nameMax = 0;
2188 ctxt->name = NULL;
2189 ctxt->spaceNr = 0;
2190 ctxt->spaceMax = 0;
2191 ctxt->space = NULL;
2192 return;
2193 }
2194 ctxt->spaceNr = 1;
2195 ctxt->spaceMax = 10;
2196 ctxt->spaceTab[0] = -1;
2197 ctxt->space = &ctxt->spaceTab[0];
2198
2199 if (sax == NULL) {
2200 ctxt->sax = &xmlDefaultSAXHandler;
2201 } else {
2202 ctxt->sax = sax;
2203 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2204 }
2205 ctxt->userData = ctxt;
2206 ctxt->myDoc = NULL;
2207 ctxt->wellFormed = 1;
2208 ctxt->valid = 1;
2209 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2210 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2211 ctxt->pedantic = xmlPedanticParserDefaultValue;
2212 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2213 ctxt->vctxt.userData = ctxt;
2214 if (ctxt->validate) {
2215 ctxt->vctxt.error = xmlParserValidityError;
2216 if (xmlGetWarningsDefaultValue == 0)
2217 ctxt->vctxt.warning = NULL;
2218 else
2219 ctxt->vctxt.warning = xmlParserValidityWarning;
2220 /* Allocate the Node stack */
2221 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2222 if (ctxt->vctxt.nodeTab == NULL) {
2223 xmlGenericError(xmlGenericErrorContext,
2224 "xmlInitParserCtxt: out of memory\n");
2225 ctxt->vctxt.nodeMax = 0;
2226 ctxt->validate = 0;
2227 ctxt->vctxt.error = NULL;
2228 ctxt->vctxt.warning = NULL;
2229 } else {
2230 ctxt->vctxt.nodeNr = 0;
2231 ctxt->vctxt.nodeMax = 4;
2232 ctxt->vctxt.node = NULL;
2233 }
2234 } else {
2235 ctxt->vctxt.error = NULL;
2236 ctxt->vctxt.warning = NULL;
2237 }
2238 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2239 ctxt->record_info = 0;
2240 ctxt->nbChars = 0;
2241 ctxt->checkIndex = 0;
2242 ctxt->inSubset = 0;
2243 ctxt->errNo = XML_ERR_OK;
2244 ctxt->depth = 0;
2245 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2246 xmlInitNodeInfoSeq(&ctxt->node_seq);
2247}
2248
2249/**
2250 * xmlFreeParserCtxt:
2251 * @ctxt: an XML parser context
2252 *
2253 * Free all the memory used by a parser context. However the parsed
2254 * document in ctxt->myDoc is not freed.
2255 */
2256
2257void
2258xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2259{
2260 xmlParserInputPtr input;
2261 xmlChar *oldname;
2262
2263 if (ctxt == NULL) return;
2264
2265 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2266 xmlFreeInputStream(input);
2267 }
2268 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2269 xmlFree(oldname);
2270 }
2271 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2272 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2273 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2274 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2275 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2276 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2277 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2278 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2279 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2280 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2281 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2282 xmlFree(ctxt->sax);
2283 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2284 xmlFree(ctxt);
2285}
2286
2287/**
2288 * xmlNewParserCtxt:
2289 *
2290 * Allocate and initialize a new parser context.
2291 *
2292 * Returns the xmlParserCtxtPtr or NULL
2293 */
2294
2295xmlParserCtxtPtr
2296xmlNewParserCtxt()
2297{
2298 xmlParserCtxtPtr ctxt;
2299
2300 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2301 if (ctxt == NULL) {
2302 xmlGenericError(xmlGenericErrorContext,
2303 "xmlNewParserCtxt : cannot allocate context\n");
2304 perror("malloc");
2305 return(NULL);
2306 }
2307 memset(ctxt, 0, sizeof(xmlParserCtxt));
2308 xmlInitParserCtxt(ctxt);
2309 return(ctxt);
2310}
2311
2312/************************************************************************
2313 * *
 *              Handling of node information                            *
2315 * *
2316 ************************************************************************/
2317
2318/**
2319 * xmlClearParserCtxt:
2320 * @ctxt: an XML parser context
2321 *
2322 * Clear (release owned resources) and reinitialize a parser context
2323 */
2324
2325void
2326xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2327{
2328 xmlClearNodeInfoSeq(&ctxt->node_seq);
2329 xmlInitParserCtxt(ctxt);
2330}
2331
2332/**
2333 * xmlParserFindNodeInfo:
2334 * @ctxt: an XML parser context
2335 * @node: an XML node within the tree
2336 *
2337 * Find the parser node info struct for a given node
2338 *
2339 * Returns an xmlParserNodeInfo block pointer or NULL
2340 */
2341const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2342 const xmlNode* node)
2343{
2344 unsigned long pos;
2345
2346 /* Find position where node should be at */
2347 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2348 if ( ctx->node_seq.buffer[pos].node == node )
2349 return &ctx->node_seq.buffer[pos];
2350 else
2351 return NULL;
2352}
2353
2354
2355/**
2356 * xmlInitNodeInfoSeq:
2357 * @seq: a node info sequence pointer
2358 *
2359 * -- Initialize (set to initial state) node info sequence
2360 */
2361void
2362xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2363{
2364 seq->length = 0;
2365 seq->maximum = 0;
2366 seq->buffer = NULL;
2367}
2368
2369/**
2370 * xmlClearNodeInfoSeq:
2371 * @seq: a node info sequence pointer
2372 *
2373 * -- Clear (release memory and reinitialize) node
2374 * info sequence
2375 */
2376void
2377xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2378{
2379 if ( seq->buffer != NULL )
2380 xmlFree(seq->buffer);
2381 xmlInitNodeInfoSeq(seq);
2382}
2383
2384
2385/**
2386 * xmlParserFindNodeInfoIndex:
2387 * @seq: a node info sequence pointer
2388 * @node: an XML node pointer
2389 *
2390 *
2391 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2392 * the given node is or should be at in a sorted sequence
2393 *
2394 * Returns a long indicating the position of the record
2395 */
2396unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2397 const xmlNode* node)
2398{
2399 unsigned long upper, lower, middle;
2400 int found = 0;
2401
2402 /* Do a binary search for the key */
2403 lower = 1;
2404 upper = seq->length;
2405 middle = 0;
2406 while ( lower <= upper && !found) {
2407 middle = lower + (upper - lower) / 2;
2408 if ( node == seq->buffer[middle - 1].node )
2409 found = 1;
2410 else if ( node < seq->buffer[middle - 1].node )
2411 upper = middle - 1;
2412 else
2413 lower = middle + 1;
2414 }
2415
2416 /* Return position */
2417 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2418 return middle;
2419 else
2420 return middle - 1;
2421}
2422
2423
2424/**
2425 * xmlParserAddNodeInfo:
2426 * @ctxt: an XML parser context
2427 * @info: a node info sequence pointer
2428 *
2429 * Insert node info record into the sorted sequence
2430 */
2431void
2432xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2433 const xmlParserNodeInfo* info)
2434{
2435 unsigned long pos;
2436 static unsigned int block_size = 5;
2437
2438 /* Find pos and check to see if node is already in the sequence */
2439 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2440 if ( pos < ctxt->node_seq.length
2441 && ctxt->node_seq.buffer[pos].node == info->node ) {
2442 ctxt->node_seq.buffer[pos] = *info;
2443 }
2444
2445 /* Otherwise, we need to add new node to buffer */
2446 else {
2447 /* Expand buffer by 5 if needed */
2448 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2449 xmlParserNodeInfo* tmp_buffer;
2450 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2451 *(ctxt->node_seq.maximum + block_size));
2452
2453 if ( ctxt->node_seq.buffer == NULL )
2454 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2455 else
2456 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2457
2458 if ( tmp_buffer == NULL ) {
2459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2460 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2461 ctxt->errNo = XML_ERR_NO_MEMORY;
2462 return;
2463 }
2464 ctxt->node_seq.buffer = tmp_buffer;
2465 ctxt->node_seq.maximum += block_size;
2466 }
2467
2468 /* If position is not at end, move elements out of the way */
2469 if ( pos != ctxt->node_seq.length ) {
2470 unsigned long i;
2471
2472 for ( i = ctxt->node_seq.length; i > pos; i-- )
2473 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2474 }
2475
2476 /* Copy element and increase length */
2477 ctxt->node_seq.buffer[pos] = *info;
2478 ctxt->node_seq.length++;
2479 }
2480}
2481
2482/************************************************************************
2483 * *
2484 * Deprecated functions kept for compatibility *
2485 * *
2486 ************************************************************************/
2487
/*
 * xmlLangIsAsciiLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is in [a-z] or [A-Z], 0 otherwise
 */
static int
xmlLangIsAsciiLetter(xmlChar c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * Returns 1 if correct 0 otherwise (including for a NULL @lang)
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: productions [36] and [37] both
         * require at least one letter after the dash.
         */
        cur += 2;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return(0);
        while (xmlLangIsAsciiLetter(cur[0])) /* non input consuming */
            cur++;
    } else if (xmlLangIsAsciiLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return(0);
        cur++;
    } else
        return(0);

    /* Any number of '-' Subcode, each Subcode holding one letter or more */
    while (cur[0] != 0) { /* non input consuming */
        if (cur[0] != '-')
            return(0);
        cur++;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return(0);
        while (xmlLangIsAsciiLetter(cur[0])) /* non input consuming */
            cur++;
    }
    return(1);
}
2558
2559/**
2560 * xmlDecodeEntities:
2561 * @ctxt: the parser context
2562 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2563 * @len: the len to decode (in bytes !), -1 for no size limit
2564 * @end: an end marker xmlChar, 0 if none
2565 * @end2: an end marker xmlChar, 0 if none
2566 * @end3: an end marker xmlChar, 0 if none
2567 *
2568 * This function is deprecated, we now always process entities content
2569 * through xmlStringDecodeEntities
2570 *
2571 * TODO: remove it in next major release.
2572 *
2573 * [67] Reference ::= EntityRef | CharRef
2574 *
2575 * [69] PEReference ::= '%' Name ';'
2576 *
2577 * Returns A newly allocated string with the substitution done. The caller
2578 * must deallocate it !
2579 */
2580xmlChar *
2581xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2582 xmlChar end, xmlChar end2, xmlChar end3) {
2583#if 0
2584 xmlChar *buffer = NULL;
2585 unsigned int buffer_size = 0;
2586 unsigned int nbchars = 0;
2587
2588 xmlChar *current = NULL;
2589 xmlEntityPtr ent;
2590 unsigned int max = (unsigned int) len;
2591 int c,l;
2592#endif
2593
2594 static int deprecated = 0;
2595 if (!deprecated) {
2596 xmlGenericError(xmlGenericErrorContext,
2597 "xmlDecodeEntities() deprecated function reached\n");
2598 deprecated = 1;
2599 }
2600
2601#if 0
2602 if (ctxt->depth > 40) {
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "Detected entity reference loop\n");
2606 ctxt->wellFormed = 0;
2607 ctxt->disableSAX = 1;
2608 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2609 return(NULL);
2610 }
2611
2612 /*
2613 * allocate a translation buffer.
2614 */
2615 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2616 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2617 if (buffer == NULL) {
2618 perror("xmlDecodeEntities: malloc failed");
2619 return(NULL);
2620 }
2621
2622 /*
2623 * Ok loop until we reach one of the ending char or a size limit.
2624 */
2625 GROW;
2626 c = CUR_CHAR(l);
2627 while ((nbchars < max) && (c != end) && /* NOTUSED */
2628 (c != end2) && (c != end3)) {
2629 GROW;
2630 if (c == 0) break;
2631 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2632 int val = xmlParseCharRef(ctxt);
2633 COPY_BUF(0,buffer,nbchars,val);
2634 NEXTL(l);
2635 } else if ((c == '&') && (ctxt->token != '&') &&
2636 (what & XML_SUBSTITUTE_REF)) {
2637 if (xmlParserDebugEntities)
2638 xmlGenericError(xmlGenericErrorContext,
2639 "decoding Entity Reference\n");
2640 ent = xmlParseEntityRef(ctxt);
2641 if ((ent != NULL) &&
2642 (ctxt->replaceEntities != 0)) {
2643 current = ent->content;
2644 while (*current != 0) { /* non input consuming loop */
2645 buffer[nbchars++] = *current++;
2646 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2647 growBuffer(buffer);
2648 }
2649 }
2650 } else if (ent != NULL) {
2651 const xmlChar *cur = ent->name;
2652
2653 buffer[nbchars++] = '&';
2654 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2655 growBuffer(buffer);
2656 }
2657 while (*cur != 0) { /* non input consuming loop */
2658 buffer[nbchars++] = *cur++;
2659 }
2660 buffer[nbchars++] = ';';
2661 }
2662 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2663 /*
2664 * a PEReference induce to switch the entity flow,
2665 * we break here to flush the current set of chars
2666 * parsed if any. We will be called back later.
2667 */
2668 if (xmlParserDebugEntities)
2669 xmlGenericError(xmlGenericErrorContext,
2670 "decoding PE Reference\n");
2671 if (nbchars != 0) break;
2672
2673 xmlParsePEReference(ctxt);
2674
2675 /*
2676 * Pop-up of finished entities.
2677 */
2678 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2679 xmlPopInput(ctxt);
2680
2681 break;
2682 } else {
2683 COPY_BUF(l,buffer,nbchars,c);
2684 NEXTL(l);
2685 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2686 growBuffer(buffer);
2687 }
2688 }
2689 c = CUR_CHAR(l);
2690 }
2691 buffer[nbchars++] = 0;
2692 return(buffer);
2693#endif
2694 return(NULL);
2695}
2696
2697/**
2698 * xmlNamespaceParseNCName:
2699 * @ctxt: an XML parser context
2700 *
2701 * parse an XML namespace name.
2702 *
2703 * TODO: this seems not in use anymore, the namespace handling is done on
2704 * top of the SAX interfaces, i.e. not on raw input.
2705 *
2706 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2707 *
2708 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2709 * CombiningChar | Extender
2710 *
2711 * Returns the namespace name or NULL
2712 */
2713
2714xmlChar *
2715xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2716#if 0
2717 xmlChar buf[XML_MAX_NAMELEN + 5];
2718 int len = 0, l;
2719 int cur = CUR_CHAR(l);
2720#endif
2721
2722 static int deprecated = 0;
2723 if (!deprecated) {
2724 xmlGenericError(xmlGenericErrorContext,
2725 "xmlNamespaceParseNCName() deprecated function reached\n");
2726 deprecated = 1;
2727 }
2728
2729#if 0
2730 /* load first the value of the char !!! */
2731 GROW;
2732 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2733
2734xmlGenericError(xmlGenericErrorContext,
2735 "xmlNamespaceParseNCName: reached loop 3\n");
2736 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2737 (cur == '.') || (cur == '-') ||
2738 (cur == '_') ||
2739 (IS_COMBINING(cur)) ||
2740 (IS_EXTENDER(cur))) {
2741 COPY_BUF(l,buf,len,cur);
2742 NEXTL(l);
2743 cur = CUR_CHAR(l);
2744 if (len >= XML_MAX_NAMELEN) {
2745 xmlGenericError(xmlGenericErrorContext,
2746 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2747 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2748 (cur == '.') || (cur == '-') ||
2749 (cur == '_') ||
2750 (IS_COMBINING(cur)) ||
2751 (IS_EXTENDER(cur))) {
2752 NEXTL(l);
2753 cur = CUR_CHAR(l);
2754 }
2755 break;
2756 }
2757 }
2758 return(xmlStrndup(buf, len));
2759#endif
2760 return(NULL);
2761}
2762
2763/**
2764 * xmlNamespaceParseQName:
2765 * @ctxt: an XML parser context
2766 * @prefix: a xmlChar **
2767 *
2768 * TODO: this seems not in use anymore, the namespace handling is done on
2769 * top of the SAX interfaces, i.e. not on raw input.
2770 *
2771 * parse an XML qualified name
2772 *
2773 * [NS 5] QName ::= (Prefix ':')? LocalPart
2774 *
2775 * [NS 6] Prefix ::= NCName
2776 *
2777 * [NS 7] LocalPart ::= NCName
2778 *
2779 * Returns the local part, and prefix is updated
2780 * to get the Prefix if any.
2781 */
2782
2783xmlChar *
2784xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2785
2786 static int deprecated = 0;
2787 if (!deprecated) {
2788 xmlGenericError(xmlGenericErrorContext,
2789 "xmlNamespaceParseQName() deprecated function reached\n");
2790 deprecated = 1;
2791 }
2792
2793#if 0
2794 xmlChar *ret = NULL;
2795
2796 *prefix = NULL;
2797 ret = xmlNamespaceParseNCName(ctxt);
2798 if (RAW == ':') {
2799 *prefix = ret;
2800 NEXT;
2801 ret = xmlNamespaceParseNCName(ctxt);
2802 }
2803
2804 return(ret);
2805#endif
2806 return(NULL);
2807}
2808
2809/**
2810 * xmlNamespaceParseNSDef:
2811 * @ctxt: an XML parser context
2812 *
2813 * parse a namespace prefix declaration
2814 *
2815 * TODO: this seems not in use anymore, the namespace handling is done on
2816 * top of the SAX interfaces, i.e. not on raw input.
2817 *
2818 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2819 *
2820 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2821 *
2822 * Returns the namespace name
2823 */
2824
2825xmlChar *
2826xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2827 static int deprecated = 0;
2828 if (!deprecated) {
2829 xmlGenericError(xmlGenericErrorContext,
2830 "xmlNamespaceParseNSDef() deprecated function reached\n");
2831 deprecated = 1;
2832 }
2833 return(NULL);
2834#if 0
2835 xmlChar *name = NULL;
2836
2837 if ((RAW == 'x') && (NXT(1) == 'm') &&
2838 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2839 (NXT(4) == 's')) {
2840 SKIP(5);
2841 if (RAW == ':') {
2842 NEXT;
2843 name = xmlNamespaceParseNCName(ctxt);
2844 }
2845 }
2846 return(name);
2847#endif
2848}
2849
2850/**
2851 * xmlParseQuotedString:
2852 * @ctxt: an XML parser context
2853 *
2854 * Parse and return a string between quotes or doublequotes
2855 *
2856 * TODO: Deprecated, to be removed at next drop of binary compatibility
2857 *
2858 * Returns the string parser or NULL.
2859 */
2860xmlChar *
2861xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2862 static int deprecated = 0;
2863 if (!deprecated) {
2864 xmlGenericError(xmlGenericErrorContext,
2865 "xmlParseQuotedString() deprecated function reached\n");
2866 deprecated = 1;
2867 }
2868 return(NULL);
2869
2870#if 0
2871 xmlChar *buf = NULL;
2872 int len = 0,l;
2873 int size = XML_PARSER_BUFFER_SIZE;
2874 int c;
2875
2876 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2877 if (buf == NULL) {
2878 xmlGenericError(xmlGenericErrorContext,
2879 "malloc of %d byte failed\n", size);
2880 return(NULL);
2881 }
2882xmlGenericError(xmlGenericErrorContext,
2883 "xmlParseQuotedString: reached loop 4\n");
2884 if (RAW == '"') {
2885 NEXT;
2886 c = CUR_CHAR(l);
2887 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2888 if (len + 5 >= size) {
2889 size *= 2;
2890 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2891 if (buf == NULL) {
2892 xmlGenericError(xmlGenericErrorContext,
2893 "realloc of %d byte failed\n", size);
2894 return(NULL);
2895 }
2896 }
2897 COPY_BUF(l,buf,len,c);
2898 NEXTL(l);
2899 c = CUR_CHAR(l);
2900 }
2901 if (c != '"') {
2902 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2904 ctxt->sax->error(ctxt->userData,
2905 "String not closed \"%.50s\"\n", buf);
2906 ctxt->wellFormed = 0;
2907 ctxt->disableSAX = 1;
2908 } else {
2909 NEXT;
2910 }
2911 } else if (RAW == '\''){
2912 NEXT;
2913 c = CUR;
2914 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2915 if (len + 1 >= size) {
2916 size *= 2;
2917 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2918 if (buf == NULL) {
2919 xmlGenericError(xmlGenericErrorContext,
2920 "realloc of %d byte failed\n", size);
2921 return(NULL);
2922 }
2923 }
2924 buf[len++] = c;
2925 NEXT;
2926 c = CUR;
2927 }
2928 if (RAW != '\'') {
2929 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2931 ctxt->sax->error(ctxt->userData,
2932 "String not closed \"%.50s\"\n", buf);
2933 ctxt->wellFormed = 0;
2934 ctxt->disableSAX = 1;
2935 } else {
2936 NEXT;
2937 }
2938 }
2939 return(buf);
2940#endif
2941}
2942
2943/**
2944 * xmlParseNamespace:
2945 * @ctxt: an XML parser context
2946 *
2947 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2948 *
2949 * This is what the older xml-name Working Draft specified, a bunch of
2950 * other stuff may still rely on it, so support is still here as
2951 * if it was declared on the root of the Tree:-(
2952 *
2953 * TODO: remove from library
2954 *
2955 * To be removed at next drop of binary compatibility
2956 */
2957
2958void
2959xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2960 static int deprecated = 0;
2961 if (!deprecated) {
2962 xmlGenericError(xmlGenericErrorContext,
2963 "xmlParseNamespace() deprecated function reached\n");
2964 deprecated = 1;
2965 }
2966
2967#if 0
2968 xmlChar *href = NULL;
2969 xmlChar *prefix = NULL;
2970 int garbage = 0;
2971
2972 /*
2973 * We just skipped "namespace" or "xml:namespace"
2974 */
2975 SKIP_BLANKS;
2976
2977xmlGenericError(xmlGenericErrorContext,
2978 "xmlParseNamespace: reached loop 5\n");
2979 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2980 /*
2981 * We can have "ns" or "prefix" attributes
2982 * Old encoding as 'href' or 'AS' attributes is still supported
2983 */
2984 if ((RAW == 'n') && (NXT(1) == 's')) {
2985 garbage = 0;
2986 SKIP(2);
2987 SKIP_BLANKS;
2988
2989 if (RAW != '=') continue;
2990 NEXT;
2991 SKIP_BLANKS;
2992
2993 href = xmlParseQuotedString(ctxt);
2994 SKIP_BLANKS;
2995 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2996 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2997 garbage = 0;
2998 SKIP(4);
2999 SKIP_BLANKS;
3000
3001 if (RAW != '=') continue;
3002 NEXT;
3003 SKIP_BLANKS;
3004
3005 href = xmlParseQuotedString(ctxt);
3006 SKIP_BLANKS;
3007 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3008 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3009 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3010 garbage = 0;
3011 SKIP(6);
3012 SKIP_BLANKS;
3013
3014 if (RAW != '=') continue;
3015 NEXT;
3016 SKIP_BLANKS;
3017
3018 prefix = xmlParseQuotedString(ctxt);
3019 SKIP_BLANKS;
3020 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3021 garbage = 0;
3022 SKIP(2);
3023 SKIP_BLANKS;
3024
3025 if (RAW != '=') continue;
3026 NEXT;
3027 SKIP_BLANKS;
3028
3029 prefix = xmlParseQuotedString(ctxt);
3030 SKIP_BLANKS;
3031 } else if ((RAW == '?') && (NXT(1) == '>')) {
3032 garbage = 0;
3033 NEXT;
3034 } else {
3035 /*
3036 * Found garbage when parsing the namespace
3037 */
3038 if (!garbage) {
3039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3040 ctxt->sax->error(ctxt->userData,
3041 "xmlParseNamespace found garbage\n");
3042 }
3043 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3044 ctxt->wellFormed = 0;
3045 ctxt->disableSAX = 1;
3046 NEXT;
3047 }
3048 }
3049
3050 MOVETO_ENDTAG(CUR_PTR);
3051 NEXT;
3052
3053 /*
3054 * Register the DTD.
3055 if (href != NULL)
3056 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3057 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3058 */
3059
3060 if (prefix != NULL) xmlFree(prefix);
3061 if (href != NULL) xmlFree(href);
3062#endif
3063}
3064
3065/**
3066 * xmlScanName:
3067 * @ctxt: an XML parser context
3068 *
3069 * Trickery: parse an XML name but without consuming the input flow
3070 * Needed for rollback cases. Used only when parsing entities references.
3071 *
3072 * TODO: seems deprecated now, only used in the default part of
3073 * xmlParserHandleReference
3074 *
3075 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3076 * CombiningChar | Extender
3077 *
3078 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3079 *
3080 * [6] Names ::= Name (S Name)*
3081 *
3082 * Returns the Name parsed or NULL
3083 */
3084
3085xmlChar *
3086xmlScanName(xmlParserCtxtPtr ctxt) {
3087 static int deprecated = 0;
3088 if (!deprecated) {
3089 xmlGenericError(xmlGenericErrorContext,
3090 "xmlScanName() deprecated function reached\n");
3091 deprecated = 1;
3092 }
3093 return(NULL);
3094
3095#if 0
3096 xmlChar buf[XML_MAX_NAMELEN];
3097 int len = 0;
3098
3099 GROW;
3100 if (!IS_LETTER(RAW) && (RAW != '_') &&
3101 (RAW != ':')) {
3102 return(NULL);
3103 }
3104
3105
3106 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3107 (NXT(len) == '.') || (NXT(len) == '-') ||
3108 (NXT(len) == '_') || (NXT(len) == ':') ||
3109 (IS_COMBINING(NXT(len))) ||
3110 (IS_EXTENDER(NXT(len)))) {
3111 GROW;
3112 buf[len] = NXT(len);
3113 len++;
3114 if (len >= XML_MAX_NAMELEN) {
3115 xmlGenericError(xmlGenericErrorContext,
3116 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3117 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3118 (IS_DIGIT(NXT(len))) ||
3119 (NXT(len) == '.') || (NXT(len) == '-') ||
3120 (NXT(len) == '_') || (NXT(len) == ':') ||
3121 (IS_COMBINING(NXT(len))) ||
3122 (IS_EXTENDER(NXT(len))))
3123 len++;
3124 break;
3125 }
3126 }
3127 return(xmlStrndup(buf, len));
3128#endif
3129}
3130
3131/**
3132 * xmlParserHandleReference:
3133 * @ctxt: the parser context
3134 *
3135 * TODO: Remove, now deprecated ... the test is done directly in the
3136 * content parsing
3137 * routines.
3138 *
3139 * [67] Reference ::= EntityRef | CharRef
3140 *
3141 * [68] EntityRef ::= '&' Name ';'
3142 *
3143 * [ WFC: Entity Declared ]
3144 * the Name given in the entity reference must match that in an entity
3145 * declaration, except that well-formed documents need not declare any
3146 * of the following entities: amp, lt, gt, apos, quot.
3147 *
3148 * [ WFC: Parsed Entity ]
3149 * An entity reference must not contain the name of an unparsed entity
3150 *
3151 * [66] CharRef ::= '&#' [0-9]+ ';' |
3152 * '&#x' [0-9a-fA-F]+ ';'
3153 *
3154 * A PEReference may have been detectect in the current input stream
3155 * the handling is done accordingly to
3156 * http://www.w3.org/TR/REC-xml#entproc
3157 */
3158void
3159xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3160 static int deprecated = 0;
3161 if (!deprecated) {
3162 xmlGenericError(xmlGenericErrorContext,
3163 "xmlParserHandleReference() deprecated function reached\n");
3164 deprecated = 1;
3165 }
3166
3167#if 0
3168 xmlParserInputPtr input;
3169 xmlChar *name;
3170 xmlEntityPtr ent = NULL;
3171
3172 if (ctxt->token != 0) {
3173 return;
3174 }
3175 if (RAW != '&') return;
3176 GROW;
3177 if ((RAW == '&') && (NXT(1) == '#')) {
3178 switch(ctxt->instate) {
3179 case XML_PARSER_ENTITY_DECL:
3180 case XML_PARSER_PI:
3181 case XML_PARSER_CDATA_SECTION:
3182 case XML_PARSER_COMMENT:
3183 case XML_PARSER_SYSTEM_LITERAL:
3184 /* we just ignore it there */
3185 return;
3186 case XML_PARSER_START_TAG:
3187 return;
3188 case XML_PARSER_END_TAG:
3189 return;
3190 case XML_PARSER_EOF:
3191 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3193 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3194 ctxt->wellFormed = 0;
3195 ctxt->disableSAX = 1;
3196 return;
3197 case XML_PARSER_PROLOG:
3198 case XML_PARSER_START:
3199 case XML_PARSER_MISC:
3200 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3202 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3203 ctxt->wellFormed = 0;
3204 ctxt->disableSAX = 1;
3205 return;
3206 case XML_PARSER_EPILOG:
3207 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3209 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3210 ctxt->wellFormed = 0;
3211 ctxt->disableSAX = 1;
3212 return;
3213 case XML_PARSER_DTD:
3214 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3216 ctxt->sax->error(ctxt->userData,
3217 "CharRef are forbiden in DTDs!\n");
3218 ctxt->wellFormed = 0;
3219 ctxt->disableSAX = 1;
3220 return;
3221 case XML_PARSER_ENTITY_VALUE:
3222 /*
3223 * NOTE: in the case of entity values, we don't do the
3224 * substitution here since we need the literal
3225 * entity value to be able to save the internal
3226 * subset of the document.
3227 * This will be handled by xmlStringDecodeEntities
3228 */
3229 return;
3230 case XML_PARSER_CONTENT:
3231 return;
3232 case XML_PARSER_ATTRIBUTE_VALUE:
3233 /* ctxt->token = xmlParseCharRef(ctxt); */
3234 return;
3235 case XML_PARSER_IGNORE:
3236 return;
3237 }
3238 return;
3239 }
3240
3241 switch(ctxt->instate) {
3242 case XML_PARSER_CDATA_SECTION:
3243 return;
3244 case XML_PARSER_PI:
3245 case XML_PARSER_COMMENT:
3246 case XML_PARSER_SYSTEM_LITERAL:
3247 case XML_PARSER_CONTENT:
3248 return;
3249 case XML_PARSER_START_TAG:
3250 return;
3251 case XML_PARSER_END_TAG:
3252 return;
3253 case XML_PARSER_EOF:
3254 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3257 ctxt->wellFormed = 0;
3258 ctxt->disableSAX = 1;
3259 return;
3260 case XML_PARSER_PROLOG:
3261 case XML_PARSER_START:
3262 case XML_PARSER_MISC:
3263 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3265 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3266 ctxt->wellFormed = 0;
3267 ctxt->disableSAX = 1;
3268 return;
3269 case XML_PARSER_EPILOG:
3270 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3272 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3273 ctxt->wellFormed = 0;
3274 ctxt->disableSAX = 1;
3275 return;
3276 case XML_PARSER_ENTITY_VALUE:
3277 /*
3278 * NOTE: in the case of entity values, we don't do the
3279 * substitution here since we need the literal
3280 * entity value to be able to save the internal
3281 * subset of the document.
3282 * This will be handled by xmlStringDecodeEntities
3283 */
3284 return;
3285 case XML_PARSER_ATTRIBUTE_VALUE:
3286 /*
3287 * NOTE: in the case of attributes values, we don't do the
3288 * substitution here unless we are in a mode where
3289 * the parser is explicitely asked to substitute
3290 * entities. The SAX callback is called with values
3291 * without entity substitution.
3292 * This will then be handled by xmlStringDecodeEntities
3293 */
3294 return;
3295 case XML_PARSER_ENTITY_DECL:
3296 /*
3297 * we just ignore it there
3298 * the substitution will be done once the entity is referenced
3299 */
3300 return;
3301 case XML_PARSER_DTD:
3302 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3304 ctxt->sax->error(ctxt->userData,
3305 "Entity references are forbiden in DTDs!\n");
3306 ctxt->wellFormed = 0;
3307 ctxt->disableSAX = 1;
3308 return;
3309 case XML_PARSER_IGNORE:
3310 return;
3311 }
3312
3313/* TODO: this seems not reached anymore .... Verify ... */
3314xmlGenericError(xmlGenericErrorContext,
3315 "Reached deprecated section in xmlParserHandleReference()\n");
3316xmlGenericError(xmlGenericErrorContext,
3317 "Please forward the document to Daniel.Veillard@w3.org\n");
3318xmlGenericError(xmlGenericErrorContext,
3319 "indicating the version: %s, thanks !\n", xmlParserVersion);
3320 NEXT;
3321 name = xmlScanName(ctxt);
3322 if (name == NULL) {
3323 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3325 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3326 ctxt->wellFormed = 0;
3327 ctxt->disableSAX = 1;
3328 ctxt->token = '&';
3329 return;
3330 }
3331 if (NXT(xmlStrlen(name)) != ';') {
3332 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3334 ctxt->sax->error(ctxt->userData,
3335 "Entity reference: ';' expected\n");
3336 ctxt->wellFormed = 0;
3337 ctxt->disableSAX = 1;
3338 ctxt->token = '&';
3339 xmlFree(name);
3340 return;
3341 }
3342 SKIP(xmlStrlen(name) + 1);
3343 if (ctxt->sax != NULL) {
3344 if (ctxt->sax->getEntity != NULL)
3345 ent = ctxt->sax->getEntity(ctxt->userData, name);
3346 }
3347
3348 /*
3349 * [ WFC: Entity Declared ]
3350 * the Name given in the entity reference must match that in an entity
3351 * declaration, except that well-formed documents need not declare any
3352 * of the following entities: amp, lt, gt, apos, quot.
3353 */
3354 if (ent == NULL)
3355 ent = xmlGetPredefinedEntity(name);
3356 if (ent == NULL) {
3357 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3359 ctxt->sax->error(ctxt->userData,
3360 "Entity reference: entity %s not declared\n",
3361 name);
3362 ctxt->wellFormed = 0;
3363 ctxt->disableSAX = 1;
3364 xmlFree(name);
3365 return;
3366 }
3367
3368 /*
3369 * [ WFC: Parsed Entity ]
3370 * An entity reference must not contain the name of an unparsed entity
3371 */
3372 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3373 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData,
3376 "Entity reference to unparsed entity %s\n", name);
3377 ctxt->wellFormed = 0;
3378 ctxt->disableSAX = 1;
3379 }
3380
3381 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3382 ctxt->token = ent->content[0];
3383 xmlFree(name);
3384 return;
3385 }
3386 input = xmlNewEntityInputStream(ctxt, ent);
3387 xmlPushInput(ctxt, input);
3388 xmlFree(name);
3389#endif
3390 return;
3391}
3392
3393/**
3394 * xmlHandleEntity:
3395 * @ctxt: an XML parser context
3396 * @entity: an XML entity pointer.
3397 *
3398 * Default handling of defined entities, when should we define a new input
3399 * stream ? When do we just handle that as a set of chars ?
3400 *
3401 * OBSOLETE: to be removed at some point.
3402 */
3403
3404void
3405xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3406 static int deprecated = 0;
3407 if (!deprecated) {
3408 xmlGenericError(xmlGenericErrorContext,
3409 "xmlHandleEntity() deprecated function reached\n");
3410 deprecated = 1;
3411 }
3412
3413#if 0
3414 int len;
3415 xmlParserInputPtr input;
3416
3417 if (entity->content == NULL) {
3418 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3420 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3421 entity->name);
3422 ctxt->wellFormed = 0;
3423 ctxt->disableSAX = 1;
3424 return;
3425 }
3426 len = xmlStrlen(entity->content);
3427 if (len <= 2) goto handle_as_char;
3428
3429 /*
3430 * Redefine its content as an input stream.
3431 */
3432 input = xmlNewEntityInputStream(ctxt, entity);
3433 xmlPushInput(ctxt, input);
3434 return;
3435
3436handle_as_char:
3437 /*
3438 * Just handle the content as a set of chars.
3439 */
3440 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3441 (ctxt->sax->characters != NULL))
3442 ctxt->sax->characters(ctxt->userData, entity->content, len);
3443#endif
3444}
3445
3446/**
3447 * xmlNewGlobalNs:
3448 * @doc: the document carrying the namespace
3449 * @href: the URI associated
3450 * @prefix: the prefix for the namespace
3451 *
3452 * Creation of a Namespace, the old way using PI and without scoping
3453 * DEPRECATED !!!
3454 * It now create a namespace on the root element of the document if found.
3455 * Returns NULL this functionnality had been removed
3456 */
3457xmlNsPtr
3458xmlNewGlobalNs(xmlDocPtr doc, const xmlChar *href, const xmlChar *prefix) {
3459 static int deprecated = 0;
3460 if (!deprecated) {
3461 xmlGenericError(xmlGenericErrorContext,
3462 "xmlNewGlobalNs() deprecated function reached\n");
3463 deprecated = 1;
3464 }
3465 return(NULL);
3466#if 0
3467 xmlNodePtr root;
3468
3469 xmlNsPtr cur;
3470
3471 root = xmlDocGetRootElement(doc);
3472 if (root != NULL)
3473 return(xmlNewNs(root, href, prefix));
3474
3475 /*
3476 * if there is no root element yet, create an old Namespace type
3477 * and it will be moved to the root at save time.
3478 */
3479 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3480 if (cur == NULL) {
3481 xmlGenericError(xmlGenericErrorContext,
3482 "xmlNewGlobalNs : malloc failed\n");
3483 return(NULL);
3484 }
3485 memset(cur, 0, sizeof(xmlNs));
3486 cur->type = XML_GLOBAL_NAMESPACE;
3487
3488 if (href != NULL)
3489 cur->href = xmlStrdup(href);
3490 if (prefix != NULL)
3491 cur->prefix = xmlStrdup(prefix);
3492
3493 /*
3494 * Add it at the end to preserve parsing order ...
3495 */
3496 if (doc != NULL) {
3497 if (doc->oldNs == NULL) {
3498 doc->oldNs = cur;
3499 } else {
3500 xmlNsPtr prev = doc->oldNs;
3501
3502 while (prev->next != NULL) prev = prev->next;
3503 prev->next = cur;
3504 }
3505 }
3506
3507 return(NULL);
3508#endif
3509}
3510
3511/**
3512 * xmlUpgradeOldNs:
3513 * @doc: a document pointer
3514 *
3515 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3516 * DEPRECATED
3517 */
3518void
3519xmlUpgradeOldNs(xmlDocPtr doc) {
3520 static int deprecated = 0;
3521 if (!deprecated) {
3522 xmlGenericError(xmlGenericErrorContext,
3523 "xmlNewGlobalNs() deprecated function reached\n");
3524 deprecated = 1;
3525 }
3526#if 0
3527 xmlNsPtr cur;
3528
3529 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3530 if (doc->children == NULL) {
3531#ifdef DEBUG_TREE
3532 xmlGenericError(xmlGenericErrorContext,
3533 "xmlUpgradeOldNs: failed no root !\n");
3534#endif
3535 return;
3536 }
3537
3538 cur = doc->oldNs;
3539 while (cur->next != NULL) {
3540 cur->type = XML_LOCAL_NAMESPACE;
3541 cur = cur->next;
3542 }
3543 cur->type = XML_LOCAL_NAMESPACE;
3544 cur->next = doc->children->nsDef;
3545 doc->children->nsDef = doc->oldNs;
3546 doc->oldNs = NULL;
3547#endif
3548}
3549