blob: 37cb19e7ddd00b7e14ef6179a09c7d9c413d07bf [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * Daniel.Veillard@w3.org
8 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50
/*
 * Prototype for xmlUpgradeOldNs(), which takes the document to process.
 * No definition is visible in this part of the file.
 * NOTE(review): presumably declared here because no included header
 * provides the prototype - confirm.
 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000051void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000052
53/************************************************************************
54 * *
55 * Version and Features handling *
56 * *
57 ************************************************************************/
/*
 * Version string of the library, fixed at compile time from
 * LIBXML_VERSION_STRING.
 */
58const char *xmlParserVersion = LIBXML_VERSION_STRING;
59
60/*
61 * xmlCheckVersion:
62 * @version: the include version number
63 *
64 * check the compiled lib version against the include one.
65 * This can warn or immediately kill the application
66 */
67void
68xmlCheckVersion(int version) {
69 int myversion = (int) LIBXML_VERSION;
70
71 if ((myversion / 10000) != (version / 10000)) {
72 xmlGenericError(xmlGenericErrorContext,
73 "Fatal: program compiled against libxml %d using libxml %d\n",
74 (version / 10000), (myversion / 10000));
75 exit(1);
76 }
77 if ((myversion / 100) < (version / 100)) {
78 xmlGenericError(xmlGenericErrorContext,
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
81 }
82}
83
84
/*
 * Names of the features known to the feature API.  The order of this
 * table is the order in which xmlGetFeaturesList() hands the names out.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result; out: number of names actually stored
 * @result:  array receiving pointers to the feature names
 *
 * Store at most *@len feature names into @result.  When *@len exceeds the
 * number of known features it is lowered to that number; otherwise it is
 * left untouched.  The stored pointers refer to static strings and must
 * not be freed by the caller.
 *
 * Returns the total number of known features (also when @len or @result
 * is NULL, in which case nothing is stored), or -1 when *@len is negative
 * or not below 1000.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    idx = 0;
    while (idx < wanted) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
156
157/*
158 * xmlGetFeature:
159 * @ctxt: an XML/HTML parser context
160 * @name: the feature name
161 * @result: location to store the result
162 *
163 * Read the current value of one feature of this parser instance
164 *
165 * Returns -1 in case or error, 0 otherwise
166 */
167int
168xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
169 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
170 return(-1);
171
172 if (!strcmp(name, "validate")) {
173 *((int *) result) = ctxt->validate;
174 } else if (!strcmp(name, "keep blanks")) {
175 *((int *) result) = ctxt->keepBlanks;
176 } else if (!strcmp(name, "disable SAX")) {
177 *((int *) result) = ctxt->disableSAX;
178 } else if (!strcmp(name, "fetch external entities")) {
179 *((int *) result) = ctxt->loadsubset;
180 } else if (!strcmp(name, "substitute entities")) {
181 *((int *) result) = ctxt->replaceEntities;
182 } else if (!strcmp(name, "gather line info")) {
183 *((int *) result) = ctxt->record_info;
184 } else if (!strcmp(name, "user data")) {
185 *((void **)result) = ctxt->userData;
186 } else if (!strcmp(name, "is html")) {
187 *((int *) result) = ctxt->html;
188 } else if (!strcmp(name, "is standalone")) {
189 *((int *) result) = ctxt->standalone;
190 } else if (!strcmp(name, "document")) {
191 *((xmlDocPtr *) result) = ctxt->myDoc;
192 } else if (!strcmp(name, "is well formed")) {
193 *((int *) result) = ctxt->wellFormed;
194 } else if (!strcmp(name, "is valid")) {
195 *((int *) result) = ctxt->valid;
196 } else if (!strcmp(name, "SAX block")) {
197 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
198 } else if (!strcmp(name, "SAX function internalSubset")) {
199 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
200 } else if (!strcmp(name, "SAX function isStandalone")) {
201 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
202 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
203 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
204 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
205 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
206 } else if (!strcmp(name, "SAX function resolveEntity")) {
207 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
208 } else if (!strcmp(name, "SAX function getEntity")) {
209 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
210 } else if (!strcmp(name, "SAX function entityDecl")) {
211 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
212 } else if (!strcmp(name, "SAX function notationDecl")) {
213 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
214 } else if (!strcmp(name, "SAX function attributeDecl")) {
215 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
216 } else if (!strcmp(name, "SAX function elementDecl")) {
217 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
218 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
219 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
220 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
221 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
222 } else if (!strcmp(name, "SAX function startDocument")) {
223 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
224 } else if (!strcmp(name, "SAX function endDocument")) {
225 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
226 } else if (!strcmp(name, "SAX function startElement")) {
227 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
228 } else if (!strcmp(name, "SAX function endElement")) {
229 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
230 } else if (!strcmp(name, "SAX function reference")) {
231 *((referenceSAXFunc *) result) = ctxt->sax->reference;
232 } else if (!strcmp(name, "SAX function characters")) {
233 *((charactersSAXFunc *) result) = ctxt->sax->characters;
234 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
235 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
236 } else if (!strcmp(name, "SAX function processingInstruction")) {
237 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
238 } else if (!strcmp(name, "SAX function comment")) {
239 *((commentSAXFunc *) result) = ctxt->sax->comment;
240 } else if (!strcmp(name, "SAX function warning")) {
241 *((warningSAXFunc *) result) = ctxt->sax->warning;
242 } else if (!strcmp(name, "SAX function error")) {
243 *((errorSAXFunc *) result) = ctxt->sax->error;
244 } else if (!strcmp(name, "SAX function fatalError")) {
245 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
246 } else if (!strcmp(name, "SAX function getParameterEntity")) {
247 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
248 } else if (!strcmp(name, "SAX function cdataBlock")) {
249 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
250 } else if (!strcmp(name, "SAX function externalSubset")) {
251 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
252 } else {
253 return(-1);
254 }
255 return(0);
256}
257
258/*
259 * xmlSetFeature:
260 * @ctxt: an XML/HTML parser context
261 * @name: the feature name
262 * @value: pointer to the location of the new value
263 *
264 * Change the current value of one feature of this parser instance
265 *
266 * Returns -1 in case or error, 0 otherwise
267 */
268int
269xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
270 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
271 return(-1);
272
273 if (!strcmp(name, "validate")) {
274 int newvalidate = *((int *) value);
275 if ((!ctxt->validate) && (newvalidate != 0)) {
276 if (ctxt->vctxt.warning == NULL)
277 ctxt->vctxt.warning = xmlParserValidityWarning;
278 if (ctxt->vctxt.error == NULL)
279 ctxt->vctxt.error = xmlParserValidityError;
280 /* Allocate the Node stack */
281 ctxt->vctxt.nodeTab = (xmlNodePtr *)
282 xmlMalloc(4 * sizeof(xmlNodePtr));
283 if (ctxt->vctxt.nodeTab == NULL) {
284 ctxt->vctxt.nodeMax = 0;
285 ctxt->validate = 0;
286 return(-1);
287 }
288 ctxt->vctxt.nodeNr = 0;
289 ctxt->vctxt.nodeMax = 4;
290 ctxt->vctxt.node = NULL;
291 }
292 ctxt->validate = newvalidate;
293 } else if (!strcmp(name, "keep blanks")) {
294 ctxt->keepBlanks = *((int *) value);
295 } else if (!strcmp(name, "disable SAX")) {
296 ctxt->disableSAX = *((int *) value);
297 } else if (!strcmp(name, "fetch external entities")) {
298 ctxt->loadsubset = *((int *) value);
299 } else if (!strcmp(name, "substitute entities")) {
300 ctxt->replaceEntities = *((int *) value);
301 } else if (!strcmp(name, "gather line info")) {
302 ctxt->record_info = *((int *) value);
303 } else if (!strcmp(name, "user data")) {
304 ctxt->userData = *((void **)value);
305 } else if (!strcmp(name, "is html")) {
306 ctxt->html = *((int *) value);
307 } else if (!strcmp(name, "is standalone")) {
308 ctxt->standalone = *((int *) value);
309 } else if (!strcmp(name, "document")) {
310 ctxt->myDoc = *((xmlDocPtr *) value);
311 } else if (!strcmp(name, "is well formed")) {
312 ctxt->wellFormed = *((int *) value);
313 } else if (!strcmp(name, "is valid")) {
314 ctxt->valid = *((int *) value);
315 } else if (!strcmp(name, "SAX block")) {
316 ctxt->sax = *((xmlSAXHandlerPtr *) value);
317 } else if (!strcmp(name, "SAX function internalSubset")) {
318 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function isStandalone")) {
320 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
322 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
324 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
325 } else if (!strcmp(name, "SAX function resolveEntity")) {
326 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
327 } else if (!strcmp(name, "SAX function getEntity")) {
328 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
329 } else if (!strcmp(name, "SAX function entityDecl")) {
330 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function notationDecl")) {
332 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function attributeDecl")) {
334 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function elementDecl")) {
336 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
338 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
340 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startDocument")) {
342 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endDocument")) {
344 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function startElement")) {
346 ctxt->sax->startElement = *((startElementSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function endElement")) {
348 ctxt->sax->endElement = *((endElementSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function reference")) {
350 ctxt->sax->reference = *((referenceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function characters")) {
352 ctxt->sax->characters = *((charactersSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
354 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function processingInstruction")) {
356 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function comment")) {
358 ctxt->sax->comment = *((commentSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function warning")) {
360 ctxt->sax->warning = *((warningSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function error")) {
362 ctxt->sax->error = *((errorSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function fatalError")) {
364 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function getParameterEntity")) {
366 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
367 } else if (!strcmp(name, "SAX function cdataBlock")) {
368 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
369 } else if (!strcmp(name, "SAX function externalSubset")) {
370 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
371 } else {
372 return(-1);
373 }
374 return(0);
375}
376
377/************************************************************************
378 * *
379 * Some functions to avoid too large macros *
380 * *
381 ************************************************************************/
382
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character except the surrogate blocks, FFFE and FFFF
 * (and the C0 controls other than tab, LF and CR).
 * Also available as a macro IS_CHAR()
 *
 * Returns 1 if @c matches the production, 0 otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space character only tab, LF and CR are legal. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* Surrogate block. */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF are excluded; the supplementary planes are fine. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
403
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the characters allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 1 if @c is one of these white space characters, 0 otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
418
/*
 * Direct lookup table for the code points 0x0000 - 0x00FF: a 1 marks a
 * BaseChar.  Used as the fast path of xmlIsBaseChar().
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive [first, last] ranges of the BaseChar code points from 0x0100
 * upwards.  The table MUST stay sorted in ascending order with no
 * overlapping entries: xmlIsBaseChar() performs a binary search on it.
 */
static const unsigned short xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; everything
 * else is looked up in the sorted xmlBaseCharRanges table.  Negative
 * values are rejected up front so they can never index xmlBaseArray.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    /* A negative value would read before the start of xmlBaseArray. */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < (int) xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > (int) xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
657
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The accepted code points are kept in a small table of inclusive
 * [first, last] ranges which is scanned in order.
 *
 * Returns 1 if @c is a digit, 0 otherwise
 */
int
xmlIsDigit(int c) {
    static const int digitRanges[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const unsigned int count =
        (unsigned int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    unsigned int idx;

    for (idx = 0; idx < count; idx++) {
        if ((c >= digitRanges[idx][0]) && (c <= digitRanges[idx][1]))
            return(1);
    }
    return(0);
}
687
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The accepted code points are stored as inclusive [first, last] ranges
 * in ascending order and located with a binary search; the table must
 * stay sorted and free of overlaps.
 *
 * Returns 1 if @c is a combining character, 0 otherwise
 */
int
xmlIsCombining(int c) {
    static const unsigned short combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED}, {0x0901, 0x0903}, {0x093C, 0x093C},
        {0x093E, 0x094C}, {0x094D, 0x094D}, {0x0951, 0x0954},
        {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4},
        {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x09D7},
        {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C},
        {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5},
        {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03},
        {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
        {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03}, {0x0C3E, 0x0C44},
        {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03},
        {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D},
        {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9},
        {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
        {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97},
        {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F},
        {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    int first = 0;
    int last = (int) (sizeof(combiningRanges) /
                      sizeof(combiningRanges[0])) - 1;

    while (first <= last) {
        const int probe = first + (last - first) / 2;

        if (c < (int) combiningRanges[probe][0])
            last = probe - 1;
        else if (c > (int) combiningRanges[probe][1])
            first = probe + 1;
        else
            return(1);
    }
    return(0);
}
800
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE]: all three code points of the range */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
825
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; code
 * points outside them (for instance 0xF900-0xFA2D) are not Ideographic
 * characters in the sense of production [86].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* Nothing below 0x0100 can match: cheap exit for the common case. */
    if (c < 0x0100)
        return(0);
    return(((c >= 0x4e00) && (c <= 0x9fa5)) ||
           ((c >= 0x3021) && (c <= 0x3029)) ||
           (c == 0x3007));
}
843
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 * by trying IS_BASECHAR() first and IS_IDEOGRAPHIC() second.
 *
 * Returns 1 if @c is a letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
857
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* punctuation members of the production */
    static const char punct[] = "-'()+,./:=?;!*#@$_%";

    /* every allowed character is a non-zero 7 bit value */
    if ((c <= 0) || (c > 0x7F))
        return(0);

    if ((c == 0x20) || (c == 0x0D) || (c == 0x0A))
        return(1);
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    return(strchr(punct, c) != NULL);
}
880
881/************************************************************************
882 * *
883 * Input handling functions for progressive parsing *
884 * *
885 ************************************************************************/
886
887/* #define DEBUG_INPUT */
888/* #define DEBUG_STACK */
889/* #define DEBUG_PUSH */
890
891
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to verify
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the base pointer of @in no
 * longer matches the content of its buffer, or when the current
 * pointer lies outside of [base, base + use], then dumps the state
 * of the input.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p (an int cannot hold them on every
     * platform) and the pointer difference is narrowed explicitly so
     * that it matches its %d conversion.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            in->buf->buffer->use, in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
919
920
921/**
922 * xmlParserInputRead:
923 * @in: an XML parser input
924 * @len: an indicative size for the lookahead
925 *
926 * This function refresh the input for the parser. It doesn't try to
927 * preserve pointers to the input buffer, and discard already read data
928 *
929 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
930 * end of this entity
931 */
932int
933xmlParserInputRead(xmlParserInputPtr in, int len) {
934 int ret;
935 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000936 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000937
938#ifdef DEBUG_INPUT
939 xmlGenericError(xmlGenericErrorContext, "Read\n");
940#endif
941 if (in->buf == NULL) return(-1);
942 if (in->base == NULL) return(-1);
943 if (in->cur == NULL) return(-1);
944 if (in->buf->buffer == NULL) return(-1);
945 if (in->buf->readcallback == NULL) return(-1);
946
947 CHECK_BUFFER(in);
948
949 used = in->cur - in->buf->buffer->content;
950 ret = xmlBufferShrink(in->buf->buffer, used);
951 if (ret > 0) {
952 in->cur -= ret;
953 in->consumed += ret;
954 }
955 ret = xmlParserInputBufferRead(in->buf, len);
956 if (in->base != in->buf->buffer->content) {
957 /*
958 * the buffer has been realloced
959 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000960 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000961 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000962 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000963 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000964 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000965
966 CHECK_BUFFER(in);
967
968 return(ret);
969}
970
971/**
972 * xmlParserInputGrow:
973 * @in: an XML parser input
974 * @len: an indicative size for the lookahead
975 *
976 * This function increase the input for the parser. It tries to
977 * preserve pointers to the input buffer, and keep already read data
978 *
979 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
980 * end of this entity
981 */
982int
983xmlParserInputGrow(xmlParserInputPtr in, int len) {
984 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000985 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000986
987#ifdef DEBUG_INPUT
988 xmlGenericError(xmlGenericErrorContext, "Grow\n");
989#endif
990 if (in->buf == NULL) return(-1);
991 if (in->base == NULL) return(-1);
992 if (in->cur == NULL) return(-1);
993 if (in->buf->buffer == NULL) return(-1);
994
995 CHECK_BUFFER(in);
996
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000997 indx = in->cur - in->base;
998 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000999
1000 CHECK_BUFFER(in);
1001
1002 return(0);
1003 }
1004 if (in->buf->readcallback != NULL)
1005 ret = xmlParserInputBufferGrow(in->buf, len);
1006 else
1007 return(0);
1008
1009 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001010 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001011 * block, but we use it really as an integer to do some
1012 * pointer arithmetic. Insure will raise it as a bug but in
1013 * that specific case, that's not !
1014 */
1015 if (in->base != in->buf->buffer->content) {
1016 /*
1017 * the buffer has been realloced
1018 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001019 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001020 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001021 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001022 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001023 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001024
1025 CHECK_BUFFER(in);
1026
1027 return(ret);
1028}
1029
1030/**
1031 * xmlParserInputShrink:
1032 * @in: an XML parser input
1033 *
1034 * This function removes used input for the parser.
1035 */
1036void
1037xmlParserInputShrink(xmlParserInputPtr in) {
1038 int used;
1039 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001040 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001041
1042#ifdef DEBUG_INPUT
1043 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1044#endif
1045 if (in->buf == NULL) return;
1046 if (in->base == NULL) return;
1047 if (in->cur == NULL) return;
1048 if (in->buf->buffer == NULL) return;
1049
1050 CHECK_BUFFER(in);
1051
1052 used = in->cur - in->buf->buffer->content;
1053 /*
1054 * Do not shrink on large buffers whose only a tiny fraction
1055 * was consumned
1056 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001057 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001058 return;
1059 if (used > INPUT_CHUNK) {
1060 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1061 if (ret > 0) {
1062 in->cur -= ret;
1063 in->consumed += ret;
1064 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001065 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001066 }
1067
1068 CHECK_BUFFER(in);
1069
1070 if (in->buf->buffer->use > INPUT_CHUNK) {
1071 return;
1072 }
1073 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1074 if (in->base != in->buf->buffer->content) {
1075 /*
1076 * the buffer has been realloced
1077 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001078 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001079 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001080 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001081 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001082 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001083
1084 CHECK_BUFFER(in);
1085}
1086
1087/************************************************************************
1088 * *
1089 * UTF8 character input and related functions *
1090 * *
1091 ************************************************************************/
1092
1093/**
1094 * xmlNextChar:
1095 * @ctxt: the XML parser context
1096 *
1097 * Skip to the next char input char.
1098 */
1099
1100void
1101xmlNextChar(xmlParserCtxtPtr ctxt) {
1102 if (ctxt->instate == XML_PARSER_EOF)
1103 return;
1104
1105 /*
1106 * 2.11 End-of-Line Handling
1107 * the literal two-character sequence "#xD#xA" or a standalone
1108 * literal #xD, an XML processor must pass to the application
1109 * the single character #xA.
1110 */
1111 if (ctxt->token != 0) ctxt->token = 0;
1112 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1113 if ((*ctxt->input->cur == 0) &&
1114 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1115 (ctxt->instate != XML_PARSER_COMMENT)) {
1116 /*
1117 * If we are at the end of the current entity and
1118 * the context allows it, we pop consumed entities
1119 * automatically.
1120 * the auto closing should be blocked in other cases
1121 */
1122 xmlPopInput(ctxt);
1123 } else {
1124 if (*(ctxt->input->cur) == '\n') {
1125 ctxt->input->line++; ctxt->input->col = 1;
1126 } else ctxt->input->col++;
1127 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1128 /*
1129 * We are supposed to handle UTF8, check it's valid
1130 * From rfc2044: encoding of the Unicode values on UTF-8:
1131 *
1132 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1133 * 0000 0000-0000 007F 0xxxxxxx
1134 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1135 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1136 *
1137 * Check for the 0x110000 limit too
1138 */
1139 const unsigned char *cur = ctxt->input->cur;
1140 unsigned char c;
1141
1142 c = *cur;
1143 if (c & 0x80) {
1144 if (cur[1] == 0)
1145 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1146 if ((cur[1] & 0xc0) != 0x80)
1147 goto encoding_error;
1148 if ((c & 0xe0) == 0xe0) {
1149 unsigned int val;
1150
1151 if (cur[2] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if ((cur[2] & 0xc0) != 0x80)
1154 goto encoding_error;
1155 if ((c & 0xf0) == 0xf0) {
1156 if (cur[3] == 0)
1157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1158 if (((c & 0xf8) != 0xf0) ||
1159 ((cur[3] & 0xc0) != 0x80))
1160 goto encoding_error;
1161 /* 4-byte code */
1162 ctxt->input->cur += 4;
1163 val = (cur[0] & 0x7) << 18;
1164 val |= (cur[1] & 0x3f) << 12;
1165 val |= (cur[2] & 0x3f) << 6;
1166 val |= cur[3] & 0x3f;
1167 } else {
1168 /* 3-byte code */
1169 ctxt->input->cur += 3;
1170 val = (cur[0] & 0xf) << 12;
1171 val |= (cur[1] & 0x3f) << 6;
1172 val |= cur[2] & 0x3f;
1173 }
1174 if (((val > 0xd7ff) && (val < 0xe000)) ||
1175 ((val > 0xfffd) && (val < 0x10000)) ||
1176 (val >= 0x110000)) {
1177 if ((ctxt->sax != NULL) &&
1178 (ctxt->sax->error != NULL))
1179 ctxt->sax->error(ctxt->userData,
1180 "Char 0x%X out of allowed range\n", val);
1181 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1182 ctxt->wellFormed = 0;
1183 ctxt->disableSAX = 1;
1184 }
1185 } else
1186 /* 2-byte code */
1187 ctxt->input->cur += 2;
1188 } else
1189 /* 1-byte code */
1190 ctxt->input->cur++;
1191 } else {
1192 /*
1193 * Assume it's a fixed lenght encoding (1) with
1194 * a compatibke encoding for the ASCII set, since
1195 * XML constructs only use < 128 chars
1196 */
1197 ctxt->input->cur++;
1198 }
1199 ctxt->nbChars++;
1200 if (*ctxt->input->cur == 0)
1201 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1202 }
1203 } else {
1204 ctxt->input->cur++;
1205 ctxt->nbChars++;
1206 if (*ctxt->input->cur == 0)
1207 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1208 }
1209 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1210 xmlParserHandlePEReference(ctxt);
1211 if ((*ctxt->input->cur == 0) &&
1212 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1213 xmlPopInput(ctxt);
1214 return;
1215encoding_error:
1216 /*
1217 * If we detect an UTF8 error that probably mean that the
1218 * input encoding didn't get properly advertized in the
1219 * declaration header. Report the error and switch the encoding
1220 * to ISO-Latin-1 (if you don't like this policy, just declare the
1221 * encoding !)
1222 */
1223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1224 ctxt->sax->error(ctxt->userData,
1225 "Input is not proper UTF-8, indicate encoding !\n");
1226 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1227 ctxt->input->cur[0], ctxt->input->cur[1],
1228 ctxt->input->cur[2], ctxt->input->cur[3]);
1229 }
1230 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1231
1232 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1233 ctxt->input->cur++;
1234 return;
1235}
1236
1237/**
1238 * xmlCurrentChar:
1239 * @ctxt: the XML parser context
1240 * @len: pointer to the length of the char read
1241 *
1242 * The current char value, if using UTF-8 this may actaully span multiple
1243 * bytes in the input buffer. Implement the end of line normalization:
1244 * 2.11 End-of-Line Handling
1245 * Wherever an external parsed entity or the literal entity value
1246 * of an internal parsed entity contains either the literal two-character
1247 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1248 * must pass to the application the single character #xA.
1249 * This behavior can conveniently be produced by normalizing all
1250 * line breaks to #xA on input, before parsing.)
1251 *
1252 * Returns the current char value and its lenght
1253 */
1254
1255int
1256xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1257 if (ctxt->instate == XML_PARSER_EOF)
1258 return(0);
1259
1260 if (ctxt->token != 0) {
1261 *len = 0;
1262 return(ctxt->token);
1263 }
1264 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1265 *len = 1;
1266 return((int) *ctxt->input->cur);
1267 }
1268 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1269 /*
1270 * We are supposed to handle UTF8, check it's valid
1271 * From rfc2044: encoding of the Unicode values on UTF-8:
1272 *
1273 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1274 * 0000 0000-0000 007F 0xxxxxxx
1275 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1276 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1277 *
1278 * Check for the 0x110000 limit too
1279 */
1280 const unsigned char *cur = ctxt->input->cur;
1281 unsigned char c;
1282 unsigned int val;
1283
1284 c = *cur;
1285 if (c & 0x80) {
1286 if (cur[1] == 0)
1287 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1288 if ((cur[1] & 0xc0) != 0x80)
1289 goto encoding_error;
1290 if ((c & 0xe0) == 0xe0) {
1291
1292 if (cur[2] == 0)
1293 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1294 if ((cur[2] & 0xc0) != 0x80)
1295 goto encoding_error;
1296 if ((c & 0xf0) == 0xf0) {
1297 if (cur[3] == 0)
1298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1299 if (((c & 0xf8) != 0xf0) ||
1300 ((cur[3] & 0xc0) != 0x80))
1301 goto encoding_error;
1302 /* 4-byte code */
1303 *len = 4;
1304 val = (cur[0] & 0x7) << 18;
1305 val |= (cur[1] & 0x3f) << 12;
1306 val |= (cur[2] & 0x3f) << 6;
1307 val |= cur[3] & 0x3f;
1308 } else {
1309 /* 3-byte code */
1310 *len = 3;
1311 val = (cur[0] & 0xf) << 12;
1312 val |= (cur[1] & 0x3f) << 6;
1313 val |= cur[2] & 0x3f;
1314 }
1315 } else {
1316 /* 2-byte code */
1317 *len = 2;
1318 val = (cur[0] & 0x1f) << 6;
1319 val |= cur[1] & 0x3f;
1320 }
1321 if (!IS_CHAR(val)) {
1322 if ((ctxt->sax != NULL) &&
1323 (ctxt->sax->error != NULL))
1324 ctxt->sax->error(ctxt->userData,
1325 "Char 0x%X out of allowed range\n", val);
1326 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1327 ctxt->wellFormed = 0;
1328 ctxt->disableSAX = 1;
1329 }
1330 return(val);
1331 } else {
1332 /* 1-byte code */
1333 *len = 1;
1334 if (*ctxt->input->cur == 0xD) {
1335 if (ctxt->input->cur[1] == 0xA) {
1336 ctxt->nbChars++;
1337 ctxt->input->cur++;
1338 }
1339 return(0xA);
1340 }
1341 return((int) *ctxt->input->cur);
1342 }
1343 }
1344 /*
1345 * Assume it's a fixed lenght encoding (1) with
1346 * a compatibke encoding for the ASCII set, since
1347 * XML constructs only use < 128 chars
1348 */
1349 *len = 1;
1350 if (*ctxt->input->cur == 0xD) {
1351 if (ctxt->input->cur[1] == 0xA) {
1352 ctxt->nbChars++;
1353 ctxt->input->cur++;
1354 }
1355 return(0xA);
1356 }
1357 return((int) *ctxt->input->cur);
1358encoding_error:
1359 /*
1360 * If we detect an UTF8 error that probably mean that the
1361 * input encoding didn't get properly advertized in the
1362 * declaration header. Report the error and switch the encoding
1363 * to ISO-Latin-1 (if you don't like this policy, just declare the
1364 * encoding !)
1365 */
1366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1367 ctxt->sax->error(ctxt->userData,
1368 "Input is not proper UTF-8, indicate encoding !\n");
1369 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1370 ctxt->input->cur[0], ctxt->input->cur[1],
1371 ctxt->input->cur[2], ctxt->input->cur[3]);
1372 }
1373 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1374
1375 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1376 *len = 1;
1377 return((int) *ctxt->input->cur);
1378}
1379
1380/**
1381 * xmlStringCurrentChar:
1382 * @ctxt: the XML parser context
1383 * @cur: pointer to the beginning of the char
1384 * @len: pointer to the length of the char read
1385 *
1386 * The current char value, if using UTF-8 this may actaully span multiple
1387 * bytes in the input buffer.
1388 *
1389 * Returns the current char value and its lenght
1390 */
1391
1392int
1393xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1394 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1395 /*
1396 * We are supposed to handle UTF8, check it's valid
1397 * From rfc2044: encoding of the Unicode values on UTF-8:
1398 *
1399 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1400 * 0000 0000-0000 007F 0xxxxxxx
1401 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1402 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1403 *
1404 * Check for the 0x110000 limit too
1405 */
1406 unsigned char c;
1407 unsigned int val;
1408
1409 c = *cur;
1410 if (c & 0x80) {
1411 if ((cur[1] & 0xc0) != 0x80)
1412 goto encoding_error;
1413 if ((c & 0xe0) == 0xe0) {
1414
1415 if ((cur[2] & 0xc0) != 0x80)
1416 goto encoding_error;
1417 if ((c & 0xf0) == 0xf0) {
1418 if (((c & 0xf8) != 0xf0) ||
1419 ((cur[3] & 0xc0) != 0x80))
1420 goto encoding_error;
1421 /* 4-byte code */
1422 *len = 4;
1423 val = (cur[0] & 0x7) << 18;
1424 val |= (cur[1] & 0x3f) << 12;
1425 val |= (cur[2] & 0x3f) << 6;
1426 val |= cur[3] & 0x3f;
1427 } else {
1428 /* 3-byte code */
1429 *len = 3;
1430 val = (cur[0] & 0xf) << 12;
1431 val |= (cur[1] & 0x3f) << 6;
1432 val |= cur[2] & 0x3f;
1433 }
1434 } else {
1435 /* 2-byte code */
1436 *len = 2;
1437 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001438 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001439 }
1440 if (!IS_CHAR(val)) {
1441 if ((ctxt->sax != NULL) &&
1442 (ctxt->sax->error != NULL))
1443 ctxt->sax->error(ctxt->userData,
1444 "Char 0x%X out of allowed range\n", val);
1445 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1446 ctxt->wellFormed = 0;
1447 ctxt->disableSAX = 1;
1448 }
1449 return(val);
1450 } else {
1451 /* 1-byte code */
1452 *len = 1;
1453 return((int) *cur);
1454 }
1455 }
1456 /*
1457 * Assume it's a fixed lenght encoding (1) with
1458 * a compatibke encoding for the ASCII set, since
1459 * XML constructs only use < 128 chars
1460 */
1461 *len = 1;
1462 return((int) *cur);
1463encoding_error:
1464 /*
1465 * If we detect an UTF8 error that probably mean that the
1466 * input encoding didn't get properly advertized in the
1467 * declaration header. Report the error and switch the encoding
1468 * to ISO-Latin-1 (if you don't like this policy, just declare the
1469 * encoding !)
1470 */
1471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1472 ctxt->sax->error(ctxt->userData,
1473 "Input is not proper UTF-8, indicate encoding !\n");
1474 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1475 ctxt->input->cur[0], ctxt->input->cur[1],
1476 ctxt->input->cur[2], ctxt->input->cur[3]);
1477 }
1478 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1479
1480 *len = 1;
1481 return((int) *cur);
1482}
1483
1484/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001485 * xmlCopyCharMultiByte:
1486 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001487 * @val: the char value
1488 *
1489 * append the char value in the array
1490 *
1491 * Returns the number of xmlChar written
1492 */
Owen Taylor3473f882001-02-23 17:55:21 +00001493int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001494xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001495 /*
1496 * We are supposed to handle UTF8, check it's valid
1497 * From rfc2044: encoding of the Unicode values on UTF-8:
1498 *
1499 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1500 * 0000 0000-0000 007F 0xxxxxxx
1501 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1502 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1503 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001504 if (val >= 0x80) {
1505 xmlChar *savedout = out;
1506 int bits;
1507 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1508 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1509 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1510 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 xmlGenericError(xmlGenericErrorContext,
1512 "Internal error, xmlCopyChar 0x%X out of bound\n",
1513 val);
1514 return(0);
1515 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001516 for ( ; bits >= 0; bits-= 6)
1517 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1518 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001519 }
1520 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001521 return 1;
1522}
1523
1524/**
1525 * xmlCopyChar:
1526 * @len: Ignored, compatibility
1527 * @out: pointer to an arry of xmlChar
1528 * @val: the char value
1529 *
1530 * append the char value in the array
1531 *
1532 * Returns the number of xmlChar written
1533 */
1534
1535int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001536xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001537 /* the len parameter is ignored */
1538 if (val >= 0x80) {
1539 return(xmlCopyCharMultiByte (out, val));
1540 }
1541 *out = (xmlChar) val;
1542 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001543}
1544
1545/************************************************************************
1546 * *
1547 * Commodity functions to switch encodings *
1548 * *
1549 ************************************************************************/
1550
1551/**
1552 * xmlSwitchEncoding:
1553 * @ctxt: the parser context
1554 * @enc: the encoding value (number)
1555 *
1556 * change the input functions when discovering the character encoding
1557 * of a given entity.
1558 *
1559 * Returns 0 in case of success, -1 otherwise
1560 */
1561int
1562xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1563{
1564 xmlCharEncodingHandlerPtr handler;
1565
1566 switch (enc) {
1567 case XML_CHAR_ENCODING_ERROR:
1568 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1570 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1571 ctxt->wellFormed = 0;
1572 ctxt->disableSAX = 1;
1573 break;
1574 case XML_CHAR_ENCODING_NONE:
1575 /* let's assume it's UTF-8 without the XML decl */
1576 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1577 return(0);
1578 case XML_CHAR_ENCODING_UTF8:
1579 /* default encoding, no conversion should be needed */
1580 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1581 return(0);
1582 default:
1583 break;
1584 }
1585 handler = xmlGetCharEncodingHandler(enc);
1586 if (handler == NULL) {
1587 /*
1588 * Default handlers.
1589 */
1590 switch (enc) {
1591 case XML_CHAR_ENCODING_ERROR:
1592 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1594 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1595 ctxt->wellFormed = 0;
1596 ctxt->disableSAX = 1;
1597 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1598 break;
1599 case XML_CHAR_ENCODING_NONE:
1600 /* let's assume it's UTF-8 without the XML decl */
1601 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1602 return(0);
1603 case XML_CHAR_ENCODING_UTF8:
1604 case XML_CHAR_ENCODING_ASCII:
1605 /* default encoding, no conversion should be needed */
1606 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1607 return(0);
1608 case XML_CHAR_ENCODING_UTF16LE:
1609 break;
1610 case XML_CHAR_ENCODING_UTF16BE:
1611 break;
1612 case XML_CHAR_ENCODING_UCS4LE:
1613 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1615 ctxt->sax->error(ctxt->userData,
1616 "char encoding USC4 little endian not supported\n");
1617 break;
1618 case XML_CHAR_ENCODING_UCS4BE:
1619 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1621 ctxt->sax->error(ctxt->userData,
1622 "char encoding USC4 big endian not supported\n");
1623 break;
1624 case XML_CHAR_ENCODING_EBCDIC:
1625 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1627 ctxt->sax->error(ctxt->userData,
1628 "char encoding EBCDIC not supported\n");
1629 break;
1630 case XML_CHAR_ENCODING_UCS4_2143:
1631 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1633 ctxt->sax->error(ctxt->userData,
1634 "char encoding UCS4 2143 not supported\n");
1635 break;
1636 case XML_CHAR_ENCODING_UCS4_3412:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding UCS4 3412 not supported\n");
1641 break;
1642 case XML_CHAR_ENCODING_UCS2:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding UCS2 not supported\n");
1647 break;
1648 case XML_CHAR_ENCODING_8859_1:
1649 case XML_CHAR_ENCODING_8859_2:
1650 case XML_CHAR_ENCODING_8859_3:
1651 case XML_CHAR_ENCODING_8859_4:
1652 case XML_CHAR_ENCODING_8859_5:
1653 case XML_CHAR_ENCODING_8859_6:
1654 case XML_CHAR_ENCODING_8859_7:
1655 case XML_CHAR_ENCODING_8859_8:
1656 case XML_CHAR_ENCODING_8859_9:
1657 /*
1658 * We used to keep the internal content in the
1659 * document encoding however this turns being unmaintainable
1660 * So xmlGetCharEncodingHandler() will return non-null
1661 * values for this now.
1662 */
1663 if ((ctxt->inputNr == 1) &&
1664 (ctxt->encoding == NULL) &&
1665 (ctxt->input->encoding != NULL)) {
1666 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1667 }
1668 ctxt->charset = enc;
1669 return(0);
1670 case XML_CHAR_ENCODING_2022_JP:
1671 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1673 ctxt->sax->error(ctxt->userData,
1674 "char encoding ISO-2022-JPnot supported\n");
1675 break;
1676 case XML_CHAR_ENCODING_SHIFT_JIS:
1677 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1679 ctxt->sax->error(ctxt->userData,
1680 "char encoding Shift_JIS not supported\n");
1681 break;
1682 case XML_CHAR_ENCODING_EUC_JP:
1683 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1685 ctxt->sax->error(ctxt->userData,
1686 "char encoding EUC-JPnot supported\n");
1687 break;
1688 }
1689 }
1690 if (handler == NULL)
1691 return(-1);
1692 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1693 return(xmlSwitchToEncoding(ctxt, handler));
1694}
1695
1696/**
1697 * xmlSwitchToEncoding:
1698 * @ctxt: the parser context
1699 * @handler: the encoding handler
1700 *
1701 * change the input functions when discovering the character encoding
1702 * of a given entity.
1703 *
1704 * Returns 0 in case of success, -1 otherwise
1705 */
1706int
1707xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1708{
1709 int nbchars;
1710
1711 if (handler != NULL) {
1712 if (ctxt->input != NULL) {
1713 if (ctxt->input->buf != NULL) {
1714 if (ctxt->input->buf->encoder != NULL) {
1715 if (ctxt->input->buf->encoder == handler)
1716 return(0);
1717 /*
1718 * Note: this is a bit dangerous, but that's what it
1719 * takes to use nearly compatible signature for different
1720 * encodings.
1721 */
1722 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1723 ctxt->input->buf->encoder = handler;
1724 return(0);
1725 }
1726 ctxt->input->buf->encoder = handler;
1727
1728 /*
1729 * Is there already some content down the pipe to convert ?
1730 */
1731 if ((ctxt->input->buf->buffer != NULL) &&
1732 (ctxt->input->buf->buffer->use > 0)) {
1733 int processed;
1734
1735 /*
1736 * Specific handling of the Byte Order Mark for
1737 * UTF-16
1738 */
1739 if ((handler->name != NULL) &&
1740 (!strcmp(handler->name, "UTF-16LE")) &&
1741 (ctxt->input->cur[0] == 0xFF) &&
1742 (ctxt->input->cur[1] == 0xFE)) {
1743 ctxt->input->cur += 2;
1744 }
1745 if ((handler->name != NULL) &&
1746 (!strcmp(handler->name, "UTF-16BE")) &&
1747 (ctxt->input->cur[0] == 0xFE) &&
1748 (ctxt->input->cur[1] == 0xFF)) {
1749 ctxt->input->cur += 2;
1750 }
1751
1752 /*
1753 * Shring the current input buffer.
1754 * Move it as the raw buffer and create a new input buffer
1755 */
1756 processed = ctxt->input->cur - ctxt->input->base;
1757 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1758 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1759 ctxt->input->buf->buffer = xmlBufferCreate();
1760
1761 if (ctxt->html) {
1762 /*
1763 * converst as much as possbile of the buffer
1764 */
1765 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1766 ctxt->input->buf->buffer,
1767 ctxt->input->buf->raw);
1768 } else {
1769 /*
1770 * convert just enough to get
1771 * '<?xml version="1.0" encoding="xxx"?>'
1772 * parsed with the autodetected encoding
1773 * into the parser reading buffer.
1774 */
1775 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1776 ctxt->input->buf->buffer,
1777 ctxt->input->buf->raw);
1778 }
1779 if (nbchars < 0) {
1780 xmlGenericError(xmlGenericErrorContext,
1781 "xmlSwitchToEncoding: encoder error\n");
1782 return(-1);
1783 }
1784 ctxt->input->base =
1785 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001786 ctxt->input->end =
1787 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001788
1789 }
1790 return(0);
1791 } else {
1792 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1793 /*
1794 * When parsing a static memory array one must know the
1795 * size to be able to convert the buffer.
1796 */
1797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798 ctxt->sax->error(ctxt->userData,
1799 "xmlSwitchEncoding : no input\n");
1800 return(-1);
1801 } else {
1802 int processed;
1803
1804 /*
1805 * Shring the current input buffer.
1806 * Move it as the raw buffer and create a new input buffer
1807 */
1808 processed = ctxt->input->cur - ctxt->input->base;
1809
1810 ctxt->input->buf->raw = xmlBufferCreate();
1811 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1812 ctxt->input->length - processed);
1813 ctxt->input->buf->buffer = xmlBufferCreate();
1814
1815 /*
1816 * convert as much as possible of the raw input
1817 * to the parser reading buffer.
1818 */
1819 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1820 ctxt->input->buf->buffer,
1821 ctxt->input->buf->raw);
1822 if (nbchars < 0) {
1823 xmlGenericError(xmlGenericErrorContext,
1824 "xmlSwitchToEncoding: encoder error\n");
1825 return(-1);
1826 }
1827
1828 /*
1829 * Conversion succeeded, get rid of the old buffer
1830 */
1831 if ((ctxt->input->free != NULL) &&
1832 (ctxt->input->base != NULL))
1833 ctxt->input->free((xmlChar *) ctxt->input->base);
1834 ctxt->input->base =
1835 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001836 ctxt->input->end =
1837 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001838 }
1839 }
1840 } else {
1841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1842 ctxt->sax->error(ctxt->userData,
1843 "xmlSwitchEncoding : no input\n");
1844 return(-1);
1845 }
1846 /*
1847 * The parsing is now done in UTF8 natively
1848 */
1849 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1850 } else
1851 return(-1);
1852 return(0);
1853
1854}
1855
1856/************************************************************************
1857 * *
1858 * Commodity functions to handle entities processing *
1859 * *
1860 ************************************************************************/
1861
1862/**
1863 * xmlFreeInputStream:
1864 * @input: an xmlParserInputPtr
1865 *
1866 * Free up an input stream.
1867 */
1868void
1869xmlFreeInputStream(xmlParserInputPtr input) {
1870 if (input == NULL) return;
1871
1872 if (input->filename != NULL) xmlFree((char *) input->filename);
1873 if (input->directory != NULL) xmlFree((char *) input->directory);
1874 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1875 if (input->version != NULL) xmlFree((char *) input->version);
1876 if ((input->free != NULL) && (input->base != NULL))
1877 input->free((xmlChar *) input->base);
1878 if (input->buf != NULL)
1879 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001880 xmlFree(input);
1881}
1882
1883/**
1884 * xmlNewInputStream:
1885 * @ctxt: an XML parser context
1886 *
1887 * Create a new input stream structure
1888 * Returns the new input stream or NULL
1889 */
1890xmlParserInputPtr
1891xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1892 xmlParserInputPtr input;
1893
1894 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1895 if (input == NULL) {
1896 if (ctxt != NULL) {
1897 ctxt->errNo = XML_ERR_NO_MEMORY;
1898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1899 ctxt->sax->error(ctxt->userData,
1900 "malloc: couldn't allocate a new input stream\n");
1901 ctxt->errNo = XML_ERR_NO_MEMORY;
1902 }
1903 return(NULL);
1904 }
1905 memset(input, 0, sizeof(xmlParserInput));
1906 input->line = 1;
1907 input->col = 1;
1908 input->standalone = -1;
1909 return(input);
1910}
1911
1912/**
1913 * xmlNewIOInputStream:
1914 * @ctxt: an XML parser context
1915 * @input: an I/O Input
1916 * @enc: the charset encoding if known
1917 *
1918 * Create a new input stream structure encapsulating the @input into
1919 * a stream suitable for the parser.
1920 *
1921 * Returns the new input stream or NULL
1922 */
1923xmlParserInputPtr
1924xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1925 xmlCharEncoding enc) {
1926 xmlParserInputPtr inputStream;
1927
1928 if (xmlParserDebugEntities)
1929 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1930 inputStream = xmlNewInputStream(ctxt);
1931 if (inputStream == NULL) {
1932 return(NULL);
1933 }
1934 inputStream->filename = NULL;
1935 inputStream->buf = input;
1936 inputStream->base = inputStream->buf->buffer->content;
1937 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001938 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 if (enc != XML_CHAR_ENCODING_NONE) {
1940 xmlSwitchEncoding(ctxt, enc);
1941 }
1942
1943 return(inputStream);
1944}
1945
1946/**
1947 * xmlNewEntityInputStream:
1948 * @ctxt: an XML parser context
1949 * @entity: an Entity pointer
1950 *
1951 * Create a new input stream based on an xmlEntityPtr
1952 *
1953 * Returns the new input stream or NULL
1954 */
1955xmlParserInputPtr
1956xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1957 xmlParserInputPtr input;
1958
1959 if (entity == NULL) {
1960 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1962 ctxt->sax->error(ctxt->userData,
1963 "internal: xmlNewEntityInputStream entity = NULL\n");
1964 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1965 return(NULL);
1966 }
1967 if (xmlParserDebugEntities)
1968 xmlGenericError(xmlGenericErrorContext,
1969 "new input from entity: %s\n", entity->name);
1970 if (entity->content == NULL) {
1971 switch (entity->etype) {
1972 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1973 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975 ctxt->sax->error(ctxt->userData,
1976 "xmlNewEntityInputStream unparsed entity !\n");
1977 break;
1978 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1979 case XML_EXTERNAL_PARAMETER_ENTITY:
1980 return(xmlLoadExternalEntity((char *) entity->URI,
1981 (char *) entity->ExternalID, ctxt));
1982 case XML_INTERNAL_GENERAL_ENTITY:
1983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1984 ctxt->sax->error(ctxt->userData,
1985 "Internal entity %s without content !\n", entity->name);
1986 break;
1987 case XML_INTERNAL_PARAMETER_ENTITY:
1988 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1990 ctxt->sax->error(ctxt->userData,
1991 "Internal parameter entity %s without content !\n", entity->name);
1992 break;
1993 case XML_INTERNAL_PREDEFINED_ENTITY:
1994 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1996 ctxt->sax->error(ctxt->userData,
1997 "Predefined entity %s without content !\n", entity->name);
1998 break;
1999 }
2000 return(NULL);
2001 }
2002 input = xmlNewInputStream(ctxt);
2003 if (input == NULL) {
2004 return(NULL);
2005 }
2006 input->filename = (char *) entity->URI;
2007 input->base = entity->content;
2008 input->cur = entity->content;
2009 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002010 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002011 return(input);
2012}
2013
2014/**
2015 * xmlNewStringInputStream:
2016 * @ctxt: an XML parser context
2017 * @buffer: an memory buffer
2018 *
2019 * Create a new input stream based on a memory buffer.
2020 * Returns the new input stream
2021 */
2022xmlParserInputPtr
2023xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2024 xmlParserInputPtr input;
2025
2026 if (buffer == NULL) {
2027 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029 ctxt->sax->error(ctxt->userData,
2030 "internal: xmlNewStringInputStream string = NULL\n");
2031 return(NULL);
2032 }
2033 if (xmlParserDebugEntities)
2034 xmlGenericError(xmlGenericErrorContext,
2035 "new fixed input: %.30s\n", buffer);
2036 input = xmlNewInputStream(ctxt);
2037 if (input == NULL) {
2038 return(NULL);
2039 }
2040 input->base = buffer;
2041 input->cur = buffer;
2042 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002043 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002044 return(input);
2045}
2046
2047/**
2048 * xmlNewInputFromFile:
2049 * @ctxt: an XML parser context
2050 * @filename: the filename to use as entity
2051 *
2052 * Create a new input stream based on a file.
2053 *
2054 * Returns the new input stream or NULL in case of error
2055 */
2056xmlParserInputPtr
2057xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2058 xmlParserInputBufferPtr buf;
2059 xmlParserInputPtr inputStream;
2060 char *directory = NULL;
2061 xmlChar *URI = NULL;
2062
2063 if (xmlParserDebugEntities)
2064 xmlGenericError(xmlGenericErrorContext,
2065 "new input from file: %s\n", filename);
2066 if (ctxt == NULL) return(NULL);
2067 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2068 if (buf == NULL)
2069 return(NULL);
2070
2071 URI = xmlStrdup((xmlChar *) filename);
2072 directory = xmlParserGetDirectory((const char *) URI);
2073
2074 inputStream = xmlNewInputStream(ctxt);
2075 if (inputStream == NULL) {
2076 if (directory != NULL) xmlFree((char *) directory);
2077 if (URI != NULL) xmlFree((char *) URI);
2078 return(NULL);
2079 }
2080
2081 inputStream->filename = (const char *) URI;
2082 inputStream->directory = directory;
2083 inputStream->buf = buf;
2084
2085 inputStream->base = inputStream->buf->buffer->content;
2086 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002087 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002088 if ((ctxt->directory == NULL) && (directory != NULL))
2089 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2090 return(inputStream);
2091}
2092
2093/************************************************************************
2094 * *
2095 * Commodity functions to handle parser contexts *
2096 * *
2097 ************************************************************************/
2098
2099/**
2100 * xmlInitParserCtxt:
2101 * @ctxt: an XML parser context
2102 *
2103 * Initialize a parser context
2104 */
2105
2106void
2107xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2108{
2109 xmlSAXHandler *sax;
2110
2111 xmlDefaultSAXHandlerInit();
2112
2113 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2114 if (sax == NULL) {
2115 xmlGenericError(xmlGenericErrorContext,
2116 "xmlInitParserCtxt: out of memory\n");
2117 }
2118 else
2119 memset(sax, 0, sizeof(xmlSAXHandler));
2120
2121 /* Allocate the Input stack */
2122 ctxt->inputTab = (xmlParserInputPtr *)
2123 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2124 if (ctxt->inputTab == NULL) {
2125 xmlGenericError(xmlGenericErrorContext,
2126 "xmlInitParserCtxt: out of memory\n");
2127 ctxt->inputNr = 0;
2128 ctxt->inputMax = 0;
2129 ctxt->input = NULL;
2130 return;
2131 }
2132 ctxt->inputNr = 0;
2133 ctxt->inputMax = 5;
2134 ctxt->input = NULL;
2135
2136 ctxt->version = NULL;
2137 ctxt->encoding = NULL;
2138 ctxt->standalone = -1;
2139 ctxt->hasExternalSubset = 0;
2140 ctxt->hasPErefs = 0;
2141 ctxt->html = 0;
2142 ctxt->external = 0;
2143 ctxt->instate = XML_PARSER_START;
2144 ctxt->token = 0;
2145 ctxt->directory = NULL;
2146
2147 /* Allocate the Node stack */
2148 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2149 if (ctxt->nodeTab == NULL) {
2150 xmlGenericError(xmlGenericErrorContext,
2151 "xmlInitParserCtxt: out of memory\n");
2152 ctxt->nodeNr = 0;
2153 ctxt->nodeMax = 0;
2154 ctxt->node = NULL;
2155 ctxt->inputNr = 0;
2156 ctxt->inputMax = 0;
2157 ctxt->input = NULL;
2158 return;
2159 }
2160 ctxt->nodeNr = 0;
2161 ctxt->nodeMax = 10;
2162 ctxt->node = NULL;
2163
2164 /* Allocate the Name stack */
2165 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2166 if (ctxt->nameTab == NULL) {
2167 xmlGenericError(xmlGenericErrorContext,
2168 "xmlInitParserCtxt: out of memory\n");
2169 ctxt->nodeNr = 0;
2170 ctxt->nodeMax = 0;
2171 ctxt->node = NULL;
2172 ctxt->inputNr = 0;
2173 ctxt->inputMax = 0;
2174 ctxt->input = NULL;
2175 ctxt->nameNr = 0;
2176 ctxt->nameMax = 0;
2177 ctxt->name = NULL;
2178 return;
2179 }
2180 ctxt->nameNr = 0;
2181 ctxt->nameMax = 10;
2182 ctxt->name = NULL;
2183
2184 /* Allocate the space stack */
2185 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2186 if (ctxt->spaceTab == NULL) {
2187 xmlGenericError(xmlGenericErrorContext,
2188 "xmlInitParserCtxt: out of memory\n");
2189 ctxt->nodeNr = 0;
2190 ctxt->nodeMax = 0;
2191 ctxt->node = NULL;
2192 ctxt->inputNr = 0;
2193 ctxt->inputMax = 0;
2194 ctxt->input = NULL;
2195 ctxt->nameNr = 0;
2196 ctxt->nameMax = 0;
2197 ctxt->name = NULL;
2198 ctxt->spaceNr = 0;
2199 ctxt->spaceMax = 0;
2200 ctxt->space = NULL;
2201 return;
2202 }
2203 ctxt->spaceNr = 1;
2204 ctxt->spaceMax = 10;
2205 ctxt->spaceTab[0] = -1;
2206 ctxt->space = &ctxt->spaceTab[0];
2207
Daniel Veillard14be0a12001-03-03 18:50:55 +00002208 ctxt->sax = sax;
2209 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2210
Owen Taylor3473f882001-02-23 17:55:21 +00002211 ctxt->userData = ctxt;
2212 ctxt->myDoc = NULL;
2213 ctxt->wellFormed = 1;
2214 ctxt->valid = 1;
2215 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2216 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2217 ctxt->pedantic = xmlPedanticParserDefaultValue;
2218 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2219 ctxt->vctxt.userData = ctxt;
2220 if (ctxt->validate) {
2221 ctxt->vctxt.error = xmlParserValidityError;
2222 if (xmlGetWarningsDefaultValue == 0)
2223 ctxt->vctxt.warning = NULL;
2224 else
2225 ctxt->vctxt.warning = xmlParserValidityWarning;
2226 /* Allocate the Node stack */
2227 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2228 if (ctxt->vctxt.nodeTab == NULL) {
2229 xmlGenericError(xmlGenericErrorContext,
2230 "xmlInitParserCtxt: out of memory\n");
2231 ctxt->vctxt.nodeMax = 0;
2232 ctxt->validate = 0;
2233 ctxt->vctxt.error = NULL;
2234 ctxt->vctxt.warning = NULL;
2235 } else {
2236 ctxt->vctxt.nodeNr = 0;
2237 ctxt->vctxt.nodeMax = 4;
2238 ctxt->vctxt.node = NULL;
2239 }
2240 } else {
2241 ctxt->vctxt.error = NULL;
2242 ctxt->vctxt.warning = NULL;
2243 }
2244 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2245 ctxt->record_info = 0;
2246 ctxt->nbChars = 0;
2247 ctxt->checkIndex = 0;
2248 ctxt->inSubset = 0;
2249 ctxt->errNo = XML_ERR_OK;
2250 ctxt->depth = 0;
2251 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2252 xmlInitNodeInfoSeq(&ctxt->node_seq);
2253}
2254
2255/**
2256 * xmlFreeParserCtxt:
2257 * @ctxt: an XML parser context
2258 *
2259 * Free all the memory used by a parser context. However the parsed
2260 * document in ctxt->myDoc is not freed.
2261 */
2262
2263void
2264xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2265{
2266 xmlParserInputPtr input;
2267 xmlChar *oldname;
2268
2269 if (ctxt == NULL) return;
2270
2271 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2272 xmlFreeInputStream(input);
2273 }
2274 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2275 xmlFree(oldname);
2276 }
2277 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2278 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2279 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2280 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2281 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2282 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2283 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2284 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2285 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2286 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2287 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2288 xmlFree(ctxt->sax);
2289 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2290 xmlFree(ctxt);
2291}
2292
2293/**
2294 * xmlNewParserCtxt:
2295 *
2296 * Allocate and initialize a new parser context.
2297 *
2298 * Returns the xmlParserCtxtPtr or NULL
2299 */
2300
2301xmlParserCtxtPtr
2302xmlNewParserCtxt()
2303{
2304 xmlParserCtxtPtr ctxt;
2305
2306 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2307 if (ctxt == NULL) {
2308 xmlGenericError(xmlGenericErrorContext,
2309 "xmlNewParserCtxt : cannot allocate context\n");
2310 perror("malloc");
2311 return(NULL);
2312 }
2313 memset(ctxt, 0, sizeof(xmlParserCtxt));
2314 xmlInitParserCtxt(ctxt);
2315 return(ctxt);
2316}
2317
2318/************************************************************************
2319 * *
2320 * Handling of node informations *
2321 * *
2322 ************************************************************************/
2323
2324/**
2325 * xmlClearParserCtxt:
2326 * @ctxt: an XML parser context
2327 *
2328 * Clear (release owned resources) and reinitialize a parser context
2329 */
2330
2331void
2332xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2333{
2334 xmlClearNodeInfoSeq(&ctxt->node_seq);
2335 xmlInitParserCtxt(ctxt);
2336}
2337
2338/**
2339 * xmlParserFindNodeInfo:
2340 * @ctxt: an XML parser context
2341 * @node: an XML node within the tree
2342 *
2343 * Find the parser node info struct for a given node
2344 *
2345 * Returns an xmlParserNodeInfo block pointer or NULL
2346 */
2347const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2348 const xmlNode* node)
2349{
2350 unsigned long pos;
2351
2352 /* Find position where node should be at */
2353 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2354 if ( ctx->node_seq.buffer[pos].node == node )
2355 return &ctx->node_seq.buffer[pos];
2356 else
2357 return NULL;
2358}
2359
2360
2361/**
2362 * xmlInitNodeInfoSeq:
2363 * @seq: a node info sequence pointer
2364 *
2365 * -- Initialize (set to initial state) node info sequence
2366 */
2367void
2368xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2369{
2370 seq->length = 0;
2371 seq->maximum = 0;
2372 seq->buffer = NULL;
2373}
2374
2375/**
2376 * xmlClearNodeInfoSeq:
2377 * @seq: a node info sequence pointer
2378 *
2379 * -- Clear (release memory and reinitialize) node
2380 * info sequence
2381 */
2382void
2383xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2384{
2385 if ( seq->buffer != NULL )
2386 xmlFree(seq->buffer);
2387 xmlInitNodeInfoSeq(seq);
2388}
2389
2390
2391/**
2392 * xmlParserFindNodeInfoIndex:
2393 * @seq: a node info sequence pointer
2394 * @node: an XML node pointer
2395 *
2396 *
2397 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2398 * the given node is or should be at in a sorted sequence
2399 *
2400 * Returns a long indicating the position of the record
2401 */
2402unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2403 const xmlNode* node)
2404{
2405 unsigned long upper, lower, middle;
2406 int found = 0;
2407
2408 /* Do a binary search for the key */
2409 lower = 1;
2410 upper = seq->length;
2411 middle = 0;
2412 while ( lower <= upper && !found) {
2413 middle = lower + (upper - lower) / 2;
2414 if ( node == seq->buffer[middle - 1].node )
2415 found = 1;
2416 else if ( node < seq->buffer[middle - 1].node )
2417 upper = middle - 1;
2418 else
2419 lower = middle + 1;
2420 }
2421
2422 /* Return position */
2423 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2424 return middle;
2425 else
2426 return middle - 1;
2427}
2428
2429
2430/**
2431 * xmlParserAddNodeInfo:
2432 * @ctxt: an XML parser context
2433 * @info: a node info sequence pointer
2434 *
2435 * Insert node info record into the sorted sequence
2436 */
2437void
2438xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2439 const xmlParserNodeInfo* info)
2440{
2441 unsigned long pos;
2442 static unsigned int block_size = 5;
2443
2444 /* Find pos and check to see if node is already in the sequence */
2445 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2446 if ( pos < ctxt->node_seq.length
2447 && ctxt->node_seq.buffer[pos].node == info->node ) {
2448 ctxt->node_seq.buffer[pos] = *info;
2449 }
2450
2451 /* Otherwise, we need to add new node to buffer */
2452 else {
2453 /* Expand buffer by 5 if needed */
2454 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2455 xmlParserNodeInfo* tmp_buffer;
2456 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2457 *(ctxt->node_seq.maximum + block_size));
2458
2459 if ( ctxt->node_seq.buffer == NULL )
2460 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2461 else
2462 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2463
2464 if ( tmp_buffer == NULL ) {
2465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2466 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2467 ctxt->errNo = XML_ERR_NO_MEMORY;
2468 return;
2469 }
2470 ctxt->node_seq.buffer = tmp_buffer;
2471 ctxt->node_seq.maximum += block_size;
2472 }
2473
2474 /* If position is not at end, move elements out of the way */
2475 if ( pos != ctxt->node_seq.length ) {
2476 unsigned long i;
2477
2478 for ( i = ctxt->node_seq.length; i > pos; i-- )
2479 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2480 }
2481
2482 /* Copy element and increase length */
2483 ctxt->node_seq.buffer[pos] = *info;
2484 ctxt->node_seq.length++;
2485 }
2486}
2487
2488/************************************************************************
2489 * *
2490 * Deprecated functions kept for compatibility *
2491 * *
2492 ************************************************************************/
2493
2494/*
2495 * xmlCheckLanguageID
2496 * @lang: pointer to the string value
2497 *
2498 * Checks that the value conforms to the LanguageID production:
2499 *
2500 * NOTE: this is somewhat deprecated, those productions were removed from
2501 * the XML Second edition.
2502 *
2503 * [33] LanguageID ::= Langcode ('-' Subcode)*
2504 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2505 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2506 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2507 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2508 * [38] Subcode ::= ([a-z] | [A-Z])+
2509 *
2510 * Returns 1 if correct 0 otherwise
2511 **/
2512int
2513xmlCheckLanguageID(const xmlChar *lang) {
2514 const xmlChar *cur = lang;
2515
2516 if (cur == NULL)
2517 return(0);
2518 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2519 ((cur[0] == 'I') && (cur[1] == '-'))) {
2520 /*
2521 * IANA code
2522 */
2523 cur += 2;
2524 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2525 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2526 cur++;
2527 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2528 ((cur[0] == 'X') && (cur[1] == '-'))) {
2529 /*
2530 * User code
2531 */
2532 cur += 2;
2533 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2534 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2535 cur++;
2536 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2537 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2538 /*
2539 * ISO639
2540 */
2541 cur++;
2542 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2543 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2544 cur++;
2545 else
2546 return(0);
2547 } else
2548 return(0);
2549 while (cur[0] != 0) { /* non input consuming */
2550 if (cur[0] != '-')
2551 return(0);
2552 cur++;
2553 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2554 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2555 cur++;
2556 else
2557 return(0);
2558 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2559 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2560 cur++;
2561 }
2562 return(1);
2563}
2564
2565/**
2566 * xmlDecodeEntities:
2567 * @ctxt: the parser context
2568 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2569 * @len: the len to decode (in bytes !), -1 for no size limit
2570 * @end: an end marker xmlChar, 0 if none
2571 * @end2: an end marker xmlChar, 0 if none
2572 * @end3: an end marker xmlChar, 0 if none
2573 *
2574 * This function is deprecated, we now always process entities content
2575 * through xmlStringDecodeEntities
2576 *
2577 * TODO: remove it in next major release.
2578 *
2579 * [67] Reference ::= EntityRef | CharRef
2580 *
2581 * [69] PEReference ::= '%' Name ';'
2582 *
2583 * Returns A newly allocated string with the substitution done. The caller
2584 * must deallocate it !
2585 */
2586xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002587xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2588 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002589#if 0
2590 xmlChar *buffer = NULL;
2591 unsigned int buffer_size = 0;
2592 unsigned int nbchars = 0;
2593
2594 xmlChar *current = NULL;
2595 xmlEntityPtr ent;
2596 unsigned int max = (unsigned int) len;
2597 int c,l;
2598#endif
2599
2600 static int deprecated = 0;
2601 if (!deprecated) {
2602 xmlGenericError(xmlGenericErrorContext,
2603 "xmlDecodeEntities() deprecated function reached\n");
2604 deprecated = 1;
2605 }
2606
2607#if 0
2608 if (ctxt->depth > 40) {
2609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2610 ctxt->sax->error(ctxt->userData,
2611 "Detected entity reference loop\n");
2612 ctxt->wellFormed = 0;
2613 ctxt->disableSAX = 1;
2614 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2615 return(NULL);
2616 }
2617
2618 /*
2619 * allocate a translation buffer.
2620 */
2621 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2622 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2623 if (buffer == NULL) {
2624 perror("xmlDecodeEntities: malloc failed");
2625 return(NULL);
2626 }
2627
2628 /*
2629 * Ok loop until we reach one of the ending char or a size limit.
2630 */
2631 GROW;
2632 c = CUR_CHAR(l);
2633 while ((nbchars < max) && (c != end) && /* NOTUSED */
2634 (c != end2) && (c != end3)) {
2635 GROW;
2636 if (c == 0) break;
2637 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2638 int val = xmlParseCharRef(ctxt);
2639 COPY_BUF(0,buffer,nbchars,val);
2640 NEXTL(l);
2641 } else if ((c == '&') && (ctxt->token != '&') &&
2642 (what & XML_SUBSTITUTE_REF)) {
2643 if (xmlParserDebugEntities)
2644 xmlGenericError(xmlGenericErrorContext,
2645 "decoding Entity Reference\n");
2646 ent = xmlParseEntityRef(ctxt);
2647 if ((ent != NULL) &&
2648 (ctxt->replaceEntities != 0)) {
2649 current = ent->content;
2650 while (*current != 0) { /* non input consuming loop */
2651 buffer[nbchars++] = *current++;
2652 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2653 growBuffer(buffer);
2654 }
2655 }
2656 } else if (ent != NULL) {
2657 const xmlChar *cur = ent->name;
2658
2659 buffer[nbchars++] = '&';
2660 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2661 growBuffer(buffer);
2662 }
2663 while (*cur != 0) { /* non input consuming loop */
2664 buffer[nbchars++] = *cur++;
2665 }
2666 buffer[nbchars++] = ';';
2667 }
2668 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2669 /*
2670 * a PEReference induce to switch the entity flow,
2671 * we break here to flush the current set of chars
2672 * parsed if any. We will be called back later.
2673 */
2674 if (xmlParserDebugEntities)
2675 xmlGenericError(xmlGenericErrorContext,
2676 "decoding PE Reference\n");
2677 if (nbchars != 0) break;
2678
2679 xmlParsePEReference(ctxt);
2680
2681 /*
2682 * Pop-up of finished entities.
2683 */
2684 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2685 xmlPopInput(ctxt);
2686
2687 break;
2688 } else {
2689 COPY_BUF(l,buffer,nbchars,c);
2690 NEXTL(l);
2691 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2692 growBuffer(buffer);
2693 }
2694 }
2695 c = CUR_CHAR(l);
2696 }
2697 buffer[nbchars++] = 0;
2698 return(buffer);
2699#endif
2700 return(NULL);
2701}
2702
2703/**
2704 * xmlNamespaceParseNCName:
2705 * @ctxt: an XML parser context
2706 *
2707 * parse an XML namespace name.
2708 *
2709 * TODO: this seems not in use anymore, the namespace handling is done on
2710 * top of the SAX interfaces, i.e. not on raw input.
2711 *
2712 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2713 *
2714 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2715 * CombiningChar | Extender
2716 *
2717 * Returns the namespace name or NULL
2718 */
2719
2720xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002721xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002722#if 0
2723 xmlChar buf[XML_MAX_NAMELEN + 5];
2724 int len = 0, l;
2725 int cur = CUR_CHAR(l);
2726#endif
2727
2728 static int deprecated = 0;
2729 if (!deprecated) {
2730 xmlGenericError(xmlGenericErrorContext,
2731 "xmlNamespaceParseNCName() deprecated function reached\n");
2732 deprecated = 1;
2733 }
2734
2735#if 0
2736 /* load first the value of the char !!! */
2737 GROW;
2738 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2739
2740xmlGenericError(xmlGenericErrorContext,
2741 "xmlNamespaceParseNCName: reached loop 3\n");
2742 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2743 (cur == '.') || (cur == '-') ||
2744 (cur == '_') ||
2745 (IS_COMBINING(cur)) ||
2746 (IS_EXTENDER(cur))) {
2747 COPY_BUF(l,buf,len,cur);
2748 NEXTL(l);
2749 cur = CUR_CHAR(l);
2750 if (len >= XML_MAX_NAMELEN) {
2751 xmlGenericError(xmlGenericErrorContext,
2752 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2753 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2754 (cur == '.') || (cur == '-') ||
2755 (cur == '_') ||
2756 (IS_COMBINING(cur)) ||
2757 (IS_EXTENDER(cur))) {
2758 NEXTL(l);
2759 cur = CUR_CHAR(l);
2760 }
2761 break;
2762 }
2763 }
2764 return(xmlStrndup(buf, len));
2765#endif
2766 return(NULL);
2767}
2768
2769/**
2770 * xmlNamespaceParseQName:
2771 * @ctxt: an XML parser context
2772 * @prefix: a xmlChar **
2773 *
2774 * TODO: this seems not in use anymore, the namespace handling is done on
2775 * top of the SAX interfaces, i.e. not on raw input.
2776 *
2777 * parse an XML qualified name
2778 *
2779 * [NS 5] QName ::= (Prefix ':')? LocalPart
2780 *
2781 * [NS 6] Prefix ::= NCName
2782 *
2783 * [NS 7] LocalPart ::= NCName
2784 *
2785 * Returns the local part, and prefix is updated
2786 * to get the Prefix if any.
2787 */
2788
2789xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002790xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002791
2792 static int deprecated = 0;
2793 if (!deprecated) {
2794 xmlGenericError(xmlGenericErrorContext,
2795 "xmlNamespaceParseQName() deprecated function reached\n");
2796 deprecated = 1;
2797 }
2798
2799#if 0
2800 xmlChar *ret = NULL;
2801
2802 *prefix = NULL;
2803 ret = xmlNamespaceParseNCName(ctxt);
2804 if (RAW == ':') {
2805 *prefix = ret;
2806 NEXT;
2807 ret = xmlNamespaceParseNCName(ctxt);
2808 }
2809
2810 return(ret);
2811#endif
2812 return(NULL);
2813}
2814
2815/**
2816 * xmlNamespaceParseNSDef:
2817 * @ctxt: an XML parser context
2818 *
2819 * parse a namespace prefix declaration
2820 *
2821 * TODO: this seems not in use anymore, the namespace handling is done on
2822 * top of the SAX interfaces, i.e. not on raw input.
2823 *
2824 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2825 *
2826 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2827 *
2828 * Returns the namespace name
2829 */
2830
2831xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002832xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002833 static int deprecated = 0;
2834 if (!deprecated) {
2835 xmlGenericError(xmlGenericErrorContext,
2836 "xmlNamespaceParseNSDef() deprecated function reached\n");
2837 deprecated = 1;
2838 }
2839 return(NULL);
2840#if 0
2841 xmlChar *name = NULL;
2842
2843 if ((RAW == 'x') && (NXT(1) == 'm') &&
2844 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2845 (NXT(4) == 's')) {
2846 SKIP(5);
2847 if (RAW == ':') {
2848 NEXT;
2849 name = xmlNamespaceParseNCName(ctxt);
2850 }
2851 }
2852 return(name);
2853#endif
2854}
2855
2856/**
2857 * xmlParseQuotedString:
2858 * @ctxt: an XML parser context
2859 *
2860 * Parse and return a string between quotes or doublequotes
2861 *
2862 * TODO: Deprecated, to be removed at next drop of binary compatibility
2863 *
2864 * Returns the string parser or NULL.
2865 */
2866xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002867xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002868 static int deprecated = 0;
2869 if (!deprecated) {
2870 xmlGenericError(xmlGenericErrorContext,
2871 "xmlParseQuotedString() deprecated function reached\n");
2872 deprecated = 1;
2873 }
2874 return(NULL);
2875
2876#if 0
2877 xmlChar *buf = NULL;
2878 int len = 0,l;
2879 int size = XML_PARSER_BUFFER_SIZE;
2880 int c;
2881
2882 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2883 if (buf == NULL) {
2884 xmlGenericError(xmlGenericErrorContext,
2885 "malloc of %d byte failed\n", size);
2886 return(NULL);
2887 }
2888xmlGenericError(xmlGenericErrorContext,
2889 "xmlParseQuotedString: reached loop 4\n");
2890 if (RAW == '"') {
2891 NEXT;
2892 c = CUR_CHAR(l);
2893 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2894 if (len + 5 >= size) {
2895 size *= 2;
2896 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2897 if (buf == NULL) {
2898 xmlGenericError(xmlGenericErrorContext,
2899 "realloc of %d byte failed\n", size);
2900 return(NULL);
2901 }
2902 }
2903 COPY_BUF(l,buf,len,c);
2904 NEXTL(l);
2905 c = CUR_CHAR(l);
2906 }
2907 if (c != '"') {
2908 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2910 ctxt->sax->error(ctxt->userData,
2911 "String not closed \"%.50s\"\n", buf);
2912 ctxt->wellFormed = 0;
2913 ctxt->disableSAX = 1;
2914 } else {
2915 NEXT;
2916 }
2917 } else if (RAW == '\''){
2918 NEXT;
2919 c = CUR;
2920 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2921 if (len + 1 >= size) {
2922 size *= 2;
2923 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2924 if (buf == NULL) {
2925 xmlGenericError(xmlGenericErrorContext,
2926 "realloc of %d byte failed\n", size);
2927 return(NULL);
2928 }
2929 }
2930 buf[len++] = c;
2931 NEXT;
2932 c = CUR;
2933 }
2934 if (RAW != '\'') {
2935 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2937 ctxt->sax->error(ctxt->userData,
2938 "String not closed \"%.50s\"\n", buf);
2939 ctxt->wellFormed = 0;
2940 ctxt->disableSAX = 1;
2941 } else {
2942 NEXT;
2943 }
2944 }
2945 return(buf);
2946#endif
2947}
2948
2949/**
2950 * xmlParseNamespace:
2951 * @ctxt: an XML parser context
2952 *
2953 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2954 *
2955 * This is what the older xml-name Working Draft specified, a bunch of
2956 * other stuff may still rely on it, so support is still here as
2957 * if it was declared on the root of the Tree:-(
2958 *
2959 * TODO: remove from library
2960 *
2961 * To be removed at next drop of binary compatibility
2962 */
2963
2964void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002965xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002966 static int deprecated = 0;
2967 if (!deprecated) {
2968 xmlGenericError(xmlGenericErrorContext,
2969 "xmlParseNamespace() deprecated function reached\n");
2970 deprecated = 1;
2971 }
2972
2973#if 0
2974 xmlChar *href = NULL;
2975 xmlChar *prefix = NULL;
2976 int garbage = 0;
2977
2978 /*
2979 * We just skipped "namespace" or "xml:namespace"
2980 */
2981 SKIP_BLANKS;
2982
2983xmlGenericError(xmlGenericErrorContext,
2984 "xmlParseNamespace: reached loop 5\n");
2985 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2986 /*
2987 * We can have "ns" or "prefix" attributes
2988 * Old encoding as 'href' or 'AS' attributes is still supported
2989 */
2990 if ((RAW == 'n') && (NXT(1) == 's')) {
2991 garbage = 0;
2992 SKIP(2);
2993 SKIP_BLANKS;
2994
2995 if (RAW != '=') continue;
2996 NEXT;
2997 SKIP_BLANKS;
2998
2999 href = xmlParseQuotedString(ctxt);
3000 SKIP_BLANKS;
3001 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3002 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3003 garbage = 0;
3004 SKIP(4);
3005 SKIP_BLANKS;
3006
3007 if (RAW != '=') continue;
3008 NEXT;
3009 SKIP_BLANKS;
3010
3011 href = xmlParseQuotedString(ctxt);
3012 SKIP_BLANKS;
3013 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3014 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3015 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3016 garbage = 0;
3017 SKIP(6);
3018 SKIP_BLANKS;
3019
3020 if (RAW != '=') continue;
3021 NEXT;
3022 SKIP_BLANKS;
3023
3024 prefix = xmlParseQuotedString(ctxt);
3025 SKIP_BLANKS;
3026 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3027 garbage = 0;
3028 SKIP(2);
3029 SKIP_BLANKS;
3030
3031 if (RAW != '=') continue;
3032 NEXT;
3033 SKIP_BLANKS;
3034
3035 prefix = xmlParseQuotedString(ctxt);
3036 SKIP_BLANKS;
3037 } else if ((RAW == '?') && (NXT(1) == '>')) {
3038 garbage = 0;
3039 NEXT;
3040 } else {
3041 /*
3042 * Found garbage when parsing the namespace
3043 */
3044 if (!garbage) {
3045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3046 ctxt->sax->error(ctxt->userData,
3047 "xmlParseNamespace found garbage\n");
3048 }
3049 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3050 ctxt->wellFormed = 0;
3051 ctxt->disableSAX = 1;
3052 NEXT;
3053 }
3054 }
3055
3056 MOVETO_ENDTAG(CUR_PTR);
3057 NEXT;
3058
3059 /*
3060 * Register the DTD.
3061 if (href != NULL)
3062 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3063 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3064 */
3065
3066 if (prefix != NULL) xmlFree(prefix);
3067 if (href != NULL) xmlFree(href);
3068#endif
3069}
3070
3071/**
3072 * xmlScanName:
3073 * @ctxt: an XML parser context
3074 *
3075 * Trickery: parse an XML name but without consuming the input flow
3076 * Needed for rollback cases. Used only when parsing entities references.
3077 *
3078 * TODO: seems deprecated now, only used in the default part of
3079 * xmlParserHandleReference
3080 *
3081 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3082 * CombiningChar | Extender
3083 *
3084 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3085 *
3086 * [6] Names ::= Name (S Name)*
3087 *
3088 * Returns the Name parsed or NULL
3089 */
3090
3091xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003092xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003093 static int deprecated = 0;
3094 if (!deprecated) {
3095 xmlGenericError(xmlGenericErrorContext,
3096 "xmlScanName() deprecated function reached\n");
3097 deprecated = 1;
3098 }
3099 return(NULL);
3100
3101#if 0
3102 xmlChar buf[XML_MAX_NAMELEN];
3103 int len = 0;
3104
3105 GROW;
3106 if (!IS_LETTER(RAW) && (RAW != '_') &&
3107 (RAW != ':')) {
3108 return(NULL);
3109 }
3110
3111
3112 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3113 (NXT(len) == '.') || (NXT(len) == '-') ||
3114 (NXT(len) == '_') || (NXT(len) == ':') ||
3115 (IS_COMBINING(NXT(len))) ||
3116 (IS_EXTENDER(NXT(len)))) {
3117 GROW;
3118 buf[len] = NXT(len);
3119 len++;
3120 if (len >= XML_MAX_NAMELEN) {
3121 xmlGenericError(xmlGenericErrorContext,
3122 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3123 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3124 (IS_DIGIT(NXT(len))) ||
3125 (NXT(len) == '.') || (NXT(len) == '-') ||
3126 (NXT(len) == '_') || (NXT(len) == ':') ||
3127 (IS_COMBINING(NXT(len))) ||
3128 (IS_EXTENDER(NXT(len))))
3129 len++;
3130 break;
3131 }
3132 }
3133 return(xmlStrndup(buf, len));
3134#endif
3135}
3136
3137/**
3138 * xmlParserHandleReference:
3139 * @ctxt: the parser context
3140 *
3141 * TODO: Remove, now deprecated ... the test is done directly in the
3142 * content parsing
3143 * routines.
3144 *
3145 * [67] Reference ::= EntityRef | CharRef
3146 *
3147 * [68] EntityRef ::= '&' Name ';'
3148 *
3149 * [ WFC: Entity Declared ]
3150 * the Name given in the entity reference must match that in an entity
3151 * declaration, except that well-formed documents need not declare any
3152 * of the following entities: amp, lt, gt, apos, quot.
3153 *
3154 * [ WFC: Parsed Entity ]
3155 * An entity reference must not contain the name of an unparsed entity
3156 *
3157 * [66] CharRef ::= '&#' [0-9]+ ';' |
3158 * '&#x' [0-9a-fA-F]+ ';'
3159 *
3160 * A PEReference may have been detectect in the current input stream
3161 * the handling is done accordingly to
3162 * http://www.w3.org/TR/REC-xml#entproc
3163 */
3164void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003165xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003166 static int deprecated = 0;
3167 if (!deprecated) {
3168 xmlGenericError(xmlGenericErrorContext,
3169 "xmlParserHandleReference() deprecated function reached\n");
3170 deprecated = 1;
3171 }
3172
3173#if 0
3174 xmlParserInputPtr input;
3175 xmlChar *name;
3176 xmlEntityPtr ent = NULL;
3177
3178 if (ctxt->token != 0) {
3179 return;
3180 }
3181 if (RAW != '&') return;
3182 GROW;
3183 if ((RAW == '&') && (NXT(1) == '#')) {
3184 switch(ctxt->instate) {
3185 case XML_PARSER_ENTITY_DECL:
3186 case XML_PARSER_PI:
3187 case XML_PARSER_CDATA_SECTION:
3188 case XML_PARSER_COMMENT:
3189 case XML_PARSER_SYSTEM_LITERAL:
3190 /* we just ignore it there */
3191 return;
3192 case XML_PARSER_START_TAG:
3193 return;
3194 case XML_PARSER_END_TAG:
3195 return;
3196 case XML_PARSER_EOF:
3197 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3199 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3200 ctxt->wellFormed = 0;
3201 ctxt->disableSAX = 1;
3202 return;
3203 case XML_PARSER_PROLOG:
3204 case XML_PARSER_START:
3205 case XML_PARSER_MISC:
3206 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3209 ctxt->wellFormed = 0;
3210 ctxt->disableSAX = 1;
3211 return;
3212 case XML_PARSER_EPILOG:
3213 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3215 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3216 ctxt->wellFormed = 0;
3217 ctxt->disableSAX = 1;
3218 return;
3219 case XML_PARSER_DTD:
3220 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3222 ctxt->sax->error(ctxt->userData,
3223 "CharRef are forbiden in DTDs!\n");
3224 ctxt->wellFormed = 0;
3225 ctxt->disableSAX = 1;
3226 return;
3227 case XML_PARSER_ENTITY_VALUE:
3228 /*
3229 * NOTE: in the case of entity values, we don't do the
3230 * substitution here since we need the literal
3231 * entity value to be able to save the internal
3232 * subset of the document.
3233 * This will be handled by xmlStringDecodeEntities
3234 */
3235 return;
3236 case XML_PARSER_CONTENT:
3237 return;
3238 case XML_PARSER_ATTRIBUTE_VALUE:
3239 /* ctxt->token = xmlParseCharRef(ctxt); */
3240 return;
3241 case XML_PARSER_IGNORE:
3242 return;
3243 }
3244 return;
3245 }
3246
3247 switch(ctxt->instate) {
3248 case XML_PARSER_CDATA_SECTION:
3249 return;
3250 case XML_PARSER_PI:
3251 case XML_PARSER_COMMENT:
3252 case XML_PARSER_SYSTEM_LITERAL:
3253 case XML_PARSER_CONTENT:
3254 return;
3255 case XML_PARSER_START_TAG:
3256 return;
3257 case XML_PARSER_END_TAG:
3258 return;
3259 case XML_PARSER_EOF:
3260 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3262 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3263 ctxt->wellFormed = 0;
3264 ctxt->disableSAX = 1;
3265 return;
3266 case XML_PARSER_PROLOG:
3267 case XML_PARSER_START:
3268 case XML_PARSER_MISC:
3269 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3271 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3272 ctxt->wellFormed = 0;
3273 ctxt->disableSAX = 1;
3274 return;
3275 case XML_PARSER_EPILOG:
3276 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3278 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3279 ctxt->wellFormed = 0;
3280 ctxt->disableSAX = 1;
3281 return;
3282 case XML_PARSER_ENTITY_VALUE:
3283 /*
3284 * NOTE: in the case of entity values, we don't do the
3285 * substitution here since we need the literal
3286 * entity value to be able to save the internal
3287 * subset of the document.
3288 * This will be handled by xmlStringDecodeEntities
3289 */
3290 return;
3291 case XML_PARSER_ATTRIBUTE_VALUE:
3292 /*
3293 * NOTE: in the case of attributes values, we don't do the
3294 * substitution here unless we are in a mode where
3295 * the parser is explicitely asked to substitute
3296 * entities. The SAX callback is called with values
3297 * without entity substitution.
3298 * This will then be handled by xmlStringDecodeEntities
3299 */
3300 return;
3301 case XML_PARSER_ENTITY_DECL:
3302 /*
3303 * we just ignore it there
3304 * the substitution will be done once the entity is referenced
3305 */
3306 return;
3307 case XML_PARSER_DTD:
3308 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3310 ctxt->sax->error(ctxt->userData,
3311 "Entity references are forbiden in DTDs!\n");
3312 ctxt->wellFormed = 0;
3313 ctxt->disableSAX = 1;
3314 return;
3315 case XML_PARSER_IGNORE:
3316 return;
3317 }
3318
3319/* TODO: this seems not reached anymore .... Verify ... */
3320xmlGenericError(xmlGenericErrorContext,
3321 "Reached deprecated section in xmlParserHandleReference()\n");
3322xmlGenericError(xmlGenericErrorContext,
3323 "Please forward the document to Daniel.Veillard@w3.org\n");
3324xmlGenericError(xmlGenericErrorContext,
3325 "indicating the version: %s, thanks !\n", xmlParserVersion);
3326 NEXT;
3327 name = xmlScanName(ctxt);
3328 if (name == NULL) {
3329 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3332 ctxt->wellFormed = 0;
3333 ctxt->disableSAX = 1;
3334 ctxt->token = '&';
3335 return;
3336 }
3337 if (NXT(xmlStrlen(name)) != ';') {
3338 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData,
3341 "Entity reference: ';' expected\n");
3342 ctxt->wellFormed = 0;
3343 ctxt->disableSAX = 1;
3344 ctxt->token = '&';
3345 xmlFree(name);
3346 return;
3347 }
3348 SKIP(xmlStrlen(name) + 1);
3349 if (ctxt->sax != NULL) {
3350 if (ctxt->sax->getEntity != NULL)
3351 ent = ctxt->sax->getEntity(ctxt->userData, name);
3352 }
3353
3354 /*
3355 * [ WFC: Entity Declared ]
3356 * the Name given in the entity reference must match that in an entity
3357 * declaration, except that well-formed documents need not declare any
3358 * of the following entities: amp, lt, gt, apos, quot.
3359 */
3360 if (ent == NULL)
3361 ent = xmlGetPredefinedEntity(name);
3362 if (ent == NULL) {
3363 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3365 ctxt->sax->error(ctxt->userData,
3366 "Entity reference: entity %s not declared\n",
3367 name);
3368 ctxt->wellFormed = 0;
3369 ctxt->disableSAX = 1;
3370 xmlFree(name);
3371 return;
3372 }
3373
3374 /*
3375 * [ WFC: Parsed Entity ]
3376 * An entity reference must not contain the name of an unparsed entity
3377 */
3378 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3379 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3381 ctxt->sax->error(ctxt->userData,
3382 "Entity reference to unparsed entity %s\n", name);
3383 ctxt->wellFormed = 0;
3384 ctxt->disableSAX = 1;
3385 }
3386
3387 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3388 ctxt->token = ent->content[0];
3389 xmlFree(name);
3390 return;
3391 }
3392 input = xmlNewEntityInputStream(ctxt, ent);
3393 xmlPushInput(ctxt, input);
3394 xmlFree(name);
3395#endif
3396 return;
3397}
3398
3399/**
3400 * xmlHandleEntity:
3401 * @ctxt: an XML parser context
3402 * @entity: an XML entity pointer.
3403 *
3404 * Default handling of defined entities, when should we define a new input
3405 * stream ? When do we just handle that as a set of chars ?
3406 *
3407 * OBSOLETE: to be removed at some point.
3408 */
3409
3410void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003411xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003412 static int deprecated = 0;
3413 if (!deprecated) {
3414 xmlGenericError(xmlGenericErrorContext,
3415 "xmlHandleEntity() deprecated function reached\n");
3416 deprecated = 1;
3417 }
3418
3419#if 0
3420 int len;
3421 xmlParserInputPtr input;
3422
3423 if (entity->content == NULL) {
3424 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3426 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3427 entity->name);
3428 ctxt->wellFormed = 0;
3429 ctxt->disableSAX = 1;
3430 return;
3431 }
3432 len = xmlStrlen(entity->content);
3433 if (len <= 2) goto handle_as_char;
3434
3435 /*
3436 * Redefine its content as an input stream.
3437 */
3438 input = xmlNewEntityInputStream(ctxt, entity);
3439 xmlPushInput(ctxt, input);
3440 return;
3441
3442handle_as_char:
3443 /*
3444 * Just handle the content as a set of chars.
3445 */
3446 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3447 (ctxt->sax->characters != NULL))
3448 ctxt->sax->characters(ctxt->userData, entity->content, len);
3449#endif
3450}
3451
3452/**
3453 * xmlNewGlobalNs:
3454 * @doc: the document carrying the namespace
3455 * @href: the URI associated
3456 * @prefix: the prefix for the namespace
3457 *
3458 * Creation of a Namespace, the old way using PI and without scoping
3459 * DEPRECATED !!!
3460 * It now create a namespace on the root element of the document if found.
3461 * Returns NULL this functionnality had been removed
3462 */
3463xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003464xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3465 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003466 static int deprecated = 0;
3467 if (!deprecated) {
3468 xmlGenericError(xmlGenericErrorContext,
3469 "xmlNewGlobalNs() deprecated function reached\n");
3470 deprecated = 1;
3471 }
3472 return(NULL);
3473#if 0
3474 xmlNodePtr root;
3475
3476 xmlNsPtr cur;
3477
3478 root = xmlDocGetRootElement(doc);
3479 if (root != NULL)
3480 return(xmlNewNs(root, href, prefix));
3481
3482 /*
3483 * if there is no root element yet, create an old Namespace type
3484 * and it will be moved to the root at save time.
3485 */
3486 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3487 if (cur == NULL) {
3488 xmlGenericError(xmlGenericErrorContext,
3489 "xmlNewGlobalNs : malloc failed\n");
3490 return(NULL);
3491 }
3492 memset(cur, 0, sizeof(xmlNs));
3493 cur->type = XML_GLOBAL_NAMESPACE;
3494
3495 if (href != NULL)
3496 cur->href = xmlStrdup(href);
3497 if (prefix != NULL)
3498 cur->prefix = xmlStrdup(prefix);
3499
3500 /*
3501 * Add it at the end to preserve parsing order ...
3502 */
3503 if (doc != NULL) {
3504 if (doc->oldNs == NULL) {
3505 doc->oldNs = cur;
3506 } else {
3507 xmlNsPtr prev = doc->oldNs;
3508
3509 while (prev->next != NULL) prev = prev->next;
3510 prev->next = cur;
3511 }
3512 }
3513
3514 return(NULL);
3515#endif
3516}
3517
3518/**
3519 * xmlUpgradeOldNs:
3520 * @doc: a document pointer
3521 *
3522 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3523 * DEPRECATED
3524 */
3525void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003526xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003527 static int deprecated = 0;
3528 if (!deprecated) {
3529 xmlGenericError(xmlGenericErrorContext,
3530 "xmlNewGlobalNs() deprecated function reached\n");
3531 deprecated = 1;
3532 }
3533#if 0
3534 xmlNsPtr cur;
3535
3536 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3537 if (doc->children == NULL) {
3538#ifdef DEBUG_TREE
3539 xmlGenericError(xmlGenericErrorContext,
3540 "xmlUpgradeOldNs: failed no root !\n");
3541#endif
3542 return;
3543 }
3544
3545 cur = doc->oldNs;
3546 while (cur->next != NULL) {
3547 cur->type = XML_LOCAL_NAMESPACE;
3548 cur = cur->next;
3549 }
3550 cur->type = XML_LOCAL_NAMESPACE;
3551 cur->next = doc->children->nsDef;
3552 doc->children->nsDef = doc->oldNs;
3553 doc->oldNs = NULL;
3554#endif
3555}
3556