/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50
/************************************************************************
 *                                                                      *
 *              When running GCC in vacuum cleaner mode                 *
 *                                                                      *
 ************************************************************************/
56
57#ifdef __GNUC__
58#define UNUSED __attribute__((__unused__))
59#else
60#define UNUSED
61#endif
62
63void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000064
65/************************************************************************
66 * *
67 * Version and Features handling *
68 * *
69 ************************************************************************/
/* Version string of the library, taken from LIBXML_VERSION_STRING at build time. */
const char *xmlParserVersion = LIBXML_VERSION_STRING;
71
72/*
73 * xmlCheckVersion:
74 * @version: the include version number
75 *
76 * check the compiled lib version against the include one.
77 * This can warn or immediately kill the application
78 */
79void
80xmlCheckVersion(int version) {
81 int myversion = (int) LIBXML_VERSION;
82
83 if ((myversion / 10000) != (version / 10000)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Fatal: program compiled against libxml %d using libxml %d\n",
86 (version / 10000), (myversion / 10000));
87 exit(1);
88 }
89 if ((myversion / 100) < (version / 100)) {
90 xmlGenericError(xmlGenericErrorContext,
91 "Warning: program compiled against libxml %d using older %d\n",
92 (version / 100), (myversion / 100));
93 }
94}
95
96
/*
 * Names of the parser features, in the order in which they are handed
 * out by xmlGetFeaturesList().  The strings are the same ones compared
 * with strcmp() in xmlGetFeature() and xmlSetFeature().
 *
 * NOTE(review): "load subset" and "stop parser" are listed here but have
 * no matching branch in xmlGetFeature()/xmlSetFeature() as visible in
 * this file, so asking for them returns -1 - confirm whether intended.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};
141
142/*
143 * xmlGetFeaturesList:
144 * @len: the length of the features name array (input/output)
145 * @result: an array of string to be filled with the features name.
146 *
147 * Copy at most *@len feature names into the @result array
148 *
149 * Returns -1 in case or error, or the total number of features,
150 * len is updated with the number of strings copied,
151 * strings must not be deallocated
152 */
153int
154xmlGetFeaturesList(int *len, const char **result) {
155 int ret, i;
156
157 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
158 if ((len == NULL) || (result == NULL))
159 return(ret);
160 if ((*len < 0) || (*len >= 1000))
161 return(-1);
162 if (*len > ret)
163 *len = ret;
164 for (i = 0;i < *len;i++)
165 result[i] = xmlFeaturesList[i];
166 return(ret);
167}
168
169/*
170 * xmlGetFeature:
171 * @ctxt: an XML/HTML parser context
172 * @name: the feature name
173 * @result: location to store the result
174 *
175 * Read the current value of one feature of this parser instance
176 *
177 * Returns -1 in case or error, 0 otherwise
178 */
179int
180xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
181 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
182 return(-1);
183
184 if (!strcmp(name, "validate")) {
185 *((int *) result) = ctxt->validate;
186 } else if (!strcmp(name, "keep blanks")) {
187 *((int *) result) = ctxt->keepBlanks;
188 } else if (!strcmp(name, "disable SAX")) {
189 *((int *) result) = ctxt->disableSAX;
190 } else if (!strcmp(name, "fetch external entities")) {
191 *((int *) result) = ctxt->loadsubset;
192 } else if (!strcmp(name, "substitute entities")) {
193 *((int *) result) = ctxt->replaceEntities;
194 } else if (!strcmp(name, "gather line info")) {
195 *((int *) result) = ctxt->record_info;
196 } else if (!strcmp(name, "user data")) {
197 *((void **)result) = ctxt->userData;
198 } else if (!strcmp(name, "is html")) {
199 *((int *) result) = ctxt->html;
200 } else if (!strcmp(name, "is standalone")) {
201 *((int *) result) = ctxt->standalone;
202 } else if (!strcmp(name, "document")) {
203 *((xmlDocPtr *) result) = ctxt->myDoc;
204 } else if (!strcmp(name, "is well formed")) {
205 *((int *) result) = ctxt->wellFormed;
206 } else if (!strcmp(name, "is valid")) {
207 *((int *) result) = ctxt->valid;
208 } else if (!strcmp(name, "SAX block")) {
209 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
210 } else if (!strcmp(name, "SAX function internalSubset")) {
211 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
212 } else if (!strcmp(name, "SAX function isStandalone")) {
213 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
214 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
215 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
216 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
217 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
218 } else if (!strcmp(name, "SAX function resolveEntity")) {
219 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
220 } else if (!strcmp(name, "SAX function getEntity")) {
221 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
222 } else if (!strcmp(name, "SAX function entityDecl")) {
223 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
224 } else if (!strcmp(name, "SAX function notationDecl")) {
225 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
226 } else if (!strcmp(name, "SAX function attributeDecl")) {
227 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
228 } else if (!strcmp(name, "SAX function elementDecl")) {
229 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
230 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
231 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
232 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
233 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
234 } else if (!strcmp(name, "SAX function startDocument")) {
235 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
236 } else if (!strcmp(name, "SAX function endDocument")) {
237 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
238 } else if (!strcmp(name, "SAX function startElement")) {
239 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
240 } else if (!strcmp(name, "SAX function endElement")) {
241 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
242 } else if (!strcmp(name, "SAX function reference")) {
243 *((referenceSAXFunc *) result) = ctxt->sax->reference;
244 } else if (!strcmp(name, "SAX function characters")) {
245 *((charactersSAXFunc *) result) = ctxt->sax->characters;
246 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
247 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
248 } else if (!strcmp(name, "SAX function processingInstruction")) {
249 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
250 } else if (!strcmp(name, "SAX function comment")) {
251 *((commentSAXFunc *) result) = ctxt->sax->comment;
252 } else if (!strcmp(name, "SAX function warning")) {
253 *((warningSAXFunc *) result) = ctxt->sax->warning;
254 } else if (!strcmp(name, "SAX function error")) {
255 *((errorSAXFunc *) result) = ctxt->sax->error;
256 } else if (!strcmp(name, "SAX function fatalError")) {
257 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
258 } else if (!strcmp(name, "SAX function getParameterEntity")) {
259 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
260 } else if (!strcmp(name, "SAX function cdataBlock")) {
261 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
262 } else if (!strcmp(name, "SAX function externalSubset")) {
263 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
264 } else {
265 return(-1);
266 }
267 return(0);
268}
269
270/*
271 * xmlSetFeature:
272 * @ctxt: an XML/HTML parser context
273 * @name: the feature name
274 * @value: pointer to the location of the new value
275 *
276 * Change the current value of one feature of this parser instance
277 *
278 * Returns -1 in case or error, 0 otherwise
279 */
280int
281xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
282 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
283 return(-1);
284
285 if (!strcmp(name, "validate")) {
286 int newvalidate = *((int *) value);
287 if ((!ctxt->validate) && (newvalidate != 0)) {
288 if (ctxt->vctxt.warning == NULL)
289 ctxt->vctxt.warning = xmlParserValidityWarning;
290 if (ctxt->vctxt.error == NULL)
291 ctxt->vctxt.error = xmlParserValidityError;
292 /* Allocate the Node stack */
293 ctxt->vctxt.nodeTab = (xmlNodePtr *)
294 xmlMalloc(4 * sizeof(xmlNodePtr));
295 if (ctxt->vctxt.nodeTab == NULL) {
296 ctxt->vctxt.nodeMax = 0;
297 ctxt->validate = 0;
298 return(-1);
299 }
300 ctxt->vctxt.nodeNr = 0;
301 ctxt->vctxt.nodeMax = 4;
302 ctxt->vctxt.node = NULL;
303 }
304 ctxt->validate = newvalidate;
305 } else if (!strcmp(name, "keep blanks")) {
306 ctxt->keepBlanks = *((int *) value);
307 } else if (!strcmp(name, "disable SAX")) {
308 ctxt->disableSAX = *((int *) value);
309 } else if (!strcmp(name, "fetch external entities")) {
310 ctxt->loadsubset = *((int *) value);
311 } else if (!strcmp(name, "substitute entities")) {
312 ctxt->replaceEntities = *((int *) value);
313 } else if (!strcmp(name, "gather line info")) {
314 ctxt->record_info = *((int *) value);
315 } else if (!strcmp(name, "user data")) {
316 ctxt->userData = *((void **)value);
317 } else if (!strcmp(name, "is html")) {
318 ctxt->html = *((int *) value);
319 } else if (!strcmp(name, "is standalone")) {
320 ctxt->standalone = *((int *) value);
321 } else if (!strcmp(name, "document")) {
322 ctxt->myDoc = *((xmlDocPtr *) value);
323 } else if (!strcmp(name, "is well formed")) {
324 ctxt->wellFormed = *((int *) value);
325 } else if (!strcmp(name, "is valid")) {
326 ctxt->valid = *((int *) value);
327 } else if (!strcmp(name, "SAX block")) {
328 ctxt->sax = *((xmlSAXHandlerPtr *) value);
329 } else if (!strcmp(name, "SAX function internalSubset")) {
330 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function isStandalone")) {
332 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
334 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
336 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function resolveEntity")) {
338 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
339 } else if (!strcmp(name, "SAX function getEntity")) {
340 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
341 } else if (!strcmp(name, "SAX function entityDecl")) {
342 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function notationDecl")) {
344 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function attributeDecl")) {
346 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function elementDecl")) {
348 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
350 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
352 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function startDocument")) {
354 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function endDocument")) {
356 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function startElement")) {
358 ctxt->sax->startElement = *((startElementSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function endElement")) {
360 ctxt->sax->endElement = *((endElementSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function reference")) {
362 ctxt->sax->reference = *((referenceSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function characters")) {
364 ctxt->sax->characters = *((charactersSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
366 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function processingInstruction")) {
368 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
369 } else if (!strcmp(name, "SAX function comment")) {
370 ctxt->sax->comment = *((commentSAXFunc *) value);
371 } else if (!strcmp(name, "SAX function warning")) {
372 ctxt->sax->warning = *((warningSAXFunc *) value);
373 } else if (!strcmp(name, "SAX function error")) {
374 ctxt->sax->error = *((errorSAXFunc *) value);
375 } else if (!strcmp(name, "SAX function fatalError")) {
376 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
377 } else if (!strcmp(name, "SAX function getParameterEntity")) {
378 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
379 } else if (!strcmp(name, "SAX function cdataBlock")) {
380 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
381 } else if (!strcmp(name, "SAX function externalSubset")) {
382 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
383 } else {
384 return(-1);
385 }
386 return(0);
387}
388
389/************************************************************************
390 * *
391 * Some functions to avoid too large macros *
392 * *
393 ************************************************************************/
394
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the production
 *   [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE and
 * FFFF.  Also available as the macro IS_CHAR().
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* the three control characters that are allowed */
    if ((c == 0x09) || (c == 0x0A) || (c == 0x0D))
        return(1);
    /* everything else below space (including negatives) is rejected */
    if (c < 0x20)
        return(0);
    if (c <= 0xD7FF)
        return(1);
    if ((c >= 0xE000) && (c <= 0xFFFD))
        return(1);
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
415
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the characters allowed by the production
 *   [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as the macro IS_BLANK().
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
430
/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from the xmlBaseArray lookup
 * table; higher ones are tested against the explicit ranges below,
 * split into three tiers so that only the ranges that can possibly
 * match are evaluated.  Negative values are never base characters.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

int
xmlIsBaseChar(int c) {
    /* a negative value must not be used as an index into xmlBaseArray */
    if (c < 0)
        return(0);

    /* Latin-1 and below: direct table lookup */
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* tier 1: 0x0100 .. 0x0904 (highest range here ends at 0x06E6) */
    if (c < 0x0905)
        return(
            (((c) >= 0x0100) && ((c) <= 0x0131)) ||
            (((c) >= 0x0134) && ((c) <= 0x013E)) ||
            (((c) >= 0x0141) && ((c) <= 0x0148)) ||
            (((c) >= 0x014A) && ((c) <= 0x017E)) ||
            (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
            (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
            (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
            (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
            (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
            (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
            ((c) == 0x0386) ||
            (((c) >= 0x0388) && ((c) <= 0x038A)) ||
            ((c) == 0x038C) ||
            (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
            (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
            (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
            ((c) == 0x03DA) ||
            ((c) == 0x03DC) ||
            ((c) == 0x03DE) ||
            ((c) == 0x03E0) ||
            (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
            (((c) >= 0x0401) && ((c) <= 0x040C)) ||
            (((c) >= 0x040E) && ((c) <= 0x044F)) ||
            (((c) >= 0x0451) && ((c) <= 0x045C)) ||
            (((c) >= 0x045E) && ((c) <= 0x0481)) ||
            (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
            (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
            (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
            (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
            (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
            (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
            (((c) >= 0x0531) && ((c) <= 0x0556)) ||
            ((c) == 0x0559) ||
            (((c) >= 0x0561) && ((c) <= 0x0586)) ||
            (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
            (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
            (((c) >= 0x0621) && ((c) <= 0x063A)) ||
            (((c) >= 0x0641) && ((c) <= 0x064A)) ||
            (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
            (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
            (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
            (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
            ((c) == 0x06D5) ||
            (((c) >= 0x06E5) && ((c) <= 0x06E6)));

    /* tier 2: 0x0905 .. 0x109F (highest range here ends at 0x0F69) */
    if (c < 0x10A0)
        return(
            (((c) >= 0x0905) && ((c) <= 0x0939)) ||
            ((c) == 0x093D) ||
            (((c) >= 0x0958) && ((c) <= 0x0961)) ||
            (((c) >= 0x0985) && ((c) <= 0x098C)) ||
            (((c) >= 0x098F) && ((c) <= 0x0990)) ||
            (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
            (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
            ((c) == 0x09B2) ||
            (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
            (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
            (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
            (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
            (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
            (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
            (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
            (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
            (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
            (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
            (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
            (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
            ((c) == 0x0A5E) ||
            (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
            (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
            ((c) == 0x0A8D) ||
            (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
            (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
            (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
            (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
            (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
            ((c) == 0x0ABD) ||
            ((c) == 0x0AE0) ||
            (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
            (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
            (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
            (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
            (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
            (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
            ((c) == 0x0B3D) ||
            (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
            (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
            (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
            (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
            (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
            (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
            ((c) == 0x0B9C) ||
            (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
            (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
            (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
            (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
            (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
            (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
            (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
            (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
            (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
            (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
            (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
            (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
            (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
            (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
            (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
            (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
            ((c) == 0x0CDE) ||
            (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
            (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
            (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
            (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
            (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
            (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
            (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
            ((c) == 0x0E30) ||
            (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
            (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
            (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
            ((c) == 0x0E84) ||
            (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
            ((c) == 0x0E8A) ||
            ((c) == 0x0E8D) ||
            (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
            (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
            (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
            ((c) == 0x0EA5) ||
            ((c) == 0x0EA7) ||
            (((c) >= 0x0EAA) && 
             ((c) <= 0x0EAB)) ||
            (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
            ((c) == 0x0EB0) ||
            (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
            ((c) == 0x0EBD) ||
            (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
            (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
            (((c) >= 0x0F49) && ((c) <= 0x0F69)));

    /* tier 3: 0x10A0 and above */
    return(
        (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
        (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
        ((c) == 0x1100) ||
        (((c) >= 0x1102) && ((c) <= 0x1103)) ||
        (((c) >= 0x1105) && ((c) <= 0x1107)) ||
        ((c) == 0x1109) ||
        (((c) >= 0x110B) && ((c) <= 0x110C)) ||
        (((c) >= 0x110E) && ((c) <= 0x1112)) ||
        ((c) == 0x113C) ||
        ((c) == 0x113E) ||
        ((c) == 0x1140) ||
        ((c) == 0x114C) ||
        ((c) == 0x114E) ||
        ((c) == 0x1150) ||
        (((c) >= 0x1154) && ((c) <= 0x1155)) ||
        ((c) == 0x1159) ||
        (((c) >= 0x115F) && ((c) <= 0x1161)) ||
        ((c) == 0x1163) ||
        ((c) == 0x1165) ||
        ((c) == 0x1167) ||
        ((c) == 0x1169) ||
        (((c) >= 0x116D) && ((c) <= 0x116E)) ||
        (((c) >= 0x1172) && ((c) <= 0x1173)) ||
        ((c) == 0x1175) ||
        ((c) == 0x119E) ||
        ((c) == 0x11A8) ||
        ((c) == 0x11AB) ||
        (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
        (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
        ((c) == 0x11BA) ||
        (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
        ((c) == 0x11EB) ||
        ((c) == 0x11F0) ||
        ((c) == 0x11F9) ||
        (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
        (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
        (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
        (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
        (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
        (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
        (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
        ((c) == 0x1F59) ||
        ((c) == 0x1F5B) ||
        ((c) == 0x1F5D) ||
        (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
        (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
        (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
        ((c) == 0x1FBE) ||
        (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
        (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
        (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
        (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
        (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
        (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
        (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
        ((c) == 0x2126) ||
        (((c) >= 0x212A) && ((c) <= 0x212B)) ||
        ((c) == 0x212E) ||
        (((c) >= 0x2180) && ((c) <= 0x2182)) ||
        (((c) >= 0x3041) && ((c) <= 0x3094)) ||
        (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
        (((c) >= 0x3105) && ((c) <= 0x312C)) ||
        (((c) >= 0xAC00) && ((c) <= 0xD7A3)));
}
669
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [88] Digit ::= ... long list see REC ...
 *
 * ASCII digits are tested first; anything else below 0x0660 is rejected
 * at once, and the remaining blocks are looked up in a small table of
 * inclusive [first, last] ranges.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const int digitRanges[][2] = {
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 },
    };
    unsigned int idx;

    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);
    /* nothing between the ASCII digits and 0x0660 is a digit */
    if (c < 0x0660)
        return(0);

    for (idx = 0; idx < sizeof(digitRanges) / sizeof(digitRanges[0]); idx++) {
        if ((c >= digitRanges[idx][0]) && (c <= digitRanges[idx][1]))
            return(1);
    }
    return(0);
}
699
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges are split into four tiers by their lowest code point so
 * that only the ranges that can possibly match are evaluated.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* nothing below 0x0300 is a combining character */
    if (c < 0x0300)
        return(0);

    /* tier 1: 0x0300 .. 0x0900 (highest range here ends at 0x06ED) */
    if (c < 0x0901)
        return(
            (((c) >= 0x0300) && ((c) <= 0x0345)) ||
            (((c) >= 0x0360) && ((c) <= 0x0361)) ||
            (((c) >= 0x0483) && ((c) <= 0x0486)) ||
            (((c) >= 0x0591) && ((c) <= 0x05A1)) ||
            (((c) >= 0x05A3) && ((c) <= 0x05B9)) ||
            (((c) >= 0x05BB) && ((c) <= 0x05BD)) ||
            ((c) == 0x05BF) ||
            (((c) >= 0x05C1) && ((c) <= 0x05C2)) ||
            ((c) == 0x05C4) ||
            (((c) >= 0x064B) && ((c) <= 0x0652)) ||
            ((c) == 0x0670) ||
            (((c) >= 0x06D6) && ((c) <= 0x06DC)) ||
            (((c) >= 0x06DD) && ((c) <= 0x06DF)) ||
            (((c) >= 0x06E0) && ((c) <= 0x06E4)) ||
            (((c) >= 0x06E7) && ((c) <= 0x06E8)) ||
            (((c) >= 0x06EA) && ((c) <= 0x06ED)));

    /* tier 2: 0x0901 .. 0x0A01 (highest range here ends at 0x09E3) */
    if (c < 0x0A02)
        return(
            (((c) >= 0x0901) && ((c) <= 0x0903)) ||
            ((c) == 0x093C) ||
            (((c) >= 0x093E) && ((c) <= 0x094C)) ||
            ((c) == 0x094D) ||
            (((c) >= 0x0951) && ((c) <= 0x0954)) ||
            (((c) >= 0x0962) && ((c) <= 0x0963)) ||
            (((c) >= 0x0981) && ((c) <= 0x0983)) ||
            ((c) == 0x09BC) ||
            ((c) == 0x09BE) ||
            ((c) == 0x09BF) ||
            (((c) >= 0x09C0) && ((c) <= 0x09C4)) ||
            (((c) >= 0x09C7) && ((c) <= 0x09C8)) ||
            (((c) >= 0x09CB) && ((c) <= 0x09CD)) ||
            ((c) == 0x09D7) ||
            (((c) >= 0x09E2) && ((c) <= 0x09E3)));

    /* tier 3: 0x0A02 .. 0x0E30 (highest value here is 0x0D57) */
    if (c < 0x0E31)
        return(
            ((c) == 0x0A02) ||
            ((c) == 0x0A3C) ||
            ((c) == 0x0A3E) ||
            ((c) == 0x0A3F) ||
            (((c) >= 0x0A40) && ((c) <= 0x0A42)) ||
            (((c) >= 0x0A47) && ((c) <= 0x0A48)) ||
            (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) ||
            (((c) >= 0x0A70) && ((c) <= 0x0A71)) ||
            (((c) >= 0x0A81) && ((c) <= 0x0A83)) ||
            ((c) == 0x0ABC) ||
            (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) ||
            (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) ||
            (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) ||
            (((c) >= 0x0B01) && ((c) <= 0x0B03)) ||
            ((c) == 0x0B3C) ||
            (((c) >= 0x0B3E) && ((c) <= 0x0B43)) ||
            (((c) >= 0x0B47) && ((c) <= 0x0B48)) ||
            (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) ||
            (((c) >= 0x0B56) && ((c) <= 0x0B57)) ||
            (((c) >= 0x0B82) && ((c) <= 0x0B83)) ||
            (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) ||
            (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) ||
            (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) ||
            ((c) == 0x0BD7) ||
            (((c) >= 0x0C01) && ((c) <= 0x0C03)) ||
            (((c) >= 0x0C3E) && ((c) <= 0x0C44)) ||
            (((c) >= 0x0C46) && ((c) <= 0x0C48)) ||
            (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) ||
            (((c) >= 0x0C55) && ((c) <= 0x0C56)) ||
            (((c) >= 0x0C82) && ((c) <= 0x0C83)) ||
            (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) ||
            (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) ||
            (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) ||
            (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) ||
            (((c) >= 0x0D02) && ((c) <= 0x0D03)) ||
            (((c) >= 0x0D3E) && ((c) <= 0x0D43)) ||
            (((c) >= 0x0D46) && ((c) <= 0x0D48)) ||
            (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) ||
            ((c) == 0x0D57));

    /* tier 4: 0x0E31 and above */
    return(
        ((c) == 0x0E31) ||
        (((c) >= 0x0E34) && ((c) <= 0x0E3A)) ||
        (((c) >= 0x0E47) && ((c) <= 0x0E4E)) ||
        ((c) == 0x0EB1) ||
        (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) ||
        (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) ||
        (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) ||
        (((c) >= 0x0F18) && ((c) <= 0x0F19)) ||
        ((c) == 0x0F35) ||
        ((c) == 0x0F37) ||
        ((c) == 0x0F39) ||
        ((c) == 0x0F3E) ||
        ((c) == 0x0F3F) ||
        (((c) >= 0x0F71) && ((c) <= 0x0F84)) ||
        (((c) >= 0x0F86) && ((c) <= 0x0F8B)) ||
        (((c) >= 0x0F90) && ((c) <= 0x0F95)) ||
        ((c) == 0x0F97) ||
        (((c) >= 0x0F99) && ((c) <= 0x0FAD)) ||
        (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) ||
        ((c) == 0x0FB9) ||
        (((c) >= 0x20D0) && ((c) <= 0x20DC)) ||
        ((c) == 0x20E1) ||
        (((c) >= 0x302A) && ((c) <= 0x302F)) ||
        ((c) == 0x3099) ||
        ((c) == 0x309A));
}
812
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                     #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                     [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE]: all three values, including the middle 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
837
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted.  The CJK
 * compatibility block (#xF900-#xFA2D) is deliberately not matched: it
 * is not part of production [86].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick exit: the lowest ideographic code point is 0x3007 */
    if (c < 0x0100)
        return(0);
    return(
        (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
        (((c) >= 0x3021) && ((c) <= 0x3029)) ||
        ((c) == 0x3007));
}
855
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The two alternatives are tested with the IS_BASECHAR() and
 * IS_IDEOGRAPHIC() macros, in that order.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
869
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* the three alphanumeric ranges [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* white space and the punctuation set of the production */
    switch (c) {
    case 0x20: case 0x0D: case 0x0A:
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
892
893/************************************************************************
894 * *
895 * Input handling functions for progressive parsing *
896 * *
897 ************************************************************************/
898
899/* #define DEBUG_INPUT */
900/* #define DEBUG_STACK */
901/* #define DEBUG_PUSH */
902
903
904/* we need to keep enough input to show errors in context */
905#define LINE_LEN 80
906
907#ifdef DEBUG_INPUT
908#define CHECK_BUFFER(in) check_buffer(in)
909
910void check_buffer(xmlParserInputPtr in) {
911 if (in->base != in->buf->buffer->content) {
912 xmlGenericError(xmlGenericErrorContext,
913 "xmlParserInput: base mismatch problem\n");
914 }
915 if (in->cur < in->base) {
916 xmlGenericError(xmlGenericErrorContext,
917 "xmlParserInput: cur < base problem\n");
918 }
919 if (in->cur > in->base + in->buf->buffer->use) {
920 xmlGenericError(xmlGenericErrorContext,
921 "xmlParserInput: cur > base + use problem\n");
922 }
923 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
924 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
925 in->buf->buffer->use, in->buf->buffer->size);
926}
927
928#else
929#define CHECK_BUFFER(in)
930#endif
931
932
933/**
934 * xmlParserInputRead:
935 * @in: an XML parser input
936 * @len: an indicative size for the lookahead
937 *
938 * This function refresh the input for the parser. It doesn't try to
939 * preserve pointers to the input buffer, and discard already read data
940 *
941 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
942 * end of this entity
943 */
944int
945xmlParserInputRead(xmlParserInputPtr in, int len) {
946 int ret;
947 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000948 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000949
950#ifdef DEBUG_INPUT
951 xmlGenericError(xmlGenericErrorContext, "Read\n");
952#endif
953 if (in->buf == NULL) return(-1);
954 if (in->base == NULL) return(-1);
955 if (in->cur == NULL) return(-1);
956 if (in->buf->buffer == NULL) return(-1);
957 if (in->buf->readcallback == NULL) return(-1);
958
959 CHECK_BUFFER(in);
960
961 used = in->cur - in->buf->buffer->content;
962 ret = xmlBufferShrink(in->buf->buffer, used);
963 if (ret > 0) {
964 in->cur -= ret;
965 in->consumed += ret;
966 }
967 ret = xmlParserInputBufferRead(in->buf, len);
968 if (in->base != in->buf->buffer->content) {
969 /*
970 * the buffer has been realloced
971 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000972 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000973 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000974 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000975 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000976 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000977
978 CHECK_BUFFER(in);
979
980 return(ret);
981}
982
983/**
984 * xmlParserInputGrow:
985 * @in: an XML parser input
986 * @len: an indicative size for the lookahead
987 *
988 * This function increase the input for the parser. It tries to
989 * preserve pointers to the input buffer, and keep already read data
990 *
991 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
992 * end of this entity
993 */
994int
995xmlParserInputGrow(xmlParserInputPtr in, int len) {
996 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000997 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000998
999#ifdef DEBUG_INPUT
1000 xmlGenericError(xmlGenericErrorContext, "Grow\n");
1001#endif
1002 if (in->buf == NULL) return(-1);
1003 if (in->base == NULL) return(-1);
1004 if (in->cur == NULL) return(-1);
1005 if (in->buf->buffer == NULL) return(-1);
1006
1007 CHECK_BUFFER(in);
1008
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001009 indx = in->cur - in->base;
1010 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001011
1012 CHECK_BUFFER(in);
1013
1014 return(0);
1015 }
1016 if (in->buf->readcallback != NULL)
1017 ret = xmlParserInputBufferGrow(in->buf, len);
1018 else
1019 return(0);
1020
1021 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001022 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001023 * block, but we use it really as an integer to do some
1024 * pointer arithmetic. Insure will raise it as a bug but in
1025 * that specific case, that's not !
1026 */
1027 if (in->base != in->buf->buffer->content) {
1028 /*
1029 * the buffer has been realloced
1030 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001031 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001032 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001033 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001034 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001035 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001036
1037 CHECK_BUFFER(in);
1038
1039 return(ret);
1040}
1041
1042/**
1043 * xmlParserInputShrink:
1044 * @in: an XML parser input
1045 *
1046 * This function removes used input for the parser.
1047 */
1048void
1049xmlParserInputShrink(xmlParserInputPtr in) {
1050 int used;
1051 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001052 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001053
1054#ifdef DEBUG_INPUT
1055 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1056#endif
1057 if (in->buf == NULL) return;
1058 if (in->base == NULL) return;
1059 if (in->cur == NULL) return;
1060 if (in->buf->buffer == NULL) return;
1061
1062 CHECK_BUFFER(in);
1063
1064 used = in->cur - in->buf->buffer->content;
1065 /*
1066 * Do not shrink on large buffers whose only a tiny fraction
1067 * was consumned
1068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return;
1071 if (used > INPUT_CHUNK) {
1072 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1073 if (ret > 0) {
1074 in->cur -= ret;
1075 in->consumed += ret;
1076 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001077 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001078 }
1079
1080 CHECK_BUFFER(in);
1081
1082 if (in->buf->buffer->use > INPUT_CHUNK) {
1083 return;
1084 }
1085 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1086 if (in->base != in->buf->buffer->content) {
1087 /*
1088 * the buffer has been realloced
1089 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001090 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001091 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001092 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001093 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001094 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001095
1096 CHECK_BUFFER(in);
1097}
1098
1099/************************************************************************
1100 * *
1101 * UTF8 character input and related functions *
1102 * *
1103 ************************************************************************/
1104
1105/**
1106 * xmlNextChar:
1107 * @ctxt: the XML parser context
1108 *
1109 * Skip to the next char input char.
1110 */
1111
1112void
1113xmlNextChar(xmlParserCtxtPtr ctxt) {
1114 if (ctxt->instate == XML_PARSER_EOF)
1115 return;
1116
1117 /*
1118 * 2.11 End-of-Line Handling
1119 * the literal two-character sequence "#xD#xA" or a standalone
1120 * literal #xD, an XML processor must pass to the application
1121 * the single character #xA.
1122 */
1123 if (ctxt->token != 0) ctxt->token = 0;
1124 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1125 if ((*ctxt->input->cur == 0) &&
1126 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1127 (ctxt->instate != XML_PARSER_COMMENT)) {
1128 /*
1129 * If we are at the end of the current entity and
1130 * the context allows it, we pop consumed entities
1131 * automatically.
1132 * the auto closing should be blocked in other cases
1133 */
1134 xmlPopInput(ctxt);
1135 } else {
1136 if (*(ctxt->input->cur) == '\n') {
1137 ctxt->input->line++; ctxt->input->col = 1;
1138 } else ctxt->input->col++;
1139 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1140 /*
1141 * We are supposed to handle UTF8, check it's valid
1142 * From rfc2044: encoding of the Unicode values on UTF-8:
1143 *
1144 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1145 * 0000 0000-0000 007F 0xxxxxxx
1146 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1147 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1148 *
1149 * Check for the 0x110000 limit too
1150 */
1151 const unsigned char *cur = ctxt->input->cur;
1152 unsigned char c;
1153
1154 c = *cur;
1155 if (c & 0x80) {
1156 if (cur[1] == 0)
1157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1158 if ((cur[1] & 0xc0) != 0x80)
1159 goto encoding_error;
1160 if ((c & 0xe0) == 0xe0) {
1161 unsigned int val;
1162
1163 if (cur[2] == 0)
1164 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1165 if ((cur[2] & 0xc0) != 0x80)
1166 goto encoding_error;
1167 if ((c & 0xf0) == 0xf0) {
1168 if (cur[3] == 0)
1169 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1170 if (((c & 0xf8) != 0xf0) ||
1171 ((cur[3] & 0xc0) != 0x80))
1172 goto encoding_error;
1173 /* 4-byte code */
1174 ctxt->input->cur += 4;
1175 val = (cur[0] & 0x7) << 18;
1176 val |= (cur[1] & 0x3f) << 12;
1177 val |= (cur[2] & 0x3f) << 6;
1178 val |= cur[3] & 0x3f;
1179 } else {
1180 /* 3-byte code */
1181 ctxt->input->cur += 3;
1182 val = (cur[0] & 0xf) << 12;
1183 val |= (cur[1] & 0x3f) << 6;
1184 val |= cur[2] & 0x3f;
1185 }
1186 if (((val > 0xd7ff) && (val < 0xe000)) ||
1187 ((val > 0xfffd) && (val < 0x10000)) ||
1188 (val >= 0x110000)) {
1189 if ((ctxt->sax != NULL) &&
1190 (ctxt->sax->error != NULL))
1191 ctxt->sax->error(ctxt->userData,
1192 "Char 0x%X out of allowed range\n", val);
1193 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1194 ctxt->wellFormed = 0;
1195 ctxt->disableSAX = 1;
1196 }
1197 } else
1198 /* 2-byte code */
1199 ctxt->input->cur += 2;
1200 } else
1201 /* 1-byte code */
1202 ctxt->input->cur++;
1203 } else {
1204 /*
1205 * Assume it's a fixed lenght encoding (1) with
1206 * a compatibke encoding for the ASCII set, since
1207 * XML constructs only use < 128 chars
1208 */
1209 ctxt->input->cur++;
1210 }
1211 ctxt->nbChars++;
1212 if (*ctxt->input->cur == 0)
1213 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1214 }
1215 } else {
1216 ctxt->input->cur++;
1217 ctxt->nbChars++;
1218 if (*ctxt->input->cur == 0)
1219 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1220 }
1221 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1222 xmlParserHandlePEReference(ctxt);
1223 if ((*ctxt->input->cur == 0) &&
1224 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1225 xmlPopInput(ctxt);
1226 return;
1227encoding_error:
1228 /*
1229 * If we detect an UTF8 error that probably mean that the
1230 * input encoding didn't get properly advertized in the
1231 * declaration header. Report the error and switch the encoding
1232 * to ISO-Latin-1 (if you don't like this policy, just declare the
1233 * encoding !)
1234 */
1235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1236 ctxt->sax->error(ctxt->userData,
1237 "Input is not proper UTF-8, indicate encoding !\n");
1238 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1239 ctxt->input->cur[0], ctxt->input->cur[1],
1240 ctxt->input->cur[2], ctxt->input->cur[3]);
1241 }
1242 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1243
1244 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1245 ctxt->input->cur++;
1246 return;
1247}
1248
1249/**
1250 * xmlCurrentChar:
1251 * @ctxt: the XML parser context
1252 * @len: pointer to the length of the char read
1253 *
1254 * The current char value, if using UTF-8 this may actaully span multiple
1255 * bytes in the input buffer. Implement the end of line normalization:
1256 * 2.11 End-of-Line Handling
1257 * Wherever an external parsed entity or the literal entity value
1258 * of an internal parsed entity contains either the literal two-character
1259 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1260 * must pass to the application the single character #xA.
1261 * This behavior can conveniently be produced by normalizing all
1262 * line breaks to #xA on input, before parsing.)
1263 *
1264 * Returns the current char value and its lenght
1265 */
1266
1267int
1268xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1269 if (ctxt->instate == XML_PARSER_EOF)
1270 return(0);
1271
1272 if (ctxt->token != 0) {
1273 *len = 0;
1274 return(ctxt->token);
1275 }
1276 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1277 *len = 1;
1278 return((int) *ctxt->input->cur);
1279 }
1280 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1281 /*
1282 * We are supposed to handle UTF8, check it's valid
1283 * From rfc2044: encoding of the Unicode values on UTF-8:
1284 *
1285 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1286 * 0000 0000-0000 007F 0xxxxxxx
1287 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1288 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1289 *
1290 * Check for the 0x110000 limit too
1291 */
1292 const unsigned char *cur = ctxt->input->cur;
1293 unsigned char c;
1294 unsigned int val;
1295
1296 c = *cur;
1297 if (c & 0x80) {
1298 if (cur[1] == 0)
1299 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1300 if ((cur[1] & 0xc0) != 0x80)
1301 goto encoding_error;
1302 if ((c & 0xe0) == 0xe0) {
1303
1304 if (cur[2] == 0)
1305 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1306 if ((cur[2] & 0xc0) != 0x80)
1307 goto encoding_error;
1308 if ((c & 0xf0) == 0xf0) {
1309 if (cur[3] == 0)
1310 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1311 if (((c & 0xf8) != 0xf0) ||
1312 ((cur[3] & 0xc0) != 0x80))
1313 goto encoding_error;
1314 /* 4-byte code */
1315 *len = 4;
1316 val = (cur[0] & 0x7) << 18;
1317 val |= (cur[1] & 0x3f) << 12;
1318 val |= (cur[2] & 0x3f) << 6;
1319 val |= cur[3] & 0x3f;
1320 } else {
1321 /* 3-byte code */
1322 *len = 3;
1323 val = (cur[0] & 0xf) << 12;
1324 val |= (cur[1] & 0x3f) << 6;
1325 val |= cur[2] & 0x3f;
1326 }
1327 } else {
1328 /* 2-byte code */
1329 *len = 2;
1330 val = (cur[0] & 0x1f) << 6;
1331 val |= cur[1] & 0x3f;
1332 }
1333 if (!IS_CHAR(val)) {
1334 if ((ctxt->sax != NULL) &&
1335 (ctxt->sax->error != NULL))
1336 ctxt->sax->error(ctxt->userData,
1337 "Char 0x%X out of allowed range\n", val);
1338 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1339 ctxt->wellFormed = 0;
1340 ctxt->disableSAX = 1;
1341 }
1342 return(val);
1343 } else {
1344 /* 1-byte code */
1345 *len = 1;
1346 if (*ctxt->input->cur == 0xD) {
1347 if (ctxt->input->cur[1] == 0xA) {
1348 ctxt->nbChars++;
1349 ctxt->input->cur++;
1350 }
1351 return(0xA);
1352 }
1353 return((int) *ctxt->input->cur);
1354 }
1355 }
1356 /*
1357 * Assume it's a fixed lenght encoding (1) with
1358 * a compatibke encoding for the ASCII set, since
1359 * XML constructs only use < 128 chars
1360 */
1361 *len = 1;
1362 if (*ctxt->input->cur == 0xD) {
1363 if (ctxt->input->cur[1] == 0xA) {
1364 ctxt->nbChars++;
1365 ctxt->input->cur++;
1366 }
1367 return(0xA);
1368 }
1369 return((int) *ctxt->input->cur);
1370encoding_error:
1371 /*
1372 * If we detect an UTF8 error that probably mean that the
1373 * input encoding didn't get properly advertized in the
1374 * declaration header. Report the error and switch the encoding
1375 * to ISO-Latin-1 (if you don't like this policy, just declare the
1376 * encoding !)
1377 */
1378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1379 ctxt->sax->error(ctxt->userData,
1380 "Input is not proper UTF-8, indicate encoding !\n");
1381 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1382 ctxt->input->cur[0], ctxt->input->cur[1],
1383 ctxt->input->cur[2], ctxt->input->cur[3]);
1384 }
1385 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1386
1387 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1388 *len = 1;
1389 return((int) *ctxt->input->cur);
1390}
1391
1392/**
1393 * xmlStringCurrentChar:
1394 * @ctxt: the XML parser context
1395 * @cur: pointer to the beginning of the char
1396 * @len: pointer to the length of the char read
1397 *
1398 * The current char value, if using UTF-8 this may actaully span multiple
1399 * bytes in the input buffer.
1400 *
1401 * Returns the current char value and its lenght
1402 */
1403
1404int
1405xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1406 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1407 /*
1408 * We are supposed to handle UTF8, check it's valid
1409 * From rfc2044: encoding of the Unicode values on UTF-8:
1410 *
1411 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1412 * 0000 0000-0000 007F 0xxxxxxx
1413 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1414 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1415 *
1416 * Check for the 0x110000 limit too
1417 */
1418 unsigned char c;
1419 unsigned int val;
1420
1421 c = *cur;
1422 if (c & 0x80) {
1423 if ((cur[1] & 0xc0) != 0x80)
1424 goto encoding_error;
1425 if ((c & 0xe0) == 0xe0) {
1426
1427 if ((cur[2] & 0xc0) != 0x80)
1428 goto encoding_error;
1429 if ((c & 0xf0) == 0xf0) {
1430 if (((c & 0xf8) != 0xf0) ||
1431 ((cur[3] & 0xc0) != 0x80))
1432 goto encoding_error;
1433 /* 4-byte code */
1434 *len = 4;
1435 val = (cur[0] & 0x7) << 18;
1436 val |= (cur[1] & 0x3f) << 12;
1437 val |= (cur[2] & 0x3f) << 6;
1438 val |= cur[3] & 0x3f;
1439 } else {
1440 /* 3-byte code */
1441 *len = 3;
1442 val = (cur[0] & 0xf) << 12;
1443 val |= (cur[1] & 0x3f) << 6;
1444 val |= cur[2] & 0x3f;
1445 }
1446 } else {
1447 /* 2-byte code */
1448 *len = 2;
1449 val = (cur[0] & 0x1f) << 6;
1450 val |= cur[2] & 0x3f;
1451 }
1452 if (!IS_CHAR(val)) {
1453 if ((ctxt->sax != NULL) &&
1454 (ctxt->sax->error != NULL))
1455 ctxt->sax->error(ctxt->userData,
1456 "Char 0x%X out of allowed range\n", val);
1457 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1458 ctxt->wellFormed = 0;
1459 ctxt->disableSAX = 1;
1460 }
1461 return(val);
1462 } else {
1463 /* 1-byte code */
1464 *len = 1;
1465 return((int) *cur);
1466 }
1467 }
1468 /*
1469 * Assume it's a fixed lenght encoding (1) with
1470 * a compatibke encoding for the ASCII set, since
1471 * XML constructs only use < 128 chars
1472 */
1473 *len = 1;
1474 return((int) *cur);
1475encoding_error:
1476 /*
1477 * If we detect an UTF8 error that probably mean that the
1478 * input encoding didn't get properly advertized in the
1479 * declaration header. Report the error and switch the encoding
1480 * to ISO-Latin-1 (if you don't like this policy, just declare the
1481 * encoding !)
1482 */
1483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1484 ctxt->sax->error(ctxt->userData,
1485 "Input is not proper UTF-8, indicate encoding !\n");
1486 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1487 ctxt->input->cur[0], ctxt->input->cur[1],
1488 ctxt->input->cur[2], ctxt->input->cur[3]);
1489 }
1490 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1491
1492 *len = 1;
1493 return((int) *cur);
1494}
1495
1496/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001497 * xmlCopyCharMultiByte:
1498 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001499 * @val: the char value
1500 *
1501 * append the char value in the array
1502 *
1503 * Returns the number of xmlChar written
1504 */
Owen Taylor3473f882001-02-23 17:55:21 +00001505int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001506xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001507 /*
1508 * We are supposed to handle UTF8, check it's valid
1509 * From rfc2044: encoding of the Unicode values on UTF-8:
1510 *
1511 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1512 * 0000 0000-0000 007F 0xxxxxxx
1513 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1514 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1515 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001516 if (val >= 0x80) {
1517 xmlChar *savedout = out;
1518 int bits;
1519 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1520 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1521 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1522 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001523 xmlGenericError(xmlGenericErrorContext,
1524 "Internal error, xmlCopyChar 0x%X out of bound\n",
1525 val);
1526 return(0);
1527 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 for ( ; bits >= 0; bits-= 6)
1529 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1530 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 }
1532 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001533 return 1;
1534}
1535
1536/**
1537 * xmlCopyChar:
1538 * @len: Ignored, compatibility
1539 * @out: pointer to an arry of xmlChar
1540 * @val: the char value
1541 *
1542 * append the char value in the array
1543 *
1544 * Returns the number of xmlChar written
1545 */
1546
1547int
1548xmlCopyChar(int len UNUSED, xmlChar *out, int val) {
1549 /* the len parameter is ignored */
1550 if (val >= 0x80) {
1551 return(xmlCopyCharMultiByte (out, val));
1552 }
1553 *out = (xmlChar) val;
1554 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001555}
1556
1557/************************************************************************
1558 * *
1559 * Commodity functions to switch encodings *
1560 * *
1561 ************************************************************************/
1562
1563/**
1564 * xmlSwitchEncoding:
1565 * @ctxt: the parser context
1566 * @enc: the encoding value (number)
1567 *
1568 * change the input functions when discovering the character encoding
1569 * of a given entity.
1570 *
1571 * Returns 0 in case of success, -1 otherwise
1572 */
1573int
1574xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1575{
1576 xmlCharEncodingHandlerPtr handler;
1577
1578 switch (enc) {
1579 case XML_CHAR_ENCODING_ERROR:
1580 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1582 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1583 ctxt->wellFormed = 0;
1584 ctxt->disableSAX = 1;
1585 break;
1586 case XML_CHAR_ENCODING_NONE:
1587 /* let's assume it's UTF-8 without the XML decl */
1588 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1589 return(0);
1590 case XML_CHAR_ENCODING_UTF8:
1591 /* default encoding, no conversion should be needed */
1592 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1593 return(0);
1594 default:
1595 break;
1596 }
1597 handler = xmlGetCharEncodingHandler(enc);
1598 if (handler == NULL) {
1599 /*
1600 * Default handlers.
1601 */
1602 switch (enc) {
1603 case XML_CHAR_ENCODING_ERROR:
1604 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1606 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1607 ctxt->wellFormed = 0;
1608 ctxt->disableSAX = 1;
1609 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1610 break;
1611 case XML_CHAR_ENCODING_NONE:
1612 /* let's assume it's UTF-8 without the XML decl */
1613 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1614 return(0);
1615 case XML_CHAR_ENCODING_UTF8:
1616 case XML_CHAR_ENCODING_ASCII:
1617 /* default encoding, no conversion should be needed */
1618 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1619 return(0);
1620 case XML_CHAR_ENCODING_UTF16LE:
1621 break;
1622 case XML_CHAR_ENCODING_UTF16BE:
1623 break;
1624 case XML_CHAR_ENCODING_UCS4LE:
1625 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1627 ctxt->sax->error(ctxt->userData,
1628 "char encoding USC4 little endian not supported\n");
1629 break;
1630 case XML_CHAR_ENCODING_UCS4BE:
1631 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1633 ctxt->sax->error(ctxt->userData,
1634 "char encoding USC4 big endian not supported\n");
1635 break;
1636 case XML_CHAR_ENCODING_EBCDIC:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding EBCDIC not supported\n");
1641 break;
1642 case XML_CHAR_ENCODING_UCS4_2143:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding UCS4 2143 not supported\n");
1647 break;
1648 case XML_CHAR_ENCODING_UCS4_3412:
1649 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651 ctxt->sax->error(ctxt->userData,
1652 "char encoding UCS4 3412 not supported\n");
1653 break;
1654 case XML_CHAR_ENCODING_UCS2:
1655 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "char encoding UCS2 not supported\n");
1659 break;
1660 case XML_CHAR_ENCODING_8859_1:
1661 case XML_CHAR_ENCODING_8859_2:
1662 case XML_CHAR_ENCODING_8859_3:
1663 case XML_CHAR_ENCODING_8859_4:
1664 case XML_CHAR_ENCODING_8859_5:
1665 case XML_CHAR_ENCODING_8859_6:
1666 case XML_CHAR_ENCODING_8859_7:
1667 case XML_CHAR_ENCODING_8859_8:
1668 case XML_CHAR_ENCODING_8859_9:
1669 /*
1670 * We used to keep the internal content in the
1671 * document encoding however this turns being unmaintainable
1672 * So xmlGetCharEncodingHandler() will return non-null
1673 * values for this now.
1674 */
1675 if ((ctxt->inputNr == 1) &&
1676 (ctxt->encoding == NULL) &&
1677 (ctxt->input->encoding != NULL)) {
1678 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1679 }
1680 ctxt->charset = enc;
1681 return(0);
1682 case XML_CHAR_ENCODING_2022_JP:
1683 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1685 ctxt->sax->error(ctxt->userData,
1686 "char encoding ISO-2022-JPnot supported\n");
1687 break;
1688 case XML_CHAR_ENCODING_SHIFT_JIS:
1689 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1691 ctxt->sax->error(ctxt->userData,
1692 "char encoding Shift_JIS not supported\n");
1693 break;
1694 case XML_CHAR_ENCODING_EUC_JP:
1695 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697 ctxt->sax->error(ctxt->userData,
1698 "char encoding EUC-JPnot supported\n");
1699 break;
1700 }
1701 }
1702 if (handler == NULL)
1703 return(-1);
1704 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1705 return(xmlSwitchToEncoding(ctxt, handler));
1706}
1707
1708/**
1709 * xmlSwitchToEncoding:
1710 * @ctxt: the parser context
1711 * @handler: the encoding handler
1712 *
1713 * change the input functions when discovering the character encoding
1714 * of a given entity.
1715 *
1716 * Returns 0 in case of success, -1 otherwise
1717 */
1718int
1719xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1720{
1721 int nbchars;
1722
1723 if (handler != NULL) {
1724 if (ctxt->input != NULL) {
1725 if (ctxt->input->buf != NULL) {
1726 if (ctxt->input->buf->encoder != NULL) {
1727 if (ctxt->input->buf->encoder == handler)
1728 return(0);
1729 /*
1730 * Note: this is a bit dangerous, but that's what it
1731 * takes to use nearly compatible signature for different
1732 * encodings.
1733 */
1734 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1735 ctxt->input->buf->encoder = handler;
1736 return(0);
1737 }
1738 ctxt->input->buf->encoder = handler;
1739
1740 /*
1741 * Is there already some content down the pipe to convert ?
1742 */
1743 if ((ctxt->input->buf->buffer != NULL) &&
1744 (ctxt->input->buf->buffer->use > 0)) {
1745 int processed;
1746
1747 /*
1748 * Specific handling of the Byte Order Mark for
1749 * UTF-16
1750 */
1751 if ((handler->name != NULL) &&
1752 (!strcmp(handler->name, "UTF-16LE")) &&
1753 (ctxt->input->cur[0] == 0xFF) &&
1754 (ctxt->input->cur[1] == 0xFE)) {
1755 ctxt->input->cur += 2;
1756 }
1757 if ((handler->name != NULL) &&
1758 (!strcmp(handler->name, "UTF-16BE")) &&
1759 (ctxt->input->cur[0] == 0xFE) &&
1760 (ctxt->input->cur[1] == 0xFF)) {
1761 ctxt->input->cur += 2;
1762 }
1763
1764 /*
1765 * Shring the current input buffer.
1766 * Move it as the raw buffer and create a new input buffer
1767 */
1768 processed = ctxt->input->cur - ctxt->input->base;
1769 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1770 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1771 ctxt->input->buf->buffer = xmlBufferCreate();
1772
1773 if (ctxt->html) {
1774 /*
1775 * converst as much as possbile of the buffer
1776 */
1777 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1778 ctxt->input->buf->buffer,
1779 ctxt->input->buf->raw);
1780 } else {
1781 /*
1782 * convert just enough to get
1783 * '<?xml version="1.0" encoding="xxx"?>'
1784 * parsed with the autodetected encoding
1785 * into the parser reading buffer.
1786 */
1787 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1788 ctxt->input->buf->buffer,
1789 ctxt->input->buf->raw);
1790 }
1791 if (nbchars < 0) {
1792 xmlGenericError(xmlGenericErrorContext,
1793 "xmlSwitchToEncoding: encoder error\n");
1794 return(-1);
1795 }
1796 ctxt->input->base =
1797 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001798 ctxt->input->end =
1799 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001800
1801 }
1802 return(0);
1803 } else {
1804 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1805 /*
1806 * When parsing a static memory array one must know the
1807 * size to be able to convert the buffer.
1808 */
1809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1810 ctxt->sax->error(ctxt->userData,
1811 "xmlSwitchEncoding : no input\n");
1812 return(-1);
1813 } else {
1814 int processed;
1815
1816 /*
1817 * Shring the current input buffer.
1818 * Move it as the raw buffer and create a new input buffer
1819 */
1820 processed = ctxt->input->cur - ctxt->input->base;
1821
1822 ctxt->input->buf->raw = xmlBufferCreate();
1823 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1824 ctxt->input->length - processed);
1825 ctxt->input->buf->buffer = xmlBufferCreate();
1826
1827 /*
1828 * convert as much as possible of the raw input
1829 * to the parser reading buffer.
1830 */
1831 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1832 ctxt->input->buf->buffer,
1833 ctxt->input->buf->raw);
1834 if (nbchars < 0) {
1835 xmlGenericError(xmlGenericErrorContext,
1836 "xmlSwitchToEncoding: encoder error\n");
1837 return(-1);
1838 }
1839
1840 /*
1841 * Conversion succeeded, get rid of the old buffer
1842 */
1843 if ((ctxt->input->free != NULL) &&
1844 (ctxt->input->base != NULL))
1845 ctxt->input->free((xmlChar *) ctxt->input->base);
1846 ctxt->input->base =
1847 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001848 ctxt->input->end =
1849 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001850 }
1851 }
1852 } else {
1853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1854 ctxt->sax->error(ctxt->userData,
1855 "xmlSwitchEncoding : no input\n");
1856 return(-1);
1857 }
1858 /*
1859 * The parsing is now done in UTF8 natively
1860 */
1861 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1862 } else
1863 return(-1);
1864 return(0);
1865
1866}
1867
1868/************************************************************************
1869 * *
1870 * Commodity functions to handle entities processing *
1871 * *
1872 ************************************************************************/
1873
1874/**
1875 * xmlFreeInputStream:
1876 * @input: an xmlParserInputPtr
1877 *
1878 * Free up an input stream.
1879 */
1880void
1881xmlFreeInputStream(xmlParserInputPtr input) {
1882 if (input == NULL) return;
1883
1884 if (input->filename != NULL) xmlFree((char *) input->filename);
1885 if (input->directory != NULL) xmlFree((char *) input->directory);
1886 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1887 if (input->version != NULL) xmlFree((char *) input->version);
1888 if ((input->free != NULL) && (input->base != NULL))
1889 input->free((xmlChar *) input->base);
1890 if (input->buf != NULL)
1891 xmlFreeParserInputBuffer(input->buf);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001892 MEM_CLEANUP(input, sizeof(xmlParserInput));
Owen Taylor3473f882001-02-23 17:55:21 +00001893 xmlFree(input);
1894}
1895
1896/**
1897 * xmlNewInputStream:
1898 * @ctxt: an XML parser context
1899 *
1900 * Create a new input stream structure
1901 * Returns the new input stream or NULL
1902 */
1903xmlParserInputPtr
1904xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1905 xmlParserInputPtr input;
1906
1907 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1908 if (input == NULL) {
1909 if (ctxt != NULL) {
1910 ctxt->errNo = XML_ERR_NO_MEMORY;
1911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1912 ctxt->sax->error(ctxt->userData,
1913 "malloc: couldn't allocate a new input stream\n");
1914 ctxt->errNo = XML_ERR_NO_MEMORY;
1915 }
1916 return(NULL);
1917 }
1918 memset(input, 0, sizeof(xmlParserInput));
1919 input->line = 1;
1920 input->col = 1;
1921 input->standalone = -1;
1922 return(input);
1923}
1924
1925/**
1926 * xmlNewIOInputStream:
1927 * @ctxt: an XML parser context
1928 * @input: an I/O Input
1929 * @enc: the charset encoding if known
1930 *
1931 * Create a new input stream structure encapsulating the @input into
1932 * a stream suitable for the parser.
1933 *
1934 * Returns the new input stream or NULL
1935 */
1936xmlParserInputPtr
1937xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1938 xmlCharEncoding enc) {
1939 xmlParserInputPtr inputStream;
1940
1941 if (xmlParserDebugEntities)
1942 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1943 inputStream = xmlNewInputStream(ctxt);
1944 if (inputStream == NULL) {
1945 return(NULL);
1946 }
1947 inputStream->filename = NULL;
1948 inputStream->buf = input;
1949 inputStream->base = inputStream->buf->buffer->content;
1950 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001951 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001952 if (enc != XML_CHAR_ENCODING_NONE) {
1953 xmlSwitchEncoding(ctxt, enc);
1954 }
1955
1956 return(inputStream);
1957}
1958
1959/**
1960 * xmlNewEntityInputStream:
1961 * @ctxt: an XML parser context
1962 * @entity: an Entity pointer
1963 *
1964 * Create a new input stream based on an xmlEntityPtr
1965 *
1966 * Returns the new input stream or NULL
1967 */
1968xmlParserInputPtr
1969xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1970 xmlParserInputPtr input;
1971
1972 if (entity == NULL) {
1973 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975 ctxt->sax->error(ctxt->userData,
1976 "internal: xmlNewEntityInputStream entity = NULL\n");
1977 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1978 return(NULL);
1979 }
1980 if (xmlParserDebugEntities)
1981 xmlGenericError(xmlGenericErrorContext,
1982 "new input from entity: %s\n", entity->name);
1983 if (entity->content == NULL) {
1984 switch (entity->etype) {
1985 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1986 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1988 ctxt->sax->error(ctxt->userData,
1989 "xmlNewEntityInputStream unparsed entity !\n");
1990 break;
1991 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1992 case XML_EXTERNAL_PARAMETER_ENTITY:
1993 return(xmlLoadExternalEntity((char *) entity->URI,
1994 (char *) entity->ExternalID, ctxt));
1995 case XML_INTERNAL_GENERAL_ENTITY:
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "Internal entity %s without content !\n", entity->name);
1999 break;
2000 case XML_INTERNAL_PARAMETER_ENTITY:
2001 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2003 ctxt->sax->error(ctxt->userData,
2004 "Internal parameter entity %s without content !\n", entity->name);
2005 break;
2006 case XML_INTERNAL_PREDEFINED_ENTITY:
2007 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2009 ctxt->sax->error(ctxt->userData,
2010 "Predefined entity %s without content !\n", entity->name);
2011 break;
2012 }
2013 return(NULL);
2014 }
2015 input = xmlNewInputStream(ctxt);
2016 if (input == NULL) {
2017 return(NULL);
2018 }
2019 input->filename = (char *) entity->URI;
2020 input->base = entity->content;
2021 input->cur = entity->content;
2022 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002023 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002024 return(input);
2025}
2026
2027/**
2028 * xmlNewStringInputStream:
2029 * @ctxt: an XML parser context
2030 * @buffer: an memory buffer
2031 *
2032 * Create a new input stream based on a memory buffer.
2033 * Returns the new input stream
2034 */
2035xmlParserInputPtr
2036xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2037 xmlParserInputPtr input;
2038
2039 if (buffer == NULL) {
2040 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2042 ctxt->sax->error(ctxt->userData,
2043 "internal: xmlNewStringInputStream string = NULL\n");
2044 return(NULL);
2045 }
2046 if (xmlParserDebugEntities)
2047 xmlGenericError(xmlGenericErrorContext,
2048 "new fixed input: %.30s\n", buffer);
2049 input = xmlNewInputStream(ctxt);
2050 if (input == NULL) {
2051 return(NULL);
2052 }
2053 input->base = buffer;
2054 input->cur = buffer;
2055 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002056 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002057 return(input);
2058}
2059
2060/**
2061 * xmlNewInputFromFile:
2062 * @ctxt: an XML parser context
2063 * @filename: the filename to use as entity
2064 *
2065 * Create a new input stream based on a file.
2066 *
2067 * Returns the new input stream or NULL in case of error
2068 */
2069xmlParserInputPtr
2070xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2071 xmlParserInputBufferPtr buf;
2072 xmlParserInputPtr inputStream;
2073 char *directory = NULL;
2074 xmlChar *URI = NULL;
2075
2076 if (xmlParserDebugEntities)
2077 xmlGenericError(xmlGenericErrorContext,
2078 "new input from file: %s\n", filename);
2079 if (ctxt == NULL) return(NULL);
2080 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2081 if (buf == NULL)
2082 return(NULL);
2083
2084 URI = xmlStrdup((xmlChar *) filename);
2085 directory = xmlParserGetDirectory((const char *) URI);
2086
2087 inputStream = xmlNewInputStream(ctxt);
2088 if (inputStream == NULL) {
2089 if (directory != NULL) xmlFree((char *) directory);
2090 if (URI != NULL) xmlFree((char *) URI);
2091 return(NULL);
2092 }
2093
2094 inputStream->filename = (const char *) URI;
2095 inputStream->directory = directory;
2096 inputStream->buf = buf;
2097
2098 inputStream->base = inputStream->buf->buffer->content;
2099 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002100 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002101 if ((ctxt->directory == NULL) && (directory != NULL))
2102 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2103 return(inputStream);
2104}
2105
2106/************************************************************************
2107 * *
2108 * Commodity functions to handle parser contexts *
2109 * *
2110 ************************************************************************/
2111
2112/**
2113 * xmlInitParserCtxt:
2114 * @ctxt: an XML parser context
2115 *
2116 * Initialize a parser context
2117 */
2118
2119void
2120xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2121{
2122 xmlSAXHandler *sax;
2123
2124 xmlDefaultSAXHandlerInit();
2125
2126 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2127 if (sax == NULL) {
2128 xmlGenericError(xmlGenericErrorContext,
2129 "xmlInitParserCtxt: out of memory\n");
2130 }
2131 else
2132 memset(sax, 0, sizeof(xmlSAXHandler));
2133
2134 /* Allocate the Input stack */
2135 ctxt->inputTab = (xmlParserInputPtr *)
2136 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2137 if (ctxt->inputTab == NULL) {
2138 xmlGenericError(xmlGenericErrorContext,
2139 "xmlInitParserCtxt: out of memory\n");
2140 ctxt->inputNr = 0;
2141 ctxt->inputMax = 0;
2142 ctxt->input = NULL;
2143 return;
2144 }
2145 ctxt->inputNr = 0;
2146 ctxt->inputMax = 5;
2147 ctxt->input = NULL;
2148
2149 ctxt->version = NULL;
2150 ctxt->encoding = NULL;
2151 ctxt->standalone = -1;
2152 ctxt->hasExternalSubset = 0;
2153 ctxt->hasPErefs = 0;
2154 ctxt->html = 0;
2155 ctxt->external = 0;
2156 ctxt->instate = XML_PARSER_START;
2157 ctxt->token = 0;
2158 ctxt->directory = NULL;
2159
2160 /* Allocate the Node stack */
2161 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2162 if (ctxt->nodeTab == NULL) {
2163 xmlGenericError(xmlGenericErrorContext,
2164 "xmlInitParserCtxt: out of memory\n");
2165 ctxt->nodeNr = 0;
2166 ctxt->nodeMax = 0;
2167 ctxt->node = NULL;
2168 ctxt->inputNr = 0;
2169 ctxt->inputMax = 0;
2170 ctxt->input = NULL;
2171 return;
2172 }
2173 ctxt->nodeNr = 0;
2174 ctxt->nodeMax = 10;
2175 ctxt->node = NULL;
2176
2177 /* Allocate the Name stack */
2178 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2179 if (ctxt->nameTab == NULL) {
2180 xmlGenericError(xmlGenericErrorContext,
2181 "xmlInitParserCtxt: out of memory\n");
2182 ctxt->nodeNr = 0;
2183 ctxt->nodeMax = 0;
2184 ctxt->node = NULL;
2185 ctxt->inputNr = 0;
2186 ctxt->inputMax = 0;
2187 ctxt->input = NULL;
2188 ctxt->nameNr = 0;
2189 ctxt->nameMax = 0;
2190 ctxt->name = NULL;
2191 return;
2192 }
2193 ctxt->nameNr = 0;
2194 ctxt->nameMax = 10;
2195 ctxt->name = NULL;
2196
2197 /* Allocate the space stack */
2198 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2199 if (ctxt->spaceTab == NULL) {
2200 xmlGenericError(xmlGenericErrorContext,
2201 "xmlInitParserCtxt: out of memory\n");
2202 ctxt->nodeNr = 0;
2203 ctxt->nodeMax = 0;
2204 ctxt->node = NULL;
2205 ctxt->inputNr = 0;
2206 ctxt->inputMax = 0;
2207 ctxt->input = NULL;
2208 ctxt->nameNr = 0;
2209 ctxt->nameMax = 0;
2210 ctxt->name = NULL;
2211 ctxt->spaceNr = 0;
2212 ctxt->spaceMax = 0;
2213 ctxt->space = NULL;
2214 return;
2215 }
2216 ctxt->spaceNr = 1;
2217 ctxt->spaceMax = 10;
2218 ctxt->spaceTab[0] = -1;
2219 ctxt->space = &ctxt->spaceTab[0];
2220
Daniel Veillard14be0a12001-03-03 18:50:55 +00002221 ctxt->sax = sax;
2222 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2223
Owen Taylor3473f882001-02-23 17:55:21 +00002224 ctxt->userData = ctxt;
2225 ctxt->myDoc = NULL;
2226 ctxt->wellFormed = 1;
2227 ctxt->valid = 1;
2228 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2229 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2230 ctxt->pedantic = xmlPedanticParserDefaultValue;
2231 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2232 ctxt->vctxt.userData = ctxt;
2233 if (ctxt->validate) {
2234 ctxt->vctxt.error = xmlParserValidityError;
2235 if (xmlGetWarningsDefaultValue == 0)
2236 ctxt->vctxt.warning = NULL;
2237 else
2238 ctxt->vctxt.warning = xmlParserValidityWarning;
2239 /* Allocate the Node stack */
2240 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2241 if (ctxt->vctxt.nodeTab == NULL) {
2242 xmlGenericError(xmlGenericErrorContext,
2243 "xmlInitParserCtxt: out of memory\n");
2244 ctxt->vctxt.nodeMax = 0;
2245 ctxt->validate = 0;
2246 ctxt->vctxt.error = NULL;
2247 ctxt->vctxt.warning = NULL;
2248 } else {
2249 ctxt->vctxt.nodeNr = 0;
2250 ctxt->vctxt.nodeMax = 4;
2251 ctxt->vctxt.node = NULL;
2252 }
2253 } else {
2254 ctxt->vctxt.error = NULL;
2255 ctxt->vctxt.warning = NULL;
2256 }
2257 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2258 ctxt->record_info = 0;
2259 ctxt->nbChars = 0;
2260 ctxt->checkIndex = 0;
2261 ctxt->inSubset = 0;
2262 ctxt->errNo = XML_ERR_OK;
2263 ctxt->depth = 0;
2264 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2265 xmlInitNodeInfoSeq(&ctxt->node_seq);
2266}
2267
2268/**
2269 * xmlFreeParserCtxt:
2270 * @ctxt: an XML parser context
2271 *
2272 * Free all the memory used by a parser context. However the parsed
2273 * document in ctxt->myDoc is not freed.
2274 */
2275
2276void
2277xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2278{
2279 xmlParserInputPtr input;
2280 xmlChar *oldname;
2281
2282 if (ctxt == NULL) return;
2283
2284 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2285 xmlFreeInputStream(input);
2286 }
2287 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2288 xmlFree(oldname);
2289 }
2290 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2291 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2292 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2293 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2294 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2295 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2296 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2297 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2298 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2299 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2300 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2301 xmlFree(ctxt->sax);
2302 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2303 xmlFree(ctxt);
2304}
2305
2306/**
2307 * xmlNewParserCtxt:
2308 *
2309 * Allocate and initialize a new parser context.
2310 *
2311 * Returns the xmlParserCtxtPtr or NULL
2312 */
2313
2314xmlParserCtxtPtr
2315xmlNewParserCtxt()
2316{
2317 xmlParserCtxtPtr ctxt;
2318
2319 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2320 if (ctxt == NULL) {
2321 xmlGenericError(xmlGenericErrorContext,
2322 "xmlNewParserCtxt : cannot allocate context\n");
2323 perror("malloc");
2324 return(NULL);
2325 }
2326 memset(ctxt, 0, sizeof(xmlParserCtxt));
2327 xmlInitParserCtxt(ctxt);
2328 return(ctxt);
2329}
2330
2331/************************************************************************
2332 * *
2333 * Handling of node informations *
2334 * *
2335 ************************************************************************/
2336
2337/**
2338 * xmlClearParserCtxt:
2339 * @ctxt: an XML parser context
2340 *
2341 * Clear (release owned resources) and reinitialize a parser context
2342 */
2343
2344void
2345xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2346{
2347 xmlClearNodeInfoSeq(&ctxt->node_seq);
2348 xmlInitParserCtxt(ctxt);
2349}
2350
2351/**
2352 * xmlParserFindNodeInfo:
2353 * @ctxt: an XML parser context
2354 * @node: an XML node within the tree
2355 *
2356 * Find the parser node info struct for a given node
2357 *
2358 * Returns an xmlParserNodeInfo block pointer or NULL
2359 */
2360const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2361 const xmlNode* node)
2362{
2363 unsigned long pos;
2364
2365 /* Find position where node should be at */
2366 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2367 if ( ctx->node_seq.buffer[pos].node == node )
2368 return &ctx->node_seq.buffer[pos];
2369 else
2370 return NULL;
2371}
2372
2373
2374/**
2375 * xmlInitNodeInfoSeq:
2376 * @seq: a node info sequence pointer
2377 *
2378 * -- Initialize (set to initial state) node info sequence
2379 */
2380void
2381xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2382{
2383 seq->length = 0;
2384 seq->maximum = 0;
2385 seq->buffer = NULL;
2386}
2387
2388/**
2389 * xmlClearNodeInfoSeq:
2390 * @seq: a node info sequence pointer
2391 *
2392 * -- Clear (release memory and reinitialize) node
2393 * info sequence
2394 */
2395void
2396xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2397{
2398 if ( seq->buffer != NULL )
2399 xmlFree(seq->buffer);
2400 xmlInitNodeInfoSeq(seq);
2401}
2402
2403
2404/**
2405 * xmlParserFindNodeInfoIndex:
2406 * @seq: a node info sequence pointer
2407 * @node: an XML node pointer
2408 *
2409 *
2410 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2411 * the given node is or should be at in a sorted sequence
2412 *
2413 * Returns a long indicating the position of the record
2414 */
2415unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2416 const xmlNode* node)
2417{
2418 unsigned long upper, lower, middle;
2419 int found = 0;
2420
2421 /* Do a binary search for the key */
2422 lower = 1;
2423 upper = seq->length;
2424 middle = 0;
2425 while ( lower <= upper && !found) {
2426 middle = lower + (upper - lower) / 2;
2427 if ( node == seq->buffer[middle - 1].node )
2428 found = 1;
2429 else if ( node < seq->buffer[middle - 1].node )
2430 upper = middle - 1;
2431 else
2432 lower = middle + 1;
2433 }
2434
2435 /* Return position */
2436 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2437 return middle;
2438 else
2439 return middle - 1;
2440}
2441
2442
2443/**
2444 * xmlParserAddNodeInfo:
2445 * @ctxt: an XML parser context
2446 * @info: a node info sequence pointer
2447 *
2448 * Insert node info record into the sorted sequence
2449 */
2450void
2451xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2452 const xmlParserNodeInfo* info)
2453{
2454 unsigned long pos;
2455 static unsigned int block_size = 5;
2456
2457 /* Find pos and check to see if node is already in the sequence */
2458 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2459 if ( pos < ctxt->node_seq.length
2460 && ctxt->node_seq.buffer[pos].node == info->node ) {
2461 ctxt->node_seq.buffer[pos] = *info;
2462 }
2463
2464 /* Otherwise, we need to add new node to buffer */
2465 else {
2466 /* Expand buffer by 5 if needed */
2467 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2468 xmlParserNodeInfo* tmp_buffer;
2469 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2470 *(ctxt->node_seq.maximum + block_size));
2471
2472 if ( ctxt->node_seq.buffer == NULL )
2473 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2474 else
2475 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2476
2477 if ( tmp_buffer == NULL ) {
2478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2479 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2480 ctxt->errNo = XML_ERR_NO_MEMORY;
2481 return;
2482 }
2483 ctxt->node_seq.buffer = tmp_buffer;
2484 ctxt->node_seq.maximum += block_size;
2485 }
2486
2487 /* If position is not at end, move elements out of the way */
2488 if ( pos != ctxt->node_seq.length ) {
2489 unsigned long i;
2490
2491 for ( i = ctxt->node_seq.length; i > pos; i-- )
2492 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2493 }
2494
2495 /* Copy element and increase length */
2496 ctxt->node_seq.buffer[pos] = *info;
2497 ctxt->node_seq.length++;
2498 }
2499}
2500
2501/************************************************************************
2502 * *
2503 * Deprecated functions kept for compatibility *
2504 * *
2505 ************************************************************************/
2506
2507/*
2508 * xmlCheckLanguageID
2509 * @lang: pointer to the string value
2510 *
2511 * Checks that the value conforms to the LanguageID production:
2512 *
2513 * NOTE: this is somewhat deprecated, those productions were removed from
2514 * the XML Second edition.
2515 *
2516 * [33] LanguageID ::= Langcode ('-' Subcode)*
2517 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2518 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2519 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2520 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2521 * [38] Subcode ::= ([a-z] | [A-Z])+
2522 *
2523 * Returns 1 if correct 0 otherwise
2524 **/
2525int
2526xmlCheckLanguageID(const xmlChar *lang) {
2527 const xmlChar *cur = lang;
2528
2529 if (cur == NULL)
2530 return(0);
2531 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2532 ((cur[0] == 'I') && (cur[1] == '-'))) {
2533 /*
2534 * IANA code
2535 */
2536 cur += 2;
2537 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2538 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2539 cur++;
2540 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2541 ((cur[0] == 'X') && (cur[1] == '-'))) {
2542 /*
2543 * User code
2544 */
2545 cur += 2;
2546 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2547 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2548 cur++;
2549 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2550 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2551 /*
2552 * ISO639
2553 */
2554 cur++;
2555 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2556 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2557 cur++;
2558 else
2559 return(0);
2560 } else
2561 return(0);
2562 while (cur[0] != 0) { /* non input consuming */
2563 if (cur[0] != '-')
2564 return(0);
2565 cur++;
2566 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2567 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2568 cur++;
2569 else
2570 return(0);
2571 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2572 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2573 cur++;
2574 }
2575 return(1);
2576}
2577
2578/**
2579 * xmlDecodeEntities:
2580 * @ctxt: the parser context
2581 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2582 * @len: the len to decode (in bytes !), -1 for no size limit
2583 * @end: an end marker xmlChar, 0 if none
2584 * @end2: an end marker xmlChar, 0 if none
2585 * @end3: an end marker xmlChar, 0 if none
2586 *
2587 * This function is deprecated, we now always process entities content
2588 * through xmlStringDecodeEntities
2589 *
2590 * TODO: remove it in next major release.
2591 *
2592 * [67] Reference ::= EntityRef | CharRef
2593 *
2594 * [69] PEReference ::= '%' Name ';'
2595 *
2596 * Returns A newly allocated string with the substitution done. The caller
2597 * must deallocate it !
2598 */
2599xmlChar *
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002600xmlDecodeEntities(xmlParserCtxtPtr ctxt UNUSED, int len UNUSED, int what UNUSED,
2601 xmlChar end UNUSED, xmlChar end2 UNUSED, xmlChar end3 UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002602#if 0
2603 xmlChar *buffer = NULL;
2604 unsigned int buffer_size = 0;
2605 unsigned int nbchars = 0;
2606
2607 xmlChar *current = NULL;
2608 xmlEntityPtr ent;
2609 unsigned int max = (unsigned int) len;
2610 int c,l;
2611#endif
2612
2613 static int deprecated = 0;
2614 if (!deprecated) {
2615 xmlGenericError(xmlGenericErrorContext,
2616 "xmlDecodeEntities() deprecated function reached\n");
2617 deprecated = 1;
2618 }
2619
2620#if 0
2621 if (ctxt->depth > 40) {
2622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2623 ctxt->sax->error(ctxt->userData,
2624 "Detected entity reference loop\n");
2625 ctxt->wellFormed = 0;
2626 ctxt->disableSAX = 1;
2627 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2628 return(NULL);
2629 }
2630
2631 /*
2632 * allocate a translation buffer.
2633 */
2634 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2635 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2636 if (buffer == NULL) {
2637 perror("xmlDecodeEntities: malloc failed");
2638 return(NULL);
2639 }
2640
2641 /*
2642 * Ok loop until we reach one of the ending char or a size limit.
2643 */
2644 GROW;
2645 c = CUR_CHAR(l);
2646 while ((nbchars < max) && (c != end) && /* NOTUSED */
2647 (c != end2) && (c != end3)) {
2648 GROW;
2649 if (c == 0) break;
2650 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2651 int val = xmlParseCharRef(ctxt);
2652 COPY_BUF(0,buffer,nbchars,val);
2653 NEXTL(l);
2654 } else if ((c == '&') && (ctxt->token != '&') &&
2655 (what & XML_SUBSTITUTE_REF)) {
2656 if (xmlParserDebugEntities)
2657 xmlGenericError(xmlGenericErrorContext,
2658 "decoding Entity Reference\n");
2659 ent = xmlParseEntityRef(ctxt);
2660 if ((ent != NULL) &&
2661 (ctxt->replaceEntities != 0)) {
2662 current = ent->content;
2663 while (*current != 0) { /* non input consuming loop */
2664 buffer[nbchars++] = *current++;
2665 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2666 growBuffer(buffer);
2667 }
2668 }
2669 } else if (ent != NULL) {
2670 const xmlChar *cur = ent->name;
2671
2672 buffer[nbchars++] = '&';
2673 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2674 growBuffer(buffer);
2675 }
2676 while (*cur != 0) { /* non input consuming loop */
2677 buffer[nbchars++] = *cur++;
2678 }
2679 buffer[nbchars++] = ';';
2680 }
2681 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2682 /*
2683 * a PEReference induce to switch the entity flow,
2684 * we break here to flush the current set of chars
2685 * parsed if any. We will be called back later.
2686 */
2687 if (xmlParserDebugEntities)
2688 xmlGenericError(xmlGenericErrorContext,
2689 "decoding PE Reference\n");
2690 if (nbchars != 0) break;
2691
2692 xmlParsePEReference(ctxt);
2693
2694 /*
2695 * Pop-up of finished entities.
2696 */
2697 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2698 xmlPopInput(ctxt);
2699
2700 break;
2701 } else {
2702 COPY_BUF(l,buffer,nbchars,c);
2703 NEXTL(l);
2704 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2705 growBuffer(buffer);
2706 }
2707 }
2708 c = CUR_CHAR(l);
2709 }
2710 buffer[nbchars++] = 0;
2711 return(buffer);
2712#endif
2713 return(NULL);
2714}
2715
2716/**
2717 * xmlNamespaceParseNCName:
2718 * @ctxt: an XML parser context
2719 *
2720 * parse an XML namespace name.
2721 *
2722 * TODO: this seems not in use anymore, the namespace handling is done on
2723 * top of the SAX interfaces, i.e. not on raw input.
2724 *
2725 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2726 *
2727 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2728 * CombiningChar | Extender
2729 *
2730 * Returns the namespace name or NULL
2731 */
2732
2733xmlChar *
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002734xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002735#if 0
2736 xmlChar buf[XML_MAX_NAMELEN + 5];
2737 int len = 0, l;
2738 int cur = CUR_CHAR(l);
2739#endif
2740
2741 static int deprecated = 0;
2742 if (!deprecated) {
2743 xmlGenericError(xmlGenericErrorContext,
2744 "xmlNamespaceParseNCName() deprecated function reached\n");
2745 deprecated = 1;
2746 }
2747
2748#if 0
2749 /* load first the value of the char !!! */
2750 GROW;
2751 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2752
2753xmlGenericError(xmlGenericErrorContext,
2754 "xmlNamespaceParseNCName: reached loop 3\n");
2755 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2756 (cur == '.') || (cur == '-') ||
2757 (cur == '_') ||
2758 (IS_COMBINING(cur)) ||
2759 (IS_EXTENDER(cur))) {
2760 COPY_BUF(l,buf,len,cur);
2761 NEXTL(l);
2762 cur = CUR_CHAR(l);
2763 if (len >= XML_MAX_NAMELEN) {
2764 xmlGenericError(xmlGenericErrorContext,
2765 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2766 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2767 (cur == '.') || (cur == '-') ||
2768 (cur == '_') ||
2769 (IS_COMBINING(cur)) ||
2770 (IS_EXTENDER(cur))) {
2771 NEXTL(l);
2772 cur = CUR_CHAR(l);
2773 }
2774 break;
2775 }
2776 }
2777 return(xmlStrndup(buf, len));
2778#endif
2779 return(NULL);
2780}
2781
2782/**
2783 * xmlNamespaceParseQName:
2784 * @ctxt: an XML parser context
2785 * @prefix: a xmlChar **
2786 *
2787 * TODO: this seems not in use anymore, the namespace handling is done on
2788 * top of the SAX interfaces, i.e. not on raw input.
2789 *
2790 * parse an XML qualified name
2791 *
2792 * [NS 5] QName ::= (Prefix ':')? LocalPart
2793 *
2794 * [NS 6] Prefix ::= NCName
2795 *
2796 * [NS 7] LocalPart ::= NCName
2797 *
2798 * Returns the local part, and prefix is updated
2799 * to get the Prefix if any.
2800 */
2801
2802xmlChar *
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002803xmlNamespaceParseQName(xmlParserCtxtPtr ctxt UNUSED, xmlChar **prefix UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002804
2805 static int deprecated = 0;
2806 if (!deprecated) {
2807 xmlGenericError(xmlGenericErrorContext,
2808 "xmlNamespaceParseQName() deprecated function reached\n");
2809 deprecated = 1;
2810 }
2811
2812#if 0
2813 xmlChar *ret = NULL;
2814
2815 *prefix = NULL;
2816 ret = xmlNamespaceParseNCName(ctxt);
2817 if (RAW == ':') {
2818 *prefix = ret;
2819 NEXT;
2820 ret = xmlNamespaceParseNCName(ctxt);
2821 }
2822
2823 return(ret);
2824#endif
2825 return(NULL);
2826}
2827
2828/**
2829 * xmlNamespaceParseNSDef:
2830 * @ctxt: an XML parser context
2831 *
2832 * parse a namespace prefix declaration
2833 *
2834 * TODO: this seems not in use anymore, the namespace handling is done on
2835 * top of the SAX interfaces, i.e. not on raw input.
2836 *
2837 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2838 *
2839 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2840 *
2841 * Returns the namespace name
2842 */
2843
2844xmlChar *
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002845xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002846 static int deprecated = 0;
2847 if (!deprecated) {
2848 xmlGenericError(xmlGenericErrorContext,
2849 "xmlNamespaceParseNSDef() deprecated function reached\n");
2850 deprecated = 1;
2851 }
2852 return(NULL);
2853#if 0
2854 xmlChar *name = NULL;
2855
2856 if ((RAW == 'x') && (NXT(1) == 'm') &&
2857 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2858 (NXT(4) == 's')) {
2859 SKIP(5);
2860 if (RAW == ':') {
2861 NEXT;
2862 name = xmlNamespaceParseNCName(ctxt);
2863 }
2864 }
2865 return(name);
2866#endif
2867}
2868
2869/**
2870 * xmlParseQuotedString:
2871 * @ctxt: an XML parser context
2872 *
2873 * Parse and return a string between quotes or doublequotes
2874 *
2875 * TODO: Deprecated, to be removed at next drop of binary compatibility
2876 *
2877 * Returns the string parser or NULL.
2878 */
2879xmlChar *
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002880xmlParseQuotedString(xmlParserCtxtPtr ctxt UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002881 static int deprecated = 0;
2882 if (!deprecated) {
2883 xmlGenericError(xmlGenericErrorContext,
2884 "xmlParseQuotedString() deprecated function reached\n");
2885 deprecated = 1;
2886 }
2887 return(NULL);
2888
2889#if 0
2890 xmlChar *buf = NULL;
2891 int len = 0,l;
2892 int size = XML_PARSER_BUFFER_SIZE;
2893 int c;
2894
2895 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2896 if (buf == NULL) {
2897 xmlGenericError(xmlGenericErrorContext,
2898 "malloc of %d byte failed\n", size);
2899 return(NULL);
2900 }
2901xmlGenericError(xmlGenericErrorContext,
2902 "xmlParseQuotedString: reached loop 4\n");
2903 if (RAW == '"') {
2904 NEXT;
2905 c = CUR_CHAR(l);
2906 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2907 if (len + 5 >= size) {
2908 size *= 2;
2909 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2910 if (buf == NULL) {
2911 xmlGenericError(xmlGenericErrorContext,
2912 "realloc of %d byte failed\n", size);
2913 return(NULL);
2914 }
2915 }
2916 COPY_BUF(l,buf,len,c);
2917 NEXTL(l);
2918 c = CUR_CHAR(l);
2919 }
2920 if (c != '"') {
2921 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2923 ctxt->sax->error(ctxt->userData,
2924 "String not closed \"%.50s\"\n", buf);
2925 ctxt->wellFormed = 0;
2926 ctxt->disableSAX = 1;
2927 } else {
2928 NEXT;
2929 }
2930 } else if (RAW == '\''){
2931 NEXT;
2932 c = CUR;
2933 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2934 if (len + 1 >= size) {
2935 size *= 2;
2936 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2937 if (buf == NULL) {
2938 xmlGenericError(xmlGenericErrorContext,
2939 "realloc of %d byte failed\n", size);
2940 return(NULL);
2941 }
2942 }
2943 buf[len++] = c;
2944 NEXT;
2945 c = CUR;
2946 }
2947 if (RAW != '\'') {
2948 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2950 ctxt->sax->error(ctxt->userData,
2951 "String not closed \"%.50s\"\n", buf);
2952 ctxt->wellFormed = 0;
2953 ctxt->disableSAX = 1;
2954 } else {
2955 NEXT;
2956 }
2957 }
2958 return(buf);
2959#endif
2960}
2961
2962/**
2963 * xmlParseNamespace:
2964 * @ctxt: an XML parser context
2965 *
2966 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2967 *
2968 * This is what the older xml-name Working Draft specified, a bunch of
2969 * other stuff may still rely on it, so support is still here as
2970 * if it was declared on the root of the Tree:-(
2971 *
2972 * TODO: remove from library
2973 *
2974 * To be removed at next drop of binary compatibility
2975 */
2976
2977void
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002978xmlParseNamespace(xmlParserCtxtPtr ctxt UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002979 static int deprecated = 0;
2980 if (!deprecated) {
2981 xmlGenericError(xmlGenericErrorContext,
2982 "xmlParseNamespace() deprecated function reached\n");
2983 deprecated = 1;
2984 }
2985
2986#if 0
2987 xmlChar *href = NULL;
2988 xmlChar *prefix = NULL;
2989 int garbage = 0;
2990
2991 /*
2992 * We just skipped "namespace" or "xml:namespace"
2993 */
2994 SKIP_BLANKS;
2995
2996xmlGenericError(xmlGenericErrorContext,
2997 "xmlParseNamespace: reached loop 5\n");
2998 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2999 /*
3000 * We can have "ns" or "prefix" attributes
3001 * Old encoding as 'href' or 'AS' attributes is still supported
3002 */
3003 if ((RAW == 'n') && (NXT(1) == 's')) {
3004 garbage = 0;
3005 SKIP(2);
3006 SKIP_BLANKS;
3007
3008 if (RAW != '=') continue;
3009 NEXT;
3010 SKIP_BLANKS;
3011
3012 href = xmlParseQuotedString(ctxt);
3013 SKIP_BLANKS;
3014 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3015 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3016 garbage = 0;
3017 SKIP(4);
3018 SKIP_BLANKS;
3019
3020 if (RAW != '=') continue;
3021 NEXT;
3022 SKIP_BLANKS;
3023
3024 href = xmlParseQuotedString(ctxt);
3025 SKIP_BLANKS;
3026 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3027 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3028 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3029 garbage = 0;
3030 SKIP(6);
3031 SKIP_BLANKS;
3032
3033 if (RAW != '=') continue;
3034 NEXT;
3035 SKIP_BLANKS;
3036
3037 prefix = xmlParseQuotedString(ctxt);
3038 SKIP_BLANKS;
3039 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3040 garbage = 0;
3041 SKIP(2);
3042 SKIP_BLANKS;
3043
3044 if (RAW != '=') continue;
3045 NEXT;
3046 SKIP_BLANKS;
3047
3048 prefix = xmlParseQuotedString(ctxt);
3049 SKIP_BLANKS;
3050 } else if ((RAW == '?') && (NXT(1) == '>')) {
3051 garbage = 0;
3052 NEXT;
3053 } else {
3054 /*
3055 * Found garbage when parsing the namespace
3056 */
3057 if (!garbage) {
3058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3059 ctxt->sax->error(ctxt->userData,
3060 "xmlParseNamespace found garbage\n");
3061 }
3062 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3063 ctxt->wellFormed = 0;
3064 ctxt->disableSAX = 1;
3065 NEXT;
3066 }
3067 }
3068
3069 MOVETO_ENDTAG(CUR_PTR);
3070 NEXT;
3071
3072 /*
3073 * Register the DTD.
3074 if (href != NULL)
3075 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3076 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3077 */
3078
3079 if (prefix != NULL) xmlFree(prefix);
3080 if (href != NULL) xmlFree(href);
3081#endif
3082}
3083
3084/**
3085 * xmlScanName:
3086 * @ctxt: an XML parser context
3087 *
3088 * Trickery: parse an XML name but without consuming the input flow
3089 * Needed for rollback cases. Used only when parsing entities references.
3090 *
3091 * TODO: seems deprecated now, only used in the default part of
3092 * xmlParserHandleReference
3093 *
3094 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3095 * CombiningChar | Extender
3096 *
3097 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3098 *
3099 * [6] Names ::= Name (S Name)*
3100 *
3101 * Returns the Name parsed or NULL
3102 */
3103
3104xmlChar *
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003105xmlScanName(xmlParserCtxtPtr ctxt UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003106 static int deprecated = 0;
3107 if (!deprecated) {
3108 xmlGenericError(xmlGenericErrorContext,
3109 "xmlScanName() deprecated function reached\n");
3110 deprecated = 1;
3111 }
3112 return(NULL);
3113
3114#if 0
3115 xmlChar buf[XML_MAX_NAMELEN];
3116 int len = 0;
3117
3118 GROW;
3119 if (!IS_LETTER(RAW) && (RAW != '_') &&
3120 (RAW != ':')) {
3121 return(NULL);
3122 }
3123
3124
3125 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3126 (NXT(len) == '.') || (NXT(len) == '-') ||
3127 (NXT(len) == '_') || (NXT(len) == ':') ||
3128 (IS_COMBINING(NXT(len))) ||
3129 (IS_EXTENDER(NXT(len)))) {
3130 GROW;
3131 buf[len] = NXT(len);
3132 len++;
3133 if (len >= XML_MAX_NAMELEN) {
3134 xmlGenericError(xmlGenericErrorContext,
3135 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3136 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3137 (IS_DIGIT(NXT(len))) ||
3138 (NXT(len) == '.') || (NXT(len) == '-') ||
3139 (NXT(len) == '_') || (NXT(len) == ':') ||
3140 (IS_COMBINING(NXT(len))) ||
3141 (IS_EXTENDER(NXT(len))))
3142 len++;
3143 break;
3144 }
3145 }
3146 return(xmlStrndup(buf, len));
3147#endif
3148}
3149
3150/**
3151 * xmlParserHandleReference:
3152 * @ctxt: the parser context
3153 *
3154 * TODO: Remove, now deprecated ... the test is done directly in the
3155 * content parsing
3156 * routines.
3157 *
3158 * [67] Reference ::= EntityRef | CharRef
3159 *
3160 * [68] EntityRef ::= '&' Name ';'
3161 *
3162 * [ WFC: Entity Declared ]
3163 * the Name given in the entity reference must match that in an entity
3164 * declaration, except that well-formed documents need not declare any
3165 * of the following entities: amp, lt, gt, apos, quot.
3166 *
3167 * [ WFC: Parsed Entity ]
3168 * An entity reference must not contain the name of an unparsed entity
3169 *
3170 * [66] CharRef ::= '&#' [0-9]+ ';' |
3171 * '&#x' [0-9a-fA-F]+ ';'
3172 *
3173 * A PEReference may have been detectect in the current input stream
3174 * the handling is done accordingly to
3175 * http://www.w3.org/TR/REC-xml#entproc
3176 */
3177void
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003178xmlParserHandleReference(xmlParserCtxtPtr ctxt UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003179 static int deprecated = 0;
3180 if (!deprecated) {
3181 xmlGenericError(xmlGenericErrorContext,
3182 "xmlParserHandleReference() deprecated function reached\n");
3183 deprecated = 1;
3184 }
3185
3186#if 0
3187 xmlParserInputPtr input;
3188 xmlChar *name;
3189 xmlEntityPtr ent = NULL;
3190
3191 if (ctxt->token != 0) {
3192 return;
3193 }
3194 if (RAW != '&') return;
3195 GROW;
3196 if ((RAW == '&') && (NXT(1) == '#')) {
3197 switch(ctxt->instate) {
3198 case XML_PARSER_ENTITY_DECL:
3199 case XML_PARSER_PI:
3200 case XML_PARSER_CDATA_SECTION:
3201 case XML_PARSER_COMMENT:
3202 case XML_PARSER_SYSTEM_LITERAL:
3203 /* we just ignore it there */
3204 return;
3205 case XML_PARSER_START_TAG:
3206 return;
3207 case XML_PARSER_END_TAG:
3208 return;
3209 case XML_PARSER_EOF:
3210 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3212 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3213 ctxt->wellFormed = 0;
3214 ctxt->disableSAX = 1;
3215 return;
3216 case XML_PARSER_PROLOG:
3217 case XML_PARSER_START:
3218 case XML_PARSER_MISC:
3219 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3221 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3222 ctxt->wellFormed = 0;
3223 ctxt->disableSAX = 1;
3224 return;
3225 case XML_PARSER_EPILOG:
3226 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3228 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3229 ctxt->wellFormed = 0;
3230 ctxt->disableSAX = 1;
3231 return;
3232 case XML_PARSER_DTD:
3233 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3235 ctxt->sax->error(ctxt->userData,
3236 "CharRef are forbiden in DTDs!\n");
3237 ctxt->wellFormed = 0;
3238 ctxt->disableSAX = 1;
3239 return;
3240 case XML_PARSER_ENTITY_VALUE:
3241 /*
3242 * NOTE: in the case of entity values, we don't do the
3243 * substitution here since we need the literal
3244 * entity value to be able to save the internal
3245 * subset of the document.
3246 * This will be handled by xmlStringDecodeEntities
3247 */
3248 return;
3249 case XML_PARSER_CONTENT:
3250 return;
3251 case XML_PARSER_ATTRIBUTE_VALUE:
3252 /* ctxt->token = xmlParseCharRef(ctxt); */
3253 return;
3254 case XML_PARSER_IGNORE:
3255 return;
3256 }
3257 return;
3258 }
3259
3260 switch(ctxt->instate) {
3261 case XML_PARSER_CDATA_SECTION:
3262 return;
3263 case XML_PARSER_PI:
3264 case XML_PARSER_COMMENT:
3265 case XML_PARSER_SYSTEM_LITERAL:
3266 case XML_PARSER_CONTENT:
3267 return;
3268 case XML_PARSER_START_TAG:
3269 return;
3270 case XML_PARSER_END_TAG:
3271 return;
3272 case XML_PARSER_EOF:
3273 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3275 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3276 ctxt->wellFormed = 0;
3277 ctxt->disableSAX = 1;
3278 return;
3279 case XML_PARSER_PROLOG:
3280 case XML_PARSER_START:
3281 case XML_PARSER_MISC:
3282 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3284 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3285 ctxt->wellFormed = 0;
3286 ctxt->disableSAX = 1;
3287 return;
3288 case XML_PARSER_EPILOG:
3289 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3291 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3292 ctxt->wellFormed = 0;
3293 ctxt->disableSAX = 1;
3294 return;
3295 case XML_PARSER_ENTITY_VALUE:
3296 /*
3297 * NOTE: in the case of entity values, we don't do the
3298 * substitution here since we need the literal
3299 * entity value to be able to save the internal
3300 * subset of the document.
3301 * This will be handled by xmlStringDecodeEntities
3302 */
3303 return;
3304 case XML_PARSER_ATTRIBUTE_VALUE:
3305 /*
3306 * NOTE: in the case of attributes values, we don't do the
3307 * substitution here unless we are in a mode where
3308 * the parser is explicitely asked to substitute
3309 * entities. The SAX callback is called with values
3310 * without entity substitution.
3311 * This will then be handled by xmlStringDecodeEntities
3312 */
3313 return;
3314 case XML_PARSER_ENTITY_DECL:
3315 /*
3316 * we just ignore it there
3317 * the substitution will be done once the entity is referenced
3318 */
3319 return;
3320 case XML_PARSER_DTD:
3321 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3323 ctxt->sax->error(ctxt->userData,
3324 "Entity references are forbiden in DTDs!\n");
3325 ctxt->wellFormed = 0;
3326 ctxt->disableSAX = 1;
3327 return;
3328 case XML_PARSER_IGNORE:
3329 return;
3330 }
3331
3332/* TODO: this seems not reached anymore .... Verify ... */
3333xmlGenericError(xmlGenericErrorContext,
3334 "Reached deprecated section in xmlParserHandleReference()\n");
3335xmlGenericError(xmlGenericErrorContext,
3336 "Please forward the document to Daniel.Veillard@w3.org\n");
3337xmlGenericError(xmlGenericErrorContext,
3338 "indicating the version: %s, thanks !\n", xmlParserVersion);
3339 NEXT;
3340 name = xmlScanName(ctxt);
3341 if (name == NULL) {
3342 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3344 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 ctxt->token = '&';
3348 return;
3349 }
3350 if (NXT(xmlStrlen(name)) != ';') {
3351 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353 ctxt->sax->error(ctxt->userData,
3354 "Entity reference: ';' expected\n");
3355 ctxt->wellFormed = 0;
3356 ctxt->disableSAX = 1;
3357 ctxt->token = '&';
3358 xmlFree(name);
3359 return;
3360 }
3361 SKIP(xmlStrlen(name) + 1);
3362 if (ctxt->sax != NULL) {
3363 if (ctxt->sax->getEntity != NULL)
3364 ent = ctxt->sax->getEntity(ctxt->userData, name);
3365 }
3366
3367 /*
3368 * [ WFC: Entity Declared ]
3369 * the Name given in the entity reference must match that in an entity
3370 * declaration, except that well-formed documents need not declare any
3371 * of the following entities: amp, lt, gt, apos, quot.
3372 */
3373 if (ent == NULL)
3374 ent = xmlGetPredefinedEntity(name);
3375 if (ent == NULL) {
3376 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3378 ctxt->sax->error(ctxt->userData,
3379 "Entity reference: entity %s not declared\n",
3380 name);
3381 ctxt->wellFormed = 0;
3382 ctxt->disableSAX = 1;
3383 xmlFree(name);
3384 return;
3385 }
3386
3387 /*
3388 * [ WFC: Parsed Entity ]
3389 * An entity reference must not contain the name of an unparsed entity
3390 */
3391 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3392 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3394 ctxt->sax->error(ctxt->userData,
3395 "Entity reference to unparsed entity %s\n", name);
3396 ctxt->wellFormed = 0;
3397 ctxt->disableSAX = 1;
3398 }
3399
3400 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3401 ctxt->token = ent->content[0];
3402 xmlFree(name);
3403 return;
3404 }
3405 input = xmlNewEntityInputStream(ctxt, ent);
3406 xmlPushInput(ctxt, input);
3407 xmlFree(name);
3408#endif
3409 return;
3410}
3411
3412/**
3413 * xmlHandleEntity:
3414 * @ctxt: an XML parser context
3415 * @entity: an XML entity pointer.
3416 *
3417 * Default handling of defined entities, when should we define a new input
3418 * stream ? When do we just handle that as a set of chars ?
3419 *
3420 * OBSOLETE: to be removed at some point.
3421 */
3422
3423void
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003424xmlHandleEntity(xmlParserCtxtPtr ctxt UNUSED, xmlEntityPtr entity UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003425 static int deprecated = 0;
3426 if (!deprecated) {
3427 xmlGenericError(xmlGenericErrorContext,
3428 "xmlHandleEntity() deprecated function reached\n");
3429 deprecated = 1;
3430 }
3431
3432#if 0
3433 int len;
3434 xmlParserInputPtr input;
3435
3436 if (entity->content == NULL) {
3437 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3439 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3440 entity->name);
3441 ctxt->wellFormed = 0;
3442 ctxt->disableSAX = 1;
3443 return;
3444 }
3445 len = xmlStrlen(entity->content);
3446 if (len <= 2) goto handle_as_char;
3447
3448 /*
3449 * Redefine its content as an input stream.
3450 */
3451 input = xmlNewEntityInputStream(ctxt, entity);
3452 xmlPushInput(ctxt, input);
3453 return;
3454
3455handle_as_char:
3456 /*
3457 * Just handle the content as a set of chars.
3458 */
3459 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3460 (ctxt->sax->characters != NULL))
3461 ctxt->sax->characters(ctxt->userData, entity->content, len);
3462#endif
3463}
3464
3465/**
3466 * xmlNewGlobalNs:
3467 * @doc: the document carrying the namespace
3468 * @href: the URI associated
3469 * @prefix: the prefix for the namespace
3470 *
3471 * Creation of a Namespace, the old way using PI and without scoping
3472 * DEPRECATED !!!
3473 * It now create a namespace on the root element of the document if found.
3474 * Returns NULL this functionnality had been removed
3475 */
3476xmlNsPtr
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003477xmlNewGlobalNs(xmlDocPtr doc UNUSED, const xmlChar *href UNUSED,
3478 const xmlChar *prefix UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003479 static int deprecated = 0;
3480 if (!deprecated) {
3481 xmlGenericError(xmlGenericErrorContext,
3482 "xmlNewGlobalNs() deprecated function reached\n");
3483 deprecated = 1;
3484 }
3485 return(NULL);
3486#if 0
3487 xmlNodePtr root;
3488
3489 xmlNsPtr cur;
3490
3491 root = xmlDocGetRootElement(doc);
3492 if (root != NULL)
3493 return(xmlNewNs(root, href, prefix));
3494
3495 /*
3496 * if there is no root element yet, create an old Namespace type
3497 * and it will be moved to the root at save time.
3498 */
3499 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3500 if (cur == NULL) {
3501 xmlGenericError(xmlGenericErrorContext,
3502 "xmlNewGlobalNs : malloc failed\n");
3503 return(NULL);
3504 }
3505 memset(cur, 0, sizeof(xmlNs));
3506 cur->type = XML_GLOBAL_NAMESPACE;
3507
3508 if (href != NULL)
3509 cur->href = xmlStrdup(href);
3510 if (prefix != NULL)
3511 cur->prefix = xmlStrdup(prefix);
3512
3513 /*
3514 * Add it at the end to preserve parsing order ...
3515 */
3516 if (doc != NULL) {
3517 if (doc->oldNs == NULL) {
3518 doc->oldNs = cur;
3519 } else {
3520 xmlNsPtr prev = doc->oldNs;
3521
3522 while (prev->next != NULL) prev = prev->next;
3523 prev->next = cur;
3524 }
3525 }
3526
3527 return(NULL);
3528#endif
3529}
3530
3531/**
3532 * xmlUpgradeOldNs:
3533 * @doc: a document pointer
3534 *
3535 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3536 * DEPRECATED
3537 */
3538void
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003539xmlUpgradeOldNs(xmlDocPtr doc UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003540 static int deprecated = 0;
3541 if (!deprecated) {
3542 xmlGenericError(xmlGenericErrorContext,
3543 "xmlNewGlobalNs() deprecated function reached\n");
3544 deprecated = 1;
3545 }
3546#if 0
3547 xmlNsPtr cur;
3548
3549 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3550 if (doc->children == NULL) {
3551#ifdef DEBUG_TREE
3552 xmlGenericError(xmlGenericErrorContext,
3553 "xmlUpgradeOldNs: failed no root !\n");
3554#endif
3555 return;
3556 }
3557
3558 cur = doc->oldNs;
3559 while (cur->next != NULL) {
3560 cur->type = XML_LOCAL_NAMESPACE;
3561 cur = cur->next;
3562 }
3563 cur->type = XML_LOCAL_NAMESPACE;
3564 cur->next = doc->children->nsDef;
3565 doc->children->nsDef = doc->oldNs;
3566 doc->oldNs = NULL;
3567#endif
3568}
3569