blob: e0cdf31621b02bec09571d2e6b92ef66dd07742f [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * Daniel.Veillard@w3.org
8 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50
Daniel Veillard56a4cb82001-03-24 17:00:36 +000051void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000052
53/************************************************************************
54 * *
55 * Version and Features handling *
56 * *
57 ************************************************************************/
58const char *xmlParserVersion = LIBXML_VERSION_STRING;
59
60/*
61 * xmlCheckVersion:
62 * @version: the include version number
63 *
64 * check the compiled lib version against the include one.
65 * This can warn or immediately kill the application
66 */
67void
68xmlCheckVersion(int version) {
69 int myversion = (int) LIBXML_VERSION;
70
71 if ((myversion / 10000) != (version / 10000)) {
72 xmlGenericError(xmlGenericErrorContext,
73 "Fatal: program compiled against libxml %d using libxml %d\n",
74 (version / 10000), (myversion / 10000));
75 exit(1);
76 }
77 if ((myversion / 100) < (version / 100)) {
78 xmlGenericError(xmlGenericErrorContext,
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
81 }
82}
83
84
/*
 * Names of the features exposed through xmlGetFeaturesList().
 * The order of the entries is the order in which they are copied out.
 */
const char *xmlFeaturesList[] = {
    /* parser context settings and state */
    "validate",                 "load subset",
    "keep blanks",              "disable SAX",
    "fetch external entities",  "substitute entities",
    "gather line info",         "user data",
    "is html",                  "is standalone",
    "stop parser",              "document",
    "is well formed",           "is valid",

    /* the SAX handler block as a whole */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset"
};
129
130/*
131 * xmlGetFeaturesList:
132 * @len: the length of the features name array (input/output)
133 * @result: an array of string to be filled with the features name.
134 *
135 * Copy at most *@len feature names into the @result array
136 *
137 * Returns -1 in case or error, or the total number of features,
138 * len is updated with the number of strings copied,
139 * strings must not be deallocated
140 */
141int
142xmlGetFeaturesList(int *len, const char **result) {
143 int ret, i;
144
145 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
146 if ((len == NULL) || (result == NULL))
147 return(ret);
148 if ((*len < 0) || (*len >= 1000))
149 return(-1);
150 if (*len > ret)
151 *len = ret;
152 for (i = 0;i < *len;i++)
153 result[i] = xmlFeaturesList[i];
154 return(ret);
155}
156
157/*
158 * xmlGetFeature:
159 * @ctxt: an XML/HTML parser context
160 * @name: the feature name
161 * @result: location to store the result
162 *
163 * Read the current value of one feature of this parser instance
164 *
165 * Returns -1 in case or error, 0 otherwise
166 */
167int
168xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
169 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
170 return(-1);
171
172 if (!strcmp(name, "validate")) {
173 *((int *) result) = ctxt->validate;
174 } else if (!strcmp(name, "keep blanks")) {
175 *((int *) result) = ctxt->keepBlanks;
176 } else if (!strcmp(name, "disable SAX")) {
177 *((int *) result) = ctxt->disableSAX;
178 } else if (!strcmp(name, "fetch external entities")) {
179 *((int *) result) = ctxt->loadsubset;
180 } else if (!strcmp(name, "substitute entities")) {
181 *((int *) result) = ctxt->replaceEntities;
182 } else if (!strcmp(name, "gather line info")) {
183 *((int *) result) = ctxt->record_info;
184 } else if (!strcmp(name, "user data")) {
185 *((void **)result) = ctxt->userData;
186 } else if (!strcmp(name, "is html")) {
187 *((int *) result) = ctxt->html;
188 } else if (!strcmp(name, "is standalone")) {
189 *((int *) result) = ctxt->standalone;
190 } else if (!strcmp(name, "document")) {
191 *((xmlDocPtr *) result) = ctxt->myDoc;
192 } else if (!strcmp(name, "is well formed")) {
193 *((int *) result) = ctxt->wellFormed;
194 } else if (!strcmp(name, "is valid")) {
195 *((int *) result) = ctxt->valid;
196 } else if (!strcmp(name, "SAX block")) {
197 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
198 } else if (!strcmp(name, "SAX function internalSubset")) {
199 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
200 } else if (!strcmp(name, "SAX function isStandalone")) {
201 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
202 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
203 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
204 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
205 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
206 } else if (!strcmp(name, "SAX function resolveEntity")) {
207 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
208 } else if (!strcmp(name, "SAX function getEntity")) {
209 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
210 } else if (!strcmp(name, "SAX function entityDecl")) {
211 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
212 } else if (!strcmp(name, "SAX function notationDecl")) {
213 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
214 } else if (!strcmp(name, "SAX function attributeDecl")) {
215 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
216 } else if (!strcmp(name, "SAX function elementDecl")) {
217 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
218 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
219 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
220 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
221 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
222 } else if (!strcmp(name, "SAX function startDocument")) {
223 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
224 } else if (!strcmp(name, "SAX function endDocument")) {
225 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
226 } else if (!strcmp(name, "SAX function startElement")) {
227 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
228 } else if (!strcmp(name, "SAX function endElement")) {
229 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
230 } else if (!strcmp(name, "SAX function reference")) {
231 *((referenceSAXFunc *) result) = ctxt->sax->reference;
232 } else if (!strcmp(name, "SAX function characters")) {
233 *((charactersSAXFunc *) result) = ctxt->sax->characters;
234 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
235 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
236 } else if (!strcmp(name, "SAX function processingInstruction")) {
237 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
238 } else if (!strcmp(name, "SAX function comment")) {
239 *((commentSAXFunc *) result) = ctxt->sax->comment;
240 } else if (!strcmp(name, "SAX function warning")) {
241 *((warningSAXFunc *) result) = ctxt->sax->warning;
242 } else if (!strcmp(name, "SAX function error")) {
243 *((errorSAXFunc *) result) = ctxt->sax->error;
244 } else if (!strcmp(name, "SAX function fatalError")) {
245 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
246 } else if (!strcmp(name, "SAX function getParameterEntity")) {
247 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
248 } else if (!strcmp(name, "SAX function cdataBlock")) {
249 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
250 } else if (!strcmp(name, "SAX function externalSubset")) {
251 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
252 } else {
253 return(-1);
254 }
255 return(0);
256}
257
258/*
259 * xmlSetFeature:
260 * @ctxt: an XML/HTML parser context
261 * @name: the feature name
262 * @value: pointer to the location of the new value
263 *
264 * Change the current value of one feature of this parser instance
265 *
266 * Returns -1 in case or error, 0 otherwise
267 */
268int
269xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
270 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
271 return(-1);
272
273 if (!strcmp(name, "validate")) {
274 int newvalidate = *((int *) value);
275 if ((!ctxt->validate) && (newvalidate != 0)) {
276 if (ctxt->vctxt.warning == NULL)
277 ctxt->vctxt.warning = xmlParserValidityWarning;
278 if (ctxt->vctxt.error == NULL)
279 ctxt->vctxt.error = xmlParserValidityError;
280 /* Allocate the Node stack */
281 ctxt->vctxt.nodeTab = (xmlNodePtr *)
282 xmlMalloc(4 * sizeof(xmlNodePtr));
283 if (ctxt->vctxt.nodeTab == NULL) {
284 ctxt->vctxt.nodeMax = 0;
285 ctxt->validate = 0;
286 return(-1);
287 }
288 ctxt->vctxt.nodeNr = 0;
289 ctxt->vctxt.nodeMax = 4;
290 ctxt->vctxt.node = NULL;
291 }
292 ctxt->validate = newvalidate;
293 } else if (!strcmp(name, "keep blanks")) {
294 ctxt->keepBlanks = *((int *) value);
295 } else if (!strcmp(name, "disable SAX")) {
296 ctxt->disableSAX = *((int *) value);
297 } else if (!strcmp(name, "fetch external entities")) {
298 ctxt->loadsubset = *((int *) value);
299 } else if (!strcmp(name, "substitute entities")) {
300 ctxt->replaceEntities = *((int *) value);
301 } else if (!strcmp(name, "gather line info")) {
302 ctxt->record_info = *((int *) value);
303 } else if (!strcmp(name, "user data")) {
304 ctxt->userData = *((void **)value);
305 } else if (!strcmp(name, "is html")) {
306 ctxt->html = *((int *) value);
307 } else if (!strcmp(name, "is standalone")) {
308 ctxt->standalone = *((int *) value);
309 } else if (!strcmp(name, "document")) {
310 ctxt->myDoc = *((xmlDocPtr *) value);
311 } else if (!strcmp(name, "is well formed")) {
312 ctxt->wellFormed = *((int *) value);
313 } else if (!strcmp(name, "is valid")) {
314 ctxt->valid = *((int *) value);
315 } else if (!strcmp(name, "SAX block")) {
316 ctxt->sax = *((xmlSAXHandlerPtr *) value);
317 } else if (!strcmp(name, "SAX function internalSubset")) {
318 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function isStandalone")) {
320 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
322 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
324 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
325 } else if (!strcmp(name, "SAX function resolveEntity")) {
326 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
327 } else if (!strcmp(name, "SAX function getEntity")) {
328 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
329 } else if (!strcmp(name, "SAX function entityDecl")) {
330 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function notationDecl")) {
332 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function attributeDecl")) {
334 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function elementDecl")) {
336 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
338 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
340 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startDocument")) {
342 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endDocument")) {
344 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function startElement")) {
346 ctxt->sax->startElement = *((startElementSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function endElement")) {
348 ctxt->sax->endElement = *((endElementSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function reference")) {
350 ctxt->sax->reference = *((referenceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function characters")) {
352 ctxt->sax->characters = *((charactersSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
354 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function processingInstruction")) {
356 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function comment")) {
358 ctxt->sax->comment = *((commentSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function warning")) {
360 ctxt->sax->warning = *((warningSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function error")) {
362 ctxt->sax->error = *((errorSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function fatalError")) {
364 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function getParameterEntity")) {
366 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
367 } else if (!strcmp(name, "SAX function cdataBlock")) {
368 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
369 } else if (!strcmp(name, "SAX function externalSubset")) {
370 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
371 } else {
372 return(-1);
373 }
374 return(0);
375}
376
377/************************************************************************
378 * *
379 * Some functions to avoid too large macros *
380 * *
381 ************************************************************************/
382
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character except the surrogate blocks, FFFE and FFFF;
 * below 0x20 only tab, line feed and carriage return qualify.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Control range: only the three whitespace controls are legal. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* 0xD800 - 0xDFFF are the surrogate blocks. */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fall through to the supplementary-plane test. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
403
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
418
/*
 * Direct lookup table for the code points 0x0000 - 0x00FF:
 * non-zero when the code point is a BaseChar.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive [lo, hi] ranges of the BaseChar code points >= 0x0100.
 * The entries MUST stay sorted and non-overlapping: xmlIsBaseChar()
 * performs a binary search on this table.
 */
static const struct {
    unsigned short lo;
    unsigned short hi;
} xmlBaseCharRanges[] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from the xmlBaseArray lookup
 * table; all others by a binary search of xmlBaseCharRanges. Negative
 * values are rejected up front so they can never index xmlBaseArray.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) / sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid].lo)
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid].hi)
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
657
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 * The accepted code points are the inclusive ranges of the table below.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* Inclusive digit ranges in ascending order. */
    static const struct {
        int first;
        int last;
    } digitRanges[] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const int count = (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    for (idx = 0; idx < count; idx++) {
        /* The table is sorted: once c is below a range it matches none. */
        if (c < digitRanges[idx].first)
            return(0);
        if (c <= digitRanges[idx].last)
            return(1);
    }
    return(0);
}
687
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 * The accepted code points are the inclusive ranges of the table below,
 * which is searched by bisection.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* Inclusive ranges, sorted ascending and non-overlapping. */
    static const struct {
        unsigned short first;
        unsigned short last;
    } combining[] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
        {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
        {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
        {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3}, {0x0A02, 0x0A02},
        {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD},
        {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03},
        {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43}, {0x0D46, 0x0D48},
        {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
        {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1},
        {0x302A, 0x302F}, {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    int bottom = 0;
    int top = (int) (sizeof(combining) / sizeof(combining[0])) - 1;

    while (bottom <= top) {
        int probe = bottom + (top - bottom) / 2;

        if (c < combining[probe].first)
            top = probe - 1;
        else if (c > combining[probe].last)
            bottom = probe + 1;
        else
            return(1);
    }
    return(0);
}
800
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE]: all three code points, including 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
825
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * compatibility ideographs (0xF900 and above) are not part of it.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* Fast path: nothing below 0x0100 is ideographic. */
    if (c < 0x0100)
        return(0);
    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
843
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 * The two alternatives are tested with the IS_BASECHAR() and
 * IS_IDEOGRAPHIC() macros, in that order.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
857
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
	return(1);
    if ((c >= 'A') && (c <= 'Z'))
	return(1);
    if ((c >= '0') && (c <= '9'))
	return(1);

    /* the blanks and the punctuation allowed in a public identifier */
    switch (c) {
	case 0x20: case 0x0D: case 0x0A:
	case '-': case '\'': case '(': case ')':
	case '+': case ',': case '.': case '/':
	case ':': case '=': case '?': case ';':
	case '!': case '*': case '#': case '@':
	case '$': case '_': case '%':
	    return(1);
	default:
	    return(0);
    }
}
880
881/************************************************************************
882 * *
883 * Input handling functions for progressive parsing *
884 * *
885 ************************************************************************/
886
887/* #define DEBUG_INPUT */
888/* #define DEBUG_STACK */
889/* #define DEBUG_PUSH */
890
891
892/* we need to keep enough input to show errors in context */
893#define LINE_LEN 80
894
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError any inconsistency between the
 * base/cur pointers of the input and its underlying buffer, then
 * dumps the current state of that buffer.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: squeezing them through an int
     * truncates them where pointers are wider than int. The other
     * values are explicitly converted to match their %d conversion.
     */
    xmlGenericError(xmlGenericErrorContext,
	    "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
	    (int) (in->cur - in->base),
	    (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
/* without DEBUG_INPUT the consistency checks compile to nothing */
#define CHECK_BUFFER(in) 
#endif
919
920
921/**
922 * xmlParserInputRead:
923 * @in: an XML parser input
924 * @len: an indicative size for the lookahead
925 *
926 * This function refresh the input for the parser. It doesn't try to
927 * preserve pointers to the input buffer, and discard already read data
928 *
929 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
930 * end of this entity
931 */
932int
933xmlParserInputRead(xmlParserInputPtr in, int len) {
934 int ret;
935 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000936 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000937
938#ifdef DEBUG_INPUT
939 xmlGenericError(xmlGenericErrorContext, "Read\n");
940#endif
941 if (in->buf == NULL) return(-1);
942 if (in->base == NULL) return(-1);
943 if (in->cur == NULL) return(-1);
944 if (in->buf->buffer == NULL) return(-1);
945 if (in->buf->readcallback == NULL) return(-1);
946
947 CHECK_BUFFER(in);
948
949 used = in->cur - in->buf->buffer->content;
950 ret = xmlBufferShrink(in->buf->buffer, used);
951 if (ret > 0) {
952 in->cur -= ret;
953 in->consumed += ret;
954 }
955 ret = xmlParserInputBufferRead(in->buf, len);
956 if (in->base != in->buf->buffer->content) {
957 /*
958 * the buffer has been realloced
959 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000960 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000961 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000962 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000963 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000964 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000965
966 CHECK_BUFFER(in);
967
968 return(ret);
969}
970
971/**
972 * xmlParserInputGrow:
973 * @in: an XML parser input
974 * @len: an indicative size for the lookahead
975 *
976 * This function increase the input for the parser. It tries to
977 * preserve pointers to the input buffer, and keep already read data
978 *
979 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
980 * end of this entity
981 */
982int
983xmlParserInputGrow(xmlParserInputPtr in, int len) {
984 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000985 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000986
987#ifdef DEBUG_INPUT
988 xmlGenericError(xmlGenericErrorContext, "Grow\n");
989#endif
990 if (in->buf == NULL) return(-1);
991 if (in->base == NULL) return(-1);
992 if (in->cur == NULL) return(-1);
993 if (in->buf->buffer == NULL) return(-1);
994
995 CHECK_BUFFER(in);
996
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000997 indx = in->cur - in->base;
998 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000999
1000 CHECK_BUFFER(in);
1001
1002 return(0);
1003 }
1004 if (in->buf->readcallback != NULL)
1005 ret = xmlParserInputBufferGrow(in->buf, len);
1006 else
1007 return(0);
1008
1009 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001010 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001011 * block, but we use it really as an integer to do some
1012 * pointer arithmetic. Insure will raise it as a bug but in
1013 * that specific case, that's not !
1014 */
1015 if (in->base != in->buf->buffer->content) {
1016 /*
1017 * the buffer has been realloced
1018 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001019 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001020 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001021 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001022 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001023 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001024
1025 CHECK_BUFFER(in);
1026
1027 return(ret);
1028}
1029
1030/**
1031 * xmlParserInputShrink:
1032 * @in: an XML parser input
1033 *
1034 * This function removes used input for the parser.
1035 */
1036void
1037xmlParserInputShrink(xmlParserInputPtr in) {
1038 int used;
1039 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001040 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001041
1042#ifdef DEBUG_INPUT
1043 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1044#endif
1045 if (in->buf == NULL) return;
1046 if (in->base == NULL) return;
1047 if (in->cur == NULL) return;
1048 if (in->buf->buffer == NULL) return;
1049
1050 CHECK_BUFFER(in);
1051
1052 used = in->cur - in->buf->buffer->content;
1053 /*
1054 * Do not shrink on large buffers whose only a tiny fraction
1055 * was consumned
1056 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001057 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001058 return;
1059 if (used > INPUT_CHUNK) {
1060 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1061 if (ret > 0) {
1062 in->cur -= ret;
1063 in->consumed += ret;
1064 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001065 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001066 }
1067
1068 CHECK_BUFFER(in);
1069
1070 if (in->buf->buffer->use > INPUT_CHUNK) {
1071 return;
1072 }
1073 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1074 if (in->base != in->buf->buffer->content) {
1075 /*
1076 * the buffer has been realloced
1077 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001078 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001079 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001080 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001081 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001082 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001083
1084 CHECK_BUFFER(in);
1085}
1086
1087/************************************************************************
1088 * *
1089 * UTF8 character input and related functions *
1090 * *
1091 ************************************************************************/
1092
1093/**
1094 * xmlNextChar:
1095 * @ctxt: the XML parser context
1096 *
1097 * Skip to the next char input char.
1098 */
1099
1100void
1101xmlNextChar(xmlParserCtxtPtr ctxt) {
1102 if (ctxt->instate == XML_PARSER_EOF)
1103 return;
1104
1105 /*
1106 * 2.11 End-of-Line Handling
1107 * the literal two-character sequence "#xD#xA" or a standalone
1108 * literal #xD, an XML processor must pass to the application
1109 * the single character #xA.
1110 */
1111 if (ctxt->token != 0) ctxt->token = 0;
1112 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1113 if ((*ctxt->input->cur == 0) &&
1114 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1115 (ctxt->instate != XML_PARSER_COMMENT)) {
1116 /*
1117 * If we are at the end of the current entity and
1118 * the context allows it, we pop consumed entities
1119 * automatically.
1120 * the auto closing should be blocked in other cases
1121 */
1122 xmlPopInput(ctxt);
1123 } else {
1124 if (*(ctxt->input->cur) == '\n') {
1125 ctxt->input->line++; ctxt->input->col = 1;
1126 } else ctxt->input->col++;
1127 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1128 /*
1129 * We are supposed to handle UTF8, check it's valid
1130 * From rfc2044: encoding of the Unicode values on UTF-8:
1131 *
1132 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1133 * 0000 0000-0000 007F 0xxxxxxx
1134 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1135 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1136 *
1137 * Check for the 0x110000 limit too
1138 */
1139 const unsigned char *cur = ctxt->input->cur;
1140 unsigned char c;
1141
1142 c = *cur;
1143 if (c & 0x80) {
1144 if (cur[1] == 0)
1145 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1146 if ((cur[1] & 0xc0) != 0x80)
1147 goto encoding_error;
1148 if ((c & 0xe0) == 0xe0) {
1149 unsigned int val;
1150
1151 if (cur[2] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if ((cur[2] & 0xc0) != 0x80)
1154 goto encoding_error;
1155 if ((c & 0xf0) == 0xf0) {
1156 if (cur[3] == 0)
1157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1158 if (((c & 0xf8) != 0xf0) ||
1159 ((cur[3] & 0xc0) != 0x80))
1160 goto encoding_error;
1161 /* 4-byte code */
1162 ctxt->input->cur += 4;
1163 val = (cur[0] & 0x7) << 18;
1164 val |= (cur[1] & 0x3f) << 12;
1165 val |= (cur[2] & 0x3f) << 6;
1166 val |= cur[3] & 0x3f;
1167 } else {
1168 /* 3-byte code */
1169 ctxt->input->cur += 3;
1170 val = (cur[0] & 0xf) << 12;
1171 val |= (cur[1] & 0x3f) << 6;
1172 val |= cur[2] & 0x3f;
1173 }
1174 if (((val > 0xd7ff) && (val < 0xe000)) ||
1175 ((val > 0xfffd) && (val < 0x10000)) ||
1176 (val >= 0x110000)) {
1177 if ((ctxt->sax != NULL) &&
1178 (ctxt->sax->error != NULL))
1179 ctxt->sax->error(ctxt->userData,
1180 "Char 0x%X out of allowed range\n", val);
1181 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1182 ctxt->wellFormed = 0;
1183 ctxt->disableSAX = 1;
1184 }
1185 } else
1186 /* 2-byte code */
1187 ctxt->input->cur += 2;
1188 } else
1189 /* 1-byte code */
1190 ctxt->input->cur++;
1191 } else {
1192 /*
1193 * Assume it's a fixed lenght encoding (1) with
1194 * a compatibke encoding for the ASCII set, since
1195 * XML constructs only use < 128 chars
1196 */
1197 ctxt->input->cur++;
1198 }
1199 ctxt->nbChars++;
1200 if (*ctxt->input->cur == 0)
1201 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1202 }
1203 } else {
1204 ctxt->input->cur++;
1205 ctxt->nbChars++;
1206 if (*ctxt->input->cur == 0)
1207 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1208 }
1209 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1210 xmlParserHandlePEReference(ctxt);
1211 if ((*ctxt->input->cur == 0) &&
1212 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1213 xmlPopInput(ctxt);
1214 return;
1215encoding_error:
1216 /*
1217 * If we detect an UTF8 error that probably mean that the
1218 * input encoding didn't get properly advertized in the
1219 * declaration header. Report the error and switch the encoding
1220 * to ISO-Latin-1 (if you don't like this policy, just declare the
1221 * encoding !)
1222 */
1223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1224 ctxt->sax->error(ctxt->userData,
1225 "Input is not proper UTF-8, indicate encoding !\n");
1226 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1227 ctxt->input->cur[0], ctxt->input->cur[1],
1228 ctxt->input->cur[2], ctxt->input->cur[3]);
1229 }
1230 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1231
1232 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1233 ctxt->input->cur++;
1234 return;
1235}
1236
1237/**
1238 * xmlCurrentChar:
1239 * @ctxt: the XML parser context
1240 * @len: pointer to the length of the char read
1241 *
1242 * The current char value, if using UTF-8 this may actaully span multiple
1243 * bytes in the input buffer. Implement the end of line normalization:
1244 * 2.11 End-of-Line Handling
1245 * Wherever an external parsed entity or the literal entity value
1246 * of an internal parsed entity contains either the literal two-character
1247 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1248 * must pass to the application the single character #xA.
1249 * This behavior can conveniently be produced by normalizing all
1250 * line breaks to #xA on input, before parsing.)
1251 *
1252 * Returns the current char value and its lenght
1253 */
1254
1255int
1256xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1257 if (ctxt->instate == XML_PARSER_EOF)
1258 return(0);
1259
1260 if (ctxt->token != 0) {
1261 *len = 0;
1262 return(ctxt->token);
1263 }
1264 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1265 *len = 1;
1266 return((int) *ctxt->input->cur);
1267 }
1268 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1269 /*
1270 * We are supposed to handle UTF8, check it's valid
1271 * From rfc2044: encoding of the Unicode values on UTF-8:
1272 *
1273 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1274 * 0000 0000-0000 007F 0xxxxxxx
1275 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1276 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1277 *
1278 * Check for the 0x110000 limit too
1279 */
1280 const unsigned char *cur = ctxt->input->cur;
1281 unsigned char c;
1282 unsigned int val;
1283
1284 c = *cur;
1285 if (c & 0x80) {
1286 if (cur[1] == 0)
1287 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1288 if ((cur[1] & 0xc0) != 0x80)
1289 goto encoding_error;
1290 if ((c & 0xe0) == 0xe0) {
1291
1292 if (cur[2] == 0)
1293 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1294 if ((cur[2] & 0xc0) != 0x80)
1295 goto encoding_error;
1296 if ((c & 0xf0) == 0xf0) {
1297 if (cur[3] == 0)
1298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1299 if (((c & 0xf8) != 0xf0) ||
1300 ((cur[3] & 0xc0) != 0x80))
1301 goto encoding_error;
1302 /* 4-byte code */
1303 *len = 4;
1304 val = (cur[0] & 0x7) << 18;
1305 val |= (cur[1] & 0x3f) << 12;
1306 val |= (cur[2] & 0x3f) << 6;
1307 val |= cur[3] & 0x3f;
1308 } else {
1309 /* 3-byte code */
1310 *len = 3;
1311 val = (cur[0] & 0xf) << 12;
1312 val |= (cur[1] & 0x3f) << 6;
1313 val |= cur[2] & 0x3f;
1314 }
1315 } else {
1316 /* 2-byte code */
1317 *len = 2;
1318 val = (cur[0] & 0x1f) << 6;
1319 val |= cur[1] & 0x3f;
1320 }
1321 if (!IS_CHAR(val)) {
1322 if ((ctxt->sax != NULL) &&
1323 (ctxt->sax->error != NULL))
1324 ctxt->sax->error(ctxt->userData,
1325 "Char 0x%X out of allowed range\n", val);
1326 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1327 ctxt->wellFormed = 0;
1328 ctxt->disableSAX = 1;
1329 }
1330 return(val);
1331 } else {
1332 /* 1-byte code */
1333 *len = 1;
1334 if (*ctxt->input->cur == 0xD) {
1335 if (ctxt->input->cur[1] == 0xA) {
1336 ctxt->nbChars++;
1337 ctxt->input->cur++;
1338 }
1339 return(0xA);
1340 }
1341 return((int) *ctxt->input->cur);
1342 }
1343 }
1344 /*
1345 * Assume it's a fixed lenght encoding (1) with
1346 * a compatibke encoding for the ASCII set, since
1347 * XML constructs only use < 128 chars
1348 */
1349 *len = 1;
1350 if (*ctxt->input->cur == 0xD) {
1351 if (ctxt->input->cur[1] == 0xA) {
1352 ctxt->nbChars++;
1353 ctxt->input->cur++;
1354 }
1355 return(0xA);
1356 }
1357 return((int) *ctxt->input->cur);
1358encoding_error:
1359 /*
1360 * If we detect an UTF8 error that probably mean that the
1361 * input encoding didn't get properly advertized in the
1362 * declaration header. Report the error and switch the encoding
1363 * to ISO-Latin-1 (if you don't like this policy, just declare the
1364 * encoding !)
1365 */
1366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1367 ctxt->sax->error(ctxt->userData,
1368 "Input is not proper UTF-8, indicate encoding !\n");
1369 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1370 ctxt->input->cur[0], ctxt->input->cur[1],
1371 ctxt->input->cur[2], ctxt->input->cur[3]);
1372 }
1373 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1374
1375 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1376 *len = 1;
1377 return((int) *ctxt->input->cur);
1378}
1379
1380/**
1381 * xmlStringCurrentChar:
1382 * @ctxt: the XML parser context
1383 * @cur: pointer to the beginning of the char
1384 * @len: pointer to the length of the char read
1385 *
1386 * The current char value, if using UTF-8 this may actaully span multiple
1387 * bytes in the input buffer.
1388 *
1389 * Returns the current char value and its lenght
1390 */
1391
1392int
1393xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1394 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1395 /*
1396 * We are supposed to handle UTF8, check it's valid
1397 * From rfc2044: encoding of the Unicode values on UTF-8:
1398 *
1399 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1400 * 0000 0000-0000 007F 0xxxxxxx
1401 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1402 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1403 *
1404 * Check for the 0x110000 limit too
1405 */
1406 unsigned char c;
1407 unsigned int val;
1408
1409 c = *cur;
1410 if (c & 0x80) {
1411 if ((cur[1] & 0xc0) != 0x80)
1412 goto encoding_error;
1413 if ((c & 0xe0) == 0xe0) {
1414
1415 if ((cur[2] & 0xc0) != 0x80)
1416 goto encoding_error;
1417 if ((c & 0xf0) == 0xf0) {
1418 if (((c & 0xf8) != 0xf0) ||
1419 ((cur[3] & 0xc0) != 0x80))
1420 goto encoding_error;
1421 /* 4-byte code */
1422 *len = 4;
1423 val = (cur[0] & 0x7) << 18;
1424 val |= (cur[1] & 0x3f) << 12;
1425 val |= (cur[2] & 0x3f) << 6;
1426 val |= cur[3] & 0x3f;
1427 } else {
1428 /* 3-byte code */
1429 *len = 3;
1430 val = (cur[0] & 0xf) << 12;
1431 val |= (cur[1] & 0x3f) << 6;
1432 val |= cur[2] & 0x3f;
1433 }
1434 } else {
1435 /* 2-byte code */
1436 *len = 2;
1437 val = (cur[0] & 0x1f) << 6;
1438 val |= cur[2] & 0x3f;
1439 }
1440 if (!IS_CHAR(val)) {
1441 if ((ctxt->sax != NULL) &&
1442 (ctxt->sax->error != NULL))
1443 ctxt->sax->error(ctxt->userData,
1444 "Char 0x%X out of allowed range\n", val);
1445 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1446 ctxt->wellFormed = 0;
1447 ctxt->disableSAX = 1;
1448 }
1449 return(val);
1450 } else {
1451 /* 1-byte code */
1452 *len = 1;
1453 return((int) *cur);
1454 }
1455 }
1456 /*
1457 * Assume it's a fixed lenght encoding (1) with
1458 * a compatibke encoding for the ASCII set, since
1459 * XML constructs only use < 128 chars
1460 */
1461 *len = 1;
1462 return((int) *cur);
1463encoding_error:
1464 /*
1465 * If we detect an UTF8 error that probably mean that the
1466 * input encoding didn't get properly advertized in the
1467 * declaration header. Report the error and switch the encoding
1468 * to ISO-Latin-1 (if you don't like this policy, just declare the
1469 * encoding !)
1470 */
1471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1472 ctxt->sax->error(ctxt->userData,
1473 "Input is not proper UTF-8, indicate encoding !\n");
1474 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1475 ctxt->input->cur[0], ctxt->input->cur[1],
1476 ctxt->input->cur[2], ctxt->input->cur[3]);
1477 }
1478 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1479
1480 *len = 1;
1481 return((int) *cur);
1482}
1483
1484/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001485 * xmlCopyCharMultiByte:
1486 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001487 * @val: the char value
1488 *
1489 * append the char value in the array
1490 *
1491 * Returns the number of xmlChar written
1492 */
Owen Taylor3473f882001-02-23 17:55:21 +00001493int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001494xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001495 /*
1496 * We are supposed to handle UTF8, check it's valid
1497 * From rfc2044: encoding of the Unicode values on UTF-8:
1498 *
1499 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1500 * 0000 0000-0000 007F 0xxxxxxx
1501 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1502 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1503 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001504 if (val >= 0x80) {
1505 xmlChar *savedout = out;
1506 int bits;
1507 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1508 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1509 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1510 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 xmlGenericError(xmlGenericErrorContext,
1512 "Internal error, xmlCopyChar 0x%X out of bound\n",
1513 val);
1514 return(0);
1515 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001516 for ( ; bits >= 0; bits-= 6)
1517 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1518 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001519 }
1520 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001521 return 1;
1522}
1523
1524/**
1525 * xmlCopyChar:
1526 * @len: Ignored, compatibility
1527 * @out: pointer to an arry of xmlChar
1528 * @val: the char value
1529 *
1530 * append the char value in the array
1531 *
1532 * Returns the number of xmlChar written
1533 */
1534
1535int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001536xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001537 /* the len parameter is ignored */
1538 if (val >= 0x80) {
1539 return(xmlCopyCharMultiByte (out, val));
1540 }
1541 *out = (xmlChar) val;
1542 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001543}
1544
1545/************************************************************************
1546 * *
1547 * Commodity functions to switch encodings *
1548 * *
1549 ************************************************************************/
1550
1551/**
1552 * xmlSwitchEncoding:
1553 * @ctxt: the parser context
1554 * @enc: the encoding value (number)
1555 *
1556 * change the input functions when discovering the character encoding
1557 * of a given entity.
1558 *
1559 * Returns 0 in case of success, -1 otherwise
1560 */
1561int
1562xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1563{
1564 xmlCharEncodingHandlerPtr handler;
1565
1566 switch (enc) {
1567 case XML_CHAR_ENCODING_ERROR:
1568 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1570 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1571 ctxt->wellFormed = 0;
1572 ctxt->disableSAX = 1;
1573 break;
1574 case XML_CHAR_ENCODING_NONE:
1575 /* let's assume it's UTF-8 without the XML decl */
1576 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1577 return(0);
1578 case XML_CHAR_ENCODING_UTF8:
1579 /* default encoding, no conversion should be needed */
1580 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1581 return(0);
1582 default:
1583 break;
1584 }
1585 handler = xmlGetCharEncodingHandler(enc);
1586 if (handler == NULL) {
1587 /*
1588 * Default handlers.
1589 */
1590 switch (enc) {
1591 case XML_CHAR_ENCODING_ERROR:
1592 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1594 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1595 ctxt->wellFormed = 0;
1596 ctxt->disableSAX = 1;
1597 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1598 break;
1599 case XML_CHAR_ENCODING_NONE:
1600 /* let's assume it's UTF-8 without the XML decl */
1601 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1602 return(0);
1603 case XML_CHAR_ENCODING_UTF8:
1604 case XML_CHAR_ENCODING_ASCII:
1605 /* default encoding, no conversion should be needed */
1606 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1607 return(0);
1608 case XML_CHAR_ENCODING_UTF16LE:
1609 break;
1610 case XML_CHAR_ENCODING_UTF16BE:
1611 break;
1612 case XML_CHAR_ENCODING_UCS4LE:
1613 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1615 ctxt->sax->error(ctxt->userData,
1616 "char encoding USC4 little endian not supported\n");
1617 break;
1618 case XML_CHAR_ENCODING_UCS4BE:
1619 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1621 ctxt->sax->error(ctxt->userData,
1622 "char encoding USC4 big endian not supported\n");
1623 break;
1624 case XML_CHAR_ENCODING_EBCDIC:
1625 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1627 ctxt->sax->error(ctxt->userData,
1628 "char encoding EBCDIC not supported\n");
1629 break;
1630 case XML_CHAR_ENCODING_UCS4_2143:
1631 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1633 ctxt->sax->error(ctxt->userData,
1634 "char encoding UCS4 2143 not supported\n");
1635 break;
1636 case XML_CHAR_ENCODING_UCS4_3412:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding UCS4 3412 not supported\n");
1641 break;
1642 case XML_CHAR_ENCODING_UCS2:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding UCS2 not supported\n");
1647 break;
1648 case XML_CHAR_ENCODING_8859_1:
1649 case XML_CHAR_ENCODING_8859_2:
1650 case XML_CHAR_ENCODING_8859_3:
1651 case XML_CHAR_ENCODING_8859_4:
1652 case XML_CHAR_ENCODING_8859_5:
1653 case XML_CHAR_ENCODING_8859_6:
1654 case XML_CHAR_ENCODING_8859_7:
1655 case XML_CHAR_ENCODING_8859_8:
1656 case XML_CHAR_ENCODING_8859_9:
1657 /*
1658 * We used to keep the internal content in the
1659 * document encoding however this turns being unmaintainable
1660 * So xmlGetCharEncodingHandler() will return non-null
1661 * values for this now.
1662 */
1663 if ((ctxt->inputNr == 1) &&
1664 (ctxt->encoding == NULL) &&
1665 (ctxt->input->encoding != NULL)) {
1666 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1667 }
1668 ctxt->charset = enc;
1669 return(0);
1670 case XML_CHAR_ENCODING_2022_JP:
1671 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1673 ctxt->sax->error(ctxt->userData,
1674 "char encoding ISO-2022-JPnot supported\n");
1675 break;
1676 case XML_CHAR_ENCODING_SHIFT_JIS:
1677 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1679 ctxt->sax->error(ctxt->userData,
1680 "char encoding Shift_JIS not supported\n");
1681 break;
1682 case XML_CHAR_ENCODING_EUC_JP:
1683 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1685 ctxt->sax->error(ctxt->userData,
1686 "char encoding EUC-JPnot supported\n");
1687 break;
1688 }
1689 }
1690 if (handler == NULL)
1691 return(-1);
1692 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1693 return(xmlSwitchToEncoding(ctxt, handler));
1694}
1695
1696/**
1697 * xmlSwitchToEncoding:
1698 * @ctxt: the parser context
1699 * @handler: the encoding handler
1700 *
1701 * change the input functions when discovering the character encoding
1702 * of a given entity.
1703 *
1704 * Returns 0 in case of success, -1 otherwise
1705 */
1706int
1707xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1708{
1709 int nbchars;
1710
1711 if (handler != NULL) {
1712 if (ctxt->input != NULL) {
1713 if (ctxt->input->buf != NULL) {
1714 if (ctxt->input->buf->encoder != NULL) {
1715 if (ctxt->input->buf->encoder == handler)
1716 return(0);
1717 /*
1718 * Note: this is a bit dangerous, but that's what it
1719 * takes to use nearly compatible signature for different
1720 * encodings.
1721 */
1722 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1723 ctxt->input->buf->encoder = handler;
1724 return(0);
1725 }
1726 ctxt->input->buf->encoder = handler;
1727
1728 /*
1729 * Is there already some content down the pipe to convert ?
1730 */
1731 if ((ctxt->input->buf->buffer != NULL) &&
1732 (ctxt->input->buf->buffer->use > 0)) {
1733 int processed;
1734
1735 /*
1736 * Specific handling of the Byte Order Mark for
1737 * UTF-16
1738 */
1739 if ((handler->name != NULL) &&
1740 (!strcmp(handler->name, "UTF-16LE")) &&
1741 (ctxt->input->cur[0] == 0xFF) &&
1742 (ctxt->input->cur[1] == 0xFE)) {
1743 ctxt->input->cur += 2;
1744 }
1745 if ((handler->name != NULL) &&
1746 (!strcmp(handler->name, "UTF-16BE")) &&
1747 (ctxt->input->cur[0] == 0xFE) &&
1748 (ctxt->input->cur[1] == 0xFF)) {
1749 ctxt->input->cur += 2;
1750 }
1751
1752 /*
1753 * Shring the current input buffer.
1754 * Move it as the raw buffer and create a new input buffer
1755 */
1756 processed = ctxt->input->cur - ctxt->input->base;
1757 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1758 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1759 ctxt->input->buf->buffer = xmlBufferCreate();
1760
1761 if (ctxt->html) {
1762 /*
1763 * converst as much as possbile of the buffer
1764 */
1765 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1766 ctxt->input->buf->buffer,
1767 ctxt->input->buf->raw);
1768 } else {
1769 /*
1770 * convert just enough to get
1771 * '<?xml version="1.0" encoding="xxx"?>'
1772 * parsed with the autodetected encoding
1773 * into the parser reading buffer.
1774 */
1775 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1776 ctxt->input->buf->buffer,
1777 ctxt->input->buf->raw);
1778 }
1779 if (nbchars < 0) {
1780 xmlGenericError(xmlGenericErrorContext,
1781 "xmlSwitchToEncoding: encoder error\n");
1782 return(-1);
1783 }
1784 ctxt->input->base =
1785 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001786 ctxt->input->end =
1787 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001788
1789 }
1790 return(0);
1791 } else {
1792 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1793 /*
1794 * When parsing a static memory array one must know the
1795 * size to be able to convert the buffer.
1796 */
1797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798 ctxt->sax->error(ctxt->userData,
1799 "xmlSwitchEncoding : no input\n");
1800 return(-1);
1801 } else {
1802 int processed;
1803
1804 /*
1805 * Shring the current input buffer.
1806 * Move it as the raw buffer and create a new input buffer
1807 */
1808 processed = ctxt->input->cur - ctxt->input->base;
1809
1810 ctxt->input->buf->raw = xmlBufferCreate();
1811 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1812 ctxt->input->length - processed);
1813 ctxt->input->buf->buffer = xmlBufferCreate();
1814
1815 /*
1816 * convert as much as possible of the raw input
1817 * to the parser reading buffer.
1818 */
1819 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1820 ctxt->input->buf->buffer,
1821 ctxt->input->buf->raw);
1822 if (nbchars < 0) {
1823 xmlGenericError(xmlGenericErrorContext,
1824 "xmlSwitchToEncoding: encoder error\n");
1825 return(-1);
1826 }
1827
1828 /*
1829 * Conversion succeeded, get rid of the old buffer
1830 */
1831 if ((ctxt->input->free != NULL) &&
1832 (ctxt->input->base != NULL))
1833 ctxt->input->free((xmlChar *) ctxt->input->base);
1834 ctxt->input->base =
1835 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001836 ctxt->input->end =
1837 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001838 }
1839 }
1840 } else {
1841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1842 ctxt->sax->error(ctxt->userData,
1843 "xmlSwitchEncoding : no input\n");
1844 return(-1);
1845 }
1846 /*
1847 * The parsing is now done in UTF8 natively
1848 */
1849 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1850 } else
1851 return(-1);
1852 return(0);
1853
1854}
1855
1856/************************************************************************
1857 * *
1858 * Commodity functions to handle entities processing *
1859 * *
1860 ************************************************************************/
1861
1862/**
1863 * xmlFreeInputStream:
1864 * @input: an xmlParserInputPtr
1865 *
1866 * Free up an input stream.
1867 */
1868void
1869xmlFreeInputStream(xmlParserInputPtr input) {
1870 if (input == NULL) return;
1871
1872 if (input->filename != NULL) xmlFree((char *) input->filename);
1873 if (input->directory != NULL) xmlFree((char *) input->directory);
1874 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1875 if (input->version != NULL) xmlFree((char *) input->version);
1876 if ((input->free != NULL) && (input->base != NULL))
1877 input->free((xmlChar *) input->base);
1878 if (input->buf != NULL)
1879 xmlFreeParserInputBuffer(input->buf);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001880 MEM_CLEANUP(input, sizeof(xmlParserInput));
Owen Taylor3473f882001-02-23 17:55:21 +00001881 xmlFree(input);
1882}
1883
1884/**
1885 * xmlNewInputStream:
1886 * @ctxt: an XML parser context
1887 *
1888 * Create a new input stream structure
1889 * Returns the new input stream or NULL
1890 */
1891xmlParserInputPtr
1892xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1893 xmlParserInputPtr input;
1894
1895 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1896 if (input == NULL) {
1897 if (ctxt != NULL) {
1898 ctxt->errNo = XML_ERR_NO_MEMORY;
1899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1900 ctxt->sax->error(ctxt->userData,
1901 "malloc: couldn't allocate a new input stream\n");
1902 ctxt->errNo = XML_ERR_NO_MEMORY;
1903 }
1904 return(NULL);
1905 }
1906 memset(input, 0, sizeof(xmlParserInput));
1907 input->line = 1;
1908 input->col = 1;
1909 input->standalone = -1;
1910 return(input);
1911}
1912
1913/**
1914 * xmlNewIOInputStream:
1915 * @ctxt: an XML parser context
1916 * @input: an I/O Input
1917 * @enc: the charset encoding if known
1918 *
1919 * Create a new input stream structure encapsulating the @input into
1920 * a stream suitable for the parser.
1921 *
1922 * Returns the new input stream or NULL
1923 */
1924xmlParserInputPtr
1925xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1926 xmlCharEncoding enc) {
1927 xmlParserInputPtr inputStream;
1928
1929 if (xmlParserDebugEntities)
1930 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1931 inputStream = xmlNewInputStream(ctxt);
1932 if (inputStream == NULL) {
1933 return(NULL);
1934 }
1935 inputStream->filename = NULL;
1936 inputStream->buf = input;
1937 inputStream->base = inputStream->buf->buffer->content;
1938 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001939 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001940 if (enc != XML_CHAR_ENCODING_NONE) {
1941 xmlSwitchEncoding(ctxt, enc);
1942 }
1943
1944 return(inputStream);
1945}
1946
1947/**
1948 * xmlNewEntityInputStream:
1949 * @ctxt: an XML parser context
1950 * @entity: an Entity pointer
1951 *
1952 * Create a new input stream based on an xmlEntityPtr
1953 *
1954 * Returns the new input stream or NULL
1955 */
1956xmlParserInputPtr
1957xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1958 xmlParserInputPtr input;
1959
1960 if (entity == NULL) {
1961 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1963 ctxt->sax->error(ctxt->userData,
1964 "internal: xmlNewEntityInputStream entity = NULL\n");
1965 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1966 return(NULL);
1967 }
1968 if (xmlParserDebugEntities)
1969 xmlGenericError(xmlGenericErrorContext,
1970 "new input from entity: %s\n", entity->name);
1971 if (entity->content == NULL) {
1972 switch (entity->etype) {
1973 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1974 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1976 ctxt->sax->error(ctxt->userData,
1977 "xmlNewEntityInputStream unparsed entity !\n");
1978 break;
1979 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1980 case XML_EXTERNAL_PARAMETER_ENTITY:
1981 return(xmlLoadExternalEntity((char *) entity->URI,
1982 (char *) entity->ExternalID, ctxt));
1983 case XML_INTERNAL_GENERAL_ENTITY:
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
1986 "Internal entity %s without content !\n", entity->name);
1987 break;
1988 case XML_INTERNAL_PARAMETER_ENTITY:
1989 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1991 ctxt->sax->error(ctxt->userData,
1992 "Internal parameter entity %s without content !\n", entity->name);
1993 break;
1994 case XML_INTERNAL_PREDEFINED_ENTITY:
1995 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "Predefined entity %s without content !\n", entity->name);
1999 break;
2000 }
2001 return(NULL);
2002 }
2003 input = xmlNewInputStream(ctxt);
2004 if (input == NULL) {
2005 return(NULL);
2006 }
2007 input->filename = (char *) entity->URI;
2008 input->base = entity->content;
2009 input->cur = entity->content;
2010 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002011 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002012 return(input);
2013}
2014
2015/**
2016 * xmlNewStringInputStream:
2017 * @ctxt: an XML parser context
2018 * @buffer: an memory buffer
2019 *
2020 * Create a new input stream based on a memory buffer.
2021 * Returns the new input stream
2022 */
2023xmlParserInputPtr
2024xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2025 xmlParserInputPtr input;
2026
2027 if (buffer == NULL) {
2028 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2030 ctxt->sax->error(ctxt->userData,
2031 "internal: xmlNewStringInputStream string = NULL\n");
2032 return(NULL);
2033 }
2034 if (xmlParserDebugEntities)
2035 xmlGenericError(xmlGenericErrorContext,
2036 "new fixed input: %.30s\n", buffer);
2037 input = xmlNewInputStream(ctxt);
2038 if (input == NULL) {
2039 return(NULL);
2040 }
2041 input->base = buffer;
2042 input->cur = buffer;
2043 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002044 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002045 return(input);
2046}
2047
2048/**
2049 * xmlNewInputFromFile:
2050 * @ctxt: an XML parser context
2051 * @filename: the filename to use as entity
2052 *
2053 * Create a new input stream based on a file.
2054 *
2055 * Returns the new input stream or NULL in case of error
2056 */
2057xmlParserInputPtr
2058xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2059 xmlParserInputBufferPtr buf;
2060 xmlParserInputPtr inputStream;
2061 char *directory = NULL;
2062 xmlChar *URI = NULL;
2063
2064 if (xmlParserDebugEntities)
2065 xmlGenericError(xmlGenericErrorContext,
2066 "new input from file: %s\n", filename);
2067 if (ctxt == NULL) return(NULL);
2068 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2069 if (buf == NULL)
2070 return(NULL);
2071
2072 URI = xmlStrdup((xmlChar *) filename);
2073 directory = xmlParserGetDirectory((const char *) URI);
2074
2075 inputStream = xmlNewInputStream(ctxt);
2076 if (inputStream == NULL) {
2077 if (directory != NULL) xmlFree((char *) directory);
2078 if (URI != NULL) xmlFree((char *) URI);
2079 return(NULL);
2080 }
2081
2082 inputStream->filename = (const char *) URI;
2083 inputStream->directory = directory;
2084 inputStream->buf = buf;
2085
2086 inputStream->base = inputStream->buf->buffer->content;
2087 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002088 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002089 if ((ctxt->directory == NULL) && (directory != NULL))
2090 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2091 return(inputStream);
2092}
2093
2094/************************************************************************
2095 * *
2096 * Commodity functions to handle parser contexts *
2097 * *
2098 ************************************************************************/
2099
2100/**
2101 * xmlInitParserCtxt:
2102 * @ctxt: an XML parser context
2103 *
2104 * Initialize a parser context
2105 */
2106
2107void
2108xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2109{
2110 xmlSAXHandler *sax;
2111
2112 xmlDefaultSAXHandlerInit();
2113
2114 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2115 if (sax == NULL) {
2116 xmlGenericError(xmlGenericErrorContext,
2117 "xmlInitParserCtxt: out of memory\n");
2118 }
2119 else
2120 memset(sax, 0, sizeof(xmlSAXHandler));
2121
2122 /* Allocate the Input stack */
2123 ctxt->inputTab = (xmlParserInputPtr *)
2124 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2125 if (ctxt->inputTab == NULL) {
2126 xmlGenericError(xmlGenericErrorContext,
2127 "xmlInitParserCtxt: out of memory\n");
2128 ctxt->inputNr = 0;
2129 ctxt->inputMax = 0;
2130 ctxt->input = NULL;
2131 return;
2132 }
2133 ctxt->inputNr = 0;
2134 ctxt->inputMax = 5;
2135 ctxt->input = NULL;
2136
2137 ctxt->version = NULL;
2138 ctxt->encoding = NULL;
2139 ctxt->standalone = -1;
2140 ctxt->hasExternalSubset = 0;
2141 ctxt->hasPErefs = 0;
2142 ctxt->html = 0;
2143 ctxt->external = 0;
2144 ctxt->instate = XML_PARSER_START;
2145 ctxt->token = 0;
2146 ctxt->directory = NULL;
2147
2148 /* Allocate the Node stack */
2149 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2150 if (ctxt->nodeTab == NULL) {
2151 xmlGenericError(xmlGenericErrorContext,
2152 "xmlInitParserCtxt: out of memory\n");
2153 ctxt->nodeNr = 0;
2154 ctxt->nodeMax = 0;
2155 ctxt->node = NULL;
2156 ctxt->inputNr = 0;
2157 ctxt->inputMax = 0;
2158 ctxt->input = NULL;
2159 return;
2160 }
2161 ctxt->nodeNr = 0;
2162 ctxt->nodeMax = 10;
2163 ctxt->node = NULL;
2164
2165 /* Allocate the Name stack */
2166 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2167 if (ctxt->nameTab == NULL) {
2168 xmlGenericError(xmlGenericErrorContext,
2169 "xmlInitParserCtxt: out of memory\n");
2170 ctxt->nodeNr = 0;
2171 ctxt->nodeMax = 0;
2172 ctxt->node = NULL;
2173 ctxt->inputNr = 0;
2174 ctxt->inputMax = 0;
2175 ctxt->input = NULL;
2176 ctxt->nameNr = 0;
2177 ctxt->nameMax = 0;
2178 ctxt->name = NULL;
2179 return;
2180 }
2181 ctxt->nameNr = 0;
2182 ctxt->nameMax = 10;
2183 ctxt->name = NULL;
2184
2185 /* Allocate the space stack */
2186 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2187 if (ctxt->spaceTab == NULL) {
2188 xmlGenericError(xmlGenericErrorContext,
2189 "xmlInitParserCtxt: out of memory\n");
2190 ctxt->nodeNr = 0;
2191 ctxt->nodeMax = 0;
2192 ctxt->node = NULL;
2193 ctxt->inputNr = 0;
2194 ctxt->inputMax = 0;
2195 ctxt->input = NULL;
2196 ctxt->nameNr = 0;
2197 ctxt->nameMax = 0;
2198 ctxt->name = NULL;
2199 ctxt->spaceNr = 0;
2200 ctxt->spaceMax = 0;
2201 ctxt->space = NULL;
2202 return;
2203 }
2204 ctxt->spaceNr = 1;
2205 ctxt->spaceMax = 10;
2206 ctxt->spaceTab[0] = -1;
2207 ctxt->space = &ctxt->spaceTab[0];
2208
Daniel Veillard14be0a12001-03-03 18:50:55 +00002209 ctxt->sax = sax;
2210 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2211
Owen Taylor3473f882001-02-23 17:55:21 +00002212 ctxt->userData = ctxt;
2213 ctxt->myDoc = NULL;
2214 ctxt->wellFormed = 1;
2215 ctxt->valid = 1;
2216 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2217 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2218 ctxt->pedantic = xmlPedanticParserDefaultValue;
2219 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2220 ctxt->vctxt.userData = ctxt;
2221 if (ctxt->validate) {
2222 ctxt->vctxt.error = xmlParserValidityError;
2223 if (xmlGetWarningsDefaultValue == 0)
2224 ctxt->vctxt.warning = NULL;
2225 else
2226 ctxt->vctxt.warning = xmlParserValidityWarning;
2227 /* Allocate the Node stack */
2228 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2229 if (ctxt->vctxt.nodeTab == NULL) {
2230 xmlGenericError(xmlGenericErrorContext,
2231 "xmlInitParserCtxt: out of memory\n");
2232 ctxt->vctxt.nodeMax = 0;
2233 ctxt->validate = 0;
2234 ctxt->vctxt.error = NULL;
2235 ctxt->vctxt.warning = NULL;
2236 } else {
2237 ctxt->vctxt.nodeNr = 0;
2238 ctxt->vctxt.nodeMax = 4;
2239 ctxt->vctxt.node = NULL;
2240 }
2241 } else {
2242 ctxt->vctxt.error = NULL;
2243 ctxt->vctxt.warning = NULL;
2244 }
2245 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2246 ctxt->record_info = 0;
2247 ctxt->nbChars = 0;
2248 ctxt->checkIndex = 0;
2249 ctxt->inSubset = 0;
2250 ctxt->errNo = XML_ERR_OK;
2251 ctxt->depth = 0;
2252 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2253 xmlInitNodeInfoSeq(&ctxt->node_seq);
2254}
2255
2256/**
2257 * xmlFreeParserCtxt:
2258 * @ctxt: an XML parser context
2259 *
2260 * Free all the memory used by a parser context. However the parsed
2261 * document in ctxt->myDoc is not freed.
2262 */
2263
2264void
2265xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2266{
2267 xmlParserInputPtr input;
2268 xmlChar *oldname;
2269
2270 if (ctxt == NULL) return;
2271
2272 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2273 xmlFreeInputStream(input);
2274 }
2275 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2276 xmlFree(oldname);
2277 }
2278 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2279 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2280 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2281 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2282 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2283 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2284 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2285 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2286 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2287 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2288 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2289 xmlFree(ctxt->sax);
2290 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2291 xmlFree(ctxt);
2292}
2293
2294/**
2295 * xmlNewParserCtxt:
2296 *
2297 * Allocate and initialize a new parser context.
2298 *
2299 * Returns the xmlParserCtxtPtr or NULL
2300 */
2301
2302xmlParserCtxtPtr
2303xmlNewParserCtxt()
2304{
2305 xmlParserCtxtPtr ctxt;
2306
2307 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2308 if (ctxt == NULL) {
2309 xmlGenericError(xmlGenericErrorContext,
2310 "xmlNewParserCtxt : cannot allocate context\n");
2311 perror("malloc");
2312 return(NULL);
2313 }
2314 memset(ctxt, 0, sizeof(xmlParserCtxt));
2315 xmlInitParserCtxt(ctxt);
2316 return(ctxt);
2317}
2318
2319/************************************************************************
2320 * *
 *		Handling of node information				*
2322 * *
2323 ************************************************************************/
2324
2325/**
2326 * xmlClearParserCtxt:
2327 * @ctxt: an XML parser context
2328 *
2329 * Clear (release owned resources) and reinitialize a parser context
2330 */
2331
2332void
2333xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2334{
2335 xmlClearNodeInfoSeq(&ctxt->node_seq);
2336 xmlInitParserCtxt(ctxt);
2337}
2338
2339/**
2340 * xmlParserFindNodeInfo:
2341 * @ctxt: an XML parser context
2342 * @node: an XML node within the tree
2343 *
2344 * Find the parser node info struct for a given node
2345 *
2346 * Returns an xmlParserNodeInfo block pointer or NULL
2347 */
2348const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2349 const xmlNode* node)
2350{
2351 unsigned long pos;
2352
2353 /* Find position where node should be at */
2354 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2355 if ( ctx->node_seq.buffer[pos].node == node )
2356 return &ctx->node_seq.buffer[pos];
2357 else
2358 return NULL;
2359}
2360
2361
2362/**
2363 * xmlInitNodeInfoSeq:
2364 * @seq: a node info sequence pointer
2365 *
2366 * -- Initialize (set to initial state) node info sequence
2367 */
2368void
2369xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2370{
2371 seq->length = 0;
2372 seq->maximum = 0;
2373 seq->buffer = NULL;
2374}
2375
2376/**
2377 * xmlClearNodeInfoSeq:
2378 * @seq: a node info sequence pointer
2379 *
2380 * -- Clear (release memory and reinitialize) node
2381 * info sequence
2382 */
2383void
2384xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2385{
2386 if ( seq->buffer != NULL )
2387 xmlFree(seq->buffer);
2388 xmlInitNodeInfoSeq(seq);
2389}
2390
2391
2392/**
2393 * xmlParserFindNodeInfoIndex:
2394 * @seq: a node info sequence pointer
2395 * @node: an XML node pointer
2396 *
2397 *
2398 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2399 * the given node is or should be at in a sorted sequence
2400 *
2401 * Returns a long indicating the position of the record
2402 */
2403unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2404 const xmlNode* node)
2405{
2406 unsigned long upper, lower, middle;
2407 int found = 0;
2408
2409 /* Do a binary search for the key */
2410 lower = 1;
2411 upper = seq->length;
2412 middle = 0;
2413 while ( lower <= upper && !found) {
2414 middle = lower + (upper - lower) / 2;
2415 if ( node == seq->buffer[middle - 1].node )
2416 found = 1;
2417 else if ( node < seq->buffer[middle - 1].node )
2418 upper = middle - 1;
2419 else
2420 lower = middle + 1;
2421 }
2422
2423 /* Return position */
2424 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2425 return middle;
2426 else
2427 return middle - 1;
2428}
2429
2430
2431/**
2432 * xmlParserAddNodeInfo:
2433 * @ctxt: an XML parser context
2434 * @info: a node info sequence pointer
2435 *
2436 * Insert node info record into the sorted sequence
2437 */
2438void
2439xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2440 const xmlParserNodeInfo* info)
2441{
2442 unsigned long pos;
2443 static unsigned int block_size = 5;
2444
2445 /* Find pos and check to see if node is already in the sequence */
2446 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2447 if ( pos < ctxt->node_seq.length
2448 && ctxt->node_seq.buffer[pos].node == info->node ) {
2449 ctxt->node_seq.buffer[pos] = *info;
2450 }
2451
2452 /* Otherwise, we need to add new node to buffer */
2453 else {
2454 /* Expand buffer by 5 if needed */
2455 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2456 xmlParserNodeInfo* tmp_buffer;
2457 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2458 *(ctxt->node_seq.maximum + block_size));
2459
2460 if ( ctxt->node_seq.buffer == NULL )
2461 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2462 else
2463 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2464
2465 if ( tmp_buffer == NULL ) {
2466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2467 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2468 ctxt->errNo = XML_ERR_NO_MEMORY;
2469 return;
2470 }
2471 ctxt->node_seq.buffer = tmp_buffer;
2472 ctxt->node_seq.maximum += block_size;
2473 }
2474
2475 /* If position is not at end, move elements out of the way */
2476 if ( pos != ctxt->node_seq.length ) {
2477 unsigned long i;
2478
2479 for ( i = ctxt->node_seq.length; i > pos; i-- )
2480 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2481 }
2482
2483 /* Copy element and increase length */
2484 ctxt->node_seq.buffer[pos] = *info;
2485 ctxt->node_seq.length++;
2486 }
2487}
2488
2489/************************************************************************
2490 * *
2491 * Deprecated functions kept for compatibility *
2492 * *
2493 ************************************************************************/
2494
2495/*
2496 * xmlCheckLanguageID
2497 * @lang: pointer to the string value
2498 *
2499 * Checks that the value conforms to the LanguageID production:
2500 *
2501 * NOTE: this is somewhat deprecated, those productions were removed from
2502 * the XML Second edition.
2503 *
2504 * [33] LanguageID ::= Langcode ('-' Subcode)*
2505 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2506 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2507 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2508 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2509 * [38] Subcode ::= ([a-z] | [A-Z])+
2510 *
2511 * Returns 1 if correct 0 otherwise
2512 **/
2513int
2514xmlCheckLanguageID(const xmlChar *lang) {
2515 const xmlChar *cur = lang;
2516
2517 if (cur == NULL)
2518 return(0);
2519 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2520 ((cur[0] == 'I') && (cur[1] == '-'))) {
2521 /*
2522 * IANA code
2523 */
2524 cur += 2;
2525 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2526 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2527 cur++;
2528 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2529 ((cur[0] == 'X') && (cur[1] == '-'))) {
2530 /*
2531 * User code
2532 */
2533 cur += 2;
2534 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2535 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2536 cur++;
2537 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2538 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2539 /*
2540 * ISO639
2541 */
2542 cur++;
2543 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2544 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2545 cur++;
2546 else
2547 return(0);
2548 } else
2549 return(0);
2550 while (cur[0] != 0) { /* non input consuming */
2551 if (cur[0] != '-')
2552 return(0);
2553 cur++;
2554 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2555 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2556 cur++;
2557 else
2558 return(0);
2559 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2560 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2561 cur++;
2562 }
2563 return(1);
2564}
2565
2566/**
2567 * xmlDecodeEntities:
2568 * @ctxt: the parser context
2569 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2570 * @len: the len to decode (in bytes !), -1 for no size limit
2571 * @end: an end marker xmlChar, 0 if none
2572 * @end2: an end marker xmlChar, 0 if none
2573 * @end3: an end marker xmlChar, 0 if none
2574 *
2575 * This function is deprecated, we now always process entities content
2576 * through xmlStringDecodeEntities
2577 *
2578 * TODO: remove it in next major release.
2579 *
2580 * [67] Reference ::= EntityRef | CharRef
2581 *
2582 * [69] PEReference ::= '%' Name ';'
2583 *
2584 * Returns A newly allocated string with the substitution done. The caller
2585 * must deallocate it !
2586 */
2587xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002588xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2589 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002590#if 0
2591 xmlChar *buffer = NULL;
2592 unsigned int buffer_size = 0;
2593 unsigned int nbchars = 0;
2594
2595 xmlChar *current = NULL;
2596 xmlEntityPtr ent;
2597 unsigned int max = (unsigned int) len;
2598 int c,l;
2599#endif
2600
2601 static int deprecated = 0;
2602 if (!deprecated) {
2603 xmlGenericError(xmlGenericErrorContext,
2604 "xmlDecodeEntities() deprecated function reached\n");
2605 deprecated = 1;
2606 }
2607
2608#if 0
2609 if (ctxt->depth > 40) {
2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2611 ctxt->sax->error(ctxt->userData,
2612 "Detected entity reference loop\n");
2613 ctxt->wellFormed = 0;
2614 ctxt->disableSAX = 1;
2615 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2616 return(NULL);
2617 }
2618
2619 /*
2620 * allocate a translation buffer.
2621 */
2622 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2623 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2624 if (buffer == NULL) {
2625 perror("xmlDecodeEntities: malloc failed");
2626 return(NULL);
2627 }
2628
2629 /*
2630 * Ok loop until we reach one of the ending char or a size limit.
2631 */
2632 GROW;
2633 c = CUR_CHAR(l);
2634 while ((nbchars < max) && (c != end) && /* NOTUSED */
2635 (c != end2) && (c != end3)) {
2636 GROW;
2637 if (c == 0) break;
2638 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2639 int val = xmlParseCharRef(ctxt);
2640 COPY_BUF(0,buffer,nbchars,val);
2641 NEXTL(l);
2642 } else if ((c == '&') && (ctxt->token != '&') &&
2643 (what & XML_SUBSTITUTE_REF)) {
2644 if (xmlParserDebugEntities)
2645 xmlGenericError(xmlGenericErrorContext,
2646 "decoding Entity Reference\n");
2647 ent = xmlParseEntityRef(ctxt);
2648 if ((ent != NULL) &&
2649 (ctxt->replaceEntities != 0)) {
2650 current = ent->content;
2651 while (*current != 0) { /* non input consuming loop */
2652 buffer[nbchars++] = *current++;
2653 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2654 growBuffer(buffer);
2655 }
2656 }
2657 } else if (ent != NULL) {
2658 const xmlChar *cur = ent->name;
2659
2660 buffer[nbchars++] = '&';
2661 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2662 growBuffer(buffer);
2663 }
2664 while (*cur != 0) { /* non input consuming loop */
2665 buffer[nbchars++] = *cur++;
2666 }
2667 buffer[nbchars++] = ';';
2668 }
2669 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2670 /*
2671 * a PEReference induce to switch the entity flow,
2672 * we break here to flush the current set of chars
2673 * parsed if any. We will be called back later.
2674 */
2675 if (xmlParserDebugEntities)
2676 xmlGenericError(xmlGenericErrorContext,
2677 "decoding PE Reference\n");
2678 if (nbchars != 0) break;
2679
2680 xmlParsePEReference(ctxt);
2681
2682 /*
2683 * Pop-up of finished entities.
2684 */
2685 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2686 xmlPopInput(ctxt);
2687
2688 break;
2689 } else {
2690 COPY_BUF(l,buffer,nbchars,c);
2691 NEXTL(l);
2692 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2693 growBuffer(buffer);
2694 }
2695 }
2696 c = CUR_CHAR(l);
2697 }
2698 buffer[nbchars++] = 0;
2699 return(buffer);
2700#endif
2701 return(NULL);
2702}
2703
2704/**
2705 * xmlNamespaceParseNCName:
2706 * @ctxt: an XML parser context
2707 *
2708 * parse an XML namespace name.
2709 *
2710 * TODO: this seems not in use anymore, the namespace handling is done on
2711 * top of the SAX interfaces, i.e. not on raw input.
2712 *
2713 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2714 *
2715 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2716 * CombiningChar | Extender
2717 *
2718 * Returns the namespace name or NULL
2719 */
2720
2721xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002722xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723#if 0
2724 xmlChar buf[XML_MAX_NAMELEN + 5];
2725 int len = 0, l;
2726 int cur = CUR_CHAR(l);
2727#endif
2728
2729 static int deprecated = 0;
2730 if (!deprecated) {
2731 xmlGenericError(xmlGenericErrorContext,
2732 "xmlNamespaceParseNCName() deprecated function reached\n");
2733 deprecated = 1;
2734 }
2735
2736#if 0
2737 /* load first the value of the char !!! */
2738 GROW;
2739 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2740
2741xmlGenericError(xmlGenericErrorContext,
2742 "xmlNamespaceParseNCName: reached loop 3\n");
2743 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2744 (cur == '.') || (cur == '-') ||
2745 (cur == '_') ||
2746 (IS_COMBINING(cur)) ||
2747 (IS_EXTENDER(cur))) {
2748 COPY_BUF(l,buf,len,cur);
2749 NEXTL(l);
2750 cur = CUR_CHAR(l);
2751 if (len >= XML_MAX_NAMELEN) {
2752 xmlGenericError(xmlGenericErrorContext,
2753 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2754 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2755 (cur == '.') || (cur == '-') ||
2756 (cur == '_') ||
2757 (IS_COMBINING(cur)) ||
2758 (IS_EXTENDER(cur))) {
2759 NEXTL(l);
2760 cur = CUR_CHAR(l);
2761 }
2762 break;
2763 }
2764 }
2765 return(xmlStrndup(buf, len));
2766#endif
2767 return(NULL);
2768}
2769
2770/**
2771 * xmlNamespaceParseQName:
2772 * @ctxt: an XML parser context
2773 * @prefix: a xmlChar **
2774 *
2775 * TODO: this seems not in use anymore, the namespace handling is done on
2776 * top of the SAX interfaces, i.e. not on raw input.
2777 *
2778 * parse an XML qualified name
2779 *
2780 * [NS 5] QName ::= (Prefix ':')? LocalPart
2781 *
2782 * [NS 6] Prefix ::= NCName
2783 *
2784 * [NS 7] LocalPart ::= NCName
2785 *
2786 * Returns the local part, and prefix is updated
2787 * to get the Prefix if any.
2788 */
2789
2790xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002791xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002792
2793 static int deprecated = 0;
2794 if (!deprecated) {
2795 xmlGenericError(xmlGenericErrorContext,
2796 "xmlNamespaceParseQName() deprecated function reached\n");
2797 deprecated = 1;
2798 }
2799
2800#if 0
2801 xmlChar *ret = NULL;
2802
2803 *prefix = NULL;
2804 ret = xmlNamespaceParseNCName(ctxt);
2805 if (RAW == ':') {
2806 *prefix = ret;
2807 NEXT;
2808 ret = xmlNamespaceParseNCName(ctxt);
2809 }
2810
2811 return(ret);
2812#endif
2813 return(NULL);
2814}
2815
2816/**
2817 * xmlNamespaceParseNSDef:
2818 * @ctxt: an XML parser context
2819 *
2820 * parse a namespace prefix declaration
2821 *
2822 * TODO: this seems not in use anymore, the namespace handling is done on
2823 * top of the SAX interfaces, i.e. not on raw input.
2824 *
2825 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2826 *
2827 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2828 *
2829 * Returns the namespace name
2830 */
2831
2832xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002833xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 static int deprecated = 0;
2835 if (!deprecated) {
2836 xmlGenericError(xmlGenericErrorContext,
2837 "xmlNamespaceParseNSDef() deprecated function reached\n");
2838 deprecated = 1;
2839 }
2840 return(NULL);
2841#if 0
2842 xmlChar *name = NULL;
2843
2844 if ((RAW == 'x') && (NXT(1) == 'm') &&
2845 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2846 (NXT(4) == 's')) {
2847 SKIP(5);
2848 if (RAW == ':') {
2849 NEXT;
2850 name = xmlNamespaceParseNCName(ctxt);
2851 }
2852 }
2853 return(name);
2854#endif
2855}
2856
2857/**
2858 * xmlParseQuotedString:
2859 * @ctxt: an XML parser context
2860 *
2861 * Parse and return a string between quotes or doublequotes
2862 *
2863 * TODO: Deprecated, to be removed at next drop of binary compatibility
2864 *
2865 * Returns the string parser or NULL.
2866 */
2867xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002868xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002869 static int deprecated = 0;
2870 if (!deprecated) {
2871 xmlGenericError(xmlGenericErrorContext,
2872 "xmlParseQuotedString() deprecated function reached\n");
2873 deprecated = 1;
2874 }
2875 return(NULL);
2876
2877#if 0
2878 xmlChar *buf = NULL;
2879 int len = 0,l;
2880 int size = XML_PARSER_BUFFER_SIZE;
2881 int c;
2882
2883 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2884 if (buf == NULL) {
2885 xmlGenericError(xmlGenericErrorContext,
2886 "malloc of %d byte failed\n", size);
2887 return(NULL);
2888 }
2889xmlGenericError(xmlGenericErrorContext,
2890 "xmlParseQuotedString: reached loop 4\n");
2891 if (RAW == '"') {
2892 NEXT;
2893 c = CUR_CHAR(l);
2894 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2895 if (len + 5 >= size) {
2896 size *= 2;
2897 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2898 if (buf == NULL) {
2899 xmlGenericError(xmlGenericErrorContext,
2900 "realloc of %d byte failed\n", size);
2901 return(NULL);
2902 }
2903 }
2904 COPY_BUF(l,buf,len,c);
2905 NEXTL(l);
2906 c = CUR_CHAR(l);
2907 }
2908 if (c != '"') {
2909 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2911 ctxt->sax->error(ctxt->userData,
2912 "String not closed \"%.50s\"\n", buf);
2913 ctxt->wellFormed = 0;
2914 ctxt->disableSAX = 1;
2915 } else {
2916 NEXT;
2917 }
2918 } else if (RAW == '\''){
2919 NEXT;
2920 c = CUR;
2921 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2922 if (len + 1 >= size) {
2923 size *= 2;
2924 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2925 if (buf == NULL) {
2926 xmlGenericError(xmlGenericErrorContext,
2927 "realloc of %d byte failed\n", size);
2928 return(NULL);
2929 }
2930 }
2931 buf[len++] = c;
2932 NEXT;
2933 c = CUR;
2934 }
2935 if (RAW != '\'') {
2936 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2938 ctxt->sax->error(ctxt->userData,
2939 "String not closed \"%.50s\"\n", buf);
2940 ctxt->wellFormed = 0;
2941 ctxt->disableSAX = 1;
2942 } else {
2943 NEXT;
2944 }
2945 }
2946 return(buf);
2947#endif
2948}
2949
2950/**
2951 * xmlParseNamespace:
2952 * @ctxt: an XML parser context
2953 *
2954 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2955 *
2956 * This is what the older xml-name Working Draft specified, a bunch of
2957 * other stuff may still rely on it, so support is still here as
2958 * if it was declared on the root of the Tree:-(
2959 *
2960 * TODO: remove from library
2961 *
2962 * To be removed at next drop of binary compatibility
2963 */
2964
2965void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002966xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002967 static int deprecated = 0;
2968 if (!deprecated) {
2969 xmlGenericError(xmlGenericErrorContext,
2970 "xmlParseNamespace() deprecated function reached\n");
2971 deprecated = 1;
2972 }
2973
2974#if 0
2975 xmlChar *href = NULL;
2976 xmlChar *prefix = NULL;
2977 int garbage = 0;
2978
2979 /*
2980 * We just skipped "namespace" or "xml:namespace"
2981 */
2982 SKIP_BLANKS;
2983
2984xmlGenericError(xmlGenericErrorContext,
2985 "xmlParseNamespace: reached loop 5\n");
2986 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2987 /*
2988 * We can have "ns" or "prefix" attributes
2989 * Old encoding as 'href' or 'AS' attributes is still supported
2990 */
2991 if ((RAW == 'n') && (NXT(1) == 's')) {
2992 garbage = 0;
2993 SKIP(2);
2994 SKIP_BLANKS;
2995
2996 if (RAW != '=') continue;
2997 NEXT;
2998 SKIP_BLANKS;
2999
3000 href = xmlParseQuotedString(ctxt);
3001 SKIP_BLANKS;
3002 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3003 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3004 garbage = 0;
3005 SKIP(4);
3006 SKIP_BLANKS;
3007
3008 if (RAW != '=') continue;
3009 NEXT;
3010 SKIP_BLANKS;
3011
3012 href = xmlParseQuotedString(ctxt);
3013 SKIP_BLANKS;
3014 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3015 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3016 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3017 garbage = 0;
3018 SKIP(6);
3019 SKIP_BLANKS;
3020
3021 if (RAW != '=') continue;
3022 NEXT;
3023 SKIP_BLANKS;
3024
3025 prefix = xmlParseQuotedString(ctxt);
3026 SKIP_BLANKS;
3027 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3028 garbage = 0;
3029 SKIP(2);
3030 SKIP_BLANKS;
3031
3032 if (RAW != '=') continue;
3033 NEXT;
3034 SKIP_BLANKS;
3035
3036 prefix = xmlParseQuotedString(ctxt);
3037 SKIP_BLANKS;
3038 } else if ((RAW == '?') && (NXT(1) == '>')) {
3039 garbage = 0;
3040 NEXT;
3041 } else {
3042 /*
3043 * Found garbage when parsing the namespace
3044 */
3045 if (!garbage) {
3046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3047 ctxt->sax->error(ctxt->userData,
3048 "xmlParseNamespace found garbage\n");
3049 }
3050 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3051 ctxt->wellFormed = 0;
3052 ctxt->disableSAX = 1;
3053 NEXT;
3054 }
3055 }
3056
3057 MOVETO_ENDTAG(CUR_PTR);
3058 NEXT;
3059
3060 /*
3061 * Register the DTD.
3062 if (href != NULL)
3063 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3064 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3065 */
3066
3067 if (prefix != NULL) xmlFree(prefix);
3068 if (href != NULL) xmlFree(href);
3069#endif
3070}
3071
3072/**
3073 * xmlScanName:
3074 * @ctxt: an XML parser context
3075 *
3076 * Trickery: parse an XML name but without consuming the input flow
3077 * Needed for rollback cases. Used only when parsing entities references.
3078 *
3079 * TODO: seems deprecated now, only used in the default part of
3080 * xmlParserHandleReference
3081 *
3082 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3083 * CombiningChar | Extender
3084 *
3085 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3086 *
3087 * [6] Names ::= Name (S Name)*
3088 *
3089 * Returns the Name parsed or NULL
3090 */
3091
3092xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003093xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 static int deprecated = 0;
3095 if (!deprecated) {
3096 xmlGenericError(xmlGenericErrorContext,
3097 "xmlScanName() deprecated function reached\n");
3098 deprecated = 1;
3099 }
3100 return(NULL);
3101
3102#if 0
3103 xmlChar buf[XML_MAX_NAMELEN];
3104 int len = 0;
3105
3106 GROW;
3107 if (!IS_LETTER(RAW) && (RAW != '_') &&
3108 (RAW != ':')) {
3109 return(NULL);
3110 }
3111
3112
3113 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3114 (NXT(len) == '.') || (NXT(len) == '-') ||
3115 (NXT(len) == '_') || (NXT(len) == ':') ||
3116 (IS_COMBINING(NXT(len))) ||
3117 (IS_EXTENDER(NXT(len)))) {
3118 GROW;
3119 buf[len] = NXT(len);
3120 len++;
3121 if (len >= XML_MAX_NAMELEN) {
3122 xmlGenericError(xmlGenericErrorContext,
3123 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3124 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3125 (IS_DIGIT(NXT(len))) ||
3126 (NXT(len) == '.') || (NXT(len) == '-') ||
3127 (NXT(len) == '_') || (NXT(len) == ':') ||
3128 (IS_COMBINING(NXT(len))) ||
3129 (IS_EXTENDER(NXT(len))))
3130 len++;
3131 break;
3132 }
3133 }
3134 return(xmlStrndup(buf, len));
3135#endif
3136}
3137
3138/**
3139 * xmlParserHandleReference:
3140 * @ctxt: the parser context
3141 *
3142 * TODO: Remove, now deprecated ... the test is done directly in the
3143 * content parsing
3144 * routines.
3145 *
3146 * [67] Reference ::= EntityRef | CharRef
3147 *
3148 * [68] EntityRef ::= '&' Name ';'
3149 *
3150 * [ WFC: Entity Declared ]
3151 * the Name given in the entity reference must match that in an entity
3152 * declaration, except that well-formed documents need not declare any
3153 * of the following entities: amp, lt, gt, apos, quot.
3154 *
3155 * [ WFC: Parsed Entity ]
3156 * An entity reference must not contain the name of an unparsed entity
3157 *
3158 * [66] CharRef ::= '&#' [0-9]+ ';' |
3159 * '&#x' [0-9a-fA-F]+ ';'
3160 *
3161 * A PEReference may have been detectect in the current input stream
3162 * the handling is done accordingly to
3163 * http://www.w3.org/TR/REC-xml#entproc
3164 */
3165void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003166xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003167 static int deprecated = 0;
3168 if (!deprecated) {
3169 xmlGenericError(xmlGenericErrorContext,
3170 "xmlParserHandleReference() deprecated function reached\n");
3171 deprecated = 1;
3172 }
3173
3174#if 0
3175 xmlParserInputPtr input;
3176 xmlChar *name;
3177 xmlEntityPtr ent = NULL;
3178
3179 if (ctxt->token != 0) {
3180 return;
3181 }
3182 if (RAW != '&') return;
3183 GROW;
3184 if ((RAW == '&') && (NXT(1) == '#')) {
3185 switch(ctxt->instate) {
3186 case XML_PARSER_ENTITY_DECL:
3187 case XML_PARSER_PI:
3188 case XML_PARSER_CDATA_SECTION:
3189 case XML_PARSER_COMMENT:
3190 case XML_PARSER_SYSTEM_LITERAL:
3191 /* we just ignore it there */
3192 return;
3193 case XML_PARSER_START_TAG:
3194 return;
3195 case XML_PARSER_END_TAG:
3196 return;
3197 case XML_PARSER_EOF:
3198 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3201 ctxt->wellFormed = 0;
3202 ctxt->disableSAX = 1;
3203 return;
3204 case XML_PARSER_PROLOG:
3205 case XML_PARSER_START:
3206 case XML_PARSER_MISC:
3207 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3209 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3210 ctxt->wellFormed = 0;
3211 ctxt->disableSAX = 1;
3212 return;
3213 case XML_PARSER_EPILOG:
3214 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3216 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3217 ctxt->wellFormed = 0;
3218 ctxt->disableSAX = 1;
3219 return;
3220 case XML_PARSER_DTD:
3221 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3223 ctxt->sax->error(ctxt->userData,
3224 "CharRef are forbiden in DTDs!\n");
3225 ctxt->wellFormed = 0;
3226 ctxt->disableSAX = 1;
3227 return;
3228 case XML_PARSER_ENTITY_VALUE:
3229 /*
3230 * NOTE: in the case of entity values, we don't do the
3231 * substitution here since we need the literal
3232 * entity value to be able to save the internal
3233 * subset of the document.
3234 * This will be handled by xmlStringDecodeEntities
3235 */
3236 return;
3237 case XML_PARSER_CONTENT:
3238 return;
3239 case XML_PARSER_ATTRIBUTE_VALUE:
3240 /* ctxt->token = xmlParseCharRef(ctxt); */
3241 return;
3242 case XML_PARSER_IGNORE:
3243 return;
3244 }
3245 return;
3246 }
3247
3248 switch(ctxt->instate) {
3249 case XML_PARSER_CDATA_SECTION:
3250 return;
3251 case XML_PARSER_PI:
3252 case XML_PARSER_COMMENT:
3253 case XML_PARSER_SYSTEM_LITERAL:
3254 case XML_PARSER_CONTENT:
3255 return;
3256 case XML_PARSER_START_TAG:
3257 return;
3258 case XML_PARSER_END_TAG:
3259 return;
3260 case XML_PARSER_EOF:
3261 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3263 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3264 ctxt->wellFormed = 0;
3265 ctxt->disableSAX = 1;
3266 return;
3267 case XML_PARSER_PROLOG:
3268 case XML_PARSER_START:
3269 case XML_PARSER_MISC:
3270 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3272 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3273 ctxt->wellFormed = 0;
3274 ctxt->disableSAX = 1;
3275 return;
3276 case XML_PARSER_EPILOG:
3277 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3279 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3280 ctxt->wellFormed = 0;
3281 ctxt->disableSAX = 1;
3282 return;
3283 case XML_PARSER_ENTITY_VALUE:
3284 /*
3285 * NOTE: in the case of entity values, we don't do the
3286 * substitution here since we need the literal
3287 * entity value to be able to save the internal
3288 * subset of the document.
3289 * This will be handled by xmlStringDecodeEntities
3290 */
3291 return;
3292 case XML_PARSER_ATTRIBUTE_VALUE:
3293 /*
3294 * NOTE: in the case of attributes values, we don't do the
3295 * substitution here unless we are in a mode where
3296 * the parser is explicitely asked to substitute
3297 * entities. The SAX callback is called with values
3298 * without entity substitution.
3299 * This will then be handled by xmlStringDecodeEntities
3300 */
3301 return;
3302 case XML_PARSER_ENTITY_DECL:
3303 /*
3304 * we just ignore it there
3305 * the substitution will be done once the entity is referenced
3306 */
3307 return;
3308 case XML_PARSER_DTD:
3309 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "Entity references are forbiden in DTDs!\n");
3313 ctxt->wellFormed = 0;
3314 ctxt->disableSAX = 1;
3315 return;
3316 case XML_PARSER_IGNORE:
3317 return;
3318 }
3319
3320/* TODO: this seems not reached anymore .... Verify ... */
3321xmlGenericError(xmlGenericErrorContext,
3322 "Reached deprecated section in xmlParserHandleReference()\n");
3323xmlGenericError(xmlGenericErrorContext,
3324 "Please forward the document to Daniel.Veillard@w3.org\n");
3325xmlGenericError(xmlGenericErrorContext,
3326 "indicating the version: %s, thanks !\n", xmlParserVersion);
3327 NEXT;
3328 name = xmlScanName(ctxt);
3329 if (name == NULL) {
3330 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3332 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3333 ctxt->wellFormed = 0;
3334 ctxt->disableSAX = 1;
3335 ctxt->token = '&';
3336 return;
3337 }
3338 if (NXT(xmlStrlen(name)) != ';') {
3339 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3341 ctxt->sax->error(ctxt->userData,
3342 "Entity reference: ';' expected\n");
3343 ctxt->wellFormed = 0;
3344 ctxt->disableSAX = 1;
3345 ctxt->token = '&';
3346 xmlFree(name);
3347 return;
3348 }
3349 SKIP(xmlStrlen(name) + 1);
3350 if (ctxt->sax != NULL) {
3351 if (ctxt->sax->getEntity != NULL)
3352 ent = ctxt->sax->getEntity(ctxt->userData, name);
3353 }
3354
3355 /*
3356 * [ WFC: Entity Declared ]
3357 * the Name given in the entity reference must match that in an entity
3358 * declaration, except that well-formed documents need not declare any
3359 * of the following entities: amp, lt, gt, apos, quot.
3360 */
3361 if (ent == NULL)
3362 ent = xmlGetPredefinedEntity(name);
3363 if (ent == NULL) {
3364 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3366 ctxt->sax->error(ctxt->userData,
3367 "Entity reference: entity %s not declared\n",
3368 name);
3369 ctxt->wellFormed = 0;
3370 ctxt->disableSAX = 1;
3371 xmlFree(name);
3372 return;
3373 }
3374
3375 /*
3376 * [ WFC: Parsed Entity ]
3377 * An entity reference must not contain the name of an unparsed entity
3378 */
3379 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3380 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3382 ctxt->sax->error(ctxt->userData,
3383 "Entity reference to unparsed entity %s\n", name);
3384 ctxt->wellFormed = 0;
3385 ctxt->disableSAX = 1;
3386 }
3387
3388 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3389 ctxt->token = ent->content[0];
3390 xmlFree(name);
3391 return;
3392 }
3393 input = xmlNewEntityInputStream(ctxt, ent);
3394 xmlPushInput(ctxt, input);
3395 xmlFree(name);
3396#endif
3397 return;
3398}
3399
3400/**
3401 * xmlHandleEntity:
3402 * @ctxt: an XML parser context
3403 * @entity: an XML entity pointer.
3404 *
3405 * Default handling of defined entities, when should we define a new input
3406 * stream ? When do we just handle that as a set of chars ?
3407 *
3408 * OBSOLETE: to be removed at some point.
3409 */
3410
3411void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003412xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003413 static int deprecated = 0;
3414 if (!deprecated) {
3415 xmlGenericError(xmlGenericErrorContext,
3416 "xmlHandleEntity() deprecated function reached\n");
3417 deprecated = 1;
3418 }
3419
3420#if 0
3421 int len;
3422 xmlParserInputPtr input;
3423
3424 if (entity->content == NULL) {
3425 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3427 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3428 entity->name);
3429 ctxt->wellFormed = 0;
3430 ctxt->disableSAX = 1;
3431 return;
3432 }
3433 len = xmlStrlen(entity->content);
3434 if (len <= 2) goto handle_as_char;
3435
3436 /*
3437 * Redefine its content as an input stream.
3438 */
3439 input = xmlNewEntityInputStream(ctxt, entity);
3440 xmlPushInput(ctxt, input);
3441 return;
3442
3443handle_as_char:
3444 /*
3445 * Just handle the content as a set of chars.
3446 */
3447 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3448 (ctxt->sax->characters != NULL))
3449 ctxt->sax->characters(ctxt->userData, entity->content, len);
3450#endif
3451}
3452
3453/**
3454 * xmlNewGlobalNs:
3455 * @doc: the document carrying the namespace
3456 * @href: the URI associated
3457 * @prefix: the prefix for the namespace
3458 *
3459 * Creation of a Namespace, the old way using PI and without scoping
3460 * DEPRECATED !!!
3461 * It now create a namespace on the root element of the document if found.
3462 * Returns NULL this functionnality had been removed
3463 */
3464xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003465xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3466 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003467 static int deprecated = 0;
3468 if (!deprecated) {
3469 xmlGenericError(xmlGenericErrorContext,
3470 "xmlNewGlobalNs() deprecated function reached\n");
3471 deprecated = 1;
3472 }
3473 return(NULL);
3474#if 0
3475 xmlNodePtr root;
3476
3477 xmlNsPtr cur;
3478
3479 root = xmlDocGetRootElement(doc);
3480 if (root != NULL)
3481 return(xmlNewNs(root, href, prefix));
3482
3483 /*
3484 * if there is no root element yet, create an old Namespace type
3485 * and it will be moved to the root at save time.
3486 */
3487 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3488 if (cur == NULL) {
3489 xmlGenericError(xmlGenericErrorContext,
3490 "xmlNewGlobalNs : malloc failed\n");
3491 return(NULL);
3492 }
3493 memset(cur, 0, sizeof(xmlNs));
3494 cur->type = XML_GLOBAL_NAMESPACE;
3495
3496 if (href != NULL)
3497 cur->href = xmlStrdup(href);
3498 if (prefix != NULL)
3499 cur->prefix = xmlStrdup(prefix);
3500
3501 /*
3502 * Add it at the end to preserve parsing order ...
3503 */
3504 if (doc != NULL) {
3505 if (doc->oldNs == NULL) {
3506 doc->oldNs = cur;
3507 } else {
3508 xmlNsPtr prev = doc->oldNs;
3509
3510 while (prev->next != NULL) prev = prev->next;
3511 prev->next = cur;
3512 }
3513 }
3514
3515 return(NULL);
3516#endif
3517}
3518
3519/**
3520 * xmlUpgradeOldNs:
3521 * @doc: a document pointer
3522 *
3523 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3524 * DEPRECATED
3525 */
3526void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003527xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003528 static int deprecated = 0;
3529 if (!deprecated) {
3530 xmlGenericError(xmlGenericErrorContext,
3531 "xmlNewGlobalNs() deprecated function reached\n");
3532 deprecated = 1;
3533 }
3534#if 0
3535 xmlNsPtr cur;
3536
3537 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3538 if (doc->children == NULL) {
3539#ifdef DEBUG_TREE
3540 xmlGenericError(xmlGenericErrorContext,
3541 "xmlUpgradeOldNs: failed no root !\n");
3542#endif
3543 return;
3544 }
3545
3546 cur = doc->oldNs;
3547 while (cur->next != NULL) {
3548 cur->type = XML_LOCAL_NAMESPACE;
3549 cur = cur->next;
3550 }
3551 cur->type = XML_LOCAL_NAMESPACE;
3552 cur->next = doc->children->nsDef;
3553 doc->children->nsDef = doc->oldNs;
3554 doc->oldNs = NULL;
3555#endif
3556}
3557