blob: a20faf5b6ebb8872bd5ce4b6f13ff7d5baf2bd00 [file] [log] [blame]
Daniel Veillardb1059e22000-09-16 14:02:43 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * Daniel.Veillard@w3.org
8 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
Daniel Veillardaaf58b92000-10-06 14:07:26 +000042#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000044#include <libxml/entities.h>
Daniel Veillardb71379b2000-10-09 12:30:39 +000045#include <libxml/xmlerror.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000046#include <libxml/encoding.h>
47#include <libxml/valid.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000048#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000050
51
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
70 if ((myversion / 10000) != (version / 10000)) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000071 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +000072 "Fatal: program compiled against libxml %d using libxml %d\n",
73 (version / 10000), (myversion / 10000));
74 exit(1);
75 }
76 if ((myversion / 100) < (version / 100)) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000077 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +000078 "Warning: program compiled against libxml %d using older %d\n",
79 (version / 100), (myversion / 100));
80 }
81}
82
83
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
const char *xmlFeaturesList[] = {
    /* parser context settings */
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",

    /* the SAX handler block as a whole */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};
127
128/*
129 * xmlGetFeaturesList:
130 * @len: the length of the features name array (input/output)
131 * @result: an array of string to be filled with the features name.
132 *
133 * Copy at most *@len feature names into the @result array
134 *
135 * Returns -1 in case or error, or the total number of features,
136 * len is updated with the number of strings copied,
137 * strings must not be deallocated
138 */
139int
140xmlGetFeaturesList(int *len, const char **result) {
141 int ret, i;
142
143 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
144 if ((len == NULL) || (result == NULL))
145 return(ret);
146 if ((*len < 0) || (*len >= 1000))
147 return(-1);
148 if (*len > ret)
149 *len = ret;
150 for (i = 0;i < *len;i++)
151 result[i] = xmlFeaturesList[i];
152 return(ret);
153}
154
155/*
156 * xmlGetFeature:
157 * @ctxt: an XML/HTML parser context
158 * @name: the feature name
159 * @result: location to store the result
160 *
161 * Read the current value of one feature of this parser instance
162 *
163 * Returns -1 in case or error, 0 otherwise
164 */
165int
166xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
167 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
168 return(-1);
169
170 if (!strcmp(name, "validate")) {
171 *((int *) result) = ctxt->validate;
172 } else if (!strcmp(name, "keep blanks")) {
173 *((int *) result) = ctxt->keepBlanks;
174 } else if (!strcmp(name, "disable SAX")) {
175 *((int *) result) = ctxt->disableSAX;
176 } else if (!strcmp(name, "fetch external entities")) {
177 *((int *) result) = ctxt->validate;
178 } else if (!strcmp(name, "substitute entities")) {
179 *((int *) result) = ctxt->replaceEntities;
180 } else if (!strcmp(name, "gather line info")) {
181 *((int *) result) = ctxt->record_info;
182 } else if (!strcmp(name, "user data")) {
183 *((void **)result) = ctxt->userData;
184 } else if (!strcmp(name, "is html")) {
185 *((int *) result) = ctxt->html;
186 } else if (!strcmp(name, "is standalone")) {
187 *((int *) result) = ctxt->standalone;
188 } else if (!strcmp(name, "document")) {
189 *((xmlDocPtr *) result) = ctxt->myDoc;
190 } else if (!strcmp(name, "is well formed")) {
191 *((int *) result) = ctxt->wellFormed;
192 } else if (!strcmp(name, "is valid")) {
193 *((int *) result) = ctxt->valid;
194 } else if (!strcmp(name, "SAX block")) {
195 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
196 } else if (!strcmp(name, "SAX function internalSubset")) {
197 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
198 } else if (!strcmp(name, "SAX function isStandalone")) {
199 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
200 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
201 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
202 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
203 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
204 } else if (!strcmp(name, "SAX function resolveEntity")) {
205 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
206 } else if (!strcmp(name, "SAX function getEntity")) {
207 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
208 } else if (!strcmp(name, "SAX function entityDecl")) {
209 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
210 } else if (!strcmp(name, "SAX function notationDecl")) {
211 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
212 } else if (!strcmp(name, "SAX function attributeDecl")) {
213 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
214 } else if (!strcmp(name, "SAX function elementDecl")) {
215 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
216 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
217 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
218 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
219 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
220 } else if (!strcmp(name, "SAX function startDocument")) {
221 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
222 } else if (!strcmp(name, "SAX function endDocument")) {
223 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
224 } else if (!strcmp(name, "SAX function startElement")) {
225 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
226 } else if (!strcmp(name, "SAX function endElement")) {
227 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
228 } else if (!strcmp(name, "SAX function reference")) {
229 *((referenceSAXFunc *) result) = ctxt->sax->reference;
230 } else if (!strcmp(name, "SAX function characters")) {
231 *((charactersSAXFunc *) result) = ctxt->sax->characters;
232 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
233 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
234 } else if (!strcmp(name, "SAX function processingInstruction")) {
235 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
236 } else if (!strcmp(name, "SAX function comment")) {
237 *((commentSAXFunc *) result) = ctxt->sax->comment;
238 } else if (!strcmp(name, "SAX function warning")) {
239 *((warningSAXFunc *) result) = ctxt->sax->warning;
240 } else if (!strcmp(name, "SAX function error")) {
241 *((errorSAXFunc *) result) = ctxt->sax->error;
242 } else if (!strcmp(name, "SAX function fatalError")) {
243 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
244 } else if (!strcmp(name, "SAX function getParameterEntity")) {
245 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
246 } else if (!strcmp(name, "SAX function cdataBlock")) {
247 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
248 } else if (!strcmp(name, "SAX function externalSubset")) {
249 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
250 } else {
251 return(-1);
252 }
253 return(0);
254}
255
256/*
257 * xmlSetFeature:
258 * @ctxt: an XML/HTML parser context
259 * @name: the feature name
260 * @value: pointer to the location of the new value
261 *
262 * Change the current value of one feature of this parser instance
263 *
264 * Returns -1 in case or error, 0 otherwise
265 */
266int
267xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
268 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
269 return(-1);
270
271 if (!strcmp(name, "validate")) {
272 ctxt->validate = *((int *) value);
273 } else if (!strcmp(name, "keep blanks")) {
274 ctxt->keepBlanks = *((int *) value);
275 } else if (!strcmp(name, "disable SAX")) {
276 ctxt->disableSAX = *((int *) value);
277 } else if (!strcmp(name, "fetch external entities")) {
278 int newvalid = *((int *) value);
279 if ((!ctxt->validate) && (newvalid != 0)) {
280 if (ctxt->vctxt.warning == NULL)
281 ctxt->vctxt.warning = xmlParserValidityWarning;
282 if (ctxt->vctxt.error == NULL)
283 ctxt->vctxt.error = xmlParserValidityError;
284 /* Allocate the Node stack */
285 ctxt->vctxt.nodeTab = (xmlNodePtr *)
286 xmlMalloc(4 * sizeof(xmlNodePtr));
287 if (ctxt->vctxt.nodeTab == NULL) {
288 ctxt->vctxt.nodeMax = 0;
289 ctxt->validate = 0;
290 return(-1);
291 }
292 ctxt->vctxt.nodeNr = 0;
293 ctxt->vctxt.nodeMax = 4;
294 ctxt->vctxt.node = NULL;
295 }
296 ctxt->validate = newvalid;
297 } else if (!strcmp(name, "substitute entities")) {
298 ctxt->replaceEntities = *((int *) value);
299 } else if (!strcmp(name, "gather line info")) {
300 ctxt->record_info = *((int *) value);
301 } else if (!strcmp(name, "user data")) {
302 ctxt->userData = *((void **)value);
303 } else if (!strcmp(name, "is html")) {
304 ctxt->html = *((int *) value);
305 } else if (!strcmp(name, "is standalone")) {
306 ctxt->standalone = *((int *) value);
307 } else if (!strcmp(name, "document")) {
308 ctxt->myDoc = *((xmlDocPtr *) value);
309 } else if (!strcmp(name, "is well formed")) {
310 ctxt->wellFormed = *((int *) value);
311 } else if (!strcmp(name, "is valid")) {
312 ctxt->valid = *((int *) value);
313 } else if (!strcmp(name, "SAX block")) {
314 ctxt->sax = *((xmlSAXHandlerPtr *) value);
315 } else if (!strcmp(name, "SAX function internalSubset")) {
316 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function isStandalone")) {
318 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
320 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
322 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function resolveEntity")) {
324 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function getEntity")) {
326 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
327 } else if (!strcmp(name, "SAX function entityDecl")) {
328 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function notationDecl")) {
330 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function attributeDecl")) {
332 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function elementDecl")) {
334 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
336 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
338 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function startDocument")) {
340 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function endDocument")) {
342 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function startElement")) {
344 ctxt->sax->startElement = *((startElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function endElement")) {
346 ctxt->sax->endElement = *((endElementSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function reference")) {
348 ctxt->sax->reference = *((referenceSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function characters")) {
350 ctxt->sax->characters = *((charactersSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
352 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function processingInstruction")) {
354 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function comment")) {
356 ctxt->sax->comment = *((commentSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function warning")) {
358 ctxt->sax->warning = *((warningSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function error")) {
360 ctxt->sax->error = *((errorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function fatalError")) {
362 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function getParameterEntity")) {
364 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
365 } else if (!strcmp(name, "SAX function cdataBlock")) {
366 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function externalSubset")) {
368 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
369 } else {
370 return(-1);
371 }
372 return(0);
373}
374
375/************************************************************************
376 * *
377 * Some functions to avoid too large macros *
378 * *
379 ************************************************************************/
380
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are allowed */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through to the supplementary-plane test */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
401
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of those allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
416
/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * The production is kept as a table of inclusive ranges sorted by
 * code point; single code points are stored as one-element ranges.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    static const struct {
        int first;
        int last;
    } baseCharRanges[] = {
        { 0x0041, 0x005A }, { 0x0061, 0x007A }, { 0x00C0, 0x00D6 },
        { 0x00D8, 0x00F6 }, { 0x00F8, 0x00FF }, { 0x0100, 0x0131 },
        { 0x0134, 0x013E }, { 0x0141, 0x0148 }, { 0x014A, 0x017E },
        { 0x0180, 0x01C3 }, { 0x01CD, 0x01F0 }, { 0x01F4, 0x01F5 },
        { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, { 0x02BB, 0x02C1 },
        { 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
        { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
        { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
        { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 }, { 0x0401, 0x040C },
        { 0x040E, 0x044F }, { 0x0451, 0x045C }, { 0x045E, 0x0481 },
        { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 }, { 0x04CB, 0x04CC },
        { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 }, { 0x04F8, 0x04F9 },
        { 0x0531, 0x0556 }, { 0x0559, 0x0559 }, { 0x0561, 0x0586 },
        { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 }, { 0x0621, 0x063A },
        { 0x0641, 0x064A }, { 0x0671, 0x06B7 }, { 0x06BA, 0x06BE },
        { 0x06C0, 0x06CE }, { 0x06D0, 0x06D3 }, { 0x06D5, 0x06D5 },
        { 0x06E5, 0x06E6 },

        { 0x0905, 0x0939 }, { 0x093D, 0x093D }, { 0x0958, 0x0961 },
        { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
        { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
        { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
        { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
        { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
        { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
        { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
        { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
        { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0ABD },
        { 0x0AE0, 0x0AE0 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
        { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
        { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B3D }, { 0x0B5C, 0x0B5D },
        { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
        { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
        { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
        { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0C05, 0x0C0C },
        { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 },
        { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 }, { 0x0C85, 0x0C8C },
        { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 },
        { 0x0CB5, 0x0CB9 }, { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
        { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
        { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 }, { 0x0E01, 0x0E2E },
        { 0x0E30, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E45 },
        { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
        { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
        { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
        { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
        { 0x0EB0, 0x0EB0 }, { 0x0EB2, 0x0EB3 }, { 0x0EBD, 0x0EBD },
        { 0x0EC0, 0x0EC4 }, { 0x0F40, 0x0F47 }, { 0x0F49, 0x0F69 },

        { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 }, { 0x1100, 0x1100 },
        { 0x1102, 0x1103 }, { 0x1105, 0x1107 }, { 0x1109, 0x1109 },
        { 0x110B, 0x110C }, { 0x110E, 0x1112 }, { 0x113C, 0x113C },
        { 0x113E, 0x113E }, { 0x1140, 0x1140 }, { 0x114C, 0x114C },
        { 0x114E, 0x114E }, { 0x1150, 0x1150 }, { 0x1154, 0x1155 },
        { 0x1159, 0x1159 }, { 0x115F, 0x1161 }, { 0x1163, 0x1163 },
        { 0x1165, 0x1165 }, { 0x1167, 0x1167 }, { 0x1169, 0x1169 },
        { 0x116D, 0x116E }, { 0x1172, 0x1173 }, { 0x1175, 0x1175 },
        { 0x119E, 0x119E }, { 0x11A8, 0x11A8 }, { 0x11AB, 0x11AB },
        { 0x11AE, 0x11AF }, { 0x11B7, 0x11B8 }, { 0x11BA, 0x11BA },
        { 0x11BC, 0x11C2 }, { 0x11EB, 0x11EB }, { 0x11F0, 0x11F0 },
        { 0x11F9, 0x11F9 }, { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },
        { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
        { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
        { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
        { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FBE, 0x1FBE },
        { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
        { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
        { 0x1FF6, 0x1FFC }, { 0x2126, 0x2126 }, { 0x212A, 0x212B },
        { 0x212E, 0x212E }, { 0x2180, 0x2182 }, { 0x3041, 0x3094 },
        { 0x30A1, 0x30FA }, { 0x3105, 0x312C }, { 0xAC00, 0xD7A3 }
    };
    const int nbRanges = sizeof(baseCharRanges) / sizeof(baseCharRanges[0]);
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* sorted table: once past c, no later range can contain it */
        if (c < baseCharRanges[idx].first)
            return(0);
        if (c <= baseCharRanges[idx].last)
            return(1);
    }
    return(0);
}
640
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The production is kept as a table of inclusive ranges sorted by
 * code point.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const struct {
        int first;
        int last;
    } digitRanges[] = {
        { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F }, { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 }, { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F29 }
    };
    const int nbRanges = sizeof(digitRanges) / sizeof(digitRanges[0]);
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* sorted table: once past c, no later range can contain it */
        if (c < digitRanges[idx].first)
            return(0);
        if (c <= digitRanges[idx].last)
            return(1);
    }
    return(0);
}
670
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The production is kept as a table of inclusive ranges sorted by
 * code point; single code points are stored as one-element ranges.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    static const struct {
        int first;
        int last;
    } combiningRanges[] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },

        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },

        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },

        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    const int nbRanges = sizeof(combiningRanges) / sizeof(combiningRanges[0]);
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* sorted table: once past c, no later range can contain it */
        if (c < combiningRanges[idx].first)
            return(0);
        if (c <= combiningRanges[idx].last)
            return(1);
    }
    return(0);
}
783
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7:
        case 0x02D0:
        case 0x02D1:
        case 0x0387:
        case 0x0640:
        case 0x0E46:
        case 0x0EC6:
        case 0x3005:
            return(1);
        default:
            break;
    }
    /*
     * The hiragana range starts at 0x309D: 0x309B and 0x309C are not
     * part of the production.
     */
    return(((c >= 0x3031) && (c <= 0x3035)) ||
           ((c >= 0x309D) && (c <= 0x309E)) ||
           ((c >= 0x30FC) && (c <= 0x30FE)));
}
805
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points named by the production are accepted; the
 * 0xF900-0xFA2D block is not part of it.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    if ((c >= 0x4E00) && (c <= 0x9FA5))
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);
    return(c == 0x3007);
}
823
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tested first; Ideographic only when that fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
837
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* alphanumerics first, they are the common case */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* then the white space and punctuation allowed by the production */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
860
861/************************************************************************
862 * *
863 * Input handling functions for progressive parsing *
864 * *
865 ************************************************************************/
866
867/* #define DEBUG_INPUT */
868/* #define DEBUG_STACK */
869/* #define DEBUG_PUSH */
870
871
872/* we need to keep enough input to show errors in context */
873#define LINE_LEN 80
874
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the input's base pointer does
 * not match the content of its buffer, or when cur lies outside the
 * [base, base + use] window, then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int truncates on
     * platforms where pointers are wider than int. The pointer
     * difference and the counters are converted explicitly so that
     * every argument matches its conversion specifier.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
899
900
901/**
902 * xmlParserInputRead:
903 * @in: an XML parser input
904 * @len: an indicative size for the lookahead
905 *
906 * This function refresh the input for the parser. It doesn't try to
907 * preserve pointers to the input buffer, and discard already read data
908 *
909 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
910 * end of this entity
911 */
912int
913xmlParserInputRead(xmlParserInputPtr in, int len) {
914 int ret;
915 int used;
916 int index;
917
918#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000919 xmlGenericError(xmlGenericErrorContext, "Read\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000920#endif
921 if (in->buf == NULL) return(-1);
922 if (in->base == NULL) return(-1);
923 if (in->cur == NULL) return(-1);
924 if (in->buf->buffer == NULL) return(-1);
925 if (in->buf->readcallback == NULL) return(-1);
926
927 CHECK_BUFFER(in);
928
929 used = in->cur - in->buf->buffer->content;
930 ret = xmlBufferShrink(in->buf->buffer, used);
931 if (ret > 0) {
932 in->cur -= ret;
933 in->consumed += ret;
934 }
935 ret = xmlParserInputBufferRead(in->buf, len);
936 if (in->base != in->buf->buffer->content) {
937 /*
938 * the buffer has been realloced
939 */
940 index = in->cur - in->base;
941 in->base = in->buf->buffer->content;
942 in->cur = &in->buf->buffer->content[index];
943 }
944
945 CHECK_BUFFER(in);
946
947 return(ret);
948}
949
950/**
951 * xmlParserInputGrow:
952 * @in: an XML parser input
953 * @len: an indicative size for the lookahead
954 *
955 * This function increase the input for the parser. It tries to
956 * preserve pointers to the input buffer, and keep already read data
957 *
958 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
959 * end of this entity
960 */
961int
962xmlParserInputGrow(xmlParserInputPtr in, int len) {
963 int ret;
964 int index;
965
966#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000967 xmlGenericError(xmlGenericErrorContext, "Grow\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000968#endif
969 if (in->buf == NULL) return(-1);
970 if (in->base == NULL) return(-1);
971 if (in->cur == NULL) return(-1);
972 if (in->buf->buffer == NULL) return(-1);
973
974 CHECK_BUFFER(in);
975
976 index = in->cur - in->base;
977 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
978
979 CHECK_BUFFER(in);
980
981 return(0);
982 }
983 if (in->buf->readcallback != NULL)
984 ret = xmlParserInputBufferGrow(in->buf, len);
985 else
986 return(0);
987
988 /*
989 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
990 * block, but we use it really as an integer to do some
991 * pointer arithmetic. Insure will raise it as a bug but in
992 * that specific case, that's not !
993 */
994 if (in->base != in->buf->buffer->content) {
995 /*
996 * the buffer has been realloced
997 */
998 index = in->cur - in->base;
999 in->base = in->buf->buffer->content;
1000 in->cur = &in->buf->buffer->content[index];
1001 }
1002
1003 CHECK_BUFFER(in);
1004
1005 return(ret);
1006}
1007
1008/**
1009 * xmlParserInputShrink:
1010 * @in: an XML parser input
1011 *
1012 * This function removes used input for the parser.
1013 */
1014void
1015xmlParserInputShrink(xmlParserInputPtr in) {
1016 int used;
1017 int ret;
1018 int index;
1019
1020#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001021 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00001022#endif
1023 if (in->buf == NULL) return;
1024 if (in->base == NULL) return;
1025 if (in->cur == NULL) return;
1026 if (in->buf->buffer == NULL) return;
1027
1028 CHECK_BUFFER(in);
1029
1030 used = in->cur - in->buf->buffer->content;
Daniel Veillard1baf4122000-10-15 20:38:39 +00001031 /*
1032 * Do not shrink on large buffers whose only a tiny fraction
1033 * was consumned
1034 */
1035 if (in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1036 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00001037 if (used > INPUT_CHUNK) {
1038 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1039 if (ret > 0) {
1040 in->cur -= ret;
1041 in->consumed += ret;
1042 }
1043 }
1044
1045 CHECK_BUFFER(in);
1046
1047 if (in->buf->buffer->use > INPUT_CHUNK) {
1048 return;
1049 }
1050 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1051 if (in->base != in->buf->buffer->content) {
1052 /*
1053 * the buffer has been realloced
1054 */
1055 index = in->cur - in->base;
1056 in->base = in->buf->buffer->content;
1057 in->cur = &in->buf->buffer->content[index];
1058 }
1059
1060 CHECK_BUFFER(in);
1061}
1062
1063/************************************************************************
1064 * *
1065 * UTF8 character input and related functions *
1066 * *
1067 ************************************************************************/
1068
1069/**
1070 * xmlNextChar:
1071 * @ctxt: the XML parser context
1072 *
1073 * Skip to the next char input char.
1074 */
1075
1076void
1077xmlNextChar(xmlParserCtxtPtr ctxt) {
1078 if (ctxt->instate == XML_PARSER_EOF)
1079 return;
1080
1081 /*
1082 * 2.11 End-of-Line Handling
1083 * the literal two-character sequence "#xD#xA" or a standalone
1084 * literal #xD, an XML processor must pass to the application
1085 * the single character #xA.
1086 */
1087 if (ctxt->token != 0) ctxt->token = 0;
1088 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1089 if ((*ctxt->input->cur == 0) &&
1090 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1091 (ctxt->instate != XML_PARSER_COMMENT)) {
1092 /*
1093 * If we are at the end of the current entity and
1094 * the context allows it, we pop consumed entities
1095 * automatically.
1096 * the auto closing should be blocked in other cases
1097 */
1098 xmlPopInput(ctxt);
1099 } else {
1100 if (*(ctxt->input->cur) == '\n') {
1101 ctxt->input->line++; ctxt->input->col = 1;
1102 } else ctxt->input->col++;
1103 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1104 /*
1105 * We are supposed to handle UTF8, check it's valid
1106 * From rfc2044: encoding of the Unicode values on UTF-8:
1107 *
1108 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1109 * 0000 0000-0000 007F 0xxxxxxx
1110 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1111 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1112 *
1113 * Check for the 0x110000 limit too
1114 */
1115 const unsigned char *cur = ctxt->input->cur;
1116 unsigned char c;
1117
1118 c = *cur;
1119 if (c & 0x80) {
1120 if (cur[1] == 0)
1121 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1122 if ((cur[1] & 0xc0) != 0x80)
1123 goto encoding_error;
1124 if ((c & 0xe0) == 0xe0) {
1125 unsigned int val;
1126
1127 if (cur[2] == 0)
1128 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1129 if ((cur[2] & 0xc0) != 0x80)
1130 goto encoding_error;
1131 if ((c & 0xf0) == 0xf0) {
1132 if (cur[3] == 0)
1133 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1134 if (((c & 0xf8) != 0xf0) ||
1135 ((cur[3] & 0xc0) != 0x80))
1136 goto encoding_error;
1137 /* 4-byte code */
1138 ctxt->input->cur += 4;
1139 val = (cur[0] & 0x7) << 18;
1140 val |= (cur[1] & 0x3f) << 12;
1141 val |= (cur[2] & 0x3f) << 6;
1142 val |= cur[3] & 0x3f;
1143 } else {
1144 /* 3-byte code */
1145 ctxt->input->cur += 3;
1146 val = (cur[0] & 0xf) << 12;
1147 val |= (cur[1] & 0x3f) << 6;
1148 val |= cur[2] & 0x3f;
1149 }
1150 if (((val > 0xd7ff) && (val < 0xe000)) ||
1151 ((val > 0xfffd) && (val < 0x10000)) ||
1152 (val >= 0x110000)) {
1153 if ((ctxt->sax != NULL) &&
1154 (ctxt->sax->error != NULL))
1155 ctxt->sax->error(ctxt->userData,
1156 "Char 0x%X out of allowed range\n", val);
1157 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1158 ctxt->wellFormed = 0;
1159 ctxt->disableSAX = 1;
1160 }
1161 } else
1162 /* 2-byte code */
1163 ctxt->input->cur += 2;
1164 } else
1165 /* 1-byte code */
1166 ctxt->input->cur++;
1167 } else {
1168 /*
1169 * Assume it's a fixed lenght encoding (1) with
1170 * a compatibke encoding for the ASCII set, since
1171 * XML constructs only use < 128 chars
1172 */
1173 ctxt->input->cur++;
1174 }
1175 ctxt->nbChars++;
1176 if (*ctxt->input->cur == 0)
1177 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1178 }
1179 } else {
1180 ctxt->input->cur++;
1181 ctxt->nbChars++;
1182 if (*ctxt->input->cur == 0)
1183 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1184 }
1185 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1186 xmlParserHandlePEReference(ctxt);
1187 if ((*ctxt->input->cur == 0) &&
1188 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1189 xmlPopInput(ctxt);
1190 return;
1191encoding_error:
1192 /*
1193 * If we detect an UTF8 error that probably mean that the
1194 * input encoding didn't get properly advertized in the
1195 * declaration header. Report the error and switch the encoding
1196 * to ISO-Latin-1 (if you don't like this policy, just declare the
1197 * encoding !)
1198 */
1199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1200 ctxt->sax->error(ctxt->userData,
1201 "Input is not proper UTF-8, indicate encoding !\n");
1202 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1203 ctxt->input->cur[0], ctxt->input->cur[1],
1204 ctxt->input->cur[2], ctxt->input->cur[3]);
1205 }
1206 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1207
1208 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1209 ctxt->input->cur++;
1210 return;
1211}
1212
1213/**
1214 * xmlCurrentChar:
1215 * @ctxt: the XML parser context
1216 * @len: pointer to the length of the char read
1217 *
1218 * The current char value, if using UTF-8 this may actaully span multiple
1219 * bytes in the input buffer. Implement the end of line normalization:
1220 * 2.11 End-of-Line Handling
1221 * Wherever an external parsed entity or the literal entity value
1222 * of an internal parsed entity contains either the literal two-character
1223 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1224 * must pass to the application the single character #xA.
1225 * This behavior can conveniently be produced by normalizing all
1226 * line breaks to #xA on input, before parsing.)
1227 *
1228 * Returns the current char value and its lenght
1229 */
1230
1231int
1232xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1233 if (ctxt->instate == XML_PARSER_EOF)
1234 return(0);
1235
1236 if (ctxt->token != 0) {
1237 *len = 0;
1238 return(ctxt->token);
1239 }
1240 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1241 /*
1242 * We are supposed to handle UTF8, check it's valid
1243 * From rfc2044: encoding of the Unicode values on UTF-8:
1244 *
1245 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1246 * 0000 0000-0000 007F 0xxxxxxx
1247 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1248 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1249 *
1250 * Check for the 0x110000 limit too
1251 */
1252 const unsigned char *cur = ctxt->input->cur;
1253 unsigned char c;
1254 unsigned int val;
1255
1256 c = *cur;
1257 if (c & 0x80) {
1258 if (cur[1] == 0)
1259 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1260 if ((cur[1] & 0xc0) != 0x80)
1261 goto encoding_error;
1262 if ((c & 0xe0) == 0xe0) {
1263
1264 if (cur[2] == 0)
1265 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1266 if ((cur[2] & 0xc0) != 0x80)
1267 goto encoding_error;
1268 if ((c & 0xf0) == 0xf0) {
1269 if (cur[3] == 0)
1270 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1271 if (((c & 0xf8) != 0xf0) ||
1272 ((cur[3] & 0xc0) != 0x80))
1273 goto encoding_error;
1274 /* 4-byte code */
1275 *len = 4;
1276 val = (cur[0] & 0x7) << 18;
1277 val |= (cur[1] & 0x3f) << 12;
1278 val |= (cur[2] & 0x3f) << 6;
1279 val |= cur[3] & 0x3f;
1280 } else {
1281 /* 3-byte code */
1282 *len = 3;
1283 val = (cur[0] & 0xf) << 12;
1284 val |= (cur[1] & 0x3f) << 6;
1285 val |= cur[2] & 0x3f;
1286 }
1287 } else {
1288 /* 2-byte code */
1289 *len = 2;
1290 val = (cur[0] & 0x1f) << 6;
1291 val |= cur[1] & 0x3f;
1292 }
1293 if (!IS_CHAR(val)) {
1294 if ((ctxt->sax != NULL) &&
1295 (ctxt->sax->error != NULL))
1296 ctxt->sax->error(ctxt->userData,
1297 "Char 0x%X out of allowed range\n", val);
1298 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1299 ctxt->wellFormed = 0;
1300 ctxt->disableSAX = 1;
1301 }
1302 return(val);
1303 } else {
1304 /* 1-byte code */
1305 *len = 1;
1306 if (*ctxt->input->cur == 0xD) {
1307 if (ctxt->input->cur[1] == 0xA) {
1308 ctxt->nbChars++;
1309 ctxt->input->cur++;
1310 }
1311 return(0xA);
1312 }
1313 return((int) *ctxt->input->cur);
1314 }
1315 }
1316 /*
1317 * Assume it's a fixed lenght encoding (1) with
1318 * a compatibke encoding for the ASCII set, since
1319 * XML constructs only use < 128 chars
1320 */
1321 *len = 1;
1322 if (*ctxt->input->cur == 0xD) {
1323 if (ctxt->input->cur[1] == 0xA) {
1324 ctxt->nbChars++;
1325 ctxt->input->cur++;
1326 }
1327 return(0xA);
1328 }
1329 return((int) *ctxt->input->cur);
1330encoding_error:
1331 /*
1332 * If we detect an UTF8 error that probably mean that the
1333 * input encoding didn't get properly advertized in the
1334 * declaration header. Report the error and switch the encoding
1335 * to ISO-Latin-1 (if you don't like this policy, just declare the
1336 * encoding !)
1337 */
1338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1339 ctxt->sax->error(ctxt->userData,
1340 "Input is not proper UTF-8, indicate encoding !\n");
1341 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1342 ctxt->input->cur[0], ctxt->input->cur[1],
1343 ctxt->input->cur[2], ctxt->input->cur[3]);
1344 }
1345 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1346
1347 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1348 *len = 1;
1349 return((int) *ctxt->input->cur);
1350}
1351
1352/**
1353 * xmlStringCurrentChar:
1354 * @ctxt: the XML parser context
1355 * @cur: pointer to the beginning of the char
1356 * @len: pointer to the length of the char read
1357 *
1358 * The current char value, if using UTF-8 this may actaully span multiple
1359 * bytes in the input buffer.
1360 *
1361 * Returns the current char value and its lenght
1362 */
1363
1364int
1365xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1366 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1367 /*
1368 * We are supposed to handle UTF8, check it's valid
1369 * From rfc2044: encoding of the Unicode values on UTF-8:
1370 *
1371 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1372 * 0000 0000-0000 007F 0xxxxxxx
1373 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1374 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1375 *
1376 * Check for the 0x110000 limit too
1377 */
1378 unsigned char c;
1379 unsigned int val;
1380
1381 c = *cur;
1382 if (c & 0x80) {
1383 if ((cur[1] & 0xc0) != 0x80)
1384 goto encoding_error;
1385 if ((c & 0xe0) == 0xe0) {
1386
1387 if ((cur[2] & 0xc0) != 0x80)
1388 goto encoding_error;
1389 if ((c & 0xf0) == 0xf0) {
1390 if (((c & 0xf8) != 0xf0) ||
1391 ((cur[3] & 0xc0) != 0x80))
1392 goto encoding_error;
1393 /* 4-byte code */
1394 *len = 4;
1395 val = (cur[0] & 0x7) << 18;
1396 val |= (cur[1] & 0x3f) << 12;
1397 val |= (cur[2] & 0x3f) << 6;
1398 val |= cur[3] & 0x3f;
1399 } else {
1400 /* 3-byte code */
1401 *len = 3;
1402 val = (cur[0] & 0xf) << 12;
1403 val |= (cur[1] & 0x3f) << 6;
1404 val |= cur[2] & 0x3f;
1405 }
1406 } else {
1407 /* 2-byte code */
1408 *len = 2;
1409 val = (cur[0] & 0x1f) << 6;
1410 val |= cur[2] & 0x3f;
1411 }
1412 if (!IS_CHAR(val)) {
1413 if ((ctxt->sax != NULL) &&
1414 (ctxt->sax->error != NULL))
1415 ctxt->sax->error(ctxt->userData,
1416 "Char 0x%X out of allowed range\n", val);
1417 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1418 ctxt->wellFormed = 0;
1419 ctxt->disableSAX = 1;
1420 }
1421 return(val);
1422 } else {
1423 /* 1-byte code */
1424 *len = 1;
1425 return((int) *cur);
1426 }
1427 }
1428 /*
1429 * Assume it's a fixed lenght encoding (1) with
1430 * a compatibke encoding for the ASCII set, since
1431 * XML constructs only use < 128 chars
1432 */
1433 *len = 1;
1434 return((int) *cur);
1435encoding_error:
1436 /*
1437 * If we detect an UTF8 error that probably mean that the
1438 * input encoding didn't get properly advertized in the
1439 * declaration header. Report the error and switch the encoding
1440 * to ISO-Latin-1 (if you don't like this policy, just declare the
1441 * encoding !)
1442 */
1443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1444 ctxt->sax->error(ctxt->userData,
1445 "Input is not proper UTF-8, indicate encoding !\n");
1446 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1447 ctxt->input->cur[0], ctxt->input->cur[1],
1448 ctxt->input->cur[2], ctxt->input->cur[3]);
1449 }
1450 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1451
1452 *len = 1;
1453 return((int) *cur);
1454}
1455
1456/**
1457 * xmlCopyChar:
1458 * @len: pointer to the length of the char read (or zero)
1459 * @array: pointer to an arry of xmlChar
1460 * @val: the char value
1461 *
1462 * append the char value in the array
1463 *
1464 * Returns the number of xmlChar written
1465 */
1466
1467int
1468xmlCopyChar(int len, xmlChar *out, int val) {
1469 /*
1470 * We are supposed to handle UTF8, check it's valid
1471 * From rfc2044: encoding of the Unicode values on UTF-8:
1472 *
1473 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1474 * 0000 0000-0000 007F 0xxxxxxx
1475 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1476 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1477 */
1478 if (len == 0) {
1479 if (val < 0) len = 0;
1480 else if (val < 0x80) len = 1;
1481 else if (val < 0x800) len = 2;
1482 else if (val < 0x10000) len = 3;
1483 else if (val < 0x110000) len = 4;
1484 if (len == 0) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001485 xmlGenericError(xmlGenericErrorContext,
1486 "Internal error, xmlCopyChar 0x%X out of bound\n",
Daniel Veillardb1059e22000-09-16 14:02:43 +00001487 val);
1488 return(0);
1489 }
1490 }
1491 if (len > 1) {
1492 int bits;
1493
1494 if (val < 0x80) { *out++= val; bits= -6; }
1495 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1496 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1497 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1498
1499 for ( ; bits >= 0; bits-= 6)
1500 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1501
1502 return(len);
1503 }
1504 *out = (xmlChar) val;
1505 return(1);
1506}
1507
1508/************************************************************************
1509 * *
Daniel Veillard04698d92000-09-17 16:00:22 +00001510 * Commodity functions to switch encodings *
1511 * *
1512 ************************************************************************/
1513
1514/**
1515 * xmlSwitchEncoding:
1516 * @ctxt: the parser context
1517 * @enc: the encoding value (number)
1518 *
1519 * change the input functions when discovering the character encoding
1520 * of a given entity.
1521 *
1522 * Returns 0 in case of success, -1 otherwise
1523 */
1524int
1525xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1526{
1527 xmlCharEncodingHandlerPtr handler;
1528
1529 switch (enc) {
1530 case XML_CHAR_ENCODING_ERROR:
1531 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1533 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1534 ctxt->wellFormed = 0;
1535 ctxt->disableSAX = 1;
1536 break;
1537 case XML_CHAR_ENCODING_NONE:
1538 /* let's assume it's UTF-8 without the XML decl */
1539 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1540 return(0);
1541 case XML_CHAR_ENCODING_UTF8:
1542 /* default encoding, no conversion should be needed */
1543 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1544 return(0);
1545 default:
1546 break;
1547 }
1548 handler = xmlGetCharEncodingHandler(enc);
1549 if (handler == NULL) {
1550 /*
1551 * Default handlers.
1552 */
1553 switch (enc) {
1554 case XML_CHAR_ENCODING_ERROR:
1555 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1557 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1558 ctxt->wellFormed = 0;
1559 ctxt->disableSAX = 1;
1560 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1561 break;
1562 case XML_CHAR_ENCODING_NONE:
1563 /* let's assume it's UTF-8 without the XML decl */
1564 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1565 return(0);
1566 case XML_CHAR_ENCODING_UTF8:
1567 case XML_CHAR_ENCODING_ASCII:
1568 /* default encoding, no conversion should be needed */
1569 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1570 return(0);
1571 case XML_CHAR_ENCODING_UTF16LE:
1572 break;
1573 case XML_CHAR_ENCODING_UTF16BE:
1574 break;
1575 case XML_CHAR_ENCODING_UCS4LE:
1576 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1578 ctxt->sax->error(ctxt->userData,
1579 "char encoding USC4 little endian not supported\n");
1580 break;
1581 case XML_CHAR_ENCODING_UCS4BE:
1582 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1584 ctxt->sax->error(ctxt->userData,
1585 "char encoding USC4 big endian not supported\n");
1586 break;
1587 case XML_CHAR_ENCODING_EBCDIC:
1588 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1590 ctxt->sax->error(ctxt->userData,
1591 "char encoding EBCDIC not supported\n");
1592 break;
1593 case XML_CHAR_ENCODING_UCS4_2143:
1594 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1596 ctxt->sax->error(ctxt->userData,
1597 "char encoding UCS4 2143 not supported\n");
1598 break;
1599 case XML_CHAR_ENCODING_UCS4_3412:
1600 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1602 ctxt->sax->error(ctxt->userData,
1603 "char encoding UCS4 3412 not supported\n");
1604 break;
1605 case XML_CHAR_ENCODING_UCS2:
1606 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608 ctxt->sax->error(ctxt->userData,
1609 "char encoding UCS2 not supported\n");
1610 break;
1611 case XML_CHAR_ENCODING_8859_1:
1612 case XML_CHAR_ENCODING_8859_2:
1613 case XML_CHAR_ENCODING_8859_3:
1614 case XML_CHAR_ENCODING_8859_4:
1615 case XML_CHAR_ENCODING_8859_5:
1616 case XML_CHAR_ENCODING_8859_6:
1617 case XML_CHAR_ENCODING_8859_7:
1618 case XML_CHAR_ENCODING_8859_8:
1619 case XML_CHAR_ENCODING_8859_9:
1620 /*
1621 * We used to keep the internal content in the
1622 * document encoding however this turns being unmaintainable
1623 * So xmlGetCharEncodingHandler() will return non-null
1624 * values for this now.
1625 */
1626 if ((ctxt->inputNr == 1) &&
1627 (ctxt->encoding == NULL) &&
1628 (ctxt->input->encoding != NULL)) {
1629 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1630 }
1631 ctxt->charset = enc;
1632 return(0);
1633 case XML_CHAR_ENCODING_2022_JP:
1634 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636 ctxt->sax->error(ctxt->userData,
1637 "char encoding ISO-2022-JPnot supported\n");
1638 break;
1639 case XML_CHAR_ENCODING_SHIFT_JIS:
1640 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642 ctxt->sax->error(ctxt->userData,
1643 "char encoding Shift_JIS not supported\n");
1644 break;
1645 case XML_CHAR_ENCODING_EUC_JP:
1646 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1648 ctxt->sax->error(ctxt->userData,
1649 "char encoding EUC-JPnot supported\n");
1650 break;
1651 }
1652 }
1653 if (handler == NULL)
1654 return(-1);
1655 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1656 return(xmlSwitchToEncoding(ctxt, handler));
1657}
1658
1659/**
1660 * xmlSwitchToEncoding:
1661 * @ctxt: the parser context
1662 * @handler: the encoding handler
1663 *
1664 * change the input functions when discovering the character encoding
1665 * of a given entity.
1666 *
1667 * Returns 0 in case of success, -1 otherwise
1668 */
1669int
1670xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1671{
1672 int nbchars;
1673
1674 if (handler != NULL) {
1675 if (ctxt->input != NULL) {
1676 if (ctxt->input->buf != NULL) {
1677 if (ctxt->input->buf->encoder != NULL) {
1678 if (ctxt->input->buf->encoder == handler)
1679 return(0);
1680 /*
1681 * Note: this is a bit dangerous, but that's what it
1682 * takes to use nearly compatible signature for different
1683 * encodings.
1684 */
1685 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1686 ctxt->input->buf->encoder = handler;
1687 return(0);
1688 }
1689 ctxt->input->buf->encoder = handler;
1690
1691 /*
1692 * Is there already some content down the pipe to convert ?
1693 */
1694 if ((ctxt->input->buf->buffer != NULL) &&
1695 (ctxt->input->buf->buffer->use > 0)) {
1696 int processed;
1697
1698 /*
1699 * Specific handling of the Byte Order Mark for
1700 * UTF-16
1701 */
1702 if ((handler->name != NULL) &&
1703 (!strcmp(handler->name, "UTF-16LE")) &&
1704 (ctxt->input->cur[0] == 0xFF) &&
1705 (ctxt->input->cur[1] == 0xFE)) {
1706 ctxt->input->cur += 2;
1707 }
1708 if ((handler->name != NULL) &&
1709 (!strcmp(handler->name, "UTF-16BE")) &&
1710 (ctxt->input->cur[0] == 0xFE) &&
1711 (ctxt->input->cur[1] == 0xFF)) {
1712 ctxt->input->cur += 2;
1713 }
1714
1715 /*
1716 * Shring the current input buffer.
1717 * Move it as the raw buffer and create a new input buffer
1718 */
1719 processed = ctxt->input->cur - ctxt->input->base;
1720 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1721 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1722 ctxt->input->buf->buffer = xmlBufferCreate();
1723
1724 if (ctxt->html) {
1725 /*
1726 * converst as much as possbile of the buffer
1727 */
1728 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1729 ctxt->input->buf->buffer,
1730 ctxt->input->buf->raw);
1731 } else {
1732 /*
1733 * convert just enough to get
1734 * '<?xml version="1.0" encoding="xxx"?>'
1735 * parsed with the autodetected encoding
1736 * into the parser reading buffer.
1737 */
1738 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1739 ctxt->input->buf->buffer,
1740 ctxt->input->buf->raw);
1741 }
1742 if (nbchars < 0) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001743 xmlGenericError(xmlGenericErrorContext,
1744 "xmlSwitchToEncoding: encoder error\n");
Daniel Veillard04698d92000-09-17 16:00:22 +00001745 return(-1);
1746 }
1747 ctxt->input->base =
1748 ctxt->input->cur = ctxt->input->buf->buffer->content;
1749
1750 }
1751 return(0);
1752 } else {
1753 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1754 /*
1755 * When parsing a static memory array one must know the
1756 * size to be able to convert the buffer.
1757 */
1758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1759 ctxt->sax->error(ctxt->userData,
1760 "xmlSwitchEncoding : no input\n");
1761 return(-1);
1762 } else {
1763 int processed;
1764
1765 /*
1766 * Shring the current input buffer.
1767 * Move it as the raw buffer and create a new input buffer
1768 */
1769 processed = ctxt->input->cur - ctxt->input->base;
1770
1771 ctxt->input->buf->raw = xmlBufferCreate();
1772 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1773 ctxt->input->length - processed);
1774 ctxt->input->buf->buffer = xmlBufferCreate();
1775
1776 /*
1777 * convert as much as possible of the raw input
1778 * to the parser reading buffer.
1779 */
1780 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1781 ctxt->input->buf->buffer,
1782 ctxt->input->buf->raw);
1783 if (nbchars < 0) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001784 xmlGenericError(xmlGenericErrorContext,
1785 "xmlSwitchToEncoding: encoder error\n");
Daniel Veillard04698d92000-09-17 16:00:22 +00001786 return(-1);
1787 }
1788
1789 /*
1790 * Conversion succeeded, get rid of the old buffer
1791 */
1792 if ((ctxt->input->free != NULL) &&
1793 (ctxt->input->base != NULL))
1794 ctxt->input->free((xmlChar *) ctxt->input->base);
1795 ctxt->input->base =
1796 ctxt->input->cur = ctxt->input->buf->buffer->content;
1797 }
1798 }
1799 } else {
1800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1801 ctxt->sax->error(ctxt->userData,
1802 "xmlSwitchEncoding : no input\n");
1803 return(-1);
1804 }
1805 /*
1806 * The parsing is now done in UTF8 natively
1807 */
1808 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1809 } else
1810 return(-1);
1811 return(0);
1812
1813}
1814
1815/************************************************************************
1816 * *
Daniel Veillardb1059e22000-09-16 14:02:43 +00001817 * Commodity functions to handle entities processing *
1818 * *
1819 ************************************************************************/
1820
1821/**
1822 * xmlFreeInputStream:
1823 * @input: an xmlParserInputPtr
1824 *
1825 * Free up an input stream.
1826 */
1827void
1828xmlFreeInputStream(xmlParserInputPtr input) {
1829 if (input == NULL) return;
1830
1831 if (input->filename != NULL) xmlFree((char *) input->filename);
1832 if (input->directory != NULL) xmlFree((char *) input->directory);
1833 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1834 if (input->version != NULL) xmlFree((char *) input->version);
1835 if ((input->free != NULL) && (input->base != NULL))
1836 input->free((xmlChar *) input->base);
1837 if (input->buf != NULL)
1838 xmlFreeParserInputBuffer(input->buf);
1839 memset(input, -1, sizeof(xmlParserInput));
1840 xmlFree(input);
1841}
1842
1843/**
1844 * xmlNewInputStream:
1845 * @ctxt: an XML parser context
1846 *
1847 * Create a new input stream structure
1848 * Returns the new input stream or NULL
1849 */
1850xmlParserInputPtr
1851xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1852 xmlParserInputPtr input;
1853
1854 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1855 if (input == NULL) {
1856 if (ctxt != NULL) {
1857 ctxt->errNo = XML_ERR_NO_MEMORY;
1858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1859 ctxt->sax->error(ctxt->userData,
1860 "malloc: couldn't allocate a new input stream\n");
1861 ctxt->errNo = XML_ERR_NO_MEMORY;
1862 }
1863 return(NULL);
1864 }
1865 memset(input, 0, sizeof(xmlParserInput));
1866 input->line = 1;
1867 input->col = 1;
1868 input->standalone = -1;
1869 return(input);
1870}
1871
1872/**
1873 * xmlNewIOInputStream:
1874 * @ctxt: an XML parser context
1875 * @input: an I/O Input
1876 * @enc: the charset encoding if known
1877 *
1878 * Create a new input stream structure encapsulating the @input into
1879 * a stream suitable for the parser.
1880 *
1881 * Returns the new input stream or NULL
1882 */
1883xmlParserInputPtr
1884xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1885 xmlCharEncoding enc) {
1886 xmlParserInputPtr inputStream;
1887
1888 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001889 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00001890 inputStream = xmlNewInputStream(ctxt);
1891 if (inputStream == NULL) {
1892 return(NULL);
1893 }
1894 inputStream->filename = NULL;
1895 inputStream->buf = input;
1896 inputStream->base = inputStream->buf->buffer->content;
1897 inputStream->cur = inputStream->buf->buffer->content;
1898 if (enc != XML_CHAR_ENCODING_NONE) {
1899 xmlSwitchEncoding(ctxt, enc);
1900 }
1901
1902 return(inputStream);
1903}
1904
1905/**
1906 * xmlNewEntityInputStream:
1907 * @ctxt: an XML parser context
1908 * @entity: an Entity pointer
1909 *
1910 * Create a new input stream based on an xmlEntityPtr
1911 *
1912 * Returns the new input stream or NULL
1913 */
1914xmlParserInputPtr
1915xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1916 xmlParserInputPtr input;
1917
1918 if (entity == NULL) {
1919 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1921 ctxt->sax->error(ctxt->userData,
1922 "internal: xmlNewEntityInputStream entity = NULL\n");
1923 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1924 return(NULL);
1925 }
1926 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001927 xmlGenericError(xmlGenericErrorContext,
1928 "new input from entity: %s\n", entity->name);
Daniel Veillardb1059e22000-09-16 14:02:43 +00001929 if (entity->content == NULL) {
1930 switch (entity->etype) {
1931 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1932 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1934 ctxt->sax->error(ctxt->userData,
1935 "xmlNewEntityInputStream unparsed entity !\n");
1936 break;
1937 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1938 case XML_EXTERNAL_PARAMETER_ENTITY:
1939 return(xmlLoadExternalEntity((char *) entity->URI,
1940 (char *) entity->ExternalID, ctxt));
1941 case XML_INTERNAL_GENERAL_ENTITY:
1942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1943 ctxt->sax->error(ctxt->userData,
1944 "Internal entity %s without content !\n", entity->name);
1945 break;
1946 case XML_INTERNAL_PARAMETER_ENTITY:
1947 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1949 ctxt->sax->error(ctxt->userData,
1950 "Internal parameter entity %s without content !\n", entity->name);
1951 break;
1952 case XML_INTERNAL_PREDEFINED_ENTITY:
1953 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1955 ctxt->sax->error(ctxt->userData,
1956 "Predefined entity %s without content !\n", entity->name);
1957 break;
1958 }
1959 return(NULL);
1960 }
1961 input = xmlNewInputStream(ctxt);
1962 if (input == NULL) {
1963 return(NULL);
1964 }
1965 input->filename = (char *) entity->URI;
1966 input->base = entity->content;
1967 input->cur = entity->content;
1968 input->length = entity->length;
1969 return(input);
1970}
1971
1972/**
1973 * xmlNewStringInputStream:
1974 * @ctxt: an XML parser context
1975 * @buffer: an memory buffer
1976 *
1977 * Create a new input stream based on a memory buffer.
1978 * Returns the new input stream
1979 */
1980xmlParserInputPtr
1981xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1982 xmlParserInputPtr input;
1983
1984 if (buffer == NULL) {
1985 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "internal: xmlNewStringInputStream string = NULL\n");
1989 return(NULL);
1990 }
1991 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001992 xmlGenericError(xmlGenericErrorContext,
1993 "new fixed input: %.30s\n", buffer);
Daniel Veillardb1059e22000-09-16 14:02:43 +00001994 input = xmlNewInputStream(ctxt);
1995 if (input == NULL) {
1996 return(NULL);
1997 }
1998 input->base = buffer;
1999 input->cur = buffer;
2000 input->length = xmlStrlen(buffer);
2001 return(input);
2002}
2003
2004/**
2005 * xmlNewInputFromFile:
2006 * @ctxt: an XML parser context
2007 * @filename: the filename to use as entity
2008 *
2009 * Create a new input stream based on a file.
2010 *
2011 * Returns the new input stream or NULL in case of error
2012 */
2013xmlParserInputPtr
2014xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2015 xmlParserInputBufferPtr buf;
2016 xmlParserInputPtr inputStream;
2017 char *directory = NULL;
2018 xmlChar *URI = NULL;
2019
2020 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002021 xmlGenericError(xmlGenericErrorContext,
2022 "new input from file: %s\n", filename);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002023 if (ctxt == NULL) return(NULL);
2024 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2025 if (buf == NULL)
2026 return(NULL);
2027
2028 URI = xmlStrdup((xmlChar *) filename);
Daniel Veillard04698d92000-09-17 16:00:22 +00002029 directory = xmlParserGetDirectory((const char *) URI);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002030
2031 inputStream = xmlNewInputStream(ctxt);
2032 if (inputStream == NULL) {
2033 if (directory != NULL) xmlFree((char *) directory);
2034 if (URI != NULL) xmlFree((char *) URI);
2035 return(NULL);
2036 }
2037
Daniel Veillard04698d92000-09-17 16:00:22 +00002038 inputStream->filename = (const char *) URI;
Daniel Veillardb1059e22000-09-16 14:02:43 +00002039 inputStream->directory = directory;
2040 inputStream->buf = buf;
2041
2042 inputStream->base = inputStream->buf->buffer->content;
2043 inputStream->cur = inputStream->buf->buffer->content;
2044 if ((ctxt->directory == NULL) && (directory != NULL))
2045 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2046 return(inputStream);
2047}
2048
2049/************************************************************************
2050 * *
2051 * Commodity functions to handle parser contexts *
2052 * *
2053 ************************************************************************/
2054
2055/**
2056 * xmlInitParserCtxt:
2057 * @ctxt: an XML parser context
2058 *
2059 * Initialize a parser context
2060 */
2061
2062void
2063xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2064{
2065 xmlSAXHandler *sax;
2066
2067 xmlDefaultSAXHandlerInit();
2068
2069 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2070 if (sax == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002071 xmlGenericError(xmlGenericErrorContext,
2072 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002073 }
Daniel Veillard4fb87ee2000-09-19 12:25:59 +00002074 else
2075 memset(sax, 0, sizeof(xmlSAXHandler));
Daniel Veillardb1059e22000-09-16 14:02:43 +00002076
2077 /* Allocate the Input stack */
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002078 ctxt->inputTab = (xmlParserInputPtr *)
2079 xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb1059e22000-09-16 14:02:43 +00002080 if (ctxt->inputTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002081 xmlGenericError(xmlGenericErrorContext,
2082 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002083 ctxt->inputNr = 0;
2084 ctxt->inputMax = 0;
2085 ctxt->input = NULL;
2086 return;
2087 }
2088 ctxt->inputNr = 0;
2089 ctxt->inputMax = 5;
2090 ctxt->input = NULL;
2091
2092 ctxt->version = NULL;
2093 ctxt->encoding = NULL;
2094 ctxt->standalone = -1;
2095 ctxt->hasExternalSubset = 0;
2096 ctxt->hasPErefs = 0;
2097 ctxt->html = 0;
2098 ctxt->external = 0;
2099 ctxt->instate = XML_PARSER_START;
2100 ctxt->token = 0;
2101 ctxt->directory = NULL;
2102
2103 /* Allocate the Node stack */
2104 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2105 if (ctxt->nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002106 xmlGenericError(xmlGenericErrorContext,
2107 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002108 ctxt->nodeNr = 0;
2109 ctxt->nodeMax = 0;
2110 ctxt->node = NULL;
2111 ctxt->inputNr = 0;
2112 ctxt->inputMax = 0;
2113 ctxt->input = NULL;
2114 return;
2115 }
2116 ctxt->nodeNr = 0;
2117 ctxt->nodeMax = 10;
2118 ctxt->node = NULL;
2119
2120 /* Allocate the Name stack */
2121 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2122 if (ctxt->nameTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002123 xmlGenericError(xmlGenericErrorContext,
2124 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002125 ctxt->nodeNr = 0;
2126 ctxt->nodeMax = 0;
2127 ctxt->node = NULL;
2128 ctxt->inputNr = 0;
2129 ctxt->inputMax = 0;
2130 ctxt->input = NULL;
2131 ctxt->nameNr = 0;
2132 ctxt->nameMax = 0;
2133 ctxt->name = NULL;
2134 return;
2135 }
2136 ctxt->nameNr = 0;
2137 ctxt->nameMax = 10;
2138 ctxt->name = NULL;
2139
2140 /* Allocate the space stack */
2141 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2142 if (ctxt->spaceTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002143 xmlGenericError(xmlGenericErrorContext,
2144 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002145 ctxt->nodeNr = 0;
2146 ctxt->nodeMax = 0;
2147 ctxt->node = NULL;
2148 ctxt->inputNr = 0;
2149 ctxt->inputMax = 0;
2150 ctxt->input = NULL;
2151 ctxt->nameNr = 0;
2152 ctxt->nameMax = 0;
2153 ctxt->name = NULL;
2154 ctxt->spaceNr = 0;
2155 ctxt->spaceMax = 0;
2156 ctxt->space = NULL;
2157 return;
2158 }
2159 ctxt->spaceNr = 1;
2160 ctxt->spaceMax = 10;
2161 ctxt->spaceTab[0] = -1;
2162 ctxt->space = &ctxt->spaceTab[0];
2163
2164 if (sax == NULL) {
2165 ctxt->sax = &xmlDefaultSAXHandler;
2166 } else {
2167 ctxt->sax = sax;
2168 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2169 }
2170 ctxt->userData = ctxt;
2171 ctxt->myDoc = NULL;
2172 ctxt->wellFormed = 1;
2173 ctxt->valid = 1;
2174 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2175 ctxt->pedantic = xmlPedanticParserDefaultValue;
2176 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2177 ctxt->vctxt.userData = ctxt;
2178 if (ctxt->validate) {
2179 ctxt->vctxt.error = xmlParserValidityError;
2180 if (xmlGetWarningsDefaultValue == 0)
2181 ctxt->vctxt.warning = NULL;
2182 else
2183 ctxt->vctxt.warning = xmlParserValidityWarning;
2184 /* Allocate the Node stack */
2185 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2186 if (ctxt->vctxt.nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002187 xmlGenericError(xmlGenericErrorContext,
2188 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002189 ctxt->vctxt.nodeMax = 0;
2190 ctxt->validate = 0;
2191 ctxt->vctxt.error = NULL;
2192 ctxt->vctxt.warning = NULL;
2193 } else {
2194 ctxt->vctxt.nodeNr = 0;
2195 ctxt->vctxt.nodeMax = 4;
2196 ctxt->vctxt.node = NULL;
2197 }
2198 } else {
2199 ctxt->vctxt.error = NULL;
2200 ctxt->vctxt.warning = NULL;
2201 }
2202 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2203 ctxt->record_info = 0;
2204 ctxt->nbChars = 0;
2205 ctxt->checkIndex = 0;
2206 ctxt->inSubset = 0;
2207 ctxt->errNo = XML_ERR_OK;
2208 ctxt->depth = 0;
2209 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2210 xmlInitNodeInfoSeq(&ctxt->node_seq);
2211}
2212
2213/**
2214 * xmlFreeParserCtxt:
2215 * @ctxt: an XML parser context
2216 *
2217 * Free all the memory used by a parser context. However the parsed
2218 * document in ctxt->myDoc is not freed.
2219 */
2220
2221void
2222xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2223{
2224 xmlParserInputPtr input;
2225 xmlChar *oldname;
2226
2227 if (ctxt == NULL) return;
2228
2229 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2230 xmlFreeInputStream(input);
2231 }
2232 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2233 xmlFree(oldname);
2234 }
2235 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2236 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2237 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2238 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2239 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2240 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2241 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2242 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2243 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2244 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2245 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2246 xmlFree(ctxt->sax);
2247 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2248 xmlFree(ctxt);
2249}
2250
2251/**
2252 * xmlNewParserCtxt:
2253 *
2254 * Allocate and initialize a new parser context.
2255 *
2256 * Returns the xmlParserCtxtPtr or NULL
2257 */
2258
2259xmlParserCtxtPtr
2260xmlNewParserCtxt()
2261{
2262 xmlParserCtxtPtr ctxt;
2263
2264 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2265 if (ctxt == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002266 xmlGenericError(xmlGenericErrorContext,
2267 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002268 perror("malloc");
2269 return(NULL);
2270 }
2271 memset(ctxt, 0, sizeof(xmlParserCtxt));
2272 xmlInitParserCtxt(ctxt);
2273 return(ctxt);
2274}
2275
2276/************************************************************************
2277 * *
 *		Handling of node information				*
2279 * *
2280 ************************************************************************/
2281
2282/**
2283 * xmlClearParserCtxt:
2284 * @ctxt: an XML parser context
2285 *
2286 * Clear (release owned resources) and reinitialize a parser context
2287 */
2288
2289void
2290xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2291{
2292 xmlClearNodeInfoSeq(&ctxt->node_seq);
2293 xmlInitParserCtxt(ctxt);
2294}
2295
2296/**
2297 * xmlParserFindNodeInfo:
2298 * @ctxt: an XML parser context
2299 * @node: an XML node within the tree
2300 *
2301 * Find the parser node info struct for a given node
2302 *
2303 * Returns an xmlParserNodeInfo block pointer or NULL
2304 */
2305const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2306 const xmlNode* node)
2307{
2308 unsigned long pos;
2309
2310 /* Find position where node should be at */
2311 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2312 if ( ctx->node_seq.buffer[pos].node == node )
2313 return &ctx->node_seq.buffer[pos];
2314 else
2315 return NULL;
2316}
2317
2318
2319/**
2320 * xmlInitNodeInfoSeq:
2321 * @seq: a node info sequence pointer
2322 *
2323 * -- Initialize (set to initial state) node info sequence
2324 */
2325void
2326xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2327{
2328 seq->length = 0;
2329 seq->maximum = 0;
2330 seq->buffer = NULL;
2331}
2332
2333/**
2334 * xmlClearNodeInfoSeq:
2335 * @seq: a node info sequence pointer
2336 *
2337 * -- Clear (release memory and reinitialize) node
2338 * info sequence
2339 */
2340void
2341xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2342{
2343 if ( seq->buffer != NULL )
2344 xmlFree(seq->buffer);
2345 xmlInitNodeInfoSeq(seq);
2346}
2347
2348
2349/**
2350 * xmlParserFindNodeInfoIndex:
2351 * @seq: a node info sequence pointer
2352 * @node: an XML node pointer
2353 *
2354 *
2355 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2356 * the given node is or should be at in a sorted sequence
2357 *
2358 * Returns a long indicating the position of the record
2359 */
2360unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2361 const xmlNode* node)
2362{
2363 unsigned long upper, lower, middle;
2364 int found = 0;
2365
2366 /* Do a binary search for the key */
2367 lower = 1;
2368 upper = seq->length;
2369 middle = 0;
2370 while ( lower <= upper && !found) {
2371 middle = lower + (upper - lower) / 2;
2372 if ( node == seq->buffer[middle - 1].node )
2373 found = 1;
2374 else if ( node < seq->buffer[middle - 1].node )
2375 upper = middle - 1;
2376 else
2377 lower = middle + 1;
2378 }
2379
2380 /* Return position */
2381 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2382 return middle;
2383 else
2384 return middle - 1;
2385}
2386
2387
2388/**
2389 * xmlParserAddNodeInfo:
2390 * @ctxt: an XML parser context
2391 * @info: a node info sequence pointer
2392 *
2393 * Insert node info record into the sorted sequence
2394 */
2395void
2396xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2397 const xmlParserNodeInfo* info)
2398{
2399 unsigned long pos;
2400 static unsigned int block_size = 5;
2401
2402 /* Find pos and check to see if node is already in the sequence */
2403 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2404 if ( pos < ctxt->node_seq.length
2405 && ctxt->node_seq.buffer[pos].node == info->node ) {
2406 ctxt->node_seq.buffer[pos] = *info;
2407 }
2408
2409 /* Otherwise, we need to add new node to buffer */
2410 else {
2411 /* Expand buffer by 5 if needed */
2412 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2413 xmlParserNodeInfo* tmp_buffer;
2414 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2415 *(ctxt->node_seq.maximum + block_size));
2416
2417 if ( ctxt->node_seq.buffer == NULL )
2418 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2419 else
2420 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2421
2422 if ( tmp_buffer == NULL ) {
2423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2424 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2425 ctxt->errNo = XML_ERR_NO_MEMORY;
2426 return;
2427 }
2428 ctxt->node_seq.buffer = tmp_buffer;
2429 ctxt->node_seq.maximum += block_size;
2430 }
2431
2432 /* If position is not at end, move elements out of the way */
2433 if ( pos != ctxt->node_seq.length ) {
2434 unsigned long i;
2435
2436 for ( i = ctxt->node_seq.length; i > pos; i-- )
2437 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2438 }
2439
2440 /* Copy element and increase length */
2441 ctxt->node_seq.buffer[pos] = *info;
2442 ctxt->node_seq.length++;
2443 }
2444}
2445
2446/************************************************************************
2447 * *
2448 * Deprecated functions kept for compatibility *
2449 * *
2450 ************************************************************************/
2451
2452/*
2453 * xmlCheckLanguageID
2454 * @lang: pointer to the string value
2455 *
2456 * Checks that the value conforms to the LanguageID production:
2457 *
2458 * NOTE: this is somewhat deprecated, those productions were removed from
2459 * the XML Second edition.
2460 *
2461 * [33] LanguageID ::= Langcode ('-' Subcode)*
2462 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2463 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2464 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2465 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2466 * [38] Subcode ::= ([a-z] | [A-Z])+
2467 *
2468 * Returns 1 if correct 0 otherwise
2469 **/
2470int
2471xmlCheckLanguageID(const xmlChar *lang) {
2472 const xmlChar *cur = lang;
2473
2474 if (cur == NULL)
2475 return(0);
2476 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2477 ((cur[0] == 'I') && (cur[1] == '-'))) {
2478 /*
2479 * IANA code
2480 */
2481 cur += 2;
2482 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2483 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2484 cur++;
2485 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2486 ((cur[0] == 'X') && (cur[1] == '-'))) {
2487 /*
2488 * User code
2489 */
2490 cur += 2;
2491 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2492 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2493 cur++;
2494 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2495 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2496 /*
2497 * ISO639
2498 */
2499 cur++;
2500 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2501 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2502 cur++;
2503 else
2504 return(0);
2505 } else
2506 return(0);
2507 while (cur[0] != 0) { /* non input consuming */
2508 if (cur[0] != '-')
2509 return(0);
2510 cur++;
2511 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2512 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2513 cur++;
2514 else
2515 return(0);
2516 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2517 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2518 cur++;
2519 }
2520 return(1);
2521}
2522
2523/**
2524 * xmlDecodeEntities:
2525 * @ctxt: the parser context
2526 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2527 * @len: the len to decode (in bytes !), -1 for no size limit
2528 * @end: an end marker xmlChar, 0 if none
2529 * @end2: an end marker xmlChar, 0 if none
2530 * @end3: an end marker xmlChar, 0 if none
2531 *
2532 * This function is deprecated, we now always process entities content
2533 * through xmlStringDecodeEntities
2534 *
2535 * TODO: remove it in next major release.
2536 *
2537 * [67] Reference ::= EntityRef | CharRef
2538 *
2539 * [69] PEReference ::= '%' Name ';'
2540 *
2541 * Returns A newly allocated string with the substitution done. The caller
2542 * must deallocate it !
2543 */
2544xmlChar *
2545xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2546 xmlChar end, xmlChar end2, xmlChar end3) {
2547#if 0
2548 xmlChar *buffer = NULL;
2549 unsigned int buffer_size = 0;
2550 unsigned int nbchars = 0;
2551
2552 xmlChar *current = NULL;
2553 xmlEntityPtr ent;
2554 unsigned int max = (unsigned int) len;
2555 int c,l;
2556#endif
2557
2558 static int deprecated = 0;
2559 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002560 xmlGenericError(xmlGenericErrorContext,
2561 "xmlDecodeEntities() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002562 deprecated = 1;
2563 }
2564
2565#if 0
2566 if (ctxt->depth > 40) {
2567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2568 ctxt->sax->error(ctxt->userData,
2569 "Detected entity reference loop\n");
2570 ctxt->wellFormed = 0;
2571 ctxt->disableSAX = 1;
2572 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2573 return(NULL);
2574 }
2575
2576 /*
2577 * allocate a translation buffer.
2578 */
2579 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2580 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2581 if (buffer == NULL) {
2582 perror("xmlDecodeEntities: malloc failed");
2583 return(NULL);
2584 }
2585
2586 /*
2587 * Ok loop until we reach one of the ending char or a size limit.
2588 */
2589 GROW;
2590 c = CUR_CHAR(l);
2591 while ((nbchars < max) && (c != end) && /* NOTUSED */
2592 (c != end2) && (c != end3)) {
2593 GROW;
2594 if (c == 0) break;
2595 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2596 int val = xmlParseCharRef(ctxt);
2597 COPY_BUF(0,buffer,nbchars,val);
2598 NEXTL(l);
2599 } else if ((c == '&') && (ctxt->token != '&') &&
2600 (what & XML_SUBSTITUTE_REF)) {
2601 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002602 xmlGenericError(xmlGenericErrorContext,
2603 "decoding Entity Reference\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002604 ent = xmlParseEntityRef(ctxt);
2605 if ((ent != NULL) &&
2606 (ctxt->replaceEntities != 0)) {
2607 current = ent->content;
2608 while (*current != 0) { /* non input consuming loop */
2609 buffer[nbchars++] = *current++;
2610 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2611 growBuffer(buffer);
2612 }
2613 }
2614 } else if (ent != NULL) {
2615 const xmlChar *cur = ent->name;
2616
2617 buffer[nbchars++] = '&';
2618 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2619 growBuffer(buffer);
2620 }
2621 while (*cur != 0) { /* non input consuming loop */
2622 buffer[nbchars++] = *cur++;
2623 }
2624 buffer[nbchars++] = ';';
2625 }
2626 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2627 /*
2628 * a PEReference induce to switch the entity flow,
2629 * we break here to flush the current set of chars
2630 * parsed if any. We will be called back later.
2631 */
2632 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002633 xmlGenericError(xmlGenericErrorContext,
2634 "decoding PE Reference\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002635 if (nbchars != 0) break;
2636
2637 xmlParsePEReference(ctxt);
2638
2639 /*
2640 * Pop-up of finished entities.
2641 */
2642 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2643 xmlPopInput(ctxt);
2644
2645 break;
2646 } else {
2647 COPY_BUF(l,buffer,nbchars,c);
2648 NEXTL(l);
2649 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2650 growBuffer(buffer);
2651 }
2652 }
2653 c = CUR_CHAR(l);
2654 }
2655 buffer[nbchars++] = 0;
2656 return(buffer);
2657#endif
2658 return(NULL);
2659}
2660
2661/**
2662 * xmlNamespaceParseNCName:
2663 * @ctxt: an XML parser context
2664 *
2665 * parse an XML namespace name.
2666 *
2667 * TODO: this seems not in use anymore, the namespace handling is done on
2668 * top of the SAX interfaces, i.e. not on raw input.
2669 *
2670 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2671 *
2672 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2673 * CombiningChar | Extender
2674 *
2675 * Returns the namespace name or NULL
2676 */
2677
2678xmlChar *
2679xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2680#if 0
2681 xmlChar buf[XML_MAX_NAMELEN + 5];
2682 int len = 0, l;
2683 int cur = CUR_CHAR(l);
2684#endif
2685
2686 static int deprecated = 0;
2687 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002688 xmlGenericError(xmlGenericErrorContext,
2689 "xmlNamespaceParseNCName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002690 deprecated = 1;
2691 }
2692
2693#if 0
2694 /* load first the value of the char !!! */
2695 GROW;
2696 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2697
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002698xmlGenericError(xmlGenericErrorContext,
2699 "xmlNamespaceParseNCName: reached loop 3\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002700 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2701 (cur == '.') || (cur == '-') ||
2702 (cur == '_') ||
2703 (IS_COMBINING(cur)) ||
2704 (IS_EXTENDER(cur))) {
2705 COPY_BUF(l,buf,len,cur);
2706 NEXTL(l);
2707 cur = CUR_CHAR(l);
2708 if (len >= XML_MAX_NAMELEN) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002709 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +00002710 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2711 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2712 (cur == '.') || (cur == '-') ||
2713 (cur == '_') ||
2714 (IS_COMBINING(cur)) ||
2715 (IS_EXTENDER(cur))) {
2716 NEXTL(l);
2717 cur = CUR_CHAR(l);
2718 }
2719 break;
2720 }
2721 }
2722 return(xmlStrndup(buf, len));
2723#endif
2724 return(NULL);
2725}
2726
2727/**
2728 * xmlNamespaceParseQName:
2729 * @ctxt: an XML parser context
2730 * @prefix: a xmlChar **
2731 *
2732 * TODO: this seems not in use anymore, the namespace handling is done on
2733 * top of the SAX interfaces, i.e. not on raw input.
2734 *
2735 * parse an XML qualified name
2736 *
2737 * [NS 5] QName ::= (Prefix ':')? LocalPart
2738 *
2739 * [NS 6] Prefix ::= NCName
2740 *
2741 * [NS 7] LocalPart ::= NCName
2742 *
2743 * Returns the local part, and prefix is updated
2744 * to get the Prefix if any.
2745 */
2746
2747xmlChar *
2748xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2749
2750 static int deprecated = 0;
2751 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002752 xmlGenericError(xmlGenericErrorContext,
2753 "xmlNamespaceParseQName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002754 deprecated = 1;
2755 }
2756
2757#if 0
2758 xmlChar *ret = NULL;
2759
2760 *prefix = NULL;
2761 ret = xmlNamespaceParseNCName(ctxt);
2762 if (RAW == ':') {
2763 *prefix = ret;
2764 NEXT;
2765 ret = xmlNamespaceParseNCName(ctxt);
2766 }
2767
2768 return(ret);
2769#endif
2770 return(NULL);
2771}
2772
2773/**
2774 * xmlNamespaceParseNSDef:
2775 * @ctxt: an XML parser context
2776 *
2777 * parse a namespace prefix declaration
2778 *
2779 * TODO: this seems not in use anymore, the namespace handling is done on
2780 * top of the SAX interfaces, i.e. not on raw input.
2781 *
2782 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2783 *
2784 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2785 *
2786 * Returns the namespace name
2787 */
2788
2789xmlChar *
2790xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2791 static int deprecated = 0;
2792 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002793 xmlGenericError(xmlGenericErrorContext,
2794 "xmlNamespaceParseNSDef() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002795 deprecated = 1;
2796 }
2797 return(NULL);
2798#if 0
2799 xmlChar *name = NULL;
2800
2801 if ((RAW == 'x') && (NXT(1) == 'm') &&
2802 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2803 (NXT(4) == 's')) {
2804 SKIP(5);
2805 if (RAW == ':') {
2806 NEXT;
2807 name = xmlNamespaceParseNCName(ctxt);
2808 }
2809 }
2810 return(name);
2811#endif
2812}
2813
2814/**
2815 * xmlParseQuotedString:
2816 * @ctxt: an XML parser context
2817 *
2818 * Parse and return a string between quotes or doublequotes
2819 *
2820 * TODO: Deprecated, to be removed at next drop of binary compatibility
2821 *
2822 * Returns the string parser or NULL.
2823 */
2824xmlChar *
2825xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2826 static int deprecated = 0;
2827 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002828 xmlGenericError(xmlGenericErrorContext,
2829 "xmlParseQuotedString() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002830 deprecated = 1;
2831 }
2832 return(NULL);
2833
2834#if 0
2835 xmlChar *buf = NULL;
2836 int len = 0,l;
2837 int size = XML_PARSER_BUFFER_SIZE;
2838 int c;
2839
2840 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2841 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002842 xmlGenericError(xmlGenericErrorContext,
2843 "malloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002844 return(NULL);
2845 }
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002846xmlGenericError(xmlGenericErrorContext,
2847 "xmlParseQuotedString: reached loop 4\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002848 if (RAW == '"') {
2849 NEXT;
2850 c = CUR_CHAR(l);
2851 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2852 if (len + 5 >= size) {
2853 size *= 2;
2854 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2855 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002856 xmlGenericError(xmlGenericErrorContext,
2857 "realloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002858 return(NULL);
2859 }
2860 }
2861 COPY_BUF(l,buf,len,c);
2862 NEXTL(l);
2863 c = CUR_CHAR(l);
2864 }
2865 if (c != '"') {
2866 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2868 ctxt->sax->error(ctxt->userData,
2869 "String not closed \"%.50s\"\n", buf);
2870 ctxt->wellFormed = 0;
2871 ctxt->disableSAX = 1;
2872 } else {
2873 NEXT;
2874 }
2875 } else if (RAW == '\''){
2876 NEXT;
2877 c = CUR;
2878 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2879 if (len + 1 >= size) {
2880 size *= 2;
2881 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2882 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002883 xmlGenericError(xmlGenericErrorContext,
2884 "realloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002885 return(NULL);
2886 }
2887 }
2888 buf[len++] = c;
2889 NEXT;
2890 c = CUR;
2891 }
2892 if (RAW != '\'') {
2893 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2895 ctxt->sax->error(ctxt->userData,
2896 "String not closed \"%.50s\"\n", buf);
2897 ctxt->wellFormed = 0;
2898 ctxt->disableSAX = 1;
2899 } else {
2900 NEXT;
2901 }
2902 }
2903 return(buf);
2904#endif
2905}
2906
2907/**
2908 * xmlParseNamespace:
2909 * @ctxt: an XML parser context
2910 *
2911 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2912 *
2913 * This is what the older xml-name Working Draft specified, a bunch of
2914 * other stuff may still rely on it, so support is still here as
2915 * if it was declared on the root of the Tree:-(
2916 *
2917 * TODO: remove from library
2918 *
2919 * To be removed at next drop of binary compatibility
2920 */
2921
2922void
2923xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2924 static int deprecated = 0;
2925 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002926 xmlGenericError(xmlGenericErrorContext,
2927 "xmlParseNamespace() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002928 deprecated = 1;
2929 }
2930
2931#if 0
2932 xmlChar *href = NULL;
2933 xmlChar *prefix = NULL;
2934 int garbage = 0;
2935
2936 /*
2937 * We just skipped "namespace" or "xml:namespace"
2938 */
2939 SKIP_BLANKS;
2940
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002941xmlGenericError(xmlGenericErrorContext,
2942 "xmlParseNamespace: reached loop 5\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002943 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2944 /*
2945 * We can have "ns" or "prefix" attributes
2946 * Old encoding as 'href' or 'AS' attributes is still supported
2947 */
2948 if ((RAW == 'n') && (NXT(1) == 's')) {
2949 garbage = 0;
2950 SKIP(2);
2951 SKIP_BLANKS;
2952
2953 if (RAW != '=') continue;
2954 NEXT;
2955 SKIP_BLANKS;
2956
2957 href = xmlParseQuotedString(ctxt);
2958 SKIP_BLANKS;
2959 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2960 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2961 garbage = 0;
2962 SKIP(4);
2963 SKIP_BLANKS;
2964
2965 if (RAW != '=') continue;
2966 NEXT;
2967 SKIP_BLANKS;
2968
2969 href = xmlParseQuotedString(ctxt);
2970 SKIP_BLANKS;
2971 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2972 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2973 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2974 garbage = 0;
2975 SKIP(6);
2976 SKIP_BLANKS;
2977
2978 if (RAW != '=') continue;
2979 NEXT;
2980 SKIP_BLANKS;
2981
2982 prefix = xmlParseQuotedString(ctxt);
2983 SKIP_BLANKS;
2984 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
2985 garbage = 0;
2986 SKIP(2);
2987 SKIP_BLANKS;
2988
2989 if (RAW != '=') continue;
2990 NEXT;
2991 SKIP_BLANKS;
2992
2993 prefix = xmlParseQuotedString(ctxt);
2994 SKIP_BLANKS;
2995 } else if ((RAW == '?') && (NXT(1) == '>')) {
2996 garbage = 0;
2997 NEXT;
2998 } else {
2999 /*
3000 * Found garbage when parsing the namespace
3001 */
3002 if (!garbage) {
3003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3004 ctxt->sax->error(ctxt->userData,
3005 "xmlParseNamespace found garbage\n");
3006 }
3007 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3008 ctxt->wellFormed = 0;
3009 ctxt->disableSAX = 1;
3010 NEXT;
3011 }
3012 }
3013
3014 MOVETO_ENDTAG(CUR_PTR);
3015 NEXT;
3016
3017 /*
3018 * Register the DTD.
3019 if (href != NULL)
3020 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3021 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3022 */
3023
3024 if (prefix != NULL) xmlFree(prefix);
3025 if (href != NULL) xmlFree(href);
3026#endif
3027}
3028
3029/**
3030 * xmlScanName:
3031 * @ctxt: an XML parser context
3032 *
3033 * Trickery: parse an XML name but without consuming the input flow
3034 * Needed for rollback cases. Used only when parsing entities references.
3035 *
3036 * TODO: seems deprecated now, only used in the default part of
3037 * xmlParserHandleReference
3038 *
3039 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3040 * CombiningChar | Extender
3041 *
3042 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3043 *
3044 * [6] Names ::= Name (S Name)*
3045 *
3046 * Returns the Name parsed or NULL
3047 */
3048
3049xmlChar *
3050xmlScanName(xmlParserCtxtPtr ctxt) {
3051 static int deprecated = 0;
3052 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003053 xmlGenericError(xmlGenericErrorContext,
3054 "xmlScanName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003055 deprecated = 1;
3056 }
3057 return(NULL);
3058
3059#if 0
3060 xmlChar buf[XML_MAX_NAMELEN];
3061 int len = 0;
3062
3063 GROW;
3064 if (!IS_LETTER(RAW) && (RAW != '_') &&
3065 (RAW != ':')) {
3066 return(NULL);
3067 }
3068
3069
3070 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3071 (NXT(len) == '.') || (NXT(len) == '-') ||
3072 (NXT(len) == '_') || (NXT(len) == ':') ||
3073 (IS_COMBINING(NXT(len))) ||
3074 (IS_EXTENDER(NXT(len)))) {
3075 GROW;
3076 buf[len] = NXT(len);
3077 len++;
3078 if (len >= XML_MAX_NAMELEN) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003079 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +00003080 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3081 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3082 (IS_DIGIT(NXT(len))) ||
3083 (NXT(len) == '.') || (NXT(len) == '-') ||
3084 (NXT(len) == '_') || (NXT(len) == ':') ||
3085 (IS_COMBINING(NXT(len))) ||
3086 (IS_EXTENDER(NXT(len))))
3087 len++;
3088 break;
3089 }
3090 }
3091 return(xmlStrndup(buf, len));
3092#endif
3093}
3094
3095/**
3096 * xmlParserHandleReference:
3097 * @ctxt: the parser context
3098 *
3099 * TODO: Remove, now deprecated ... the test is done directly in the
3100 * content parsing
3101 * routines.
3102 *
3103 * [67] Reference ::= EntityRef | CharRef
3104 *
3105 * [68] EntityRef ::= '&' Name ';'
3106 *
3107 * [ WFC: Entity Declared ]
3108 * the Name given in the entity reference must match that in an entity
3109 * declaration, except that well-formed documents need not declare any
3110 * of the following entities: amp, lt, gt, apos, quot.
3111 *
3112 * [ WFC: Parsed Entity ]
3113 * An entity reference must not contain the name of an unparsed entity
3114 *
3115 * [66] CharRef ::= '&#' [0-9]+ ';' |
3116 * '&#x' [0-9a-fA-F]+ ';'
3117 *
3118 * A PEReference may have been detectect in the current input stream
3119 * the handling is done accordingly to
3120 * http://www.w3.org/TR/REC-xml#entproc
3121 */
3122void
3123xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3124 static int deprecated = 0;
3125 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003126 xmlGenericError(xmlGenericErrorContext,
3127 "xmlParserHandleReference() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003128 deprecated = 1;
3129 }
3130
3131#if 0
3132 xmlParserInputPtr input;
3133 xmlChar *name;
3134 xmlEntityPtr ent = NULL;
3135
3136 if (ctxt->token != 0) {
3137 return;
3138 }
3139 if (RAW != '&') return;
3140 GROW;
3141 if ((RAW == '&') && (NXT(1) == '#')) {
3142 switch(ctxt->instate) {
3143 case XML_PARSER_ENTITY_DECL:
3144 case XML_PARSER_PI:
3145 case XML_PARSER_CDATA_SECTION:
3146 case XML_PARSER_COMMENT:
3147 case XML_PARSER_SYSTEM_LITERAL:
3148 /* we just ignore it there */
3149 return;
3150 case XML_PARSER_START_TAG:
3151 return;
3152 case XML_PARSER_END_TAG:
3153 return;
3154 case XML_PARSER_EOF:
3155 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3157 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3158 ctxt->wellFormed = 0;
3159 ctxt->disableSAX = 1;
3160 return;
3161 case XML_PARSER_PROLOG:
3162 case XML_PARSER_START:
3163 case XML_PARSER_MISC:
3164 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3166 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3167 ctxt->wellFormed = 0;
3168 ctxt->disableSAX = 1;
3169 return;
3170 case XML_PARSER_EPILOG:
3171 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3173 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3174 ctxt->wellFormed = 0;
3175 ctxt->disableSAX = 1;
3176 return;
3177 case XML_PARSER_DTD:
3178 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "CharRef are forbiden in DTDs!\n");
3182 ctxt->wellFormed = 0;
3183 ctxt->disableSAX = 1;
3184 return;
3185 case XML_PARSER_ENTITY_VALUE:
3186 /*
3187 * NOTE: in the case of entity values, we don't do the
3188 * substitution here since we need the literal
3189 * entity value to be able to save the internal
3190 * subset of the document.
3191 * This will be handled by xmlStringDecodeEntities
3192 */
3193 return;
3194 case XML_PARSER_CONTENT:
3195 return;
3196 case XML_PARSER_ATTRIBUTE_VALUE:
3197 /* ctxt->token = xmlParseCharRef(ctxt); */
3198 return;
Daniel Veillard41e06512000-11-13 11:47:47 +00003199 case XML_PARSER_IGNORE:
3200 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00003201 }
3202 return;
3203 }
3204
3205 switch(ctxt->instate) {
3206 case XML_PARSER_CDATA_SECTION:
3207 return;
3208 case XML_PARSER_PI:
3209 case XML_PARSER_COMMENT:
3210 case XML_PARSER_SYSTEM_LITERAL:
3211 case XML_PARSER_CONTENT:
3212 return;
3213 case XML_PARSER_START_TAG:
3214 return;
3215 case XML_PARSER_END_TAG:
3216 return;
3217 case XML_PARSER_EOF:
3218 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3220 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3221 ctxt->wellFormed = 0;
3222 ctxt->disableSAX = 1;
3223 return;
3224 case XML_PARSER_PROLOG:
3225 case XML_PARSER_START:
3226 case XML_PARSER_MISC:
3227 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3230 ctxt->wellFormed = 0;
3231 ctxt->disableSAX = 1;
3232 return;
3233 case XML_PARSER_EPILOG:
3234 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3237 ctxt->wellFormed = 0;
3238 ctxt->disableSAX = 1;
3239 return;
3240 case XML_PARSER_ENTITY_VALUE:
3241 /*
3242 * NOTE: in the case of entity values, we don't do the
3243 * substitution here since we need the literal
3244 * entity value to be able to save the internal
3245 * subset of the document.
3246 * This will be handled by xmlStringDecodeEntities
3247 */
3248 return;
3249 case XML_PARSER_ATTRIBUTE_VALUE:
3250 /*
3251 * NOTE: in the case of attributes values, we don't do the
3252 * substitution here unless we are in a mode where
3253 * the parser is explicitely asked to substitute
3254 * entities. The SAX callback is called with values
3255 * without entity substitution.
3256 * This will then be handled by xmlStringDecodeEntities
3257 */
3258 return;
3259 case XML_PARSER_ENTITY_DECL:
3260 /*
3261 * we just ignore it there
3262 * the substitution will be done once the entity is referenced
3263 */
3264 return;
3265 case XML_PARSER_DTD:
3266 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3268 ctxt->sax->error(ctxt->userData,
3269 "Entity references are forbiden in DTDs!\n");
3270 ctxt->wellFormed = 0;
3271 ctxt->disableSAX = 1;
3272 return;
Daniel Veillard41e06512000-11-13 11:47:47 +00003273 case XML_PARSER_IGNORE:
3274 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00003275 }
3276
3277/* TODO: this seems not reached anymore .... Verify ... */
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003278xmlGenericError(xmlGenericErrorContext,
3279 "Reached deprecated section in xmlParserHandleReference()\n");
3280xmlGenericError(xmlGenericErrorContext,
3281 "Please forward the document to Daniel.Veillard@w3.org\n");
3282xmlGenericError(xmlGenericErrorContext,
3283 "indicating the version: %s, thanks !\n", xmlParserVersion);
Daniel Veillardb1059e22000-09-16 14:02:43 +00003284 NEXT;
3285 name = xmlScanName(ctxt);
3286 if (name == NULL) {
3287 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3289 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3290 ctxt->wellFormed = 0;
3291 ctxt->disableSAX = 1;
3292 ctxt->token = '&';
3293 return;
3294 }
3295 if (NXT(xmlStrlen(name)) != ';') {
3296 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3298 ctxt->sax->error(ctxt->userData,
3299 "Entity reference: ';' expected\n");
3300 ctxt->wellFormed = 0;
3301 ctxt->disableSAX = 1;
3302 ctxt->token = '&';
3303 xmlFree(name);
3304 return;
3305 }
3306 SKIP(xmlStrlen(name) + 1);
3307 if (ctxt->sax != NULL) {
3308 if (ctxt->sax->getEntity != NULL)
3309 ent = ctxt->sax->getEntity(ctxt->userData, name);
3310 }
3311
3312 /*
3313 * [ WFC: Entity Declared ]
3314 * the Name given in the entity reference must match that in an entity
3315 * declaration, except that well-formed documents need not declare any
3316 * of the following entities: amp, lt, gt, apos, quot.
3317 */
3318 if (ent == NULL)
3319 ent = xmlGetPredefinedEntity(name);
3320 if (ent == NULL) {
3321 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3323 ctxt->sax->error(ctxt->userData,
3324 "Entity reference: entity %s not declared\n",
3325 name);
3326 ctxt->wellFormed = 0;
3327 ctxt->disableSAX = 1;
3328 xmlFree(name);
3329 return;
3330 }
3331
3332 /*
3333 * [ WFC: Parsed Entity ]
3334 * An entity reference must not contain the name of an unparsed entity
3335 */
3336 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3337 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3339 ctxt->sax->error(ctxt->userData,
3340 "Entity reference to unparsed entity %s\n", name);
3341 ctxt->wellFormed = 0;
3342 ctxt->disableSAX = 1;
3343 }
3344
3345 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3346 ctxt->token = ent->content[0];
3347 xmlFree(name);
3348 return;
3349 }
3350 input = xmlNewEntityInputStream(ctxt, ent);
3351 xmlPushInput(ctxt, input);
3352 xmlFree(name);
3353#endif
3354 return;
3355}
3356
3357/**
3358 * xmlHandleEntity:
3359 * @ctxt: an XML parser context
3360 * @entity: an XML entity pointer.
3361 *
3362 * Default handling of defined entities, when should we define a new input
3363 * stream ? When do we just handle that as a set of chars ?
3364 *
3365 * OBSOLETE: to be removed at some point.
3366 */
3367
3368void
3369xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3370 static int deprecated = 0;
3371 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003372 xmlGenericError(xmlGenericErrorContext,
3373 "xmlHandleEntity() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003374 deprecated = 1;
3375 }
3376
3377#if 0
3378 int len;
3379 xmlParserInputPtr input;
3380
3381 if (entity->content == NULL) {
3382 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3385 entity->name);
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 return;
3389 }
3390 len = xmlStrlen(entity->content);
3391 if (len <= 2) goto handle_as_char;
3392
3393 /*
3394 * Redefine its content as an input stream.
3395 */
3396 input = xmlNewEntityInputStream(ctxt, entity);
3397 xmlPushInput(ctxt, input);
3398 return;
3399
3400handle_as_char:
3401 /*
3402 * Just handle the content as a set of chars.
3403 */
3404 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3405 (ctxt->sax->characters != NULL))
3406 ctxt->sax->characters(ctxt->userData, entity->content, len);
3407#endif
3408}
3409
Daniel Veillarda4964b72000-10-31 18:23:44 +00003410/**
3411 * xmlNewGlobalNs:
3412 * @doc: the document carrying the namespace
3413 * @href: the URI associated
3414 * @prefix: the prefix for the namespace
3415 *
3416 * Creation of a Namespace, the old way using PI and without scoping
3417 * DEPRECATED !!!
3418 * It now create a namespace on the root element of the document if found.
3419 * Returns NULL this functionnality had been removed
3420 */
3421xmlNsPtr
3422xmlNewGlobalNs(xmlDocPtr doc, const xmlChar *href, const xmlChar *prefix) {
3423 static int deprecated = 0;
3424 if (!deprecated) {
3425 xmlGenericError(xmlGenericErrorContext,
3426 "xmlNewGlobalNs() deprecated function reached\n");
3427 deprecated = 1;
3428 }
3429 return(NULL);
3430#if 0
3431 xmlNodePtr root;
3432
3433 xmlNsPtr cur;
3434
3435 root = xmlDocGetRootElement(doc);
3436 if (root != NULL)
3437 return(xmlNewNs(root, href, prefix));
3438
3439 /*
3440 * if there is no root element yet, create an old Namespace type
3441 * and it will be moved to the root at save time.
3442 */
3443 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3444 if (cur == NULL) {
3445 xmlGenericError(xmlGenericErrorContext,
3446 "xmlNewGlobalNs : malloc failed\n");
3447 return(NULL);
3448 }
3449 memset(cur, 0, sizeof(xmlNs));
3450 cur->type = XML_GLOBAL_NAMESPACE;
3451
3452 if (href != NULL)
3453 cur->href = xmlStrdup(href);
3454 if (prefix != NULL)
3455 cur->prefix = xmlStrdup(prefix);
3456
3457 /*
3458 * Add it at the end to preserve parsing order ...
3459 */
3460 if (doc != NULL) {
3461 if (doc->oldNs == NULL) {
3462 doc->oldNs = cur;
3463 } else {
3464 xmlNsPtr prev = doc->oldNs;
3465
3466 while (prev->next != NULL) prev = prev->next;
3467 prev->next = cur;
3468 }
3469 }
3470
3471 return(NULL);
3472#endif
3473}
3474
3475/**
3476 * xmlUpgradeOldNs:
3477 * @doc: a document pointer
3478 *
3479 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3480 * DEPRECATED
3481 */
3482void
3483xmlUpgradeOldNs(xmlDocPtr doc) {
3484 static int deprecated = 0;
3485 if (!deprecated) {
3486 xmlGenericError(xmlGenericErrorContext,
3487 "xmlNewGlobalNs() deprecated function reached\n");
3488 deprecated = 1;
3489 }
3490#if 0
3491 xmlNsPtr cur;
3492
3493 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3494 if (doc->children == NULL) {
3495#ifdef DEBUG_TREE
3496 xmlGenericError(xmlGenericErrorContext,
3497 "xmlUpgradeOldNs: failed no root !\n");
3498#endif
3499 return;
3500 }
3501
3502 cur = doc->oldNs;
3503 while (cur->next != NULL) {
3504 cur->type = XML_LOCAL_NAMESPACE;
3505 cur = cur->next;
3506 }
3507 cur->type = XML_LOCAL_NAMESPACE;
3508 cur->next = doc->children->nsDef;
3509 doc->children->nsDef = doc->oldNs;
3510 doc->oldNs = NULL;
3511#endif
3512}
3513