blob: ec5c7eaec5b8922eaba15cf7654ee6fdf95ecc9c [file] [log] [blame]
Daniel Veillardb1059e22000-09-16 14:02:43 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * Daniel.Veillard@w3.org
8 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/entities.h>
43#include <libxml/encoding.h>
44#include <libxml/valid.h>
45#include <libxml/parserInternals.h>
46#include <libxml/xmlIO.h>
47#include <libxml/uri.h>
48#include "xml-error.h"
49
50
51/************************************************************************
52 * *
53 * Version and Features handling *
54 * *
55 ************************************************************************/
/*
 * Version string exported by the library; initialised from the
 * LIBXML_VERSION_STRING macro at compile time.
 */
const char *xmlParserVersion = LIBXML_VERSION_STRING;
57
58/*
59 * xmlCheckVersion:
60 * @version: the include version number
61 *
62 * check the compiled lib version against the include one.
63 * This can warn or immediately kill the application
64 */
65void
66xmlCheckVersion(int version) {
67 int myversion = (int) LIBXML_VERSION;
68
69 if ((myversion / 10000) != (version / 10000)) {
70 fprintf(stderr,
71 "Fatal: program compiled against libxml %d using libxml %d\n",
72 (version / 10000), (myversion / 10000));
73 exit(1);
74 }
75 if ((myversion / 100) < (version / 100)) {
76 fprintf(stderr,
77 "Warning: program compiled against libxml %d using older %d\n",
78 (version / 100), (myversion / 100));
79 }
80}
81
82
/*
 * Names of the parser features, in the order in which they are handed
 * out by xmlGetFeaturesList().
 */
const char *xmlFeaturesList[] = {
    /* parser options and state */
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",

    /* the SAX handler block as a whole */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};
126
127/*
128 * xmlGetFeaturesList:
129 * @len: the length of the features name array (input/output)
130 * @result: an array of string to be filled with the features name.
131 *
132 * Copy at most *@len feature names into the @result array
133 *
134 * Returns -1 in case or error, or the total number of features,
135 * len is updated with the number of strings copied,
136 * strings must not be deallocated
137 */
138int
139xmlGetFeaturesList(int *len, const char **result) {
140 int ret, i;
141
142 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
143 if ((len == NULL) || (result == NULL))
144 return(ret);
145 if ((*len < 0) || (*len >= 1000))
146 return(-1);
147 if (*len > ret)
148 *len = ret;
149 for (i = 0;i < *len;i++)
150 result[i] = xmlFeaturesList[i];
151 return(ret);
152}
153
154/*
155 * xmlGetFeature:
156 * @ctxt: an XML/HTML parser context
157 * @name: the feature name
158 * @result: location to store the result
159 *
160 * Read the current value of one feature of this parser instance
161 *
162 * Returns -1 in case or error, 0 otherwise
163 */
164int
165xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
166 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
167 return(-1);
168
169 if (!strcmp(name, "validate")) {
170 *((int *) result) = ctxt->validate;
171 } else if (!strcmp(name, "keep blanks")) {
172 *((int *) result) = ctxt->keepBlanks;
173 } else if (!strcmp(name, "disable SAX")) {
174 *((int *) result) = ctxt->disableSAX;
175 } else if (!strcmp(name, "fetch external entities")) {
176 *((int *) result) = ctxt->validate;
177 } else if (!strcmp(name, "substitute entities")) {
178 *((int *) result) = ctxt->replaceEntities;
179 } else if (!strcmp(name, "gather line info")) {
180 *((int *) result) = ctxt->record_info;
181 } else if (!strcmp(name, "user data")) {
182 *((void **)result) = ctxt->userData;
183 } else if (!strcmp(name, "is html")) {
184 *((int *) result) = ctxt->html;
185 } else if (!strcmp(name, "is standalone")) {
186 *((int *) result) = ctxt->standalone;
187 } else if (!strcmp(name, "document")) {
188 *((xmlDocPtr *) result) = ctxt->myDoc;
189 } else if (!strcmp(name, "is well formed")) {
190 *((int *) result) = ctxt->wellFormed;
191 } else if (!strcmp(name, "is valid")) {
192 *((int *) result) = ctxt->valid;
193 } else if (!strcmp(name, "SAX block")) {
194 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
195 } else if (!strcmp(name, "SAX function internalSubset")) {
196 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
197 } else if (!strcmp(name, "SAX function isStandalone")) {
198 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
199 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
200 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
201 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
202 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
203 } else if (!strcmp(name, "SAX function resolveEntity")) {
204 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
205 } else if (!strcmp(name, "SAX function getEntity")) {
206 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
207 } else if (!strcmp(name, "SAX function entityDecl")) {
208 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
209 } else if (!strcmp(name, "SAX function notationDecl")) {
210 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
211 } else if (!strcmp(name, "SAX function attributeDecl")) {
212 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
213 } else if (!strcmp(name, "SAX function elementDecl")) {
214 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
215 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
216 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
217 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
218 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
219 } else if (!strcmp(name, "SAX function startDocument")) {
220 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
221 } else if (!strcmp(name, "SAX function endDocument")) {
222 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
223 } else if (!strcmp(name, "SAX function startElement")) {
224 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
225 } else if (!strcmp(name, "SAX function endElement")) {
226 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
227 } else if (!strcmp(name, "SAX function reference")) {
228 *((referenceSAXFunc *) result) = ctxt->sax->reference;
229 } else if (!strcmp(name, "SAX function characters")) {
230 *((charactersSAXFunc *) result) = ctxt->sax->characters;
231 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
232 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
233 } else if (!strcmp(name, "SAX function processingInstruction")) {
234 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
235 } else if (!strcmp(name, "SAX function comment")) {
236 *((commentSAXFunc *) result) = ctxt->sax->comment;
237 } else if (!strcmp(name, "SAX function warning")) {
238 *((warningSAXFunc *) result) = ctxt->sax->warning;
239 } else if (!strcmp(name, "SAX function error")) {
240 *((errorSAXFunc *) result) = ctxt->sax->error;
241 } else if (!strcmp(name, "SAX function fatalError")) {
242 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
243 } else if (!strcmp(name, "SAX function getParameterEntity")) {
244 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
245 } else if (!strcmp(name, "SAX function cdataBlock")) {
246 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
247 } else if (!strcmp(name, "SAX function externalSubset")) {
248 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
249 } else {
250 return(-1);
251 }
252 return(0);
253}
254
255/*
256 * xmlSetFeature:
257 * @ctxt: an XML/HTML parser context
258 * @name: the feature name
259 * @value: pointer to the location of the new value
260 *
261 * Change the current value of one feature of this parser instance
262 *
263 * Returns -1 in case or error, 0 otherwise
264 */
265int
266xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
267 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
268 return(-1);
269
270 if (!strcmp(name, "validate")) {
271 ctxt->validate = *((int *) value);
272 } else if (!strcmp(name, "keep blanks")) {
273 ctxt->keepBlanks = *((int *) value);
274 } else if (!strcmp(name, "disable SAX")) {
275 ctxt->disableSAX = *((int *) value);
276 } else if (!strcmp(name, "fetch external entities")) {
277 int newvalid = *((int *) value);
278 if ((!ctxt->validate) && (newvalid != 0)) {
279 if (ctxt->vctxt.warning == NULL)
280 ctxt->vctxt.warning = xmlParserValidityWarning;
281 if (ctxt->vctxt.error == NULL)
282 ctxt->vctxt.error = xmlParserValidityError;
283 /* Allocate the Node stack */
284 ctxt->vctxt.nodeTab = (xmlNodePtr *)
285 xmlMalloc(4 * sizeof(xmlNodePtr));
286 if (ctxt->vctxt.nodeTab == NULL) {
287 ctxt->vctxt.nodeMax = 0;
288 ctxt->validate = 0;
289 return(-1);
290 }
291 ctxt->vctxt.nodeNr = 0;
292 ctxt->vctxt.nodeMax = 4;
293 ctxt->vctxt.node = NULL;
294 }
295 ctxt->validate = newvalid;
296 } else if (!strcmp(name, "substitute entities")) {
297 ctxt->replaceEntities = *((int *) value);
298 } else if (!strcmp(name, "gather line info")) {
299 ctxt->record_info = *((int *) value);
300 } else if (!strcmp(name, "user data")) {
301 ctxt->userData = *((void **)value);
302 } else if (!strcmp(name, "is html")) {
303 ctxt->html = *((int *) value);
304 } else if (!strcmp(name, "is standalone")) {
305 ctxt->standalone = *((int *) value);
306 } else if (!strcmp(name, "document")) {
307 ctxt->myDoc = *((xmlDocPtr *) value);
308 } else if (!strcmp(name, "is well formed")) {
309 ctxt->wellFormed = *((int *) value);
310 } else if (!strcmp(name, "is valid")) {
311 ctxt->valid = *((int *) value);
312 } else if (!strcmp(name, "SAX block")) {
313 ctxt->sax = *((xmlSAXHandlerPtr *) value);
314 } else if (!strcmp(name, "SAX function internalSubset")) {
315 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function isStandalone")) {
317 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
319 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
321 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function resolveEntity")) {
323 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
324 } else if (!strcmp(name, "SAX function getEntity")) {
325 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function entityDecl")) {
327 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function notationDecl")) {
329 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function attributeDecl")) {
331 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function elementDecl")) {
333 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
335 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
337 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function startDocument")) {
339 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function endDocument")) {
341 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startElement")) {
343 ctxt->sax->startElement = *((startElementSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endElement")) {
345 ctxt->sax->endElement = *((endElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function reference")) {
347 ctxt->sax->reference = *((referenceSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function characters")) {
349 ctxt->sax->characters = *((charactersSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
351 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function processingInstruction")) {
353 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function comment")) {
355 ctxt->sax->comment = *((commentSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function warning")) {
357 ctxt->sax->warning = *((warningSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function error")) {
359 ctxt->sax->error = *((errorSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function fatalError")) {
361 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function getParameterEntity")) {
363 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
364 } else if (!strcmp(name, "SAX function cdataBlock")) {
365 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function externalSubset")) {
367 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
368 } else {
369 return(-1);
370 }
371 return(0);
372}
373
374/************************************************************************
375 * *
376 * Some functions to avoid too large macros *
377 * *
378 ************************************************************************/
379
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* control range: only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));

    /* up to the start of the surrogate block */
    if (c <= 0xD7FF)
        return(1);

    /* surrogates */
    if (c < 0xE000)
        return(0);

    /* rest of the BMP, minus FFFE and FFFF */
    if (c <= 0xFFFD)
        return(1);

    /* supplementary planes */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
400
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
415
/*
 * Inclusive [first, last] code point ranges of production
 * [85] BaseChar of the XML REC.  The entries are sorted in increasing
 * order and do not overlap, which xmlIsBaseChar() relies on for its
 * binary search.
 */
static const unsigned short xmlBaseCharRangeTab[][2] = {
    {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00C0, 0x00D6}, {0x00D8, 0x00F6},
    {0x00F8, 0x00FF}, {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148},
    {0x014A, 0x017E}, {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5},
    {0x01FA, 0x0217}, {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386},
    {0x0388, 0x038A}, {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE},
    {0x03D0, 0x03D6}, {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE},
    {0x03E0, 0x03E0}, {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F},
    {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8},
    {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9},
    {0x0531, 0x0556}, {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA},
    {0x05F0, 0x05F2}, {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7},
    {0x06BA, 0x06BE}, {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5},
    {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69},
    {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103},
    {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110B, 0x110C}, {0x110E, 0x1112},
    {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
    {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
    {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
    {0x1169, 0x1169}, {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
    {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB}, {0x11AE, 0x11AF},
    {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2}, {0x11EB, 0x11EB},
    {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
    {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
    {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
    {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
    {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
    {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
    {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
    {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * The ranges of the production live in xmlBaseCharRangeTab and are
 * looked up with a binary search, so the first code point of every
 * range (e.g. #x0905 and #x10A0) is matched like any other.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low = 0;
    int high = (int) (sizeof(xmlBaseCharRangeTab) /
                      sizeof(xmlBaseCharRangeTab[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < (int) xmlBaseCharRangeTab[mid][0])
            high = mid - 1;         /* c lies before this range */
        else if (c > (int) xmlBaseCharRangeTab[mid][1])
            low = mid + 1;          /* c lies after this range */
        else
            return(1);
    }
    return(0);
}
639
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] ranges of the production, in increasing order */
    static const int digitRanges[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(digitRanges) / sizeof(digitRanges[0]); idx++) {
        /* the table is sorted: once below a range start, no later one matches */
        if (c < digitRanges[idx][0])
            return(0);
        if (c <= digitRanges[idx][1])
            return(1);
    }
    return(0);
}
669
/*
 * Inclusive [first, last] code point ranges of production
 * [87] CombiningChar of the XML REC.  The entries are sorted in
 * increasing order and do not overlap, which xmlIsCombining() relies on
 * for its binary search.
 */
static const unsigned short xmlCombiningRangeTab[][2] = {
    {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
    {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
    {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
    {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
    {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
    {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
    {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
    {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
    {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F},
    {0x0A40, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
    {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
    {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43},
    {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
    {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
    {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
    {0x0C55, 0x0C56}, {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
    {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
    {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},
    {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
    {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
    {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
    {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
    {0x0F97, 0x0F97}, {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
    {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
    {0x309A, 0x309A}
};

/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges of the production live in xmlCombiningRangeTab and are
 * looked up with a binary search, so the first code point of every
 * range (e.g. #x0901, #x0A02 and #x0E31) is matched like any other.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    int low = 0;
    int high = (int) (sizeof(xmlCombiningRangeTab) /
                      sizeof(xmlCombiningRangeTab[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < (int) xmlCombiningRangeTab[mid][0])
            high = mid - 1;         /* c lies before this range */
        else if (c > (int) xmlCombiningRangeTab[mid][1])
            low = mid + 1;          /* c lies after this range */
        else
            return(1);
    }
    return(0);
}
782
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7:
        case 0x02D0:
        case 0x02D1:
        case 0x0387:
        case 0x0640:
        case 0x0E46:
        case 0x0EC6:
        case 0x3005:
            return(1);
        default:
            break;
    }
    /* the hiragana range starts at #x309D: #x309B and #x309C are not extenders */
    return(((c >= 0x3031) && (c <= 0x3035)) ||
           ((c >= 0x309D) && (c <= 0x309E)) ||
           ((c >= 0x30FC) && (c <= 0x30FE)));
}
804
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points named by the production are accepted; the
 * #xF900-#xFA2D block is not part of it and is rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    return(
         (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
         (((c) >= 0x3021) && ((c) <= 0x3029)) ||
          ((c) == 0x3007));
}
822
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tested first, Ideographic only when that fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
836
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* the three blanks and the punctuation set of the production */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
859
860/************************************************************************
861 * *
862 * Input handling functions for progressive parsing *
863 * *
864 ************************************************************************/
865
866/* #define DEBUG_INPUT */
867/* #define DEBUG_STACK */
868/* #define DEBUG_PUSH */
869
870
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging aid, only compiled when DEBUG_INPUT is defined: report on
 * stderr when the cached base/cur pointers of @in disagree with the
 * underlying buffer, then dump the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the counters are cast to the
     * type matching their conversion: casting a pointer to int loses
     * bits where pointers are wider than int.
     */
    fprintf(stderr,"buffer %p : content %p, cur %ld, use %lu, size %lu\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned long) in->buf->buffer->use,
            (unsigned long) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
895
896
897/**
898 * xmlParserInputRead:
899 * @in: an XML parser input
900 * @len: an indicative size for the lookahead
901 *
902 * This function refresh the input for the parser. It doesn't try to
903 * preserve pointers to the input buffer, and discard already read data
904 *
905 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
906 * end of this entity
907 */
908int
909xmlParserInputRead(xmlParserInputPtr in, int len) {
910 int ret;
911 int used;
912 int index;
913
914#ifdef DEBUG_INPUT
915 fprintf(stderr, "Read\n");
916#endif
917 if (in->buf == NULL) return(-1);
918 if (in->base == NULL) return(-1);
919 if (in->cur == NULL) return(-1);
920 if (in->buf->buffer == NULL) return(-1);
921 if (in->buf->readcallback == NULL) return(-1);
922
923 CHECK_BUFFER(in);
924
925 used = in->cur - in->buf->buffer->content;
926 ret = xmlBufferShrink(in->buf->buffer, used);
927 if (ret > 0) {
928 in->cur -= ret;
929 in->consumed += ret;
930 }
931 ret = xmlParserInputBufferRead(in->buf, len);
932 if (in->base != in->buf->buffer->content) {
933 /*
934 * the buffer has been realloced
935 */
936 index = in->cur - in->base;
937 in->base = in->buf->buffer->content;
938 in->cur = &in->buf->buffer->content[index];
939 }
940
941 CHECK_BUFFER(in);
942
943 return(ret);
944}
945
946/**
947 * xmlParserInputGrow:
948 * @in: an XML parser input
949 * @len: an indicative size for the lookahead
950 *
951 * This function increase the input for the parser. It tries to
952 * preserve pointers to the input buffer, and keep already read data
953 *
954 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
955 * end of this entity
956 */
957int
958xmlParserInputGrow(xmlParserInputPtr in, int len) {
959 int ret;
960 int index;
961
962#ifdef DEBUG_INPUT
963 fprintf(stderr, "Grow\n");
964#endif
965 if (in->buf == NULL) return(-1);
966 if (in->base == NULL) return(-1);
967 if (in->cur == NULL) return(-1);
968 if (in->buf->buffer == NULL) return(-1);
969
970 CHECK_BUFFER(in);
971
972 index = in->cur - in->base;
973 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
974
975 CHECK_BUFFER(in);
976
977 return(0);
978 }
979 if (in->buf->readcallback != NULL)
980 ret = xmlParserInputBufferGrow(in->buf, len);
981 else
982 return(0);
983
984 /*
985 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
986 * block, but we use it really as an integer to do some
987 * pointer arithmetic. Insure will raise it as a bug but in
988 * that specific case, that's not !
989 */
990 if (in->base != in->buf->buffer->content) {
991 /*
992 * the buffer has been realloced
993 */
994 index = in->cur - in->base;
995 in->base = in->buf->buffer->content;
996 in->cur = &in->buf->buffer->content[index];
997 }
998
999 CHECK_BUFFER(in);
1000
1001 return(ret);
1002}
1003
1004/**
1005 * xmlParserInputShrink:
1006 * @in: an XML parser input
1007 *
1008 * This function removes used input for the parser.
1009 */
1010void
1011xmlParserInputShrink(xmlParserInputPtr in) {
1012 int used;
1013 int ret;
1014 int index;
1015
1016#ifdef DEBUG_INPUT
1017 fprintf(stderr, "Shrink\n");
1018#endif
1019 if (in->buf == NULL) return;
1020 if (in->base == NULL) return;
1021 if (in->cur == NULL) return;
1022 if (in->buf->buffer == NULL) return;
1023
1024 CHECK_BUFFER(in);
1025
1026 used = in->cur - in->buf->buffer->content;
1027 if (used > INPUT_CHUNK) {
1028 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1029 if (ret > 0) {
1030 in->cur -= ret;
1031 in->consumed += ret;
1032 }
1033 }
1034
1035 CHECK_BUFFER(in);
1036
1037 if (in->buf->buffer->use > INPUT_CHUNK) {
1038 return;
1039 }
1040 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1041 if (in->base != in->buf->buffer->content) {
1042 /*
1043 * the buffer has been realloced
1044 */
1045 index = in->cur - in->base;
1046 in->base = in->buf->buffer->content;
1047 in->cur = &in->buf->buffer->content[index];
1048 }
1049
1050 CHECK_BUFFER(in);
1051}
1052
1053/************************************************************************
1054 * *
1055 * UTF8 character input and related functions *
1056 * *
1057 ************************************************************************/
1058
1059/**
1060 * xmlNextChar:
1061 * @ctxt: the XML parser context
1062 *
1063 * Skip to the next char input char.
1064 */
1065
1066void
1067xmlNextChar(xmlParserCtxtPtr ctxt) {
1068 if (ctxt->instate == XML_PARSER_EOF)
1069 return;
1070
1071 /*
1072 * 2.11 End-of-Line Handling
1073 * the literal two-character sequence "#xD#xA" or a standalone
1074 * literal #xD, an XML processor must pass to the application
1075 * the single character #xA.
1076 */
1077 if (ctxt->token != 0) ctxt->token = 0;
1078 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1079 if ((*ctxt->input->cur == 0) &&
1080 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1081 (ctxt->instate != XML_PARSER_COMMENT)) {
1082 /*
1083 * If we are at the end of the current entity and
1084 * the context allows it, we pop consumed entities
1085 * automatically.
1086 * the auto closing should be blocked in other cases
1087 */
1088 xmlPopInput(ctxt);
1089 } else {
1090 if (*(ctxt->input->cur) == '\n') {
1091 ctxt->input->line++; ctxt->input->col = 1;
1092 } else ctxt->input->col++;
1093 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1094 /*
1095 * We are supposed to handle UTF8, check it's valid
1096 * From rfc2044: encoding of the Unicode values on UTF-8:
1097 *
1098 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1099 * 0000 0000-0000 007F 0xxxxxxx
1100 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1101 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1102 *
1103 * Check for the 0x110000 limit too
1104 */
1105 const unsigned char *cur = ctxt->input->cur;
1106 unsigned char c;
1107
1108 c = *cur;
1109 if (c & 0x80) {
1110 if (cur[1] == 0)
1111 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1112 if ((cur[1] & 0xc0) != 0x80)
1113 goto encoding_error;
1114 if ((c & 0xe0) == 0xe0) {
1115 unsigned int val;
1116
1117 if (cur[2] == 0)
1118 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1119 if ((cur[2] & 0xc0) != 0x80)
1120 goto encoding_error;
1121 if ((c & 0xf0) == 0xf0) {
1122 if (cur[3] == 0)
1123 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1124 if (((c & 0xf8) != 0xf0) ||
1125 ((cur[3] & 0xc0) != 0x80))
1126 goto encoding_error;
1127 /* 4-byte code */
1128 ctxt->input->cur += 4;
1129 val = (cur[0] & 0x7) << 18;
1130 val |= (cur[1] & 0x3f) << 12;
1131 val |= (cur[2] & 0x3f) << 6;
1132 val |= cur[3] & 0x3f;
1133 } else {
1134 /* 3-byte code */
1135 ctxt->input->cur += 3;
1136 val = (cur[0] & 0xf) << 12;
1137 val |= (cur[1] & 0x3f) << 6;
1138 val |= cur[2] & 0x3f;
1139 }
1140 if (((val > 0xd7ff) && (val < 0xe000)) ||
1141 ((val > 0xfffd) && (val < 0x10000)) ||
1142 (val >= 0x110000)) {
1143 if ((ctxt->sax != NULL) &&
1144 (ctxt->sax->error != NULL))
1145 ctxt->sax->error(ctxt->userData,
1146 "Char 0x%X out of allowed range\n", val);
1147 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1148 ctxt->wellFormed = 0;
1149 ctxt->disableSAX = 1;
1150 }
1151 } else
1152 /* 2-byte code */
1153 ctxt->input->cur += 2;
1154 } else
1155 /* 1-byte code */
1156 ctxt->input->cur++;
1157 } else {
1158 /*
1159 * Assume it's a fixed lenght encoding (1) with
1160 * a compatibke encoding for the ASCII set, since
1161 * XML constructs only use < 128 chars
1162 */
1163 ctxt->input->cur++;
1164 }
1165 ctxt->nbChars++;
1166 if (*ctxt->input->cur == 0)
1167 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1168 }
1169 } else {
1170 ctxt->input->cur++;
1171 ctxt->nbChars++;
1172 if (*ctxt->input->cur == 0)
1173 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1174 }
1175 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1176 xmlParserHandlePEReference(ctxt);
1177 if ((*ctxt->input->cur == 0) &&
1178 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1179 xmlPopInput(ctxt);
1180 return;
1181encoding_error:
1182 /*
1183 * If we detect an UTF8 error that probably mean that the
1184 * input encoding didn't get properly advertized in the
1185 * declaration header. Report the error and switch the encoding
1186 * to ISO-Latin-1 (if you don't like this policy, just declare the
1187 * encoding !)
1188 */
1189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1190 ctxt->sax->error(ctxt->userData,
1191 "Input is not proper UTF-8, indicate encoding !\n");
1192 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1193 ctxt->input->cur[0], ctxt->input->cur[1],
1194 ctxt->input->cur[2], ctxt->input->cur[3]);
1195 }
1196 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1197
1198 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1199 ctxt->input->cur++;
1200 return;
1201}
1202
1203/**
1204 * xmlCurrentChar:
1205 * @ctxt: the XML parser context
1206 * @len: pointer to the length of the char read
1207 *
1208 * The current char value, if using UTF-8 this may actaully span multiple
1209 * bytes in the input buffer. Implement the end of line normalization:
1210 * 2.11 End-of-Line Handling
1211 * Wherever an external parsed entity or the literal entity value
1212 * of an internal parsed entity contains either the literal two-character
1213 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1214 * must pass to the application the single character #xA.
1215 * This behavior can conveniently be produced by normalizing all
1216 * line breaks to #xA on input, before parsing.)
1217 *
1218 * Returns the current char value and its lenght
1219 */
1220
1221int
1222xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1223 if (ctxt->instate == XML_PARSER_EOF)
1224 return(0);
1225
1226 if (ctxt->token != 0) {
1227 *len = 0;
1228 return(ctxt->token);
1229 }
1230 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1231 /*
1232 * We are supposed to handle UTF8, check it's valid
1233 * From rfc2044: encoding of the Unicode values on UTF-8:
1234 *
1235 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1236 * 0000 0000-0000 007F 0xxxxxxx
1237 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1238 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1239 *
1240 * Check for the 0x110000 limit too
1241 */
1242 const unsigned char *cur = ctxt->input->cur;
1243 unsigned char c;
1244 unsigned int val;
1245
1246 c = *cur;
1247 if (c & 0x80) {
1248 if (cur[1] == 0)
1249 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1250 if ((cur[1] & 0xc0) != 0x80)
1251 goto encoding_error;
1252 if ((c & 0xe0) == 0xe0) {
1253
1254 if (cur[2] == 0)
1255 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1256 if ((cur[2] & 0xc0) != 0x80)
1257 goto encoding_error;
1258 if ((c & 0xf0) == 0xf0) {
1259 if (cur[3] == 0)
1260 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1261 if (((c & 0xf8) != 0xf0) ||
1262 ((cur[3] & 0xc0) != 0x80))
1263 goto encoding_error;
1264 /* 4-byte code */
1265 *len = 4;
1266 val = (cur[0] & 0x7) << 18;
1267 val |= (cur[1] & 0x3f) << 12;
1268 val |= (cur[2] & 0x3f) << 6;
1269 val |= cur[3] & 0x3f;
1270 } else {
1271 /* 3-byte code */
1272 *len = 3;
1273 val = (cur[0] & 0xf) << 12;
1274 val |= (cur[1] & 0x3f) << 6;
1275 val |= cur[2] & 0x3f;
1276 }
1277 } else {
1278 /* 2-byte code */
1279 *len = 2;
1280 val = (cur[0] & 0x1f) << 6;
1281 val |= cur[1] & 0x3f;
1282 }
1283 if (!IS_CHAR(val)) {
1284 if ((ctxt->sax != NULL) &&
1285 (ctxt->sax->error != NULL))
1286 ctxt->sax->error(ctxt->userData,
1287 "Char 0x%X out of allowed range\n", val);
1288 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1289 ctxt->wellFormed = 0;
1290 ctxt->disableSAX = 1;
1291 }
1292 return(val);
1293 } else {
1294 /* 1-byte code */
1295 *len = 1;
1296 if (*ctxt->input->cur == 0xD) {
1297 if (ctxt->input->cur[1] == 0xA) {
1298 ctxt->nbChars++;
1299 ctxt->input->cur++;
1300 }
1301 return(0xA);
1302 }
1303 return((int) *ctxt->input->cur);
1304 }
1305 }
1306 /*
1307 * Assume it's a fixed lenght encoding (1) with
1308 * a compatibke encoding for the ASCII set, since
1309 * XML constructs only use < 128 chars
1310 */
1311 *len = 1;
1312 if (*ctxt->input->cur == 0xD) {
1313 if (ctxt->input->cur[1] == 0xA) {
1314 ctxt->nbChars++;
1315 ctxt->input->cur++;
1316 }
1317 return(0xA);
1318 }
1319 return((int) *ctxt->input->cur);
1320encoding_error:
1321 /*
1322 * If we detect an UTF8 error that probably mean that the
1323 * input encoding didn't get properly advertized in the
1324 * declaration header. Report the error and switch the encoding
1325 * to ISO-Latin-1 (if you don't like this policy, just declare the
1326 * encoding !)
1327 */
1328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1329 ctxt->sax->error(ctxt->userData,
1330 "Input is not proper UTF-8, indicate encoding !\n");
1331 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1332 ctxt->input->cur[0], ctxt->input->cur[1],
1333 ctxt->input->cur[2], ctxt->input->cur[3]);
1334 }
1335 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1336
1337 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1338 *len = 1;
1339 return((int) *ctxt->input->cur);
1340}
1341
1342/**
1343 * xmlStringCurrentChar:
1344 * @ctxt: the XML parser context
1345 * @cur: pointer to the beginning of the char
1346 * @len: pointer to the length of the char read
1347 *
1348 * The current char value, if using UTF-8 this may actaully span multiple
1349 * bytes in the input buffer.
1350 *
1351 * Returns the current char value and its lenght
1352 */
1353
1354int
1355xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1356 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1357 /*
1358 * We are supposed to handle UTF8, check it's valid
1359 * From rfc2044: encoding of the Unicode values on UTF-8:
1360 *
1361 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1362 * 0000 0000-0000 007F 0xxxxxxx
1363 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1364 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1365 *
1366 * Check for the 0x110000 limit too
1367 */
1368 unsigned char c;
1369 unsigned int val;
1370
1371 c = *cur;
1372 if (c & 0x80) {
1373 if ((cur[1] & 0xc0) != 0x80)
1374 goto encoding_error;
1375 if ((c & 0xe0) == 0xe0) {
1376
1377 if ((cur[2] & 0xc0) != 0x80)
1378 goto encoding_error;
1379 if ((c & 0xf0) == 0xf0) {
1380 if (((c & 0xf8) != 0xf0) ||
1381 ((cur[3] & 0xc0) != 0x80))
1382 goto encoding_error;
1383 /* 4-byte code */
1384 *len = 4;
1385 val = (cur[0] & 0x7) << 18;
1386 val |= (cur[1] & 0x3f) << 12;
1387 val |= (cur[2] & 0x3f) << 6;
1388 val |= cur[3] & 0x3f;
1389 } else {
1390 /* 3-byte code */
1391 *len = 3;
1392 val = (cur[0] & 0xf) << 12;
1393 val |= (cur[1] & 0x3f) << 6;
1394 val |= cur[2] & 0x3f;
1395 }
1396 } else {
1397 /* 2-byte code */
1398 *len = 2;
1399 val = (cur[0] & 0x1f) << 6;
1400 val |= cur[2] & 0x3f;
1401 }
1402 if (!IS_CHAR(val)) {
1403 if ((ctxt->sax != NULL) &&
1404 (ctxt->sax->error != NULL))
1405 ctxt->sax->error(ctxt->userData,
1406 "Char 0x%X out of allowed range\n", val);
1407 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1408 ctxt->wellFormed = 0;
1409 ctxt->disableSAX = 1;
1410 }
1411 return(val);
1412 } else {
1413 /* 1-byte code */
1414 *len = 1;
1415 return((int) *cur);
1416 }
1417 }
1418 /*
1419 * Assume it's a fixed lenght encoding (1) with
1420 * a compatibke encoding for the ASCII set, since
1421 * XML constructs only use < 128 chars
1422 */
1423 *len = 1;
1424 return((int) *cur);
1425encoding_error:
1426 /*
1427 * If we detect an UTF8 error that probably mean that the
1428 * input encoding didn't get properly advertized in the
1429 * declaration header. Report the error and switch the encoding
1430 * to ISO-Latin-1 (if you don't like this policy, just declare the
1431 * encoding !)
1432 */
1433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1434 ctxt->sax->error(ctxt->userData,
1435 "Input is not proper UTF-8, indicate encoding !\n");
1436 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1437 ctxt->input->cur[0], ctxt->input->cur[1],
1438 ctxt->input->cur[2], ctxt->input->cur[3]);
1439 }
1440 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1441
1442 *len = 1;
1443 return((int) *cur);
1444}
1445
1446/**
1447 * xmlCopyChar:
1448 * @len: pointer to the length of the char read (or zero)
1449 * @array: pointer to an arry of xmlChar
1450 * @val: the char value
1451 *
1452 * append the char value in the array
1453 *
1454 * Returns the number of xmlChar written
1455 */
1456
1457int
1458xmlCopyChar(int len, xmlChar *out, int val) {
1459 /*
1460 * We are supposed to handle UTF8, check it's valid
1461 * From rfc2044: encoding of the Unicode values on UTF-8:
1462 *
1463 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1464 * 0000 0000-0000 007F 0xxxxxxx
1465 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1466 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1467 */
1468 if (len == 0) {
1469 if (val < 0) len = 0;
1470 else if (val < 0x80) len = 1;
1471 else if (val < 0x800) len = 2;
1472 else if (val < 0x10000) len = 3;
1473 else if (val < 0x110000) len = 4;
1474 if (len == 0) {
1475 fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
1476 val);
1477 return(0);
1478 }
1479 }
1480 if (len > 1) {
1481 int bits;
1482
1483 if (val < 0x80) { *out++= val; bits= -6; }
1484 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1485 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1486 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1487
1488 for ( ; bits >= 0; bits-= 6)
1489 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1490
1491 return(len);
1492 }
1493 *out = (xmlChar) val;
1494 return(1);
1495}
1496
1497/************************************************************************
1498 * *
Daniel Veillard04698d92000-09-17 16:00:22 +00001499 * Commodity functions to switch encodings *
1500 * *
1501 ************************************************************************/
1502
1503/**
1504 * xmlSwitchEncoding:
1505 * @ctxt: the parser context
1506 * @enc: the encoding value (number)
1507 *
1508 * change the input functions when discovering the character encoding
1509 * of a given entity.
1510 *
1511 * Returns 0 in case of success, -1 otherwise
1512 */
1513int
1514xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1515{
1516 xmlCharEncodingHandlerPtr handler;
1517
1518 switch (enc) {
1519 case XML_CHAR_ENCODING_ERROR:
1520 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1522 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1523 ctxt->wellFormed = 0;
1524 ctxt->disableSAX = 1;
1525 break;
1526 case XML_CHAR_ENCODING_NONE:
1527 /* let's assume it's UTF-8 without the XML decl */
1528 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1529 return(0);
1530 case XML_CHAR_ENCODING_UTF8:
1531 /* default encoding, no conversion should be needed */
1532 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1533 return(0);
1534 default:
1535 break;
1536 }
1537 handler = xmlGetCharEncodingHandler(enc);
1538 if (handler == NULL) {
1539 /*
1540 * Default handlers.
1541 */
1542 switch (enc) {
1543 case XML_CHAR_ENCODING_ERROR:
1544 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1546 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1547 ctxt->wellFormed = 0;
1548 ctxt->disableSAX = 1;
1549 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1550 break;
1551 case XML_CHAR_ENCODING_NONE:
1552 /* let's assume it's UTF-8 without the XML decl */
1553 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1554 return(0);
1555 case XML_CHAR_ENCODING_UTF8:
1556 case XML_CHAR_ENCODING_ASCII:
1557 /* default encoding, no conversion should be needed */
1558 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1559 return(0);
1560 case XML_CHAR_ENCODING_UTF16LE:
1561 break;
1562 case XML_CHAR_ENCODING_UTF16BE:
1563 break;
1564 case XML_CHAR_ENCODING_UCS4LE:
1565 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1566 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1567 ctxt->sax->error(ctxt->userData,
1568 "char encoding USC4 little endian not supported\n");
1569 break;
1570 case XML_CHAR_ENCODING_UCS4BE:
1571 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1573 ctxt->sax->error(ctxt->userData,
1574 "char encoding USC4 big endian not supported\n");
1575 break;
1576 case XML_CHAR_ENCODING_EBCDIC:
1577 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1579 ctxt->sax->error(ctxt->userData,
1580 "char encoding EBCDIC not supported\n");
1581 break;
1582 case XML_CHAR_ENCODING_UCS4_2143:
1583 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1585 ctxt->sax->error(ctxt->userData,
1586 "char encoding UCS4 2143 not supported\n");
1587 break;
1588 case XML_CHAR_ENCODING_UCS4_3412:
1589 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1591 ctxt->sax->error(ctxt->userData,
1592 "char encoding UCS4 3412 not supported\n");
1593 break;
1594 case XML_CHAR_ENCODING_UCS2:
1595 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1597 ctxt->sax->error(ctxt->userData,
1598 "char encoding UCS2 not supported\n");
1599 break;
1600 case XML_CHAR_ENCODING_8859_1:
1601 case XML_CHAR_ENCODING_8859_2:
1602 case XML_CHAR_ENCODING_8859_3:
1603 case XML_CHAR_ENCODING_8859_4:
1604 case XML_CHAR_ENCODING_8859_5:
1605 case XML_CHAR_ENCODING_8859_6:
1606 case XML_CHAR_ENCODING_8859_7:
1607 case XML_CHAR_ENCODING_8859_8:
1608 case XML_CHAR_ENCODING_8859_9:
1609 /*
1610 * We used to keep the internal content in the
1611 * document encoding however this turns being unmaintainable
1612 * So xmlGetCharEncodingHandler() will return non-null
1613 * values for this now.
1614 */
1615 if ((ctxt->inputNr == 1) &&
1616 (ctxt->encoding == NULL) &&
1617 (ctxt->input->encoding != NULL)) {
1618 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1619 }
1620 ctxt->charset = enc;
1621 return(0);
1622 case XML_CHAR_ENCODING_2022_JP:
1623 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1625 ctxt->sax->error(ctxt->userData,
1626 "char encoding ISO-2022-JPnot supported\n");
1627 break;
1628 case XML_CHAR_ENCODING_SHIFT_JIS:
1629 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1631 ctxt->sax->error(ctxt->userData,
1632 "char encoding Shift_JIS not supported\n");
1633 break;
1634 case XML_CHAR_ENCODING_EUC_JP:
1635 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1637 ctxt->sax->error(ctxt->userData,
1638 "char encoding EUC-JPnot supported\n");
1639 break;
1640 }
1641 }
1642 if (handler == NULL)
1643 return(-1);
1644 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1645 return(xmlSwitchToEncoding(ctxt, handler));
1646}
1647
1648/**
1649 * xmlSwitchToEncoding:
1650 * @ctxt: the parser context
1651 * @handler: the encoding handler
1652 *
1653 * change the input functions when discovering the character encoding
1654 * of a given entity.
1655 *
1656 * Returns 0 in case of success, -1 otherwise
1657 */
1658int
1659xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1660{
1661 int nbchars;
1662
1663 if (handler != NULL) {
1664 if (ctxt->input != NULL) {
1665 if (ctxt->input->buf != NULL) {
1666 if (ctxt->input->buf->encoder != NULL) {
1667 if (ctxt->input->buf->encoder == handler)
1668 return(0);
1669 /*
1670 * Note: this is a bit dangerous, but that's what it
1671 * takes to use nearly compatible signature for different
1672 * encodings.
1673 */
1674 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1675 ctxt->input->buf->encoder = handler;
1676 return(0);
1677 }
1678 ctxt->input->buf->encoder = handler;
1679
1680 /*
1681 * Is there already some content down the pipe to convert ?
1682 */
1683 if ((ctxt->input->buf->buffer != NULL) &&
1684 (ctxt->input->buf->buffer->use > 0)) {
1685 int processed;
1686
1687 /*
1688 * Specific handling of the Byte Order Mark for
1689 * UTF-16
1690 */
1691 if ((handler->name != NULL) &&
1692 (!strcmp(handler->name, "UTF-16LE")) &&
1693 (ctxt->input->cur[0] == 0xFF) &&
1694 (ctxt->input->cur[1] == 0xFE)) {
1695 ctxt->input->cur += 2;
1696 }
1697 if ((handler->name != NULL) &&
1698 (!strcmp(handler->name, "UTF-16BE")) &&
1699 (ctxt->input->cur[0] == 0xFE) &&
1700 (ctxt->input->cur[1] == 0xFF)) {
1701 ctxt->input->cur += 2;
1702 }
1703
1704 /*
1705 * Shring the current input buffer.
1706 * Move it as the raw buffer and create a new input buffer
1707 */
1708 processed = ctxt->input->cur - ctxt->input->base;
1709 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1710 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1711 ctxt->input->buf->buffer = xmlBufferCreate();
1712
1713 if (ctxt->html) {
1714 /*
1715 * converst as much as possbile of the buffer
1716 */
1717 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1718 ctxt->input->buf->buffer,
1719 ctxt->input->buf->raw);
1720 } else {
1721 /*
1722 * convert just enough to get
1723 * '<?xml version="1.0" encoding="xxx"?>'
1724 * parsed with the autodetected encoding
1725 * into the parser reading buffer.
1726 */
1727 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1728 ctxt->input->buf->buffer,
1729 ctxt->input->buf->raw);
1730 }
1731 if (nbchars < 0) {
1732 fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
1733 return(-1);
1734 }
1735 ctxt->input->base =
1736 ctxt->input->cur = ctxt->input->buf->buffer->content;
1737
1738 }
1739 return(0);
1740 } else {
1741 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1742 /*
1743 * When parsing a static memory array one must know the
1744 * size to be able to convert the buffer.
1745 */
1746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1747 ctxt->sax->error(ctxt->userData,
1748 "xmlSwitchEncoding : no input\n");
1749 return(-1);
1750 } else {
1751 int processed;
1752
1753 /*
1754 * Shring the current input buffer.
1755 * Move it as the raw buffer and create a new input buffer
1756 */
1757 processed = ctxt->input->cur - ctxt->input->base;
1758
1759 ctxt->input->buf->raw = xmlBufferCreate();
1760 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1761 ctxt->input->length - processed);
1762 ctxt->input->buf->buffer = xmlBufferCreate();
1763
1764 /*
1765 * convert as much as possible of the raw input
1766 * to the parser reading buffer.
1767 */
1768 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1769 ctxt->input->buf->buffer,
1770 ctxt->input->buf->raw);
1771 if (nbchars < 0) {
1772 fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
1773 return(-1);
1774 }
1775
1776 /*
1777 * Conversion succeeded, get rid of the old buffer
1778 */
1779 if ((ctxt->input->free != NULL) &&
1780 (ctxt->input->base != NULL))
1781 ctxt->input->free((xmlChar *) ctxt->input->base);
1782 ctxt->input->base =
1783 ctxt->input->cur = ctxt->input->buf->buffer->content;
1784 }
1785 }
1786 } else {
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
1789 "xmlSwitchEncoding : no input\n");
1790 return(-1);
1791 }
1792 /*
1793 * The parsing is now done in UTF8 natively
1794 */
1795 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1796 } else
1797 return(-1);
1798 return(0);
1799
1800}
1801
1802/************************************************************************
1803 * *
Daniel Veillardb1059e22000-09-16 14:02:43 +00001804 * Commodity functions to handle entities processing *
1805 * *
1806 ************************************************************************/
1807
1808/**
1809 * xmlFreeInputStream:
1810 * @input: an xmlParserInputPtr
1811 *
1812 * Free up an input stream.
1813 */
1814void
1815xmlFreeInputStream(xmlParserInputPtr input) {
1816 if (input == NULL) return;
1817
1818 if (input->filename != NULL) xmlFree((char *) input->filename);
1819 if (input->directory != NULL) xmlFree((char *) input->directory);
1820 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1821 if (input->version != NULL) xmlFree((char *) input->version);
1822 if ((input->free != NULL) && (input->base != NULL))
1823 input->free((xmlChar *) input->base);
1824 if (input->buf != NULL)
1825 xmlFreeParserInputBuffer(input->buf);
1826 memset(input, -1, sizeof(xmlParserInput));
1827 xmlFree(input);
1828}
1829
1830/**
1831 * xmlNewInputStream:
1832 * @ctxt: an XML parser context
1833 *
1834 * Create a new input stream structure
1835 * Returns the new input stream or NULL
1836 */
1837xmlParserInputPtr
1838xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1839 xmlParserInputPtr input;
1840
1841 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1842 if (input == NULL) {
1843 if (ctxt != NULL) {
1844 ctxt->errNo = XML_ERR_NO_MEMORY;
1845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1846 ctxt->sax->error(ctxt->userData,
1847 "malloc: couldn't allocate a new input stream\n");
1848 ctxt->errNo = XML_ERR_NO_MEMORY;
1849 }
1850 return(NULL);
1851 }
1852 memset(input, 0, sizeof(xmlParserInput));
1853 input->line = 1;
1854 input->col = 1;
1855 input->standalone = -1;
1856 return(input);
1857}
1858
1859/**
1860 * xmlNewIOInputStream:
1861 * @ctxt: an XML parser context
1862 * @input: an I/O Input
1863 * @enc: the charset encoding if known
1864 *
1865 * Create a new input stream structure encapsulating the @input into
1866 * a stream suitable for the parser.
1867 *
1868 * Returns the new input stream or NULL
1869 */
1870xmlParserInputPtr
1871xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1872 xmlCharEncoding enc) {
1873 xmlParserInputPtr inputStream;
1874
1875 if (xmlParserDebugEntities)
1876 fprintf(stderr, "new input from I/O\n");
1877 inputStream = xmlNewInputStream(ctxt);
1878 if (inputStream == NULL) {
1879 return(NULL);
1880 }
1881 inputStream->filename = NULL;
1882 inputStream->buf = input;
1883 inputStream->base = inputStream->buf->buffer->content;
1884 inputStream->cur = inputStream->buf->buffer->content;
1885 if (enc != XML_CHAR_ENCODING_NONE) {
1886 xmlSwitchEncoding(ctxt, enc);
1887 }
1888
1889 return(inputStream);
1890}
1891
1892/**
1893 * xmlNewEntityInputStream:
1894 * @ctxt: an XML parser context
1895 * @entity: an Entity pointer
1896 *
1897 * Create a new input stream based on an xmlEntityPtr
1898 *
1899 * Returns the new input stream or NULL
1900 */
1901xmlParserInputPtr
1902xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1903 xmlParserInputPtr input;
1904
1905 if (entity == NULL) {
1906 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1908 ctxt->sax->error(ctxt->userData,
1909 "internal: xmlNewEntityInputStream entity = NULL\n");
1910 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1911 return(NULL);
1912 }
1913 if (xmlParserDebugEntities)
1914 fprintf(stderr, "new input from entity: %s\n", entity->name);
1915 if (entity->content == NULL) {
1916 switch (entity->etype) {
1917 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1918 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1920 ctxt->sax->error(ctxt->userData,
1921 "xmlNewEntityInputStream unparsed entity !\n");
1922 break;
1923 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1924 case XML_EXTERNAL_PARAMETER_ENTITY:
1925 return(xmlLoadExternalEntity((char *) entity->URI,
1926 (char *) entity->ExternalID, ctxt));
1927 case XML_INTERNAL_GENERAL_ENTITY:
1928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1929 ctxt->sax->error(ctxt->userData,
1930 "Internal entity %s without content !\n", entity->name);
1931 break;
1932 case XML_INTERNAL_PARAMETER_ENTITY:
1933 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1935 ctxt->sax->error(ctxt->userData,
1936 "Internal parameter entity %s without content !\n", entity->name);
1937 break;
1938 case XML_INTERNAL_PREDEFINED_ENTITY:
1939 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1941 ctxt->sax->error(ctxt->userData,
1942 "Predefined entity %s without content !\n", entity->name);
1943 break;
1944 }
1945 return(NULL);
1946 }
1947 input = xmlNewInputStream(ctxt);
1948 if (input == NULL) {
1949 return(NULL);
1950 }
1951 input->filename = (char *) entity->URI;
1952 input->base = entity->content;
1953 input->cur = entity->content;
1954 input->length = entity->length;
1955 return(input);
1956}
1957
1958/**
1959 * xmlNewStringInputStream:
1960 * @ctxt: an XML parser context
1961 * @buffer: an memory buffer
1962 *
1963 * Create a new input stream based on a memory buffer.
1964 * Returns the new input stream
1965 */
1966xmlParserInputPtr
1967xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1968 xmlParserInputPtr input;
1969
1970 if (buffer == NULL) {
1971 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1973 ctxt->sax->error(ctxt->userData,
1974 "internal: xmlNewStringInputStream string = NULL\n");
1975 return(NULL);
1976 }
1977 if (xmlParserDebugEntities)
1978 fprintf(stderr, "new fixed input: %.30s\n", buffer);
1979 input = xmlNewInputStream(ctxt);
1980 if (input == NULL) {
1981 return(NULL);
1982 }
1983 input->base = buffer;
1984 input->cur = buffer;
1985 input->length = xmlStrlen(buffer);
1986 return(input);
1987}
1988
1989/**
1990 * xmlNewInputFromFile:
1991 * @ctxt: an XML parser context
1992 * @filename: the filename to use as entity
1993 *
1994 * Create a new input stream based on a file.
1995 *
1996 * Returns the new input stream or NULL in case of error
1997 */
1998xmlParserInputPtr
1999xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2000 xmlParserInputBufferPtr buf;
2001 xmlParserInputPtr inputStream;
2002 char *directory = NULL;
2003 xmlChar *URI = NULL;
2004
2005 if (xmlParserDebugEntities)
2006 fprintf(stderr, "new input from file: %s\n", filename);
2007 if (ctxt == NULL) return(NULL);
2008 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2009 if (buf == NULL)
2010 return(NULL);
2011
2012 URI = xmlStrdup((xmlChar *) filename);
Daniel Veillard04698d92000-09-17 16:00:22 +00002013 directory = xmlParserGetDirectory((const char *) URI);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002014
2015 inputStream = xmlNewInputStream(ctxt);
2016 if (inputStream == NULL) {
2017 if (directory != NULL) xmlFree((char *) directory);
2018 if (URI != NULL) xmlFree((char *) URI);
2019 return(NULL);
2020 }
2021
Daniel Veillard04698d92000-09-17 16:00:22 +00002022 inputStream->filename = (const char *) URI;
Daniel Veillardb1059e22000-09-16 14:02:43 +00002023 inputStream->directory = directory;
2024 inputStream->buf = buf;
2025
2026 inputStream->base = inputStream->buf->buffer->content;
2027 inputStream->cur = inputStream->buf->buffer->content;
2028 if ((ctxt->directory == NULL) && (directory != NULL))
2029 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2030 return(inputStream);
2031}
2032
2033/************************************************************************
2034 * *
2035 * Commodity functions to handle parser contexts *
2036 * *
2037 ************************************************************************/
2038
2039/**
2040 * xmlInitParserCtxt:
2041 * @ctxt: an XML parser context
2042 *
2043 * Initialize a parser context
2044 */
2045
2046void
2047xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2048{
2049 xmlSAXHandler *sax;
2050
2051 xmlDefaultSAXHandlerInit();
2052
2053 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2054 if (sax == NULL) {
2055 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
2056 }
Daniel Veillard4fb87ee2000-09-19 12:25:59 +00002057 else
2058 memset(sax, 0, sizeof(xmlSAXHandler));
Daniel Veillardb1059e22000-09-16 14:02:43 +00002059
2060 /* Allocate the Input stack */
2061 ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
2062 if (ctxt->inputTab == NULL) {
2063 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
2064 ctxt->inputNr = 0;
2065 ctxt->inputMax = 0;
2066 ctxt->input = NULL;
2067 return;
2068 }
2069 ctxt->inputNr = 0;
2070 ctxt->inputMax = 5;
2071 ctxt->input = NULL;
2072
2073 ctxt->version = NULL;
2074 ctxt->encoding = NULL;
2075 ctxt->standalone = -1;
2076 ctxt->hasExternalSubset = 0;
2077 ctxt->hasPErefs = 0;
2078 ctxt->html = 0;
2079 ctxt->external = 0;
2080 ctxt->instate = XML_PARSER_START;
2081 ctxt->token = 0;
2082 ctxt->directory = NULL;
2083
2084 /* Allocate the Node stack */
2085 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2086 if (ctxt->nodeTab == NULL) {
2087 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
2088 ctxt->nodeNr = 0;
2089 ctxt->nodeMax = 0;
2090 ctxt->node = NULL;
2091 ctxt->inputNr = 0;
2092 ctxt->inputMax = 0;
2093 ctxt->input = NULL;
2094 return;
2095 }
2096 ctxt->nodeNr = 0;
2097 ctxt->nodeMax = 10;
2098 ctxt->node = NULL;
2099
2100 /* Allocate the Name stack */
2101 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2102 if (ctxt->nameTab == NULL) {
2103 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
2104 ctxt->nodeNr = 0;
2105 ctxt->nodeMax = 0;
2106 ctxt->node = NULL;
2107 ctxt->inputNr = 0;
2108 ctxt->inputMax = 0;
2109 ctxt->input = NULL;
2110 ctxt->nameNr = 0;
2111 ctxt->nameMax = 0;
2112 ctxt->name = NULL;
2113 return;
2114 }
2115 ctxt->nameNr = 0;
2116 ctxt->nameMax = 10;
2117 ctxt->name = NULL;
2118
2119 /* Allocate the space stack */
2120 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2121 if (ctxt->spaceTab == NULL) {
2122 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
2123 ctxt->nodeNr = 0;
2124 ctxt->nodeMax = 0;
2125 ctxt->node = NULL;
2126 ctxt->inputNr = 0;
2127 ctxt->inputMax = 0;
2128 ctxt->input = NULL;
2129 ctxt->nameNr = 0;
2130 ctxt->nameMax = 0;
2131 ctxt->name = NULL;
2132 ctxt->spaceNr = 0;
2133 ctxt->spaceMax = 0;
2134 ctxt->space = NULL;
2135 return;
2136 }
2137 ctxt->spaceNr = 1;
2138 ctxt->spaceMax = 10;
2139 ctxt->spaceTab[0] = -1;
2140 ctxt->space = &ctxt->spaceTab[0];
2141
2142 if (sax == NULL) {
2143 ctxt->sax = &xmlDefaultSAXHandler;
2144 } else {
2145 ctxt->sax = sax;
2146 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2147 }
2148 ctxt->userData = ctxt;
2149 ctxt->myDoc = NULL;
2150 ctxt->wellFormed = 1;
2151 ctxt->valid = 1;
2152 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2153 ctxt->pedantic = xmlPedanticParserDefaultValue;
2154 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2155 ctxt->vctxt.userData = ctxt;
2156 if (ctxt->validate) {
2157 ctxt->vctxt.error = xmlParserValidityError;
2158 if (xmlGetWarningsDefaultValue == 0)
2159 ctxt->vctxt.warning = NULL;
2160 else
2161 ctxt->vctxt.warning = xmlParserValidityWarning;
2162 /* Allocate the Node stack */
2163 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2164 if (ctxt->vctxt.nodeTab == NULL) {
2165 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
2166 ctxt->vctxt.nodeMax = 0;
2167 ctxt->validate = 0;
2168 ctxt->vctxt.error = NULL;
2169 ctxt->vctxt.warning = NULL;
2170 } else {
2171 ctxt->vctxt.nodeNr = 0;
2172 ctxt->vctxt.nodeMax = 4;
2173 ctxt->vctxt.node = NULL;
2174 }
2175 } else {
2176 ctxt->vctxt.error = NULL;
2177 ctxt->vctxt.warning = NULL;
2178 }
2179 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2180 ctxt->record_info = 0;
2181 ctxt->nbChars = 0;
2182 ctxt->checkIndex = 0;
2183 ctxt->inSubset = 0;
2184 ctxt->errNo = XML_ERR_OK;
2185 ctxt->depth = 0;
2186 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2187 xmlInitNodeInfoSeq(&ctxt->node_seq);
2188}
2189
2190/**
2191 * xmlFreeParserCtxt:
2192 * @ctxt: an XML parser context
2193 *
2194 * Free all the memory used by a parser context. However the parsed
2195 * document in ctxt->myDoc is not freed.
2196 */
2197
2198void
2199xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2200{
2201 xmlParserInputPtr input;
2202 xmlChar *oldname;
2203
2204 if (ctxt == NULL) return;
2205
2206 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2207 xmlFreeInputStream(input);
2208 }
2209 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2210 xmlFree(oldname);
2211 }
2212 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2213 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2214 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2215 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2216 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2217 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2218 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2219 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2220 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2221 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2222 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2223 xmlFree(ctxt->sax);
2224 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2225 xmlFree(ctxt);
2226}
2227
2228/**
2229 * xmlNewParserCtxt:
2230 *
2231 * Allocate and initialize a new parser context.
2232 *
2233 * Returns the xmlParserCtxtPtr or NULL
2234 */
2235
2236xmlParserCtxtPtr
2237xmlNewParserCtxt()
2238{
2239 xmlParserCtxtPtr ctxt;
2240
2241 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2242 if (ctxt == NULL) {
2243 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
2244 perror("malloc");
2245 return(NULL);
2246 }
2247 memset(ctxt, 0, sizeof(xmlParserCtxt));
2248 xmlInitParserCtxt(ctxt);
2249 return(ctxt);
2250}
2251
2252/************************************************************************
2253 * *
2254 * Handling of node informations *
2255 * *
2256 ************************************************************************/
2257
2258/**
2259 * xmlClearParserCtxt:
2260 * @ctxt: an XML parser context
2261 *
2262 * Clear (release owned resources) and reinitialize a parser context
2263 */
2264
2265void
2266xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2267{
2268 xmlClearNodeInfoSeq(&ctxt->node_seq);
2269 xmlInitParserCtxt(ctxt);
2270}
2271
2272/**
2273 * xmlParserFindNodeInfo:
2274 * @ctxt: an XML parser context
2275 * @node: an XML node within the tree
2276 *
2277 * Find the parser node info struct for a given node
2278 *
2279 * Returns an xmlParserNodeInfo block pointer or NULL
2280 */
2281const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2282 const xmlNode* node)
2283{
2284 unsigned long pos;
2285
2286 /* Find position where node should be at */
2287 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2288 if ( ctx->node_seq.buffer[pos].node == node )
2289 return &ctx->node_seq.buffer[pos];
2290 else
2291 return NULL;
2292}
2293
2294
2295/**
2296 * xmlInitNodeInfoSeq:
2297 * @seq: a node info sequence pointer
2298 *
2299 * -- Initialize (set to initial state) node info sequence
2300 */
2301void
2302xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2303{
2304 seq->length = 0;
2305 seq->maximum = 0;
2306 seq->buffer = NULL;
2307}
2308
2309/**
2310 * xmlClearNodeInfoSeq:
2311 * @seq: a node info sequence pointer
2312 *
2313 * -- Clear (release memory and reinitialize) node
2314 * info sequence
2315 */
2316void
2317xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2318{
2319 if ( seq->buffer != NULL )
2320 xmlFree(seq->buffer);
2321 xmlInitNodeInfoSeq(seq);
2322}
2323
2324
2325/**
2326 * xmlParserFindNodeInfoIndex:
2327 * @seq: a node info sequence pointer
2328 * @node: an XML node pointer
2329 *
2330 *
2331 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2332 * the given node is or should be at in a sorted sequence
2333 *
2334 * Returns a long indicating the position of the record
2335 */
2336unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2337 const xmlNode* node)
2338{
2339 unsigned long upper, lower, middle;
2340 int found = 0;
2341
2342 /* Do a binary search for the key */
2343 lower = 1;
2344 upper = seq->length;
2345 middle = 0;
2346 while ( lower <= upper && !found) {
2347 middle = lower + (upper - lower) / 2;
2348 if ( node == seq->buffer[middle - 1].node )
2349 found = 1;
2350 else if ( node < seq->buffer[middle - 1].node )
2351 upper = middle - 1;
2352 else
2353 lower = middle + 1;
2354 }
2355
2356 /* Return position */
2357 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2358 return middle;
2359 else
2360 return middle - 1;
2361}
2362
2363
2364/**
2365 * xmlParserAddNodeInfo:
2366 * @ctxt: an XML parser context
2367 * @info: a node info sequence pointer
2368 *
2369 * Insert node info record into the sorted sequence
2370 */
2371void
2372xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2373 const xmlParserNodeInfo* info)
2374{
2375 unsigned long pos;
2376 static unsigned int block_size = 5;
2377
2378 /* Find pos and check to see if node is already in the sequence */
2379 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2380 if ( pos < ctxt->node_seq.length
2381 && ctxt->node_seq.buffer[pos].node == info->node ) {
2382 ctxt->node_seq.buffer[pos] = *info;
2383 }
2384
2385 /* Otherwise, we need to add new node to buffer */
2386 else {
2387 /* Expand buffer by 5 if needed */
2388 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2389 xmlParserNodeInfo* tmp_buffer;
2390 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2391 *(ctxt->node_seq.maximum + block_size));
2392
2393 if ( ctxt->node_seq.buffer == NULL )
2394 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2395 else
2396 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2397
2398 if ( tmp_buffer == NULL ) {
2399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2400 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2401 ctxt->errNo = XML_ERR_NO_MEMORY;
2402 return;
2403 }
2404 ctxt->node_seq.buffer = tmp_buffer;
2405 ctxt->node_seq.maximum += block_size;
2406 }
2407
2408 /* If position is not at end, move elements out of the way */
2409 if ( pos != ctxt->node_seq.length ) {
2410 unsigned long i;
2411
2412 for ( i = ctxt->node_seq.length; i > pos; i-- )
2413 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2414 }
2415
2416 /* Copy element and increase length */
2417 ctxt->node_seq.buffer[pos] = *info;
2418 ctxt->node_seq.length++;
2419 }
2420}
2421
2422/************************************************************************
2423 * *
2424 * Deprecated functions kept for compatibility *
2425 * *
2426 ************************************************************************/
2427
/*
 * xmlLangIsAsciiLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is in [a-z] or [A-Z], 0 otherwise
 */
static int
xmlLangIsAsciiLetter(xmlChar c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/*
 * xmlCheckLanguageID
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The "i-" and "x-" prefixes must be followed by at least one letter, as
 * required by productions [36] and [37].
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the prefix, then one or more letters
         */
        cur += 2;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return(0);
        while (xmlLangIsAsciiLetter(cur[0])) /* non input consuming */
            cur++;
    } else if (xmlLangIsAsciiLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return(0);
        cur++;
    } else
        return(0);

    /*
     * Any number of Subcodes, each one being '-' plus one or more letters
     */
    while (cur[0] != 0) { /* non input consuming */
        if (cur[0] != '-')
            return(0);
        cur++;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return(0);
        while (xmlLangIsAsciiLetter(cur[0])) /* non input consuming */
            cur++;
    }
    return(1);
}
2498
2499/**
2500 * xmlDecodeEntities:
2501 * @ctxt: the parser context
2502 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2503 * @len: the len to decode (in bytes !), -1 for no size limit
2504 * @end: an end marker xmlChar, 0 if none
2505 * @end2: an end marker xmlChar, 0 if none
2506 * @end3: an end marker xmlChar, 0 if none
2507 *
2508 * This function is deprecated, we now always process entities content
2509 * through xmlStringDecodeEntities
2510 *
2511 * TODO: remove it in next major release.
2512 *
2513 * [67] Reference ::= EntityRef | CharRef
2514 *
2515 * [69] PEReference ::= '%' Name ';'
2516 *
2517 * Returns A newly allocated string with the substitution done. The caller
2518 * must deallocate it !
2519 */
2520xmlChar *
2521xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2522 xmlChar end, xmlChar end2, xmlChar end3) {
2523#if 0
2524 xmlChar *buffer = NULL;
2525 unsigned int buffer_size = 0;
2526 unsigned int nbchars = 0;
2527
2528 xmlChar *current = NULL;
2529 xmlEntityPtr ent;
2530 unsigned int max = (unsigned int) len;
2531 int c,l;
2532#endif
2533
2534 static int deprecated = 0;
2535 if (!deprecated) {
2536 fprintf(stderr, "xmlDecodeEntities() deprecated function reached\n");
2537 deprecated = 1;
2538 }
2539
2540#if 0
2541 if (ctxt->depth > 40) {
2542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2543 ctxt->sax->error(ctxt->userData,
2544 "Detected entity reference loop\n");
2545 ctxt->wellFormed = 0;
2546 ctxt->disableSAX = 1;
2547 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2548 return(NULL);
2549 }
2550
2551 /*
2552 * allocate a translation buffer.
2553 */
2554 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2555 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2556 if (buffer == NULL) {
2557 perror("xmlDecodeEntities: malloc failed");
2558 return(NULL);
2559 }
2560
2561 /*
2562 * Ok loop until we reach one of the ending char or a size limit.
2563 */
2564 GROW;
2565 c = CUR_CHAR(l);
2566 while ((nbchars < max) && (c != end) && /* NOTUSED */
2567 (c != end2) && (c != end3)) {
2568 GROW;
2569 if (c == 0) break;
2570 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2571 int val = xmlParseCharRef(ctxt);
2572 COPY_BUF(0,buffer,nbchars,val);
2573 NEXTL(l);
2574 } else if ((c == '&') && (ctxt->token != '&') &&
2575 (what & XML_SUBSTITUTE_REF)) {
2576 if (xmlParserDebugEntities)
2577 fprintf(stderr, "decoding Entity Reference\n");
2578 ent = xmlParseEntityRef(ctxt);
2579 if ((ent != NULL) &&
2580 (ctxt->replaceEntities != 0)) {
2581 current = ent->content;
2582 while (*current != 0) { /* non input consuming loop */
2583 buffer[nbchars++] = *current++;
2584 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2585 growBuffer(buffer);
2586 }
2587 }
2588 } else if (ent != NULL) {
2589 const xmlChar *cur = ent->name;
2590
2591 buffer[nbchars++] = '&';
2592 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2593 growBuffer(buffer);
2594 }
2595 while (*cur != 0) { /* non input consuming loop */
2596 buffer[nbchars++] = *cur++;
2597 }
2598 buffer[nbchars++] = ';';
2599 }
2600 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2601 /*
2602 * a PEReference induce to switch the entity flow,
2603 * we break here to flush the current set of chars
2604 * parsed if any. We will be called back later.
2605 */
2606 if (xmlParserDebugEntities)
2607 fprintf(stderr, "decoding PE Reference\n");
2608 if (nbchars != 0) break;
2609
2610 xmlParsePEReference(ctxt);
2611
2612 /*
2613 * Pop-up of finished entities.
2614 */
2615 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2616 xmlPopInput(ctxt);
2617
2618 break;
2619 } else {
2620 COPY_BUF(l,buffer,nbchars,c);
2621 NEXTL(l);
2622 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2623 growBuffer(buffer);
2624 }
2625 }
2626 c = CUR_CHAR(l);
2627 }
2628 buffer[nbchars++] = 0;
2629 return(buffer);
2630#endif
2631 return(NULL);
2632}
2633
2634/**
2635 * xmlNamespaceParseNCName:
2636 * @ctxt: an XML parser context
2637 *
2638 * parse an XML namespace name.
2639 *
2640 * TODO: this seems not in use anymore, the namespace handling is done on
2641 * top of the SAX interfaces, i.e. not on raw input.
2642 *
2643 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2644 *
2645 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2646 * CombiningChar | Extender
2647 *
2648 * Returns the namespace name or NULL
2649 */
2650
2651xmlChar *
2652xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2653#if 0
2654 xmlChar buf[XML_MAX_NAMELEN + 5];
2655 int len = 0, l;
2656 int cur = CUR_CHAR(l);
2657#endif
2658
2659 static int deprecated = 0;
2660 if (!deprecated) {
2661 fprintf(stderr, "xmlNamespaceParseNCName() deprecated function reached\n");
2662 deprecated = 1;
2663 }
2664
2665#if 0
2666 /* load first the value of the char !!! */
2667 GROW;
2668 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2669
2670fprintf(stderr, "xmlNamespaceParseNCName: reached loop 3\n");
2671 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2672 (cur == '.') || (cur == '-') ||
2673 (cur == '_') ||
2674 (IS_COMBINING(cur)) ||
2675 (IS_EXTENDER(cur))) {
2676 COPY_BUF(l,buf,len,cur);
2677 NEXTL(l);
2678 cur = CUR_CHAR(l);
2679 if (len >= XML_MAX_NAMELEN) {
2680 fprintf(stderr,
2681 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2682 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2683 (cur == '.') || (cur == '-') ||
2684 (cur == '_') ||
2685 (IS_COMBINING(cur)) ||
2686 (IS_EXTENDER(cur))) {
2687 NEXTL(l);
2688 cur = CUR_CHAR(l);
2689 }
2690 break;
2691 }
2692 }
2693 return(xmlStrndup(buf, len));
2694#endif
2695 return(NULL);
2696}
2697
2698/**
2699 * xmlNamespaceParseQName:
2700 * @ctxt: an XML parser context
2701 * @prefix: a xmlChar **
2702 *
2703 * TODO: this seems not in use anymore, the namespace handling is done on
2704 * top of the SAX interfaces, i.e. not on raw input.
2705 *
2706 * parse an XML qualified name
2707 *
2708 * [NS 5] QName ::= (Prefix ':')? LocalPart
2709 *
2710 * [NS 6] Prefix ::= NCName
2711 *
2712 * [NS 7] LocalPart ::= NCName
2713 *
2714 * Returns the local part, and prefix is updated
2715 * to get the Prefix if any.
2716 */
2717
2718xmlChar *
2719xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2720
2721 static int deprecated = 0;
2722 if (!deprecated) {
2723 fprintf(stderr, "xmlNamespaceParseQName() deprecated function reached\n");
2724 deprecated = 1;
2725 }
2726
2727#if 0
2728 xmlChar *ret = NULL;
2729
2730 *prefix = NULL;
2731 ret = xmlNamespaceParseNCName(ctxt);
2732 if (RAW == ':') {
2733 *prefix = ret;
2734 NEXT;
2735 ret = xmlNamespaceParseNCName(ctxt);
2736 }
2737
2738 return(ret);
2739#endif
2740 return(NULL);
2741}
2742
2743/**
2744 * xmlNamespaceParseNSDef:
2745 * @ctxt: an XML parser context
2746 *
2747 * parse a namespace prefix declaration
2748 *
2749 * TODO: this seems not in use anymore, the namespace handling is done on
2750 * top of the SAX interfaces, i.e. not on raw input.
2751 *
2752 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2753 *
2754 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2755 *
2756 * Returns the namespace name
2757 */
2758
2759xmlChar *
2760xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2761 static int deprecated = 0;
2762 if (!deprecated) {
2763 fprintf(stderr, "xmlNamespaceParseNSDef() deprecated function reached\n");
2764 deprecated = 1;
2765 }
2766 return(NULL);
2767#if 0
2768 xmlChar *name = NULL;
2769
2770 if ((RAW == 'x') && (NXT(1) == 'm') &&
2771 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2772 (NXT(4) == 's')) {
2773 SKIP(5);
2774 if (RAW == ':') {
2775 NEXT;
2776 name = xmlNamespaceParseNCName(ctxt);
2777 }
2778 }
2779 return(name);
2780#endif
2781}
2782
2783/**
2784 * xmlParseQuotedString:
2785 * @ctxt: an XML parser context
2786 *
2787 * Parse and return a string between quotes or doublequotes
2788 *
2789 * TODO: Deprecated, to be removed at next drop of binary compatibility
2790 *
2791 * Returns the string parser or NULL.
2792 */
2793xmlChar *
2794xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2795 static int deprecated = 0;
2796 if (!deprecated) {
2797 fprintf(stderr, "xmlParseQuotedString() deprecated function reached\n");
2798 deprecated = 1;
2799 }
2800 return(NULL);
2801
2802#if 0
2803 xmlChar *buf = NULL;
2804 int len = 0,l;
2805 int size = XML_PARSER_BUFFER_SIZE;
2806 int c;
2807
2808 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2809 if (buf == NULL) {
2810 fprintf(stderr, "malloc of %d byte failed\n", size);
2811 return(NULL);
2812 }
2813fprintf(stderr, "xmlParseQuotedString: reached loop 4\n");
2814 if (RAW == '"') {
2815 NEXT;
2816 c = CUR_CHAR(l);
2817 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2818 if (len + 5 >= size) {
2819 size *= 2;
2820 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2821 if (buf == NULL) {
2822 fprintf(stderr, "realloc of %d byte failed\n", size);
2823 return(NULL);
2824 }
2825 }
2826 COPY_BUF(l,buf,len,c);
2827 NEXTL(l);
2828 c = CUR_CHAR(l);
2829 }
2830 if (c != '"') {
2831 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2833 ctxt->sax->error(ctxt->userData,
2834 "String not closed \"%.50s\"\n", buf);
2835 ctxt->wellFormed = 0;
2836 ctxt->disableSAX = 1;
2837 } else {
2838 NEXT;
2839 }
2840 } else if (RAW == '\''){
2841 NEXT;
2842 c = CUR;
2843 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2844 if (len + 1 >= size) {
2845 size *= 2;
2846 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2847 if (buf == NULL) {
2848 fprintf(stderr, "realloc of %d byte failed\n", size);
2849 return(NULL);
2850 }
2851 }
2852 buf[len++] = c;
2853 NEXT;
2854 c = CUR;
2855 }
2856 if (RAW != '\'') {
2857 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2859 ctxt->sax->error(ctxt->userData,
2860 "String not closed \"%.50s\"\n", buf);
2861 ctxt->wellFormed = 0;
2862 ctxt->disableSAX = 1;
2863 } else {
2864 NEXT;
2865 }
2866 }
2867 return(buf);
2868#endif
2869}
2870
2871/**
2872 * xmlParseNamespace:
2873 * @ctxt: an XML parser context
2874 *
2875 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2876 *
2877 * This is what the older xml-name Working Draft specified, a bunch of
2878 * other stuff may still rely on it, so support is still here as
2879 * if it was declared on the root of the Tree:-(
2880 *
2881 * TODO: remove from library
2882 *
2883 * To be removed at next drop of binary compatibility
2884 */
2885
2886void
2887xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2888 static int deprecated = 0;
2889 if (!deprecated) {
2890 fprintf(stderr, "xmlParseNamespace() deprecated function reached\n");
2891 deprecated = 1;
2892 }
2893
2894#if 0
2895 xmlChar *href = NULL;
2896 xmlChar *prefix = NULL;
2897 int garbage = 0;
2898
2899 /*
2900 * We just skipped "namespace" or "xml:namespace"
2901 */
2902 SKIP_BLANKS;
2903
2904fprintf(stderr, "xmlParseNamespace: reached loop 5\n");
2905 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2906 /*
2907 * We can have "ns" or "prefix" attributes
2908 * Old encoding as 'href' or 'AS' attributes is still supported
2909 */
2910 if ((RAW == 'n') && (NXT(1) == 's')) {
2911 garbage = 0;
2912 SKIP(2);
2913 SKIP_BLANKS;
2914
2915 if (RAW != '=') continue;
2916 NEXT;
2917 SKIP_BLANKS;
2918
2919 href = xmlParseQuotedString(ctxt);
2920 SKIP_BLANKS;
2921 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2922 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2923 garbage = 0;
2924 SKIP(4);
2925 SKIP_BLANKS;
2926
2927 if (RAW != '=') continue;
2928 NEXT;
2929 SKIP_BLANKS;
2930
2931 href = xmlParseQuotedString(ctxt);
2932 SKIP_BLANKS;
2933 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2934 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2935 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2936 garbage = 0;
2937 SKIP(6);
2938 SKIP_BLANKS;
2939
2940 if (RAW != '=') continue;
2941 NEXT;
2942 SKIP_BLANKS;
2943
2944 prefix = xmlParseQuotedString(ctxt);
2945 SKIP_BLANKS;
2946 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
2947 garbage = 0;
2948 SKIP(2);
2949 SKIP_BLANKS;
2950
2951 if (RAW != '=') continue;
2952 NEXT;
2953 SKIP_BLANKS;
2954
2955 prefix = xmlParseQuotedString(ctxt);
2956 SKIP_BLANKS;
2957 } else if ((RAW == '?') && (NXT(1) == '>')) {
2958 garbage = 0;
2959 NEXT;
2960 } else {
2961 /*
2962 * Found garbage when parsing the namespace
2963 */
2964 if (!garbage) {
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "xmlParseNamespace found garbage\n");
2968 }
2969 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
2970 ctxt->wellFormed = 0;
2971 ctxt->disableSAX = 1;
2972 NEXT;
2973 }
2974 }
2975
2976 MOVETO_ENDTAG(CUR_PTR);
2977 NEXT;
2978
2979 /*
2980 * Register the DTD.
2981 if (href != NULL)
2982 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
2983 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
2984 */
2985
2986 if (prefix != NULL) xmlFree(prefix);
2987 if (href != NULL) xmlFree(href);
2988#endif
2989}
2990
2991/**
2992 * xmlScanName:
2993 * @ctxt: an XML parser context
2994 *
2995 * Trickery: parse an XML name but without consuming the input flow
2996 * Needed for rollback cases. Used only when parsing entities references.
2997 *
2998 * TODO: seems deprecated now, only used in the default part of
2999 * xmlParserHandleReference
3000 *
3001 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3002 * CombiningChar | Extender
3003 *
3004 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3005 *
3006 * [6] Names ::= Name (S Name)*
3007 *
3008 * Returns the Name parsed or NULL
3009 */
3010
3011xmlChar *
3012xmlScanName(xmlParserCtxtPtr ctxt) {
3013 static int deprecated = 0;
3014 if (!deprecated) {
3015 fprintf(stderr, "xmlScanName() deprecated function reached\n");
3016 deprecated = 1;
3017 }
3018 return(NULL);
3019
3020#if 0
3021 xmlChar buf[XML_MAX_NAMELEN];
3022 int len = 0;
3023
3024 GROW;
3025 if (!IS_LETTER(RAW) && (RAW != '_') &&
3026 (RAW != ':')) {
3027 return(NULL);
3028 }
3029
3030
3031 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3032 (NXT(len) == '.') || (NXT(len) == '-') ||
3033 (NXT(len) == '_') || (NXT(len) == ':') ||
3034 (IS_COMBINING(NXT(len))) ||
3035 (IS_EXTENDER(NXT(len)))) {
3036 GROW;
3037 buf[len] = NXT(len);
3038 len++;
3039 if (len >= XML_MAX_NAMELEN) {
3040 fprintf(stderr,
3041 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3042 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3043 (IS_DIGIT(NXT(len))) ||
3044 (NXT(len) == '.') || (NXT(len) == '-') ||
3045 (NXT(len) == '_') || (NXT(len) == ':') ||
3046 (IS_COMBINING(NXT(len))) ||
3047 (IS_EXTENDER(NXT(len))))
3048 len++;
3049 break;
3050 }
3051 }
3052 return(xmlStrndup(buf, len));
3053#endif
3054}
3055
3056/**
3057 * xmlParserHandleReference:
3058 * @ctxt: the parser context
3059 *
3060 * TODO: Remove, now deprecated ... the test is done directly in the
3061 * content parsing
3062 * routines.
3063 *
3064 * [67] Reference ::= EntityRef | CharRef
3065 *
3066 * [68] EntityRef ::= '&' Name ';'
3067 *
3068 * [ WFC: Entity Declared ]
3069 * the Name given in the entity reference must match that in an entity
3070 * declaration, except that well-formed documents need not declare any
3071 * of the following entities: amp, lt, gt, apos, quot.
3072 *
3073 * [ WFC: Parsed Entity ]
3074 * An entity reference must not contain the name of an unparsed entity
3075 *
3076 * [66] CharRef ::= '&#' [0-9]+ ';' |
3077 * '&#x' [0-9a-fA-F]+ ';'
3078 *
3079 * A PEReference may have been detectect in the current input stream
3080 * the handling is done accordingly to
3081 * http://www.w3.org/TR/REC-xml#entproc
3082 */
3083void
3084xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3085 static int deprecated = 0;
3086 if (!deprecated) {
3087 fprintf(stderr, "xmlParserHandleReference() deprecated function reached\n");
3088 deprecated = 1;
3089 }
3090
3091#if 0
3092 xmlParserInputPtr input;
3093 xmlChar *name;
3094 xmlEntityPtr ent = NULL;
3095
3096 if (ctxt->token != 0) {
3097 return;
3098 }
3099 if (RAW != '&') return;
3100 GROW;
3101 if ((RAW == '&') && (NXT(1) == '#')) {
3102 switch(ctxt->instate) {
3103 case XML_PARSER_ENTITY_DECL:
3104 case XML_PARSER_PI:
3105 case XML_PARSER_CDATA_SECTION:
3106 case XML_PARSER_COMMENT:
3107 case XML_PARSER_SYSTEM_LITERAL:
3108 /* we just ignore it there */
3109 return;
3110 case XML_PARSER_START_TAG:
3111 return;
3112 case XML_PARSER_END_TAG:
3113 return;
3114 case XML_PARSER_EOF:
3115 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3117 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3118 ctxt->wellFormed = 0;
3119 ctxt->disableSAX = 1;
3120 return;
3121 case XML_PARSER_PROLOG:
3122 case XML_PARSER_START:
3123 case XML_PARSER_MISC:
3124 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3126 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3127 ctxt->wellFormed = 0;
3128 ctxt->disableSAX = 1;
3129 return;
3130 case XML_PARSER_EPILOG:
3131 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3133 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3134 ctxt->wellFormed = 0;
3135 ctxt->disableSAX = 1;
3136 return;
3137 case XML_PARSER_DTD:
3138 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3140 ctxt->sax->error(ctxt->userData,
3141 "CharRef are forbiden in DTDs!\n");
3142 ctxt->wellFormed = 0;
3143 ctxt->disableSAX = 1;
3144 return;
3145 case XML_PARSER_ENTITY_VALUE:
3146 /*
3147 * NOTE: in the case of entity values, we don't do the
3148 * substitution here since we need the literal
3149 * entity value to be able to save the internal
3150 * subset of the document.
3151 * This will be handled by xmlStringDecodeEntities
3152 */
3153 return;
3154 case XML_PARSER_CONTENT:
3155 return;
3156 case XML_PARSER_ATTRIBUTE_VALUE:
3157 /* ctxt->token = xmlParseCharRef(ctxt); */
3158 return;
3159 }
3160 return;
3161 }
3162
3163 switch(ctxt->instate) {
3164 case XML_PARSER_CDATA_SECTION:
3165 return;
3166 case XML_PARSER_PI:
3167 case XML_PARSER_COMMENT:
3168 case XML_PARSER_SYSTEM_LITERAL:
3169 case XML_PARSER_CONTENT:
3170 return;
3171 case XML_PARSER_START_TAG:
3172 return;
3173 case XML_PARSER_END_TAG:
3174 return;
3175 case XML_PARSER_EOF:
3176 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3178 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3179 ctxt->wellFormed = 0;
3180 ctxt->disableSAX = 1;
3181 return;
3182 case XML_PARSER_PROLOG:
3183 case XML_PARSER_START:
3184 case XML_PARSER_MISC:
3185 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3187 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3188 ctxt->wellFormed = 0;
3189 ctxt->disableSAX = 1;
3190 return;
3191 case XML_PARSER_EPILOG:
3192 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3194 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3195 ctxt->wellFormed = 0;
3196 ctxt->disableSAX = 1;
3197 return;
3198 case XML_PARSER_ENTITY_VALUE:
3199 /*
3200 * NOTE: in the case of entity values, we don't do the
3201 * substitution here since we need the literal
3202 * entity value to be able to save the internal
3203 * subset of the document.
3204 * This will be handled by xmlStringDecodeEntities
3205 */
3206 return;
3207 case XML_PARSER_ATTRIBUTE_VALUE:
3208 /*
3209 * NOTE: in the case of attributes values, we don't do the
3210 * substitution here unless we are in a mode where
3211 * the parser is explicitely asked to substitute
3212 * entities. The SAX callback is called with values
3213 * without entity substitution.
3214 * This will then be handled by xmlStringDecodeEntities
3215 */
3216 return;
3217 case XML_PARSER_ENTITY_DECL:
3218 /*
3219 * we just ignore it there
3220 * the substitution will be done once the entity is referenced
3221 */
3222 return;
3223 case XML_PARSER_DTD:
3224 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3226 ctxt->sax->error(ctxt->userData,
3227 "Entity references are forbiden in DTDs!\n");
3228 ctxt->wellFormed = 0;
3229 ctxt->disableSAX = 1;
3230 return;
3231 }
3232
3233/* TODO: this seems not reached anymore .... Verify ... */
3234fprintf(stderr, "Reached deprecated section in xmlParserHandleReference()\n");
3235fprintf(stderr, "Please forward the document to Daniel.Veillard@w3.org\n");
3236fprintf(stderr, "indicating the version: %s, thanks !\n", xmlParserVersion);
3237 NEXT;
3238 name = xmlScanName(ctxt);
3239 if (name == NULL) {
3240 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3242 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3243 ctxt->wellFormed = 0;
3244 ctxt->disableSAX = 1;
3245 ctxt->token = '&';
3246 return;
3247 }
3248 if (NXT(xmlStrlen(name)) != ';') {
3249 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3251 ctxt->sax->error(ctxt->userData,
3252 "Entity reference: ';' expected\n");
3253 ctxt->wellFormed = 0;
3254 ctxt->disableSAX = 1;
3255 ctxt->token = '&';
3256 xmlFree(name);
3257 return;
3258 }
3259 SKIP(xmlStrlen(name) + 1);
3260 if (ctxt->sax != NULL) {
3261 if (ctxt->sax->getEntity != NULL)
3262 ent = ctxt->sax->getEntity(ctxt->userData, name);
3263 }
3264
3265 /*
3266 * [ WFC: Entity Declared ]
3267 * the Name given in the entity reference must match that in an entity
3268 * declaration, except that well-formed documents need not declare any
3269 * of the following entities: amp, lt, gt, apos, quot.
3270 */
3271 if (ent == NULL)
3272 ent = xmlGetPredefinedEntity(name);
3273 if (ent == NULL) {
3274 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3276 ctxt->sax->error(ctxt->userData,
3277 "Entity reference: entity %s not declared\n",
3278 name);
3279 ctxt->wellFormed = 0;
3280 ctxt->disableSAX = 1;
3281 xmlFree(name);
3282 return;
3283 }
3284
3285 /*
3286 * [ WFC: Parsed Entity ]
3287 * An entity reference must not contain the name of an unparsed entity
3288 */
3289 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3290 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3292 ctxt->sax->error(ctxt->userData,
3293 "Entity reference to unparsed entity %s\n", name);
3294 ctxt->wellFormed = 0;
3295 ctxt->disableSAX = 1;
3296 }
3297
3298 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3299 ctxt->token = ent->content[0];
3300 xmlFree(name);
3301 return;
3302 }
3303 input = xmlNewEntityInputStream(ctxt, ent);
3304 xmlPushInput(ctxt, input);
3305 xmlFree(name);
3306#endif
3307 return;
3308}
3309
3310/**
3311 * xmlHandleEntity:
3312 * @ctxt: an XML parser context
3313 * @entity: an XML entity pointer.
3314 *
3315 * Default handling of defined entities, when should we define a new input
3316 * stream ? When do we just handle that as a set of chars ?
3317 *
3318 * OBSOLETE: to be removed at some point.
3319 */
3320
3321void
3322xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3323 static int deprecated = 0;
3324 if (!deprecated) {
3325 fprintf(stderr, "xmlHandleEntity() deprecated function reached\n");
3326 deprecated = 1;
3327 }
3328
3329#if 0
3330 int len;
3331 xmlParserInputPtr input;
3332
3333 if (entity->content == NULL) {
3334 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3336 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3337 entity->name);
3338 ctxt->wellFormed = 0;
3339 ctxt->disableSAX = 1;
3340 return;
3341 }
3342 len = xmlStrlen(entity->content);
3343 if (len <= 2) goto handle_as_char;
3344
3345 /*
3346 * Redefine its content as an input stream.
3347 */
3348 input = xmlNewEntityInputStream(ctxt, entity);
3349 xmlPushInput(ctxt, input);
3350 return;
3351
3352handle_as_char:
3353 /*
3354 * Just handle the content as a set of chars.
3355 */
3356 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3357 (ctxt->sax->characters != NULL))
3358 ctxt->sax->characters(ctxt->userData, entity->content, len);
3359#endif
3360}
3361