blob: 3e2e76f4a6f2c11559808aa9c4c1884f6609c7e0 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * Daniel.Veillard@w3.org
8 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
49
Daniel Veillard56a4cb82001-03-24 17:00:36 +000050void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000051
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000070 xmlInitMemory();
71
Owen Taylor3473f882001-02-23 17:55:21 +000072 if ((myversion / 10000) != (version / 10000)) {
73 xmlGenericError(xmlGenericErrorContext,
74 "Fatal: program compiled against libxml %d using libxml %d\n",
75 (version / 10000), (myversion / 10000));
76 exit(1);
77 }
78 if ((myversion / 100) < (version / 100)) {
79 xmlGenericError(xmlGenericErrorContext,
80 "Warning: program compiled against libxml %d using older %d\n",
81 (version / 100), (myversion / 100));
82 }
83}
84
85
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result; out: number of names actually stored
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.  The stored
 * pointers refer to the static table above and must not be deallocated.
 *
 * When @len or @result is NULL nothing is copied and the total number of
 * features is returned, so the call can be used to size the array.
 *
 * Returns -1 if *@len is negative or >= 1000, otherwise the total number
 * of features; *@len is lowered to that total when it was larger.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    /* Clamp the request to what is available and report it back. */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
157
158/*
159 * xmlGetFeature:
160 * @ctxt: an XML/HTML parser context
161 * @name: the feature name
162 * @result: location to store the result
163 *
164 * Read the current value of one feature of this parser instance
165 *
166 * Returns -1 in case or error, 0 otherwise
167 */
168int
169xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
170 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
171 return(-1);
172
173 if (!strcmp(name, "validate")) {
174 *((int *) result) = ctxt->validate;
175 } else if (!strcmp(name, "keep blanks")) {
176 *((int *) result) = ctxt->keepBlanks;
177 } else if (!strcmp(name, "disable SAX")) {
178 *((int *) result) = ctxt->disableSAX;
179 } else if (!strcmp(name, "fetch external entities")) {
180 *((int *) result) = ctxt->loadsubset;
181 } else if (!strcmp(name, "substitute entities")) {
182 *((int *) result) = ctxt->replaceEntities;
183 } else if (!strcmp(name, "gather line info")) {
184 *((int *) result) = ctxt->record_info;
185 } else if (!strcmp(name, "user data")) {
186 *((void **)result) = ctxt->userData;
187 } else if (!strcmp(name, "is html")) {
188 *((int *) result) = ctxt->html;
189 } else if (!strcmp(name, "is standalone")) {
190 *((int *) result) = ctxt->standalone;
191 } else if (!strcmp(name, "document")) {
192 *((xmlDocPtr *) result) = ctxt->myDoc;
193 } else if (!strcmp(name, "is well formed")) {
194 *((int *) result) = ctxt->wellFormed;
195 } else if (!strcmp(name, "is valid")) {
196 *((int *) result) = ctxt->valid;
197 } else if (!strcmp(name, "SAX block")) {
198 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
199 } else if (!strcmp(name, "SAX function internalSubset")) {
200 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
201 } else if (!strcmp(name, "SAX function isStandalone")) {
202 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
203 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
204 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
205 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
206 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
207 } else if (!strcmp(name, "SAX function resolveEntity")) {
208 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
209 } else if (!strcmp(name, "SAX function getEntity")) {
210 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
211 } else if (!strcmp(name, "SAX function entityDecl")) {
212 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
213 } else if (!strcmp(name, "SAX function notationDecl")) {
214 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
215 } else if (!strcmp(name, "SAX function attributeDecl")) {
216 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
217 } else if (!strcmp(name, "SAX function elementDecl")) {
218 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
219 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
220 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
221 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
222 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
223 } else if (!strcmp(name, "SAX function startDocument")) {
224 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
225 } else if (!strcmp(name, "SAX function endDocument")) {
226 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
227 } else if (!strcmp(name, "SAX function startElement")) {
228 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
229 } else if (!strcmp(name, "SAX function endElement")) {
230 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
231 } else if (!strcmp(name, "SAX function reference")) {
232 *((referenceSAXFunc *) result) = ctxt->sax->reference;
233 } else if (!strcmp(name, "SAX function characters")) {
234 *((charactersSAXFunc *) result) = ctxt->sax->characters;
235 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
236 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
237 } else if (!strcmp(name, "SAX function processingInstruction")) {
238 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
239 } else if (!strcmp(name, "SAX function comment")) {
240 *((commentSAXFunc *) result) = ctxt->sax->comment;
241 } else if (!strcmp(name, "SAX function warning")) {
242 *((warningSAXFunc *) result) = ctxt->sax->warning;
243 } else if (!strcmp(name, "SAX function error")) {
244 *((errorSAXFunc *) result) = ctxt->sax->error;
245 } else if (!strcmp(name, "SAX function fatalError")) {
246 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
247 } else if (!strcmp(name, "SAX function getParameterEntity")) {
248 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
249 } else if (!strcmp(name, "SAX function cdataBlock")) {
250 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
251 } else if (!strcmp(name, "SAX function externalSubset")) {
252 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
253 } else {
254 return(-1);
255 }
256 return(0);
257}
258
259/*
260 * xmlSetFeature:
261 * @ctxt: an XML/HTML parser context
262 * @name: the feature name
263 * @value: pointer to the location of the new value
264 *
265 * Change the current value of one feature of this parser instance
266 *
267 * Returns -1 in case or error, 0 otherwise
268 */
269int
270xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
271 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
272 return(-1);
273
274 if (!strcmp(name, "validate")) {
275 int newvalidate = *((int *) value);
276 if ((!ctxt->validate) && (newvalidate != 0)) {
277 if (ctxt->vctxt.warning == NULL)
278 ctxt->vctxt.warning = xmlParserValidityWarning;
279 if (ctxt->vctxt.error == NULL)
280 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000281 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000282 }
283 ctxt->validate = newvalidate;
284 } else if (!strcmp(name, "keep blanks")) {
285 ctxt->keepBlanks = *((int *) value);
286 } else if (!strcmp(name, "disable SAX")) {
287 ctxt->disableSAX = *((int *) value);
288 } else if (!strcmp(name, "fetch external entities")) {
289 ctxt->loadsubset = *((int *) value);
290 } else if (!strcmp(name, "substitute entities")) {
291 ctxt->replaceEntities = *((int *) value);
292 } else if (!strcmp(name, "gather line info")) {
293 ctxt->record_info = *((int *) value);
294 } else if (!strcmp(name, "user data")) {
295 ctxt->userData = *((void **)value);
296 } else if (!strcmp(name, "is html")) {
297 ctxt->html = *((int *) value);
298 } else if (!strcmp(name, "is standalone")) {
299 ctxt->standalone = *((int *) value);
300 } else if (!strcmp(name, "document")) {
301 ctxt->myDoc = *((xmlDocPtr *) value);
302 } else if (!strcmp(name, "is well formed")) {
303 ctxt->wellFormed = *((int *) value);
304 } else if (!strcmp(name, "is valid")) {
305 ctxt->valid = *((int *) value);
306 } else if (!strcmp(name, "SAX block")) {
307 ctxt->sax = *((xmlSAXHandlerPtr *) value);
308 } else if (!strcmp(name, "SAX function internalSubset")) {
309 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
310 } else if (!strcmp(name, "SAX function isStandalone")) {
311 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
312 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
313 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
314 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
315 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function resolveEntity")) {
317 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
318 } else if (!strcmp(name, "SAX function getEntity")) {
319 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
320 } else if (!strcmp(name, "SAX function entityDecl")) {
321 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function notationDecl")) {
323 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function attributeDecl")) {
325 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function elementDecl")) {
327 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
329 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
331 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function startDocument")) {
333 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function endDocument")) {
335 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function startElement")) {
337 ctxt->sax->startElement = *((startElementSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function endElement")) {
339 ctxt->sax->endElement = *((endElementSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function reference")) {
341 ctxt->sax->reference = *((referenceSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function characters")) {
343 ctxt->sax->characters = *((charactersSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
345 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function processingInstruction")) {
347 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function comment")) {
349 ctxt->sax->comment = *((commentSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function warning")) {
351 ctxt->sax->warning = *((warningSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function error")) {
353 ctxt->sax->error = *((errorSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function fatalError")) {
355 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function getParameterEntity")) {
357 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
358 } else if (!strcmp(name, "SAX function cdataBlock")) {
359 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function externalSubset")) {
361 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
362 } else {
363 return(-1);
364 }
365 return(0);
366}
367
368/************************************************************************
369 * *
370 * Some functions to avoid too large macros *
371 * *
372 ************************************************************************/
373
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE, and
 * FFFF; below #x20 only tab, line feed and carriage return qualify.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* control range (and negative values): only TAB, LF and CR */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fall through and fail the test below */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
394
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
409
/*
 * Direct lookup table for the code points 0x00 - 0xFF: 1 when the code
 * point belongs to production [85] BaseChar, 0 otherwise.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive {first, last} ranges of production [85] BaseChar at or above
 * 0x0100.  The entries are disjoint and sorted in ascending order, which
 * the binary search in xmlIsBaseChar() relies on.
 */
static const unsigned short xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148},
    {0x014A, 0x017E}, {0x0180, 0x01C3}, {0x01CD, 0x01F0},
    {0x01F4, 0x01F5}, {0x01FA, 0x0217}, {0x0250, 0x02A8},
    {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE},
    {0x03D0, 0x03D6}, {0x03DA, 0x03DA}, {0x03DC, 0x03DC},
    {0x03DE, 0x03DE}, {0x03E0, 0x03E0}, {0x03E2, 0x03F3},
    {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8},
    {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
    {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0559, 0x0559},
    {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7},
    {0x06BA, 0x06BE}, {0x06C0, 0x06CE}, {0x06D0, 0x06D3},
    {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961},
    {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8},
    {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9},
    {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
    {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
    {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E},
    {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0},
    {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD},
    {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10},
    {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D},
    {0x0B5F, 0x0B61}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90},
    {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C},
    {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C},
    {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
    {0x0C35, 0x0C39}, {0x0C60, 0x0C61}, {0x0C85, 0x0C8C},
    {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1},
    {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28},
    {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0E01, 0x0E2E},
    {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88},
    {0x0E8A, 0x0E8A}, {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97},
    {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EA5, 0x0EA5},
    {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD},
    {0x0EC0, 0x0EC4}, {0x0F40, 0x0F47}, {0x0F49, 0x0F69},
    {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100},
    {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C},
    {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
    {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155},
    {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169},
    {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
    {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB},
    {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0},
    {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
    {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45},
    {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
    {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
    {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3},
    {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B},
    {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
    {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; larger ones
 * are looked up in the sorted xmlBaseCharRanges table.  Negative values
 * are rejected up front so they can never be used as an array index.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        const int mid = low + (high - low) / 2;

        if (c < (int) xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > (int) xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
648
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The accepted ranges are kept in a small ascending table that is
 * scanned until the range containing @c, or the first range starting
 * above @c, is reached.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const unsigned short digitRanges[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const unsigned int rangeCount =
        (unsigned int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    unsigned int idx;

    for (idx = 0; idx < rangeCount; idx++) {
        /* table is ascending: nothing further can match */
        if (c < (int) digitRanges[idx][0])
            return(0);
        if (c <= (int) digitRanges[idx][1])
            return(1);
    }
    return(0);
}
678
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The accepted inclusive ranges are stored in ascending order and looked
 * up with a binary search.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    static const unsigned short combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED},
        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C},
        {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
        {0x0981, 0x0983}, {0x09BC, 0x09BC}, {0x09BE, 0x09BE},
        {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
        {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E},
        {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42}, {0x0A47, 0x0A48},
        {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
        {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C},
        {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
        {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
        {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48},
        {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C82, 0x0C83},
        {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
        {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},
        {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
        {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35},
        {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
        {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B},
        {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC},
        {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
        {0x309A, 0x309A}
    };
    int first = 0;
    int last = (int) (sizeof(combiningRanges) /
                      sizeof(combiningRanges[0])) - 1;

    while (first <= last) {
        const int probe = first + (last - first) / 2;

        if (c < (int) combiningRanges[probe][0])
            last = probe - 1;
        else if (c > (int) combiningRanges[probe][1])
            first = probe + 1;
        else
            return(1);
    }
    return(0);
}
791
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Every member of the three ranges is listed explicitly, including
 * #x30FD in the middle of [#x30FC-#x30FE].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE] */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
816
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): in addition to the production quoted above this routine
 * also accepts [#xF900-#xFA2D]; confirm whether that extension is
 * intended before relying on strict conformance to [86].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick rejection of everything below 0x0100 */
    if (c < 0x0100)
        return(0);
    if (c == 0x3007)
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);
    if ((c >= 0x4e00) && (c <= 0x9fa5))
        return(1);
    return((c >= 0xf900) && (c <= 0xfa2d));
}
834
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
848
/**
 * xmlIsPubidChar:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
    /* #x20 | #xD | #xA */
    case 0x20: case 0x0D: case 0x0A:
    /* [-'()+,./:=?;!*#@$_%] */
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
871
872/************************************************************************
873 * *
874 * Input handling functions for progressive parsing *
875 * *
876 ************************************************************************/
877
878/* #define DEBUG_INPUT */
879/* #define DEBUG_STACK */
880/* #define DEBUG_PUSH */
881
882
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when @in->base does not match the
 * content of the underlying buffer, or when @in->cur lies outside of
 * [base, base + use], then dumps the addresses and sizes involved.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the offset as a long: casting a
     * pointer to int truncates it where pointers are wider than int and
     * each argument must match its conversion specifier.
     */
    xmlGenericError(xmlGenericErrorContext,
	    "buffer %p : content %p, cur %ld, use %u, size %u\n",
	    (void *) in, (void *) in->buf->buffer->content,
	    (long) (in->cur - in->base),
	    (unsigned int) in->buf->buffer->use,
	    (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in) 
#endif
910
911
912/**
913 * xmlParserInputRead:
914 * @in: an XML parser input
915 * @len: an indicative size for the lookahead
916 *
917 * This function refresh the input for the parser. It doesn't try to
918 * preserve pointers to the input buffer, and discard already read data
919 *
920 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
921 * end of this entity
922 */
923int
924xmlParserInputRead(xmlParserInputPtr in, int len) {
925 int ret;
926 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000927 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000928
929#ifdef DEBUG_INPUT
930 xmlGenericError(xmlGenericErrorContext, "Read\n");
931#endif
932 if (in->buf == NULL) return(-1);
933 if (in->base == NULL) return(-1);
934 if (in->cur == NULL) return(-1);
935 if (in->buf->buffer == NULL) return(-1);
936 if (in->buf->readcallback == NULL) return(-1);
937
938 CHECK_BUFFER(in);
939
940 used = in->cur - in->buf->buffer->content;
941 ret = xmlBufferShrink(in->buf->buffer, used);
942 if (ret > 0) {
943 in->cur -= ret;
944 in->consumed += ret;
945 }
946 ret = xmlParserInputBufferRead(in->buf, len);
947 if (in->base != in->buf->buffer->content) {
948 /*
949 * the buffer has been realloced
950 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000951 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000952 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000953 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000954 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000955 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000956
957 CHECK_BUFFER(in);
958
959 return(ret);
960}
961
962/**
963 * xmlParserInputGrow:
964 * @in: an XML parser input
965 * @len: an indicative size for the lookahead
966 *
967 * This function increase the input for the parser. It tries to
968 * preserve pointers to the input buffer, and keep already read data
969 *
970 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
971 * end of this entity
972 */
973int
974xmlParserInputGrow(xmlParserInputPtr in, int len) {
975 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000976 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000977
978#ifdef DEBUG_INPUT
979 xmlGenericError(xmlGenericErrorContext, "Grow\n");
980#endif
981 if (in->buf == NULL) return(-1);
982 if (in->base == NULL) return(-1);
983 if (in->cur == NULL) return(-1);
984 if (in->buf->buffer == NULL) return(-1);
985
986 CHECK_BUFFER(in);
987
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000988 indx = in->cur - in->base;
989 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000990
991 CHECK_BUFFER(in);
992
993 return(0);
994 }
995 if (in->buf->readcallback != NULL)
996 ret = xmlParserInputBufferGrow(in->buf, len);
997 else
998 return(0);
999
1000 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001001 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001002 * block, but we use it really as an integer to do some
1003 * pointer arithmetic. Insure will raise it as a bug but in
1004 * that specific case, that's not !
1005 */
1006 if (in->base != in->buf->buffer->content) {
1007 /*
1008 * the buffer has been realloced
1009 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001010 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001011 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001012 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001014 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001015
1016 CHECK_BUFFER(in);
1017
1018 return(ret);
1019}
1020
1021/**
1022 * xmlParserInputShrink:
1023 * @in: an XML parser input
1024 *
1025 * This function removes used input for the parser.
1026 */
1027void
1028xmlParserInputShrink(xmlParserInputPtr in) {
1029 int used;
1030 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001031 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001032
1033#ifdef DEBUG_INPUT
1034 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1035#endif
1036 if (in->buf == NULL) return;
1037 if (in->base == NULL) return;
1038 if (in->cur == NULL) return;
1039 if (in->buf->buffer == NULL) return;
1040
1041 CHECK_BUFFER(in);
1042
1043 used = in->cur - in->buf->buffer->content;
1044 /*
1045 * Do not shrink on large buffers whose only a tiny fraction
1046 * was consumned
1047 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001048 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001049 return;
1050 if (used > INPUT_CHUNK) {
1051 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1052 if (ret > 0) {
1053 in->cur -= ret;
1054 in->consumed += ret;
1055 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001056 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001057 }
1058
1059 CHECK_BUFFER(in);
1060
1061 if (in->buf->buffer->use > INPUT_CHUNK) {
1062 return;
1063 }
1064 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1065 if (in->base != in->buf->buffer->content) {
1066 /*
1067 * the buffer has been realloced
1068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001071 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001072 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001073 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001074
1075 CHECK_BUFFER(in);
1076}
1077
1078/************************************************************************
1079 * *
1080 * UTF8 character input and related functions *
1081 * *
1082 ************************************************************************/
1083
1084/**
1085 * xmlNextChar:
1086 * @ctxt: the XML parser context
1087 *
1088 * Skip to the next char input char.
1089 */
1090
1091void
1092xmlNextChar(xmlParserCtxtPtr ctxt) {
1093 if (ctxt->instate == XML_PARSER_EOF)
1094 return;
1095
1096 /*
1097 * 2.11 End-of-Line Handling
1098 * the literal two-character sequence "#xD#xA" or a standalone
1099 * literal #xD, an XML processor must pass to the application
1100 * the single character #xA.
1101 */
1102 if (ctxt->token != 0) ctxt->token = 0;
1103 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1104 if ((*ctxt->input->cur == 0) &&
1105 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1106 (ctxt->instate != XML_PARSER_COMMENT)) {
1107 /*
1108 * If we are at the end of the current entity and
1109 * the context allows it, we pop consumed entities
1110 * automatically.
1111 * the auto closing should be blocked in other cases
1112 */
1113 xmlPopInput(ctxt);
1114 } else {
1115 if (*(ctxt->input->cur) == '\n') {
1116 ctxt->input->line++; ctxt->input->col = 1;
1117 } else ctxt->input->col++;
1118 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1119 /*
1120 * We are supposed to handle UTF8, check it's valid
1121 * From rfc2044: encoding of the Unicode values on UTF-8:
1122 *
1123 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1124 * 0000 0000-0000 007F 0xxxxxxx
1125 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1126 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1127 *
1128 * Check for the 0x110000 limit too
1129 */
1130 const unsigned char *cur = ctxt->input->cur;
1131 unsigned char c;
1132
1133 c = *cur;
1134 if (c & 0x80) {
1135 if (cur[1] == 0)
1136 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1137 if ((cur[1] & 0xc0) != 0x80)
1138 goto encoding_error;
1139 if ((c & 0xe0) == 0xe0) {
1140 unsigned int val;
1141
1142 if (cur[2] == 0)
1143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1144 if ((cur[2] & 0xc0) != 0x80)
1145 goto encoding_error;
1146 if ((c & 0xf0) == 0xf0) {
1147 if (cur[3] == 0)
1148 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1149 if (((c & 0xf8) != 0xf0) ||
1150 ((cur[3] & 0xc0) != 0x80))
1151 goto encoding_error;
1152 /* 4-byte code */
1153 ctxt->input->cur += 4;
1154 val = (cur[0] & 0x7) << 18;
1155 val |= (cur[1] & 0x3f) << 12;
1156 val |= (cur[2] & 0x3f) << 6;
1157 val |= cur[3] & 0x3f;
1158 } else {
1159 /* 3-byte code */
1160 ctxt->input->cur += 3;
1161 val = (cur[0] & 0xf) << 12;
1162 val |= (cur[1] & 0x3f) << 6;
1163 val |= cur[2] & 0x3f;
1164 }
1165 if (((val > 0xd7ff) && (val < 0xe000)) ||
1166 ((val > 0xfffd) && (val < 0x10000)) ||
1167 (val >= 0x110000)) {
1168 if ((ctxt->sax != NULL) &&
1169 (ctxt->sax->error != NULL))
1170 ctxt->sax->error(ctxt->userData,
1171 "Char 0x%X out of allowed range\n", val);
1172 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1173 ctxt->wellFormed = 0;
1174 ctxt->disableSAX = 1;
1175 }
1176 } else
1177 /* 2-byte code */
1178 ctxt->input->cur += 2;
1179 } else
1180 /* 1-byte code */
1181 ctxt->input->cur++;
1182 } else {
1183 /*
1184 * Assume it's a fixed lenght encoding (1) with
1185 * a compatibke encoding for the ASCII set, since
1186 * XML constructs only use < 128 chars
1187 */
1188 ctxt->input->cur++;
1189 }
1190 ctxt->nbChars++;
1191 if (*ctxt->input->cur == 0)
1192 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1193 }
1194 } else {
1195 ctxt->input->cur++;
1196 ctxt->nbChars++;
1197 if (*ctxt->input->cur == 0)
1198 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1199 }
1200 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1201 xmlParserHandlePEReference(ctxt);
1202 if ((*ctxt->input->cur == 0) &&
1203 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1204 xmlPopInput(ctxt);
1205 return;
1206encoding_error:
1207 /*
1208 * If we detect an UTF8 error that probably mean that the
1209 * input encoding didn't get properly advertized in the
1210 * declaration header. Report the error and switch the encoding
1211 * to ISO-Latin-1 (if you don't like this policy, just declare the
1212 * encoding !)
1213 */
1214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1215 ctxt->sax->error(ctxt->userData,
1216 "Input is not proper UTF-8, indicate encoding !\n");
1217 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1218 ctxt->input->cur[0], ctxt->input->cur[1],
1219 ctxt->input->cur[2], ctxt->input->cur[3]);
1220 }
1221 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1222
1223 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1224 ctxt->input->cur++;
1225 return;
1226}
1227
1228/**
1229 * xmlCurrentChar:
1230 * @ctxt: the XML parser context
1231 * @len: pointer to the length of the char read
1232 *
1233 * The current char value, if using UTF-8 this may actaully span multiple
1234 * bytes in the input buffer. Implement the end of line normalization:
1235 * 2.11 End-of-Line Handling
1236 * Wherever an external parsed entity or the literal entity value
1237 * of an internal parsed entity contains either the literal two-character
1238 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1239 * must pass to the application the single character #xA.
1240 * This behavior can conveniently be produced by normalizing all
1241 * line breaks to #xA on input, before parsing.)
1242 *
1243 * Returns the current char value and its lenght
1244 */
1245
1246int
1247xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1248 if (ctxt->instate == XML_PARSER_EOF)
1249 return(0);
1250
1251 if (ctxt->token != 0) {
1252 *len = 0;
1253 return(ctxt->token);
1254 }
1255 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1256 *len = 1;
1257 return((int) *ctxt->input->cur);
1258 }
1259 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1260 /*
1261 * We are supposed to handle UTF8, check it's valid
1262 * From rfc2044: encoding of the Unicode values on UTF-8:
1263 *
1264 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1265 * 0000 0000-0000 007F 0xxxxxxx
1266 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1267 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1268 *
1269 * Check for the 0x110000 limit too
1270 */
1271 const unsigned char *cur = ctxt->input->cur;
1272 unsigned char c;
1273 unsigned int val;
1274
1275 c = *cur;
1276 if (c & 0x80) {
1277 if (cur[1] == 0)
1278 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1279 if ((cur[1] & 0xc0) != 0x80)
1280 goto encoding_error;
1281 if ((c & 0xe0) == 0xe0) {
1282
1283 if (cur[2] == 0)
1284 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1285 if ((cur[2] & 0xc0) != 0x80)
1286 goto encoding_error;
1287 if ((c & 0xf0) == 0xf0) {
1288 if (cur[3] == 0)
1289 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1290 if (((c & 0xf8) != 0xf0) ||
1291 ((cur[3] & 0xc0) != 0x80))
1292 goto encoding_error;
1293 /* 4-byte code */
1294 *len = 4;
1295 val = (cur[0] & 0x7) << 18;
1296 val |= (cur[1] & 0x3f) << 12;
1297 val |= (cur[2] & 0x3f) << 6;
1298 val |= cur[3] & 0x3f;
1299 } else {
1300 /* 3-byte code */
1301 *len = 3;
1302 val = (cur[0] & 0xf) << 12;
1303 val |= (cur[1] & 0x3f) << 6;
1304 val |= cur[2] & 0x3f;
1305 }
1306 } else {
1307 /* 2-byte code */
1308 *len = 2;
1309 val = (cur[0] & 0x1f) << 6;
1310 val |= cur[1] & 0x3f;
1311 }
1312 if (!IS_CHAR(val)) {
1313 if ((ctxt->sax != NULL) &&
1314 (ctxt->sax->error != NULL))
1315 ctxt->sax->error(ctxt->userData,
1316 "Char 0x%X out of allowed range\n", val);
1317 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1318 ctxt->wellFormed = 0;
1319 ctxt->disableSAX = 1;
1320 }
1321 return(val);
1322 } else {
1323 /* 1-byte code */
1324 *len = 1;
1325 if (*ctxt->input->cur == 0xD) {
1326 if (ctxt->input->cur[1] == 0xA) {
1327 ctxt->nbChars++;
1328 ctxt->input->cur++;
1329 }
1330 return(0xA);
1331 }
1332 return((int) *ctxt->input->cur);
1333 }
1334 }
1335 /*
1336 * Assume it's a fixed lenght encoding (1) with
1337 * a compatibke encoding for the ASCII set, since
1338 * XML constructs only use < 128 chars
1339 */
1340 *len = 1;
1341 if (*ctxt->input->cur == 0xD) {
1342 if (ctxt->input->cur[1] == 0xA) {
1343 ctxt->nbChars++;
1344 ctxt->input->cur++;
1345 }
1346 return(0xA);
1347 }
1348 return((int) *ctxt->input->cur);
1349encoding_error:
1350 /*
1351 * If we detect an UTF8 error that probably mean that the
1352 * input encoding didn't get properly advertized in the
1353 * declaration header. Report the error and switch the encoding
1354 * to ISO-Latin-1 (if you don't like this policy, just declare the
1355 * encoding !)
1356 */
1357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1358 ctxt->sax->error(ctxt->userData,
1359 "Input is not proper UTF-8, indicate encoding !\n");
1360 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1361 ctxt->input->cur[0], ctxt->input->cur[1],
1362 ctxt->input->cur[2], ctxt->input->cur[3]);
1363 }
1364 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1365
1366 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1367 *len = 1;
1368 return((int) *ctxt->input->cur);
1369}
1370
1371/**
1372 * xmlStringCurrentChar:
1373 * @ctxt: the XML parser context
1374 * @cur: pointer to the beginning of the char
1375 * @len: pointer to the length of the char read
1376 *
1377 * The current char value, if using UTF-8 this may actaully span multiple
1378 * bytes in the input buffer.
1379 *
1380 * Returns the current char value and its lenght
1381 */
1382
1383int
1384xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001385 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001386 /*
1387 * We are supposed to handle UTF8, check it's valid
1388 * From rfc2044: encoding of the Unicode values on UTF-8:
1389 *
1390 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1391 * 0000 0000-0000 007F 0xxxxxxx
1392 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1393 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1394 *
1395 * Check for the 0x110000 limit too
1396 */
1397 unsigned char c;
1398 unsigned int val;
1399
1400 c = *cur;
1401 if (c & 0x80) {
1402 if ((cur[1] & 0xc0) != 0x80)
1403 goto encoding_error;
1404 if ((c & 0xe0) == 0xe0) {
1405
1406 if ((cur[2] & 0xc0) != 0x80)
1407 goto encoding_error;
1408 if ((c & 0xf0) == 0xf0) {
1409 if (((c & 0xf8) != 0xf0) ||
1410 ((cur[3] & 0xc0) != 0x80))
1411 goto encoding_error;
1412 /* 4-byte code */
1413 *len = 4;
1414 val = (cur[0] & 0x7) << 18;
1415 val |= (cur[1] & 0x3f) << 12;
1416 val |= (cur[2] & 0x3f) << 6;
1417 val |= cur[3] & 0x3f;
1418 } else {
1419 /* 3-byte code */
1420 *len = 3;
1421 val = (cur[0] & 0xf) << 12;
1422 val |= (cur[1] & 0x3f) << 6;
1423 val |= cur[2] & 0x3f;
1424 }
1425 } else {
1426 /* 2-byte code */
1427 *len = 2;
1428 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001429 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001430 }
1431 if (!IS_CHAR(val)) {
1432 if ((ctxt->sax != NULL) &&
1433 (ctxt->sax->error != NULL))
1434 ctxt->sax->error(ctxt->userData,
1435 "Char 0x%X out of allowed range\n", val);
1436 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1437 ctxt->wellFormed = 0;
1438 ctxt->disableSAX = 1;
1439 }
1440 return(val);
1441 } else {
1442 /* 1-byte code */
1443 *len = 1;
1444 return((int) *cur);
1445 }
1446 }
1447 /*
1448 * Assume it's a fixed lenght encoding (1) with
1449 * a compatibke encoding for the ASCII set, since
1450 * XML constructs only use < 128 chars
1451 */
1452 *len = 1;
1453 return((int) *cur);
1454encoding_error:
1455 /*
1456 * If we detect an UTF8 error that probably mean that the
1457 * input encoding didn't get properly advertized in the
1458 * declaration header. Report the error and switch the encoding
1459 * to ISO-Latin-1 (if you don't like this policy, just declare the
1460 * encoding !)
1461 */
1462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1463 ctxt->sax->error(ctxt->userData,
1464 "Input is not proper UTF-8, indicate encoding !\n");
1465 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1466 ctxt->input->cur[0], ctxt->input->cur[1],
1467 ctxt->input->cur[2], ctxt->input->cur[3]);
1468 }
1469 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1470
1471 *len = 1;
1472 return((int) *cur);
1473}
1474
1475/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001476 * xmlCopyCharMultiByte:
1477 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001478 * @val: the char value
1479 *
1480 * append the char value in the array
1481 *
1482 * Returns the number of xmlChar written
1483 */
Owen Taylor3473f882001-02-23 17:55:21 +00001484int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001485xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001486 /*
1487 * We are supposed to handle UTF8, check it's valid
1488 * From rfc2044: encoding of the Unicode values on UTF-8:
1489 *
1490 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1491 * 0000 0000-0000 007F 0xxxxxxx
1492 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1493 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495 if (val >= 0x80) {
1496 xmlChar *savedout = out;
1497 int bits;
1498 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1499 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1500 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1501 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001502 xmlGenericError(xmlGenericErrorContext,
1503 "Internal error, xmlCopyChar 0x%X out of bound\n",
1504 val);
1505 return(0);
1506 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507 for ( ; bits >= 0; bits-= 6)
1508 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1509 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001510 }
1511 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001512 return 1;
1513}
1514
1515/**
1516 * xmlCopyChar:
1517 * @len: Ignored, compatibility
1518 * @out: pointer to an arry of xmlChar
1519 * @val: the char value
1520 *
1521 * append the char value in the array
1522 *
1523 * Returns the number of xmlChar written
1524 */
1525
1526int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001527xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 /* the len parameter is ignored */
1529 if (val >= 0x80) {
1530 return(xmlCopyCharMultiByte (out, val));
1531 }
1532 *out = (xmlChar) val;
1533 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001534}
1535
1536/************************************************************************
1537 * *
1538 * Commodity functions to switch encodings *
1539 * *
1540 ************************************************************************/
1541
1542/**
1543 * xmlSwitchEncoding:
1544 * @ctxt: the parser context
1545 * @enc: the encoding value (number)
1546 *
1547 * change the input functions when discovering the character encoding
1548 * of a given entity.
1549 *
1550 * Returns 0 in case of success, -1 otherwise
1551 */
1552int
1553xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1554{
1555 xmlCharEncodingHandlerPtr handler;
1556
1557 switch (enc) {
1558 case XML_CHAR_ENCODING_ERROR:
1559 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1562 ctxt->wellFormed = 0;
1563 ctxt->disableSAX = 1;
1564 break;
1565 case XML_CHAR_ENCODING_NONE:
1566 /* let's assume it's UTF-8 without the XML decl */
1567 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1568 return(0);
1569 case XML_CHAR_ENCODING_UTF8:
1570 /* default encoding, no conversion should be needed */
1571 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1572 return(0);
1573 default:
1574 break;
1575 }
1576 handler = xmlGetCharEncodingHandler(enc);
1577 if (handler == NULL) {
1578 /*
1579 * Default handlers.
1580 */
1581 switch (enc) {
1582 case XML_CHAR_ENCODING_ERROR:
1583 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1585 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1586 ctxt->wellFormed = 0;
1587 ctxt->disableSAX = 1;
1588 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1589 break;
1590 case XML_CHAR_ENCODING_NONE:
1591 /* let's assume it's UTF-8 without the XML decl */
1592 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1593 return(0);
1594 case XML_CHAR_ENCODING_UTF8:
1595 case XML_CHAR_ENCODING_ASCII:
1596 /* default encoding, no conversion should be needed */
1597 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1598 return(0);
1599 case XML_CHAR_ENCODING_UTF16LE:
1600 break;
1601 case XML_CHAR_ENCODING_UTF16BE:
1602 break;
1603 case XML_CHAR_ENCODING_UCS4LE:
1604 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1606 ctxt->sax->error(ctxt->userData,
1607 "char encoding USC4 little endian not supported\n");
1608 break;
1609 case XML_CHAR_ENCODING_UCS4BE:
1610 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1612 ctxt->sax->error(ctxt->userData,
1613 "char encoding USC4 big endian not supported\n");
1614 break;
1615 case XML_CHAR_ENCODING_EBCDIC:
1616 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1618 ctxt->sax->error(ctxt->userData,
1619 "char encoding EBCDIC not supported\n");
1620 break;
1621 case XML_CHAR_ENCODING_UCS4_2143:
1622 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624 ctxt->sax->error(ctxt->userData,
1625 "char encoding UCS4 2143 not supported\n");
1626 break;
1627 case XML_CHAR_ENCODING_UCS4_3412:
1628 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1630 ctxt->sax->error(ctxt->userData,
1631 "char encoding UCS4 3412 not supported\n");
1632 break;
1633 case XML_CHAR_ENCODING_UCS2:
1634 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636 ctxt->sax->error(ctxt->userData,
1637 "char encoding UCS2 not supported\n");
1638 break;
1639 case XML_CHAR_ENCODING_8859_1:
1640 case XML_CHAR_ENCODING_8859_2:
1641 case XML_CHAR_ENCODING_8859_3:
1642 case XML_CHAR_ENCODING_8859_4:
1643 case XML_CHAR_ENCODING_8859_5:
1644 case XML_CHAR_ENCODING_8859_6:
1645 case XML_CHAR_ENCODING_8859_7:
1646 case XML_CHAR_ENCODING_8859_8:
1647 case XML_CHAR_ENCODING_8859_9:
1648 /*
1649 * We used to keep the internal content in the
1650 * document encoding however this turns being unmaintainable
1651 * So xmlGetCharEncodingHandler() will return non-null
1652 * values for this now.
1653 */
1654 if ((ctxt->inputNr == 1) &&
1655 (ctxt->encoding == NULL) &&
1656 (ctxt->input->encoding != NULL)) {
1657 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1658 }
1659 ctxt->charset = enc;
1660 return(0);
1661 case XML_CHAR_ENCODING_2022_JP:
1662 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664 ctxt->sax->error(ctxt->userData,
1665 "char encoding ISO-2022-JPnot supported\n");
1666 break;
1667 case XML_CHAR_ENCODING_SHIFT_JIS:
1668 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1670 ctxt->sax->error(ctxt->userData,
1671 "char encoding Shift_JIS not supported\n");
1672 break;
1673 case XML_CHAR_ENCODING_EUC_JP:
1674 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1676 ctxt->sax->error(ctxt->userData,
1677 "char encoding EUC-JPnot supported\n");
1678 break;
1679 }
1680 }
1681 if (handler == NULL)
1682 return(-1);
1683 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1684 return(xmlSwitchToEncoding(ctxt, handler));
1685}
1686
1687/**
1688 * xmlSwitchToEncoding:
1689 * @ctxt: the parser context
1690 * @handler: the encoding handler
1691 *
1692 * change the input functions when discovering the character encoding
1693 * of a given entity.
1694 *
1695 * Returns 0 in case of success, -1 otherwise
1696 */
1697int
1698xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1699{
1700 int nbchars;
1701
1702 if (handler != NULL) {
1703 if (ctxt->input != NULL) {
1704 if (ctxt->input->buf != NULL) {
1705 if (ctxt->input->buf->encoder != NULL) {
1706 if (ctxt->input->buf->encoder == handler)
1707 return(0);
1708 /*
1709 * Note: this is a bit dangerous, but that's what it
1710 * takes to use nearly compatible signature for different
1711 * encodings.
1712 */
1713 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1714 ctxt->input->buf->encoder = handler;
1715 return(0);
1716 }
1717 ctxt->input->buf->encoder = handler;
1718
1719 /*
1720 * Is there already some content down the pipe to convert ?
1721 */
1722 if ((ctxt->input->buf->buffer != NULL) &&
1723 (ctxt->input->buf->buffer->use > 0)) {
1724 int processed;
1725
1726 /*
1727 * Specific handling of the Byte Order Mark for
1728 * UTF-16
1729 */
1730 if ((handler->name != NULL) &&
1731 (!strcmp(handler->name, "UTF-16LE")) &&
1732 (ctxt->input->cur[0] == 0xFF) &&
1733 (ctxt->input->cur[1] == 0xFE)) {
1734 ctxt->input->cur += 2;
1735 }
1736 if ((handler->name != NULL) &&
1737 (!strcmp(handler->name, "UTF-16BE")) &&
1738 (ctxt->input->cur[0] == 0xFE) &&
1739 (ctxt->input->cur[1] == 0xFF)) {
1740 ctxt->input->cur += 2;
1741 }
1742
1743 /*
1744 * Shring the current input buffer.
1745 * Move it as the raw buffer and create a new input buffer
1746 */
1747 processed = ctxt->input->cur - ctxt->input->base;
1748 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1749 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1750 ctxt->input->buf->buffer = xmlBufferCreate();
1751
1752 if (ctxt->html) {
1753 /*
1754 * converst as much as possbile of the buffer
1755 */
1756 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1757 ctxt->input->buf->buffer,
1758 ctxt->input->buf->raw);
1759 } else {
1760 /*
1761 * convert just enough to get
1762 * '<?xml version="1.0" encoding="xxx"?>'
1763 * parsed with the autodetected encoding
1764 * into the parser reading buffer.
1765 */
1766 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1767 ctxt->input->buf->buffer,
1768 ctxt->input->buf->raw);
1769 }
1770 if (nbchars < 0) {
1771 xmlGenericError(xmlGenericErrorContext,
1772 "xmlSwitchToEncoding: encoder error\n");
1773 return(-1);
1774 }
1775 ctxt->input->base =
1776 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001777 ctxt->input->end =
1778 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001779
1780 }
1781 return(0);
1782 } else {
1783 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1784 /*
1785 * When parsing a static memory array one must know the
1786 * size to be able to convert the buffer.
1787 */
1788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1789 ctxt->sax->error(ctxt->userData,
1790 "xmlSwitchEncoding : no input\n");
1791 return(-1);
1792 } else {
1793 int processed;
1794
1795 /*
1796 * Shring the current input buffer.
1797 * Move it as the raw buffer and create a new input buffer
1798 */
1799 processed = ctxt->input->cur - ctxt->input->base;
1800
1801 ctxt->input->buf->raw = xmlBufferCreate();
1802 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1803 ctxt->input->length - processed);
1804 ctxt->input->buf->buffer = xmlBufferCreate();
1805
1806 /*
1807 * convert as much as possible of the raw input
1808 * to the parser reading buffer.
1809 */
1810 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1811 ctxt->input->buf->buffer,
1812 ctxt->input->buf->raw);
1813 if (nbchars < 0) {
1814 xmlGenericError(xmlGenericErrorContext,
1815 "xmlSwitchToEncoding: encoder error\n");
1816 return(-1);
1817 }
1818
1819 /*
1820 * Conversion succeeded, get rid of the old buffer
1821 */
1822 if ((ctxt->input->free != NULL) &&
1823 (ctxt->input->base != NULL))
1824 ctxt->input->free((xmlChar *) ctxt->input->base);
1825 ctxt->input->base =
1826 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001827 ctxt->input->end =
1828 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001829 }
1830 }
1831 } else {
1832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1833 ctxt->sax->error(ctxt->userData,
1834 "xmlSwitchEncoding : no input\n");
1835 return(-1);
1836 }
1837 /*
1838 * The parsing is now done in UTF8 natively
1839 */
1840 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1841 } else
1842 return(-1);
1843 return(0);
1844
1845}
1846
1847/************************************************************************
1848 * *
1849 * Commodity functions to handle entities processing *
1850 * *
1851 ************************************************************************/
1852
1853/**
1854 * xmlFreeInputStream:
1855 * @input: an xmlParserInputPtr
1856 *
1857 * Free up an input stream.
1858 */
1859void
1860xmlFreeInputStream(xmlParserInputPtr input) {
1861 if (input == NULL) return;
1862
1863 if (input->filename != NULL) xmlFree((char *) input->filename);
1864 if (input->directory != NULL) xmlFree((char *) input->directory);
1865 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1866 if (input->version != NULL) xmlFree((char *) input->version);
1867 if ((input->free != NULL) && (input->base != NULL))
1868 input->free((xmlChar *) input->base);
1869 if (input->buf != NULL)
1870 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001871 xmlFree(input);
1872}
1873
1874/**
1875 * xmlNewInputStream:
1876 * @ctxt: an XML parser context
1877 *
1878 * Create a new input stream structure
1879 * Returns the new input stream or NULL
1880 */
1881xmlParserInputPtr
1882xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1883 xmlParserInputPtr input;
1884
1885 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1886 if (input == NULL) {
1887 if (ctxt != NULL) {
1888 ctxt->errNo = XML_ERR_NO_MEMORY;
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "malloc: couldn't allocate a new input stream\n");
1892 ctxt->errNo = XML_ERR_NO_MEMORY;
1893 }
1894 return(NULL);
1895 }
1896 memset(input, 0, sizeof(xmlParserInput));
1897 input->line = 1;
1898 input->col = 1;
1899 input->standalone = -1;
1900 return(input);
1901}
1902
1903/**
1904 * xmlNewIOInputStream:
1905 * @ctxt: an XML parser context
1906 * @input: an I/O Input
1907 * @enc: the charset encoding if known
1908 *
1909 * Create a new input stream structure encapsulating the @input into
1910 * a stream suitable for the parser.
1911 *
1912 * Returns the new input stream or NULL
1913 */
1914xmlParserInputPtr
1915xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1916 xmlCharEncoding enc) {
1917 xmlParserInputPtr inputStream;
1918
1919 if (xmlParserDebugEntities)
1920 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1921 inputStream = xmlNewInputStream(ctxt);
1922 if (inputStream == NULL) {
1923 return(NULL);
1924 }
1925 inputStream->filename = NULL;
1926 inputStream->buf = input;
1927 inputStream->base = inputStream->buf->buffer->content;
1928 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001929 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001930 if (enc != XML_CHAR_ENCODING_NONE) {
1931 xmlSwitchEncoding(ctxt, enc);
1932 }
1933
1934 return(inputStream);
1935}
1936
1937/**
1938 * xmlNewEntityInputStream:
1939 * @ctxt: an XML parser context
1940 * @entity: an Entity pointer
1941 *
1942 * Create a new input stream based on an xmlEntityPtr
1943 *
1944 * Returns the new input stream or NULL
1945 */
1946xmlParserInputPtr
1947xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1948 xmlParserInputPtr input;
1949
1950 if (entity == NULL) {
1951 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData,
1954 "internal: xmlNewEntityInputStream entity = NULL\n");
1955 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1956 return(NULL);
1957 }
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext,
1960 "new input from entity: %s\n", entity->name);
1961 if (entity->content == NULL) {
1962 switch (entity->etype) {
1963 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1964 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1966 ctxt->sax->error(ctxt->userData,
1967 "xmlNewEntityInputStream unparsed entity !\n");
1968 break;
1969 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1970 case XML_EXTERNAL_PARAMETER_ENTITY:
1971 return(xmlLoadExternalEntity((char *) entity->URI,
1972 (char *) entity->ExternalID, ctxt));
1973 case XML_INTERNAL_GENERAL_ENTITY:
1974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975 ctxt->sax->error(ctxt->userData,
1976 "Internal entity %s without content !\n", entity->name);
1977 break;
1978 case XML_INTERNAL_PARAMETER_ENTITY:
1979 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981 ctxt->sax->error(ctxt->userData,
1982 "Internal parameter entity %s without content !\n", entity->name);
1983 break;
1984 case XML_INTERNAL_PREDEFINED_ENTITY:
1985 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "Predefined entity %s without content !\n", entity->name);
1989 break;
1990 }
1991 return(NULL);
1992 }
1993 input = xmlNewInputStream(ctxt);
1994 if (input == NULL) {
1995 return(NULL);
1996 }
1997 input->filename = (char *) entity->URI;
1998 input->base = entity->content;
1999 input->cur = entity->content;
2000 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002001 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002002 return(input);
2003}
2004
2005/**
2006 * xmlNewStringInputStream:
2007 * @ctxt: an XML parser context
2008 * @buffer: an memory buffer
2009 *
2010 * Create a new input stream based on a memory buffer.
2011 * Returns the new input stream
2012 */
2013xmlParserInputPtr
2014xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2015 xmlParserInputPtr input;
2016
2017 if (buffer == NULL) {
2018 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData,
2021 "internal: xmlNewStringInputStream string = NULL\n");
2022 return(NULL);
2023 }
2024 if (xmlParserDebugEntities)
2025 xmlGenericError(xmlGenericErrorContext,
2026 "new fixed input: %.30s\n", buffer);
2027 input = xmlNewInputStream(ctxt);
2028 if (input == NULL) {
2029 return(NULL);
2030 }
2031 input->base = buffer;
2032 input->cur = buffer;
2033 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002034 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002035 return(input);
2036}
2037
2038/**
2039 * xmlNewInputFromFile:
2040 * @ctxt: an XML parser context
2041 * @filename: the filename to use as entity
2042 *
2043 * Create a new input stream based on a file.
2044 *
2045 * Returns the new input stream or NULL in case of error
2046 */
2047xmlParserInputPtr
2048xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2049 xmlParserInputBufferPtr buf;
2050 xmlParserInputPtr inputStream;
2051 char *directory = NULL;
2052 xmlChar *URI = NULL;
2053
2054 if (xmlParserDebugEntities)
2055 xmlGenericError(xmlGenericErrorContext,
2056 "new input from file: %s\n", filename);
2057 if (ctxt == NULL) return(NULL);
2058 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2059 if (buf == NULL)
2060 return(NULL);
2061
2062 URI = xmlStrdup((xmlChar *) filename);
2063 directory = xmlParserGetDirectory((const char *) URI);
2064
2065 inputStream = xmlNewInputStream(ctxt);
2066 if (inputStream == NULL) {
2067 if (directory != NULL) xmlFree((char *) directory);
2068 if (URI != NULL) xmlFree((char *) URI);
2069 return(NULL);
2070 }
2071
2072 inputStream->filename = (const char *) URI;
2073 inputStream->directory = directory;
2074 inputStream->buf = buf;
2075
2076 inputStream->base = inputStream->buf->buffer->content;
2077 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002078 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002079 if ((ctxt->directory == NULL) && (directory != NULL))
2080 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2081 return(inputStream);
2082}
2083
2084/************************************************************************
2085 * *
2086 * Commodity functions to handle parser contexts *
2087 * *
2088 ************************************************************************/
2089
2090/**
2091 * xmlInitParserCtxt:
2092 * @ctxt: an XML parser context
2093 *
2094 * Initialize a parser context
2095 */
2096
2097void
2098xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2099{
2100 xmlSAXHandler *sax;
2101
2102 xmlDefaultSAXHandlerInit();
2103
2104 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2105 if (sax == NULL) {
2106 xmlGenericError(xmlGenericErrorContext,
2107 "xmlInitParserCtxt: out of memory\n");
2108 }
2109 else
2110 memset(sax, 0, sizeof(xmlSAXHandler));
2111
2112 /* Allocate the Input stack */
2113 ctxt->inputTab = (xmlParserInputPtr *)
2114 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2115 if (ctxt->inputTab == NULL) {
2116 xmlGenericError(xmlGenericErrorContext,
2117 "xmlInitParserCtxt: out of memory\n");
2118 ctxt->inputNr = 0;
2119 ctxt->inputMax = 0;
2120 ctxt->input = NULL;
2121 return;
2122 }
2123 ctxt->inputNr = 0;
2124 ctxt->inputMax = 5;
2125 ctxt->input = NULL;
2126
2127 ctxt->version = NULL;
2128 ctxt->encoding = NULL;
2129 ctxt->standalone = -1;
2130 ctxt->hasExternalSubset = 0;
2131 ctxt->hasPErefs = 0;
2132 ctxt->html = 0;
2133 ctxt->external = 0;
2134 ctxt->instate = XML_PARSER_START;
2135 ctxt->token = 0;
2136 ctxt->directory = NULL;
2137
2138 /* Allocate the Node stack */
2139 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2140 if (ctxt->nodeTab == NULL) {
2141 xmlGenericError(xmlGenericErrorContext,
2142 "xmlInitParserCtxt: out of memory\n");
2143 ctxt->nodeNr = 0;
2144 ctxt->nodeMax = 0;
2145 ctxt->node = NULL;
2146 ctxt->inputNr = 0;
2147 ctxt->inputMax = 0;
2148 ctxt->input = NULL;
2149 return;
2150 }
2151 ctxt->nodeNr = 0;
2152 ctxt->nodeMax = 10;
2153 ctxt->node = NULL;
2154
2155 /* Allocate the Name stack */
2156 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2157 if (ctxt->nameTab == NULL) {
2158 xmlGenericError(xmlGenericErrorContext,
2159 "xmlInitParserCtxt: out of memory\n");
2160 ctxt->nodeNr = 0;
2161 ctxt->nodeMax = 0;
2162 ctxt->node = NULL;
2163 ctxt->inputNr = 0;
2164 ctxt->inputMax = 0;
2165 ctxt->input = NULL;
2166 ctxt->nameNr = 0;
2167 ctxt->nameMax = 0;
2168 ctxt->name = NULL;
2169 return;
2170 }
2171 ctxt->nameNr = 0;
2172 ctxt->nameMax = 10;
2173 ctxt->name = NULL;
2174
2175 /* Allocate the space stack */
2176 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2177 if (ctxt->spaceTab == NULL) {
2178 xmlGenericError(xmlGenericErrorContext,
2179 "xmlInitParserCtxt: out of memory\n");
2180 ctxt->nodeNr = 0;
2181 ctxt->nodeMax = 0;
2182 ctxt->node = NULL;
2183 ctxt->inputNr = 0;
2184 ctxt->inputMax = 0;
2185 ctxt->input = NULL;
2186 ctxt->nameNr = 0;
2187 ctxt->nameMax = 0;
2188 ctxt->name = NULL;
2189 ctxt->spaceNr = 0;
2190 ctxt->spaceMax = 0;
2191 ctxt->space = NULL;
2192 return;
2193 }
2194 ctxt->spaceNr = 1;
2195 ctxt->spaceMax = 10;
2196 ctxt->spaceTab[0] = -1;
2197 ctxt->space = &ctxt->spaceTab[0];
2198
Daniel Veillard14be0a12001-03-03 18:50:55 +00002199 ctxt->sax = sax;
2200 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202 ctxt->userData = ctxt;
2203 ctxt->myDoc = NULL;
2204 ctxt->wellFormed = 1;
2205 ctxt->valid = 1;
2206 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2207 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2208 ctxt->pedantic = xmlPedanticParserDefaultValue;
2209 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2210 ctxt->vctxt.userData = ctxt;
2211 if (ctxt->validate) {
2212 ctxt->vctxt.error = xmlParserValidityError;
2213 if (xmlGetWarningsDefaultValue == 0)
2214 ctxt->vctxt.warning = NULL;
2215 else
2216 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002217 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002218 } else {
2219 ctxt->vctxt.error = NULL;
2220 ctxt->vctxt.warning = NULL;
2221 }
2222 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2223 ctxt->record_info = 0;
2224 ctxt->nbChars = 0;
2225 ctxt->checkIndex = 0;
2226 ctxt->inSubset = 0;
2227 ctxt->errNo = XML_ERR_OK;
2228 ctxt->depth = 0;
2229 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2230 xmlInitNodeInfoSeq(&ctxt->node_seq);
2231}
2232
2233/**
2234 * xmlFreeParserCtxt:
2235 * @ctxt: an XML parser context
2236 *
2237 * Free all the memory used by a parser context. However the parsed
2238 * document in ctxt->myDoc is not freed.
2239 */
2240
2241void
2242xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2243{
2244 xmlParserInputPtr input;
2245 xmlChar *oldname;
2246
2247 if (ctxt == NULL) return;
2248
2249 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2250 xmlFreeInputStream(input);
2251 }
2252 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2253 xmlFree(oldname);
2254 }
2255 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2256 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2257 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2258 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2259 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2260 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2261 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2262 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2263 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002264 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2265 xmlFree(ctxt->sax);
2266 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2267 xmlFree(ctxt);
2268}
2269
2270/**
2271 * xmlNewParserCtxt:
2272 *
2273 * Allocate and initialize a new parser context.
2274 *
2275 * Returns the xmlParserCtxtPtr or NULL
2276 */
2277
2278xmlParserCtxtPtr
2279xmlNewParserCtxt()
2280{
2281 xmlParserCtxtPtr ctxt;
2282
2283 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2284 if (ctxt == NULL) {
2285 xmlGenericError(xmlGenericErrorContext,
2286 "xmlNewParserCtxt : cannot allocate context\n");
2287 perror("malloc");
2288 return(NULL);
2289 }
2290 memset(ctxt, 0, sizeof(xmlParserCtxt));
2291 xmlInitParserCtxt(ctxt);
2292 return(ctxt);
2293}
2294
2295/************************************************************************
2296 * *
2297 * Handling of node information *
2298 * *
2299 ************************************************************************/
2300
2301/**
2302 * xmlClearParserCtxt:
2303 * @ctxt: an XML parser context
2304 *
2305 * Clear (release owned resources) and reinitialize a parser context
2306 */
2307
2308void
2309xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2310{
2311 xmlClearNodeInfoSeq(&ctxt->node_seq);
2312 xmlInitParserCtxt(ctxt);
2313}
2314
2315/**
2316 * xmlParserFindNodeInfo:
2317 * @ctxt: an XML parser context
2318 * @node: an XML node within the tree
2319 *
2320 * Find the parser node info struct for a given node
2321 *
2322 * Returns an xmlParserNodeInfo block pointer or NULL
2323 */
2324const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2325 const xmlNode* node)
2326{
2327 unsigned long pos;
2328
2329 /* Find position where node should be at */
2330 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2331 if ( ctx->node_seq.buffer[pos].node == node )
2332 return &ctx->node_seq.buffer[pos];
2333 else
2334 return NULL;
2335}
2336
2337
2338/**
2339 * xmlInitNodeInfoSeq:
2340 * @seq: a node info sequence pointer
2341 *
2342 * -- Initialize (set to initial state) node info sequence
2343 */
2344void
2345xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2346{
2347 seq->length = 0;
2348 seq->maximum = 0;
2349 seq->buffer = NULL;
2350}
2351
2352/**
2353 * xmlClearNodeInfoSeq:
2354 * @seq: a node info sequence pointer
2355 *
2356 * -- Clear (release memory and reinitialize) node
2357 * info sequence
2358 */
2359void
2360xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2361{
2362 if ( seq->buffer != NULL )
2363 xmlFree(seq->buffer);
2364 xmlInitNodeInfoSeq(seq);
2365}
2366
2367
2368/**
2369 * xmlParserFindNodeInfoIndex:
2370 * @seq: a node info sequence pointer
2371 * @node: an XML node pointer
2372 *
2373 *
2374 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2375 * the given node is or should be at in a sorted sequence
2376 *
2377 * Returns a long indicating the position of the record
2378 */
2379unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2380 const xmlNode* node)
2381{
2382 unsigned long upper, lower, middle;
2383 int found = 0;
2384
2385 /* Do a binary search for the key */
2386 lower = 1;
2387 upper = seq->length;
2388 middle = 0;
2389 while ( lower <= upper && !found) {
2390 middle = lower + (upper - lower) / 2;
2391 if ( node == seq->buffer[middle - 1].node )
2392 found = 1;
2393 else if ( node < seq->buffer[middle - 1].node )
2394 upper = middle - 1;
2395 else
2396 lower = middle + 1;
2397 }
2398
2399 /* Return position */
2400 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2401 return middle;
2402 else
2403 return middle - 1;
2404}
2405
2406
2407/**
2408 * xmlParserAddNodeInfo:
2409 * @ctxt: an XML parser context
2410 * @info: a node info sequence pointer
2411 *
2412 * Insert node info record into the sorted sequence
2413 */
2414void
2415xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2416 const xmlParserNodeInfo* info)
2417{
2418 unsigned long pos;
2419 static unsigned int block_size = 5;
2420
2421 /* Find pos and check to see if node is already in the sequence */
2422 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2423 if ( pos < ctxt->node_seq.length
2424 && ctxt->node_seq.buffer[pos].node == info->node ) {
2425 ctxt->node_seq.buffer[pos] = *info;
2426 }
2427
2428 /* Otherwise, we need to add new node to buffer */
2429 else {
2430 /* Expand buffer by 5 if needed */
2431 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2432 xmlParserNodeInfo* tmp_buffer;
2433 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2434 *(ctxt->node_seq.maximum + block_size));
2435
2436 if ( ctxt->node_seq.buffer == NULL )
2437 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2438 else
2439 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2440
2441 if ( tmp_buffer == NULL ) {
2442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2443 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2444 ctxt->errNo = XML_ERR_NO_MEMORY;
2445 return;
2446 }
2447 ctxt->node_seq.buffer = tmp_buffer;
2448 ctxt->node_seq.maximum += block_size;
2449 }
2450
2451 /* If position is not at end, move elements out of the way */
2452 if ( pos != ctxt->node_seq.length ) {
2453 unsigned long i;
2454
2455 for ( i = ctxt->node_seq.length; i > pos; i-- )
2456 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2457 }
2458
2459 /* Copy element and increase length */
2460 ctxt->node_seq.buffer[pos] = *info;
2461 ctxt->node_seq.length++;
2462 }
2463}
2464
2465/************************************************************************
2466 * *
2467 * Deprecated functions kept for compatibility *
2468 * *
2469 ************************************************************************/
2470
2471/*
2472 * xmlCheckLanguageID
2473 * @lang: pointer to the string value
2474 *
2475 * Checks that the value conforms to the LanguageID production:
2476 *
2477 * NOTE: this is somewhat deprecated, those productions were removed from
2478 * the XML Second edition.
2479 *
2480 * [33] LanguageID ::= Langcode ('-' Subcode)*
2481 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2482 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2483 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2484 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2485 * [38] Subcode ::= ([a-z] | [A-Z])+
2486 *
2487 * Returns 1 if correct 0 otherwise
2488 **/
2489int
2490xmlCheckLanguageID(const xmlChar *lang) {
2491 const xmlChar *cur = lang;
2492
2493 if (cur == NULL)
2494 return(0);
2495 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2496 ((cur[0] == 'I') && (cur[1] == '-'))) {
2497 /*
2498 * IANA code
2499 */
2500 cur += 2;
2501 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2502 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2503 cur++;
2504 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2505 ((cur[0] == 'X') && (cur[1] == '-'))) {
2506 /*
2507 * User code
2508 */
2509 cur += 2;
2510 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2511 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2512 cur++;
2513 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2514 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2515 /*
2516 * ISO639
2517 */
2518 cur++;
2519 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2520 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2521 cur++;
2522 else
2523 return(0);
2524 } else
2525 return(0);
2526 while (cur[0] != 0) { /* non input consuming */
2527 if (cur[0] != '-')
2528 return(0);
2529 cur++;
2530 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2531 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2532 cur++;
2533 else
2534 return(0);
2535 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2536 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2537 cur++;
2538 }
2539 return(1);
2540}
2541
2542/**
2543 * xmlDecodeEntities:
2544 * @ctxt: the parser context
2545 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2546 * @len: the len to decode (in bytes !), -1 for no size limit
2547 * @end: an end marker xmlChar, 0 if none
2548 * @end2: an end marker xmlChar, 0 if none
2549 * @end3: an end marker xmlChar, 0 if none
2550 *
2551 * This function is deprecated, we now always process entities content
2552 * through xmlStringDecodeEntities
2553 *
2554 * TODO: remove it in next major release.
2555 *
2556 * [67] Reference ::= EntityRef | CharRef
2557 *
2558 * [69] PEReference ::= '%' Name ';'
2559 *
2560 * Returns A newly allocated string with the substitution done. The caller
2561 * must deallocate it !
2562 */
2563xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002564xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2565 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002566#if 0
2567 xmlChar *buffer = NULL;
2568 unsigned int buffer_size = 0;
2569 unsigned int nbchars = 0;
2570
2571 xmlChar *current = NULL;
2572 xmlEntityPtr ent;
2573 unsigned int max = (unsigned int) len;
2574 int c,l;
2575#endif
2576
2577 static int deprecated = 0;
2578 if (!deprecated) {
2579 xmlGenericError(xmlGenericErrorContext,
2580 "xmlDecodeEntities() deprecated function reached\n");
2581 deprecated = 1;
2582 }
2583
2584#if 0
2585 if (ctxt->depth > 40) {
2586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2587 ctxt->sax->error(ctxt->userData,
2588 "Detected entity reference loop\n");
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2592 return(NULL);
2593 }
2594
2595 /*
2596 * allocate a translation buffer.
2597 */
2598 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2599 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2600 if (buffer == NULL) {
2601 perror("xmlDecodeEntities: malloc failed");
2602 return(NULL);
2603 }
2604
2605 /*
2606 * Ok loop until we reach one of the ending char or a size limit.
2607 */
2608 GROW;
2609 c = CUR_CHAR(l);
2610 while ((nbchars < max) && (c != end) && /* NOTUSED */
2611 (c != end2) && (c != end3)) {
2612 GROW;
2613 if (c == 0) break;
2614 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2615 int val = xmlParseCharRef(ctxt);
2616 COPY_BUF(0,buffer,nbchars,val);
2617 NEXTL(l);
2618 } else if ((c == '&') && (ctxt->token != '&') &&
2619 (what & XML_SUBSTITUTE_REF)) {
2620 if (xmlParserDebugEntities)
2621 xmlGenericError(xmlGenericErrorContext,
2622 "decoding Entity Reference\n");
2623 ent = xmlParseEntityRef(ctxt);
2624 if ((ent != NULL) &&
2625 (ctxt->replaceEntities != 0)) {
2626 current = ent->content;
2627 while (*current != 0) { /* non input consuming loop */
2628 buffer[nbchars++] = *current++;
2629 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2630 growBuffer(buffer);
2631 }
2632 }
2633 } else if (ent != NULL) {
2634 const xmlChar *cur = ent->name;
2635
2636 buffer[nbchars++] = '&';
2637 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2638 growBuffer(buffer);
2639 }
2640 while (*cur != 0) { /* non input consuming loop */
2641 buffer[nbchars++] = *cur++;
2642 }
2643 buffer[nbchars++] = ';';
2644 }
2645 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2646 /*
2647 * a PEReference induce to switch the entity flow,
2648 * we break here to flush the current set of chars
2649 * parsed if any. We will be called back later.
2650 */
2651 if (xmlParserDebugEntities)
2652 xmlGenericError(xmlGenericErrorContext,
2653 "decoding PE Reference\n");
2654 if (nbchars != 0) break;
2655
2656 xmlParsePEReference(ctxt);
2657
2658 /*
2659 * Pop-up of finished entities.
2660 */
2661 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2662 xmlPopInput(ctxt);
2663
2664 break;
2665 } else {
2666 COPY_BUF(l,buffer,nbchars,c);
2667 NEXTL(l);
2668 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2669 growBuffer(buffer);
2670 }
2671 }
2672 c = CUR_CHAR(l);
2673 }
2674 buffer[nbchars++] = 0;
2675 return(buffer);
2676#endif
2677 return(NULL);
2678}
2679
2680/**
2681 * xmlNamespaceParseNCName:
2682 * @ctxt: an XML parser context
2683 *
2684 * parse an XML namespace name.
2685 *
2686 * TODO: this seems not in use anymore, the namespace handling is done on
2687 * top of the SAX interfaces, i.e. not on raw input.
2688 *
2689 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2690 *
2691 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2692 * CombiningChar | Extender
2693 *
2694 * Returns the namespace name or NULL
2695 */
2696
2697xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002698xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002699#if 0
2700 xmlChar buf[XML_MAX_NAMELEN + 5];
2701 int len = 0, l;
2702 int cur = CUR_CHAR(l);
2703#endif
2704
2705 static int deprecated = 0;
2706 if (!deprecated) {
2707 xmlGenericError(xmlGenericErrorContext,
2708 "xmlNamespaceParseNCName() deprecated function reached\n");
2709 deprecated = 1;
2710 }
2711
2712#if 0
2713 /* load first the value of the char !!! */
2714 GROW;
2715 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2716
2717xmlGenericError(xmlGenericErrorContext,
2718 "xmlNamespaceParseNCName: reached loop 3\n");
2719 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2720 (cur == '.') || (cur == '-') ||
2721 (cur == '_') ||
2722 (IS_COMBINING(cur)) ||
2723 (IS_EXTENDER(cur))) {
2724 COPY_BUF(l,buf,len,cur);
2725 NEXTL(l);
2726 cur = CUR_CHAR(l);
2727 if (len >= XML_MAX_NAMELEN) {
2728 xmlGenericError(xmlGenericErrorContext,
2729 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2730 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2731 (cur == '.') || (cur == '-') ||
2732 (cur == '_') ||
2733 (IS_COMBINING(cur)) ||
2734 (IS_EXTENDER(cur))) {
2735 NEXTL(l);
2736 cur = CUR_CHAR(l);
2737 }
2738 break;
2739 }
2740 }
2741 return(xmlStrndup(buf, len));
2742#endif
2743 return(NULL);
2744}
2745
2746/**
2747 * xmlNamespaceParseQName:
2748 * @ctxt: an XML parser context
2749 * @prefix: a xmlChar **
2750 *
2751 * TODO: this seems not in use anymore, the namespace handling is done on
2752 * top of the SAX interfaces, i.e. not on raw input.
2753 *
2754 * parse an XML qualified name
2755 *
2756 * [NS 5] QName ::= (Prefix ':')? LocalPart
2757 *
2758 * [NS 6] Prefix ::= NCName
2759 *
2760 * [NS 7] LocalPart ::= NCName
2761 *
2762 * Returns the local part, and prefix is updated
2763 * to get the Prefix if any.
2764 */
2765
2766xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002767xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002768
2769 static int deprecated = 0;
2770 if (!deprecated) {
2771 xmlGenericError(xmlGenericErrorContext,
2772 "xmlNamespaceParseQName() deprecated function reached\n");
2773 deprecated = 1;
2774 }
2775
2776#if 0
2777 xmlChar *ret = NULL;
2778
2779 *prefix = NULL;
2780 ret = xmlNamespaceParseNCName(ctxt);
2781 if (RAW == ':') {
2782 *prefix = ret;
2783 NEXT;
2784 ret = xmlNamespaceParseNCName(ctxt);
2785 }
2786
2787 return(ret);
2788#endif
2789 return(NULL);
2790}
2791
2792/**
2793 * xmlNamespaceParseNSDef:
2794 * @ctxt: an XML parser context
2795 *
2796 * parse a namespace prefix declaration
2797 *
2798 * TODO: this seems not in use anymore, the namespace handling is done on
2799 * top of the SAX interfaces, i.e. not on raw input.
2800 *
2801 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2802 *
2803 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2804 *
2805 * Returns the namespace name
2806 */
2807
2808xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002809xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002810 static int deprecated = 0;
2811 if (!deprecated) {
2812 xmlGenericError(xmlGenericErrorContext,
2813 "xmlNamespaceParseNSDef() deprecated function reached\n");
2814 deprecated = 1;
2815 }
2816 return(NULL);
2817#if 0
2818 xmlChar *name = NULL;
2819
2820 if ((RAW == 'x') && (NXT(1) == 'm') &&
2821 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2822 (NXT(4) == 's')) {
2823 SKIP(5);
2824 if (RAW == ':') {
2825 NEXT;
2826 name = xmlNamespaceParseNCName(ctxt);
2827 }
2828 }
2829 return(name);
2830#endif
2831}
2832
2833/**
2834 * xmlParseQuotedString:
2835 * @ctxt: an XML parser context
2836 *
2837 * Parse and return a string between quotes or doublequotes
2838 *
2839 * TODO: Deprecated, to be removed at next drop of binary compatibility
2840 *
2841 * Returns the string parser or NULL.
2842 */
2843xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002844xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002845 static int deprecated = 0;
2846 if (!deprecated) {
2847 xmlGenericError(xmlGenericErrorContext,
2848 "xmlParseQuotedString() deprecated function reached\n");
2849 deprecated = 1;
2850 }
2851 return(NULL);
2852
2853#if 0
2854 xmlChar *buf = NULL;
2855 int len = 0,l;
2856 int size = XML_PARSER_BUFFER_SIZE;
2857 int c;
2858
2859 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2860 if (buf == NULL) {
2861 xmlGenericError(xmlGenericErrorContext,
2862 "malloc of %d byte failed\n", size);
2863 return(NULL);
2864 }
2865xmlGenericError(xmlGenericErrorContext,
2866 "xmlParseQuotedString: reached loop 4\n");
2867 if (RAW == '"') {
2868 NEXT;
2869 c = CUR_CHAR(l);
2870 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2871 if (len + 5 >= size) {
2872 size *= 2;
2873 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2874 if (buf == NULL) {
2875 xmlGenericError(xmlGenericErrorContext,
2876 "realloc of %d byte failed\n", size);
2877 return(NULL);
2878 }
2879 }
2880 COPY_BUF(l,buf,len,c);
2881 NEXTL(l);
2882 c = CUR_CHAR(l);
2883 }
2884 if (c != '"') {
2885 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2887 ctxt->sax->error(ctxt->userData,
2888 "String not closed \"%.50s\"\n", buf);
2889 ctxt->wellFormed = 0;
2890 ctxt->disableSAX = 1;
2891 } else {
2892 NEXT;
2893 }
2894 } else if (RAW == '\''){
2895 NEXT;
2896 c = CUR;
2897 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2898 if (len + 1 >= size) {
2899 size *= 2;
2900 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2901 if (buf == NULL) {
2902 xmlGenericError(xmlGenericErrorContext,
2903 "realloc of %d byte failed\n", size);
2904 return(NULL);
2905 }
2906 }
2907 buf[len++] = c;
2908 NEXT;
2909 c = CUR;
2910 }
2911 if (RAW != '\'') {
2912 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2914 ctxt->sax->error(ctxt->userData,
2915 "String not closed \"%.50s\"\n", buf);
2916 ctxt->wellFormed = 0;
2917 ctxt->disableSAX = 1;
2918 } else {
2919 NEXT;
2920 }
2921 }
2922 return(buf);
2923#endif
2924}
2925
2926/**
2927 * xmlParseNamespace:
2928 * @ctxt: an XML parser context
2929 *
2930 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2931 *
2932 * This is what the older xml-name Working Draft specified, a bunch of
2933 * other stuff may still rely on it, so support is still here as
2934 * if it was declared on the root of the Tree:-(
2935 *
2936 * TODO: remove from library
2937 *
2938 * To be removed at next drop of binary compatibility
2939 */
2940
2941void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002942xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002943 static int deprecated = 0;
2944 if (!deprecated) {
2945 xmlGenericError(xmlGenericErrorContext,
2946 "xmlParseNamespace() deprecated function reached\n");
2947 deprecated = 1;
2948 }
2949
2950#if 0
2951 xmlChar *href = NULL;
2952 xmlChar *prefix = NULL;
2953 int garbage = 0;
2954
2955 /*
2956 * We just skipped "namespace" or "xml:namespace"
2957 */
2958 SKIP_BLANKS;
2959
2960xmlGenericError(xmlGenericErrorContext,
2961 "xmlParseNamespace: reached loop 5\n");
2962 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2963 /*
2964 * We can have "ns" or "prefix" attributes
2965 * Old encoding as 'href' or 'AS' attributes is still supported
2966 */
2967 if ((RAW == 'n') && (NXT(1) == 's')) {
2968 garbage = 0;
2969 SKIP(2);
2970 SKIP_BLANKS;
2971
2972 if (RAW != '=') continue;
2973 NEXT;
2974 SKIP_BLANKS;
2975
2976 href = xmlParseQuotedString(ctxt);
2977 SKIP_BLANKS;
2978 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2979 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2980 garbage = 0;
2981 SKIP(4);
2982 SKIP_BLANKS;
2983
2984 if (RAW != '=') continue;
2985 NEXT;
2986 SKIP_BLANKS;
2987
2988 href = xmlParseQuotedString(ctxt);
2989 SKIP_BLANKS;
2990 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2991 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2992 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2993 garbage = 0;
2994 SKIP(6);
2995 SKIP_BLANKS;
2996
2997 if (RAW != '=') continue;
2998 NEXT;
2999 SKIP_BLANKS;
3000
3001 prefix = xmlParseQuotedString(ctxt);
3002 SKIP_BLANKS;
3003 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3004 garbage = 0;
3005 SKIP(2);
3006 SKIP_BLANKS;
3007
3008 if (RAW != '=') continue;
3009 NEXT;
3010 SKIP_BLANKS;
3011
3012 prefix = xmlParseQuotedString(ctxt);
3013 SKIP_BLANKS;
3014 } else if ((RAW == '?') && (NXT(1) == '>')) {
3015 garbage = 0;
3016 NEXT;
3017 } else {
3018 /*
3019 * Found garbage when parsing the namespace
3020 */
3021 if (!garbage) {
3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3023 ctxt->sax->error(ctxt->userData,
3024 "xmlParseNamespace found garbage\n");
3025 }
3026 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3027 ctxt->wellFormed = 0;
3028 ctxt->disableSAX = 1;
3029 NEXT;
3030 }
3031 }
3032
3033 MOVETO_ENDTAG(CUR_PTR);
3034 NEXT;
3035
3036 /*
3037 * Register the DTD.
3038 if (href != NULL)
3039 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3040 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3041 */
3042
3043 if (prefix != NULL) xmlFree(prefix);
3044 if (href != NULL) xmlFree(href);
3045#endif
3046}
3047
3048/**
3049 * xmlScanName:
3050 * @ctxt: an XML parser context
3051 *
3052 * Trickery: parse an XML name but without consuming the input flow
3053 * Needed for rollback cases. Used only when parsing entities references.
3054 *
3055 * TODO: seems deprecated now, only used in the default part of
3056 * xmlParserHandleReference
3057 *
3058 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3059 * CombiningChar | Extender
3060 *
3061 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3062 *
3063 * [6] Names ::= Name (S Name)*
3064 *
3065 * Returns the Name parsed or NULL
3066 */
3067
3068xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003069xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003070 static int deprecated = 0;
3071 if (!deprecated) {
3072 xmlGenericError(xmlGenericErrorContext,
3073 "xmlScanName() deprecated function reached\n");
3074 deprecated = 1;
3075 }
3076 return(NULL);
3077
3078#if 0
3079 xmlChar buf[XML_MAX_NAMELEN];
3080 int len = 0;
3081
3082 GROW;
3083 if (!IS_LETTER(RAW) && (RAW != '_') &&
3084 (RAW != ':')) {
3085 return(NULL);
3086 }
3087
3088
3089 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3090 (NXT(len) == '.') || (NXT(len) == '-') ||
3091 (NXT(len) == '_') || (NXT(len) == ':') ||
3092 (IS_COMBINING(NXT(len))) ||
3093 (IS_EXTENDER(NXT(len)))) {
3094 GROW;
3095 buf[len] = NXT(len);
3096 len++;
3097 if (len >= XML_MAX_NAMELEN) {
3098 xmlGenericError(xmlGenericErrorContext,
3099 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3100 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3101 (IS_DIGIT(NXT(len))) ||
3102 (NXT(len) == '.') || (NXT(len) == '-') ||
3103 (NXT(len) == '_') || (NXT(len) == ':') ||
3104 (IS_COMBINING(NXT(len))) ||
3105 (IS_EXTENDER(NXT(len))))
3106 len++;
3107 break;
3108 }
3109 }
3110 return(xmlStrndup(buf, len));
3111#endif
3112}
3113
3114/**
3115 * xmlParserHandleReference:
3116 * @ctxt: the parser context
3117 *
3118 * TODO: Remove, now deprecated ... the test is done directly in the
3119 * content parsing
3120 * routines.
3121 *
3122 * [67] Reference ::= EntityRef | CharRef
3123 *
3124 * [68] EntityRef ::= '&' Name ';'
3125 *
3126 * [ WFC: Entity Declared ]
3127 * the Name given in the entity reference must match that in an entity
3128 * declaration, except that well-formed documents need not declare any
3129 * of the following entities: amp, lt, gt, apos, quot.
3130 *
3131 * [ WFC: Parsed Entity ]
3132 * An entity reference must not contain the name of an unparsed entity
3133 *
3134 * [66] CharRef ::= '&#' [0-9]+ ';' |
3135 * '&#x' [0-9a-fA-F]+ ';'
3136 *
3137 * A PEReference may have been detectect in the current input stream
3138 * the handling is done accordingly to
3139 * http://www.w3.org/TR/REC-xml#entproc
3140 */
3141void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003142xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003143 static int deprecated = 0;
3144 if (!deprecated) {
3145 xmlGenericError(xmlGenericErrorContext,
3146 "xmlParserHandleReference() deprecated function reached\n");
3147 deprecated = 1;
3148 }
3149
3150#if 0
3151 xmlParserInputPtr input;
3152 xmlChar *name;
3153 xmlEntityPtr ent = NULL;
3154
3155 if (ctxt->token != 0) {
3156 return;
3157 }
3158 if (RAW != '&') return;
3159 GROW;
3160 if ((RAW == '&') && (NXT(1) == '#')) {
3161 switch(ctxt->instate) {
3162 case XML_PARSER_ENTITY_DECL:
3163 case XML_PARSER_PI:
3164 case XML_PARSER_CDATA_SECTION:
3165 case XML_PARSER_COMMENT:
3166 case XML_PARSER_SYSTEM_LITERAL:
3167 /* we just ignore it there */
3168 return;
3169 case XML_PARSER_START_TAG:
3170 return;
3171 case XML_PARSER_END_TAG:
3172 return;
3173 case XML_PARSER_EOF:
3174 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3176 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3177 ctxt->wellFormed = 0;
3178 ctxt->disableSAX = 1;
3179 return;
3180 case XML_PARSER_PROLOG:
3181 case XML_PARSER_START:
3182 case XML_PARSER_MISC:
3183 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3185 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3186 ctxt->wellFormed = 0;
3187 ctxt->disableSAX = 1;
3188 return;
3189 case XML_PARSER_EPILOG:
3190 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3192 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3193 ctxt->wellFormed = 0;
3194 ctxt->disableSAX = 1;
3195 return;
3196 case XML_PARSER_DTD:
3197 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3199 ctxt->sax->error(ctxt->userData,
3200 "CharRef are forbiden in DTDs!\n");
3201 ctxt->wellFormed = 0;
3202 ctxt->disableSAX = 1;
3203 return;
3204 case XML_PARSER_ENTITY_VALUE:
3205 /*
3206 * NOTE: in the case of entity values, we don't do the
3207 * substitution here since we need the literal
3208 * entity value to be able to save the internal
3209 * subset of the document.
3210 * This will be handled by xmlStringDecodeEntities
3211 */
3212 return;
3213 case XML_PARSER_CONTENT:
3214 return;
3215 case XML_PARSER_ATTRIBUTE_VALUE:
3216 /* ctxt->token = xmlParseCharRef(ctxt); */
3217 return;
3218 case XML_PARSER_IGNORE:
3219 return;
3220 }
3221 return;
3222 }
3223
3224 switch(ctxt->instate) {
3225 case XML_PARSER_CDATA_SECTION:
3226 return;
3227 case XML_PARSER_PI:
3228 case XML_PARSER_COMMENT:
3229 case XML_PARSER_SYSTEM_LITERAL:
3230 case XML_PARSER_CONTENT:
3231 return;
3232 case XML_PARSER_START_TAG:
3233 return;
3234 case XML_PARSER_END_TAG:
3235 return;
3236 case XML_PARSER_EOF:
3237 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3239 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3240 ctxt->wellFormed = 0;
3241 ctxt->disableSAX = 1;
3242 return;
3243 case XML_PARSER_PROLOG:
3244 case XML_PARSER_START:
3245 case XML_PARSER_MISC:
3246 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3248 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3249 ctxt->wellFormed = 0;
3250 ctxt->disableSAX = 1;
3251 return;
3252 case XML_PARSER_EPILOG:
3253 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3255 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3256 ctxt->wellFormed = 0;
3257 ctxt->disableSAX = 1;
3258 return;
3259 case XML_PARSER_ENTITY_VALUE:
3260 /*
3261 * NOTE: in the case of entity values, we don't do the
3262 * substitution here since we need the literal
3263 * entity value to be able to save the internal
3264 * subset of the document.
3265 * This will be handled by xmlStringDecodeEntities
3266 */
3267 return;
3268 case XML_PARSER_ATTRIBUTE_VALUE:
3269 /*
3270 * NOTE: in the case of attributes values, we don't do the
3271 * substitution here unless we are in a mode where
3272 * the parser is explicitely asked to substitute
3273 * entities. The SAX callback is called with values
3274 * without entity substitution.
3275 * This will then be handled by xmlStringDecodeEntities
3276 */
3277 return;
3278 case XML_PARSER_ENTITY_DECL:
3279 /*
3280 * we just ignore it there
3281 * the substitution will be done once the entity is referenced
3282 */
3283 return;
3284 case XML_PARSER_DTD:
3285 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288 "Entity references are forbiden in DTDs!\n");
3289 ctxt->wellFormed = 0;
3290 ctxt->disableSAX = 1;
3291 return;
3292 case XML_PARSER_IGNORE:
3293 return;
3294 }
3295
3296/* TODO: this seems not reached anymore .... Verify ... */
3297xmlGenericError(xmlGenericErrorContext,
3298 "Reached deprecated section in xmlParserHandleReference()\n");
3299xmlGenericError(xmlGenericErrorContext,
3300 "Please forward the document to Daniel.Veillard@w3.org\n");
3301xmlGenericError(xmlGenericErrorContext,
3302 "indicating the version: %s, thanks !\n", xmlParserVersion);
3303 NEXT;
3304 name = xmlScanName(ctxt);
3305 if (name == NULL) {
3306 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3308 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3309 ctxt->wellFormed = 0;
3310 ctxt->disableSAX = 1;
3311 ctxt->token = '&';
3312 return;
3313 }
3314 if (NXT(xmlStrlen(name)) != ';') {
3315 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3317 ctxt->sax->error(ctxt->userData,
3318 "Entity reference: ';' expected\n");
3319 ctxt->wellFormed = 0;
3320 ctxt->disableSAX = 1;
3321 ctxt->token = '&';
3322 xmlFree(name);
3323 return;
3324 }
3325 SKIP(xmlStrlen(name) + 1);
3326 if (ctxt->sax != NULL) {
3327 if (ctxt->sax->getEntity != NULL)
3328 ent = ctxt->sax->getEntity(ctxt->userData, name);
3329 }
3330
3331 /*
3332 * [ WFC: Entity Declared ]
3333 * the Name given in the entity reference must match that in an entity
3334 * declaration, except that well-formed documents need not declare any
3335 * of the following entities: amp, lt, gt, apos, quot.
3336 */
3337 if (ent == NULL)
3338 ent = xmlGetPredefinedEntity(name);
3339 if (ent == NULL) {
3340 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3342 ctxt->sax->error(ctxt->userData,
3343 "Entity reference: entity %s not declared\n",
3344 name);
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 xmlFree(name);
3348 return;
3349 }
3350
3351 /*
3352 * [ WFC: Parsed Entity ]
3353 * An entity reference must not contain the name of an unparsed entity
3354 */
3355 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3356 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3358 ctxt->sax->error(ctxt->userData,
3359 "Entity reference to unparsed entity %s\n", name);
3360 ctxt->wellFormed = 0;
3361 ctxt->disableSAX = 1;
3362 }
3363
3364 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3365 ctxt->token = ent->content[0];
3366 xmlFree(name);
3367 return;
3368 }
3369 input = xmlNewEntityInputStream(ctxt, ent);
3370 xmlPushInput(ctxt, input);
3371 xmlFree(name);
3372#endif
3373 return;
3374}
3375
3376/**
3377 * xmlHandleEntity:
3378 * @ctxt: an XML parser context
3379 * @entity: an XML entity pointer.
3380 *
3381 * Default handling of defined entities, when should we define a new input
3382 * stream ? When do we just handle that as a set of chars ?
3383 *
3384 * OBSOLETE: to be removed at some point.
3385 */
3386
3387void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003388xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003389 static int deprecated = 0;
3390 if (!deprecated) {
3391 xmlGenericError(xmlGenericErrorContext,
3392 "xmlHandleEntity() deprecated function reached\n");
3393 deprecated = 1;
3394 }
3395
3396#if 0
3397 int len;
3398 xmlParserInputPtr input;
3399
3400 if (entity->content == NULL) {
3401 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3403 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3404 entity->name);
3405 ctxt->wellFormed = 0;
3406 ctxt->disableSAX = 1;
3407 return;
3408 }
3409 len = xmlStrlen(entity->content);
3410 if (len <= 2) goto handle_as_char;
3411
3412 /*
3413 * Redefine its content as an input stream.
3414 */
3415 input = xmlNewEntityInputStream(ctxt, entity);
3416 xmlPushInput(ctxt, input);
3417 return;
3418
3419handle_as_char:
3420 /*
3421 * Just handle the content as a set of chars.
3422 */
3423 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3424 (ctxt->sax->characters != NULL))
3425 ctxt->sax->characters(ctxt->userData, entity->content, len);
3426#endif
3427}
3428
3429/**
3430 * xmlNewGlobalNs:
3431 * @doc: the document carrying the namespace
3432 * @href: the URI associated
3433 * @prefix: the prefix for the namespace
3434 *
3435 * Creation of a Namespace, the old way using PI and without scoping
3436 * DEPRECATED !!!
3437 * It now create a namespace on the root element of the document if found.
3438 * Returns NULL this functionnality had been removed
3439 */
3440xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003441xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3442 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003443 static int deprecated = 0;
3444 if (!deprecated) {
3445 xmlGenericError(xmlGenericErrorContext,
3446 "xmlNewGlobalNs() deprecated function reached\n");
3447 deprecated = 1;
3448 }
3449 return(NULL);
3450#if 0
3451 xmlNodePtr root;
3452
3453 xmlNsPtr cur;
3454
3455 root = xmlDocGetRootElement(doc);
3456 if (root != NULL)
3457 return(xmlNewNs(root, href, prefix));
3458
3459 /*
3460 * if there is no root element yet, create an old Namespace type
3461 * and it will be moved to the root at save time.
3462 */
3463 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3464 if (cur == NULL) {
3465 xmlGenericError(xmlGenericErrorContext,
3466 "xmlNewGlobalNs : malloc failed\n");
3467 return(NULL);
3468 }
3469 memset(cur, 0, sizeof(xmlNs));
3470 cur->type = XML_GLOBAL_NAMESPACE;
3471
3472 if (href != NULL)
3473 cur->href = xmlStrdup(href);
3474 if (prefix != NULL)
3475 cur->prefix = xmlStrdup(prefix);
3476
3477 /*
3478 * Add it at the end to preserve parsing order ...
3479 */
3480 if (doc != NULL) {
3481 if (doc->oldNs == NULL) {
3482 doc->oldNs = cur;
3483 } else {
3484 xmlNsPtr prev = doc->oldNs;
3485
3486 while (prev->next != NULL) prev = prev->next;
3487 prev->next = cur;
3488 }
3489 }
3490
3491 return(NULL);
3492#endif
3493}
3494
3495/**
3496 * xmlUpgradeOldNs:
3497 * @doc: a document pointer
3498 *
3499 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3500 * DEPRECATED
3501 */
3502void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003503xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003504 static int deprecated = 0;
3505 if (!deprecated) {
3506 xmlGenericError(xmlGenericErrorContext,
3507 "xmlNewGlobalNs() deprecated function reached\n");
3508 deprecated = 1;
3509 }
3510#if 0
3511 xmlNsPtr cur;
3512
3513 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3514 if (doc->children == NULL) {
3515#ifdef DEBUG_TREE
3516 xmlGenericError(xmlGenericErrorContext,
3517 "xmlUpgradeOldNs: failed no root !\n");
3518#endif
3519 return;
3520 }
3521
3522 cur = doc->oldNs;
3523 while (cur->next != NULL) {
3524 cur->type = XML_LOCAL_NAMESPACE;
3525 cur = cur->next;
3526 }
3527 cur->type = XML_LOCAL_NAMESPACE;
3528 cur->next = doc->children->nsDef;
3529 doc->children->nsDef = doc->oldNs;
3530 doc->oldNs = NULL;
3531#endif
3532}
3533