/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
49
Daniel Veillard56a4cb82001-03-24 17:00:36 +000050void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000051
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000070 xmlInitMemory();
71
Owen Taylor3473f882001-02-23 17:55:21 +000072 if ((myversion / 10000) != (version / 10000)) {
73 xmlGenericError(xmlGenericErrorContext,
74 "Fatal: program compiled against libxml %d using libxml %d\n",
75 (version / 10000), (myversion / 10000));
76 exit(1);
77 }
78 if ((myversion / 100) < (version / 100)) {
79 xmlGenericError(xmlGenericErrorContext,
80 "Warning: program compiled against libxml %d using older %d\n",
81 (version / 100), (myversion / 100));
82 }
83}
84
85
/*
 * Names of the features advertised through xmlGetFeaturesList().
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 *
 * If @len or @result is NULL nothing is copied and only the total number
 * of features is returned, which lets a caller size its array first.
 *
 * Returns -1 in case of error (*@len negative or >= 1000), or the total
 *            number of features,
 *            len is updated with the number of strings copied,
 *            strings must not be deallocated
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    int total = (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int i;

    if ((len == NULL) || (result == NULL))
        return(total);
    if ((*len < 0) || (*len >= 1000))
        return(-1);
    /* never copy more names than the table holds */
    if (*len > total)
        *len = total;
    for (i = 0; i < *len; i++)
        result[i] = xmlFeaturesList[i];
    return(total);
}
157
158/*
159 * xmlGetFeature:
160 * @ctxt: an XML/HTML parser context
161 * @name: the feature name
162 * @result: location to store the result
163 *
164 * Read the current value of one feature of this parser instance
165 *
166 * Returns -1 in case or error, 0 otherwise
167 */
168int
169xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
170 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
171 return(-1);
172
173 if (!strcmp(name, "validate")) {
174 *((int *) result) = ctxt->validate;
175 } else if (!strcmp(name, "keep blanks")) {
176 *((int *) result) = ctxt->keepBlanks;
177 } else if (!strcmp(name, "disable SAX")) {
178 *((int *) result) = ctxt->disableSAX;
179 } else if (!strcmp(name, "fetch external entities")) {
180 *((int *) result) = ctxt->loadsubset;
181 } else if (!strcmp(name, "substitute entities")) {
182 *((int *) result) = ctxt->replaceEntities;
183 } else if (!strcmp(name, "gather line info")) {
184 *((int *) result) = ctxt->record_info;
185 } else if (!strcmp(name, "user data")) {
186 *((void **)result) = ctxt->userData;
187 } else if (!strcmp(name, "is html")) {
188 *((int *) result) = ctxt->html;
189 } else if (!strcmp(name, "is standalone")) {
190 *((int *) result) = ctxt->standalone;
191 } else if (!strcmp(name, "document")) {
192 *((xmlDocPtr *) result) = ctxt->myDoc;
193 } else if (!strcmp(name, "is well formed")) {
194 *((int *) result) = ctxt->wellFormed;
195 } else if (!strcmp(name, "is valid")) {
196 *((int *) result) = ctxt->valid;
197 } else if (!strcmp(name, "SAX block")) {
198 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
199 } else if (!strcmp(name, "SAX function internalSubset")) {
200 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
201 } else if (!strcmp(name, "SAX function isStandalone")) {
202 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
203 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
204 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
205 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
206 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
207 } else if (!strcmp(name, "SAX function resolveEntity")) {
208 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
209 } else if (!strcmp(name, "SAX function getEntity")) {
210 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
211 } else if (!strcmp(name, "SAX function entityDecl")) {
212 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
213 } else if (!strcmp(name, "SAX function notationDecl")) {
214 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
215 } else if (!strcmp(name, "SAX function attributeDecl")) {
216 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
217 } else if (!strcmp(name, "SAX function elementDecl")) {
218 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
219 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
220 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
221 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
222 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
223 } else if (!strcmp(name, "SAX function startDocument")) {
224 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
225 } else if (!strcmp(name, "SAX function endDocument")) {
226 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
227 } else if (!strcmp(name, "SAX function startElement")) {
228 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
229 } else if (!strcmp(name, "SAX function endElement")) {
230 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
231 } else if (!strcmp(name, "SAX function reference")) {
232 *((referenceSAXFunc *) result) = ctxt->sax->reference;
233 } else if (!strcmp(name, "SAX function characters")) {
234 *((charactersSAXFunc *) result) = ctxt->sax->characters;
235 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
236 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
237 } else if (!strcmp(name, "SAX function processingInstruction")) {
238 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
239 } else if (!strcmp(name, "SAX function comment")) {
240 *((commentSAXFunc *) result) = ctxt->sax->comment;
241 } else if (!strcmp(name, "SAX function warning")) {
242 *((warningSAXFunc *) result) = ctxt->sax->warning;
243 } else if (!strcmp(name, "SAX function error")) {
244 *((errorSAXFunc *) result) = ctxt->sax->error;
245 } else if (!strcmp(name, "SAX function fatalError")) {
246 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
247 } else if (!strcmp(name, "SAX function getParameterEntity")) {
248 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
249 } else if (!strcmp(name, "SAX function cdataBlock")) {
250 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
251 } else if (!strcmp(name, "SAX function externalSubset")) {
252 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
253 } else {
254 return(-1);
255 }
256 return(0);
257}
258
259/*
260 * xmlSetFeature:
261 * @ctxt: an XML/HTML parser context
262 * @name: the feature name
263 * @value: pointer to the location of the new value
264 *
265 * Change the current value of one feature of this parser instance
266 *
267 * Returns -1 in case or error, 0 otherwise
268 */
269int
270xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
271 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
272 return(-1);
273
274 if (!strcmp(name, "validate")) {
275 int newvalidate = *((int *) value);
276 if ((!ctxt->validate) && (newvalidate != 0)) {
277 if (ctxt->vctxt.warning == NULL)
278 ctxt->vctxt.warning = xmlParserValidityWarning;
279 if (ctxt->vctxt.error == NULL)
280 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000281 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000282 }
283 ctxt->validate = newvalidate;
284 } else if (!strcmp(name, "keep blanks")) {
285 ctxt->keepBlanks = *((int *) value);
286 } else if (!strcmp(name, "disable SAX")) {
287 ctxt->disableSAX = *((int *) value);
288 } else if (!strcmp(name, "fetch external entities")) {
289 ctxt->loadsubset = *((int *) value);
290 } else if (!strcmp(name, "substitute entities")) {
291 ctxt->replaceEntities = *((int *) value);
292 } else if (!strcmp(name, "gather line info")) {
293 ctxt->record_info = *((int *) value);
294 } else if (!strcmp(name, "user data")) {
295 ctxt->userData = *((void **)value);
296 } else if (!strcmp(name, "is html")) {
297 ctxt->html = *((int *) value);
298 } else if (!strcmp(name, "is standalone")) {
299 ctxt->standalone = *((int *) value);
300 } else if (!strcmp(name, "document")) {
301 ctxt->myDoc = *((xmlDocPtr *) value);
302 } else if (!strcmp(name, "is well formed")) {
303 ctxt->wellFormed = *((int *) value);
304 } else if (!strcmp(name, "is valid")) {
305 ctxt->valid = *((int *) value);
306 } else if (!strcmp(name, "SAX block")) {
307 ctxt->sax = *((xmlSAXHandlerPtr *) value);
308 } else if (!strcmp(name, "SAX function internalSubset")) {
309 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
310 } else if (!strcmp(name, "SAX function isStandalone")) {
311 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
312 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
313 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
314 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
315 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function resolveEntity")) {
317 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
318 } else if (!strcmp(name, "SAX function getEntity")) {
319 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
320 } else if (!strcmp(name, "SAX function entityDecl")) {
321 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function notationDecl")) {
323 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function attributeDecl")) {
325 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function elementDecl")) {
327 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
329 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
331 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function startDocument")) {
333 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function endDocument")) {
335 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function startElement")) {
337 ctxt->sax->startElement = *((startElementSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function endElement")) {
339 ctxt->sax->endElement = *((endElementSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function reference")) {
341 ctxt->sax->reference = *((referenceSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function characters")) {
343 ctxt->sax->characters = *((charactersSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
345 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function processingInstruction")) {
347 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function comment")) {
349 ctxt->sax->comment = *((commentSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function warning")) {
351 ctxt->sax->warning = *((warningSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function error")) {
353 ctxt->sax->error = *((errorSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function fatalError")) {
355 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function getParameterEntity")) {
357 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
358 } else if (!strcmp(name, "SAX function cdataBlock")) {
359 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function externalSubset")) {
361 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
362 } else {
363 return(-1);
364 }
365 return(0);
366}
367
/************************************************************************
 *									*
 * 		Some functions to avoid too large macros		*
 *									*
 ************************************************************************/
373
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    return(
        (c == 0x09) || (c == 0x0A) || (c == 0x0D) ||
        ((c >= 0x20) && (c <= 0xD7FF)) ||
        ((c >= 0xE000) && (c <= 0xFFFD)) ||
        ((c >= 0x10000) && (c <= 0x10FFFF)));
}
394
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    return((c == 0x20) || (c == 0x09) || (c == 0x0A) || (c == 0x0D));
}
409
/*
 * Lookup table for the code points 0x0000 - 0x00FF: 1 when the code point
 * is a BaseChar, 0 otherwise.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * BaseChar ranges at or above 0x0100, as inclusive [lo, hi] pairs.
 * The table MUST stay sorted in ascending order and free of overlaps:
 * xmlIsBaseChar() runs a binary search over it.
 */
static const struct {
    int lo;
    int hi;
} xmlBaseCharRanges[] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69},
    {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103},
    {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110B, 0x110C}, {0x110E, 0x1112},
    {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
    {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
    {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
    {0x1169, 0x1169}, {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
    {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB}, {0x11AE, 0x11AF},
    {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2}, {0x11EB, 0x11EB},
    {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
    {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
    {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
    {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
    {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
    {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
    {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
    {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; higher ones by
 * a binary search in xmlBaseCharRanges. Negative values are rejected
 * instead of being used as an array index.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid].lo)
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid].hi)
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
648
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* ASCII digits */
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);
    /* accelerator: no other digit range starts below 0x0660 */
    if (c < 0x0660)
        return(0);
    return(
        ((c >= 0x0660) && (c <= 0x0669)) ||
        ((c >= 0x06F0) && (c <= 0x06F9)) ||
        ((c >= 0x0966) && (c <= 0x096F)) ||
        ((c >= 0x09E6) && (c <= 0x09EF)) ||
        ((c >= 0x0A66) && (c <= 0x0A6F)) ||
        ((c >= 0x0AE6) && (c <= 0x0AEF)) ||
        ((c >= 0x0B66) && (c <= 0x0B6F)) ||
        ((c >= 0x0BE7) && (c <= 0x0BEF)) ||
        ((c >= 0x0C66) && (c <= 0x0C6F)) ||
        ((c >= 0x0CE6) && (c <= 0x0CEF)) ||
        ((c >= 0x0D66) && (c <= 0x0D6F)) ||
        ((c >= 0x0E50) && (c <= 0x0E59)) ||
        ((c >= 0x0ED0) && (c <= 0x0ED9)) ||
        ((c >= 0x0F20) && (c <= 0x0F29)));
}
678
/*
 * CombiningChar ranges, as inclusive [lo, hi] pairs.
 * The table MUST stay sorted in ascending order and free of overlaps:
 * xmlIsCombining() runs a binary search over it.
 */
static const struct {
    int lo;
    int hi;
} xmlCombiningRanges[] = {
    {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
    {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
    {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
    {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
    {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
    {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
    {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
    {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
    {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F},
    {0x0A40, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
    {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
    {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43},
    {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
    {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
    {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
    {0x0C55, 0x0C56}, {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
    {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
    {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},
    {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
    {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
    {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
    {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
    {0x0F97, 0x0F97}, {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
    {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
    {0x309A, 0x309A}
};

/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The lookup is a binary search in xmlCombiningRanges.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    int low, high;

    /* accelerator: the lowest combining character is 0x0300 */
    if (c < 0x0300)
        return(0);

    low = 0;
    high = (int) (sizeof(xmlCombiningRanges) /
                  sizeof(xmlCombiningRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlCombiningRanges[mid].lo)
            high = mid - 1;
        else if (c > xmlCombiningRanges[mid].hi)
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
791
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035: case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE] is a range of three code points, 0x30FD included */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
816
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; code points
 * in [#xF900-#xFA2D] are not part of it and are rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* accelerator: every ideographic code point is above 0x00FF */
    if (c < 0x0100)
        return(0);
    return(
        ((c >= 0x4E00) && (c <= 0x9FA5)) ||
        ((c >= 0x3021) && (c <= 0x3029)) ||
        (c == 0x3007));
}
834
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
848
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* the three allowed white space characters */
        case 0x20: case 0x0D: case 0x0A:
        /* the punctuation set [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
871
872/************************************************************************
873 * *
874 * Input handling functions for progressive parsing *
875 * *
876 ************************************************************************/
877
878/* #define DEBUG_INPUT */
879/* #define DEBUG_STACK */
880/* #define DEBUG_PUSH */
881
882
883/* we need to keep enough input to show errors in context */
884#define LINE_LEN 80
885
886#ifdef DEBUG_INPUT
887#define CHECK_BUFFER(in) check_buffer(in)
888
889void check_buffer(xmlParserInputPtr in) {
890 if (in->base != in->buf->buffer->content) {
891 xmlGenericError(xmlGenericErrorContext,
892 "xmlParserInput: base mismatch problem\n");
893 }
894 if (in->cur < in->base) {
895 xmlGenericError(xmlGenericErrorContext,
896 "xmlParserInput: cur < base problem\n");
897 }
898 if (in->cur > in->base + in->buf->buffer->use) {
899 xmlGenericError(xmlGenericErrorContext,
900 "xmlParserInput: cur > base + use problem\n");
901 }
902 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
903 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
904 in->buf->buffer->use, in->buf->buffer->size);
905}
906
907#else
908#define CHECK_BUFFER(in)
909#endif
910
911
912/**
913 * xmlParserInputRead:
914 * @in: an XML parser input
915 * @len: an indicative size for the lookahead
916 *
917 * This function refresh the input for the parser. It doesn't try to
918 * preserve pointers to the input buffer, and discard already read data
919 *
920 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
921 * end of this entity
922 */
923int
924xmlParserInputRead(xmlParserInputPtr in, int len) {
925 int ret;
926 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000927 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000928
929#ifdef DEBUG_INPUT
930 xmlGenericError(xmlGenericErrorContext, "Read\n");
931#endif
932 if (in->buf == NULL) return(-1);
933 if (in->base == NULL) return(-1);
934 if (in->cur == NULL) return(-1);
935 if (in->buf->buffer == NULL) return(-1);
936 if (in->buf->readcallback == NULL) return(-1);
937
938 CHECK_BUFFER(in);
939
940 used = in->cur - in->buf->buffer->content;
941 ret = xmlBufferShrink(in->buf->buffer, used);
942 if (ret > 0) {
943 in->cur -= ret;
944 in->consumed += ret;
945 }
946 ret = xmlParserInputBufferRead(in->buf, len);
947 if (in->base != in->buf->buffer->content) {
948 /*
949 * the buffer has been realloced
950 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000951 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000952 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000953 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000954 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000955 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000956
957 CHECK_BUFFER(in);
958
959 return(ret);
960}
961
962/**
963 * xmlParserInputGrow:
964 * @in: an XML parser input
965 * @len: an indicative size for the lookahead
966 *
967 * This function increase the input for the parser. It tries to
968 * preserve pointers to the input buffer, and keep already read data
969 *
970 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
971 * end of this entity
972 */
973int
974xmlParserInputGrow(xmlParserInputPtr in, int len) {
975 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000976 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000977
978#ifdef DEBUG_INPUT
979 xmlGenericError(xmlGenericErrorContext, "Grow\n");
980#endif
981 if (in->buf == NULL) return(-1);
982 if (in->base == NULL) return(-1);
983 if (in->cur == NULL) return(-1);
984 if (in->buf->buffer == NULL) return(-1);
985
986 CHECK_BUFFER(in);
987
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000988 indx = in->cur - in->base;
989 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000990
991 CHECK_BUFFER(in);
992
993 return(0);
994 }
995 if (in->buf->readcallback != NULL)
996 ret = xmlParserInputBufferGrow(in->buf, len);
997 else
998 return(0);
999
1000 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001001 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001002 * block, but we use it really as an integer to do some
1003 * pointer arithmetic. Insure will raise it as a bug but in
1004 * that specific case, that's not !
1005 */
1006 if (in->base != in->buf->buffer->content) {
1007 /*
1008 * the buffer has been realloced
1009 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001010 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001011 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001012 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001014 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001015
1016 CHECK_BUFFER(in);
1017
1018 return(ret);
1019}
1020
1021/**
1022 * xmlParserInputShrink:
1023 * @in: an XML parser input
1024 *
1025 * This function removes used input for the parser.
1026 */
1027void
1028xmlParserInputShrink(xmlParserInputPtr in) {
1029 int used;
1030 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001031 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001032
1033#ifdef DEBUG_INPUT
1034 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1035#endif
1036 if (in->buf == NULL) return;
1037 if (in->base == NULL) return;
1038 if (in->cur == NULL) return;
1039 if (in->buf->buffer == NULL) return;
1040
1041 CHECK_BUFFER(in);
1042
1043 used = in->cur - in->buf->buffer->content;
1044 /*
1045 * Do not shrink on large buffers whose only a tiny fraction
1046 * was consumned
1047 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001048 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001049 return;
1050 if (used > INPUT_CHUNK) {
1051 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1052 if (ret > 0) {
1053 in->cur -= ret;
1054 in->consumed += ret;
1055 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001056 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001057 }
1058
1059 CHECK_BUFFER(in);
1060
1061 if (in->buf->buffer->use > INPUT_CHUNK) {
1062 return;
1063 }
1064 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1065 if (in->base != in->buf->buffer->content) {
1066 /*
1067 * the buffer has been realloced
1068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001071 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001072 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001073 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001074
1075 CHECK_BUFFER(in);
1076}
1077
1078/************************************************************************
1079 * *
1080 * UTF8 character input and related functions *
1081 * *
1082 ************************************************************************/
1083
1084/**
1085 * xmlNextChar:
1086 * @ctxt: the XML parser context
1087 *
1088 * Skip to the next char input char.
1089 */
1090
1091void
1092xmlNextChar(xmlParserCtxtPtr ctxt) {
1093 if (ctxt->instate == XML_PARSER_EOF)
1094 return;
1095
1096 /*
1097 * 2.11 End-of-Line Handling
1098 * the literal two-character sequence "#xD#xA" or a standalone
1099 * literal #xD, an XML processor must pass to the application
1100 * the single character #xA.
1101 */
1102 if (ctxt->token != 0) ctxt->token = 0;
1103 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1104 if ((*ctxt->input->cur == 0) &&
1105 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1106 (ctxt->instate != XML_PARSER_COMMENT)) {
1107 /*
1108 * If we are at the end of the current entity and
1109 * the context allows it, we pop consumed entities
1110 * automatically.
1111 * the auto closing should be blocked in other cases
1112 */
1113 xmlPopInput(ctxt);
1114 } else {
1115 if (*(ctxt->input->cur) == '\n') {
1116 ctxt->input->line++; ctxt->input->col = 1;
1117 } else ctxt->input->col++;
1118 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1119 /*
1120 * We are supposed to handle UTF8, check it's valid
1121 * From rfc2044: encoding of the Unicode values on UTF-8:
1122 *
1123 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1124 * 0000 0000-0000 007F 0xxxxxxx
1125 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1126 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1127 *
1128 * Check for the 0x110000 limit too
1129 */
1130 const unsigned char *cur = ctxt->input->cur;
1131 unsigned char c;
1132
1133 c = *cur;
1134 if (c & 0x80) {
1135 if (cur[1] == 0)
1136 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1137 if ((cur[1] & 0xc0) != 0x80)
1138 goto encoding_error;
1139 if ((c & 0xe0) == 0xe0) {
1140 unsigned int val;
1141
1142 if (cur[2] == 0)
1143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1144 if ((cur[2] & 0xc0) != 0x80)
1145 goto encoding_error;
1146 if ((c & 0xf0) == 0xf0) {
1147 if (cur[3] == 0)
1148 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1149 if (((c & 0xf8) != 0xf0) ||
1150 ((cur[3] & 0xc0) != 0x80))
1151 goto encoding_error;
1152 /* 4-byte code */
1153 ctxt->input->cur += 4;
1154 val = (cur[0] & 0x7) << 18;
1155 val |= (cur[1] & 0x3f) << 12;
1156 val |= (cur[2] & 0x3f) << 6;
1157 val |= cur[3] & 0x3f;
1158 } else {
1159 /* 3-byte code */
1160 ctxt->input->cur += 3;
1161 val = (cur[0] & 0xf) << 12;
1162 val |= (cur[1] & 0x3f) << 6;
1163 val |= cur[2] & 0x3f;
1164 }
1165 if (((val > 0xd7ff) && (val < 0xe000)) ||
1166 ((val > 0xfffd) && (val < 0x10000)) ||
1167 (val >= 0x110000)) {
1168 if ((ctxt->sax != NULL) &&
1169 (ctxt->sax->error != NULL))
1170 ctxt->sax->error(ctxt->userData,
1171 "Char 0x%X out of allowed range\n", val);
1172 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1173 ctxt->wellFormed = 0;
1174 ctxt->disableSAX = 1;
1175 }
1176 } else
1177 /* 2-byte code */
1178 ctxt->input->cur += 2;
1179 } else
1180 /* 1-byte code */
1181 ctxt->input->cur++;
1182 } else {
1183 /*
1184 * Assume it's a fixed lenght encoding (1) with
1185 * a compatibke encoding for the ASCII set, since
1186 * XML constructs only use < 128 chars
1187 */
1188 ctxt->input->cur++;
1189 }
1190 ctxt->nbChars++;
1191 if (*ctxt->input->cur == 0)
1192 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1193 }
1194 } else {
1195 ctxt->input->cur++;
1196 ctxt->nbChars++;
1197 if (*ctxt->input->cur == 0)
1198 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1199 }
1200 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1201 xmlParserHandlePEReference(ctxt);
1202 if ((*ctxt->input->cur == 0) &&
1203 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1204 xmlPopInput(ctxt);
1205 return;
1206encoding_error:
1207 /*
1208 * If we detect an UTF8 error that probably mean that the
1209 * input encoding didn't get properly advertized in the
1210 * declaration header. Report the error and switch the encoding
1211 * to ISO-Latin-1 (if you don't like this policy, just declare the
1212 * encoding !)
1213 */
1214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1215 ctxt->sax->error(ctxt->userData,
1216 "Input is not proper UTF-8, indicate encoding !\n");
1217 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1218 ctxt->input->cur[0], ctxt->input->cur[1],
1219 ctxt->input->cur[2], ctxt->input->cur[3]);
1220 }
1221 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1222
1223 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1224 ctxt->input->cur++;
1225 return;
1226}
1227
1228/**
1229 * xmlCurrentChar:
1230 * @ctxt: the XML parser context
1231 * @len: pointer to the length of the char read
1232 *
1233 * The current char value, if using UTF-8 this may actaully span multiple
1234 * bytes in the input buffer. Implement the end of line normalization:
1235 * 2.11 End-of-Line Handling
1236 * Wherever an external parsed entity or the literal entity value
1237 * of an internal parsed entity contains either the literal two-character
1238 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1239 * must pass to the application the single character #xA.
1240 * This behavior can conveniently be produced by normalizing all
1241 * line breaks to #xA on input, before parsing.)
1242 *
1243 * Returns the current char value and its lenght
1244 */
1245
1246int
1247xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1248 if (ctxt->instate == XML_PARSER_EOF)
1249 return(0);
1250
1251 if (ctxt->token != 0) {
1252 *len = 0;
1253 return(ctxt->token);
1254 }
1255 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1256 *len = 1;
1257 return((int) *ctxt->input->cur);
1258 }
1259 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1260 /*
1261 * We are supposed to handle UTF8, check it's valid
1262 * From rfc2044: encoding of the Unicode values on UTF-8:
1263 *
1264 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1265 * 0000 0000-0000 007F 0xxxxxxx
1266 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1267 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1268 *
1269 * Check for the 0x110000 limit too
1270 */
1271 const unsigned char *cur = ctxt->input->cur;
1272 unsigned char c;
1273 unsigned int val;
1274
1275 c = *cur;
1276 if (c & 0x80) {
1277 if (cur[1] == 0)
1278 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1279 if ((cur[1] & 0xc0) != 0x80)
1280 goto encoding_error;
1281 if ((c & 0xe0) == 0xe0) {
1282
1283 if (cur[2] == 0)
1284 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1285 if ((cur[2] & 0xc0) != 0x80)
1286 goto encoding_error;
1287 if ((c & 0xf0) == 0xf0) {
1288 if (cur[3] == 0)
1289 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1290 if (((c & 0xf8) != 0xf0) ||
1291 ((cur[3] & 0xc0) != 0x80))
1292 goto encoding_error;
1293 /* 4-byte code */
1294 *len = 4;
1295 val = (cur[0] & 0x7) << 18;
1296 val |= (cur[1] & 0x3f) << 12;
1297 val |= (cur[2] & 0x3f) << 6;
1298 val |= cur[3] & 0x3f;
1299 } else {
1300 /* 3-byte code */
1301 *len = 3;
1302 val = (cur[0] & 0xf) << 12;
1303 val |= (cur[1] & 0x3f) << 6;
1304 val |= cur[2] & 0x3f;
1305 }
1306 } else {
1307 /* 2-byte code */
1308 *len = 2;
1309 val = (cur[0] & 0x1f) << 6;
1310 val |= cur[1] & 0x3f;
1311 }
1312 if (!IS_CHAR(val)) {
1313 if ((ctxt->sax != NULL) &&
1314 (ctxt->sax->error != NULL))
1315 ctxt->sax->error(ctxt->userData,
1316 "Char 0x%X out of allowed range\n", val);
1317 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1318 ctxt->wellFormed = 0;
1319 ctxt->disableSAX = 1;
1320 }
1321 return(val);
1322 } else {
1323 /* 1-byte code */
1324 *len = 1;
1325 if (*ctxt->input->cur == 0xD) {
1326 if (ctxt->input->cur[1] == 0xA) {
1327 ctxt->nbChars++;
1328 ctxt->input->cur++;
1329 }
1330 return(0xA);
1331 }
1332 return((int) *ctxt->input->cur);
1333 }
1334 }
1335 /*
1336 * Assume it's a fixed lenght encoding (1) with
1337 * a compatibke encoding for the ASCII set, since
1338 * XML constructs only use < 128 chars
1339 */
1340 *len = 1;
1341 if (*ctxt->input->cur == 0xD) {
1342 if (ctxt->input->cur[1] == 0xA) {
1343 ctxt->nbChars++;
1344 ctxt->input->cur++;
1345 }
1346 return(0xA);
1347 }
1348 return((int) *ctxt->input->cur);
1349encoding_error:
1350 /*
1351 * If we detect an UTF8 error that probably mean that the
1352 * input encoding didn't get properly advertized in the
1353 * declaration header. Report the error and switch the encoding
1354 * to ISO-Latin-1 (if you don't like this policy, just declare the
1355 * encoding !)
1356 */
1357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1358 ctxt->sax->error(ctxt->userData,
1359 "Input is not proper UTF-8, indicate encoding !\n");
1360 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1361 ctxt->input->cur[0], ctxt->input->cur[1],
1362 ctxt->input->cur[2], ctxt->input->cur[3]);
1363 }
1364 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1365
1366 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1367 *len = 1;
1368 return((int) *ctxt->input->cur);
1369}
1370
1371/**
1372 * xmlStringCurrentChar:
1373 * @ctxt: the XML parser context
1374 * @cur: pointer to the beginning of the char
1375 * @len: pointer to the length of the char read
1376 *
1377 * The current char value, if using UTF-8 this may actaully span multiple
1378 * bytes in the input buffer.
1379 *
1380 * Returns the current char value and its lenght
1381 */
1382
1383int
1384xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001385 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001386 /*
1387 * We are supposed to handle UTF8, check it's valid
1388 * From rfc2044: encoding of the Unicode values on UTF-8:
1389 *
1390 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1391 * 0000 0000-0000 007F 0xxxxxxx
1392 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1393 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1394 *
1395 * Check for the 0x110000 limit too
1396 */
1397 unsigned char c;
1398 unsigned int val;
1399
1400 c = *cur;
1401 if (c & 0x80) {
1402 if ((cur[1] & 0xc0) != 0x80)
1403 goto encoding_error;
1404 if ((c & 0xe0) == 0xe0) {
1405
1406 if ((cur[2] & 0xc0) != 0x80)
1407 goto encoding_error;
1408 if ((c & 0xf0) == 0xf0) {
1409 if (((c & 0xf8) != 0xf0) ||
1410 ((cur[3] & 0xc0) != 0x80))
1411 goto encoding_error;
1412 /* 4-byte code */
1413 *len = 4;
1414 val = (cur[0] & 0x7) << 18;
1415 val |= (cur[1] & 0x3f) << 12;
1416 val |= (cur[2] & 0x3f) << 6;
1417 val |= cur[3] & 0x3f;
1418 } else {
1419 /* 3-byte code */
1420 *len = 3;
1421 val = (cur[0] & 0xf) << 12;
1422 val |= (cur[1] & 0x3f) << 6;
1423 val |= cur[2] & 0x3f;
1424 }
1425 } else {
1426 /* 2-byte code */
1427 *len = 2;
1428 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001429 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001430 }
1431 if (!IS_CHAR(val)) {
1432 if ((ctxt->sax != NULL) &&
1433 (ctxt->sax->error != NULL))
1434 ctxt->sax->error(ctxt->userData,
1435 "Char 0x%X out of allowed range\n", val);
1436 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1437 ctxt->wellFormed = 0;
1438 ctxt->disableSAX = 1;
1439 }
1440 return(val);
1441 } else {
1442 /* 1-byte code */
1443 *len = 1;
1444 return((int) *cur);
1445 }
1446 }
1447 /*
1448 * Assume it's a fixed lenght encoding (1) with
1449 * a compatibke encoding for the ASCII set, since
1450 * XML constructs only use < 128 chars
1451 */
1452 *len = 1;
1453 return((int) *cur);
1454encoding_error:
1455 /*
1456 * If we detect an UTF8 error that probably mean that the
1457 * input encoding didn't get properly advertized in the
1458 * declaration header. Report the error and switch the encoding
1459 * to ISO-Latin-1 (if you don't like this policy, just declare the
1460 * encoding !)
1461 */
1462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1463 ctxt->sax->error(ctxt->userData,
1464 "Input is not proper UTF-8, indicate encoding !\n");
1465 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1466 ctxt->input->cur[0], ctxt->input->cur[1],
1467 ctxt->input->cur[2], ctxt->input->cur[3]);
1468 }
1469 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1470
1471 *len = 1;
1472 return((int) *cur);
1473}
1474
1475/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001476 * xmlCopyCharMultiByte:
1477 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001478 * @val: the char value
1479 *
1480 * append the char value in the array
1481 *
1482 * Returns the number of xmlChar written
1483 */
Owen Taylor3473f882001-02-23 17:55:21 +00001484int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001485xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001486 /*
1487 * We are supposed to handle UTF8, check it's valid
1488 * From rfc2044: encoding of the Unicode values on UTF-8:
1489 *
1490 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1491 * 0000 0000-0000 007F 0xxxxxxx
1492 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1493 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495 if (val >= 0x80) {
1496 xmlChar *savedout = out;
1497 int bits;
1498 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1499 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1500 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1501 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001502 xmlGenericError(xmlGenericErrorContext,
1503 "Internal error, xmlCopyChar 0x%X out of bound\n",
1504 val);
1505 return(0);
1506 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507 for ( ; bits >= 0; bits-= 6)
1508 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1509 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001510 }
1511 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001512 return 1;
1513}
1514
1515/**
1516 * xmlCopyChar:
1517 * @len: Ignored, compatibility
1518 * @out: pointer to an arry of xmlChar
1519 * @val: the char value
1520 *
1521 * append the char value in the array
1522 *
1523 * Returns the number of xmlChar written
1524 */
1525
1526int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001527xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 /* the len parameter is ignored */
1529 if (val >= 0x80) {
1530 return(xmlCopyCharMultiByte (out, val));
1531 }
1532 *out = (xmlChar) val;
1533 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001534}
1535
1536/************************************************************************
1537 * *
1538 * Commodity functions to switch encodings *
1539 * *
1540 ************************************************************************/
1541
1542/**
1543 * xmlSwitchEncoding:
1544 * @ctxt: the parser context
1545 * @enc: the encoding value (number)
1546 *
1547 * change the input functions when discovering the character encoding
1548 * of a given entity.
1549 *
1550 * Returns 0 in case of success, -1 otherwise
1551 */
1552int
1553xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1554{
1555 xmlCharEncodingHandlerPtr handler;
1556
1557 switch (enc) {
1558 case XML_CHAR_ENCODING_ERROR:
1559 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1562 ctxt->wellFormed = 0;
1563 ctxt->disableSAX = 1;
1564 break;
1565 case XML_CHAR_ENCODING_NONE:
1566 /* let's assume it's UTF-8 without the XML decl */
1567 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1568 return(0);
1569 case XML_CHAR_ENCODING_UTF8:
1570 /* default encoding, no conversion should be needed */
1571 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001572
1573 /*
1574 * Errata on XML-1.0 June 20 2001
1575 * Specific handling of the Byte Order Mark for
1576 * UTF-8
1577 */
1578 if ((ctxt->input->cur[0] == 0xEF) &&
1579 (ctxt->input->cur[1] == 0xBB) &&
1580 (ctxt->input->cur[2] == 0xBF)) {
1581 ctxt->input->cur += 3;
1582 }
Owen Taylor3473f882001-02-23 17:55:21 +00001583 return(0);
1584 default:
1585 break;
1586 }
1587 handler = xmlGetCharEncodingHandler(enc);
1588 if (handler == NULL) {
1589 /*
1590 * Default handlers.
1591 */
1592 switch (enc) {
1593 case XML_CHAR_ENCODING_ERROR:
1594 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1596 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1597 ctxt->wellFormed = 0;
1598 ctxt->disableSAX = 1;
1599 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1600 break;
1601 case XML_CHAR_ENCODING_NONE:
1602 /* let's assume it's UTF-8 without the XML decl */
1603 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1604 return(0);
1605 case XML_CHAR_ENCODING_UTF8:
1606 case XML_CHAR_ENCODING_ASCII:
1607 /* default encoding, no conversion should be needed */
1608 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1609 return(0);
1610 case XML_CHAR_ENCODING_UTF16LE:
1611 break;
1612 case XML_CHAR_ENCODING_UTF16BE:
1613 break;
1614 case XML_CHAR_ENCODING_UCS4LE:
1615 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "char encoding USC4 little endian not supported\n");
1619 break;
1620 case XML_CHAR_ENCODING_UCS4BE:
1621 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1623 ctxt->sax->error(ctxt->userData,
1624 "char encoding USC4 big endian not supported\n");
1625 break;
1626 case XML_CHAR_ENCODING_EBCDIC:
1627 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1629 ctxt->sax->error(ctxt->userData,
1630 "char encoding EBCDIC not supported\n");
1631 break;
1632 case XML_CHAR_ENCODING_UCS4_2143:
1633 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1635 ctxt->sax->error(ctxt->userData,
1636 "char encoding UCS4 2143 not supported\n");
1637 break;
1638 case XML_CHAR_ENCODING_UCS4_3412:
1639 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1641 ctxt->sax->error(ctxt->userData,
1642 "char encoding UCS4 3412 not supported\n");
1643 break;
1644 case XML_CHAR_ENCODING_UCS2:
1645 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1647 ctxt->sax->error(ctxt->userData,
1648 "char encoding UCS2 not supported\n");
1649 break;
1650 case XML_CHAR_ENCODING_8859_1:
1651 case XML_CHAR_ENCODING_8859_2:
1652 case XML_CHAR_ENCODING_8859_3:
1653 case XML_CHAR_ENCODING_8859_4:
1654 case XML_CHAR_ENCODING_8859_5:
1655 case XML_CHAR_ENCODING_8859_6:
1656 case XML_CHAR_ENCODING_8859_7:
1657 case XML_CHAR_ENCODING_8859_8:
1658 case XML_CHAR_ENCODING_8859_9:
1659 /*
1660 * We used to keep the internal content in the
1661 * document encoding however this turns being unmaintainable
1662 * So xmlGetCharEncodingHandler() will return non-null
1663 * values for this now.
1664 */
1665 if ((ctxt->inputNr == 1) &&
1666 (ctxt->encoding == NULL) &&
1667 (ctxt->input->encoding != NULL)) {
1668 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1669 }
1670 ctxt->charset = enc;
1671 return(0);
1672 case XML_CHAR_ENCODING_2022_JP:
1673 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt->userData,
1676 "char encoding ISO-2022-JPnot supported\n");
1677 break;
1678 case XML_CHAR_ENCODING_SHIFT_JIS:
1679 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1681 ctxt->sax->error(ctxt->userData,
1682 "char encoding Shift_JIS not supported\n");
1683 break;
1684 case XML_CHAR_ENCODING_EUC_JP:
1685 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1687 ctxt->sax->error(ctxt->userData,
1688 "char encoding EUC-JPnot supported\n");
1689 break;
1690 }
1691 }
1692 if (handler == NULL)
1693 return(-1);
1694 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1695 return(xmlSwitchToEncoding(ctxt, handler));
1696}
1697
1698/**
1699 * xmlSwitchToEncoding:
1700 * @ctxt: the parser context
1701 * @handler: the encoding handler
1702 *
1703 * change the input functions when discovering the character encoding
1704 * of a given entity.
1705 *
1706 * Returns 0 in case of success, -1 otherwise
1707 */
1708int
1709xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1710{
1711 int nbchars;
1712
1713 if (handler != NULL) {
1714 if (ctxt->input != NULL) {
1715 if (ctxt->input->buf != NULL) {
1716 if (ctxt->input->buf->encoder != NULL) {
1717 if (ctxt->input->buf->encoder == handler)
1718 return(0);
1719 /*
1720 * Note: this is a bit dangerous, but that's what it
1721 * takes to use nearly compatible signature for different
1722 * encodings.
1723 */
1724 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1725 ctxt->input->buf->encoder = handler;
1726 return(0);
1727 }
1728 ctxt->input->buf->encoder = handler;
1729
1730 /*
1731 * Is there already some content down the pipe to convert ?
1732 */
1733 if ((ctxt->input->buf->buffer != NULL) &&
1734 (ctxt->input->buf->buffer->use > 0)) {
1735 int processed;
1736
1737 /*
1738 * Specific handling of the Byte Order Mark for
1739 * UTF-16
1740 */
1741 if ((handler->name != NULL) &&
1742 (!strcmp(handler->name, "UTF-16LE")) &&
1743 (ctxt->input->cur[0] == 0xFF) &&
1744 (ctxt->input->cur[1] == 0xFE)) {
1745 ctxt->input->cur += 2;
1746 }
1747 if ((handler->name != NULL) &&
1748 (!strcmp(handler->name, "UTF-16BE")) &&
1749 (ctxt->input->cur[0] == 0xFE) &&
1750 (ctxt->input->cur[1] == 0xFF)) {
1751 ctxt->input->cur += 2;
1752 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001753 /*
1754 * Errata on XML-1.0 June 20 2001
1755 * Specific handling of the Byte Order Mark for
1756 * UTF-8
1757 */
1758 if ((handler->name != NULL) &&
1759 (!strcmp(handler->name, "UTF-8")) &&
1760 (ctxt->input->cur[0] == 0xEF) &&
1761 (ctxt->input->cur[1] == 0xBB) &&
1762 (ctxt->input->cur[1] == 0xBF)) {
1763 ctxt->input->cur += 3;
1764 }
Owen Taylor3473f882001-02-23 17:55:21 +00001765
1766 /*
1767 * Shring the current input buffer.
1768 * Move it as the raw buffer and create a new input buffer
1769 */
1770 processed = ctxt->input->cur - ctxt->input->base;
1771 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1772 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1773 ctxt->input->buf->buffer = xmlBufferCreate();
1774
1775 if (ctxt->html) {
1776 /*
1777 * converst as much as possbile of the buffer
1778 */
1779 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1780 ctxt->input->buf->buffer,
1781 ctxt->input->buf->raw);
1782 } else {
1783 /*
1784 * convert just enough to get
1785 * '<?xml version="1.0" encoding="xxx"?>'
1786 * parsed with the autodetected encoding
1787 * into the parser reading buffer.
1788 */
1789 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1790 ctxt->input->buf->buffer,
1791 ctxt->input->buf->raw);
1792 }
1793 if (nbchars < 0) {
1794 xmlGenericError(xmlGenericErrorContext,
1795 "xmlSwitchToEncoding: encoder error\n");
1796 return(-1);
1797 }
1798 ctxt->input->base =
1799 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001800 ctxt->input->end =
1801 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001802
1803 }
1804 return(0);
1805 } else {
1806 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1807 /*
1808 * When parsing a static memory array one must know the
1809 * size to be able to convert the buffer.
1810 */
1811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1812 ctxt->sax->error(ctxt->userData,
1813 "xmlSwitchEncoding : no input\n");
1814 return(-1);
1815 } else {
1816 int processed;
1817
1818 /*
1819 * Shring the current input buffer.
1820 * Move it as the raw buffer and create a new input buffer
1821 */
1822 processed = ctxt->input->cur - ctxt->input->base;
1823
1824 ctxt->input->buf->raw = xmlBufferCreate();
1825 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1826 ctxt->input->length - processed);
1827 ctxt->input->buf->buffer = xmlBufferCreate();
1828
1829 /*
1830 * convert as much as possible of the raw input
1831 * to the parser reading buffer.
1832 */
1833 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1834 ctxt->input->buf->buffer,
1835 ctxt->input->buf->raw);
1836 if (nbchars < 0) {
1837 xmlGenericError(xmlGenericErrorContext,
1838 "xmlSwitchToEncoding: encoder error\n");
1839 return(-1);
1840 }
1841
1842 /*
1843 * Conversion succeeded, get rid of the old buffer
1844 */
1845 if ((ctxt->input->free != NULL) &&
1846 (ctxt->input->base != NULL))
1847 ctxt->input->free((xmlChar *) ctxt->input->base);
1848 ctxt->input->base =
1849 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001850 ctxt->input->end =
1851 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001852 }
1853 }
1854 } else {
1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1856 ctxt->sax->error(ctxt->userData,
1857 "xmlSwitchEncoding : no input\n");
1858 return(-1);
1859 }
1860 /*
1861 * The parsing is now done in UTF8 natively
1862 */
1863 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1864 } else
1865 return(-1);
1866 return(0);
1867
1868}
1869
1870/************************************************************************
1871 * *
1872 * Commodity functions to handle entities processing *
1873 * *
1874 ************************************************************************/
1875
1876/**
1877 * xmlFreeInputStream:
1878 * @input: an xmlParserInputPtr
1879 *
1880 * Free up an input stream.
1881 */
1882void
1883xmlFreeInputStream(xmlParserInputPtr input) {
1884 if (input == NULL) return;
1885
1886 if (input->filename != NULL) xmlFree((char *) input->filename);
1887 if (input->directory != NULL) xmlFree((char *) input->directory);
1888 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1889 if (input->version != NULL) xmlFree((char *) input->version);
1890 if ((input->free != NULL) && (input->base != NULL))
1891 input->free((xmlChar *) input->base);
1892 if (input->buf != NULL)
1893 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001894 xmlFree(input);
1895}
1896
1897/**
1898 * xmlNewInputStream:
1899 * @ctxt: an XML parser context
1900 *
1901 * Create a new input stream structure
1902 * Returns the new input stream or NULL
1903 */
1904xmlParserInputPtr
1905xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1906 xmlParserInputPtr input;
1907
1908 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1909 if (input == NULL) {
1910 if (ctxt != NULL) {
1911 ctxt->errNo = XML_ERR_NO_MEMORY;
1912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1913 ctxt->sax->error(ctxt->userData,
1914 "malloc: couldn't allocate a new input stream\n");
1915 ctxt->errNo = XML_ERR_NO_MEMORY;
1916 }
1917 return(NULL);
1918 }
1919 memset(input, 0, sizeof(xmlParserInput));
1920 input->line = 1;
1921 input->col = 1;
1922 input->standalone = -1;
1923 return(input);
1924}
1925
1926/**
1927 * xmlNewIOInputStream:
1928 * @ctxt: an XML parser context
1929 * @input: an I/O Input
1930 * @enc: the charset encoding if known
1931 *
1932 * Create a new input stream structure encapsulating the @input into
1933 * a stream suitable for the parser.
1934 *
1935 * Returns the new input stream or NULL
1936 */
1937xmlParserInputPtr
1938xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1939 xmlCharEncoding enc) {
1940 xmlParserInputPtr inputStream;
1941
1942 if (xmlParserDebugEntities)
1943 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1944 inputStream = xmlNewInputStream(ctxt);
1945 if (inputStream == NULL) {
1946 return(NULL);
1947 }
1948 inputStream->filename = NULL;
1949 inputStream->buf = input;
1950 inputStream->base = inputStream->buf->buffer->content;
1951 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001952 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001953 if (enc != XML_CHAR_ENCODING_NONE) {
1954 xmlSwitchEncoding(ctxt, enc);
1955 }
1956
1957 return(inputStream);
1958}
1959
1960/**
1961 * xmlNewEntityInputStream:
1962 * @ctxt: an XML parser context
1963 * @entity: an Entity pointer
1964 *
1965 * Create a new input stream based on an xmlEntityPtr
1966 *
1967 * Returns the new input stream or NULL
1968 */
1969xmlParserInputPtr
1970xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1971 xmlParserInputPtr input;
1972
1973 if (entity == NULL) {
1974 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1976 ctxt->sax->error(ctxt->userData,
1977 "internal: xmlNewEntityInputStream entity = NULL\n");
1978 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1979 return(NULL);
1980 }
1981 if (xmlParserDebugEntities)
1982 xmlGenericError(xmlGenericErrorContext,
1983 "new input from entity: %s\n", entity->name);
1984 if (entity->content == NULL) {
1985 switch (entity->etype) {
1986 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1987 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1989 ctxt->sax->error(ctxt->userData,
1990 "xmlNewEntityInputStream unparsed entity !\n");
1991 break;
1992 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1993 case XML_EXTERNAL_PARAMETER_ENTITY:
1994 return(xmlLoadExternalEntity((char *) entity->URI,
1995 (char *) entity->ExternalID, ctxt));
1996 case XML_INTERNAL_GENERAL_ENTITY:
1997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1998 ctxt->sax->error(ctxt->userData,
1999 "Internal entity %s without content !\n", entity->name);
2000 break;
2001 case XML_INTERNAL_PARAMETER_ENTITY:
2002 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2004 ctxt->sax->error(ctxt->userData,
2005 "Internal parameter entity %s without content !\n", entity->name);
2006 break;
2007 case XML_INTERNAL_PREDEFINED_ENTITY:
2008 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2010 ctxt->sax->error(ctxt->userData,
2011 "Predefined entity %s without content !\n", entity->name);
2012 break;
2013 }
2014 return(NULL);
2015 }
2016 input = xmlNewInputStream(ctxt);
2017 if (input == NULL) {
2018 return(NULL);
2019 }
2020 input->filename = (char *) entity->URI;
2021 input->base = entity->content;
2022 input->cur = entity->content;
2023 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002024 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002025 return(input);
2026}
2027
2028/**
2029 * xmlNewStringInputStream:
2030 * @ctxt: an XML parser context
2031 * @buffer: an memory buffer
2032 *
2033 * Create a new input stream based on a memory buffer.
2034 * Returns the new input stream
2035 */
2036xmlParserInputPtr
2037xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2038 xmlParserInputPtr input;
2039
2040 if (buffer == NULL) {
2041 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2043 ctxt->sax->error(ctxt->userData,
2044 "internal: xmlNewStringInputStream string = NULL\n");
2045 return(NULL);
2046 }
2047 if (xmlParserDebugEntities)
2048 xmlGenericError(xmlGenericErrorContext,
2049 "new fixed input: %.30s\n", buffer);
2050 input = xmlNewInputStream(ctxt);
2051 if (input == NULL) {
2052 return(NULL);
2053 }
2054 input->base = buffer;
2055 input->cur = buffer;
2056 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002057 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002058 return(input);
2059}
2060
2061/**
2062 * xmlNewInputFromFile:
2063 * @ctxt: an XML parser context
2064 * @filename: the filename to use as entity
2065 *
2066 * Create a new input stream based on a file.
2067 *
2068 * Returns the new input stream or NULL in case of error
2069 */
2070xmlParserInputPtr
2071xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2072 xmlParserInputBufferPtr buf;
2073 xmlParserInputPtr inputStream;
2074 char *directory = NULL;
2075 xmlChar *URI = NULL;
2076
2077 if (xmlParserDebugEntities)
2078 xmlGenericError(xmlGenericErrorContext,
2079 "new input from file: %s\n", filename);
2080 if (ctxt == NULL) return(NULL);
2081 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2082 if (buf == NULL)
2083 return(NULL);
2084
2085 URI = xmlStrdup((xmlChar *) filename);
2086 directory = xmlParserGetDirectory((const char *) URI);
2087
2088 inputStream = xmlNewInputStream(ctxt);
2089 if (inputStream == NULL) {
2090 if (directory != NULL) xmlFree((char *) directory);
2091 if (URI != NULL) xmlFree((char *) URI);
2092 return(NULL);
2093 }
2094
2095 inputStream->filename = (const char *) URI;
2096 inputStream->directory = directory;
2097 inputStream->buf = buf;
2098
2099 inputStream->base = inputStream->buf->buffer->content;
2100 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002101 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002102 if ((ctxt->directory == NULL) && (directory != NULL))
2103 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2104 return(inputStream);
2105}
2106
2107/************************************************************************
2108 * *
2109 * Commodity functions to handle parser contexts *
2110 * *
2111 ************************************************************************/
2112
2113/**
2114 * xmlInitParserCtxt:
2115 * @ctxt: an XML parser context
2116 *
2117 * Initialize a parser context
2118 */
2119
2120void
2121xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2122{
2123 xmlSAXHandler *sax;
2124
2125 xmlDefaultSAXHandlerInit();
2126
2127 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2128 if (sax == NULL) {
2129 xmlGenericError(xmlGenericErrorContext,
2130 "xmlInitParserCtxt: out of memory\n");
2131 }
2132 else
2133 memset(sax, 0, sizeof(xmlSAXHandler));
2134
2135 /* Allocate the Input stack */
2136 ctxt->inputTab = (xmlParserInputPtr *)
2137 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2138 if (ctxt->inputTab == NULL) {
2139 xmlGenericError(xmlGenericErrorContext,
2140 "xmlInitParserCtxt: out of memory\n");
2141 ctxt->inputNr = 0;
2142 ctxt->inputMax = 0;
2143 ctxt->input = NULL;
2144 return;
2145 }
2146 ctxt->inputNr = 0;
2147 ctxt->inputMax = 5;
2148 ctxt->input = NULL;
2149
2150 ctxt->version = NULL;
2151 ctxt->encoding = NULL;
2152 ctxt->standalone = -1;
2153 ctxt->hasExternalSubset = 0;
2154 ctxt->hasPErefs = 0;
2155 ctxt->html = 0;
2156 ctxt->external = 0;
2157 ctxt->instate = XML_PARSER_START;
2158 ctxt->token = 0;
2159 ctxt->directory = NULL;
2160
2161 /* Allocate the Node stack */
2162 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2163 if (ctxt->nodeTab == NULL) {
2164 xmlGenericError(xmlGenericErrorContext,
2165 "xmlInitParserCtxt: out of memory\n");
2166 ctxt->nodeNr = 0;
2167 ctxt->nodeMax = 0;
2168 ctxt->node = NULL;
2169 ctxt->inputNr = 0;
2170 ctxt->inputMax = 0;
2171 ctxt->input = NULL;
2172 return;
2173 }
2174 ctxt->nodeNr = 0;
2175 ctxt->nodeMax = 10;
2176 ctxt->node = NULL;
2177
2178 /* Allocate the Name stack */
2179 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2180 if (ctxt->nameTab == NULL) {
2181 xmlGenericError(xmlGenericErrorContext,
2182 "xmlInitParserCtxt: out of memory\n");
2183 ctxt->nodeNr = 0;
2184 ctxt->nodeMax = 0;
2185 ctxt->node = NULL;
2186 ctxt->inputNr = 0;
2187 ctxt->inputMax = 0;
2188 ctxt->input = NULL;
2189 ctxt->nameNr = 0;
2190 ctxt->nameMax = 0;
2191 ctxt->name = NULL;
2192 return;
2193 }
2194 ctxt->nameNr = 0;
2195 ctxt->nameMax = 10;
2196 ctxt->name = NULL;
2197
2198 /* Allocate the space stack */
2199 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2200 if (ctxt->spaceTab == NULL) {
2201 xmlGenericError(xmlGenericErrorContext,
2202 "xmlInitParserCtxt: out of memory\n");
2203 ctxt->nodeNr = 0;
2204 ctxt->nodeMax = 0;
2205 ctxt->node = NULL;
2206 ctxt->inputNr = 0;
2207 ctxt->inputMax = 0;
2208 ctxt->input = NULL;
2209 ctxt->nameNr = 0;
2210 ctxt->nameMax = 0;
2211 ctxt->name = NULL;
2212 ctxt->spaceNr = 0;
2213 ctxt->spaceMax = 0;
2214 ctxt->space = NULL;
2215 return;
2216 }
2217 ctxt->spaceNr = 1;
2218 ctxt->spaceMax = 10;
2219 ctxt->spaceTab[0] = -1;
2220 ctxt->space = &ctxt->spaceTab[0];
2221
Daniel Veillard14be0a12001-03-03 18:50:55 +00002222 ctxt->sax = sax;
2223 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2224
Owen Taylor3473f882001-02-23 17:55:21 +00002225 ctxt->userData = ctxt;
2226 ctxt->myDoc = NULL;
2227 ctxt->wellFormed = 1;
2228 ctxt->valid = 1;
2229 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2230 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2231 ctxt->pedantic = xmlPedanticParserDefaultValue;
2232 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2233 ctxt->vctxt.userData = ctxt;
2234 if (ctxt->validate) {
2235 ctxt->vctxt.error = xmlParserValidityError;
2236 if (xmlGetWarningsDefaultValue == 0)
2237 ctxt->vctxt.warning = NULL;
2238 else
2239 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002240 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002241 } else {
2242 ctxt->vctxt.error = NULL;
2243 ctxt->vctxt.warning = NULL;
2244 }
2245 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2246 ctxt->record_info = 0;
2247 ctxt->nbChars = 0;
2248 ctxt->checkIndex = 0;
2249 ctxt->inSubset = 0;
2250 ctxt->errNo = XML_ERR_OK;
2251 ctxt->depth = 0;
2252 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2253 xmlInitNodeInfoSeq(&ctxt->node_seq);
2254}
2255
2256/**
2257 * xmlFreeParserCtxt:
2258 * @ctxt: an XML parser context
2259 *
2260 * Free all the memory used by a parser context. However the parsed
2261 * document in ctxt->myDoc is not freed.
2262 */
2263
2264void
2265xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2266{
2267 xmlParserInputPtr input;
2268 xmlChar *oldname;
2269
2270 if (ctxt == NULL) return;
2271
2272 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2273 xmlFreeInputStream(input);
2274 }
2275 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2276 xmlFree(oldname);
2277 }
2278 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2279 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2280 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2281 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2282 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2283 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2284 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2285 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2286 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002287 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2288 xmlFree(ctxt->sax);
2289 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002290 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 xmlFree(ctxt);
2292}
2293
2294/**
2295 * xmlNewParserCtxt:
2296 *
2297 * Allocate and initialize a new parser context.
2298 *
2299 * Returns the xmlParserCtxtPtr or NULL
2300 */
2301
2302xmlParserCtxtPtr
2303xmlNewParserCtxt()
2304{
2305 xmlParserCtxtPtr ctxt;
2306
2307 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2308 if (ctxt == NULL) {
2309 xmlGenericError(xmlGenericErrorContext,
2310 "xmlNewParserCtxt : cannot allocate context\n");
2311 perror("malloc");
2312 return(NULL);
2313 }
2314 memset(ctxt, 0, sizeof(xmlParserCtxt));
2315 xmlInitParserCtxt(ctxt);
2316 return(ctxt);
2317}
2318
2319/************************************************************************
2320 * *
2321 * Handling of node informations *
2322 * *
2323 ************************************************************************/
2324
2325/**
2326 * xmlClearParserCtxt:
2327 * @ctxt: an XML parser context
2328 *
2329 * Clear (release owned resources) and reinitialize a parser context
2330 */
2331
2332void
2333xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2334{
2335 xmlClearNodeInfoSeq(&ctxt->node_seq);
2336 xmlInitParserCtxt(ctxt);
2337}
2338
2339/**
2340 * xmlParserFindNodeInfo:
2341 * @ctxt: an XML parser context
2342 * @node: an XML node within the tree
2343 *
2344 * Find the parser node info struct for a given node
2345 *
2346 * Returns an xmlParserNodeInfo block pointer or NULL
2347 */
2348const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2349 const xmlNode* node)
2350{
2351 unsigned long pos;
2352
2353 /* Find position where node should be at */
2354 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2355 if ( ctx->node_seq.buffer[pos].node == node )
2356 return &ctx->node_seq.buffer[pos];
2357 else
2358 return NULL;
2359}
2360
2361
2362/**
2363 * xmlInitNodeInfoSeq:
2364 * @seq: a node info sequence pointer
2365 *
2366 * -- Initialize (set to initial state) node info sequence
2367 */
2368void
2369xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2370{
2371 seq->length = 0;
2372 seq->maximum = 0;
2373 seq->buffer = NULL;
2374}
2375
2376/**
2377 * xmlClearNodeInfoSeq:
2378 * @seq: a node info sequence pointer
2379 *
2380 * -- Clear (release memory and reinitialize) node
2381 * info sequence
2382 */
2383void
2384xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2385{
2386 if ( seq->buffer != NULL )
2387 xmlFree(seq->buffer);
2388 xmlInitNodeInfoSeq(seq);
2389}
2390
2391
2392/**
2393 * xmlParserFindNodeInfoIndex:
2394 * @seq: a node info sequence pointer
2395 * @node: an XML node pointer
2396 *
2397 *
2398 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2399 * the given node is or should be at in a sorted sequence
2400 *
2401 * Returns a long indicating the position of the record
2402 */
2403unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2404 const xmlNode* node)
2405{
2406 unsigned long upper, lower, middle;
2407 int found = 0;
2408
2409 /* Do a binary search for the key */
2410 lower = 1;
2411 upper = seq->length;
2412 middle = 0;
2413 while ( lower <= upper && !found) {
2414 middle = lower + (upper - lower) / 2;
2415 if ( node == seq->buffer[middle - 1].node )
2416 found = 1;
2417 else if ( node < seq->buffer[middle - 1].node )
2418 upper = middle - 1;
2419 else
2420 lower = middle + 1;
2421 }
2422
2423 /* Return position */
2424 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2425 return middle;
2426 else
2427 return middle - 1;
2428}
2429
2430
2431/**
2432 * xmlParserAddNodeInfo:
2433 * @ctxt: an XML parser context
2434 * @info: a node info sequence pointer
2435 *
2436 * Insert node info record into the sorted sequence
2437 */
2438void
2439xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2440 const xmlParserNodeInfo* info)
2441{
2442 unsigned long pos;
2443 static unsigned int block_size = 5;
2444
2445 /* Find pos and check to see if node is already in the sequence */
2446 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2447 if ( pos < ctxt->node_seq.length
2448 && ctxt->node_seq.buffer[pos].node == info->node ) {
2449 ctxt->node_seq.buffer[pos] = *info;
2450 }
2451
2452 /* Otherwise, we need to add new node to buffer */
2453 else {
2454 /* Expand buffer by 5 if needed */
2455 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2456 xmlParserNodeInfo* tmp_buffer;
2457 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2458 *(ctxt->node_seq.maximum + block_size));
2459
2460 if ( ctxt->node_seq.buffer == NULL )
2461 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2462 else
2463 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2464
2465 if ( tmp_buffer == NULL ) {
2466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2467 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2468 ctxt->errNo = XML_ERR_NO_MEMORY;
2469 return;
2470 }
2471 ctxt->node_seq.buffer = tmp_buffer;
2472 ctxt->node_seq.maximum += block_size;
2473 }
2474
2475 /* If position is not at end, move elements out of the way */
2476 if ( pos != ctxt->node_seq.length ) {
2477 unsigned long i;
2478
2479 for ( i = ctxt->node_seq.length; i > pos; i-- )
2480 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2481 }
2482
2483 /* Copy element and increase length */
2484 ctxt->node_seq.buffer[pos] = *info;
2485 ctxt->node_seq.length++;
2486 }
2487}
2488
2489/************************************************************************
2490 * *
2491 * Deprecated functions kept for compatibility *
2492 * *
2493 ************************************************************************/
2494
2495/*
2496 * xmlCheckLanguageID
2497 * @lang: pointer to the string value
2498 *
2499 * Checks that the value conforms to the LanguageID production:
2500 *
2501 * NOTE: this is somewhat deprecated, those productions were removed from
2502 * the XML Second edition.
2503 *
2504 * [33] LanguageID ::= Langcode ('-' Subcode)*
2505 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2506 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2507 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2508 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2509 * [38] Subcode ::= ([a-z] | [A-Z])+
2510 *
2511 * Returns 1 if correct 0 otherwise
2512 **/
2513int
2514xmlCheckLanguageID(const xmlChar *lang) {
2515 const xmlChar *cur = lang;
2516
2517 if (cur == NULL)
2518 return(0);
2519 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2520 ((cur[0] == 'I') && (cur[1] == '-'))) {
2521 /*
2522 * IANA code
2523 */
2524 cur += 2;
2525 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2526 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2527 cur++;
2528 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2529 ((cur[0] == 'X') && (cur[1] == '-'))) {
2530 /*
2531 * User code
2532 */
2533 cur += 2;
2534 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2535 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2536 cur++;
2537 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2538 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2539 /*
2540 * ISO639
2541 */
2542 cur++;
2543 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2544 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2545 cur++;
2546 else
2547 return(0);
2548 } else
2549 return(0);
2550 while (cur[0] != 0) { /* non input consuming */
2551 if (cur[0] != '-')
2552 return(0);
2553 cur++;
2554 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2555 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2556 cur++;
2557 else
2558 return(0);
2559 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2560 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2561 cur++;
2562 }
2563 return(1);
2564}
2565
2566/**
2567 * xmlDecodeEntities:
2568 * @ctxt: the parser context
2569 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2570 * @len: the len to decode (in bytes !), -1 for no size limit
2571 * @end: an end marker xmlChar, 0 if none
2572 * @end2: an end marker xmlChar, 0 if none
2573 * @end3: an end marker xmlChar, 0 if none
2574 *
2575 * This function is deprecated, we now always process entities content
2576 * through xmlStringDecodeEntities
2577 *
2578 * TODO: remove it in next major release.
2579 *
2580 * [67] Reference ::= EntityRef | CharRef
2581 *
2582 * [69] PEReference ::= '%' Name ';'
2583 *
2584 * Returns A newly allocated string with the substitution done. The caller
2585 * must deallocate it !
2586 */
2587xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002588xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2589 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002590#if 0
2591 xmlChar *buffer = NULL;
2592 unsigned int buffer_size = 0;
2593 unsigned int nbchars = 0;
2594
2595 xmlChar *current = NULL;
2596 xmlEntityPtr ent;
2597 unsigned int max = (unsigned int) len;
2598 int c,l;
2599#endif
2600
2601 static int deprecated = 0;
2602 if (!deprecated) {
2603 xmlGenericError(xmlGenericErrorContext,
2604 "xmlDecodeEntities() deprecated function reached\n");
2605 deprecated = 1;
2606 }
2607
2608#if 0
2609 if (ctxt->depth > 40) {
2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2611 ctxt->sax->error(ctxt->userData,
2612 "Detected entity reference loop\n");
2613 ctxt->wellFormed = 0;
2614 ctxt->disableSAX = 1;
2615 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2616 return(NULL);
2617 }
2618
2619 /*
2620 * allocate a translation buffer.
2621 */
2622 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2623 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2624 if (buffer == NULL) {
2625 perror("xmlDecodeEntities: malloc failed");
2626 return(NULL);
2627 }
2628
2629 /*
2630 * Ok loop until we reach one of the ending char or a size limit.
2631 */
2632 GROW;
2633 c = CUR_CHAR(l);
2634 while ((nbchars < max) && (c != end) && /* NOTUSED */
2635 (c != end2) && (c != end3)) {
2636 GROW;
2637 if (c == 0) break;
2638 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2639 int val = xmlParseCharRef(ctxt);
2640 COPY_BUF(0,buffer,nbchars,val);
2641 NEXTL(l);
2642 } else if ((c == '&') && (ctxt->token != '&') &&
2643 (what & XML_SUBSTITUTE_REF)) {
2644 if (xmlParserDebugEntities)
2645 xmlGenericError(xmlGenericErrorContext,
2646 "decoding Entity Reference\n");
2647 ent = xmlParseEntityRef(ctxt);
2648 if ((ent != NULL) &&
2649 (ctxt->replaceEntities != 0)) {
2650 current = ent->content;
2651 while (*current != 0) { /* non input consuming loop */
2652 buffer[nbchars++] = *current++;
2653 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2654 growBuffer(buffer);
2655 }
2656 }
2657 } else if (ent != NULL) {
2658 const xmlChar *cur = ent->name;
2659
2660 buffer[nbchars++] = '&';
2661 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2662 growBuffer(buffer);
2663 }
2664 while (*cur != 0) { /* non input consuming loop */
2665 buffer[nbchars++] = *cur++;
2666 }
2667 buffer[nbchars++] = ';';
2668 }
2669 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2670 /*
2671 * a PEReference induce to switch the entity flow,
2672 * we break here to flush the current set of chars
2673 * parsed if any. We will be called back later.
2674 */
2675 if (xmlParserDebugEntities)
2676 xmlGenericError(xmlGenericErrorContext,
2677 "decoding PE Reference\n");
2678 if (nbchars != 0) break;
2679
2680 xmlParsePEReference(ctxt);
2681
2682 /*
2683 * Pop-up of finished entities.
2684 */
2685 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2686 xmlPopInput(ctxt);
2687
2688 break;
2689 } else {
2690 COPY_BUF(l,buffer,nbchars,c);
2691 NEXTL(l);
2692 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2693 growBuffer(buffer);
2694 }
2695 }
2696 c = CUR_CHAR(l);
2697 }
2698 buffer[nbchars++] = 0;
2699 return(buffer);
2700#endif
2701 return(NULL);
2702}
2703
2704/**
2705 * xmlNamespaceParseNCName:
2706 * @ctxt: an XML parser context
2707 *
2708 * parse an XML namespace name.
2709 *
2710 * TODO: this seems not in use anymore, the namespace handling is done on
2711 * top of the SAX interfaces, i.e. not on raw input.
2712 *
2713 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2714 *
2715 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2716 * CombiningChar | Extender
2717 *
2718 * Returns the namespace name or NULL
2719 */
2720
2721xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002722xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723#if 0
2724 xmlChar buf[XML_MAX_NAMELEN + 5];
2725 int len = 0, l;
2726 int cur = CUR_CHAR(l);
2727#endif
2728
2729 static int deprecated = 0;
2730 if (!deprecated) {
2731 xmlGenericError(xmlGenericErrorContext,
2732 "xmlNamespaceParseNCName() deprecated function reached\n");
2733 deprecated = 1;
2734 }
2735
2736#if 0
2737 /* load first the value of the char !!! */
2738 GROW;
2739 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2740
2741xmlGenericError(xmlGenericErrorContext,
2742 "xmlNamespaceParseNCName: reached loop 3\n");
2743 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2744 (cur == '.') || (cur == '-') ||
2745 (cur == '_') ||
2746 (IS_COMBINING(cur)) ||
2747 (IS_EXTENDER(cur))) {
2748 COPY_BUF(l,buf,len,cur);
2749 NEXTL(l);
2750 cur = CUR_CHAR(l);
2751 if (len >= XML_MAX_NAMELEN) {
2752 xmlGenericError(xmlGenericErrorContext,
2753 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2754 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2755 (cur == '.') || (cur == '-') ||
2756 (cur == '_') ||
2757 (IS_COMBINING(cur)) ||
2758 (IS_EXTENDER(cur))) {
2759 NEXTL(l);
2760 cur = CUR_CHAR(l);
2761 }
2762 break;
2763 }
2764 }
2765 return(xmlStrndup(buf, len));
2766#endif
2767 return(NULL);
2768}
2769
2770/**
2771 * xmlNamespaceParseQName:
2772 * @ctxt: an XML parser context
2773 * @prefix: a xmlChar **
2774 *
2775 * TODO: this seems not in use anymore, the namespace handling is done on
2776 * top of the SAX interfaces, i.e. not on raw input.
2777 *
2778 * parse an XML qualified name
2779 *
2780 * [NS 5] QName ::= (Prefix ':')? LocalPart
2781 *
2782 * [NS 6] Prefix ::= NCName
2783 *
2784 * [NS 7] LocalPart ::= NCName
2785 *
2786 * Returns the local part, and prefix is updated
2787 * to get the Prefix if any.
2788 */
2789
2790xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002791xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002792
2793 static int deprecated = 0;
2794 if (!deprecated) {
2795 xmlGenericError(xmlGenericErrorContext,
2796 "xmlNamespaceParseQName() deprecated function reached\n");
2797 deprecated = 1;
2798 }
2799
2800#if 0
2801 xmlChar *ret = NULL;
2802
2803 *prefix = NULL;
2804 ret = xmlNamespaceParseNCName(ctxt);
2805 if (RAW == ':') {
2806 *prefix = ret;
2807 NEXT;
2808 ret = xmlNamespaceParseNCName(ctxt);
2809 }
2810
2811 return(ret);
2812#endif
2813 return(NULL);
2814}
2815
2816/**
2817 * xmlNamespaceParseNSDef:
2818 * @ctxt: an XML parser context
2819 *
2820 * parse a namespace prefix declaration
2821 *
2822 * TODO: this seems not in use anymore, the namespace handling is done on
2823 * top of the SAX interfaces, i.e. not on raw input.
2824 *
2825 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2826 *
2827 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2828 *
2829 * Returns the namespace name
2830 */
2831
2832xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002833xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 static int deprecated = 0;
2835 if (!deprecated) {
2836 xmlGenericError(xmlGenericErrorContext,
2837 "xmlNamespaceParseNSDef() deprecated function reached\n");
2838 deprecated = 1;
2839 }
2840 return(NULL);
2841#if 0
2842 xmlChar *name = NULL;
2843
2844 if ((RAW == 'x') && (NXT(1) == 'm') &&
2845 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2846 (NXT(4) == 's')) {
2847 SKIP(5);
2848 if (RAW == ':') {
2849 NEXT;
2850 name = xmlNamespaceParseNCName(ctxt);
2851 }
2852 }
2853 return(name);
2854#endif
2855}
2856
2857/**
2858 * xmlParseQuotedString:
2859 * @ctxt: an XML parser context
2860 *
2861 * Parse and return a string between quotes or doublequotes
2862 *
2863 * TODO: Deprecated, to be removed at next drop of binary compatibility
2864 *
2865 * Returns the string parser or NULL.
2866 */
2867xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002868xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002869 static int deprecated = 0;
2870 if (!deprecated) {
2871 xmlGenericError(xmlGenericErrorContext,
2872 "xmlParseQuotedString() deprecated function reached\n");
2873 deprecated = 1;
2874 }
2875 return(NULL);
2876
2877#if 0
2878 xmlChar *buf = NULL;
2879 int len = 0,l;
2880 int size = XML_PARSER_BUFFER_SIZE;
2881 int c;
2882
2883 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2884 if (buf == NULL) {
2885 xmlGenericError(xmlGenericErrorContext,
2886 "malloc of %d byte failed\n", size);
2887 return(NULL);
2888 }
2889xmlGenericError(xmlGenericErrorContext,
2890 "xmlParseQuotedString: reached loop 4\n");
2891 if (RAW == '"') {
2892 NEXT;
2893 c = CUR_CHAR(l);
2894 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2895 if (len + 5 >= size) {
2896 size *= 2;
2897 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2898 if (buf == NULL) {
2899 xmlGenericError(xmlGenericErrorContext,
2900 "realloc of %d byte failed\n", size);
2901 return(NULL);
2902 }
2903 }
2904 COPY_BUF(l,buf,len,c);
2905 NEXTL(l);
2906 c = CUR_CHAR(l);
2907 }
2908 if (c != '"') {
2909 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2911 ctxt->sax->error(ctxt->userData,
2912 "String not closed \"%.50s\"\n", buf);
2913 ctxt->wellFormed = 0;
2914 ctxt->disableSAX = 1;
2915 } else {
2916 NEXT;
2917 }
2918 } else if (RAW == '\''){
2919 NEXT;
2920 c = CUR;
2921 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2922 if (len + 1 >= size) {
2923 size *= 2;
2924 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2925 if (buf == NULL) {
2926 xmlGenericError(xmlGenericErrorContext,
2927 "realloc of %d byte failed\n", size);
2928 return(NULL);
2929 }
2930 }
2931 buf[len++] = c;
2932 NEXT;
2933 c = CUR;
2934 }
2935 if (RAW != '\'') {
2936 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2938 ctxt->sax->error(ctxt->userData,
2939 "String not closed \"%.50s\"\n", buf);
2940 ctxt->wellFormed = 0;
2941 ctxt->disableSAX = 1;
2942 } else {
2943 NEXT;
2944 }
2945 }
2946 return(buf);
2947#endif
2948}
2949
2950/**
2951 * xmlParseNamespace:
2952 * @ctxt: an XML parser context
2953 *
2954 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2955 *
2956 * This is what the older xml-name Working Draft specified, a bunch of
2957 * other stuff may still rely on it, so support is still here as
2958 * if it was declared on the root of the Tree:-(
2959 *
2960 * TODO: remove from library
2961 *
2962 * To be removed at next drop of binary compatibility
2963 */
2964
2965void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002966xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002967 static int deprecated = 0;
2968 if (!deprecated) {
2969 xmlGenericError(xmlGenericErrorContext,
2970 "xmlParseNamespace() deprecated function reached\n");
2971 deprecated = 1;
2972 }
2973
2974#if 0
2975 xmlChar *href = NULL;
2976 xmlChar *prefix = NULL;
2977 int garbage = 0;
2978
2979 /*
2980 * We just skipped "namespace" or "xml:namespace"
2981 */
2982 SKIP_BLANKS;
2983
2984xmlGenericError(xmlGenericErrorContext,
2985 "xmlParseNamespace: reached loop 5\n");
2986 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2987 /*
2988 * We can have "ns" or "prefix" attributes
2989 * Old encoding as 'href' or 'AS' attributes is still supported
2990 */
2991 if ((RAW == 'n') && (NXT(1) == 's')) {
2992 garbage = 0;
2993 SKIP(2);
2994 SKIP_BLANKS;
2995
2996 if (RAW != '=') continue;
2997 NEXT;
2998 SKIP_BLANKS;
2999
3000 href = xmlParseQuotedString(ctxt);
3001 SKIP_BLANKS;
3002 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3003 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3004 garbage = 0;
3005 SKIP(4);
3006 SKIP_BLANKS;
3007
3008 if (RAW != '=') continue;
3009 NEXT;
3010 SKIP_BLANKS;
3011
3012 href = xmlParseQuotedString(ctxt);
3013 SKIP_BLANKS;
3014 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3015 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3016 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3017 garbage = 0;
3018 SKIP(6);
3019 SKIP_BLANKS;
3020
3021 if (RAW != '=') continue;
3022 NEXT;
3023 SKIP_BLANKS;
3024
3025 prefix = xmlParseQuotedString(ctxt);
3026 SKIP_BLANKS;
3027 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3028 garbage = 0;
3029 SKIP(2);
3030 SKIP_BLANKS;
3031
3032 if (RAW != '=') continue;
3033 NEXT;
3034 SKIP_BLANKS;
3035
3036 prefix = xmlParseQuotedString(ctxt);
3037 SKIP_BLANKS;
3038 } else if ((RAW == '?') && (NXT(1) == '>')) {
3039 garbage = 0;
3040 NEXT;
3041 } else {
3042 /*
3043 * Found garbage when parsing the namespace
3044 */
3045 if (!garbage) {
3046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3047 ctxt->sax->error(ctxt->userData,
3048 "xmlParseNamespace found garbage\n");
3049 }
3050 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3051 ctxt->wellFormed = 0;
3052 ctxt->disableSAX = 1;
3053 NEXT;
3054 }
3055 }
3056
3057 MOVETO_ENDTAG(CUR_PTR);
3058 NEXT;
3059
3060 /*
3061 * Register the DTD.
3062 if (href != NULL)
3063 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3064 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3065 */
3066
3067 if (prefix != NULL) xmlFree(prefix);
3068 if (href != NULL) xmlFree(href);
3069#endif
3070}
3071
3072/**
3073 * xmlScanName:
3074 * @ctxt: an XML parser context
3075 *
3076 * Trickery: parse an XML name but without consuming the input flow
3077 * Needed for rollback cases. Used only when parsing entities references.
3078 *
3079 * TODO: seems deprecated now, only used in the default part of
3080 * xmlParserHandleReference
3081 *
3082 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3083 * CombiningChar | Extender
3084 *
3085 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3086 *
3087 * [6] Names ::= Name (S Name)*
3088 *
3089 * Returns the Name parsed or NULL
3090 */
3091
3092xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003093xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 static int deprecated = 0;
3095 if (!deprecated) {
3096 xmlGenericError(xmlGenericErrorContext,
3097 "xmlScanName() deprecated function reached\n");
3098 deprecated = 1;
3099 }
3100 return(NULL);
3101
3102#if 0
3103 xmlChar buf[XML_MAX_NAMELEN];
3104 int len = 0;
3105
3106 GROW;
3107 if (!IS_LETTER(RAW) && (RAW != '_') &&
3108 (RAW != ':')) {
3109 return(NULL);
3110 }
3111
3112
3113 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3114 (NXT(len) == '.') || (NXT(len) == '-') ||
3115 (NXT(len) == '_') || (NXT(len) == ':') ||
3116 (IS_COMBINING(NXT(len))) ||
3117 (IS_EXTENDER(NXT(len)))) {
3118 GROW;
3119 buf[len] = NXT(len);
3120 len++;
3121 if (len >= XML_MAX_NAMELEN) {
3122 xmlGenericError(xmlGenericErrorContext,
3123 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3124 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3125 (IS_DIGIT(NXT(len))) ||
3126 (NXT(len) == '.') || (NXT(len) == '-') ||
3127 (NXT(len) == '_') || (NXT(len) == ':') ||
3128 (IS_COMBINING(NXT(len))) ||
3129 (IS_EXTENDER(NXT(len))))
3130 len++;
3131 break;
3132 }
3133 }
3134 return(xmlStrndup(buf, len));
3135#endif
3136}
3137
3138/**
3139 * xmlParserHandleReference:
3140 * @ctxt: the parser context
3141 *
3142 * TODO: Remove, now deprecated ... the test is done directly in the
3143 * content parsing
3144 * routines.
3145 *
3146 * [67] Reference ::= EntityRef | CharRef
3147 *
3148 * [68] EntityRef ::= '&' Name ';'
3149 *
3150 * [ WFC: Entity Declared ]
3151 * the Name given in the entity reference must match that in an entity
3152 * declaration, except that well-formed documents need not declare any
3153 * of the following entities: amp, lt, gt, apos, quot.
3154 *
3155 * [ WFC: Parsed Entity ]
3156 * An entity reference must not contain the name of an unparsed entity
3157 *
3158 * [66] CharRef ::= '&#' [0-9]+ ';' |
3159 * '&#x' [0-9a-fA-F]+ ';'
3160 *
3161 * A PEReference may have been detectect in the current input stream
3162 * the handling is done accordingly to
3163 * http://www.w3.org/TR/REC-xml#entproc
3164 */
3165void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003166xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003167 static int deprecated = 0;
3168 if (!deprecated) {
3169 xmlGenericError(xmlGenericErrorContext,
3170 "xmlParserHandleReference() deprecated function reached\n");
3171 deprecated = 1;
3172 }
3173
3174#if 0
3175 xmlParserInputPtr input;
3176 xmlChar *name;
3177 xmlEntityPtr ent = NULL;
3178
3179 if (ctxt->token != 0) {
3180 return;
3181 }
3182 if (RAW != '&') return;
3183 GROW;
3184 if ((RAW == '&') && (NXT(1) == '#')) {
3185 switch(ctxt->instate) {
3186 case XML_PARSER_ENTITY_DECL:
3187 case XML_PARSER_PI:
3188 case XML_PARSER_CDATA_SECTION:
3189 case XML_PARSER_COMMENT:
3190 case XML_PARSER_SYSTEM_LITERAL:
3191 /* we just ignore it there */
3192 return;
3193 case XML_PARSER_START_TAG:
3194 return;
3195 case XML_PARSER_END_TAG:
3196 return;
3197 case XML_PARSER_EOF:
3198 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3201 ctxt->wellFormed = 0;
3202 ctxt->disableSAX = 1;
3203 return;
3204 case XML_PARSER_PROLOG:
3205 case XML_PARSER_START:
3206 case XML_PARSER_MISC:
3207 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3209 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3210 ctxt->wellFormed = 0;
3211 ctxt->disableSAX = 1;
3212 return;
3213 case XML_PARSER_EPILOG:
3214 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3216 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3217 ctxt->wellFormed = 0;
3218 ctxt->disableSAX = 1;
3219 return;
3220 case XML_PARSER_DTD:
3221 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3223 ctxt->sax->error(ctxt->userData,
3224 "CharRef are forbiden in DTDs!\n");
3225 ctxt->wellFormed = 0;
3226 ctxt->disableSAX = 1;
3227 return;
3228 case XML_PARSER_ENTITY_VALUE:
3229 /*
3230 * NOTE: in the case of entity values, we don't do the
3231 * substitution here since we need the literal
3232 * entity value to be able to save the internal
3233 * subset of the document.
3234 * This will be handled by xmlStringDecodeEntities
3235 */
3236 return;
3237 case XML_PARSER_CONTENT:
3238 return;
3239 case XML_PARSER_ATTRIBUTE_VALUE:
3240 /* ctxt->token = xmlParseCharRef(ctxt); */
3241 return;
3242 case XML_PARSER_IGNORE:
3243 return;
3244 }
3245 return;
3246 }
3247
3248 switch(ctxt->instate) {
3249 case XML_PARSER_CDATA_SECTION:
3250 return;
3251 case XML_PARSER_PI:
3252 case XML_PARSER_COMMENT:
3253 case XML_PARSER_SYSTEM_LITERAL:
3254 case XML_PARSER_CONTENT:
3255 return;
3256 case XML_PARSER_START_TAG:
3257 return;
3258 case XML_PARSER_END_TAG:
3259 return;
3260 case XML_PARSER_EOF:
3261 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3263 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3264 ctxt->wellFormed = 0;
3265 ctxt->disableSAX = 1;
3266 return;
3267 case XML_PARSER_PROLOG:
3268 case XML_PARSER_START:
3269 case XML_PARSER_MISC:
3270 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3272 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3273 ctxt->wellFormed = 0;
3274 ctxt->disableSAX = 1;
3275 return;
3276 case XML_PARSER_EPILOG:
3277 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3279 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3280 ctxt->wellFormed = 0;
3281 ctxt->disableSAX = 1;
3282 return;
3283 case XML_PARSER_ENTITY_VALUE:
3284 /*
3285 * NOTE: in the case of entity values, we don't do the
3286 * substitution here since we need the literal
3287 * entity value to be able to save the internal
3288 * subset of the document.
3289 * This will be handled by xmlStringDecodeEntities
3290 */
3291 return;
3292 case XML_PARSER_ATTRIBUTE_VALUE:
3293 /*
3294 * NOTE: in the case of attributes values, we don't do the
3295 * substitution here unless we are in a mode where
3296 * the parser is explicitely asked to substitute
3297 * entities. The SAX callback is called with values
3298 * without entity substitution.
3299 * This will then be handled by xmlStringDecodeEntities
3300 */
3301 return;
3302 case XML_PARSER_ENTITY_DECL:
3303 /*
3304 * we just ignore it there
3305 * the substitution will be done once the entity is referenced
3306 */
3307 return;
3308 case XML_PARSER_DTD:
3309 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "Entity references are forbiden in DTDs!\n");
3313 ctxt->wellFormed = 0;
3314 ctxt->disableSAX = 1;
3315 return;
3316 case XML_PARSER_IGNORE:
3317 return;
3318 }
3319
3320/* TODO: this seems not reached anymore .... Verify ... */
3321xmlGenericError(xmlGenericErrorContext,
3322 "Reached deprecated section in xmlParserHandleReference()\n");
3323xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003324 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003325xmlGenericError(xmlGenericErrorContext,
3326 "indicating the version: %s, thanks !\n", xmlParserVersion);
3327 NEXT;
3328 name = xmlScanName(ctxt);
3329 if (name == NULL) {
3330 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3332 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3333 ctxt->wellFormed = 0;
3334 ctxt->disableSAX = 1;
3335 ctxt->token = '&';
3336 return;
3337 }
3338 if (NXT(xmlStrlen(name)) != ';') {
3339 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3341 ctxt->sax->error(ctxt->userData,
3342 "Entity reference: ';' expected\n");
3343 ctxt->wellFormed = 0;
3344 ctxt->disableSAX = 1;
3345 ctxt->token = '&';
3346 xmlFree(name);
3347 return;
3348 }
3349 SKIP(xmlStrlen(name) + 1);
3350 if (ctxt->sax != NULL) {
3351 if (ctxt->sax->getEntity != NULL)
3352 ent = ctxt->sax->getEntity(ctxt->userData, name);
3353 }
3354
3355 /*
3356 * [ WFC: Entity Declared ]
3357 * the Name given in the entity reference must match that in an entity
3358 * declaration, except that well-formed documents need not declare any
3359 * of the following entities: amp, lt, gt, apos, quot.
3360 */
3361 if (ent == NULL)
3362 ent = xmlGetPredefinedEntity(name);
3363 if (ent == NULL) {
3364 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3366 ctxt->sax->error(ctxt->userData,
3367 "Entity reference: entity %s not declared\n",
3368 name);
3369 ctxt->wellFormed = 0;
3370 ctxt->disableSAX = 1;
3371 xmlFree(name);
3372 return;
3373 }
3374
3375 /*
3376 * [ WFC: Parsed Entity ]
3377 * An entity reference must not contain the name of an unparsed entity
3378 */
3379 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3380 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3382 ctxt->sax->error(ctxt->userData,
3383 "Entity reference to unparsed entity %s\n", name);
3384 ctxt->wellFormed = 0;
3385 ctxt->disableSAX = 1;
3386 }
3387
3388 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3389 ctxt->token = ent->content[0];
3390 xmlFree(name);
3391 return;
3392 }
3393 input = xmlNewEntityInputStream(ctxt, ent);
3394 xmlPushInput(ctxt, input);
3395 xmlFree(name);
3396#endif
3397 return;
3398}
3399
3400/**
3401 * xmlHandleEntity:
3402 * @ctxt: an XML parser context
3403 * @entity: an XML entity pointer.
3404 *
3405 * Default handling of defined entities, when should we define a new input
3406 * stream ? When do we just handle that as a set of chars ?
3407 *
3408 * OBSOLETE: to be removed at some point.
3409 */
3410
3411void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003412xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003413 static int deprecated = 0;
3414 if (!deprecated) {
3415 xmlGenericError(xmlGenericErrorContext,
3416 "xmlHandleEntity() deprecated function reached\n");
3417 deprecated = 1;
3418 }
3419
3420#if 0
3421 int len;
3422 xmlParserInputPtr input;
3423
3424 if (entity->content == NULL) {
3425 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3427 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3428 entity->name);
3429 ctxt->wellFormed = 0;
3430 ctxt->disableSAX = 1;
3431 return;
3432 }
3433 len = xmlStrlen(entity->content);
3434 if (len <= 2) goto handle_as_char;
3435
3436 /*
3437 * Redefine its content as an input stream.
3438 */
3439 input = xmlNewEntityInputStream(ctxt, entity);
3440 xmlPushInput(ctxt, input);
3441 return;
3442
3443handle_as_char:
3444 /*
3445 * Just handle the content as a set of chars.
3446 */
3447 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3448 (ctxt->sax->characters != NULL))
3449 ctxt->sax->characters(ctxt->userData, entity->content, len);
3450#endif
3451}
3452
3453/**
3454 * xmlNewGlobalNs:
3455 * @doc: the document carrying the namespace
3456 * @href: the URI associated
3457 * @prefix: the prefix for the namespace
3458 *
3459 * Creation of a Namespace, the old way using PI and without scoping
3460 * DEPRECATED !!!
3461 * It now create a namespace on the root element of the document if found.
3462 * Returns NULL this functionnality had been removed
3463 */
3464xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003465xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3466 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003467 static int deprecated = 0;
3468 if (!deprecated) {
3469 xmlGenericError(xmlGenericErrorContext,
3470 "xmlNewGlobalNs() deprecated function reached\n");
3471 deprecated = 1;
3472 }
3473 return(NULL);
3474#if 0
3475 xmlNodePtr root;
3476
3477 xmlNsPtr cur;
3478
3479 root = xmlDocGetRootElement(doc);
3480 if (root != NULL)
3481 return(xmlNewNs(root, href, prefix));
3482
3483 /*
3484 * if there is no root element yet, create an old Namespace type
3485 * and it will be moved to the root at save time.
3486 */
3487 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3488 if (cur == NULL) {
3489 xmlGenericError(xmlGenericErrorContext,
3490 "xmlNewGlobalNs : malloc failed\n");
3491 return(NULL);
3492 }
3493 memset(cur, 0, sizeof(xmlNs));
3494 cur->type = XML_GLOBAL_NAMESPACE;
3495
3496 if (href != NULL)
3497 cur->href = xmlStrdup(href);
3498 if (prefix != NULL)
3499 cur->prefix = xmlStrdup(prefix);
3500
3501 /*
3502 * Add it at the end to preserve parsing order ...
3503 */
3504 if (doc != NULL) {
3505 if (doc->oldNs == NULL) {
3506 doc->oldNs = cur;
3507 } else {
3508 xmlNsPtr prev = doc->oldNs;
3509
3510 while (prev->next != NULL) prev = prev->next;
3511 prev->next = cur;
3512 }
3513 }
3514
3515 return(NULL);
3516#endif
3517}
3518
3519/**
3520 * xmlUpgradeOldNs:
3521 * @doc: a document pointer
3522 *
3523 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3524 * DEPRECATED
3525 */
3526void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003527xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003528 static int deprecated = 0;
3529 if (!deprecated) {
3530 xmlGenericError(xmlGenericErrorContext,
3531 "xmlNewGlobalNs() deprecated function reached\n");
3532 deprecated = 1;
3533 }
3534#if 0
3535 xmlNsPtr cur;
3536
3537 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3538 if (doc->children == NULL) {
3539#ifdef DEBUG_TREE
3540 xmlGenericError(xmlGenericErrorContext,
3541 "xmlUpgradeOldNs: failed no root !\n");
3542#endif
3543 return;
3544 }
3545
3546 cur = doc->oldNs;
3547 while (cur->next != NULL) {
3548 cur->type = XML_LOCAL_NAMESPACE;
3549 cur = cur->next;
3550 }
3551 cur->type = XML_LOCAL_NAMESPACE;
3552 cur->next = doc->children->nsDef;
3553 doc->children->nsDef = doc->oldNs;
3554 doc->oldNs = NULL;
3555#endif
3556}
3557