blob: 4039c99af5a97e6b594824cb7a2330627e433cdb [file] [log] [blame]
/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
49
/* Forward declaration only; the definition is not visible in this section. */
void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000051
/************************************************************************
 *									*
 * 		Version and Features handling				*
 *									*
 ************************************************************************/
/* Library version string, initialised from LIBXML_VERSION_STRING. */
const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000070 xmlInitMemory();
71
Owen Taylor3473f882001-02-23 17:55:21 +000072 if ((myversion / 10000) != (version / 10000)) {
73 xmlGenericError(xmlGenericErrorContext,
74 "Fatal: program compiled against libxml %d using libxml %d\n",
75 (version / 10000), (myversion / 10000));
76 exit(1);
77 }
78 if ((myversion / 100) < (version / 100)) {
79 xmlGenericError(xmlGenericErrorContext,
80 "Warning: program compiled against libxml %d using older %d\n",
81 (version / 100), (myversion / 100));
82 }
83}
84
85
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() copies them out.
 *
 * NOTE(review): "load subset" and "stop parser" are listed here but have
 * no matching branch in xmlGetFeature()/xmlSetFeature(), which therefore
 * return -1 for them.
 */
const char *xmlFeaturesList[] = {
    /* parser options and state */
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* the SAX handler block as a whole */
    "SAX block",
    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};
130
131/*
132 * xmlGetFeaturesList:
133 * @len: the length of the features name array (input/output)
134 * @result: an array of string to be filled with the features name.
135 *
136 * Copy at most *@len feature names into the @result array
137 *
138 * Returns -1 in case or error, or the total number of features,
139 * len is updated with the number of strings copied,
140 * strings must not be deallocated
141 */
142int
143xmlGetFeaturesList(int *len, const char **result) {
144 int ret, i;
145
146 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
147 if ((len == NULL) || (result == NULL))
148 return(ret);
149 if ((*len < 0) || (*len >= 1000))
150 return(-1);
151 if (*len > ret)
152 *len = ret;
153 for (i = 0;i < *len;i++)
154 result[i] = xmlFeaturesList[i];
155 return(ret);
156}
157
158/*
159 * xmlGetFeature:
160 * @ctxt: an XML/HTML parser context
161 * @name: the feature name
162 * @result: location to store the result
163 *
164 * Read the current value of one feature of this parser instance
165 *
166 * Returns -1 in case or error, 0 otherwise
167 */
168int
169xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
170 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
171 return(-1);
172
173 if (!strcmp(name, "validate")) {
174 *((int *) result) = ctxt->validate;
175 } else if (!strcmp(name, "keep blanks")) {
176 *((int *) result) = ctxt->keepBlanks;
177 } else if (!strcmp(name, "disable SAX")) {
178 *((int *) result) = ctxt->disableSAX;
179 } else if (!strcmp(name, "fetch external entities")) {
180 *((int *) result) = ctxt->loadsubset;
181 } else if (!strcmp(name, "substitute entities")) {
182 *((int *) result) = ctxt->replaceEntities;
183 } else if (!strcmp(name, "gather line info")) {
184 *((int *) result) = ctxt->record_info;
185 } else if (!strcmp(name, "user data")) {
186 *((void **)result) = ctxt->userData;
187 } else if (!strcmp(name, "is html")) {
188 *((int *) result) = ctxt->html;
189 } else if (!strcmp(name, "is standalone")) {
190 *((int *) result) = ctxt->standalone;
191 } else if (!strcmp(name, "document")) {
192 *((xmlDocPtr *) result) = ctxt->myDoc;
193 } else if (!strcmp(name, "is well formed")) {
194 *((int *) result) = ctxt->wellFormed;
195 } else if (!strcmp(name, "is valid")) {
196 *((int *) result) = ctxt->valid;
197 } else if (!strcmp(name, "SAX block")) {
198 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
199 } else if (!strcmp(name, "SAX function internalSubset")) {
200 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
201 } else if (!strcmp(name, "SAX function isStandalone")) {
202 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
203 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
204 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
205 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
206 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
207 } else if (!strcmp(name, "SAX function resolveEntity")) {
208 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
209 } else if (!strcmp(name, "SAX function getEntity")) {
210 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
211 } else if (!strcmp(name, "SAX function entityDecl")) {
212 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
213 } else if (!strcmp(name, "SAX function notationDecl")) {
214 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
215 } else if (!strcmp(name, "SAX function attributeDecl")) {
216 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
217 } else if (!strcmp(name, "SAX function elementDecl")) {
218 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
219 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
220 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
221 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
222 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
223 } else if (!strcmp(name, "SAX function startDocument")) {
224 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
225 } else if (!strcmp(name, "SAX function endDocument")) {
226 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
227 } else if (!strcmp(name, "SAX function startElement")) {
228 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
229 } else if (!strcmp(name, "SAX function endElement")) {
230 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
231 } else if (!strcmp(name, "SAX function reference")) {
232 *((referenceSAXFunc *) result) = ctxt->sax->reference;
233 } else if (!strcmp(name, "SAX function characters")) {
234 *((charactersSAXFunc *) result) = ctxt->sax->characters;
235 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
236 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
237 } else if (!strcmp(name, "SAX function processingInstruction")) {
238 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
239 } else if (!strcmp(name, "SAX function comment")) {
240 *((commentSAXFunc *) result) = ctxt->sax->comment;
241 } else if (!strcmp(name, "SAX function warning")) {
242 *((warningSAXFunc *) result) = ctxt->sax->warning;
243 } else if (!strcmp(name, "SAX function error")) {
244 *((errorSAXFunc *) result) = ctxt->sax->error;
245 } else if (!strcmp(name, "SAX function fatalError")) {
246 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
247 } else if (!strcmp(name, "SAX function getParameterEntity")) {
248 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
249 } else if (!strcmp(name, "SAX function cdataBlock")) {
250 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
251 } else if (!strcmp(name, "SAX function externalSubset")) {
252 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
253 } else {
254 return(-1);
255 }
256 return(0);
257}
258
259/*
260 * xmlSetFeature:
261 * @ctxt: an XML/HTML parser context
262 * @name: the feature name
263 * @value: pointer to the location of the new value
264 *
265 * Change the current value of one feature of this parser instance
266 *
267 * Returns -1 in case or error, 0 otherwise
268 */
269int
270xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
271 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
272 return(-1);
273
274 if (!strcmp(name, "validate")) {
275 int newvalidate = *((int *) value);
276 if ((!ctxt->validate) && (newvalidate != 0)) {
277 if (ctxt->vctxt.warning == NULL)
278 ctxt->vctxt.warning = xmlParserValidityWarning;
279 if (ctxt->vctxt.error == NULL)
280 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000281 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000282 }
283 ctxt->validate = newvalidate;
284 } else if (!strcmp(name, "keep blanks")) {
285 ctxt->keepBlanks = *((int *) value);
286 } else if (!strcmp(name, "disable SAX")) {
287 ctxt->disableSAX = *((int *) value);
288 } else if (!strcmp(name, "fetch external entities")) {
289 ctxt->loadsubset = *((int *) value);
290 } else if (!strcmp(name, "substitute entities")) {
291 ctxt->replaceEntities = *((int *) value);
292 } else if (!strcmp(name, "gather line info")) {
293 ctxt->record_info = *((int *) value);
294 } else if (!strcmp(name, "user data")) {
295 ctxt->userData = *((void **)value);
296 } else if (!strcmp(name, "is html")) {
297 ctxt->html = *((int *) value);
298 } else if (!strcmp(name, "is standalone")) {
299 ctxt->standalone = *((int *) value);
300 } else if (!strcmp(name, "document")) {
301 ctxt->myDoc = *((xmlDocPtr *) value);
302 } else if (!strcmp(name, "is well formed")) {
303 ctxt->wellFormed = *((int *) value);
304 } else if (!strcmp(name, "is valid")) {
305 ctxt->valid = *((int *) value);
306 } else if (!strcmp(name, "SAX block")) {
307 ctxt->sax = *((xmlSAXHandlerPtr *) value);
308 } else if (!strcmp(name, "SAX function internalSubset")) {
309 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
310 } else if (!strcmp(name, "SAX function isStandalone")) {
311 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
312 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
313 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
314 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
315 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function resolveEntity")) {
317 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
318 } else if (!strcmp(name, "SAX function getEntity")) {
319 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
320 } else if (!strcmp(name, "SAX function entityDecl")) {
321 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function notationDecl")) {
323 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function attributeDecl")) {
325 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function elementDecl")) {
327 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
329 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
331 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function startDocument")) {
333 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function endDocument")) {
335 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function startElement")) {
337 ctxt->sax->startElement = *((startElementSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function endElement")) {
339 ctxt->sax->endElement = *((endElementSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function reference")) {
341 ctxt->sax->reference = *((referenceSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function characters")) {
343 ctxt->sax->characters = *((charactersSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
345 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function processingInstruction")) {
347 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function comment")) {
349 ctxt->sax->comment = *((commentSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function warning")) {
351 ctxt->sax->warning = *((warningSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function error")) {
353 ctxt->sax->error = *((errorSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function fatalError")) {
355 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function getParameterEntity")) {
357 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
358 } else if (!strcmp(name, "SAX function cdataBlock")) {
359 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function externalSubset")) {
361 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
362 } else {
363 return(-1);
364 }
365 return(0);
366}
367
/************************************************************************
 *									*
 * 		Some functions to avoid too large macros		*
 *									*
 ************************************************************************/
373
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character except the surrogate blocks, FFFE and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space only TAB, LF and CR are legal. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* 0xD800 - 0xDFFF: surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF are excluded, then the supplementary planes */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
394
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the characters allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
409
/*
 * Direct lookup table for the code points 0x00 - 0xFF: 1 when the
 * code point is a BaseChar, 0 otherwise.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive [first, last] ranges of production [85] BaseChar for the
 * code points >= 0x0100.  The table MUST stay sorted in ascending order
 * with non-overlapping entries: xmlIsBaseChar() bisects it.
 */
static const int xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, the others
 * by bisecting xmlBaseCharRanges.  Negative values are never a BaseChar
 * and are rejected before any table is touched.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low;
    int high;

    /* A negative value must not be used as an index into xmlBaseArray. */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) / sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
648
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* Inclusive [first, last] digit blocks, in ascending order. */
    static const int digitBlocks[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const int nbBlocks = (int) (sizeof(digitBlocks) / sizeof(digitBlocks[0]));
    int i;

    for (i = 0; i < nbBlocks; i++) {
        /* The blocks are sorted: nothing further on can match. */
        if (c < digitBlocks[i][0])
            break;
        if (c <= digitBlocks[i][1])
            return(1);
    }
    return(0);
}
678
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /*
     * Inclusive [first, last] ranges of the production, sorted in
     * ascending order and non-overlapping so they can be bisected.
     */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
        {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
        {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
        {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3}, {0x0A02, 0x0A02},
        {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD},
        {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03},
        {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43}, {0x0D46, 0x0D48},
        {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
        {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1},
        {0x302A, 0x302F}, {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    int low = 0;
    int high = (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < combiningRanges[mid][0])
            high = mid - 1;
        else if (c > combiningRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
791
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE], including the middle code point 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
816
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * compatibility ideographs (0xF900 and above) are not part of it.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* Nothing below 0x0100 is ideographic: answer at once. */
    if (c < 0x0100)
        return(0);
    return(((c >= 0x4e00) && (c <= 0x9fa5)) ||
           ((c >= 0x3021) && (c <= 0x3029)) ||
           (c == 0x3007));
}
834
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
848
/**
 * xmlIsPubidChar:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* #x20 | #xD | #xA */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
871
872/************************************************************************
873 * *
874 * Input handling functions for progressive parsing *
875 * *
876 ************************************************************************/
877
878/* #define DEBUG_INPUT */
879/* #define DEBUG_STACK */
880/* #define DEBUG_PUSH */
881
882
883/* we need to keep enough input to show errors in context */
884#define LINE_LEN 80
885
886#ifdef DEBUG_INPUT
887#define CHECK_BUFFER(in) check_buffer(in)
888
889void check_buffer(xmlParserInputPtr in) {
890 if (in->base != in->buf->buffer->content) {
891 xmlGenericError(xmlGenericErrorContext,
892 "xmlParserInput: base mismatch problem\n");
893 }
894 if (in->cur < in->base) {
895 xmlGenericError(xmlGenericErrorContext,
896 "xmlParserInput: cur < base problem\n");
897 }
898 if (in->cur > in->base + in->buf->buffer->use) {
899 xmlGenericError(xmlGenericErrorContext,
900 "xmlParserInput: cur > base + use problem\n");
901 }
902 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
903 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
904 in->buf->buffer->use, in->buf->buffer->size);
905}
906
907#else
908#define CHECK_BUFFER(in)
909#endif
910
911
912/**
913 * xmlParserInputRead:
914 * @in: an XML parser input
915 * @len: an indicative size for the lookahead
916 *
917 * This function refresh the input for the parser. It doesn't try to
918 * preserve pointers to the input buffer, and discard already read data
919 *
920 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
921 * end of this entity
922 */
923int
924xmlParserInputRead(xmlParserInputPtr in, int len) {
925 int ret;
926 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000927 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000928
929#ifdef DEBUG_INPUT
930 xmlGenericError(xmlGenericErrorContext, "Read\n");
931#endif
932 if (in->buf == NULL) return(-1);
933 if (in->base == NULL) return(-1);
934 if (in->cur == NULL) return(-1);
935 if (in->buf->buffer == NULL) return(-1);
936 if (in->buf->readcallback == NULL) return(-1);
937
938 CHECK_BUFFER(in);
939
940 used = in->cur - in->buf->buffer->content;
941 ret = xmlBufferShrink(in->buf->buffer, used);
942 if (ret > 0) {
943 in->cur -= ret;
944 in->consumed += ret;
945 }
946 ret = xmlParserInputBufferRead(in->buf, len);
947 if (in->base != in->buf->buffer->content) {
948 /*
949 * the buffer has been realloced
950 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000951 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000952 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000953 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000954 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000955 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000956
957 CHECK_BUFFER(in);
958
959 return(ret);
960}
961
962/**
963 * xmlParserInputGrow:
964 * @in: an XML parser input
965 * @len: an indicative size for the lookahead
966 *
967 * This function increase the input for the parser. It tries to
968 * preserve pointers to the input buffer, and keep already read data
969 *
970 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
971 * end of this entity
972 */
973int
974xmlParserInputGrow(xmlParserInputPtr in, int len) {
975 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000976 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000977
978#ifdef DEBUG_INPUT
979 xmlGenericError(xmlGenericErrorContext, "Grow\n");
980#endif
981 if (in->buf == NULL) return(-1);
982 if (in->base == NULL) return(-1);
983 if (in->cur == NULL) return(-1);
984 if (in->buf->buffer == NULL) return(-1);
985
986 CHECK_BUFFER(in);
987
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000988 indx = in->cur - in->base;
989 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000990
991 CHECK_BUFFER(in);
992
993 return(0);
994 }
995 if (in->buf->readcallback != NULL)
996 ret = xmlParserInputBufferGrow(in->buf, len);
997 else
998 return(0);
999
1000 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001001 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001002 * block, but we use it really as an integer to do some
1003 * pointer arithmetic. Insure will raise it as a bug but in
1004 * that specific case, that's not !
1005 */
1006 if (in->base != in->buf->buffer->content) {
1007 /*
1008 * the buffer has been realloced
1009 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001010 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001011 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001012 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001014 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001015
1016 CHECK_BUFFER(in);
1017
1018 return(ret);
1019}
1020
1021/**
1022 * xmlParserInputShrink:
1023 * @in: an XML parser input
1024 *
1025 * This function removes used input for the parser.
1026 */
1027void
1028xmlParserInputShrink(xmlParserInputPtr in) {
1029 int used;
1030 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001031 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001032
1033#ifdef DEBUG_INPUT
1034 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1035#endif
1036 if (in->buf == NULL) return;
1037 if (in->base == NULL) return;
1038 if (in->cur == NULL) return;
1039 if (in->buf->buffer == NULL) return;
1040
1041 CHECK_BUFFER(in);
1042
1043 used = in->cur - in->buf->buffer->content;
1044 /*
1045 * Do not shrink on large buffers whose only a tiny fraction
1046 * was consumned
1047 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001048 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001049 return;
1050 if (used > INPUT_CHUNK) {
1051 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1052 if (ret > 0) {
1053 in->cur -= ret;
1054 in->consumed += ret;
1055 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001056 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001057 }
1058
1059 CHECK_BUFFER(in);
1060
1061 if (in->buf->buffer->use > INPUT_CHUNK) {
1062 return;
1063 }
1064 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1065 if (in->base != in->buf->buffer->content) {
1066 /*
1067 * the buffer has been realloced
1068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001071 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001072 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001073 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001074
1075 CHECK_BUFFER(in);
1076}
1077
1078/************************************************************************
1079 * *
1080 * UTF8 character input and related functions *
1081 * *
1082 ************************************************************************/
1083
1084/**
1085 * xmlNextChar:
1086 * @ctxt: the XML parser context
1087 *
1088 * Skip to the next char input char.
1089 */
1090
1091void
1092xmlNextChar(xmlParserCtxtPtr ctxt) {
1093 if (ctxt->instate == XML_PARSER_EOF)
1094 return;
1095
1096 /*
1097 * 2.11 End-of-Line Handling
1098 * the literal two-character sequence "#xD#xA" or a standalone
1099 * literal #xD, an XML processor must pass to the application
1100 * the single character #xA.
1101 */
1102 if (ctxt->token != 0) ctxt->token = 0;
1103 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1104 if ((*ctxt->input->cur == 0) &&
1105 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1106 (ctxt->instate != XML_PARSER_COMMENT)) {
1107 /*
1108 * If we are at the end of the current entity and
1109 * the context allows it, we pop consumed entities
1110 * automatically.
1111 * the auto closing should be blocked in other cases
1112 */
1113 xmlPopInput(ctxt);
1114 } else {
1115 if (*(ctxt->input->cur) == '\n') {
1116 ctxt->input->line++; ctxt->input->col = 1;
1117 } else ctxt->input->col++;
1118 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1119 /*
1120 * We are supposed to handle UTF8, check it's valid
1121 * From rfc2044: encoding of the Unicode values on UTF-8:
1122 *
1123 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1124 * 0000 0000-0000 007F 0xxxxxxx
1125 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1126 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1127 *
1128 * Check for the 0x110000 limit too
1129 */
1130 const unsigned char *cur = ctxt->input->cur;
1131 unsigned char c;
1132
1133 c = *cur;
1134 if (c & 0x80) {
1135 if (cur[1] == 0)
1136 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1137 if ((cur[1] & 0xc0) != 0x80)
1138 goto encoding_error;
1139 if ((c & 0xe0) == 0xe0) {
1140 unsigned int val;
1141
1142 if (cur[2] == 0)
1143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1144 if ((cur[2] & 0xc0) != 0x80)
1145 goto encoding_error;
1146 if ((c & 0xf0) == 0xf0) {
1147 if (cur[3] == 0)
1148 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1149 if (((c & 0xf8) != 0xf0) ||
1150 ((cur[3] & 0xc0) != 0x80))
1151 goto encoding_error;
1152 /* 4-byte code */
1153 ctxt->input->cur += 4;
1154 val = (cur[0] & 0x7) << 18;
1155 val |= (cur[1] & 0x3f) << 12;
1156 val |= (cur[2] & 0x3f) << 6;
1157 val |= cur[3] & 0x3f;
1158 } else {
1159 /* 3-byte code */
1160 ctxt->input->cur += 3;
1161 val = (cur[0] & 0xf) << 12;
1162 val |= (cur[1] & 0x3f) << 6;
1163 val |= cur[2] & 0x3f;
1164 }
1165 if (((val > 0xd7ff) && (val < 0xe000)) ||
1166 ((val > 0xfffd) && (val < 0x10000)) ||
1167 (val >= 0x110000)) {
1168 if ((ctxt->sax != NULL) &&
1169 (ctxt->sax->error != NULL))
1170 ctxt->sax->error(ctxt->userData,
1171 "Char 0x%X out of allowed range\n", val);
1172 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1173 ctxt->wellFormed = 0;
1174 ctxt->disableSAX = 1;
1175 }
1176 } else
1177 /* 2-byte code */
1178 ctxt->input->cur += 2;
1179 } else
1180 /* 1-byte code */
1181 ctxt->input->cur++;
1182 } else {
1183 /*
1184 * Assume it's a fixed lenght encoding (1) with
1185 * a compatibke encoding for the ASCII set, since
1186 * XML constructs only use < 128 chars
1187 */
1188 ctxt->input->cur++;
1189 }
1190 ctxt->nbChars++;
1191 if (*ctxt->input->cur == 0)
1192 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1193 }
1194 } else {
1195 ctxt->input->cur++;
1196 ctxt->nbChars++;
1197 if (*ctxt->input->cur == 0)
1198 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1199 }
1200 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1201 xmlParserHandlePEReference(ctxt);
1202 if ((*ctxt->input->cur == 0) &&
1203 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1204 xmlPopInput(ctxt);
1205 return;
1206encoding_error:
1207 /*
1208 * If we detect an UTF8 error that probably mean that the
1209 * input encoding didn't get properly advertized in the
1210 * declaration header. Report the error and switch the encoding
1211 * to ISO-Latin-1 (if you don't like this policy, just declare the
1212 * encoding !)
1213 */
1214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1215 ctxt->sax->error(ctxt->userData,
1216 "Input is not proper UTF-8, indicate encoding !\n");
1217 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1218 ctxt->input->cur[0], ctxt->input->cur[1],
1219 ctxt->input->cur[2], ctxt->input->cur[3]);
1220 }
1221 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1222
1223 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1224 ctxt->input->cur++;
1225 return;
1226}
1227
1228/**
1229 * xmlCurrentChar:
1230 * @ctxt: the XML parser context
1231 * @len: pointer to the length of the char read
1232 *
1233 * The current char value, if using UTF-8 this may actaully span multiple
1234 * bytes in the input buffer. Implement the end of line normalization:
1235 * 2.11 End-of-Line Handling
1236 * Wherever an external parsed entity or the literal entity value
1237 * of an internal parsed entity contains either the literal two-character
1238 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1239 * must pass to the application the single character #xA.
1240 * This behavior can conveniently be produced by normalizing all
1241 * line breaks to #xA on input, before parsing.)
1242 *
1243 * Returns the current char value and its lenght
1244 */
1245
1246int
1247xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1248 if (ctxt->instate == XML_PARSER_EOF)
1249 return(0);
1250
1251 if (ctxt->token != 0) {
1252 *len = 0;
1253 return(ctxt->token);
1254 }
1255 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1256 *len = 1;
1257 return((int) *ctxt->input->cur);
1258 }
1259 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1260 /*
1261 * We are supposed to handle UTF8, check it's valid
1262 * From rfc2044: encoding of the Unicode values on UTF-8:
1263 *
1264 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1265 * 0000 0000-0000 007F 0xxxxxxx
1266 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1267 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1268 *
1269 * Check for the 0x110000 limit too
1270 */
1271 const unsigned char *cur = ctxt->input->cur;
1272 unsigned char c;
1273 unsigned int val;
1274
1275 c = *cur;
1276 if (c & 0x80) {
1277 if (cur[1] == 0)
1278 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1279 if ((cur[1] & 0xc0) != 0x80)
1280 goto encoding_error;
1281 if ((c & 0xe0) == 0xe0) {
1282
1283 if (cur[2] == 0)
1284 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1285 if ((cur[2] & 0xc0) != 0x80)
1286 goto encoding_error;
1287 if ((c & 0xf0) == 0xf0) {
1288 if (cur[3] == 0)
1289 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1290 if (((c & 0xf8) != 0xf0) ||
1291 ((cur[3] & 0xc0) != 0x80))
1292 goto encoding_error;
1293 /* 4-byte code */
1294 *len = 4;
1295 val = (cur[0] & 0x7) << 18;
1296 val |= (cur[1] & 0x3f) << 12;
1297 val |= (cur[2] & 0x3f) << 6;
1298 val |= cur[3] & 0x3f;
1299 } else {
1300 /* 3-byte code */
1301 *len = 3;
1302 val = (cur[0] & 0xf) << 12;
1303 val |= (cur[1] & 0x3f) << 6;
1304 val |= cur[2] & 0x3f;
1305 }
1306 } else {
1307 /* 2-byte code */
1308 *len = 2;
1309 val = (cur[0] & 0x1f) << 6;
1310 val |= cur[1] & 0x3f;
1311 }
1312 if (!IS_CHAR(val)) {
1313 if ((ctxt->sax != NULL) &&
1314 (ctxt->sax->error != NULL))
1315 ctxt->sax->error(ctxt->userData,
1316 "Char 0x%X out of allowed range\n", val);
1317 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1318 ctxt->wellFormed = 0;
1319 ctxt->disableSAX = 1;
1320 }
1321 return(val);
1322 } else {
1323 /* 1-byte code */
1324 *len = 1;
1325 if (*ctxt->input->cur == 0xD) {
1326 if (ctxt->input->cur[1] == 0xA) {
1327 ctxt->nbChars++;
1328 ctxt->input->cur++;
1329 }
1330 return(0xA);
1331 }
1332 return((int) *ctxt->input->cur);
1333 }
1334 }
1335 /*
1336 * Assume it's a fixed lenght encoding (1) with
1337 * a compatibke encoding for the ASCII set, since
1338 * XML constructs only use < 128 chars
1339 */
1340 *len = 1;
1341 if (*ctxt->input->cur == 0xD) {
1342 if (ctxt->input->cur[1] == 0xA) {
1343 ctxt->nbChars++;
1344 ctxt->input->cur++;
1345 }
1346 return(0xA);
1347 }
1348 return((int) *ctxt->input->cur);
1349encoding_error:
1350 /*
1351 * If we detect an UTF8 error that probably mean that the
1352 * input encoding didn't get properly advertized in the
1353 * declaration header. Report the error and switch the encoding
1354 * to ISO-Latin-1 (if you don't like this policy, just declare the
1355 * encoding !)
1356 */
1357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1358 ctxt->sax->error(ctxt->userData,
1359 "Input is not proper UTF-8, indicate encoding !\n");
1360 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1361 ctxt->input->cur[0], ctxt->input->cur[1],
1362 ctxt->input->cur[2], ctxt->input->cur[3]);
1363 }
1364 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1365
1366 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1367 *len = 1;
1368 return((int) *ctxt->input->cur);
1369}
1370
1371/**
1372 * xmlStringCurrentChar:
1373 * @ctxt: the XML parser context
1374 * @cur: pointer to the beginning of the char
1375 * @len: pointer to the length of the char read
1376 *
1377 * The current char value, if using UTF-8 this may actaully span multiple
1378 * bytes in the input buffer.
1379 *
1380 * Returns the current char value and its lenght
1381 */
1382
1383int
1384xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001385 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001386 /*
1387 * We are supposed to handle UTF8, check it's valid
1388 * From rfc2044: encoding of the Unicode values on UTF-8:
1389 *
1390 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1391 * 0000 0000-0000 007F 0xxxxxxx
1392 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1393 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1394 *
1395 * Check for the 0x110000 limit too
1396 */
1397 unsigned char c;
1398 unsigned int val;
1399
1400 c = *cur;
1401 if (c & 0x80) {
1402 if ((cur[1] & 0xc0) != 0x80)
1403 goto encoding_error;
1404 if ((c & 0xe0) == 0xe0) {
1405
1406 if ((cur[2] & 0xc0) != 0x80)
1407 goto encoding_error;
1408 if ((c & 0xf0) == 0xf0) {
1409 if (((c & 0xf8) != 0xf0) ||
1410 ((cur[3] & 0xc0) != 0x80))
1411 goto encoding_error;
1412 /* 4-byte code */
1413 *len = 4;
1414 val = (cur[0] & 0x7) << 18;
1415 val |= (cur[1] & 0x3f) << 12;
1416 val |= (cur[2] & 0x3f) << 6;
1417 val |= cur[3] & 0x3f;
1418 } else {
1419 /* 3-byte code */
1420 *len = 3;
1421 val = (cur[0] & 0xf) << 12;
1422 val |= (cur[1] & 0x3f) << 6;
1423 val |= cur[2] & 0x3f;
1424 }
1425 } else {
1426 /* 2-byte code */
1427 *len = 2;
1428 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001429 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001430 }
1431 if (!IS_CHAR(val)) {
1432 if ((ctxt->sax != NULL) &&
1433 (ctxt->sax->error != NULL))
1434 ctxt->sax->error(ctxt->userData,
1435 "Char 0x%X out of allowed range\n", val);
1436 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1437 ctxt->wellFormed = 0;
1438 ctxt->disableSAX = 1;
1439 }
1440 return(val);
1441 } else {
1442 /* 1-byte code */
1443 *len = 1;
1444 return((int) *cur);
1445 }
1446 }
1447 /*
1448 * Assume it's a fixed lenght encoding (1) with
1449 * a compatibke encoding for the ASCII set, since
1450 * XML constructs only use < 128 chars
1451 */
1452 *len = 1;
1453 return((int) *cur);
1454encoding_error:
1455 /*
1456 * If we detect an UTF8 error that probably mean that the
1457 * input encoding didn't get properly advertized in the
1458 * declaration header. Report the error and switch the encoding
1459 * to ISO-Latin-1 (if you don't like this policy, just declare the
1460 * encoding !)
1461 */
1462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1463 ctxt->sax->error(ctxt->userData,
1464 "Input is not proper UTF-8, indicate encoding !\n");
1465 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1466 ctxt->input->cur[0], ctxt->input->cur[1],
1467 ctxt->input->cur[2], ctxt->input->cur[3]);
1468 }
1469 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1470
1471 *len = 1;
1472 return((int) *cur);
1473}
1474
1475/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001476 * xmlCopyCharMultiByte:
1477 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001478 * @val: the char value
1479 *
1480 * append the char value in the array
1481 *
1482 * Returns the number of xmlChar written
1483 */
Owen Taylor3473f882001-02-23 17:55:21 +00001484int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001485xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001486 /*
1487 * We are supposed to handle UTF8, check it's valid
1488 * From rfc2044: encoding of the Unicode values on UTF-8:
1489 *
1490 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1491 * 0000 0000-0000 007F 0xxxxxxx
1492 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1493 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495 if (val >= 0x80) {
1496 xmlChar *savedout = out;
1497 int bits;
1498 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1499 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1500 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1501 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001502 xmlGenericError(xmlGenericErrorContext,
1503 "Internal error, xmlCopyChar 0x%X out of bound\n",
1504 val);
1505 return(0);
1506 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507 for ( ; bits >= 0; bits-= 6)
1508 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1509 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001510 }
1511 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001512 return 1;
1513}
1514
1515/**
1516 * xmlCopyChar:
1517 * @len: Ignored, compatibility
1518 * @out: pointer to an arry of xmlChar
1519 * @val: the char value
1520 *
1521 * append the char value in the array
1522 *
1523 * Returns the number of xmlChar written
1524 */
1525
1526int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001527xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 /* the len parameter is ignored */
1529 if (val >= 0x80) {
1530 return(xmlCopyCharMultiByte (out, val));
1531 }
1532 *out = (xmlChar) val;
1533 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001534}
1535
1536/************************************************************************
1537 * *
1538 * Commodity functions to switch encodings *
1539 * *
1540 ************************************************************************/
1541
1542/**
1543 * xmlSwitchEncoding:
1544 * @ctxt: the parser context
1545 * @enc: the encoding value (number)
1546 *
1547 * change the input functions when discovering the character encoding
1548 * of a given entity.
1549 *
1550 * Returns 0 in case of success, -1 otherwise
1551 */
1552int
1553xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1554{
1555 xmlCharEncodingHandlerPtr handler;
1556
1557 switch (enc) {
1558 case XML_CHAR_ENCODING_ERROR:
1559 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1562 ctxt->wellFormed = 0;
1563 ctxt->disableSAX = 1;
1564 break;
1565 case XML_CHAR_ENCODING_NONE:
1566 /* let's assume it's UTF-8 without the XML decl */
1567 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1568 return(0);
1569 case XML_CHAR_ENCODING_UTF8:
1570 /* default encoding, no conversion should be needed */
1571 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1572 return(0);
1573 default:
1574 break;
1575 }
1576 handler = xmlGetCharEncodingHandler(enc);
1577 if (handler == NULL) {
1578 /*
1579 * Default handlers.
1580 */
1581 switch (enc) {
1582 case XML_CHAR_ENCODING_ERROR:
1583 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1585 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1586 ctxt->wellFormed = 0;
1587 ctxt->disableSAX = 1;
1588 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1589 break;
1590 case XML_CHAR_ENCODING_NONE:
1591 /* let's assume it's UTF-8 without the XML decl */
1592 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1593 return(0);
1594 case XML_CHAR_ENCODING_UTF8:
1595 case XML_CHAR_ENCODING_ASCII:
1596 /* default encoding, no conversion should be needed */
1597 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1598 return(0);
1599 case XML_CHAR_ENCODING_UTF16LE:
1600 break;
1601 case XML_CHAR_ENCODING_UTF16BE:
1602 break;
1603 case XML_CHAR_ENCODING_UCS4LE:
1604 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1606 ctxt->sax->error(ctxt->userData,
1607 "char encoding USC4 little endian not supported\n");
1608 break;
1609 case XML_CHAR_ENCODING_UCS4BE:
1610 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1612 ctxt->sax->error(ctxt->userData,
1613 "char encoding USC4 big endian not supported\n");
1614 break;
1615 case XML_CHAR_ENCODING_EBCDIC:
1616 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1618 ctxt->sax->error(ctxt->userData,
1619 "char encoding EBCDIC not supported\n");
1620 break;
1621 case XML_CHAR_ENCODING_UCS4_2143:
1622 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624 ctxt->sax->error(ctxt->userData,
1625 "char encoding UCS4 2143 not supported\n");
1626 break;
1627 case XML_CHAR_ENCODING_UCS4_3412:
1628 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1630 ctxt->sax->error(ctxt->userData,
1631 "char encoding UCS4 3412 not supported\n");
1632 break;
1633 case XML_CHAR_ENCODING_UCS2:
1634 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636 ctxt->sax->error(ctxt->userData,
1637 "char encoding UCS2 not supported\n");
1638 break;
1639 case XML_CHAR_ENCODING_8859_1:
1640 case XML_CHAR_ENCODING_8859_2:
1641 case XML_CHAR_ENCODING_8859_3:
1642 case XML_CHAR_ENCODING_8859_4:
1643 case XML_CHAR_ENCODING_8859_5:
1644 case XML_CHAR_ENCODING_8859_6:
1645 case XML_CHAR_ENCODING_8859_7:
1646 case XML_CHAR_ENCODING_8859_8:
1647 case XML_CHAR_ENCODING_8859_9:
1648 /*
1649 * We used to keep the internal content in the
1650 * document encoding however this turns being unmaintainable
1651 * So xmlGetCharEncodingHandler() will return non-null
1652 * values for this now.
1653 */
1654 if ((ctxt->inputNr == 1) &&
1655 (ctxt->encoding == NULL) &&
1656 (ctxt->input->encoding != NULL)) {
1657 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1658 }
1659 ctxt->charset = enc;
1660 return(0);
1661 case XML_CHAR_ENCODING_2022_JP:
1662 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664 ctxt->sax->error(ctxt->userData,
1665 "char encoding ISO-2022-JPnot supported\n");
1666 break;
1667 case XML_CHAR_ENCODING_SHIFT_JIS:
1668 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1670 ctxt->sax->error(ctxt->userData,
1671 "char encoding Shift_JIS not supported\n");
1672 break;
1673 case XML_CHAR_ENCODING_EUC_JP:
1674 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1676 ctxt->sax->error(ctxt->userData,
1677 "char encoding EUC-JPnot supported\n");
1678 break;
1679 }
1680 }
1681 if (handler == NULL)
1682 return(-1);
1683 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1684 return(xmlSwitchToEncoding(ctxt, handler));
1685}
1686
1687/**
1688 * xmlSwitchToEncoding:
1689 * @ctxt: the parser context
1690 * @handler: the encoding handler
1691 *
1692 * change the input functions when discovering the character encoding
1693 * of a given entity.
1694 *
1695 * Returns 0 in case of success, -1 otherwise
1696 */
1697int
1698xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1699{
1700 int nbchars;
1701
1702 if (handler != NULL) {
1703 if (ctxt->input != NULL) {
1704 if (ctxt->input->buf != NULL) {
1705 if (ctxt->input->buf->encoder != NULL) {
1706 if (ctxt->input->buf->encoder == handler)
1707 return(0);
1708 /*
1709 * Note: this is a bit dangerous, but that's what it
1710 * takes to use nearly compatible signature for different
1711 * encodings.
1712 */
1713 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1714 ctxt->input->buf->encoder = handler;
1715 return(0);
1716 }
1717 ctxt->input->buf->encoder = handler;
1718
1719 /*
1720 * Is there already some content down the pipe to convert ?
1721 */
1722 if ((ctxt->input->buf->buffer != NULL) &&
1723 (ctxt->input->buf->buffer->use > 0)) {
1724 int processed;
1725
1726 /*
1727 * Specific handling of the Byte Order Mark for
1728 * UTF-16
1729 */
1730 if ((handler->name != NULL) &&
1731 (!strcmp(handler->name, "UTF-16LE")) &&
1732 (ctxt->input->cur[0] == 0xFF) &&
1733 (ctxt->input->cur[1] == 0xFE)) {
1734 ctxt->input->cur += 2;
1735 }
1736 if ((handler->name != NULL) &&
1737 (!strcmp(handler->name, "UTF-16BE")) &&
1738 (ctxt->input->cur[0] == 0xFE) &&
1739 (ctxt->input->cur[1] == 0xFF)) {
1740 ctxt->input->cur += 2;
1741 }
1742
1743 /*
1744 * Shring the current input buffer.
1745 * Move it as the raw buffer and create a new input buffer
1746 */
1747 processed = ctxt->input->cur - ctxt->input->base;
1748 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1749 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1750 ctxt->input->buf->buffer = xmlBufferCreate();
1751
1752 if (ctxt->html) {
1753 /*
1754 * converst as much as possbile of the buffer
1755 */
1756 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1757 ctxt->input->buf->buffer,
1758 ctxt->input->buf->raw);
1759 } else {
1760 /*
1761 * convert just enough to get
1762 * '<?xml version="1.0" encoding="xxx"?>'
1763 * parsed with the autodetected encoding
1764 * into the parser reading buffer.
1765 */
1766 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1767 ctxt->input->buf->buffer,
1768 ctxt->input->buf->raw);
1769 }
1770 if (nbchars < 0) {
1771 xmlGenericError(xmlGenericErrorContext,
1772 "xmlSwitchToEncoding: encoder error\n");
1773 return(-1);
1774 }
1775 ctxt->input->base =
1776 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001777 ctxt->input->end =
1778 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001779
1780 }
1781 return(0);
1782 } else {
1783 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1784 /*
1785 * When parsing a static memory array one must know the
1786 * size to be able to convert the buffer.
1787 */
1788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1789 ctxt->sax->error(ctxt->userData,
1790 "xmlSwitchEncoding : no input\n");
1791 return(-1);
1792 } else {
1793 int processed;
1794
1795 /*
1796 * Shring the current input buffer.
1797 * Move it as the raw buffer and create a new input buffer
1798 */
1799 processed = ctxt->input->cur - ctxt->input->base;
1800
1801 ctxt->input->buf->raw = xmlBufferCreate();
1802 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1803 ctxt->input->length - processed);
1804 ctxt->input->buf->buffer = xmlBufferCreate();
1805
1806 /*
1807 * convert as much as possible of the raw input
1808 * to the parser reading buffer.
1809 */
1810 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1811 ctxt->input->buf->buffer,
1812 ctxt->input->buf->raw);
1813 if (nbchars < 0) {
1814 xmlGenericError(xmlGenericErrorContext,
1815 "xmlSwitchToEncoding: encoder error\n");
1816 return(-1);
1817 }
1818
1819 /*
1820 * Conversion succeeded, get rid of the old buffer
1821 */
1822 if ((ctxt->input->free != NULL) &&
1823 (ctxt->input->base != NULL))
1824 ctxt->input->free((xmlChar *) ctxt->input->base);
1825 ctxt->input->base =
1826 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001827 ctxt->input->end =
1828 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001829 }
1830 }
1831 } else {
1832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1833 ctxt->sax->error(ctxt->userData,
1834 "xmlSwitchEncoding : no input\n");
1835 return(-1);
1836 }
1837 /*
1838 * The parsing is now done in UTF8 natively
1839 */
1840 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1841 } else
1842 return(-1);
1843 return(0);
1844
1845}
1846
1847/************************************************************************
1848 * *
1849 * Commodity functions to handle entities processing *
1850 * *
1851 ************************************************************************/
1852
1853/**
1854 * xmlFreeInputStream:
1855 * @input: an xmlParserInputPtr
1856 *
1857 * Free up an input stream.
1858 */
1859void
1860xmlFreeInputStream(xmlParserInputPtr input) {
1861 if (input == NULL) return;
1862
1863 if (input->filename != NULL) xmlFree((char *) input->filename);
1864 if (input->directory != NULL) xmlFree((char *) input->directory);
1865 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1866 if (input->version != NULL) xmlFree((char *) input->version);
1867 if ((input->free != NULL) && (input->base != NULL))
1868 input->free((xmlChar *) input->base);
1869 if (input->buf != NULL)
1870 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001871 xmlFree(input);
1872}
1873
1874/**
1875 * xmlNewInputStream:
1876 * @ctxt: an XML parser context
1877 *
1878 * Create a new input stream structure
1879 * Returns the new input stream or NULL
1880 */
1881xmlParserInputPtr
1882xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1883 xmlParserInputPtr input;
1884
1885 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1886 if (input == NULL) {
1887 if (ctxt != NULL) {
1888 ctxt->errNo = XML_ERR_NO_MEMORY;
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "malloc: couldn't allocate a new input stream\n");
1892 ctxt->errNo = XML_ERR_NO_MEMORY;
1893 }
1894 return(NULL);
1895 }
1896 memset(input, 0, sizeof(xmlParserInput));
1897 input->line = 1;
1898 input->col = 1;
1899 input->standalone = -1;
1900 return(input);
1901}
1902
1903/**
1904 * xmlNewIOInputStream:
1905 * @ctxt: an XML parser context
1906 * @input: an I/O Input
1907 * @enc: the charset encoding if known
1908 *
1909 * Create a new input stream structure encapsulating the @input into
1910 * a stream suitable for the parser.
1911 *
1912 * Returns the new input stream or NULL
1913 */
1914xmlParserInputPtr
1915xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1916 xmlCharEncoding enc) {
1917 xmlParserInputPtr inputStream;
1918
1919 if (xmlParserDebugEntities)
1920 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1921 inputStream = xmlNewInputStream(ctxt);
1922 if (inputStream == NULL) {
1923 return(NULL);
1924 }
1925 inputStream->filename = NULL;
1926 inputStream->buf = input;
1927 inputStream->base = inputStream->buf->buffer->content;
1928 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001929 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001930 if (enc != XML_CHAR_ENCODING_NONE) {
1931 xmlSwitchEncoding(ctxt, enc);
1932 }
1933
1934 return(inputStream);
1935}
1936
1937/**
1938 * xmlNewEntityInputStream:
1939 * @ctxt: an XML parser context
1940 * @entity: an Entity pointer
1941 *
1942 * Create a new input stream based on an xmlEntityPtr
1943 *
1944 * Returns the new input stream or NULL
1945 */
1946xmlParserInputPtr
1947xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1948 xmlParserInputPtr input;
1949
1950 if (entity == NULL) {
1951 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData,
1954 "internal: xmlNewEntityInputStream entity = NULL\n");
1955 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1956 return(NULL);
1957 }
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext,
1960 "new input from entity: %s\n", entity->name);
1961 if (entity->content == NULL) {
1962 switch (entity->etype) {
1963 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1964 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1966 ctxt->sax->error(ctxt->userData,
1967 "xmlNewEntityInputStream unparsed entity !\n");
1968 break;
1969 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1970 case XML_EXTERNAL_PARAMETER_ENTITY:
1971 return(xmlLoadExternalEntity((char *) entity->URI,
1972 (char *) entity->ExternalID, ctxt));
1973 case XML_INTERNAL_GENERAL_ENTITY:
1974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975 ctxt->sax->error(ctxt->userData,
1976 "Internal entity %s without content !\n", entity->name);
1977 break;
1978 case XML_INTERNAL_PARAMETER_ENTITY:
1979 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981 ctxt->sax->error(ctxt->userData,
1982 "Internal parameter entity %s without content !\n", entity->name);
1983 break;
1984 case XML_INTERNAL_PREDEFINED_ENTITY:
1985 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "Predefined entity %s without content !\n", entity->name);
1989 break;
1990 }
1991 return(NULL);
1992 }
1993 input = xmlNewInputStream(ctxt);
1994 if (input == NULL) {
1995 return(NULL);
1996 }
1997 input->filename = (char *) entity->URI;
1998 input->base = entity->content;
1999 input->cur = entity->content;
2000 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002001 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002002 return(input);
2003}
2004
2005/**
2006 * xmlNewStringInputStream:
2007 * @ctxt: an XML parser context
2008 * @buffer: an memory buffer
2009 *
2010 * Create a new input stream based on a memory buffer.
2011 * Returns the new input stream
2012 */
2013xmlParserInputPtr
2014xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2015 xmlParserInputPtr input;
2016
2017 if (buffer == NULL) {
2018 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData,
2021 "internal: xmlNewStringInputStream string = NULL\n");
2022 return(NULL);
2023 }
2024 if (xmlParserDebugEntities)
2025 xmlGenericError(xmlGenericErrorContext,
2026 "new fixed input: %.30s\n", buffer);
2027 input = xmlNewInputStream(ctxt);
2028 if (input == NULL) {
2029 return(NULL);
2030 }
2031 input->base = buffer;
2032 input->cur = buffer;
2033 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002034 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002035 return(input);
2036}
2037
2038/**
2039 * xmlNewInputFromFile:
2040 * @ctxt: an XML parser context
2041 * @filename: the filename to use as entity
2042 *
2043 * Create a new input stream based on a file.
2044 *
2045 * Returns the new input stream or NULL in case of error
2046 */
2047xmlParserInputPtr
2048xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2049 xmlParserInputBufferPtr buf;
2050 xmlParserInputPtr inputStream;
2051 char *directory = NULL;
2052 xmlChar *URI = NULL;
2053
2054 if (xmlParserDebugEntities)
2055 xmlGenericError(xmlGenericErrorContext,
2056 "new input from file: %s\n", filename);
2057 if (ctxt == NULL) return(NULL);
2058 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2059 if (buf == NULL)
2060 return(NULL);
2061
2062 URI = xmlStrdup((xmlChar *) filename);
2063 directory = xmlParserGetDirectory((const char *) URI);
2064
2065 inputStream = xmlNewInputStream(ctxt);
2066 if (inputStream == NULL) {
2067 if (directory != NULL) xmlFree((char *) directory);
2068 if (URI != NULL) xmlFree((char *) URI);
2069 return(NULL);
2070 }
2071
2072 inputStream->filename = (const char *) URI;
2073 inputStream->directory = directory;
2074 inputStream->buf = buf;
2075
2076 inputStream->base = inputStream->buf->buffer->content;
2077 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002078 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002079 if ((ctxt->directory == NULL) && (directory != NULL))
2080 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2081 return(inputStream);
2082}
2083
2084/************************************************************************
2085 * *
2086 * Commodity functions to handle parser contexts *
2087 * *
2088 ************************************************************************/
2089
2090/**
2091 * xmlInitParserCtxt:
2092 * @ctxt: an XML parser context
2093 *
2094 * Initialize a parser context
2095 */
2096
2097void
2098xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2099{
2100 xmlSAXHandler *sax;
2101
2102 xmlDefaultSAXHandlerInit();
2103
2104 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2105 if (sax == NULL) {
2106 xmlGenericError(xmlGenericErrorContext,
2107 "xmlInitParserCtxt: out of memory\n");
2108 }
2109 else
2110 memset(sax, 0, sizeof(xmlSAXHandler));
2111
2112 /* Allocate the Input stack */
2113 ctxt->inputTab = (xmlParserInputPtr *)
2114 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2115 if (ctxt->inputTab == NULL) {
2116 xmlGenericError(xmlGenericErrorContext,
2117 "xmlInitParserCtxt: out of memory\n");
2118 ctxt->inputNr = 0;
2119 ctxt->inputMax = 0;
2120 ctxt->input = NULL;
2121 return;
2122 }
2123 ctxt->inputNr = 0;
2124 ctxt->inputMax = 5;
2125 ctxt->input = NULL;
2126
2127 ctxt->version = NULL;
2128 ctxt->encoding = NULL;
2129 ctxt->standalone = -1;
2130 ctxt->hasExternalSubset = 0;
2131 ctxt->hasPErefs = 0;
2132 ctxt->html = 0;
2133 ctxt->external = 0;
2134 ctxt->instate = XML_PARSER_START;
2135 ctxt->token = 0;
2136 ctxt->directory = NULL;
2137
2138 /* Allocate the Node stack */
2139 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2140 if (ctxt->nodeTab == NULL) {
2141 xmlGenericError(xmlGenericErrorContext,
2142 "xmlInitParserCtxt: out of memory\n");
2143 ctxt->nodeNr = 0;
2144 ctxt->nodeMax = 0;
2145 ctxt->node = NULL;
2146 ctxt->inputNr = 0;
2147 ctxt->inputMax = 0;
2148 ctxt->input = NULL;
2149 return;
2150 }
2151 ctxt->nodeNr = 0;
2152 ctxt->nodeMax = 10;
2153 ctxt->node = NULL;
2154
2155 /* Allocate the Name stack */
2156 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2157 if (ctxt->nameTab == NULL) {
2158 xmlGenericError(xmlGenericErrorContext,
2159 "xmlInitParserCtxt: out of memory\n");
2160 ctxt->nodeNr = 0;
2161 ctxt->nodeMax = 0;
2162 ctxt->node = NULL;
2163 ctxt->inputNr = 0;
2164 ctxt->inputMax = 0;
2165 ctxt->input = NULL;
2166 ctxt->nameNr = 0;
2167 ctxt->nameMax = 0;
2168 ctxt->name = NULL;
2169 return;
2170 }
2171 ctxt->nameNr = 0;
2172 ctxt->nameMax = 10;
2173 ctxt->name = NULL;
2174
2175 /* Allocate the space stack */
2176 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2177 if (ctxt->spaceTab == NULL) {
2178 xmlGenericError(xmlGenericErrorContext,
2179 "xmlInitParserCtxt: out of memory\n");
2180 ctxt->nodeNr = 0;
2181 ctxt->nodeMax = 0;
2182 ctxt->node = NULL;
2183 ctxt->inputNr = 0;
2184 ctxt->inputMax = 0;
2185 ctxt->input = NULL;
2186 ctxt->nameNr = 0;
2187 ctxt->nameMax = 0;
2188 ctxt->name = NULL;
2189 ctxt->spaceNr = 0;
2190 ctxt->spaceMax = 0;
2191 ctxt->space = NULL;
2192 return;
2193 }
2194 ctxt->spaceNr = 1;
2195 ctxt->spaceMax = 10;
2196 ctxt->spaceTab[0] = -1;
2197 ctxt->space = &ctxt->spaceTab[0];
2198
Daniel Veillard14be0a12001-03-03 18:50:55 +00002199 ctxt->sax = sax;
2200 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202 ctxt->userData = ctxt;
2203 ctxt->myDoc = NULL;
2204 ctxt->wellFormed = 1;
2205 ctxt->valid = 1;
2206 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2207 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2208 ctxt->pedantic = xmlPedanticParserDefaultValue;
2209 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2210 ctxt->vctxt.userData = ctxt;
2211 if (ctxt->validate) {
2212 ctxt->vctxt.error = xmlParserValidityError;
2213 if (xmlGetWarningsDefaultValue == 0)
2214 ctxt->vctxt.warning = NULL;
2215 else
2216 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002217 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002218 } else {
2219 ctxt->vctxt.error = NULL;
2220 ctxt->vctxt.warning = NULL;
2221 }
2222 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2223 ctxt->record_info = 0;
2224 ctxt->nbChars = 0;
2225 ctxt->checkIndex = 0;
2226 ctxt->inSubset = 0;
2227 ctxt->errNo = XML_ERR_OK;
2228 ctxt->depth = 0;
2229 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2230 xmlInitNodeInfoSeq(&ctxt->node_seq);
2231}
2232
2233/**
2234 * xmlFreeParserCtxt:
2235 * @ctxt: an XML parser context
2236 *
2237 * Free all the memory used by a parser context. However the parsed
2238 * document in ctxt->myDoc is not freed.
2239 */
2240
2241void
2242xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2243{
2244 xmlParserInputPtr input;
2245 xmlChar *oldname;
2246
2247 if (ctxt == NULL) return;
2248
2249 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2250 xmlFreeInputStream(input);
2251 }
2252 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2253 xmlFree(oldname);
2254 }
2255 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2256 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2257 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2258 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2259 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2260 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2261 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2262 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2263 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002264 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2265 xmlFree(ctxt->sax);
2266 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002267 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Owen Taylor3473f882001-02-23 17:55:21 +00002268 xmlFree(ctxt);
2269}
2270
2271/**
2272 * xmlNewParserCtxt:
2273 *
2274 * Allocate and initialize a new parser context.
2275 *
2276 * Returns the xmlParserCtxtPtr or NULL
2277 */
2278
2279xmlParserCtxtPtr
2280xmlNewParserCtxt()
2281{
2282 xmlParserCtxtPtr ctxt;
2283
2284 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2285 if (ctxt == NULL) {
2286 xmlGenericError(xmlGenericErrorContext,
2287 "xmlNewParserCtxt : cannot allocate context\n");
2288 perror("malloc");
2289 return(NULL);
2290 }
2291 memset(ctxt, 0, sizeof(xmlParserCtxt));
2292 xmlInitParserCtxt(ctxt);
2293 return(ctxt);
2294}
2295
2296/************************************************************************
2297 * *
2298 * Handling of node informations *
2299 * *
2300 ************************************************************************/
2301
2302/**
2303 * xmlClearParserCtxt:
2304 * @ctxt: an XML parser context
2305 *
2306 * Clear (release owned resources) and reinitialize a parser context
2307 */
2308
2309void
2310xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2311{
2312 xmlClearNodeInfoSeq(&ctxt->node_seq);
2313 xmlInitParserCtxt(ctxt);
2314}
2315
2316/**
2317 * xmlParserFindNodeInfo:
2318 * @ctxt: an XML parser context
2319 * @node: an XML node within the tree
2320 *
2321 * Find the parser node info struct for a given node
2322 *
2323 * Returns an xmlParserNodeInfo block pointer or NULL
2324 */
2325const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2326 const xmlNode* node)
2327{
2328 unsigned long pos;
2329
2330 /* Find position where node should be at */
2331 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2332 if ( ctx->node_seq.buffer[pos].node == node )
2333 return &ctx->node_seq.buffer[pos];
2334 else
2335 return NULL;
2336}
2337
2338
2339/**
2340 * xmlInitNodeInfoSeq:
2341 * @seq: a node info sequence pointer
2342 *
2343 * -- Initialize (set to initial state) node info sequence
2344 */
2345void
2346xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2347{
2348 seq->length = 0;
2349 seq->maximum = 0;
2350 seq->buffer = NULL;
2351}
2352
2353/**
2354 * xmlClearNodeInfoSeq:
2355 * @seq: a node info sequence pointer
2356 *
2357 * -- Clear (release memory and reinitialize) node
2358 * info sequence
2359 */
2360void
2361xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2362{
2363 if ( seq->buffer != NULL )
2364 xmlFree(seq->buffer);
2365 xmlInitNodeInfoSeq(seq);
2366}
2367
2368
2369/**
2370 * xmlParserFindNodeInfoIndex:
2371 * @seq: a node info sequence pointer
2372 * @node: an XML node pointer
2373 *
2374 *
2375 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2376 * the given node is or should be at in a sorted sequence
2377 *
2378 * Returns a long indicating the position of the record
2379 */
2380unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2381 const xmlNode* node)
2382{
2383 unsigned long upper, lower, middle;
2384 int found = 0;
2385
2386 /* Do a binary search for the key */
2387 lower = 1;
2388 upper = seq->length;
2389 middle = 0;
2390 while ( lower <= upper && !found) {
2391 middle = lower + (upper - lower) / 2;
2392 if ( node == seq->buffer[middle - 1].node )
2393 found = 1;
2394 else if ( node < seq->buffer[middle - 1].node )
2395 upper = middle - 1;
2396 else
2397 lower = middle + 1;
2398 }
2399
2400 /* Return position */
2401 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2402 return middle;
2403 else
2404 return middle - 1;
2405}
2406
2407
2408/**
2409 * xmlParserAddNodeInfo:
2410 * @ctxt: an XML parser context
2411 * @info: a node info sequence pointer
2412 *
2413 * Insert node info record into the sorted sequence
2414 */
2415void
2416xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2417 const xmlParserNodeInfo* info)
2418{
2419 unsigned long pos;
2420 static unsigned int block_size = 5;
2421
2422 /* Find pos and check to see if node is already in the sequence */
2423 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2424 if ( pos < ctxt->node_seq.length
2425 && ctxt->node_seq.buffer[pos].node == info->node ) {
2426 ctxt->node_seq.buffer[pos] = *info;
2427 }
2428
2429 /* Otherwise, we need to add new node to buffer */
2430 else {
2431 /* Expand buffer by 5 if needed */
2432 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2433 xmlParserNodeInfo* tmp_buffer;
2434 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2435 *(ctxt->node_seq.maximum + block_size));
2436
2437 if ( ctxt->node_seq.buffer == NULL )
2438 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2439 else
2440 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2441
2442 if ( tmp_buffer == NULL ) {
2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2445 ctxt->errNo = XML_ERR_NO_MEMORY;
2446 return;
2447 }
2448 ctxt->node_seq.buffer = tmp_buffer;
2449 ctxt->node_seq.maximum += block_size;
2450 }
2451
2452 /* If position is not at end, move elements out of the way */
2453 if ( pos != ctxt->node_seq.length ) {
2454 unsigned long i;
2455
2456 for ( i = ctxt->node_seq.length; i > pos; i-- )
2457 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2458 }
2459
2460 /* Copy element and increase length */
2461 ctxt->node_seq.buffer[pos] = *info;
2462 ctxt->node_seq.length++;
2463 }
2464}
2465
2466/************************************************************************
2467 * *
2468 * Deprecated functions kept for compatibility *
2469 * *
2470 ************************************************************************/
2471
2472/*
2473 * xmlCheckLanguageID
2474 * @lang: pointer to the string value
2475 *
2476 * Checks that the value conforms to the LanguageID production:
2477 *
2478 * NOTE: this is somewhat deprecated, those productions were removed from
2479 * the XML Second edition.
2480 *
2481 * [33] LanguageID ::= Langcode ('-' Subcode)*
2482 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2483 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2484 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2485 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2486 * [38] Subcode ::= ([a-z] | [A-Z])+
2487 *
2488 * Returns 1 if correct 0 otherwise
2489 **/
2490int
2491xmlCheckLanguageID(const xmlChar *lang) {
2492 const xmlChar *cur = lang;
2493
2494 if (cur == NULL)
2495 return(0);
2496 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2497 ((cur[0] == 'I') && (cur[1] == '-'))) {
2498 /*
2499 * IANA code
2500 */
2501 cur += 2;
2502 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2503 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2504 cur++;
2505 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2506 ((cur[0] == 'X') && (cur[1] == '-'))) {
2507 /*
2508 * User code
2509 */
2510 cur += 2;
2511 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2512 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2513 cur++;
2514 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2515 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2516 /*
2517 * ISO639
2518 */
2519 cur++;
2520 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2521 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2522 cur++;
2523 else
2524 return(0);
2525 } else
2526 return(0);
2527 while (cur[0] != 0) { /* non input consuming */
2528 if (cur[0] != '-')
2529 return(0);
2530 cur++;
2531 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2532 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2533 cur++;
2534 else
2535 return(0);
2536 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2537 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2538 cur++;
2539 }
2540 return(1);
2541}
2542
2543/**
2544 * xmlDecodeEntities:
2545 * @ctxt: the parser context
2546 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2547 * @len: the len to decode (in bytes !), -1 for no size limit
2548 * @end: an end marker xmlChar, 0 if none
2549 * @end2: an end marker xmlChar, 0 if none
2550 * @end3: an end marker xmlChar, 0 if none
2551 *
2552 * This function is deprecated, we now always process entities content
2553 * through xmlStringDecodeEntities
2554 *
2555 * TODO: remove it in next major release.
2556 *
2557 * [67] Reference ::= EntityRef | CharRef
2558 *
2559 * [69] PEReference ::= '%' Name ';'
2560 *
2561 * Returns A newly allocated string with the substitution done. The caller
2562 * must deallocate it !
2563 */
2564xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002565xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2566 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002567#if 0
2568 xmlChar *buffer = NULL;
2569 unsigned int buffer_size = 0;
2570 unsigned int nbchars = 0;
2571
2572 xmlChar *current = NULL;
2573 xmlEntityPtr ent;
2574 unsigned int max = (unsigned int) len;
2575 int c,l;
2576#endif
2577
2578 static int deprecated = 0;
2579 if (!deprecated) {
2580 xmlGenericError(xmlGenericErrorContext,
2581 "xmlDecodeEntities() deprecated function reached\n");
2582 deprecated = 1;
2583 }
2584
2585#if 0
2586 if (ctxt->depth > 40) {
2587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2588 ctxt->sax->error(ctxt->userData,
2589 "Detected entity reference loop\n");
2590 ctxt->wellFormed = 0;
2591 ctxt->disableSAX = 1;
2592 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2593 return(NULL);
2594 }
2595
2596 /*
2597 * allocate a translation buffer.
2598 */
2599 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2600 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2601 if (buffer == NULL) {
2602 perror("xmlDecodeEntities: malloc failed");
2603 return(NULL);
2604 }
2605
2606 /*
2607 * Ok loop until we reach one of the ending char or a size limit.
2608 */
2609 GROW;
2610 c = CUR_CHAR(l);
2611 while ((nbchars < max) && (c != end) && /* NOTUSED */
2612 (c != end2) && (c != end3)) {
2613 GROW;
2614 if (c == 0) break;
2615 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2616 int val = xmlParseCharRef(ctxt);
2617 COPY_BUF(0,buffer,nbchars,val);
2618 NEXTL(l);
2619 } else if ((c == '&') && (ctxt->token != '&') &&
2620 (what & XML_SUBSTITUTE_REF)) {
2621 if (xmlParserDebugEntities)
2622 xmlGenericError(xmlGenericErrorContext,
2623 "decoding Entity Reference\n");
2624 ent = xmlParseEntityRef(ctxt);
2625 if ((ent != NULL) &&
2626 (ctxt->replaceEntities != 0)) {
2627 current = ent->content;
2628 while (*current != 0) { /* non input consuming loop */
2629 buffer[nbchars++] = *current++;
2630 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2631 growBuffer(buffer);
2632 }
2633 }
2634 } else if (ent != NULL) {
2635 const xmlChar *cur = ent->name;
2636
2637 buffer[nbchars++] = '&';
2638 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2639 growBuffer(buffer);
2640 }
2641 while (*cur != 0) { /* non input consuming loop */
2642 buffer[nbchars++] = *cur++;
2643 }
2644 buffer[nbchars++] = ';';
2645 }
2646 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2647 /*
2648 * a PEReference induce to switch the entity flow,
2649 * we break here to flush the current set of chars
2650 * parsed if any. We will be called back later.
2651 */
2652 if (xmlParserDebugEntities)
2653 xmlGenericError(xmlGenericErrorContext,
2654 "decoding PE Reference\n");
2655 if (nbchars != 0) break;
2656
2657 xmlParsePEReference(ctxt);
2658
2659 /*
2660 * Pop-up of finished entities.
2661 */
2662 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2663 xmlPopInput(ctxt);
2664
2665 break;
2666 } else {
2667 COPY_BUF(l,buffer,nbchars,c);
2668 NEXTL(l);
2669 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2670 growBuffer(buffer);
2671 }
2672 }
2673 c = CUR_CHAR(l);
2674 }
2675 buffer[nbchars++] = 0;
2676 return(buffer);
2677#endif
2678 return(NULL);
2679}
2680
2681/**
2682 * xmlNamespaceParseNCName:
2683 * @ctxt: an XML parser context
2684 *
2685 * parse an XML namespace name.
2686 *
2687 * TODO: this seems not in use anymore, the namespace handling is done on
2688 * top of the SAX interfaces, i.e. not on raw input.
2689 *
2690 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2691 *
2692 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2693 * CombiningChar | Extender
2694 *
2695 * Returns the namespace name or NULL
2696 */
2697
2698xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002699xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002700#if 0
2701 xmlChar buf[XML_MAX_NAMELEN + 5];
2702 int len = 0, l;
2703 int cur = CUR_CHAR(l);
2704#endif
2705
2706 static int deprecated = 0;
2707 if (!deprecated) {
2708 xmlGenericError(xmlGenericErrorContext,
2709 "xmlNamespaceParseNCName() deprecated function reached\n");
2710 deprecated = 1;
2711 }
2712
2713#if 0
2714 /* load first the value of the char !!! */
2715 GROW;
2716 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2717
2718xmlGenericError(xmlGenericErrorContext,
2719 "xmlNamespaceParseNCName: reached loop 3\n");
2720 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2721 (cur == '.') || (cur == '-') ||
2722 (cur == '_') ||
2723 (IS_COMBINING(cur)) ||
2724 (IS_EXTENDER(cur))) {
2725 COPY_BUF(l,buf,len,cur);
2726 NEXTL(l);
2727 cur = CUR_CHAR(l);
2728 if (len >= XML_MAX_NAMELEN) {
2729 xmlGenericError(xmlGenericErrorContext,
2730 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2731 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2732 (cur == '.') || (cur == '-') ||
2733 (cur == '_') ||
2734 (IS_COMBINING(cur)) ||
2735 (IS_EXTENDER(cur))) {
2736 NEXTL(l);
2737 cur = CUR_CHAR(l);
2738 }
2739 break;
2740 }
2741 }
2742 return(xmlStrndup(buf, len));
2743#endif
2744 return(NULL);
2745}
2746
2747/**
2748 * xmlNamespaceParseQName:
2749 * @ctxt: an XML parser context
2750 * @prefix: a xmlChar **
2751 *
2752 * TODO: this seems not in use anymore, the namespace handling is done on
2753 * top of the SAX interfaces, i.e. not on raw input.
2754 *
2755 * parse an XML qualified name
2756 *
2757 * [NS 5] QName ::= (Prefix ':')? LocalPart
2758 *
2759 * [NS 6] Prefix ::= NCName
2760 *
2761 * [NS 7] LocalPart ::= NCName
2762 *
2763 * Returns the local part, and prefix is updated
2764 * to get the Prefix if any.
2765 */
2766
2767xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002768xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002769
2770 static int deprecated = 0;
2771 if (!deprecated) {
2772 xmlGenericError(xmlGenericErrorContext,
2773 "xmlNamespaceParseQName() deprecated function reached\n");
2774 deprecated = 1;
2775 }
2776
2777#if 0
2778 xmlChar *ret = NULL;
2779
2780 *prefix = NULL;
2781 ret = xmlNamespaceParseNCName(ctxt);
2782 if (RAW == ':') {
2783 *prefix = ret;
2784 NEXT;
2785 ret = xmlNamespaceParseNCName(ctxt);
2786 }
2787
2788 return(ret);
2789#endif
2790 return(NULL);
2791}
2792
2793/**
2794 * xmlNamespaceParseNSDef:
2795 * @ctxt: an XML parser context
2796 *
2797 * parse a namespace prefix declaration
2798 *
2799 * TODO: this seems not in use anymore, the namespace handling is done on
2800 * top of the SAX interfaces, i.e. not on raw input.
2801 *
2802 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2803 *
2804 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2805 *
2806 * Returns the namespace name
2807 */
2808
2809xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002810xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002811 static int deprecated = 0;
2812 if (!deprecated) {
2813 xmlGenericError(xmlGenericErrorContext,
2814 "xmlNamespaceParseNSDef() deprecated function reached\n");
2815 deprecated = 1;
2816 }
2817 return(NULL);
2818#if 0
2819 xmlChar *name = NULL;
2820
2821 if ((RAW == 'x') && (NXT(1) == 'm') &&
2822 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2823 (NXT(4) == 's')) {
2824 SKIP(5);
2825 if (RAW == ':') {
2826 NEXT;
2827 name = xmlNamespaceParseNCName(ctxt);
2828 }
2829 }
2830 return(name);
2831#endif
2832}
2833
2834/**
2835 * xmlParseQuotedString:
2836 * @ctxt: an XML parser context
2837 *
2838 * Parse and return a string between quotes or doublequotes
2839 *
2840 * TODO: Deprecated, to be removed at next drop of binary compatibility
2841 *
2842 * Returns the string parser or NULL.
2843 */
2844xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002845xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002846 static int deprecated = 0;
2847 if (!deprecated) {
2848 xmlGenericError(xmlGenericErrorContext,
2849 "xmlParseQuotedString() deprecated function reached\n");
2850 deprecated = 1;
2851 }
2852 return(NULL);
2853
2854#if 0
2855 xmlChar *buf = NULL;
2856 int len = 0,l;
2857 int size = XML_PARSER_BUFFER_SIZE;
2858 int c;
2859
2860 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2861 if (buf == NULL) {
2862 xmlGenericError(xmlGenericErrorContext,
2863 "malloc of %d byte failed\n", size);
2864 return(NULL);
2865 }
2866xmlGenericError(xmlGenericErrorContext,
2867 "xmlParseQuotedString: reached loop 4\n");
2868 if (RAW == '"') {
2869 NEXT;
2870 c = CUR_CHAR(l);
2871 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2872 if (len + 5 >= size) {
2873 size *= 2;
2874 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2875 if (buf == NULL) {
2876 xmlGenericError(xmlGenericErrorContext,
2877 "realloc of %d byte failed\n", size);
2878 return(NULL);
2879 }
2880 }
2881 COPY_BUF(l,buf,len,c);
2882 NEXTL(l);
2883 c = CUR_CHAR(l);
2884 }
2885 if (c != '"') {
2886 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt->userData,
2889 "String not closed \"%.50s\"\n", buf);
2890 ctxt->wellFormed = 0;
2891 ctxt->disableSAX = 1;
2892 } else {
2893 NEXT;
2894 }
2895 } else if (RAW == '\''){
2896 NEXT;
2897 c = CUR;
2898 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2899 if (len + 1 >= size) {
2900 size *= 2;
2901 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2902 if (buf == NULL) {
2903 xmlGenericError(xmlGenericErrorContext,
2904 "realloc of %d byte failed\n", size);
2905 return(NULL);
2906 }
2907 }
2908 buf[len++] = c;
2909 NEXT;
2910 c = CUR;
2911 }
2912 if (RAW != '\'') {
2913 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2915 ctxt->sax->error(ctxt->userData,
2916 "String not closed \"%.50s\"\n", buf);
2917 ctxt->wellFormed = 0;
2918 ctxt->disableSAX = 1;
2919 } else {
2920 NEXT;
2921 }
2922 }
2923 return(buf);
2924#endif
2925}
2926
2927/**
2928 * xmlParseNamespace:
2929 * @ctxt: an XML parser context
2930 *
2931 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2932 *
2933 * This is what the older xml-name Working Draft specified, a bunch of
2934 * other stuff may still rely on it, so support is still here as
2935 * if it was declared on the root of the Tree:-(
2936 *
2937 * TODO: remove from library
2938 *
2939 * To be removed at next drop of binary compatibility
2940 */
2941
2942void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002943xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002944 static int deprecated = 0;
2945 if (!deprecated) {
2946 xmlGenericError(xmlGenericErrorContext,
2947 "xmlParseNamespace() deprecated function reached\n");
2948 deprecated = 1;
2949 }
2950
2951#if 0
2952 xmlChar *href = NULL;
2953 xmlChar *prefix = NULL;
2954 int garbage = 0;
2955
2956 /*
2957 * We just skipped "namespace" or "xml:namespace"
2958 */
2959 SKIP_BLANKS;
2960
2961xmlGenericError(xmlGenericErrorContext,
2962 "xmlParseNamespace: reached loop 5\n");
2963 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2964 /*
2965 * We can have "ns" or "prefix" attributes
2966 * Old encoding as 'href' or 'AS' attributes is still supported
2967 */
2968 if ((RAW == 'n') && (NXT(1) == 's')) {
2969 garbage = 0;
2970 SKIP(2);
2971 SKIP_BLANKS;
2972
2973 if (RAW != '=') continue;
2974 NEXT;
2975 SKIP_BLANKS;
2976
2977 href = xmlParseQuotedString(ctxt);
2978 SKIP_BLANKS;
2979 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2980 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2981 garbage = 0;
2982 SKIP(4);
2983 SKIP_BLANKS;
2984
2985 if (RAW != '=') continue;
2986 NEXT;
2987 SKIP_BLANKS;
2988
2989 href = xmlParseQuotedString(ctxt);
2990 SKIP_BLANKS;
2991 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2992 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2993 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2994 garbage = 0;
2995 SKIP(6);
2996 SKIP_BLANKS;
2997
2998 if (RAW != '=') continue;
2999 NEXT;
3000 SKIP_BLANKS;
3001
3002 prefix = xmlParseQuotedString(ctxt);
3003 SKIP_BLANKS;
3004 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3005 garbage = 0;
3006 SKIP(2);
3007 SKIP_BLANKS;
3008
3009 if (RAW != '=') continue;
3010 NEXT;
3011 SKIP_BLANKS;
3012
3013 prefix = xmlParseQuotedString(ctxt);
3014 SKIP_BLANKS;
3015 } else if ((RAW == '?') && (NXT(1) == '>')) {
3016 garbage = 0;
3017 NEXT;
3018 } else {
3019 /*
3020 * Found garbage when parsing the namespace
3021 */
3022 if (!garbage) {
3023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3024 ctxt->sax->error(ctxt->userData,
3025 "xmlParseNamespace found garbage\n");
3026 }
3027 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3028 ctxt->wellFormed = 0;
3029 ctxt->disableSAX = 1;
3030 NEXT;
3031 }
3032 }
3033
3034 MOVETO_ENDTAG(CUR_PTR);
3035 NEXT;
3036
3037 /*
3038 * Register the DTD.
3039 if (href != NULL)
3040 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3041 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3042 */
3043
3044 if (prefix != NULL) xmlFree(prefix);
3045 if (href != NULL) xmlFree(href);
3046#endif
3047}
3048
3049/**
3050 * xmlScanName:
3051 * @ctxt: an XML parser context
3052 *
3053 * Trickery: parse an XML name but without consuming the input flow
3054 * Needed for rollback cases. Used only when parsing entities references.
3055 *
3056 * TODO: seems deprecated now, only used in the default part of
3057 * xmlParserHandleReference
3058 *
3059 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3060 * CombiningChar | Extender
3061 *
3062 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3063 *
3064 * [6] Names ::= Name (S Name)*
3065 *
3066 * Returns the Name parsed or NULL
3067 */
3068
3069xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003070xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003071 static int deprecated = 0;
3072 if (!deprecated) {
3073 xmlGenericError(xmlGenericErrorContext,
3074 "xmlScanName() deprecated function reached\n");
3075 deprecated = 1;
3076 }
3077 return(NULL);
3078
3079#if 0
3080 xmlChar buf[XML_MAX_NAMELEN];
3081 int len = 0;
3082
3083 GROW;
3084 if (!IS_LETTER(RAW) && (RAW != '_') &&
3085 (RAW != ':')) {
3086 return(NULL);
3087 }
3088
3089
3090 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3091 (NXT(len) == '.') || (NXT(len) == '-') ||
3092 (NXT(len) == '_') || (NXT(len) == ':') ||
3093 (IS_COMBINING(NXT(len))) ||
3094 (IS_EXTENDER(NXT(len)))) {
3095 GROW;
3096 buf[len] = NXT(len);
3097 len++;
3098 if (len >= XML_MAX_NAMELEN) {
3099 xmlGenericError(xmlGenericErrorContext,
3100 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3101 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3102 (IS_DIGIT(NXT(len))) ||
3103 (NXT(len) == '.') || (NXT(len) == '-') ||
3104 (NXT(len) == '_') || (NXT(len) == ':') ||
3105 (IS_COMBINING(NXT(len))) ||
3106 (IS_EXTENDER(NXT(len))))
3107 len++;
3108 break;
3109 }
3110 }
3111 return(xmlStrndup(buf, len));
3112#endif
3113}
3114
3115/**
3116 * xmlParserHandleReference:
3117 * @ctxt: the parser context
3118 *
3119 * TODO: Remove, now deprecated ... the test is done directly in the
3120 * content parsing
3121 * routines.
3122 *
3123 * [67] Reference ::= EntityRef | CharRef
3124 *
3125 * [68] EntityRef ::= '&' Name ';'
3126 *
3127 * [ WFC: Entity Declared ]
3128 * the Name given in the entity reference must match that in an entity
3129 * declaration, except that well-formed documents need not declare any
3130 * of the following entities: amp, lt, gt, apos, quot.
3131 *
3132 * [ WFC: Parsed Entity ]
3133 * An entity reference must not contain the name of an unparsed entity
3134 *
3135 * [66] CharRef ::= '&#' [0-9]+ ';' |
3136 * '&#x' [0-9a-fA-F]+ ';'
3137 *
3138 * A PEReference may have been detectect in the current input stream
3139 * the handling is done accordingly to
3140 * http://www.w3.org/TR/REC-xml#entproc
3141 */
3142void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003143xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003144 static int deprecated = 0;
3145 if (!deprecated) {
3146 xmlGenericError(xmlGenericErrorContext,
3147 "xmlParserHandleReference() deprecated function reached\n");
3148 deprecated = 1;
3149 }
3150
3151#if 0
3152 xmlParserInputPtr input;
3153 xmlChar *name;
3154 xmlEntityPtr ent = NULL;
3155
3156 if (ctxt->token != 0) {
3157 return;
3158 }
3159 if (RAW != '&') return;
3160 GROW;
3161 if ((RAW == '&') && (NXT(1) == '#')) {
3162 switch(ctxt->instate) {
3163 case XML_PARSER_ENTITY_DECL:
3164 case XML_PARSER_PI:
3165 case XML_PARSER_CDATA_SECTION:
3166 case XML_PARSER_COMMENT:
3167 case XML_PARSER_SYSTEM_LITERAL:
3168 /* we just ignore it there */
3169 return;
3170 case XML_PARSER_START_TAG:
3171 return;
3172 case XML_PARSER_END_TAG:
3173 return;
3174 case XML_PARSER_EOF:
3175 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3178 ctxt->wellFormed = 0;
3179 ctxt->disableSAX = 1;
3180 return;
3181 case XML_PARSER_PROLOG:
3182 case XML_PARSER_START:
3183 case XML_PARSER_MISC:
3184 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3186 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3187 ctxt->wellFormed = 0;
3188 ctxt->disableSAX = 1;
3189 return;
3190 case XML_PARSER_EPILOG:
3191 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3193 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3194 ctxt->wellFormed = 0;
3195 ctxt->disableSAX = 1;
3196 return;
3197 case XML_PARSER_DTD:
3198 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData,
3201 "CharRef are forbiden in DTDs!\n");
3202 ctxt->wellFormed = 0;
3203 ctxt->disableSAX = 1;
3204 return;
3205 case XML_PARSER_ENTITY_VALUE:
3206 /*
3207 * NOTE: in the case of entity values, we don't do the
3208 * substitution here since we need the literal
3209 * entity value to be able to save the internal
3210 * subset of the document.
3211 * This will be handled by xmlStringDecodeEntities
3212 */
3213 return;
3214 case XML_PARSER_CONTENT:
3215 return;
3216 case XML_PARSER_ATTRIBUTE_VALUE:
3217 /* ctxt->token = xmlParseCharRef(ctxt); */
3218 return;
3219 case XML_PARSER_IGNORE:
3220 return;
3221 }
3222 return;
3223 }
3224
3225 switch(ctxt->instate) {
3226 case XML_PARSER_CDATA_SECTION:
3227 return;
3228 case XML_PARSER_PI:
3229 case XML_PARSER_COMMENT:
3230 case XML_PARSER_SYSTEM_LITERAL:
3231 case XML_PARSER_CONTENT:
3232 return;
3233 case XML_PARSER_START_TAG:
3234 return;
3235 case XML_PARSER_END_TAG:
3236 return;
3237 case XML_PARSER_EOF:
3238 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3240 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3241 ctxt->wellFormed = 0;
3242 ctxt->disableSAX = 1;
3243 return;
3244 case XML_PARSER_PROLOG:
3245 case XML_PARSER_START:
3246 case XML_PARSER_MISC:
3247 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3249 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3250 ctxt->wellFormed = 0;
3251 ctxt->disableSAX = 1;
3252 return;
3253 case XML_PARSER_EPILOG:
3254 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3257 ctxt->wellFormed = 0;
3258 ctxt->disableSAX = 1;
3259 return;
3260 case XML_PARSER_ENTITY_VALUE:
3261 /*
3262 * NOTE: in the case of entity values, we don't do the
3263 * substitution here since we need the literal
3264 * entity value to be able to save the internal
3265 * subset of the document.
3266 * This will be handled by xmlStringDecodeEntities
3267 */
3268 return;
3269 case XML_PARSER_ATTRIBUTE_VALUE:
3270 /*
3271 * NOTE: in the case of attributes values, we don't do the
3272 * substitution here unless we are in a mode where
3273 * the parser is explicitely asked to substitute
3274 * entities. The SAX callback is called with values
3275 * without entity substitution.
3276 * This will then be handled by xmlStringDecodeEntities
3277 */
3278 return;
3279 case XML_PARSER_ENTITY_DECL:
3280 /*
3281 * we just ignore it there
3282 * the substitution will be done once the entity is referenced
3283 */
3284 return;
3285 case XML_PARSER_DTD:
3286 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3288 ctxt->sax->error(ctxt->userData,
3289 "Entity references are forbiden in DTDs!\n");
3290 ctxt->wellFormed = 0;
3291 ctxt->disableSAX = 1;
3292 return;
3293 case XML_PARSER_IGNORE:
3294 return;
3295 }
3296
3297/* TODO: this seems not reached anymore .... Verify ... */
3298xmlGenericError(xmlGenericErrorContext,
3299 "Reached deprecated section in xmlParserHandleReference()\n");
3300xmlGenericError(xmlGenericErrorContext,
3301 "Please forward the document to Daniel.Veillard@w3.org\n");
3302xmlGenericError(xmlGenericErrorContext,
3303 "indicating the version: %s, thanks !\n", xmlParserVersion);
3304 NEXT;
3305 name = xmlScanName(ctxt);
3306 if (name == NULL) {
3307 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3309 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3310 ctxt->wellFormed = 0;
3311 ctxt->disableSAX = 1;
3312 ctxt->token = '&';
3313 return;
3314 }
3315 if (NXT(xmlStrlen(name)) != ';') {
3316 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3318 ctxt->sax->error(ctxt->userData,
3319 "Entity reference: ';' expected\n");
3320 ctxt->wellFormed = 0;
3321 ctxt->disableSAX = 1;
3322 ctxt->token = '&';
3323 xmlFree(name);
3324 return;
3325 }
3326 SKIP(xmlStrlen(name) + 1);
3327 if (ctxt->sax != NULL) {
3328 if (ctxt->sax->getEntity != NULL)
3329 ent = ctxt->sax->getEntity(ctxt->userData, name);
3330 }
3331
3332 /*
3333 * [ WFC: Entity Declared ]
3334 * the Name given in the entity reference must match that in an entity
3335 * declaration, except that well-formed documents need not declare any
3336 * of the following entities: amp, lt, gt, apos, quot.
3337 */
3338 if (ent == NULL)
3339 ent = xmlGetPredefinedEntity(name);
3340 if (ent == NULL) {
3341 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3343 ctxt->sax->error(ctxt->userData,
3344 "Entity reference: entity %s not declared\n",
3345 name);
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 xmlFree(name);
3349 return;
3350 }
3351
3352 /*
3353 * [ WFC: Parsed Entity ]
3354 * An entity reference must not contain the name of an unparsed entity
3355 */
3356 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3357 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3359 ctxt->sax->error(ctxt->userData,
3360 "Entity reference to unparsed entity %s\n", name);
3361 ctxt->wellFormed = 0;
3362 ctxt->disableSAX = 1;
3363 }
3364
3365 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3366 ctxt->token = ent->content[0];
3367 xmlFree(name);
3368 return;
3369 }
3370 input = xmlNewEntityInputStream(ctxt, ent);
3371 xmlPushInput(ctxt, input);
3372 xmlFree(name);
3373#endif
3374 return;
3375}
3376
3377/**
3378 * xmlHandleEntity:
3379 * @ctxt: an XML parser context
3380 * @entity: an XML entity pointer.
3381 *
3382 * Default handling of defined entities, when should we define a new input
3383 * stream ? When do we just handle that as a set of chars ?
3384 *
3385 * OBSOLETE: to be removed at some point.
3386 */
3387
3388void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003389xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003390 static int deprecated = 0;
3391 if (!deprecated) {
3392 xmlGenericError(xmlGenericErrorContext,
3393 "xmlHandleEntity() deprecated function reached\n");
3394 deprecated = 1;
3395 }
3396
3397#if 0
3398 int len;
3399 xmlParserInputPtr input;
3400
3401 if (entity->content == NULL) {
3402 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3404 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3405 entity->name);
3406 ctxt->wellFormed = 0;
3407 ctxt->disableSAX = 1;
3408 return;
3409 }
3410 len = xmlStrlen(entity->content);
3411 if (len <= 2) goto handle_as_char;
3412
3413 /*
3414 * Redefine its content as an input stream.
3415 */
3416 input = xmlNewEntityInputStream(ctxt, entity);
3417 xmlPushInput(ctxt, input);
3418 return;
3419
3420handle_as_char:
3421 /*
3422 * Just handle the content as a set of chars.
3423 */
3424 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3425 (ctxt->sax->characters != NULL))
3426 ctxt->sax->characters(ctxt->userData, entity->content, len);
3427#endif
3428}
3429
3430/**
3431 * xmlNewGlobalNs:
3432 * @doc: the document carrying the namespace
3433 * @href: the URI associated
3434 * @prefix: the prefix for the namespace
3435 *
3436 * Creation of a Namespace, the old way using PI and without scoping
3437 * DEPRECATED !!!
3438 * It now create a namespace on the root element of the document if found.
3439 * Returns NULL this functionnality had been removed
3440 */
3441xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003442xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3443 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003444 static int deprecated = 0;
3445 if (!deprecated) {
3446 xmlGenericError(xmlGenericErrorContext,
3447 "xmlNewGlobalNs() deprecated function reached\n");
3448 deprecated = 1;
3449 }
3450 return(NULL);
3451#if 0
3452 xmlNodePtr root;
3453
3454 xmlNsPtr cur;
3455
3456 root = xmlDocGetRootElement(doc);
3457 if (root != NULL)
3458 return(xmlNewNs(root, href, prefix));
3459
3460 /*
3461 * if there is no root element yet, create an old Namespace type
3462 * and it will be moved to the root at save time.
3463 */
3464 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3465 if (cur == NULL) {
3466 xmlGenericError(xmlGenericErrorContext,
3467 "xmlNewGlobalNs : malloc failed\n");
3468 return(NULL);
3469 }
3470 memset(cur, 0, sizeof(xmlNs));
3471 cur->type = XML_GLOBAL_NAMESPACE;
3472
3473 if (href != NULL)
3474 cur->href = xmlStrdup(href);
3475 if (prefix != NULL)
3476 cur->prefix = xmlStrdup(prefix);
3477
3478 /*
3479 * Add it at the end to preserve parsing order ...
3480 */
3481 if (doc != NULL) {
3482 if (doc->oldNs == NULL) {
3483 doc->oldNs = cur;
3484 } else {
3485 xmlNsPtr prev = doc->oldNs;
3486
3487 while (prev->next != NULL) prev = prev->next;
3488 prev->next = cur;
3489 }
3490 }
3491
3492 return(NULL);
3493#endif
3494}
3495
3496/**
3497 * xmlUpgradeOldNs:
3498 * @doc: a document pointer
3499 *
3500 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3501 * DEPRECATED
3502 */
3503void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003504xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003505 static int deprecated = 0;
3506 if (!deprecated) {
3507 xmlGenericError(xmlGenericErrorContext,
3508 "xmlNewGlobalNs() deprecated function reached\n");
3509 deprecated = 1;
3510 }
3511#if 0
3512 xmlNsPtr cur;
3513
3514 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3515 if (doc->children == NULL) {
3516#ifdef DEBUG_TREE
3517 xmlGenericError(xmlGenericErrorContext,
3518 "xmlUpgradeOldNs: failed no root !\n");
3519#endif
3520 return;
3521 }
3522
3523 cur = doc->oldNs;
3524 while (cur->next != NULL) {
3525 cur->type = XML_LOCAL_NAMESPACE;
3526 cur = cur->next;
3527 }
3528 cur->type = XML_LOCAL_NAMESPACE;
3529 cur->next = doc->children->nsDef;
3530 doc->children->nsDef = doc->oldNs;
3531 doc->oldNs = NULL;
3532#endif
3533}
3534