/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
49
Daniel Veillard56a4cb82001-03-24 17:00:36 +000050void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000051
/************************************************************************
 *                                                                      *
 *              Version and Features handling                           *
 *                                                                      *
 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
70 if ((myversion / 10000) != (version / 10000)) {
71 xmlGenericError(xmlGenericErrorContext,
72 "Fatal: program compiled against libxml %d using libxml %d\n",
73 (version / 10000), (myversion / 10000));
74 exit(1);
75 }
76 if ((myversion / 100) < (version / 100)) {
77 xmlGenericError(xmlGenericErrorContext,
78 "Warning: program compiled against libxml %d using older %d\n",
79 (version / 100), (myversion / 100));
80 }
81}
82
83
/*
 * Names of the features that can be queried or changed on a parser
 * context. The order of the entries is the order in which
 * xmlGetFeaturesList() hands them out.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the capacity of @result on input, the number of names stored
 *        on output
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned. The stored pointers reference the
 * library's own table and must not be deallocated.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the
 * total number of features
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total = sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]);
    int idx = 0;

    /* pure "how many are there" query */
    if ((len == NULL) || (result == NULL))
        return(total);

    if ((*len < 0) || (*len >= 1000))
        return(-1);

    /* never hand out more names than the table holds */
    if (*len > total)
        *len = total;

    while (idx < *len) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
155
156/*
157 * xmlGetFeature:
158 * @ctxt: an XML/HTML parser context
159 * @name: the feature name
160 * @result: location to store the result
161 *
162 * Read the current value of one feature of this parser instance
163 *
164 * Returns -1 in case or error, 0 otherwise
165 */
166int
167xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
168 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
169 return(-1);
170
171 if (!strcmp(name, "validate")) {
172 *((int *) result) = ctxt->validate;
173 } else if (!strcmp(name, "keep blanks")) {
174 *((int *) result) = ctxt->keepBlanks;
175 } else if (!strcmp(name, "disable SAX")) {
176 *((int *) result) = ctxt->disableSAX;
177 } else if (!strcmp(name, "fetch external entities")) {
178 *((int *) result) = ctxt->loadsubset;
179 } else if (!strcmp(name, "substitute entities")) {
180 *((int *) result) = ctxt->replaceEntities;
181 } else if (!strcmp(name, "gather line info")) {
182 *((int *) result) = ctxt->record_info;
183 } else if (!strcmp(name, "user data")) {
184 *((void **)result) = ctxt->userData;
185 } else if (!strcmp(name, "is html")) {
186 *((int *) result) = ctxt->html;
187 } else if (!strcmp(name, "is standalone")) {
188 *((int *) result) = ctxt->standalone;
189 } else if (!strcmp(name, "document")) {
190 *((xmlDocPtr *) result) = ctxt->myDoc;
191 } else if (!strcmp(name, "is well formed")) {
192 *((int *) result) = ctxt->wellFormed;
193 } else if (!strcmp(name, "is valid")) {
194 *((int *) result) = ctxt->valid;
195 } else if (!strcmp(name, "SAX block")) {
196 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
197 } else if (!strcmp(name, "SAX function internalSubset")) {
198 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
199 } else if (!strcmp(name, "SAX function isStandalone")) {
200 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
201 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
202 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
203 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
204 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
205 } else if (!strcmp(name, "SAX function resolveEntity")) {
206 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
207 } else if (!strcmp(name, "SAX function getEntity")) {
208 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
209 } else if (!strcmp(name, "SAX function entityDecl")) {
210 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
211 } else if (!strcmp(name, "SAX function notationDecl")) {
212 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
213 } else if (!strcmp(name, "SAX function attributeDecl")) {
214 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
215 } else if (!strcmp(name, "SAX function elementDecl")) {
216 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
217 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
218 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
219 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
220 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
221 } else if (!strcmp(name, "SAX function startDocument")) {
222 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
223 } else if (!strcmp(name, "SAX function endDocument")) {
224 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
225 } else if (!strcmp(name, "SAX function startElement")) {
226 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
227 } else if (!strcmp(name, "SAX function endElement")) {
228 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
229 } else if (!strcmp(name, "SAX function reference")) {
230 *((referenceSAXFunc *) result) = ctxt->sax->reference;
231 } else if (!strcmp(name, "SAX function characters")) {
232 *((charactersSAXFunc *) result) = ctxt->sax->characters;
233 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
234 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
235 } else if (!strcmp(name, "SAX function processingInstruction")) {
236 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
237 } else if (!strcmp(name, "SAX function comment")) {
238 *((commentSAXFunc *) result) = ctxt->sax->comment;
239 } else if (!strcmp(name, "SAX function warning")) {
240 *((warningSAXFunc *) result) = ctxt->sax->warning;
241 } else if (!strcmp(name, "SAX function error")) {
242 *((errorSAXFunc *) result) = ctxt->sax->error;
243 } else if (!strcmp(name, "SAX function fatalError")) {
244 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
245 } else if (!strcmp(name, "SAX function getParameterEntity")) {
246 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
247 } else if (!strcmp(name, "SAX function cdataBlock")) {
248 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
249 } else if (!strcmp(name, "SAX function externalSubset")) {
250 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
251 } else {
252 return(-1);
253 }
254 return(0);
255}
256
257/*
258 * xmlSetFeature:
259 * @ctxt: an XML/HTML parser context
260 * @name: the feature name
261 * @value: pointer to the location of the new value
262 *
263 * Change the current value of one feature of this parser instance
264 *
265 * Returns -1 in case or error, 0 otherwise
266 */
267int
268xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
269 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
270 return(-1);
271
272 if (!strcmp(name, "validate")) {
273 int newvalidate = *((int *) value);
274 if ((!ctxt->validate) && (newvalidate != 0)) {
275 if (ctxt->vctxt.warning == NULL)
276 ctxt->vctxt.warning = xmlParserValidityWarning;
277 if (ctxt->vctxt.error == NULL)
278 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000279 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000280 }
281 ctxt->validate = newvalidate;
282 } else if (!strcmp(name, "keep blanks")) {
283 ctxt->keepBlanks = *((int *) value);
284 } else if (!strcmp(name, "disable SAX")) {
285 ctxt->disableSAX = *((int *) value);
286 } else if (!strcmp(name, "fetch external entities")) {
287 ctxt->loadsubset = *((int *) value);
288 } else if (!strcmp(name, "substitute entities")) {
289 ctxt->replaceEntities = *((int *) value);
290 } else if (!strcmp(name, "gather line info")) {
291 ctxt->record_info = *((int *) value);
292 } else if (!strcmp(name, "user data")) {
293 ctxt->userData = *((void **)value);
294 } else if (!strcmp(name, "is html")) {
295 ctxt->html = *((int *) value);
296 } else if (!strcmp(name, "is standalone")) {
297 ctxt->standalone = *((int *) value);
298 } else if (!strcmp(name, "document")) {
299 ctxt->myDoc = *((xmlDocPtr *) value);
300 } else if (!strcmp(name, "is well formed")) {
301 ctxt->wellFormed = *((int *) value);
302 } else if (!strcmp(name, "is valid")) {
303 ctxt->valid = *((int *) value);
304 } else if (!strcmp(name, "SAX block")) {
305 ctxt->sax = *((xmlSAXHandlerPtr *) value);
306 } else if (!strcmp(name, "SAX function internalSubset")) {
307 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
308 } else if (!strcmp(name, "SAX function isStandalone")) {
309 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
310 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
311 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
312 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
313 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
314 } else if (!strcmp(name, "SAX function resolveEntity")) {
315 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
316 } else if (!strcmp(name, "SAX function getEntity")) {
317 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
318 } else if (!strcmp(name, "SAX function entityDecl")) {
319 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function notationDecl")) {
321 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function attributeDecl")) {
323 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function elementDecl")) {
325 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
327 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
329 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function startDocument")) {
331 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function endDocument")) {
333 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function startElement")) {
335 ctxt->sax->startElement = *((startElementSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function endElement")) {
337 ctxt->sax->endElement = *((endElementSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function reference")) {
339 ctxt->sax->reference = *((referenceSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function characters")) {
341 ctxt->sax->characters = *((charactersSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
343 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function processingInstruction")) {
345 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function comment")) {
347 ctxt->sax->comment = *((commentSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function warning")) {
349 ctxt->sax->warning = *((warningSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function error")) {
351 ctxt->sax->error = *((errorSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function fatalError")) {
353 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function getParameterEntity")) {
355 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
356 } else if (!strcmp(name, "SAX function cdataBlock")) {
357 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function externalSubset")) {
359 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
360 } else {
361 return(-1);
362 }
363 return(0);
364}
365
/************************************************************************
 *                                                                      *
 *              Some functions to avoid too large macros                *
 *                                                                      *
 ************************************************************************/
371
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through to the supplementary planes check */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
392
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* horizontal tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
407
/*
 * Lookup table answering the BaseChar question for the code points
 * 0x0000 - 0x00FF, one entry per code point.
 */
static const int xmlBaseArray[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; negative
 * values are rejected before the table is indexed. The remaining ranges
 * are tested in three stages split at 0x0905 and 0x10A0, so that a
 * character is only compared against the ranges of its own stage.
 *
 * The range tests were generated from the REC with:
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* a negative value must never be used as a table index */
    if (c < 0)
        return(0);

    if (c < 0x0100)
        return(xmlBaseArray[c]);

    if (c < 0x0905)
        return(
            (((c) >= 0x0100) && ((c) <= 0x0131)) ||
            (((c) >= 0x0134) && ((c) <= 0x013E)) ||
            (((c) >= 0x0141) && ((c) <= 0x0148)) ||
            (((c) >= 0x014A) && ((c) <= 0x017E)) ||
            (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
            (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
            (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
            (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
            (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
            (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
            ((c) == 0x0386) ||
            (((c) >= 0x0388) && ((c) <= 0x038A)) ||
            ((c) == 0x038C) ||
            (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
            (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
            (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
            ((c) == 0x03DA) ||
            ((c) == 0x03DC) ||
            ((c) == 0x03DE) ||
            ((c) == 0x03E0) ||
            (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
            (((c) >= 0x0401) && ((c) <= 0x040C)) ||
            (((c) >= 0x040E) && ((c) <= 0x044F)) ||
            (((c) >= 0x0451) && ((c) <= 0x045C)) ||
            (((c) >= 0x045E) && ((c) <= 0x0481)) ||
            (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
            (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
            (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
            (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
            (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
            (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
            (((c) >= 0x0531) && ((c) <= 0x0556)) ||
            ((c) == 0x0559) ||
            (((c) >= 0x0561) && ((c) <= 0x0586)) ||
            (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
            (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
            (((c) >= 0x0621) && ((c) <= 0x063A)) ||
            (((c) >= 0x0641) && ((c) <= 0x064A)) ||
            (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
            (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
            (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
            (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
            ((c) == 0x06D5) ||
            (((c) >= 0x06E5) && ((c) <= 0x06E6)));

    if (c < 0x10A0)
        return(
            (((c) >= 0x0905) && ((c) <= 0x0939)) ||
            ((c) == 0x093D) ||
            (((c) >= 0x0958) && ((c) <= 0x0961)) ||
            (((c) >= 0x0985) && ((c) <= 0x098C)) ||
            (((c) >= 0x098F) && ((c) <= 0x0990)) ||
            (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
            (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
            ((c) == 0x09B2) ||
            (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
            (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
            (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
            (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
            (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
            (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
            (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
            (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
            (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
            (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
            (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
            (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
            ((c) == 0x0A5E) ||
            (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
            (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
            ((c) == 0x0A8D) ||
            (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
            (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
            (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
            (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
            (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
            ((c) == 0x0ABD) ||
            ((c) == 0x0AE0) ||
            (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
            (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
            (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
            (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
            (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
            (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
            ((c) == 0x0B3D) ||
            (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
            (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
            (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
            (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
            (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
            (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
            ((c) == 0x0B9C) ||
            (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
            (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
            (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
            (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
            (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
            (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
            (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
            (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
            (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
            (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
            (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
            (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
            (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
            (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
            (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
            (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
            ((c) == 0x0CDE) ||
            (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
            (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
            (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
            (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
            (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
            (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
            (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
            ((c) == 0x0E30) ||
            (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
            (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
            (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
            ((c) == 0x0E84) ||
            (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
            ((c) == 0x0E8A) ||
            ((c) == 0x0E8D) ||
            (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
            (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
            (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
            ((c) == 0x0EA5) ||
            ((c) == 0x0EA7) ||
            (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
            (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
            ((c) == 0x0EB0) ||
            (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
            ((c) == 0x0EBD) ||
            (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
            (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
            (((c) >= 0x0F49) && ((c) <= 0x0F69)));

    return(
        (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
        (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
        ((c) == 0x1100) ||
        (((c) >= 0x1102) && ((c) <= 0x1103)) ||
        (((c) >= 0x1105) && ((c) <= 0x1107)) ||
        ((c) == 0x1109) ||
        (((c) >= 0x110B) && ((c) <= 0x110C)) ||
        (((c) >= 0x110E) && ((c) <= 0x1112)) ||
        ((c) == 0x113C) ||
        ((c) == 0x113E) ||
        ((c) == 0x1140) ||
        ((c) == 0x114C) ||
        ((c) == 0x114E) ||
        ((c) == 0x1150) ||
        (((c) >= 0x1154) && ((c) <= 0x1155)) ||
        ((c) == 0x1159) ||
        (((c) >= 0x115F) && ((c) <= 0x1161)) ||
        ((c) == 0x1163) ||
        ((c) == 0x1165) ||
        ((c) == 0x1167) ||
        ((c) == 0x1169) ||
        (((c) >= 0x116D) && ((c) <= 0x116E)) ||
        (((c) >= 0x1172) && ((c) <= 0x1173)) ||
        ((c) == 0x1175) ||
        ((c) == 0x119E) ||
        ((c) == 0x11A8) ||
        ((c) == 0x11AB) ||
        (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
        (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
        ((c) == 0x11BA) ||
        (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
        ((c) == 0x11EB) ||
        ((c) == 0x11F0) ||
        ((c) == 0x11F9) ||
        (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
        (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
        (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
        (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
        (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
        (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
        (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
        ((c) == 0x1F59) ||
        ((c) == 0x1F5B) ||
        ((c) == 0x1F5D) ||
        (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
        (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
        (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
        ((c) == 0x1FBE) ||
        (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
        (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
        (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
        (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
        (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
        (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
        (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
        ((c) == 0x2126) ||
        (((c) >= 0x212A) && ((c) <= 0x212B)) ||
        ((c) == 0x212E) ||
        (((c) >= 0x2180) && ((c) <= 0x2182)) ||
        (((c) >= 0x3041) && ((c) <= 0x3094)) ||
        (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
        (((c) >= 0x3105) && ((c) <= 0x312C)) ||
        (((c) >= 0xAC00) && ((c) <= 0xD7A3)));
}
646
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The accepted ranges are kept in a table sorted by code point, which
 * lets the scan stop as soon as @c lies below the next range.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const struct {
        int first;
        int last;
    } digitRuns[] = {
        { 0x0030, 0x0039 },
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 },
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(digitRuns) / sizeof(digitRuns[0]); idx++) {
        /* sorted table: nothing further down can match */
        if (c < digitRuns[idx].first)
            return(0);
        if (c <= digitRuns[idx].last)
            return(1);
    }
    return(0);
}
676
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The accepted ranges (single code points are ranges of length one)
 * are kept in a table sorted by code point, which lets the scan stop
 * as soon as @c lies below the next range.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    static const struct {
        int first;
        int last;
    } combiningRuns[] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },

        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },

        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },

        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A },
    };
    unsigned int idx;

    for (idx = 0;
         idx < sizeof(combiningRuns) / sizeof(combiningRuns[0]);
         idx++) {
        /* sorted table: nothing further down can match */
        if (c < combiningRuns[idx].first)
            return(0);
        if (c <= combiningRuns[idx].last)
            return(1);
    }
    return(0);
}
789
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE], including the middle code point 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
814
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * compatibility ideographs [#xF900-#xFA2D] are not part of it and are
 * rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* cheap rejection: nothing below #x3007 is ideographic */
    if (c < 0x3007)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
832
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* the ideographic test is only evaluated when the first one fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
846
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
    /* #x20 | #xD | #xA */
    case 0x20: case 0x0D: case 0x0A:
    /* [-'()+,./:=?;!*#@$_%] */
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
869
870/************************************************************************
871 * *
872 * Input handling functions for progressive parsing *
873 * *
874 ************************************************************************/
875
876/* #define DEBUG_INPUT */
877/* #define DEBUG_STACK */
878/* #define DEBUG_PUSH */
879
880
881/* we need to keep enough input to show errors in context */
882#define LINE_LEN 80
883
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to check
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the input pointers disagree
 * with the underlying buffer (base not at the buffer content, cur
 * before base, or cur beyond the used part of the buffer), then
 * prints a one line summary of the input state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: squeezing them through an int
     * truncates them where pointers are wider than int. The pointer
     * difference and the counters are converted explicitly so that
     * every argument matches its %d conversion.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
908
909
910/**
911 * xmlParserInputRead:
912 * @in: an XML parser input
913 * @len: an indicative size for the lookahead
914 *
915 * This function refresh the input for the parser. It doesn't try to
916 * preserve pointers to the input buffer, and discard already read data
917 *
918 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
919 * end of this entity
920 */
921int
922xmlParserInputRead(xmlParserInputPtr in, int len) {
923 int ret;
924 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000925 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000926
927#ifdef DEBUG_INPUT
928 xmlGenericError(xmlGenericErrorContext, "Read\n");
929#endif
930 if (in->buf == NULL) return(-1);
931 if (in->base == NULL) return(-1);
932 if (in->cur == NULL) return(-1);
933 if (in->buf->buffer == NULL) return(-1);
934 if (in->buf->readcallback == NULL) return(-1);
935
936 CHECK_BUFFER(in);
937
938 used = in->cur - in->buf->buffer->content;
939 ret = xmlBufferShrink(in->buf->buffer, used);
940 if (ret > 0) {
941 in->cur -= ret;
942 in->consumed += ret;
943 }
944 ret = xmlParserInputBufferRead(in->buf, len);
945 if (in->base != in->buf->buffer->content) {
946 /*
947 * the buffer has been realloced
948 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000949 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000950 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000951 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000952 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000953 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000954
955 CHECK_BUFFER(in);
956
957 return(ret);
958}
959
960/**
961 * xmlParserInputGrow:
962 * @in: an XML parser input
963 * @len: an indicative size for the lookahead
964 *
965 * This function increase the input for the parser. It tries to
966 * preserve pointers to the input buffer, and keep already read data
967 *
968 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
969 * end of this entity
970 */
971int
972xmlParserInputGrow(xmlParserInputPtr in, int len) {
973 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000974 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000975
976#ifdef DEBUG_INPUT
977 xmlGenericError(xmlGenericErrorContext, "Grow\n");
978#endif
979 if (in->buf == NULL) return(-1);
980 if (in->base == NULL) return(-1);
981 if (in->cur == NULL) return(-1);
982 if (in->buf->buffer == NULL) return(-1);
983
984 CHECK_BUFFER(in);
985
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000986 indx = in->cur - in->base;
987 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000988
989 CHECK_BUFFER(in);
990
991 return(0);
992 }
993 if (in->buf->readcallback != NULL)
994 ret = xmlParserInputBufferGrow(in->buf, len);
995 else
996 return(0);
997
998 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +0000999 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001000 * block, but we use it really as an integer to do some
1001 * pointer arithmetic. Insure will raise it as a bug but in
1002 * that specific case, that's not !
1003 */
1004 if (in->base != in->buf->buffer->content) {
1005 /*
1006 * the buffer has been realloced
1007 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001008 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001009 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001010 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001011 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001012 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001013
1014 CHECK_BUFFER(in);
1015
1016 return(ret);
1017}
1018
1019/**
1020 * xmlParserInputShrink:
1021 * @in: an XML parser input
1022 *
1023 * This function removes used input for the parser.
1024 */
1025void
1026xmlParserInputShrink(xmlParserInputPtr in) {
1027 int used;
1028 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001029 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001030
1031#ifdef DEBUG_INPUT
1032 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1033#endif
1034 if (in->buf == NULL) return;
1035 if (in->base == NULL) return;
1036 if (in->cur == NULL) return;
1037 if (in->buf->buffer == NULL) return;
1038
1039 CHECK_BUFFER(in);
1040
1041 used = in->cur - in->buf->buffer->content;
1042 /*
1043 * Do not shrink on large buffers whose only a tiny fraction
1044 * was consumned
1045 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001046 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001047 return;
1048 if (used > INPUT_CHUNK) {
1049 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1050 if (ret > 0) {
1051 in->cur -= ret;
1052 in->consumed += ret;
1053 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001054 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001055 }
1056
1057 CHECK_BUFFER(in);
1058
1059 if (in->buf->buffer->use > INPUT_CHUNK) {
1060 return;
1061 }
1062 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1063 if (in->base != in->buf->buffer->content) {
1064 /*
1065 * the buffer has been realloced
1066 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001067 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001068 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001070 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001071 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001072
1073 CHECK_BUFFER(in);
1074}
1075
1076/************************************************************************
1077 * *
1078 * UTF8 character input and related functions *
1079 * *
1080 ************************************************************************/
1081
1082/**
1083 * xmlNextChar:
1084 * @ctxt: the XML parser context
1085 *
1086 * Skip to the next char input char.
1087 */
1088
1089void
1090xmlNextChar(xmlParserCtxtPtr ctxt) {
1091 if (ctxt->instate == XML_PARSER_EOF)
1092 return;
1093
1094 /*
1095 * 2.11 End-of-Line Handling
1096 * the literal two-character sequence "#xD#xA" or a standalone
1097 * literal #xD, an XML processor must pass to the application
1098 * the single character #xA.
1099 */
1100 if (ctxt->token != 0) ctxt->token = 0;
1101 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1102 if ((*ctxt->input->cur == 0) &&
1103 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1104 (ctxt->instate != XML_PARSER_COMMENT)) {
1105 /*
1106 * If we are at the end of the current entity and
1107 * the context allows it, we pop consumed entities
1108 * automatically.
1109 * the auto closing should be blocked in other cases
1110 */
1111 xmlPopInput(ctxt);
1112 } else {
1113 if (*(ctxt->input->cur) == '\n') {
1114 ctxt->input->line++; ctxt->input->col = 1;
1115 } else ctxt->input->col++;
1116 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1117 /*
1118 * We are supposed to handle UTF8, check it's valid
1119 * From rfc2044: encoding of the Unicode values on UTF-8:
1120 *
1121 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1122 * 0000 0000-0000 007F 0xxxxxxx
1123 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1124 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1125 *
1126 * Check for the 0x110000 limit too
1127 */
1128 const unsigned char *cur = ctxt->input->cur;
1129 unsigned char c;
1130
1131 c = *cur;
1132 if (c & 0x80) {
1133 if (cur[1] == 0)
1134 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1135 if ((cur[1] & 0xc0) != 0x80)
1136 goto encoding_error;
1137 if ((c & 0xe0) == 0xe0) {
1138 unsigned int val;
1139
1140 if (cur[2] == 0)
1141 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1142 if ((cur[2] & 0xc0) != 0x80)
1143 goto encoding_error;
1144 if ((c & 0xf0) == 0xf0) {
1145 if (cur[3] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if (((c & 0xf8) != 0xf0) ||
1148 ((cur[3] & 0xc0) != 0x80))
1149 goto encoding_error;
1150 /* 4-byte code */
1151 ctxt->input->cur += 4;
1152 val = (cur[0] & 0x7) << 18;
1153 val |= (cur[1] & 0x3f) << 12;
1154 val |= (cur[2] & 0x3f) << 6;
1155 val |= cur[3] & 0x3f;
1156 } else {
1157 /* 3-byte code */
1158 ctxt->input->cur += 3;
1159 val = (cur[0] & 0xf) << 12;
1160 val |= (cur[1] & 0x3f) << 6;
1161 val |= cur[2] & 0x3f;
1162 }
1163 if (((val > 0xd7ff) && (val < 0xe000)) ||
1164 ((val > 0xfffd) && (val < 0x10000)) ||
1165 (val >= 0x110000)) {
1166 if ((ctxt->sax != NULL) &&
1167 (ctxt->sax->error != NULL))
1168 ctxt->sax->error(ctxt->userData,
1169 "Char 0x%X out of allowed range\n", val);
1170 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1171 ctxt->wellFormed = 0;
1172 ctxt->disableSAX = 1;
1173 }
1174 } else
1175 /* 2-byte code */
1176 ctxt->input->cur += 2;
1177 } else
1178 /* 1-byte code */
1179 ctxt->input->cur++;
1180 } else {
1181 /*
1182 * Assume it's a fixed lenght encoding (1) with
1183 * a compatibke encoding for the ASCII set, since
1184 * XML constructs only use < 128 chars
1185 */
1186 ctxt->input->cur++;
1187 }
1188 ctxt->nbChars++;
1189 if (*ctxt->input->cur == 0)
1190 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1191 }
1192 } else {
1193 ctxt->input->cur++;
1194 ctxt->nbChars++;
1195 if (*ctxt->input->cur == 0)
1196 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1197 }
1198 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1199 xmlParserHandlePEReference(ctxt);
1200 if ((*ctxt->input->cur == 0) &&
1201 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1202 xmlPopInput(ctxt);
1203 return;
1204encoding_error:
1205 /*
1206 * If we detect an UTF8 error that probably mean that the
1207 * input encoding didn't get properly advertized in the
1208 * declaration header. Report the error and switch the encoding
1209 * to ISO-Latin-1 (if you don't like this policy, just declare the
1210 * encoding !)
1211 */
1212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1213 ctxt->sax->error(ctxt->userData,
1214 "Input is not proper UTF-8, indicate encoding !\n");
1215 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1216 ctxt->input->cur[0], ctxt->input->cur[1],
1217 ctxt->input->cur[2], ctxt->input->cur[3]);
1218 }
1219 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1220
1221 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1222 ctxt->input->cur++;
1223 return;
1224}
1225
1226/**
1227 * xmlCurrentChar:
1228 * @ctxt: the XML parser context
1229 * @len: pointer to the length of the char read
1230 *
1231 * The current char value, if using UTF-8 this may actaully span multiple
1232 * bytes in the input buffer. Implement the end of line normalization:
1233 * 2.11 End-of-Line Handling
1234 * Wherever an external parsed entity or the literal entity value
1235 * of an internal parsed entity contains either the literal two-character
1236 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1237 * must pass to the application the single character #xA.
1238 * This behavior can conveniently be produced by normalizing all
1239 * line breaks to #xA on input, before parsing.)
1240 *
1241 * Returns the current char value and its lenght
1242 */
1243
1244int
1245xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1246 if (ctxt->instate == XML_PARSER_EOF)
1247 return(0);
1248
1249 if (ctxt->token != 0) {
1250 *len = 0;
1251 return(ctxt->token);
1252 }
1253 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1254 *len = 1;
1255 return((int) *ctxt->input->cur);
1256 }
1257 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1258 /*
1259 * We are supposed to handle UTF8, check it's valid
1260 * From rfc2044: encoding of the Unicode values on UTF-8:
1261 *
1262 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1263 * 0000 0000-0000 007F 0xxxxxxx
1264 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1265 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1266 *
1267 * Check for the 0x110000 limit too
1268 */
1269 const unsigned char *cur = ctxt->input->cur;
1270 unsigned char c;
1271 unsigned int val;
1272
1273 c = *cur;
1274 if (c & 0x80) {
1275 if (cur[1] == 0)
1276 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1277 if ((cur[1] & 0xc0) != 0x80)
1278 goto encoding_error;
1279 if ((c & 0xe0) == 0xe0) {
1280
1281 if (cur[2] == 0)
1282 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1283 if ((cur[2] & 0xc0) != 0x80)
1284 goto encoding_error;
1285 if ((c & 0xf0) == 0xf0) {
1286 if (cur[3] == 0)
1287 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1288 if (((c & 0xf8) != 0xf0) ||
1289 ((cur[3] & 0xc0) != 0x80))
1290 goto encoding_error;
1291 /* 4-byte code */
1292 *len = 4;
1293 val = (cur[0] & 0x7) << 18;
1294 val |= (cur[1] & 0x3f) << 12;
1295 val |= (cur[2] & 0x3f) << 6;
1296 val |= cur[3] & 0x3f;
1297 } else {
1298 /* 3-byte code */
1299 *len = 3;
1300 val = (cur[0] & 0xf) << 12;
1301 val |= (cur[1] & 0x3f) << 6;
1302 val |= cur[2] & 0x3f;
1303 }
1304 } else {
1305 /* 2-byte code */
1306 *len = 2;
1307 val = (cur[0] & 0x1f) << 6;
1308 val |= cur[1] & 0x3f;
1309 }
1310 if (!IS_CHAR(val)) {
1311 if ((ctxt->sax != NULL) &&
1312 (ctxt->sax->error != NULL))
1313 ctxt->sax->error(ctxt->userData,
1314 "Char 0x%X out of allowed range\n", val);
1315 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1316 ctxt->wellFormed = 0;
1317 ctxt->disableSAX = 1;
1318 }
1319 return(val);
1320 } else {
1321 /* 1-byte code */
1322 *len = 1;
1323 if (*ctxt->input->cur == 0xD) {
1324 if (ctxt->input->cur[1] == 0xA) {
1325 ctxt->nbChars++;
1326 ctxt->input->cur++;
1327 }
1328 return(0xA);
1329 }
1330 return((int) *ctxt->input->cur);
1331 }
1332 }
1333 /*
1334 * Assume it's a fixed lenght encoding (1) with
1335 * a compatibke encoding for the ASCII set, since
1336 * XML constructs only use < 128 chars
1337 */
1338 *len = 1;
1339 if (*ctxt->input->cur == 0xD) {
1340 if (ctxt->input->cur[1] == 0xA) {
1341 ctxt->nbChars++;
1342 ctxt->input->cur++;
1343 }
1344 return(0xA);
1345 }
1346 return((int) *ctxt->input->cur);
1347encoding_error:
1348 /*
1349 * If we detect an UTF8 error that probably mean that the
1350 * input encoding didn't get properly advertized in the
1351 * declaration header. Report the error and switch the encoding
1352 * to ISO-Latin-1 (if you don't like this policy, just declare the
1353 * encoding !)
1354 */
1355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1356 ctxt->sax->error(ctxt->userData,
1357 "Input is not proper UTF-8, indicate encoding !\n");
1358 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1359 ctxt->input->cur[0], ctxt->input->cur[1],
1360 ctxt->input->cur[2], ctxt->input->cur[3]);
1361 }
1362 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1363
1364 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1365 *len = 1;
1366 return((int) *ctxt->input->cur);
1367}
1368
1369/**
1370 * xmlStringCurrentChar:
1371 * @ctxt: the XML parser context
1372 * @cur: pointer to the beginning of the char
1373 * @len: pointer to the length of the char read
1374 *
1375 * The current char value, if using UTF-8 this may actaully span multiple
1376 * bytes in the input buffer.
1377 *
1378 * Returns the current char value and its lenght
1379 */
1380
1381int
1382xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1383 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1384 /*
1385 * We are supposed to handle UTF8, check it's valid
1386 * From rfc2044: encoding of the Unicode values on UTF-8:
1387 *
1388 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1389 * 0000 0000-0000 007F 0xxxxxxx
1390 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1391 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1392 *
1393 * Check for the 0x110000 limit too
1394 */
1395 unsigned char c;
1396 unsigned int val;
1397
1398 c = *cur;
1399 if (c & 0x80) {
1400 if ((cur[1] & 0xc0) != 0x80)
1401 goto encoding_error;
1402 if ((c & 0xe0) == 0xe0) {
1403
1404 if ((cur[2] & 0xc0) != 0x80)
1405 goto encoding_error;
1406 if ((c & 0xf0) == 0xf0) {
1407 if (((c & 0xf8) != 0xf0) ||
1408 ((cur[3] & 0xc0) != 0x80))
1409 goto encoding_error;
1410 /* 4-byte code */
1411 *len = 4;
1412 val = (cur[0] & 0x7) << 18;
1413 val |= (cur[1] & 0x3f) << 12;
1414 val |= (cur[2] & 0x3f) << 6;
1415 val |= cur[3] & 0x3f;
1416 } else {
1417 /* 3-byte code */
1418 *len = 3;
1419 val = (cur[0] & 0xf) << 12;
1420 val |= (cur[1] & 0x3f) << 6;
1421 val |= cur[2] & 0x3f;
1422 }
1423 } else {
1424 /* 2-byte code */
1425 *len = 2;
1426 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001427 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001428 }
1429 if (!IS_CHAR(val)) {
1430 if ((ctxt->sax != NULL) &&
1431 (ctxt->sax->error != NULL))
1432 ctxt->sax->error(ctxt->userData,
1433 "Char 0x%X out of allowed range\n", val);
1434 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1435 ctxt->wellFormed = 0;
1436 ctxt->disableSAX = 1;
1437 }
1438 return(val);
1439 } else {
1440 /* 1-byte code */
1441 *len = 1;
1442 return((int) *cur);
1443 }
1444 }
1445 /*
1446 * Assume it's a fixed lenght encoding (1) with
1447 * a compatibke encoding for the ASCII set, since
1448 * XML constructs only use < 128 chars
1449 */
1450 *len = 1;
1451 return((int) *cur);
1452encoding_error:
1453 /*
1454 * If we detect an UTF8 error that probably mean that the
1455 * input encoding didn't get properly advertized in the
1456 * declaration header. Report the error and switch the encoding
1457 * to ISO-Latin-1 (if you don't like this policy, just declare the
1458 * encoding !)
1459 */
1460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1461 ctxt->sax->error(ctxt->userData,
1462 "Input is not proper UTF-8, indicate encoding !\n");
1463 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1464 ctxt->input->cur[0], ctxt->input->cur[1],
1465 ctxt->input->cur[2], ctxt->input->cur[3]);
1466 }
1467 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1468
1469 *len = 1;
1470 return((int) *cur);
1471}
1472
1473/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001474 * xmlCopyCharMultiByte:
1475 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001476 * @val: the char value
1477 *
1478 * append the char value in the array
1479 *
1480 * Returns the number of xmlChar written
1481 */
Owen Taylor3473f882001-02-23 17:55:21 +00001482int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001483xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001484 /*
1485 * We are supposed to handle UTF8, check it's valid
1486 * From rfc2044: encoding of the Unicode values on UTF-8:
1487 *
1488 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1489 * 0000 0000-0000 007F 0xxxxxxx
1490 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1491 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1492 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001493 if (val >= 0x80) {
1494 xmlChar *savedout = out;
1495 int bits;
1496 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1497 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1498 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1499 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001500 xmlGenericError(xmlGenericErrorContext,
1501 "Internal error, xmlCopyChar 0x%X out of bound\n",
1502 val);
1503 return(0);
1504 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001505 for ( ; bits >= 0; bits-= 6)
1506 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1507 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001508 }
1509 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001510 return 1;
1511}
1512
1513/**
1514 * xmlCopyChar:
1515 * @len: Ignored, compatibility
1516 * @out: pointer to an arry of xmlChar
1517 * @val: the char value
1518 *
1519 * append the char value in the array
1520 *
1521 * Returns the number of xmlChar written
1522 */
1523
1524int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001525xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001526 /* the len parameter is ignored */
1527 if (val >= 0x80) {
1528 return(xmlCopyCharMultiByte (out, val));
1529 }
1530 *out = (xmlChar) val;
1531 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001532}
1533
1534/************************************************************************
1535 * *
1536 * Commodity functions to switch encodings *
1537 * *
1538 ************************************************************************/
1539
1540/**
1541 * xmlSwitchEncoding:
1542 * @ctxt: the parser context
1543 * @enc: the encoding value (number)
1544 *
1545 * change the input functions when discovering the character encoding
1546 * of a given entity.
1547 *
1548 * Returns 0 in case of success, -1 otherwise
1549 */
1550int
1551xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1552{
1553 xmlCharEncodingHandlerPtr handler;
1554
1555 switch (enc) {
1556 case XML_CHAR_ENCODING_ERROR:
1557 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1559 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1560 ctxt->wellFormed = 0;
1561 ctxt->disableSAX = 1;
1562 break;
1563 case XML_CHAR_ENCODING_NONE:
1564 /* let's assume it's UTF-8 without the XML decl */
1565 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1566 return(0);
1567 case XML_CHAR_ENCODING_UTF8:
1568 /* default encoding, no conversion should be needed */
1569 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1570 return(0);
1571 default:
1572 break;
1573 }
1574 handler = xmlGetCharEncodingHandler(enc);
1575 if (handler == NULL) {
1576 /*
1577 * Default handlers.
1578 */
1579 switch (enc) {
1580 case XML_CHAR_ENCODING_ERROR:
1581 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1583 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1584 ctxt->wellFormed = 0;
1585 ctxt->disableSAX = 1;
1586 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1587 break;
1588 case XML_CHAR_ENCODING_NONE:
1589 /* let's assume it's UTF-8 without the XML decl */
1590 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1591 return(0);
1592 case XML_CHAR_ENCODING_UTF8:
1593 case XML_CHAR_ENCODING_ASCII:
1594 /* default encoding, no conversion should be needed */
1595 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1596 return(0);
1597 case XML_CHAR_ENCODING_UTF16LE:
1598 break;
1599 case XML_CHAR_ENCODING_UTF16BE:
1600 break;
1601 case XML_CHAR_ENCODING_UCS4LE:
1602 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1604 ctxt->sax->error(ctxt->userData,
1605 "char encoding USC4 little endian not supported\n");
1606 break;
1607 case XML_CHAR_ENCODING_UCS4BE:
1608 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610 ctxt->sax->error(ctxt->userData,
1611 "char encoding USC4 big endian not supported\n");
1612 break;
1613 case XML_CHAR_ENCODING_EBCDIC:
1614 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616 ctxt->sax->error(ctxt->userData,
1617 "char encoding EBCDIC not supported\n");
1618 break;
1619 case XML_CHAR_ENCODING_UCS4_2143:
1620 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1622 ctxt->sax->error(ctxt->userData,
1623 "char encoding UCS4 2143 not supported\n");
1624 break;
1625 case XML_CHAR_ENCODING_UCS4_3412:
1626 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1628 ctxt->sax->error(ctxt->userData,
1629 "char encoding UCS4 3412 not supported\n");
1630 break;
1631 case XML_CHAR_ENCODING_UCS2:
1632 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1634 ctxt->sax->error(ctxt->userData,
1635 "char encoding UCS2 not supported\n");
1636 break;
1637 case XML_CHAR_ENCODING_8859_1:
1638 case XML_CHAR_ENCODING_8859_2:
1639 case XML_CHAR_ENCODING_8859_3:
1640 case XML_CHAR_ENCODING_8859_4:
1641 case XML_CHAR_ENCODING_8859_5:
1642 case XML_CHAR_ENCODING_8859_6:
1643 case XML_CHAR_ENCODING_8859_7:
1644 case XML_CHAR_ENCODING_8859_8:
1645 case XML_CHAR_ENCODING_8859_9:
1646 /*
1647 * We used to keep the internal content in the
1648 * document encoding however this turns being unmaintainable
1649 * So xmlGetCharEncodingHandler() will return non-null
1650 * values for this now.
1651 */
1652 if ((ctxt->inputNr == 1) &&
1653 (ctxt->encoding == NULL) &&
1654 (ctxt->input->encoding != NULL)) {
1655 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1656 }
1657 ctxt->charset = enc;
1658 return(0);
1659 case XML_CHAR_ENCODING_2022_JP:
1660 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1662 ctxt->sax->error(ctxt->userData,
1663 "char encoding ISO-2022-JPnot supported\n");
1664 break;
1665 case XML_CHAR_ENCODING_SHIFT_JIS:
1666 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1668 ctxt->sax->error(ctxt->userData,
1669 "char encoding Shift_JIS not supported\n");
1670 break;
1671 case XML_CHAR_ENCODING_EUC_JP:
1672 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1674 ctxt->sax->error(ctxt->userData,
1675 "char encoding EUC-JPnot supported\n");
1676 break;
1677 }
1678 }
1679 if (handler == NULL)
1680 return(-1);
1681 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1682 return(xmlSwitchToEncoding(ctxt, handler));
1683}
1684
1685/**
1686 * xmlSwitchToEncoding:
1687 * @ctxt: the parser context
1688 * @handler: the encoding handler
1689 *
1690 * change the input functions when discovering the character encoding
1691 * of a given entity.
1692 *
1693 * Returns 0 in case of success, -1 otherwise
1694 */
1695int
1696xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1697{
1698 int nbchars;
1699
1700 if (handler != NULL) {
1701 if (ctxt->input != NULL) {
1702 if (ctxt->input->buf != NULL) {
1703 if (ctxt->input->buf->encoder != NULL) {
1704 if (ctxt->input->buf->encoder == handler)
1705 return(0);
1706 /*
1707 * Note: this is a bit dangerous, but that's what it
1708 * takes to use nearly compatible signature for different
1709 * encodings.
1710 */
1711 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1712 ctxt->input->buf->encoder = handler;
1713 return(0);
1714 }
1715 ctxt->input->buf->encoder = handler;
1716
1717 /*
1718 * Is there already some content down the pipe to convert ?
1719 */
1720 if ((ctxt->input->buf->buffer != NULL) &&
1721 (ctxt->input->buf->buffer->use > 0)) {
1722 int processed;
1723
1724 /*
1725 * Specific handling of the Byte Order Mark for
1726 * UTF-16
1727 */
1728 if ((handler->name != NULL) &&
1729 (!strcmp(handler->name, "UTF-16LE")) &&
1730 (ctxt->input->cur[0] == 0xFF) &&
1731 (ctxt->input->cur[1] == 0xFE)) {
1732 ctxt->input->cur += 2;
1733 }
1734 if ((handler->name != NULL) &&
1735 (!strcmp(handler->name, "UTF-16BE")) &&
1736 (ctxt->input->cur[0] == 0xFE) &&
1737 (ctxt->input->cur[1] == 0xFF)) {
1738 ctxt->input->cur += 2;
1739 }
1740
1741 /*
1742 * Shring the current input buffer.
1743 * Move it as the raw buffer and create a new input buffer
1744 */
1745 processed = ctxt->input->cur - ctxt->input->base;
1746 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1747 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1748 ctxt->input->buf->buffer = xmlBufferCreate();
1749
1750 if (ctxt->html) {
1751 /*
1752 * converst as much as possbile of the buffer
1753 */
1754 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1755 ctxt->input->buf->buffer,
1756 ctxt->input->buf->raw);
1757 } else {
1758 /*
1759 * convert just enough to get
1760 * '<?xml version="1.0" encoding="xxx"?>'
1761 * parsed with the autodetected encoding
1762 * into the parser reading buffer.
1763 */
1764 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1765 ctxt->input->buf->buffer,
1766 ctxt->input->buf->raw);
1767 }
1768 if (nbchars < 0) {
1769 xmlGenericError(xmlGenericErrorContext,
1770 "xmlSwitchToEncoding: encoder error\n");
1771 return(-1);
1772 }
1773 ctxt->input->base =
1774 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001775 ctxt->input->end =
1776 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001777
1778 }
1779 return(0);
1780 } else {
1781 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1782 /*
1783 * When parsing a static memory array one must know the
1784 * size to be able to convert the buffer.
1785 */
1786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1787 ctxt->sax->error(ctxt->userData,
1788 "xmlSwitchEncoding : no input\n");
1789 return(-1);
1790 } else {
1791 int processed;
1792
1793 /*
1794 * Shring the current input buffer.
1795 * Move it as the raw buffer and create a new input buffer
1796 */
1797 processed = ctxt->input->cur - ctxt->input->base;
1798
1799 ctxt->input->buf->raw = xmlBufferCreate();
1800 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1801 ctxt->input->length - processed);
1802 ctxt->input->buf->buffer = xmlBufferCreate();
1803
1804 /*
1805 * convert as much as possible of the raw input
1806 * to the parser reading buffer.
1807 */
1808 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1809 ctxt->input->buf->buffer,
1810 ctxt->input->buf->raw);
1811 if (nbchars < 0) {
1812 xmlGenericError(xmlGenericErrorContext,
1813 "xmlSwitchToEncoding: encoder error\n");
1814 return(-1);
1815 }
1816
1817 /*
1818 * Conversion succeeded, get rid of the old buffer
1819 */
1820 if ((ctxt->input->free != NULL) &&
1821 (ctxt->input->base != NULL))
1822 ctxt->input->free((xmlChar *) ctxt->input->base);
1823 ctxt->input->base =
1824 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001825 ctxt->input->end =
1826 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001827 }
1828 }
1829 } else {
1830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1831 ctxt->sax->error(ctxt->userData,
1832 "xmlSwitchEncoding : no input\n");
1833 return(-1);
1834 }
1835 /*
1836 * The parsing is now done in UTF8 natively
1837 */
1838 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1839 } else
1840 return(-1);
1841 return(0);
1842
1843}
1844
1845/************************************************************************
1846 * *
1847 * Commodity functions to handle entities processing *
1848 * *
1849 ************************************************************************/
1850
1851/**
1852 * xmlFreeInputStream:
1853 * @input: an xmlParserInputPtr
1854 *
1855 * Free up an input stream.
1856 */
1857void
1858xmlFreeInputStream(xmlParserInputPtr input) {
1859 if (input == NULL) return;
1860
1861 if (input->filename != NULL) xmlFree((char *) input->filename);
1862 if (input->directory != NULL) xmlFree((char *) input->directory);
1863 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1864 if (input->version != NULL) xmlFree((char *) input->version);
1865 if ((input->free != NULL) && (input->base != NULL))
1866 input->free((xmlChar *) input->base);
1867 if (input->buf != NULL)
1868 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001869 xmlFree(input);
1870}
1871
1872/**
1873 * xmlNewInputStream:
1874 * @ctxt: an XML parser context
1875 *
1876 * Create a new input stream structure
1877 * Returns the new input stream or NULL
1878 */
1879xmlParserInputPtr
1880xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1881 xmlParserInputPtr input;
1882
1883 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1884 if (input == NULL) {
1885 if (ctxt != NULL) {
1886 ctxt->errNo = XML_ERR_NO_MEMORY;
1887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1888 ctxt->sax->error(ctxt->userData,
1889 "malloc: couldn't allocate a new input stream\n");
1890 ctxt->errNo = XML_ERR_NO_MEMORY;
1891 }
1892 return(NULL);
1893 }
1894 memset(input, 0, sizeof(xmlParserInput));
1895 input->line = 1;
1896 input->col = 1;
1897 input->standalone = -1;
1898 return(input);
1899}
1900
1901/**
1902 * xmlNewIOInputStream:
1903 * @ctxt: an XML parser context
1904 * @input: an I/O Input
1905 * @enc: the charset encoding if known
1906 *
1907 * Create a new input stream structure encapsulating the @input into
1908 * a stream suitable for the parser.
1909 *
1910 * Returns the new input stream or NULL
1911 */
1912xmlParserInputPtr
1913xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1914 xmlCharEncoding enc) {
1915 xmlParserInputPtr inputStream;
1916
1917 if (xmlParserDebugEntities)
1918 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1919 inputStream = xmlNewInputStream(ctxt);
1920 if (inputStream == NULL) {
1921 return(NULL);
1922 }
1923 inputStream->filename = NULL;
1924 inputStream->buf = input;
1925 inputStream->base = inputStream->buf->buffer->content;
1926 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001927 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001928 if (enc != XML_CHAR_ENCODING_NONE) {
1929 xmlSwitchEncoding(ctxt, enc);
1930 }
1931
1932 return(inputStream);
1933}
1934
1935/**
1936 * xmlNewEntityInputStream:
1937 * @ctxt: an XML parser context
1938 * @entity: an Entity pointer
1939 *
1940 * Create a new input stream based on an xmlEntityPtr
1941 *
1942 * Returns the new input stream or NULL
1943 */
1944xmlParserInputPtr
1945xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1946 xmlParserInputPtr input;
1947
1948 if (entity == NULL) {
1949 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1951 ctxt->sax->error(ctxt->userData,
1952 "internal: xmlNewEntityInputStream entity = NULL\n");
1953 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1954 return(NULL);
1955 }
1956 if (xmlParserDebugEntities)
1957 xmlGenericError(xmlGenericErrorContext,
1958 "new input from entity: %s\n", entity->name);
1959 if (entity->content == NULL) {
1960 switch (entity->etype) {
1961 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1962 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964 ctxt->sax->error(ctxt->userData,
1965 "xmlNewEntityInputStream unparsed entity !\n");
1966 break;
1967 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1968 case XML_EXTERNAL_PARAMETER_ENTITY:
1969 return(xmlLoadExternalEntity((char *) entity->URI,
1970 (char *) entity->ExternalID, ctxt));
1971 case XML_INTERNAL_GENERAL_ENTITY:
1972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1973 ctxt->sax->error(ctxt->userData,
1974 "Internal entity %s without content !\n", entity->name);
1975 break;
1976 case XML_INTERNAL_PARAMETER_ENTITY:
1977 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1979 ctxt->sax->error(ctxt->userData,
1980 "Internal parameter entity %s without content !\n", entity->name);
1981 break;
1982 case XML_INTERNAL_PREDEFINED_ENTITY:
1983 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
1986 "Predefined entity %s without content !\n", entity->name);
1987 break;
1988 }
1989 return(NULL);
1990 }
1991 input = xmlNewInputStream(ctxt);
1992 if (input == NULL) {
1993 return(NULL);
1994 }
1995 input->filename = (char *) entity->URI;
1996 input->base = entity->content;
1997 input->cur = entity->content;
1998 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001999 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002000 return(input);
2001}
2002
2003/**
2004 * xmlNewStringInputStream:
2005 * @ctxt: an XML parser context
2006 * @buffer: an memory buffer
2007 *
2008 * Create a new input stream based on a memory buffer.
2009 * Returns the new input stream
2010 */
2011xmlParserInputPtr
2012xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2013 xmlParserInputPtr input;
2014
2015 if (buffer == NULL) {
2016 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2018 ctxt->sax->error(ctxt->userData,
2019 "internal: xmlNewStringInputStream string = NULL\n");
2020 return(NULL);
2021 }
2022 if (xmlParserDebugEntities)
2023 xmlGenericError(xmlGenericErrorContext,
2024 "new fixed input: %.30s\n", buffer);
2025 input = xmlNewInputStream(ctxt);
2026 if (input == NULL) {
2027 return(NULL);
2028 }
2029 input->base = buffer;
2030 input->cur = buffer;
2031 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002032 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002033 return(input);
2034}
2035
2036/**
2037 * xmlNewInputFromFile:
2038 * @ctxt: an XML parser context
2039 * @filename: the filename to use as entity
2040 *
2041 * Create a new input stream based on a file.
2042 *
2043 * Returns the new input stream or NULL in case of error
2044 */
2045xmlParserInputPtr
2046xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2047 xmlParserInputBufferPtr buf;
2048 xmlParserInputPtr inputStream;
2049 char *directory = NULL;
2050 xmlChar *URI = NULL;
2051
2052 if (xmlParserDebugEntities)
2053 xmlGenericError(xmlGenericErrorContext,
2054 "new input from file: %s\n", filename);
2055 if (ctxt == NULL) return(NULL);
2056 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2057 if (buf == NULL)
2058 return(NULL);
2059
2060 URI = xmlStrdup((xmlChar *) filename);
2061 directory = xmlParserGetDirectory((const char *) URI);
2062
2063 inputStream = xmlNewInputStream(ctxt);
2064 if (inputStream == NULL) {
2065 if (directory != NULL) xmlFree((char *) directory);
2066 if (URI != NULL) xmlFree((char *) URI);
2067 return(NULL);
2068 }
2069
2070 inputStream->filename = (const char *) URI;
2071 inputStream->directory = directory;
2072 inputStream->buf = buf;
2073
2074 inputStream->base = inputStream->buf->buffer->content;
2075 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002076 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002077 if ((ctxt->directory == NULL) && (directory != NULL))
2078 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2079 return(inputStream);
2080}
2081
2082/************************************************************************
2083 * *
2084 * Commodity functions to handle parser contexts *
2085 * *
2086 ************************************************************************/
2087
2088/**
2089 * xmlInitParserCtxt:
2090 * @ctxt: an XML parser context
2091 *
2092 * Initialize a parser context
2093 */
2094
2095void
2096xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2097{
2098 xmlSAXHandler *sax;
2099
2100 xmlDefaultSAXHandlerInit();
2101
2102 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2103 if (sax == NULL) {
2104 xmlGenericError(xmlGenericErrorContext,
2105 "xmlInitParserCtxt: out of memory\n");
2106 }
2107 else
2108 memset(sax, 0, sizeof(xmlSAXHandler));
2109
2110 /* Allocate the Input stack */
2111 ctxt->inputTab = (xmlParserInputPtr *)
2112 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2113 if (ctxt->inputTab == NULL) {
2114 xmlGenericError(xmlGenericErrorContext,
2115 "xmlInitParserCtxt: out of memory\n");
2116 ctxt->inputNr = 0;
2117 ctxt->inputMax = 0;
2118 ctxt->input = NULL;
2119 return;
2120 }
2121 ctxt->inputNr = 0;
2122 ctxt->inputMax = 5;
2123 ctxt->input = NULL;
2124
2125 ctxt->version = NULL;
2126 ctxt->encoding = NULL;
2127 ctxt->standalone = -1;
2128 ctxt->hasExternalSubset = 0;
2129 ctxt->hasPErefs = 0;
2130 ctxt->html = 0;
2131 ctxt->external = 0;
2132 ctxt->instate = XML_PARSER_START;
2133 ctxt->token = 0;
2134 ctxt->directory = NULL;
2135
2136 /* Allocate the Node stack */
2137 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2138 if (ctxt->nodeTab == NULL) {
2139 xmlGenericError(xmlGenericErrorContext,
2140 "xmlInitParserCtxt: out of memory\n");
2141 ctxt->nodeNr = 0;
2142 ctxt->nodeMax = 0;
2143 ctxt->node = NULL;
2144 ctxt->inputNr = 0;
2145 ctxt->inputMax = 0;
2146 ctxt->input = NULL;
2147 return;
2148 }
2149 ctxt->nodeNr = 0;
2150 ctxt->nodeMax = 10;
2151 ctxt->node = NULL;
2152
2153 /* Allocate the Name stack */
2154 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2155 if (ctxt->nameTab == NULL) {
2156 xmlGenericError(xmlGenericErrorContext,
2157 "xmlInitParserCtxt: out of memory\n");
2158 ctxt->nodeNr = 0;
2159 ctxt->nodeMax = 0;
2160 ctxt->node = NULL;
2161 ctxt->inputNr = 0;
2162 ctxt->inputMax = 0;
2163 ctxt->input = NULL;
2164 ctxt->nameNr = 0;
2165 ctxt->nameMax = 0;
2166 ctxt->name = NULL;
2167 return;
2168 }
2169 ctxt->nameNr = 0;
2170 ctxt->nameMax = 10;
2171 ctxt->name = NULL;
2172
2173 /* Allocate the space stack */
2174 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2175 if (ctxt->spaceTab == NULL) {
2176 xmlGenericError(xmlGenericErrorContext,
2177 "xmlInitParserCtxt: out of memory\n");
2178 ctxt->nodeNr = 0;
2179 ctxt->nodeMax = 0;
2180 ctxt->node = NULL;
2181 ctxt->inputNr = 0;
2182 ctxt->inputMax = 0;
2183 ctxt->input = NULL;
2184 ctxt->nameNr = 0;
2185 ctxt->nameMax = 0;
2186 ctxt->name = NULL;
2187 ctxt->spaceNr = 0;
2188 ctxt->spaceMax = 0;
2189 ctxt->space = NULL;
2190 return;
2191 }
2192 ctxt->spaceNr = 1;
2193 ctxt->spaceMax = 10;
2194 ctxt->spaceTab[0] = -1;
2195 ctxt->space = &ctxt->spaceTab[0];
2196
Daniel Veillard14be0a12001-03-03 18:50:55 +00002197 ctxt->sax = sax;
2198 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2199
Owen Taylor3473f882001-02-23 17:55:21 +00002200 ctxt->userData = ctxt;
2201 ctxt->myDoc = NULL;
2202 ctxt->wellFormed = 1;
2203 ctxt->valid = 1;
2204 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2205 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2206 ctxt->pedantic = xmlPedanticParserDefaultValue;
2207 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2208 ctxt->vctxt.userData = ctxt;
2209 if (ctxt->validate) {
2210 ctxt->vctxt.error = xmlParserValidityError;
2211 if (xmlGetWarningsDefaultValue == 0)
2212 ctxt->vctxt.warning = NULL;
2213 else
2214 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002215 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002216 } else {
2217 ctxt->vctxt.error = NULL;
2218 ctxt->vctxt.warning = NULL;
2219 }
2220 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2221 ctxt->record_info = 0;
2222 ctxt->nbChars = 0;
2223 ctxt->checkIndex = 0;
2224 ctxt->inSubset = 0;
2225 ctxt->errNo = XML_ERR_OK;
2226 ctxt->depth = 0;
2227 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2228 xmlInitNodeInfoSeq(&ctxt->node_seq);
2229}
2230
2231/**
2232 * xmlFreeParserCtxt:
2233 * @ctxt: an XML parser context
2234 *
2235 * Free all the memory used by a parser context. However the parsed
2236 * document in ctxt->myDoc is not freed.
2237 */
2238
2239void
2240xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2241{
2242 xmlParserInputPtr input;
2243 xmlChar *oldname;
2244
2245 if (ctxt == NULL) return;
2246
2247 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2248 xmlFreeInputStream(input);
2249 }
2250 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2251 xmlFree(oldname);
2252 }
2253 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2254 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2255 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2256 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2257 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2258 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2259 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2260 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2261 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002262 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2263 xmlFree(ctxt->sax);
2264 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2265 xmlFree(ctxt);
2266}
2267
2268/**
2269 * xmlNewParserCtxt:
2270 *
2271 * Allocate and initialize a new parser context.
2272 *
2273 * Returns the xmlParserCtxtPtr or NULL
2274 */
2275
2276xmlParserCtxtPtr
2277xmlNewParserCtxt()
2278{
2279 xmlParserCtxtPtr ctxt;
2280
2281 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2282 if (ctxt == NULL) {
2283 xmlGenericError(xmlGenericErrorContext,
2284 "xmlNewParserCtxt : cannot allocate context\n");
2285 perror("malloc");
2286 return(NULL);
2287 }
2288 memset(ctxt, 0, sizeof(xmlParserCtxt));
2289 xmlInitParserCtxt(ctxt);
2290 return(ctxt);
2291}
2292
2293/************************************************************************
2294 * *
2295 * Handling of node informations *
2296 * *
2297 ************************************************************************/
2298
2299/**
2300 * xmlClearParserCtxt:
2301 * @ctxt: an XML parser context
2302 *
2303 * Clear (release owned resources) and reinitialize a parser context
2304 */
2305
2306void
2307xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2308{
2309 xmlClearNodeInfoSeq(&ctxt->node_seq);
2310 xmlInitParserCtxt(ctxt);
2311}
2312
2313/**
2314 * xmlParserFindNodeInfo:
2315 * @ctxt: an XML parser context
2316 * @node: an XML node within the tree
2317 *
2318 * Find the parser node info struct for a given node
2319 *
2320 * Returns an xmlParserNodeInfo block pointer or NULL
2321 */
2322const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2323 const xmlNode* node)
2324{
2325 unsigned long pos;
2326
2327 /* Find position where node should be at */
2328 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2329 if ( ctx->node_seq.buffer[pos].node == node )
2330 return &ctx->node_seq.buffer[pos];
2331 else
2332 return NULL;
2333}
2334
2335
2336/**
2337 * xmlInitNodeInfoSeq:
2338 * @seq: a node info sequence pointer
2339 *
2340 * -- Initialize (set to initial state) node info sequence
2341 */
2342void
2343xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2344{
2345 seq->length = 0;
2346 seq->maximum = 0;
2347 seq->buffer = NULL;
2348}
2349
2350/**
2351 * xmlClearNodeInfoSeq:
2352 * @seq: a node info sequence pointer
2353 *
2354 * -- Clear (release memory and reinitialize) node
2355 * info sequence
2356 */
2357void
2358xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2359{
2360 if ( seq->buffer != NULL )
2361 xmlFree(seq->buffer);
2362 xmlInitNodeInfoSeq(seq);
2363}
2364
2365
2366/**
2367 * xmlParserFindNodeInfoIndex:
2368 * @seq: a node info sequence pointer
2369 * @node: an XML node pointer
2370 *
2371 *
2372 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2373 * the given node is or should be at in a sorted sequence
2374 *
2375 * Returns a long indicating the position of the record
2376 */
2377unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2378 const xmlNode* node)
2379{
2380 unsigned long upper, lower, middle;
2381 int found = 0;
2382
2383 /* Do a binary search for the key */
2384 lower = 1;
2385 upper = seq->length;
2386 middle = 0;
2387 while ( lower <= upper && !found) {
2388 middle = lower + (upper - lower) / 2;
2389 if ( node == seq->buffer[middle - 1].node )
2390 found = 1;
2391 else if ( node < seq->buffer[middle - 1].node )
2392 upper = middle - 1;
2393 else
2394 lower = middle + 1;
2395 }
2396
2397 /* Return position */
2398 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2399 return middle;
2400 else
2401 return middle - 1;
2402}
2403
2404
2405/**
2406 * xmlParserAddNodeInfo:
2407 * @ctxt: an XML parser context
2408 * @info: a node info sequence pointer
2409 *
2410 * Insert node info record into the sorted sequence
2411 */
2412void
2413xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2414 const xmlParserNodeInfo* info)
2415{
2416 unsigned long pos;
2417 static unsigned int block_size = 5;
2418
2419 /* Find pos and check to see if node is already in the sequence */
2420 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2421 if ( pos < ctxt->node_seq.length
2422 && ctxt->node_seq.buffer[pos].node == info->node ) {
2423 ctxt->node_seq.buffer[pos] = *info;
2424 }
2425
2426 /* Otherwise, we need to add new node to buffer */
2427 else {
2428 /* Expand buffer by 5 if needed */
2429 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2430 xmlParserNodeInfo* tmp_buffer;
2431 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2432 *(ctxt->node_seq.maximum + block_size));
2433
2434 if ( ctxt->node_seq.buffer == NULL )
2435 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2436 else
2437 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2438
2439 if ( tmp_buffer == NULL ) {
2440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2441 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2442 ctxt->errNo = XML_ERR_NO_MEMORY;
2443 return;
2444 }
2445 ctxt->node_seq.buffer = tmp_buffer;
2446 ctxt->node_seq.maximum += block_size;
2447 }
2448
2449 /* If position is not at end, move elements out of the way */
2450 if ( pos != ctxt->node_seq.length ) {
2451 unsigned long i;
2452
2453 for ( i = ctxt->node_seq.length; i > pos; i-- )
2454 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2455 }
2456
2457 /* Copy element and increase length */
2458 ctxt->node_seq.buffer[pos] = *info;
2459 ctxt->node_seq.length++;
2460 }
2461}
2462
2463/************************************************************************
2464 * *
2465 * Deprecated functions kept for compatibility *
2466 * *
2467 ************************************************************************/
2468
2469/*
2470 * xmlCheckLanguageID
2471 * @lang: pointer to the string value
2472 *
2473 * Checks that the value conforms to the LanguageID production:
2474 *
2475 * NOTE: this is somewhat deprecated, those productions were removed from
2476 * the XML Second edition.
2477 *
2478 * [33] LanguageID ::= Langcode ('-' Subcode)*
2479 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2480 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2481 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2482 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2483 * [38] Subcode ::= ([a-z] | [A-Z])+
2484 *
2485 * Returns 1 if correct 0 otherwise
2486 **/
2487int
2488xmlCheckLanguageID(const xmlChar *lang) {
2489 const xmlChar *cur = lang;
2490
2491 if (cur == NULL)
2492 return(0);
2493 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2494 ((cur[0] == 'I') && (cur[1] == '-'))) {
2495 /*
2496 * IANA code
2497 */
2498 cur += 2;
2499 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2500 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2501 cur++;
2502 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2503 ((cur[0] == 'X') && (cur[1] == '-'))) {
2504 /*
2505 * User code
2506 */
2507 cur += 2;
2508 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2509 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2510 cur++;
2511 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2512 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2513 /*
2514 * ISO639
2515 */
2516 cur++;
2517 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2518 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2519 cur++;
2520 else
2521 return(0);
2522 } else
2523 return(0);
2524 while (cur[0] != 0) { /* non input consuming */
2525 if (cur[0] != '-')
2526 return(0);
2527 cur++;
2528 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2529 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2530 cur++;
2531 else
2532 return(0);
2533 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2534 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2535 cur++;
2536 }
2537 return(1);
2538}
2539
2540/**
2541 * xmlDecodeEntities:
2542 * @ctxt: the parser context
2543 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2544 * @len: the len to decode (in bytes !), -1 for no size limit
2545 * @end: an end marker xmlChar, 0 if none
2546 * @end2: an end marker xmlChar, 0 if none
2547 * @end3: an end marker xmlChar, 0 if none
2548 *
2549 * This function is deprecated, we now always process entities content
2550 * through xmlStringDecodeEntities
2551 *
2552 * TODO: remove it in next major release.
2553 *
2554 * [67] Reference ::= EntityRef | CharRef
2555 *
2556 * [69] PEReference ::= '%' Name ';'
2557 *
2558 * Returns A newly allocated string with the substitution done. The caller
2559 * must deallocate it !
2560 */
2561xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002562xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2563 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002564#if 0
2565 xmlChar *buffer = NULL;
2566 unsigned int buffer_size = 0;
2567 unsigned int nbchars = 0;
2568
2569 xmlChar *current = NULL;
2570 xmlEntityPtr ent;
2571 unsigned int max = (unsigned int) len;
2572 int c,l;
2573#endif
2574
2575 static int deprecated = 0;
2576 if (!deprecated) {
2577 xmlGenericError(xmlGenericErrorContext,
2578 "xmlDecodeEntities() deprecated function reached\n");
2579 deprecated = 1;
2580 }
2581
2582#if 0
2583 if (ctxt->depth > 40) {
2584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2585 ctxt->sax->error(ctxt->userData,
2586 "Detected entity reference loop\n");
2587 ctxt->wellFormed = 0;
2588 ctxt->disableSAX = 1;
2589 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2590 return(NULL);
2591 }
2592
2593 /*
2594 * allocate a translation buffer.
2595 */
2596 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2597 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2598 if (buffer == NULL) {
2599 perror("xmlDecodeEntities: malloc failed");
2600 return(NULL);
2601 }
2602
2603 /*
2604 * Ok loop until we reach one of the ending char or a size limit.
2605 */
2606 GROW;
2607 c = CUR_CHAR(l);
2608 while ((nbchars < max) && (c != end) && /* NOTUSED */
2609 (c != end2) && (c != end3)) {
2610 GROW;
2611 if (c == 0) break;
2612 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2613 int val = xmlParseCharRef(ctxt);
2614 COPY_BUF(0,buffer,nbchars,val);
2615 NEXTL(l);
2616 } else if ((c == '&') && (ctxt->token != '&') &&
2617 (what & XML_SUBSTITUTE_REF)) {
2618 if (xmlParserDebugEntities)
2619 xmlGenericError(xmlGenericErrorContext,
2620 "decoding Entity Reference\n");
2621 ent = xmlParseEntityRef(ctxt);
2622 if ((ent != NULL) &&
2623 (ctxt->replaceEntities != 0)) {
2624 current = ent->content;
2625 while (*current != 0) { /* non input consuming loop */
2626 buffer[nbchars++] = *current++;
2627 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2628 growBuffer(buffer);
2629 }
2630 }
2631 } else if (ent != NULL) {
2632 const xmlChar *cur = ent->name;
2633
2634 buffer[nbchars++] = '&';
2635 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2636 growBuffer(buffer);
2637 }
2638 while (*cur != 0) { /* non input consuming loop */
2639 buffer[nbchars++] = *cur++;
2640 }
2641 buffer[nbchars++] = ';';
2642 }
2643 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2644 /*
2645 * a PEReference induce to switch the entity flow,
2646 * we break here to flush the current set of chars
2647 * parsed if any. We will be called back later.
2648 */
2649 if (xmlParserDebugEntities)
2650 xmlGenericError(xmlGenericErrorContext,
2651 "decoding PE Reference\n");
2652 if (nbchars != 0) break;
2653
2654 xmlParsePEReference(ctxt);
2655
2656 /*
2657 * Pop-up of finished entities.
2658 */
2659 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2660 xmlPopInput(ctxt);
2661
2662 break;
2663 } else {
2664 COPY_BUF(l,buffer,nbchars,c);
2665 NEXTL(l);
2666 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2667 growBuffer(buffer);
2668 }
2669 }
2670 c = CUR_CHAR(l);
2671 }
2672 buffer[nbchars++] = 0;
2673 return(buffer);
2674#endif
2675 return(NULL);
2676}
2677
2678/**
2679 * xmlNamespaceParseNCName:
2680 * @ctxt: an XML parser context
2681 *
2682 * parse an XML namespace name.
2683 *
2684 * TODO: this seems not in use anymore, the namespace handling is done on
2685 * top of the SAX interfaces, i.e. not on raw input.
2686 *
2687 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2688 *
2689 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2690 * CombiningChar | Extender
2691 *
2692 * Returns the namespace name or NULL
2693 */
2694
2695xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002696xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002697#if 0
2698 xmlChar buf[XML_MAX_NAMELEN + 5];
2699 int len = 0, l;
2700 int cur = CUR_CHAR(l);
2701#endif
2702
2703 static int deprecated = 0;
2704 if (!deprecated) {
2705 xmlGenericError(xmlGenericErrorContext,
2706 "xmlNamespaceParseNCName() deprecated function reached\n");
2707 deprecated = 1;
2708 }
2709
2710#if 0
2711 /* load first the value of the char !!! */
2712 GROW;
2713 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2714
2715xmlGenericError(xmlGenericErrorContext,
2716 "xmlNamespaceParseNCName: reached loop 3\n");
2717 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2718 (cur == '.') || (cur == '-') ||
2719 (cur == '_') ||
2720 (IS_COMBINING(cur)) ||
2721 (IS_EXTENDER(cur))) {
2722 COPY_BUF(l,buf,len,cur);
2723 NEXTL(l);
2724 cur = CUR_CHAR(l);
2725 if (len >= XML_MAX_NAMELEN) {
2726 xmlGenericError(xmlGenericErrorContext,
2727 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2728 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2729 (cur == '.') || (cur == '-') ||
2730 (cur == '_') ||
2731 (IS_COMBINING(cur)) ||
2732 (IS_EXTENDER(cur))) {
2733 NEXTL(l);
2734 cur = CUR_CHAR(l);
2735 }
2736 break;
2737 }
2738 }
2739 return(xmlStrndup(buf, len));
2740#endif
2741 return(NULL);
2742}
2743
2744/**
2745 * xmlNamespaceParseQName:
2746 * @ctxt: an XML parser context
2747 * @prefix: a xmlChar **
2748 *
2749 * TODO: this seems not in use anymore, the namespace handling is done on
2750 * top of the SAX interfaces, i.e. not on raw input.
2751 *
2752 * parse an XML qualified name
2753 *
2754 * [NS 5] QName ::= (Prefix ':')? LocalPart
2755 *
2756 * [NS 6] Prefix ::= NCName
2757 *
2758 * [NS 7] LocalPart ::= NCName
2759 *
2760 * Returns the local part, and prefix is updated
2761 * to get the Prefix if any.
2762 */
2763
2764xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002765xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002766
2767 static int deprecated = 0;
2768 if (!deprecated) {
2769 xmlGenericError(xmlGenericErrorContext,
2770 "xmlNamespaceParseQName() deprecated function reached\n");
2771 deprecated = 1;
2772 }
2773
2774#if 0
2775 xmlChar *ret = NULL;
2776
2777 *prefix = NULL;
2778 ret = xmlNamespaceParseNCName(ctxt);
2779 if (RAW == ':') {
2780 *prefix = ret;
2781 NEXT;
2782 ret = xmlNamespaceParseNCName(ctxt);
2783 }
2784
2785 return(ret);
2786#endif
2787 return(NULL);
2788}
2789
2790/**
2791 * xmlNamespaceParseNSDef:
2792 * @ctxt: an XML parser context
2793 *
2794 * parse a namespace prefix declaration
2795 *
2796 * TODO: this seems not in use anymore, the namespace handling is done on
2797 * top of the SAX interfaces, i.e. not on raw input.
2798 *
2799 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2800 *
2801 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2802 *
2803 * Returns the namespace name
2804 */
2805
2806xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002807xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002808 static int deprecated = 0;
2809 if (!deprecated) {
2810 xmlGenericError(xmlGenericErrorContext,
2811 "xmlNamespaceParseNSDef() deprecated function reached\n");
2812 deprecated = 1;
2813 }
2814 return(NULL);
2815#if 0
2816 xmlChar *name = NULL;
2817
2818 if ((RAW == 'x') && (NXT(1) == 'm') &&
2819 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2820 (NXT(4) == 's')) {
2821 SKIP(5);
2822 if (RAW == ':') {
2823 NEXT;
2824 name = xmlNamespaceParseNCName(ctxt);
2825 }
2826 }
2827 return(name);
2828#endif
2829}
2830
2831/**
2832 * xmlParseQuotedString:
2833 * @ctxt: an XML parser context
2834 *
2835 * Parse and return a string between quotes or doublequotes
2836 *
2837 * TODO: Deprecated, to be removed at next drop of binary compatibility
2838 *
2839 * Returns the string parser or NULL.
2840 */
2841xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002842xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002843 static int deprecated = 0;
2844 if (!deprecated) {
2845 xmlGenericError(xmlGenericErrorContext,
2846 "xmlParseQuotedString() deprecated function reached\n");
2847 deprecated = 1;
2848 }
2849 return(NULL);
2850
2851#if 0
2852 xmlChar *buf = NULL;
2853 int len = 0,l;
2854 int size = XML_PARSER_BUFFER_SIZE;
2855 int c;
2856
2857 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2858 if (buf == NULL) {
2859 xmlGenericError(xmlGenericErrorContext,
2860 "malloc of %d byte failed\n", size);
2861 return(NULL);
2862 }
2863xmlGenericError(xmlGenericErrorContext,
2864 "xmlParseQuotedString: reached loop 4\n");
2865 if (RAW == '"') {
2866 NEXT;
2867 c = CUR_CHAR(l);
2868 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2869 if (len + 5 >= size) {
2870 size *= 2;
2871 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2872 if (buf == NULL) {
2873 xmlGenericError(xmlGenericErrorContext,
2874 "realloc of %d byte failed\n", size);
2875 return(NULL);
2876 }
2877 }
2878 COPY_BUF(l,buf,len,c);
2879 NEXTL(l);
2880 c = CUR_CHAR(l);
2881 }
2882 if (c != '"') {
2883 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2885 ctxt->sax->error(ctxt->userData,
2886 "String not closed \"%.50s\"\n", buf);
2887 ctxt->wellFormed = 0;
2888 ctxt->disableSAX = 1;
2889 } else {
2890 NEXT;
2891 }
2892 } else if (RAW == '\''){
2893 NEXT;
2894 c = CUR;
2895 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2896 if (len + 1 >= size) {
2897 size *= 2;
2898 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2899 if (buf == NULL) {
2900 xmlGenericError(xmlGenericErrorContext,
2901 "realloc of %d byte failed\n", size);
2902 return(NULL);
2903 }
2904 }
2905 buf[len++] = c;
2906 NEXT;
2907 c = CUR;
2908 }
2909 if (RAW != '\'') {
2910 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2912 ctxt->sax->error(ctxt->userData,
2913 "String not closed \"%.50s\"\n", buf);
2914 ctxt->wellFormed = 0;
2915 ctxt->disableSAX = 1;
2916 } else {
2917 NEXT;
2918 }
2919 }
2920 return(buf);
2921#endif
2922}
2923
2924/**
2925 * xmlParseNamespace:
2926 * @ctxt: an XML parser context
2927 *
2928 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2929 *
2930 * This is what the older xml-name Working Draft specified, a bunch of
2931 * other stuff may still rely on it, so support is still here as
2932 * if it was declared on the root of the Tree:-(
2933 *
2934 * TODO: remove from library
2935 *
2936 * To be removed at next drop of binary compatibility
2937 */
2938
2939void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002940xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002941 static int deprecated = 0;
2942 if (!deprecated) {
2943 xmlGenericError(xmlGenericErrorContext,
2944 "xmlParseNamespace() deprecated function reached\n");
2945 deprecated = 1;
2946 }
2947
2948#if 0
2949 xmlChar *href = NULL;
2950 xmlChar *prefix = NULL;
2951 int garbage = 0;
2952
2953 /*
2954 * We just skipped "namespace" or "xml:namespace"
2955 */
2956 SKIP_BLANKS;
2957
2958xmlGenericError(xmlGenericErrorContext,
2959 "xmlParseNamespace: reached loop 5\n");
2960 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2961 /*
2962 * We can have "ns" or "prefix" attributes
2963 * Old encoding as 'href' or 'AS' attributes is still supported
2964 */
2965 if ((RAW == 'n') && (NXT(1) == 's')) {
2966 garbage = 0;
2967 SKIP(2);
2968 SKIP_BLANKS;
2969
2970 if (RAW != '=') continue;
2971 NEXT;
2972 SKIP_BLANKS;
2973
2974 href = xmlParseQuotedString(ctxt);
2975 SKIP_BLANKS;
2976 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2977 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2978 garbage = 0;
2979 SKIP(4);
2980 SKIP_BLANKS;
2981
2982 if (RAW != '=') continue;
2983 NEXT;
2984 SKIP_BLANKS;
2985
2986 href = xmlParseQuotedString(ctxt);
2987 SKIP_BLANKS;
2988 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2989 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2990 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2991 garbage = 0;
2992 SKIP(6);
2993 SKIP_BLANKS;
2994
2995 if (RAW != '=') continue;
2996 NEXT;
2997 SKIP_BLANKS;
2998
2999 prefix = xmlParseQuotedString(ctxt);
3000 SKIP_BLANKS;
3001 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3002 garbage = 0;
3003 SKIP(2);
3004 SKIP_BLANKS;
3005
3006 if (RAW != '=') continue;
3007 NEXT;
3008 SKIP_BLANKS;
3009
3010 prefix = xmlParseQuotedString(ctxt);
3011 SKIP_BLANKS;
3012 } else if ((RAW == '?') && (NXT(1) == '>')) {
3013 garbage = 0;
3014 NEXT;
3015 } else {
3016 /*
3017 * Found garbage when parsing the namespace
3018 */
3019 if (!garbage) {
3020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021 ctxt->sax->error(ctxt->userData,
3022 "xmlParseNamespace found garbage\n");
3023 }
3024 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3025 ctxt->wellFormed = 0;
3026 ctxt->disableSAX = 1;
3027 NEXT;
3028 }
3029 }
3030
3031 MOVETO_ENDTAG(CUR_PTR);
3032 NEXT;
3033
3034 /*
3035 * Register the DTD.
3036 if (href != NULL)
3037 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3038 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3039 */
3040
3041 if (prefix != NULL) xmlFree(prefix);
3042 if (href != NULL) xmlFree(href);
3043#endif
3044}
3045
3046/**
3047 * xmlScanName:
3048 * @ctxt: an XML parser context
3049 *
3050 * Trickery: parse an XML name but without consuming the input flow
3051 * Needed for rollback cases. Used only when parsing entities references.
3052 *
3053 * TODO: seems deprecated now, only used in the default part of
3054 * xmlParserHandleReference
3055 *
3056 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3057 * CombiningChar | Extender
3058 *
3059 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3060 *
3061 * [6] Names ::= Name (S Name)*
3062 *
3063 * Returns the Name parsed or NULL
3064 */
3065
3066xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003067xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003068 static int deprecated = 0;
3069 if (!deprecated) {
3070 xmlGenericError(xmlGenericErrorContext,
3071 "xmlScanName() deprecated function reached\n");
3072 deprecated = 1;
3073 }
3074 return(NULL);
3075
3076#if 0
3077 xmlChar buf[XML_MAX_NAMELEN];
3078 int len = 0;
3079
3080 GROW;
3081 if (!IS_LETTER(RAW) && (RAW != '_') &&
3082 (RAW != ':')) {
3083 return(NULL);
3084 }
3085
3086
3087 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3088 (NXT(len) == '.') || (NXT(len) == '-') ||
3089 (NXT(len) == '_') || (NXT(len) == ':') ||
3090 (IS_COMBINING(NXT(len))) ||
3091 (IS_EXTENDER(NXT(len)))) {
3092 GROW;
3093 buf[len] = NXT(len);
3094 len++;
3095 if (len >= XML_MAX_NAMELEN) {
3096 xmlGenericError(xmlGenericErrorContext,
3097 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3098 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3099 (IS_DIGIT(NXT(len))) ||
3100 (NXT(len) == '.') || (NXT(len) == '-') ||
3101 (NXT(len) == '_') || (NXT(len) == ':') ||
3102 (IS_COMBINING(NXT(len))) ||
3103 (IS_EXTENDER(NXT(len))))
3104 len++;
3105 break;
3106 }
3107 }
3108 return(xmlStrndup(buf, len));
3109#endif
3110}
3111
3112/**
3113 * xmlParserHandleReference:
3114 * @ctxt: the parser context
3115 *
3116 * TODO: Remove, now deprecated ... the test is done directly in the
3117 * content parsing
3118 * routines.
3119 *
3120 * [67] Reference ::= EntityRef | CharRef
3121 *
3122 * [68] EntityRef ::= '&' Name ';'
3123 *
3124 * [ WFC: Entity Declared ]
3125 * the Name given in the entity reference must match that in an entity
3126 * declaration, except that well-formed documents need not declare any
3127 * of the following entities: amp, lt, gt, apos, quot.
3128 *
3129 * [ WFC: Parsed Entity ]
3130 * An entity reference must not contain the name of an unparsed entity
3131 *
3132 * [66] CharRef ::= '&#' [0-9]+ ';' |
3133 * '&#x' [0-9a-fA-F]+ ';'
3134 *
3135 * A PEReference may have been detectect in the current input stream
3136 * the handling is done accordingly to
3137 * http://www.w3.org/TR/REC-xml#entproc
3138 */
3139void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003140xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003141 static int deprecated = 0;
3142 if (!deprecated) {
3143 xmlGenericError(xmlGenericErrorContext,
3144 "xmlParserHandleReference() deprecated function reached\n");
3145 deprecated = 1;
3146 }
3147
3148#if 0
3149 xmlParserInputPtr input;
3150 xmlChar *name;
3151 xmlEntityPtr ent = NULL;
3152
3153 if (ctxt->token != 0) {
3154 return;
3155 }
3156 if (RAW != '&') return;
3157 GROW;
3158 if ((RAW == '&') && (NXT(1) == '#')) {
3159 switch(ctxt->instate) {
3160 case XML_PARSER_ENTITY_DECL:
3161 case XML_PARSER_PI:
3162 case XML_PARSER_CDATA_SECTION:
3163 case XML_PARSER_COMMENT:
3164 case XML_PARSER_SYSTEM_LITERAL:
3165 /* we just ignore it there */
3166 return;
3167 case XML_PARSER_START_TAG:
3168 return;
3169 case XML_PARSER_END_TAG:
3170 return;
3171 case XML_PARSER_EOF:
3172 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3174 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3175 ctxt->wellFormed = 0;
3176 ctxt->disableSAX = 1;
3177 return;
3178 case XML_PARSER_PROLOG:
3179 case XML_PARSER_START:
3180 case XML_PARSER_MISC:
3181 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3183 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3184 ctxt->wellFormed = 0;
3185 ctxt->disableSAX = 1;
3186 return;
3187 case XML_PARSER_EPILOG:
3188 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3190 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3191 ctxt->wellFormed = 0;
3192 ctxt->disableSAX = 1;
3193 return;
3194 case XML_PARSER_DTD:
3195 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3197 ctxt->sax->error(ctxt->userData,
3198 "CharRef are forbiden in DTDs!\n");
3199 ctxt->wellFormed = 0;
3200 ctxt->disableSAX = 1;
3201 return;
3202 case XML_PARSER_ENTITY_VALUE:
3203 /*
3204 * NOTE: in the case of entity values, we don't do the
3205 * substitution here since we need the literal
3206 * entity value to be able to save the internal
3207 * subset of the document.
3208 * This will be handled by xmlStringDecodeEntities
3209 */
3210 return;
3211 case XML_PARSER_CONTENT:
3212 return;
3213 case XML_PARSER_ATTRIBUTE_VALUE:
3214 /* ctxt->token = xmlParseCharRef(ctxt); */
3215 return;
3216 case XML_PARSER_IGNORE:
3217 return;
3218 }
3219 return;
3220 }
3221
3222 switch(ctxt->instate) {
3223 case XML_PARSER_CDATA_SECTION:
3224 return;
3225 case XML_PARSER_PI:
3226 case XML_PARSER_COMMENT:
3227 case XML_PARSER_SYSTEM_LITERAL:
3228 case XML_PARSER_CONTENT:
3229 return;
3230 case XML_PARSER_START_TAG:
3231 return;
3232 case XML_PARSER_END_TAG:
3233 return;
3234 case XML_PARSER_EOF:
3235 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3237 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3238 ctxt->wellFormed = 0;
3239 ctxt->disableSAX = 1;
3240 return;
3241 case XML_PARSER_PROLOG:
3242 case XML_PARSER_START:
3243 case XML_PARSER_MISC:
3244 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3246 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3247 ctxt->wellFormed = 0;
3248 ctxt->disableSAX = 1;
3249 return;
3250 case XML_PARSER_EPILOG:
3251 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3254 ctxt->wellFormed = 0;
3255 ctxt->disableSAX = 1;
3256 return;
3257 case XML_PARSER_ENTITY_VALUE:
3258 /*
3259 * NOTE: in the case of entity values, we don't do the
3260 * substitution here since we need the literal
3261 * entity value to be able to save the internal
3262 * subset of the document.
3263 * This will be handled by xmlStringDecodeEntities
3264 */
3265 return;
3266 case XML_PARSER_ATTRIBUTE_VALUE:
3267 /*
3268 * NOTE: in the case of attributes values, we don't do the
3269 * substitution here unless we are in a mode where
3270 * the parser is explicitely asked to substitute
3271 * entities. The SAX callback is called with values
3272 * without entity substitution.
3273 * This will then be handled by xmlStringDecodeEntities
3274 */
3275 return;
3276 case XML_PARSER_ENTITY_DECL:
3277 /*
3278 * we just ignore it there
3279 * the substitution will be done once the entity is referenced
3280 */
3281 return;
3282 case XML_PARSER_DTD:
3283 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3285 ctxt->sax->error(ctxt->userData,
3286 "Entity references are forbiden in DTDs!\n");
3287 ctxt->wellFormed = 0;
3288 ctxt->disableSAX = 1;
3289 return;
3290 case XML_PARSER_IGNORE:
3291 return;
3292 }
3293
3294/* TODO: this seems not reached anymore .... Verify ... */
3295xmlGenericError(xmlGenericErrorContext,
3296 "Reached deprecated section in xmlParserHandleReference()\n");
3297xmlGenericError(xmlGenericErrorContext,
3298 "Please forward the document to Daniel.Veillard@w3.org\n");
3299xmlGenericError(xmlGenericErrorContext,
3300 "indicating the version: %s, thanks !\n", xmlParserVersion);
3301 NEXT;
3302 name = xmlScanName(ctxt);
3303 if (name == NULL) {
3304 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3306 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3307 ctxt->wellFormed = 0;
3308 ctxt->disableSAX = 1;
3309 ctxt->token = '&';
3310 return;
3311 }
3312 if (NXT(xmlStrlen(name)) != ';') {
3313 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3315 ctxt->sax->error(ctxt->userData,
3316 "Entity reference: ';' expected\n");
3317 ctxt->wellFormed = 0;
3318 ctxt->disableSAX = 1;
3319 ctxt->token = '&';
3320 xmlFree(name);
3321 return;
3322 }
3323 SKIP(xmlStrlen(name) + 1);
3324 if (ctxt->sax != NULL) {
3325 if (ctxt->sax->getEntity != NULL)
3326 ent = ctxt->sax->getEntity(ctxt->userData, name);
3327 }
3328
3329 /*
3330 * [ WFC: Entity Declared ]
3331 * the Name given in the entity reference must match that in an entity
3332 * declaration, except that well-formed documents need not declare any
3333 * of the following entities: amp, lt, gt, apos, quot.
3334 */
3335 if (ent == NULL)
3336 ent = xmlGetPredefinedEntity(name);
3337 if (ent == NULL) {
3338 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData,
3341 "Entity reference: entity %s not declared\n",
3342 name);
3343 ctxt->wellFormed = 0;
3344 ctxt->disableSAX = 1;
3345 xmlFree(name);
3346 return;
3347 }
3348
3349 /*
3350 * [ WFC: Parsed Entity ]
3351 * An entity reference must not contain the name of an unparsed entity
3352 */
3353 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3354 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356 ctxt->sax->error(ctxt->userData,
3357 "Entity reference to unparsed entity %s\n", name);
3358 ctxt->wellFormed = 0;
3359 ctxt->disableSAX = 1;
3360 }
3361
3362 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3363 ctxt->token = ent->content[0];
3364 xmlFree(name);
3365 return;
3366 }
3367 input = xmlNewEntityInputStream(ctxt, ent);
3368 xmlPushInput(ctxt, input);
3369 xmlFree(name);
3370#endif
3371 return;
3372}
3373
3374/**
3375 * xmlHandleEntity:
3376 * @ctxt: an XML parser context
3377 * @entity: an XML entity pointer.
3378 *
3379 * Default handling of defined entities, when should we define a new input
3380 * stream ? When do we just handle that as a set of chars ?
3381 *
3382 * OBSOLETE: to be removed at some point.
3383 */
3384
3385void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003386xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003387 static int deprecated = 0;
3388 if (!deprecated) {
3389 xmlGenericError(xmlGenericErrorContext,
3390 "xmlHandleEntity() deprecated function reached\n");
3391 deprecated = 1;
3392 }
3393
3394#if 0
3395 int len;
3396 xmlParserInputPtr input;
3397
3398 if (entity->content == NULL) {
3399 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3401 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3402 entity->name);
3403 ctxt->wellFormed = 0;
3404 ctxt->disableSAX = 1;
3405 return;
3406 }
3407 len = xmlStrlen(entity->content);
3408 if (len <= 2) goto handle_as_char;
3409
3410 /*
3411 * Redefine its content as an input stream.
3412 */
3413 input = xmlNewEntityInputStream(ctxt, entity);
3414 xmlPushInput(ctxt, input);
3415 return;
3416
3417handle_as_char:
3418 /*
3419 * Just handle the content as a set of chars.
3420 */
3421 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3422 (ctxt->sax->characters != NULL))
3423 ctxt->sax->characters(ctxt->userData, entity->content, len);
3424#endif
3425}
3426
3427/**
3428 * xmlNewGlobalNs:
3429 * @doc: the document carrying the namespace
3430 * @href: the URI associated
3431 * @prefix: the prefix for the namespace
3432 *
3433 * Creation of a Namespace, the old way using PI and without scoping
3434 * DEPRECATED !!!
3435 * It now create a namespace on the root element of the document if found.
3436 * Returns NULL this functionnality had been removed
3437 */
3438xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003439xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3440 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003441 static int deprecated = 0;
3442 if (!deprecated) {
3443 xmlGenericError(xmlGenericErrorContext,
3444 "xmlNewGlobalNs() deprecated function reached\n");
3445 deprecated = 1;
3446 }
3447 return(NULL);
3448#if 0
3449 xmlNodePtr root;
3450
3451 xmlNsPtr cur;
3452
3453 root = xmlDocGetRootElement(doc);
3454 if (root != NULL)
3455 return(xmlNewNs(root, href, prefix));
3456
3457 /*
3458 * if there is no root element yet, create an old Namespace type
3459 * and it will be moved to the root at save time.
3460 */
3461 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3462 if (cur == NULL) {
3463 xmlGenericError(xmlGenericErrorContext,
3464 "xmlNewGlobalNs : malloc failed\n");
3465 return(NULL);
3466 }
3467 memset(cur, 0, sizeof(xmlNs));
3468 cur->type = XML_GLOBAL_NAMESPACE;
3469
3470 if (href != NULL)
3471 cur->href = xmlStrdup(href);
3472 if (prefix != NULL)
3473 cur->prefix = xmlStrdup(prefix);
3474
3475 /*
3476 * Add it at the end to preserve parsing order ...
3477 */
3478 if (doc != NULL) {
3479 if (doc->oldNs == NULL) {
3480 doc->oldNs = cur;
3481 } else {
3482 xmlNsPtr prev = doc->oldNs;
3483
3484 while (prev->next != NULL) prev = prev->next;
3485 prev->next = cur;
3486 }
3487 }
3488
3489 return(NULL);
3490#endif
3491}
3492
3493/**
3494 * xmlUpgradeOldNs:
3495 * @doc: a document pointer
3496 *
3497 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3498 * DEPRECATED
3499 */
3500void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003501xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003502 static int deprecated = 0;
3503 if (!deprecated) {
3504 xmlGenericError(xmlGenericErrorContext,
3505 "xmlNewGlobalNs() deprecated function reached\n");
3506 deprecated = 1;
3507 }
3508#if 0
3509 xmlNsPtr cur;
3510
3511 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3512 if (doc->children == NULL) {
3513#ifdef DEBUG_TREE
3514 xmlGenericError(xmlGenericErrorContext,
3515 "xmlUpgradeOldNs: failed no root !\n");
3516#endif
3517 return;
3518 }
3519
3520 cur = doc->oldNs;
3521 while (cur->next != NULL) {
3522 cur->type = XML_LOCAL_NAMESPACE;
3523 cur = cur->next;
3524 }
3525 cur->type = XML_LOCAL_NAMESPACE;
3526 cur->next = doc->children->nsDef;
3527 doc->children->nsDef = doc->oldNs;
3528 doc->oldNs = NULL;
3529#endif
3530}
3531