blob: b9fd3cf47ad348d732e86b755b7b8885f17b0596 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * Daniel.Veillard@w3.org
8 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50
Daniel Veillard56a4cb82001-03-24 17:00:36 +000051void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000052
53/************************************************************************
54 * *
55 * Version and Features handling *
56 * *
57 ************************************************************************/
58const char *xmlParserVersion = LIBXML_VERSION_STRING;
59
60/*
61 * xmlCheckVersion:
62 * @version: the include version number
63 *
64 * check the compiled lib version against the include one.
65 * This can warn or immediately kill the application
66 */
67void
68xmlCheckVersion(int version) {
69 int myversion = (int) LIBXML_VERSION;
70
71 if ((myversion / 10000) != (version / 10000)) {
72 xmlGenericError(xmlGenericErrorContext,
73 "Fatal: program compiled against libxml %d using libxml %d\n",
74 (version / 10000), (myversion / 10000));
75 exit(1);
76 }
77 if ((myversion / 100) < (version / 100)) {
78 xmlGenericError(xmlGenericErrorContext,
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
81 }
82}
83
84
/*
 * Names of the features known to the feature API, in the order in
 * which xmlGetFeaturesList() hands them out.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the capacity of @result on input, the number of names stored
 *        on output
 * @result:  an array to be filled with pointers to the feature names
 *
 * Copy at most *@len feature names into the @result array. The stored
 * pointers refer to the library's own strings and must not be freed.
 *
 * When @len or @result is NULL nothing is copied and the total number
 * of features is returned, which lets a caller size its array.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the
 * total number of features.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);
    if ((*len < 0) || (*len >= 1000))
        return(-1);

    /* never hand out more names than the table holds */
    if (*len > total)
        *len = total;

    idx = 0;
    while (idx < *len) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
156
157/*
158 * xmlGetFeature:
159 * @ctxt: an XML/HTML parser context
160 * @name: the feature name
161 * @result: location to store the result
162 *
163 * Read the current value of one feature of this parser instance
164 *
165 * Returns -1 in case or error, 0 otherwise
166 */
167int
168xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
169 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
170 return(-1);
171
172 if (!strcmp(name, "validate")) {
173 *((int *) result) = ctxt->validate;
174 } else if (!strcmp(name, "keep blanks")) {
175 *((int *) result) = ctxt->keepBlanks;
176 } else if (!strcmp(name, "disable SAX")) {
177 *((int *) result) = ctxt->disableSAX;
178 } else if (!strcmp(name, "fetch external entities")) {
179 *((int *) result) = ctxt->loadsubset;
180 } else if (!strcmp(name, "substitute entities")) {
181 *((int *) result) = ctxt->replaceEntities;
182 } else if (!strcmp(name, "gather line info")) {
183 *((int *) result) = ctxt->record_info;
184 } else if (!strcmp(name, "user data")) {
185 *((void **)result) = ctxt->userData;
186 } else if (!strcmp(name, "is html")) {
187 *((int *) result) = ctxt->html;
188 } else if (!strcmp(name, "is standalone")) {
189 *((int *) result) = ctxt->standalone;
190 } else if (!strcmp(name, "document")) {
191 *((xmlDocPtr *) result) = ctxt->myDoc;
192 } else if (!strcmp(name, "is well formed")) {
193 *((int *) result) = ctxt->wellFormed;
194 } else if (!strcmp(name, "is valid")) {
195 *((int *) result) = ctxt->valid;
196 } else if (!strcmp(name, "SAX block")) {
197 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
198 } else if (!strcmp(name, "SAX function internalSubset")) {
199 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
200 } else if (!strcmp(name, "SAX function isStandalone")) {
201 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
202 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
203 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
204 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
205 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
206 } else if (!strcmp(name, "SAX function resolveEntity")) {
207 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
208 } else if (!strcmp(name, "SAX function getEntity")) {
209 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
210 } else if (!strcmp(name, "SAX function entityDecl")) {
211 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
212 } else if (!strcmp(name, "SAX function notationDecl")) {
213 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
214 } else if (!strcmp(name, "SAX function attributeDecl")) {
215 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
216 } else if (!strcmp(name, "SAX function elementDecl")) {
217 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
218 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
219 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
220 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
221 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
222 } else if (!strcmp(name, "SAX function startDocument")) {
223 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
224 } else if (!strcmp(name, "SAX function endDocument")) {
225 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
226 } else if (!strcmp(name, "SAX function startElement")) {
227 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
228 } else if (!strcmp(name, "SAX function endElement")) {
229 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
230 } else if (!strcmp(name, "SAX function reference")) {
231 *((referenceSAXFunc *) result) = ctxt->sax->reference;
232 } else if (!strcmp(name, "SAX function characters")) {
233 *((charactersSAXFunc *) result) = ctxt->sax->characters;
234 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
235 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
236 } else if (!strcmp(name, "SAX function processingInstruction")) {
237 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
238 } else if (!strcmp(name, "SAX function comment")) {
239 *((commentSAXFunc *) result) = ctxt->sax->comment;
240 } else if (!strcmp(name, "SAX function warning")) {
241 *((warningSAXFunc *) result) = ctxt->sax->warning;
242 } else if (!strcmp(name, "SAX function error")) {
243 *((errorSAXFunc *) result) = ctxt->sax->error;
244 } else if (!strcmp(name, "SAX function fatalError")) {
245 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
246 } else if (!strcmp(name, "SAX function getParameterEntity")) {
247 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
248 } else if (!strcmp(name, "SAX function cdataBlock")) {
249 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
250 } else if (!strcmp(name, "SAX function externalSubset")) {
251 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
252 } else {
253 return(-1);
254 }
255 return(0);
256}
257
258/*
259 * xmlSetFeature:
260 * @ctxt: an XML/HTML parser context
261 * @name: the feature name
262 * @value: pointer to the location of the new value
263 *
264 * Change the current value of one feature of this parser instance
265 *
266 * Returns -1 in case or error, 0 otherwise
267 */
268int
269xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
270 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
271 return(-1);
272
273 if (!strcmp(name, "validate")) {
274 int newvalidate = *((int *) value);
275 if ((!ctxt->validate) && (newvalidate != 0)) {
276 if (ctxt->vctxt.warning == NULL)
277 ctxt->vctxt.warning = xmlParserValidityWarning;
278 if (ctxt->vctxt.error == NULL)
279 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000280 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000281 }
282 ctxt->validate = newvalidate;
283 } else if (!strcmp(name, "keep blanks")) {
284 ctxt->keepBlanks = *((int *) value);
285 } else if (!strcmp(name, "disable SAX")) {
286 ctxt->disableSAX = *((int *) value);
287 } else if (!strcmp(name, "fetch external entities")) {
288 ctxt->loadsubset = *((int *) value);
289 } else if (!strcmp(name, "substitute entities")) {
290 ctxt->replaceEntities = *((int *) value);
291 } else if (!strcmp(name, "gather line info")) {
292 ctxt->record_info = *((int *) value);
293 } else if (!strcmp(name, "user data")) {
294 ctxt->userData = *((void **)value);
295 } else if (!strcmp(name, "is html")) {
296 ctxt->html = *((int *) value);
297 } else if (!strcmp(name, "is standalone")) {
298 ctxt->standalone = *((int *) value);
299 } else if (!strcmp(name, "document")) {
300 ctxt->myDoc = *((xmlDocPtr *) value);
301 } else if (!strcmp(name, "is well formed")) {
302 ctxt->wellFormed = *((int *) value);
303 } else if (!strcmp(name, "is valid")) {
304 ctxt->valid = *((int *) value);
305 } else if (!strcmp(name, "SAX block")) {
306 ctxt->sax = *((xmlSAXHandlerPtr *) value);
307 } else if (!strcmp(name, "SAX function internalSubset")) {
308 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
309 } else if (!strcmp(name, "SAX function isStandalone")) {
310 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
311 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
312 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
313 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
314 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function resolveEntity")) {
316 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
317 } else if (!strcmp(name, "SAX function getEntity")) {
318 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
319 } else if (!strcmp(name, "SAX function entityDecl")) {
320 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function notationDecl")) {
322 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function attributeDecl")) {
324 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
325 } else if (!strcmp(name, "SAX function elementDecl")) {
326 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
328 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
330 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function startDocument")) {
332 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function endDocument")) {
334 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function startElement")) {
336 ctxt->sax->startElement = *((startElementSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function endElement")) {
338 ctxt->sax->endElement = *((endElementSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function reference")) {
340 ctxt->sax->reference = *((referenceSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function characters")) {
342 ctxt->sax->characters = *((charactersSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
344 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function processingInstruction")) {
346 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function comment")) {
348 ctxt->sax->comment = *((commentSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function warning")) {
350 ctxt->sax->warning = *((warningSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function error")) {
352 ctxt->sax->error = *((errorSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function fatalError")) {
354 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function getParameterEntity")) {
356 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
357 } else if (!strcmp(name, "SAX function cdataBlock")) {
358 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function externalSubset")) {
360 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
361 } else {
362 return(-1);
363 }
364 return(0);
365}
366
367/************************************************************************
368 * *
369 * Some functions to avoid too large macros *
370 * *
371 ************************************************************************/
372
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through and fail the lower bound */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
393
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is one of the white space characters
 * of the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
408
/*
 * Direct lookup table for the code points 0x0000 - 0x00FF:
 * 1 when the code point is a BaseChar, 0 otherwise.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; the others
 * are searched in a table of inclusive [low, high] ranges which is
 * kept sorted in ascending order so that it can be binary-searched.
 * Negative values are rejected up front: they would otherwise be used
 * as an index into xmlBaseArray.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* BaseChar ranges at or above 0x0100, sorted, non-overlapping */
    static const int baseRanges[][2] = {
        {0x0100,0x0131}, {0x0134,0x013E}, {0x0141,0x0148}, {0x014A,0x017E},
        {0x0180,0x01C3}, {0x01CD,0x01F0}, {0x01F4,0x01F5}, {0x01FA,0x0217},
        {0x0250,0x02A8}, {0x02BB,0x02C1}, {0x0386,0x0386}, {0x0388,0x038A},
        {0x038C,0x038C}, {0x038E,0x03A1}, {0x03A3,0x03CE}, {0x03D0,0x03D6},
        {0x03DA,0x03DA}, {0x03DC,0x03DC}, {0x03DE,0x03DE}, {0x03E0,0x03E0},
        {0x03E2,0x03F3}, {0x0401,0x040C}, {0x040E,0x044F}, {0x0451,0x045C},
        {0x045E,0x0481}, {0x0490,0x04C4}, {0x04C7,0x04C8}, {0x04CB,0x04CC},
        {0x04D0,0x04EB}, {0x04EE,0x04F5}, {0x04F8,0x04F9}, {0x0531,0x0556},
        {0x0559,0x0559}, {0x0561,0x0586}, {0x05D0,0x05EA}, {0x05F0,0x05F2},
        {0x0621,0x063A}, {0x0641,0x064A}, {0x0671,0x06B7}, {0x06BA,0x06BE},
        {0x06C0,0x06CE}, {0x06D0,0x06D3}, {0x06D5,0x06D5}, {0x06E5,0x06E6},
        {0x0905,0x0939}, {0x093D,0x093D}, {0x0958,0x0961}, {0x0985,0x098C},
        {0x098F,0x0990}, {0x0993,0x09A8}, {0x09AA,0x09B0}, {0x09B2,0x09B2},
        {0x09B6,0x09B9}, {0x09DC,0x09DD}, {0x09DF,0x09E1}, {0x09F0,0x09F1},
        {0x0A05,0x0A0A}, {0x0A0F,0x0A10}, {0x0A13,0x0A28}, {0x0A2A,0x0A30},
        {0x0A32,0x0A33}, {0x0A35,0x0A36}, {0x0A38,0x0A39}, {0x0A59,0x0A5C},
        {0x0A5E,0x0A5E}, {0x0A72,0x0A74}, {0x0A85,0x0A8B}, {0x0A8D,0x0A8D},
        {0x0A8F,0x0A91}, {0x0A93,0x0AA8}, {0x0AAA,0x0AB0}, {0x0AB2,0x0AB3},
        {0x0AB5,0x0AB9}, {0x0ABD,0x0ABD}, {0x0AE0,0x0AE0}, {0x0B05,0x0B0C},
        {0x0B0F,0x0B10}, {0x0B13,0x0B28}, {0x0B2A,0x0B30}, {0x0B32,0x0B33},
        {0x0B36,0x0B39}, {0x0B3D,0x0B3D}, {0x0B5C,0x0B5D}, {0x0B5F,0x0B61},
        {0x0B85,0x0B8A}, {0x0B8E,0x0B90}, {0x0B92,0x0B95}, {0x0B99,0x0B9A},
        {0x0B9C,0x0B9C}, {0x0B9E,0x0B9F}, {0x0BA3,0x0BA4}, {0x0BA8,0x0BAA},
        {0x0BAE,0x0BB5}, {0x0BB7,0x0BB9}, {0x0C05,0x0C0C}, {0x0C0E,0x0C10},
        {0x0C12,0x0C28}, {0x0C2A,0x0C33}, {0x0C35,0x0C39}, {0x0C60,0x0C61},
        {0x0C85,0x0C8C}, {0x0C8E,0x0C90}, {0x0C92,0x0CA8}, {0x0CAA,0x0CB3},
        {0x0CB5,0x0CB9}, {0x0CDE,0x0CDE}, {0x0CE0,0x0CE1}, {0x0D05,0x0D0C},
        {0x0D0E,0x0D10}, {0x0D12,0x0D28}, {0x0D2A,0x0D39}, {0x0D60,0x0D61},
        {0x0E01,0x0E2E}, {0x0E30,0x0E30}, {0x0E32,0x0E33}, {0x0E40,0x0E45},
        {0x0E81,0x0E82}, {0x0E84,0x0E84}, {0x0E87,0x0E88}, {0x0E8A,0x0E8A},
        {0x0E8D,0x0E8D}, {0x0E94,0x0E97}, {0x0E99,0x0E9F}, {0x0EA1,0x0EA3},
        {0x0EA5,0x0EA5}, {0x0EA7,0x0EA7}, {0x0EAA,0x0EAB}, {0x0EAD,0x0EAE},
        {0x0EB0,0x0EB0}, {0x0EB2,0x0EB3}, {0x0EBD,0x0EBD}, {0x0EC0,0x0EC4},
        {0x0F40,0x0F47}, {0x0F49,0x0F69}, {0x10A0,0x10C5}, {0x10D0,0x10F6},
        {0x1100,0x1100}, {0x1102,0x1103}, {0x1105,0x1107}, {0x1109,0x1109},
        {0x110B,0x110C}, {0x110E,0x1112}, {0x113C,0x113C}, {0x113E,0x113E},
        {0x1140,0x1140}, {0x114C,0x114C}, {0x114E,0x114E}, {0x1150,0x1150},
        {0x1154,0x1155}, {0x1159,0x1159}, {0x115F,0x1161}, {0x1163,0x1163},
        {0x1165,0x1165}, {0x1167,0x1167}, {0x1169,0x1169}, {0x116D,0x116E},
        {0x1172,0x1173}, {0x1175,0x1175}, {0x119E,0x119E}, {0x11A8,0x11A8},
        {0x11AB,0x11AB}, {0x11AE,0x11AF}, {0x11B7,0x11B8}, {0x11BA,0x11BA},
        {0x11BC,0x11C2}, {0x11EB,0x11EB}, {0x11F0,0x11F0}, {0x11F9,0x11F9},
        {0x1E00,0x1E9B}, {0x1EA0,0x1EF9}, {0x1F00,0x1F15}, {0x1F18,0x1F1D},
        {0x1F20,0x1F45}, {0x1F48,0x1F4D}, {0x1F50,0x1F57}, {0x1F59,0x1F59},
        {0x1F5B,0x1F5B}, {0x1F5D,0x1F5D}, {0x1F5F,0x1F7D}, {0x1F80,0x1FB4},
        {0x1FB6,0x1FBC}, {0x1FBE,0x1FBE}, {0x1FC2,0x1FC4}, {0x1FC6,0x1FCC},
        {0x1FD0,0x1FD3}, {0x1FD6,0x1FDB}, {0x1FE0,0x1FEC}, {0x1FF2,0x1FF4},
        {0x1FF6,0x1FFC}, {0x2126,0x2126}, {0x212A,0x212B}, {0x212E,0x212E},
        {0x2180,0x2182}, {0x3041,0x3094}, {0x30A1,0x30FA}, {0x3105,0x312C},
        {0xAC00,0xD7A3}
    };
    int low, high;

    /* a negative value must never be used as an array index */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(baseRanges) / sizeof(baseRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < baseRanges[mid][0])
            high = mid - 1;
        else if (c > baseRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
647
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The accepted code points are kept as a list of inclusive
 * [low, high] ranges which is scanned in order.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const int digitRanges[][2] = {
        {0x0030,0x0039}, {0x0660,0x0669}, {0x06F0,0x06F9}, {0x0966,0x096F},
        {0x09E6,0x09EF}, {0x0A66,0x0A6F}, {0x0AE6,0x0AEF}, {0x0B66,0x0B6F},
        {0x0BE7,0x0BEF}, {0x0C66,0x0C6F}, {0x0CE6,0x0CEF}, {0x0D66,0x0D6F},
        {0x0E50,0x0E59}, {0x0ED0,0x0ED9}, {0x0F20,0x0F29}
    };
    const int count = (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    for (idx = 0; idx < count; idx++) {
        if ((c >= digitRanges[idx][0]) && (c <= digitRanges[idx][1]))
            return(1);
    }
    return(0);
}
677
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The accepted code points are kept as a list of inclusive
 * [low, high] ranges in ascending order; nothing below the first
 * range (0x0300) can match, so those values are rejected immediately.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    static const int combiningRanges[][2] = {
        {0x0300,0x0345}, {0x0360,0x0361}, {0x0483,0x0486}, {0x0591,0x05A1},
        {0x05A3,0x05B9}, {0x05BB,0x05BD}, {0x05BF,0x05BF}, {0x05C1,0x05C2},
        {0x05C4,0x05C4}, {0x064B,0x0652}, {0x0670,0x0670}, {0x06D6,0x06DC},
        {0x06DD,0x06DF}, {0x06E0,0x06E4}, {0x06E7,0x06E8}, {0x06EA,0x06ED},
        {0x0901,0x0903}, {0x093C,0x093C}, {0x093E,0x094C}, {0x094D,0x094D},
        {0x0951,0x0954}, {0x0962,0x0963}, {0x0981,0x0983}, {0x09BC,0x09BC},
        {0x09BE,0x09BE}, {0x09BF,0x09BF}, {0x09C0,0x09C4}, {0x09C7,0x09C8},
        {0x09CB,0x09CD}, {0x09D7,0x09D7}, {0x09E2,0x09E3}, {0x0A02,0x0A02},
        {0x0A3C,0x0A3C}, {0x0A3E,0x0A3E}, {0x0A3F,0x0A3F}, {0x0A40,0x0A42},
        {0x0A47,0x0A48}, {0x0A4B,0x0A4D}, {0x0A70,0x0A71}, {0x0A81,0x0A83},
        {0x0ABC,0x0ABC}, {0x0ABE,0x0AC5}, {0x0AC7,0x0AC9}, {0x0ACB,0x0ACD},
        {0x0B01,0x0B03}, {0x0B3C,0x0B3C}, {0x0B3E,0x0B43}, {0x0B47,0x0B48},
        {0x0B4B,0x0B4D}, {0x0B56,0x0B57}, {0x0B82,0x0B83}, {0x0BBE,0x0BC2},
        {0x0BC6,0x0BC8}, {0x0BCA,0x0BCD}, {0x0BD7,0x0BD7}, {0x0C01,0x0C03},
        {0x0C3E,0x0C44}, {0x0C46,0x0C48}, {0x0C4A,0x0C4D}, {0x0C55,0x0C56},
        {0x0C82,0x0C83}, {0x0CBE,0x0CC4}, {0x0CC6,0x0CC8}, {0x0CCA,0x0CCD},
        {0x0CD5,0x0CD6}, {0x0D02,0x0D03}, {0x0D3E,0x0D43}, {0x0D46,0x0D48},
        {0x0D4A,0x0D4D}, {0x0D57,0x0D57}, {0x0E31,0x0E31}, {0x0E34,0x0E3A},
        {0x0E47,0x0E4E}, {0x0EB1,0x0EB1}, {0x0EB4,0x0EB9}, {0x0EBB,0x0EBC},
        {0x0EC8,0x0ECD}, {0x0F18,0x0F19}, {0x0F35,0x0F35}, {0x0F37,0x0F37},
        {0x0F39,0x0F39}, {0x0F3E,0x0F3E}, {0x0F3F,0x0F3F}, {0x0F71,0x0F84},
        {0x0F86,0x0F8B}, {0x0F90,0x0F95}, {0x0F97,0x0F97}, {0x0F99,0x0FAD},
        {0x0FB1,0x0FB7}, {0x0FB9,0x0FB9}, {0x20D0,0x20DC}, {0x20E1,0x20E1},
        {0x302A,0x302F}, {0x3099,0x3099}, {0x309A,0x309A}
    };
    const int count =
        (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0]));
    int idx;

    /* nothing below the first range can match */
    if (c < 0x0300)
        return(0);

    for (idx = 0; idx < count; idx++) {
        if ((c >= combiningRanges[idx][0]) && (c <= combiningRanges[idx][1]))
            return(1);
    }
    return(0);
}
790
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE], including the middle code point 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
815
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * range 0xF900 - 0xFA2D is not part of it and is therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick exit: every alternative lies well above 0x0100 */
    if (c < 0x0100)
        return(0);
    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           ((c >= 0x3021) && (c <= 0x3029)) ||
           (c == 0x3007));
}
833
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 * using the IS_BASECHAR() and IS_IDEOGRAPHIC() tests.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
847
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* the three allowed white space characters */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
870
871/************************************************************************
872 * *
873 * Input handling functions for progressive parsing *
874 * *
875 ************************************************************************/
876
877/* #define DEBUG_INPUT */
878/* #define DEBUG_STACK */
879/* #define DEBUG_PUSH */
880
881
882/* we need to keep enough input to show errors in context */
883#define LINE_LEN 80
884
885#ifdef DEBUG_INPUT
886#define CHECK_BUFFER(in) check_buffer(in)
887
888void check_buffer(xmlParserInputPtr in) {
889 if (in->base != in->buf->buffer->content) {
890 xmlGenericError(xmlGenericErrorContext,
891 "xmlParserInput: base mismatch problem\n");
892 }
893 if (in->cur < in->base) {
894 xmlGenericError(xmlGenericErrorContext,
895 "xmlParserInput: cur < base problem\n");
896 }
897 if (in->cur > in->base + in->buf->buffer->use) {
898 xmlGenericError(xmlGenericErrorContext,
899 "xmlParserInput: cur > base + use problem\n");
900 }
901 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
902 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
903 in->buf->buffer->use, in->buf->buffer->size);
904}
905
906#else
907#define CHECK_BUFFER(in)
908#endif
909
910
911/**
912 * xmlParserInputRead:
913 * @in: an XML parser input
914 * @len: an indicative size for the lookahead
915 *
916 * This function refresh the input for the parser. It doesn't try to
917 * preserve pointers to the input buffer, and discard already read data
918 *
919 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
920 * end of this entity
921 */
922int
923xmlParserInputRead(xmlParserInputPtr in, int len) {
924 int ret;
925 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000926 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000927
928#ifdef DEBUG_INPUT
929 xmlGenericError(xmlGenericErrorContext, "Read\n");
930#endif
931 if (in->buf == NULL) return(-1);
932 if (in->base == NULL) return(-1);
933 if (in->cur == NULL) return(-1);
934 if (in->buf->buffer == NULL) return(-1);
935 if (in->buf->readcallback == NULL) return(-1);
936
937 CHECK_BUFFER(in);
938
939 used = in->cur - in->buf->buffer->content;
940 ret = xmlBufferShrink(in->buf->buffer, used);
941 if (ret > 0) {
942 in->cur -= ret;
943 in->consumed += ret;
944 }
945 ret = xmlParserInputBufferRead(in->buf, len);
946 if (in->base != in->buf->buffer->content) {
947 /*
948 * the buffer has been realloced
949 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000950 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000951 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000952 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000953 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000954 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000955
956 CHECK_BUFFER(in);
957
958 return(ret);
959}
960
961/**
962 * xmlParserInputGrow:
963 * @in: an XML parser input
964 * @len: an indicative size for the lookahead
965 *
966 * This function increase the input for the parser. It tries to
967 * preserve pointers to the input buffer, and keep already read data
968 *
969 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
970 * end of this entity
971 */
972int
973xmlParserInputGrow(xmlParserInputPtr in, int len) {
974 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000975 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000976
977#ifdef DEBUG_INPUT
978 xmlGenericError(xmlGenericErrorContext, "Grow\n");
979#endif
980 if (in->buf == NULL) return(-1);
981 if (in->base == NULL) return(-1);
982 if (in->cur == NULL) return(-1);
983 if (in->buf->buffer == NULL) return(-1);
984
985 CHECK_BUFFER(in);
986
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000987 indx = in->cur - in->base;
988 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000989
990 CHECK_BUFFER(in);
991
992 return(0);
993 }
994 if (in->buf->readcallback != NULL)
995 ret = xmlParserInputBufferGrow(in->buf, len);
996 else
997 return(0);
998
999 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001000 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001001 * block, but we use it really as an integer to do some
1002 * pointer arithmetic. Insure will raise it as a bug but in
1003 * that specific case, that's not !
1004 */
1005 if (in->base != in->buf->buffer->content) {
1006 /*
1007 * the buffer has been realloced
1008 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001009 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001010 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001011 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001012 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001013 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001014
1015 CHECK_BUFFER(in);
1016
1017 return(ret);
1018}
1019
1020/**
1021 * xmlParserInputShrink:
1022 * @in: an XML parser input
1023 *
1024 * This function removes used input for the parser.
1025 */
1026void
1027xmlParserInputShrink(xmlParserInputPtr in) {
1028 int used;
1029 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001030 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001031
1032#ifdef DEBUG_INPUT
1033 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1034#endif
1035 if (in->buf == NULL) return;
1036 if (in->base == NULL) return;
1037 if (in->cur == NULL) return;
1038 if (in->buf->buffer == NULL) return;
1039
1040 CHECK_BUFFER(in);
1041
1042 used = in->cur - in->buf->buffer->content;
1043 /*
1044 * Do not shrink on large buffers whose only a tiny fraction
1045 * was consumned
1046 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001047 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001048 return;
1049 if (used > INPUT_CHUNK) {
1050 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1051 if (ret > 0) {
1052 in->cur -= ret;
1053 in->consumed += ret;
1054 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001055 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001056 }
1057
1058 CHECK_BUFFER(in);
1059
1060 if (in->buf->buffer->use > INPUT_CHUNK) {
1061 return;
1062 }
1063 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1064 if (in->base != in->buf->buffer->content) {
1065 /*
1066 * the buffer has been realloced
1067 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001068 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001069 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001070 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001071 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001072 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001073
1074 CHECK_BUFFER(in);
1075}
1076
1077/************************************************************************
1078 * *
1079 * UTF8 character input and related functions *
1080 * *
1081 ************************************************************************/
1082
1083/**
1084 * xmlNextChar:
1085 * @ctxt: the XML parser context
1086 *
1087 * Skip to the next char input char.
1088 */
1089
1090void
1091xmlNextChar(xmlParserCtxtPtr ctxt) {
1092 if (ctxt->instate == XML_PARSER_EOF)
1093 return;
1094
1095 /*
1096 * 2.11 End-of-Line Handling
1097 * the literal two-character sequence "#xD#xA" or a standalone
1098 * literal #xD, an XML processor must pass to the application
1099 * the single character #xA.
1100 */
1101 if (ctxt->token != 0) ctxt->token = 0;
1102 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1103 if ((*ctxt->input->cur == 0) &&
1104 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1105 (ctxt->instate != XML_PARSER_COMMENT)) {
1106 /*
1107 * If we are at the end of the current entity and
1108 * the context allows it, we pop consumed entities
1109 * automatically.
1110 * the auto closing should be blocked in other cases
1111 */
1112 xmlPopInput(ctxt);
1113 } else {
1114 if (*(ctxt->input->cur) == '\n') {
1115 ctxt->input->line++; ctxt->input->col = 1;
1116 } else ctxt->input->col++;
1117 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1118 /*
1119 * We are supposed to handle UTF8, check it's valid
1120 * From rfc2044: encoding of the Unicode values on UTF-8:
1121 *
1122 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1123 * 0000 0000-0000 007F 0xxxxxxx
1124 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1125 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1126 *
1127 * Check for the 0x110000 limit too
1128 */
1129 const unsigned char *cur = ctxt->input->cur;
1130 unsigned char c;
1131
1132 c = *cur;
1133 if (c & 0x80) {
1134 if (cur[1] == 0)
1135 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1136 if ((cur[1] & 0xc0) != 0x80)
1137 goto encoding_error;
1138 if ((c & 0xe0) == 0xe0) {
1139 unsigned int val;
1140
1141 if (cur[2] == 0)
1142 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1143 if ((cur[2] & 0xc0) != 0x80)
1144 goto encoding_error;
1145 if ((c & 0xf0) == 0xf0) {
1146 if (cur[3] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if (((c & 0xf8) != 0xf0) ||
1149 ((cur[3] & 0xc0) != 0x80))
1150 goto encoding_error;
1151 /* 4-byte code */
1152 ctxt->input->cur += 4;
1153 val = (cur[0] & 0x7) << 18;
1154 val |= (cur[1] & 0x3f) << 12;
1155 val |= (cur[2] & 0x3f) << 6;
1156 val |= cur[3] & 0x3f;
1157 } else {
1158 /* 3-byte code */
1159 ctxt->input->cur += 3;
1160 val = (cur[0] & 0xf) << 12;
1161 val |= (cur[1] & 0x3f) << 6;
1162 val |= cur[2] & 0x3f;
1163 }
1164 if (((val > 0xd7ff) && (val < 0xe000)) ||
1165 ((val > 0xfffd) && (val < 0x10000)) ||
1166 (val >= 0x110000)) {
1167 if ((ctxt->sax != NULL) &&
1168 (ctxt->sax->error != NULL))
1169 ctxt->sax->error(ctxt->userData,
1170 "Char 0x%X out of allowed range\n", val);
1171 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1172 ctxt->wellFormed = 0;
1173 ctxt->disableSAX = 1;
1174 }
1175 } else
1176 /* 2-byte code */
1177 ctxt->input->cur += 2;
1178 } else
1179 /* 1-byte code */
1180 ctxt->input->cur++;
1181 } else {
1182 /*
1183 * Assume it's a fixed lenght encoding (1) with
1184 * a compatibke encoding for the ASCII set, since
1185 * XML constructs only use < 128 chars
1186 */
1187 ctxt->input->cur++;
1188 }
1189 ctxt->nbChars++;
1190 if (*ctxt->input->cur == 0)
1191 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1192 }
1193 } else {
1194 ctxt->input->cur++;
1195 ctxt->nbChars++;
1196 if (*ctxt->input->cur == 0)
1197 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1198 }
1199 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1200 xmlParserHandlePEReference(ctxt);
1201 if ((*ctxt->input->cur == 0) &&
1202 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1203 xmlPopInput(ctxt);
1204 return;
1205encoding_error:
1206 /*
1207 * If we detect an UTF8 error that probably mean that the
1208 * input encoding didn't get properly advertized in the
1209 * declaration header. Report the error and switch the encoding
1210 * to ISO-Latin-1 (if you don't like this policy, just declare the
1211 * encoding !)
1212 */
1213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1214 ctxt->sax->error(ctxt->userData,
1215 "Input is not proper UTF-8, indicate encoding !\n");
1216 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1217 ctxt->input->cur[0], ctxt->input->cur[1],
1218 ctxt->input->cur[2], ctxt->input->cur[3]);
1219 }
1220 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1221
1222 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1223 ctxt->input->cur++;
1224 return;
1225}
1226
1227/**
1228 * xmlCurrentChar:
1229 * @ctxt: the XML parser context
1230 * @len: pointer to the length of the char read
1231 *
1232 * The current char value, if using UTF-8 this may actaully span multiple
1233 * bytes in the input buffer. Implement the end of line normalization:
1234 * 2.11 End-of-Line Handling
1235 * Wherever an external parsed entity or the literal entity value
1236 * of an internal parsed entity contains either the literal two-character
1237 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1238 * must pass to the application the single character #xA.
1239 * This behavior can conveniently be produced by normalizing all
1240 * line breaks to #xA on input, before parsing.)
1241 *
1242 * Returns the current char value and its lenght
1243 */
1244
1245int
1246xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1247 if (ctxt->instate == XML_PARSER_EOF)
1248 return(0);
1249
1250 if (ctxt->token != 0) {
1251 *len = 0;
1252 return(ctxt->token);
1253 }
1254 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1255 *len = 1;
1256 return((int) *ctxt->input->cur);
1257 }
1258 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1259 /*
1260 * We are supposed to handle UTF8, check it's valid
1261 * From rfc2044: encoding of the Unicode values on UTF-8:
1262 *
1263 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1264 * 0000 0000-0000 007F 0xxxxxxx
1265 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1266 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1267 *
1268 * Check for the 0x110000 limit too
1269 */
1270 const unsigned char *cur = ctxt->input->cur;
1271 unsigned char c;
1272 unsigned int val;
1273
1274 c = *cur;
1275 if (c & 0x80) {
1276 if (cur[1] == 0)
1277 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1278 if ((cur[1] & 0xc0) != 0x80)
1279 goto encoding_error;
1280 if ((c & 0xe0) == 0xe0) {
1281
1282 if (cur[2] == 0)
1283 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1284 if ((cur[2] & 0xc0) != 0x80)
1285 goto encoding_error;
1286 if ((c & 0xf0) == 0xf0) {
1287 if (cur[3] == 0)
1288 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1289 if (((c & 0xf8) != 0xf0) ||
1290 ((cur[3] & 0xc0) != 0x80))
1291 goto encoding_error;
1292 /* 4-byte code */
1293 *len = 4;
1294 val = (cur[0] & 0x7) << 18;
1295 val |= (cur[1] & 0x3f) << 12;
1296 val |= (cur[2] & 0x3f) << 6;
1297 val |= cur[3] & 0x3f;
1298 } else {
1299 /* 3-byte code */
1300 *len = 3;
1301 val = (cur[0] & 0xf) << 12;
1302 val |= (cur[1] & 0x3f) << 6;
1303 val |= cur[2] & 0x3f;
1304 }
1305 } else {
1306 /* 2-byte code */
1307 *len = 2;
1308 val = (cur[0] & 0x1f) << 6;
1309 val |= cur[1] & 0x3f;
1310 }
1311 if (!IS_CHAR(val)) {
1312 if ((ctxt->sax != NULL) &&
1313 (ctxt->sax->error != NULL))
1314 ctxt->sax->error(ctxt->userData,
1315 "Char 0x%X out of allowed range\n", val);
1316 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1317 ctxt->wellFormed = 0;
1318 ctxt->disableSAX = 1;
1319 }
1320 return(val);
1321 } else {
1322 /* 1-byte code */
1323 *len = 1;
1324 if (*ctxt->input->cur == 0xD) {
1325 if (ctxt->input->cur[1] == 0xA) {
1326 ctxt->nbChars++;
1327 ctxt->input->cur++;
1328 }
1329 return(0xA);
1330 }
1331 return((int) *ctxt->input->cur);
1332 }
1333 }
1334 /*
1335 * Assume it's a fixed lenght encoding (1) with
1336 * a compatibke encoding for the ASCII set, since
1337 * XML constructs only use < 128 chars
1338 */
1339 *len = 1;
1340 if (*ctxt->input->cur == 0xD) {
1341 if (ctxt->input->cur[1] == 0xA) {
1342 ctxt->nbChars++;
1343 ctxt->input->cur++;
1344 }
1345 return(0xA);
1346 }
1347 return((int) *ctxt->input->cur);
1348encoding_error:
1349 /*
1350 * If we detect an UTF8 error that probably mean that the
1351 * input encoding didn't get properly advertized in the
1352 * declaration header. Report the error and switch the encoding
1353 * to ISO-Latin-1 (if you don't like this policy, just declare the
1354 * encoding !)
1355 */
1356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1357 ctxt->sax->error(ctxt->userData,
1358 "Input is not proper UTF-8, indicate encoding !\n");
1359 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1360 ctxt->input->cur[0], ctxt->input->cur[1],
1361 ctxt->input->cur[2], ctxt->input->cur[3]);
1362 }
1363 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1364
1365 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1366 *len = 1;
1367 return((int) *ctxt->input->cur);
1368}
1369
1370/**
1371 * xmlStringCurrentChar:
1372 * @ctxt: the XML parser context
1373 * @cur: pointer to the beginning of the char
1374 * @len: pointer to the length of the char read
1375 *
1376 * The current char value, if using UTF-8 this may actaully span multiple
1377 * bytes in the input buffer.
1378 *
1379 * Returns the current char value and its lenght
1380 */
1381
1382int
1383xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1384 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1385 /*
1386 * We are supposed to handle UTF8, check it's valid
1387 * From rfc2044: encoding of the Unicode values on UTF-8:
1388 *
1389 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1390 * 0000 0000-0000 007F 0xxxxxxx
1391 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1392 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1393 *
1394 * Check for the 0x110000 limit too
1395 */
1396 unsigned char c;
1397 unsigned int val;
1398
1399 c = *cur;
1400 if (c & 0x80) {
1401 if ((cur[1] & 0xc0) != 0x80)
1402 goto encoding_error;
1403 if ((c & 0xe0) == 0xe0) {
1404
1405 if ((cur[2] & 0xc0) != 0x80)
1406 goto encoding_error;
1407 if ((c & 0xf0) == 0xf0) {
1408 if (((c & 0xf8) != 0xf0) ||
1409 ((cur[3] & 0xc0) != 0x80))
1410 goto encoding_error;
1411 /* 4-byte code */
1412 *len = 4;
1413 val = (cur[0] & 0x7) << 18;
1414 val |= (cur[1] & 0x3f) << 12;
1415 val |= (cur[2] & 0x3f) << 6;
1416 val |= cur[3] & 0x3f;
1417 } else {
1418 /* 3-byte code */
1419 *len = 3;
1420 val = (cur[0] & 0xf) << 12;
1421 val |= (cur[1] & 0x3f) << 6;
1422 val |= cur[2] & 0x3f;
1423 }
1424 } else {
1425 /* 2-byte code */
1426 *len = 2;
1427 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001428 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001429 }
1430 if (!IS_CHAR(val)) {
1431 if ((ctxt->sax != NULL) &&
1432 (ctxt->sax->error != NULL))
1433 ctxt->sax->error(ctxt->userData,
1434 "Char 0x%X out of allowed range\n", val);
1435 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1436 ctxt->wellFormed = 0;
1437 ctxt->disableSAX = 1;
1438 }
1439 return(val);
1440 } else {
1441 /* 1-byte code */
1442 *len = 1;
1443 return((int) *cur);
1444 }
1445 }
1446 /*
1447 * Assume it's a fixed lenght encoding (1) with
1448 * a compatibke encoding for the ASCII set, since
1449 * XML constructs only use < 128 chars
1450 */
1451 *len = 1;
1452 return((int) *cur);
1453encoding_error:
1454 /*
1455 * If we detect an UTF8 error that probably mean that the
1456 * input encoding didn't get properly advertized in the
1457 * declaration header. Report the error and switch the encoding
1458 * to ISO-Latin-1 (if you don't like this policy, just declare the
1459 * encoding !)
1460 */
1461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1462 ctxt->sax->error(ctxt->userData,
1463 "Input is not proper UTF-8, indicate encoding !\n");
1464 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1465 ctxt->input->cur[0], ctxt->input->cur[1],
1466 ctxt->input->cur[2], ctxt->input->cur[3]);
1467 }
1468 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1469
1470 *len = 1;
1471 return((int) *cur);
1472}
1473
1474/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001475 * xmlCopyCharMultiByte:
1476 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001477 * @val: the char value
1478 *
1479 * append the char value in the array
1480 *
1481 * Returns the number of xmlChar written
1482 */
Owen Taylor3473f882001-02-23 17:55:21 +00001483int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001484xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001485 /*
1486 * We are supposed to handle UTF8, check it's valid
1487 * From rfc2044: encoding of the Unicode values on UTF-8:
1488 *
1489 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1490 * 0000 0000-0000 007F 0xxxxxxx
1491 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1492 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1493 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001494 if (val >= 0x80) {
1495 xmlChar *savedout = out;
1496 int bits;
1497 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1498 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1499 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1500 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001501 xmlGenericError(xmlGenericErrorContext,
1502 "Internal error, xmlCopyChar 0x%X out of bound\n",
1503 val);
1504 return(0);
1505 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001506 for ( ; bits >= 0; bits-= 6)
1507 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1508 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001509 }
1510 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001511 return 1;
1512}
1513
1514/**
1515 * xmlCopyChar:
1516 * @len: Ignored, compatibility
1517 * @out: pointer to an arry of xmlChar
1518 * @val: the char value
1519 *
1520 * append the char value in the array
1521 *
1522 * Returns the number of xmlChar written
1523 */
1524
1525int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001526xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001527 /* the len parameter is ignored */
1528 if (val >= 0x80) {
1529 return(xmlCopyCharMultiByte (out, val));
1530 }
1531 *out = (xmlChar) val;
1532 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001533}
1534
1535/************************************************************************
1536 * *
1537 * Commodity functions to switch encodings *
1538 * *
1539 ************************************************************************/
1540
1541/**
1542 * xmlSwitchEncoding:
1543 * @ctxt: the parser context
1544 * @enc: the encoding value (number)
1545 *
1546 * change the input functions when discovering the character encoding
1547 * of a given entity.
1548 *
1549 * Returns 0 in case of success, -1 otherwise
1550 */
1551int
1552xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1553{
1554 xmlCharEncodingHandlerPtr handler;
1555
1556 switch (enc) {
1557 case XML_CHAR_ENCODING_ERROR:
1558 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1561 ctxt->wellFormed = 0;
1562 ctxt->disableSAX = 1;
1563 break;
1564 case XML_CHAR_ENCODING_NONE:
1565 /* let's assume it's UTF-8 without the XML decl */
1566 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1567 return(0);
1568 case XML_CHAR_ENCODING_UTF8:
1569 /* default encoding, no conversion should be needed */
1570 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1571 return(0);
1572 default:
1573 break;
1574 }
1575 handler = xmlGetCharEncodingHandler(enc);
1576 if (handler == NULL) {
1577 /*
1578 * Default handlers.
1579 */
1580 switch (enc) {
1581 case XML_CHAR_ENCODING_ERROR:
1582 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1584 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1585 ctxt->wellFormed = 0;
1586 ctxt->disableSAX = 1;
1587 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1588 break;
1589 case XML_CHAR_ENCODING_NONE:
1590 /* let's assume it's UTF-8 without the XML decl */
1591 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1592 return(0);
1593 case XML_CHAR_ENCODING_UTF8:
1594 case XML_CHAR_ENCODING_ASCII:
1595 /* default encoding, no conversion should be needed */
1596 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1597 return(0);
1598 case XML_CHAR_ENCODING_UTF16LE:
1599 break;
1600 case XML_CHAR_ENCODING_UTF16BE:
1601 break;
1602 case XML_CHAR_ENCODING_UCS4LE:
1603 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "char encoding USC4 little endian not supported\n");
1607 break;
1608 case XML_CHAR_ENCODING_UCS4BE:
1609 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1611 ctxt->sax->error(ctxt->userData,
1612 "char encoding USC4 big endian not supported\n");
1613 break;
1614 case XML_CHAR_ENCODING_EBCDIC:
1615 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "char encoding EBCDIC not supported\n");
1619 break;
1620 case XML_CHAR_ENCODING_UCS4_2143:
1621 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1623 ctxt->sax->error(ctxt->userData,
1624 "char encoding UCS4 2143 not supported\n");
1625 break;
1626 case XML_CHAR_ENCODING_UCS4_3412:
1627 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1629 ctxt->sax->error(ctxt->userData,
1630 "char encoding UCS4 3412 not supported\n");
1631 break;
1632 case XML_CHAR_ENCODING_UCS2:
1633 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1635 ctxt->sax->error(ctxt->userData,
1636 "char encoding UCS2 not supported\n");
1637 break;
1638 case XML_CHAR_ENCODING_8859_1:
1639 case XML_CHAR_ENCODING_8859_2:
1640 case XML_CHAR_ENCODING_8859_3:
1641 case XML_CHAR_ENCODING_8859_4:
1642 case XML_CHAR_ENCODING_8859_5:
1643 case XML_CHAR_ENCODING_8859_6:
1644 case XML_CHAR_ENCODING_8859_7:
1645 case XML_CHAR_ENCODING_8859_8:
1646 case XML_CHAR_ENCODING_8859_9:
1647 /*
1648 * We used to keep the internal content in the
1649 * document encoding however this turns being unmaintainable
1650 * So xmlGetCharEncodingHandler() will return non-null
1651 * values for this now.
1652 */
1653 if ((ctxt->inputNr == 1) &&
1654 (ctxt->encoding == NULL) &&
1655 (ctxt->input->encoding != NULL)) {
1656 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1657 }
1658 ctxt->charset = enc;
1659 return(0);
1660 case XML_CHAR_ENCODING_2022_JP:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding ISO-2022-JPnot supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_SHIFT_JIS:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding Shift_JIS not supported\n");
1671 break;
1672 case XML_CHAR_ENCODING_EUC_JP:
1673 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt->userData,
1676 "char encoding EUC-JPnot supported\n");
1677 break;
1678 }
1679 }
1680 if (handler == NULL)
1681 return(-1);
1682 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1683 return(xmlSwitchToEncoding(ctxt, handler));
1684}
1685
1686/**
1687 * xmlSwitchToEncoding:
1688 * @ctxt: the parser context
1689 * @handler: the encoding handler
1690 *
1691 * change the input functions when discovering the character encoding
1692 * of a given entity.
1693 *
1694 * Returns 0 in case of success, -1 otherwise
1695 */
1696int
1697xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1698{
1699 int nbchars;
1700
1701 if (handler != NULL) {
1702 if (ctxt->input != NULL) {
1703 if (ctxt->input->buf != NULL) {
1704 if (ctxt->input->buf->encoder != NULL) {
1705 if (ctxt->input->buf->encoder == handler)
1706 return(0);
1707 /*
1708 * Note: this is a bit dangerous, but that's what it
1709 * takes to use nearly compatible signature for different
1710 * encodings.
1711 */
1712 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1713 ctxt->input->buf->encoder = handler;
1714 return(0);
1715 }
1716 ctxt->input->buf->encoder = handler;
1717
1718 /*
1719 * Is there already some content down the pipe to convert ?
1720 */
1721 if ((ctxt->input->buf->buffer != NULL) &&
1722 (ctxt->input->buf->buffer->use > 0)) {
1723 int processed;
1724
1725 /*
1726 * Specific handling of the Byte Order Mark for
1727 * UTF-16
1728 */
1729 if ((handler->name != NULL) &&
1730 (!strcmp(handler->name, "UTF-16LE")) &&
1731 (ctxt->input->cur[0] == 0xFF) &&
1732 (ctxt->input->cur[1] == 0xFE)) {
1733 ctxt->input->cur += 2;
1734 }
1735 if ((handler->name != NULL) &&
1736 (!strcmp(handler->name, "UTF-16BE")) &&
1737 (ctxt->input->cur[0] == 0xFE) &&
1738 (ctxt->input->cur[1] == 0xFF)) {
1739 ctxt->input->cur += 2;
1740 }
1741
1742 /*
1743 * Shring the current input buffer.
1744 * Move it as the raw buffer and create a new input buffer
1745 */
1746 processed = ctxt->input->cur - ctxt->input->base;
1747 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1748 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1749 ctxt->input->buf->buffer = xmlBufferCreate();
1750
1751 if (ctxt->html) {
1752 /*
1753 * converst as much as possbile of the buffer
1754 */
1755 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1756 ctxt->input->buf->buffer,
1757 ctxt->input->buf->raw);
1758 } else {
1759 /*
1760 * convert just enough to get
1761 * '<?xml version="1.0" encoding="xxx"?>'
1762 * parsed with the autodetected encoding
1763 * into the parser reading buffer.
1764 */
1765 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1766 ctxt->input->buf->buffer,
1767 ctxt->input->buf->raw);
1768 }
1769 if (nbchars < 0) {
1770 xmlGenericError(xmlGenericErrorContext,
1771 "xmlSwitchToEncoding: encoder error\n");
1772 return(-1);
1773 }
1774 ctxt->input->base =
1775 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001776 ctxt->input->end =
1777 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001778
1779 }
1780 return(0);
1781 } else {
1782 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1783 /*
1784 * When parsing a static memory array one must know the
1785 * size to be able to convert the buffer.
1786 */
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
1789 "xmlSwitchEncoding : no input\n");
1790 return(-1);
1791 } else {
1792 int processed;
1793
1794 /*
1795 * Shring the current input buffer.
1796 * Move it as the raw buffer and create a new input buffer
1797 */
1798 processed = ctxt->input->cur - ctxt->input->base;
1799
1800 ctxt->input->buf->raw = xmlBufferCreate();
1801 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1802 ctxt->input->length - processed);
1803 ctxt->input->buf->buffer = xmlBufferCreate();
1804
1805 /*
1806 * convert as much as possible of the raw input
1807 * to the parser reading buffer.
1808 */
1809 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1810 ctxt->input->buf->buffer,
1811 ctxt->input->buf->raw);
1812 if (nbchars < 0) {
1813 xmlGenericError(xmlGenericErrorContext,
1814 "xmlSwitchToEncoding: encoder error\n");
1815 return(-1);
1816 }
1817
1818 /*
1819 * Conversion succeeded, get rid of the old buffer
1820 */
1821 if ((ctxt->input->free != NULL) &&
1822 (ctxt->input->base != NULL))
1823 ctxt->input->free((xmlChar *) ctxt->input->base);
1824 ctxt->input->base =
1825 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001826 ctxt->input->end =
1827 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001828 }
1829 }
1830 } else {
1831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1832 ctxt->sax->error(ctxt->userData,
1833 "xmlSwitchEncoding : no input\n");
1834 return(-1);
1835 }
1836 /*
1837 * The parsing is now done in UTF8 natively
1838 */
1839 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1840 } else
1841 return(-1);
1842 return(0);
1843
1844}
1845
1846/************************************************************************
1847 * *
1848 * Commodity functions to handle entities processing *
1849 * *
1850 ************************************************************************/
1851
1852/**
1853 * xmlFreeInputStream:
1854 * @input: an xmlParserInputPtr
1855 *
1856 * Free up an input stream.
1857 */
1858void
1859xmlFreeInputStream(xmlParserInputPtr input) {
1860 if (input == NULL) return;
1861
1862 if (input->filename != NULL) xmlFree((char *) input->filename);
1863 if (input->directory != NULL) xmlFree((char *) input->directory);
1864 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1865 if (input->version != NULL) xmlFree((char *) input->version);
1866 if ((input->free != NULL) && (input->base != NULL))
1867 input->free((xmlChar *) input->base);
1868 if (input->buf != NULL)
1869 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001870 xmlFree(input);
1871}
1872
1873/**
1874 * xmlNewInputStream:
1875 * @ctxt: an XML parser context
1876 *
1877 * Create a new input stream structure
1878 * Returns the new input stream or NULL
1879 */
1880xmlParserInputPtr
1881xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1882 xmlParserInputPtr input;
1883
1884 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1885 if (input == NULL) {
1886 if (ctxt != NULL) {
1887 ctxt->errNo = XML_ERR_NO_MEMORY;
1888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1889 ctxt->sax->error(ctxt->userData,
1890 "malloc: couldn't allocate a new input stream\n");
1891 ctxt->errNo = XML_ERR_NO_MEMORY;
1892 }
1893 return(NULL);
1894 }
1895 memset(input, 0, sizeof(xmlParserInput));
1896 input->line = 1;
1897 input->col = 1;
1898 input->standalone = -1;
1899 return(input);
1900}
1901
1902/**
1903 * xmlNewIOInputStream:
1904 * @ctxt: an XML parser context
1905 * @input: an I/O Input
1906 * @enc: the charset encoding if known
1907 *
1908 * Create a new input stream structure encapsulating the @input into
1909 * a stream suitable for the parser.
1910 *
1911 * Returns the new input stream or NULL
1912 */
1913xmlParserInputPtr
1914xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1915 xmlCharEncoding enc) {
1916 xmlParserInputPtr inputStream;
1917
1918 if (xmlParserDebugEntities)
1919 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1920 inputStream = xmlNewInputStream(ctxt);
1921 if (inputStream == NULL) {
1922 return(NULL);
1923 }
1924 inputStream->filename = NULL;
1925 inputStream->buf = input;
1926 inputStream->base = inputStream->buf->buffer->content;
1927 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001928 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001929 if (enc != XML_CHAR_ENCODING_NONE) {
1930 xmlSwitchEncoding(ctxt, enc);
1931 }
1932
1933 return(inputStream);
1934}
1935
1936/**
1937 * xmlNewEntityInputStream:
1938 * @ctxt: an XML parser context
1939 * @entity: an Entity pointer
1940 *
1941 * Create a new input stream based on an xmlEntityPtr
1942 *
1943 * Returns the new input stream or NULL
1944 */
1945xmlParserInputPtr
1946xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1947 xmlParserInputPtr input;
1948
1949 if (entity == NULL) {
1950 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1952 ctxt->sax->error(ctxt->userData,
1953 "internal: xmlNewEntityInputStream entity = NULL\n");
1954 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1955 return(NULL);
1956 }
1957 if (xmlParserDebugEntities)
1958 xmlGenericError(xmlGenericErrorContext,
1959 "new input from entity: %s\n", entity->name);
1960 if (entity->content == NULL) {
1961 switch (entity->etype) {
1962 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1963 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1965 ctxt->sax->error(ctxt->userData,
1966 "xmlNewEntityInputStream unparsed entity !\n");
1967 break;
1968 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1969 case XML_EXTERNAL_PARAMETER_ENTITY:
1970 return(xmlLoadExternalEntity((char *) entity->URI,
1971 (char *) entity->ExternalID, ctxt));
1972 case XML_INTERNAL_GENERAL_ENTITY:
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData,
1975 "Internal entity %s without content !\n", entity->name);
1976 break;
1977 case XML_INTERNAL_PARAMETER_ENTITY:
1978 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1980 ctxt->sax->error(ctxt->userData,
1981 "Internal parameter entity %s without content !\n", entity->name);
1982 break;
1983 case XML_INTERNAL_PREDEFINED_ENTITY:
1984 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1986 ctxt->sax->error(ctxt->userData,
1987 "Predefined entity %s without content !\n", entity->name);
1988 break;
1989 }
1990 return(NULL);
1991 }
1992 input = xmlNewInputStream(ctxt);
1993 if (input == NULL) {
1994 return(NULL);
1995 }
1996 input->filename = (char *) entity->URI;
1997 input->base = entity->content;
1998 input->cur = entity->content;
1999 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002000 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002001 return(input);
2002}
2003
2004/**
2005 * xmlNewStringInputStream:
2006 * @ctxt: an XML parser context
2007 * @buffer: an memory buffer
2008 *
2009 * Create a new input stream based on a memory buffer.
2010 * Returns the new input stream
2011 */
2012xmlParserInputPtr
2013xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2014 xmlParserInputPtr input;
2015
2016 if (buffer == NULL) {
2017 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2019 ctxt->sax->error(ctxt->userData,
2020 "internal: xmlNewStringInputStream string = NULL\n");
2021 return(NULL);
2022 }
2023 if (xmlParserDebugEntities)
2024 xmlGenericError(xmlGenericErrorContext,
2025 "new fixed input: %.30s\n", buffer);
2026 input = xmlNewInputStream(ctxt);
2027 if (input == NULL) {
2028 return(NULL);
2029 }
2030 input->base = buffer;
2031 input->cur = buffer;
2032 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002033 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002034 return(input);
2035}
2036
2037/**
2038 * xmlNewInputFromFile:
2039 * @ctxt: an XML parser context
2040 * @filename: the filename to use as entity
2041 *
2042 * Create a new input stream based on a file.
2043 *
2044 * Returns the new input stream or NULL in case of error
2045 */
2046xmlParserInputPtr
2047xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2048 xmlParserInputBufferPtr buf;
2049 xmlParserInputPtr inputStream;
2050 char *directory = NULL;
2051 xmlChar *URI = NULL;
2052
2053 if (xmlParserDebugEntities)
2054 xmlGenericError(xmlGenericErrorContext,
2055 "new input from file: %s\n", filename);
2056 if (ctxt == NULL) return(NULL);
2057 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2058 if (buf == NULL)
2059 return(NULL);
2060
2061 URI = xmlStrdup((xmlChar *) filename);
2062 directory = xmlParserGetDirectory((const char *) URI);
2063
2064 inputStream = xmlNewInputStream(ctxt);
2065 if (inputStream == NULL) {
2066 if (directory != NULL) xmlFree((char *) directory);
2067 if (URI != NULL) xmlFree((char *) URI);
2068 return(NULL);
2069 }
2070
2071 inputStream->filename = (const char *) URI;
2072 inputStream->directory = directory;
2073 inputStream->buf = buf;
2074
2075 inputStream->base = inputStream->buf->buffer->content;
2076 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002077 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002078 if ((ctxt->directory == NULL) && (directory != NULL))
2079 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2080 return(inputStream);
2081}
2082
2083/************************************************************************
2084 * *
2085 * Commodity functions to handle parser contexts *
2086 * *
2087 ************************************************************************/
2088
2089/**
2090 * xmlInitParserCtxt:
2091 * @ctxt: an XML parser context
2092 *
2093 * Initialize a parser context
2094 */
2095
2096void
2097xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2098{
2099 xmlSAXHandler *sax;
2100
2101 xmlDefaultSAXHandlerInit();
2102
2103 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2104 if (sax == NULL) {
2105 xmlGenericError(xmlGenericErrorContext,
2106 "xmlInitParserCtxt: out of memory\n");
2107 }
2108 else
2109 memset(sax, 0, sizeof(xmlSAXHandler));
2110
2111 /* Allocate the Input stack */
2112 ctxt->inputTab = (xmlParserInputPtr *)
2113 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2114 if (ctxt->inputTab == NULL) {
2115 xmlGenericError(xmlGenericErrorContext,
2116 "xmlInitParserCtxt: out of memory\n");
2117 ctxt->inputNr = 0;
2118 ctxt->inputMax = 0;
2119 ctxt->input = NULL;
2120 return;
2121 }
2122 ctxt->inputNr = 0;
2123 ctxt->inputMax = 5;
2124 ctxt->input = NULL;
2125
2126 ctxt->version = NULL;
2127 ctxt->encoding = NULL;
2128 ctxt->standalone = -1;
2129 ctxt->hasExternalSubset = 0;
2130 ctxt->hasPErefs = 0;
2131 ctxt->html = 0;
2132 ctxt->external = 0;
2133 ctxt->instate = XML_PARSER_START;
2134 ctxt->token = 0;
2135 ctxt->directory = NULL;
2136
2137 /* Allocate the Node stack */
2138 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2139 if (ctxt->nodeTab == NULL) {
2140 xmlGenericError(xmlGenericErrorContext,
2141 "xmlInitParserCtxt: out of memory\n");
2142 ctxt->nodeNr = 0;
2143 ctxt->nodeMax = 0;
2144 ctxt->node = NULL;
2145 ctxt->inputNr = 0;
2146 ctxt->inputMax = 0;
2147 ctxt->input = NULL;
2148 return;
2149 }
2150 ctxt->nodeNr = 0;
2151 ctxt->nodeMax = 10;
2152 ctxt->node = NULL;
2153
2154 /* Allocate the Name stack */
2155 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2156 if (ctxt->nameTab == NULL) {
2157 xmlGenericError(xmlGenericErrorContext,
2158 "xmlInitParserCtxt: out of memory\n");
2159 ctxt->nodeNr = 0;
2160 ctxt->nodeMax = 0;
2161 ctxt->node = NULL;
2162 ctxt->inputNr = 0;
2163 ctxt->inputMax = 0;
2164 ctxt->input = NULL;
2165 ctxt->nameNr = 0;
2166 ctxt->nameMax = 0;
2167 ctxt->name = NULL;
2168 return;
2169 }
2170 ctxt->nameNr = 0;
2171 ctxt->nameMax = 10;
2172 ctxt->name = NULL;
2173
2174 /* Allocate the space stack */
2175 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2176 if (ctxt->spaceTab == NULL) {
2177 xmlGenericError(xmlGenericErrorContext,
2178 "xmlInitParserCtxt: out of memory\n");
2179 ctxt->nodeNr = 0;
2180 ctxt->nodeMax = 0;
2181 ctxt->node = NULL;
2182 ctxt->inputNr = 0;
2183 ctxt->inputMax = 0;
2184 ctxt->input = NULL;
2185 ctxt->nameNr = 0;
2186 ctxt->nameMax = 0;
2187 ctxt->name = NULL;
2188 ctxt->spaceNr = 0;
2189 ctxt->spaceMax = 0;
2190 ctxt->space = NULL;
2191 return;
2192 }
2193 ctxt->spaceNr = 1;
2194 ctxt->spaceMax = 10;
2195 ctxt->spaceTab[0] = -1;
2196 ctxt->space = &ctxt->spaceTab[0];
2197
Daniel Veillard14be0a12001-03-03 18:50:55 +00002198 ctxt->sax = sax;
2199 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2200
Owen Taylor3473f882001-02-23 17:55:21 +00002201 ctxt->userData = ctxt;
2202 ctxt->myDoc = NULL;
2203 ctxt->wellFormed = 1;
2204 ctxt->valid = 1;
2205 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2206 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2207 ctxt->pedantic = xmlPedanticParserDefaultValue;
2208 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2209 ctxt->vctxt.userData = ctxt;
2210 if (ctxt->validate) {
2211 ctxt->vctxt.error = xmlParserValidityError;
2212 if (xmlGetWarningsDefaultValue == 0)
2213 ctxt->vctxt.warning = NULL;
2214 else
2215 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002216 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 } else {
2218 ctxt->vctxt.error = NULL;
2219 ctxt->vctxt.warning = NULL;
2220 }
2221 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2222 ctxt->record_info = 0;
2223 ctxt->nbChars = 0;
2224 ctxt->checkIndex = 0;
2225 ctxt->inSubset = 0;
2226 ctxt->errNo = XML_ERR_OK;
2227 ctxt->depth = 0;
2228 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2229 xmlInitNodeInfoSeq(&ctxt->node_seq);
2230}
2231
2232/**
2233 * xmlFreeParserCtxt:
2234 * @ctxt: an XML parser context
2235 *
2236 * Free all the memory used by a parser context. However the parsed
2237 * document in ctxt->myDoc is not freed.
2238 */
2239
2240void
2241xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2242{
2243 xmlParserInputPtr input;
2244 xmlChar *oldname;
2245
2246 if (ctxt == NULL) return;
2247
2248 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2249 xmlFreeInputStream(input);
2250 }
2251 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2252 xmlFree(oldname);
2253 }
2254 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2255 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2256 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2257 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2258 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2259 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2260 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2261 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2262 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002263 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2264 xmlFree(ctxt->sax);
2265 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2266 xmlFree(ctxt);
2267}
2268
2269/**
2270 * xmlNewParserCtxt:
2271 *
2272 * Allocate and initialize a new parser context.
2273 *
2274 * Returns the xmlParserCtxtPtr or NULL
2275 */
2276
2277xmlParserCtxtPtr
2278xmlNewParserCtxt()
2279{
2280 xmlParserCtxtPtr ctxt;
2281
2282 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2283 if (ctxt == NULL) {
2284 xmlGenericError(xmlGenericErrorContext,
2285 "xmlNewParserCtxt : cannot allocate context\n");
2286 perror("malloc");
2287 return(NULL);
2288 }
2289 memset(ctxt, 0, sizeof(xmlParserCtxt));
2290 xmlInitParserCtxt(ctxt);
2291 return(ctxt);
2292}
2293
2294/************************************************************************
2295 * *
 *		Handling of node information				*
2297 * *
2298 ************************************************************************/
2299
2300/**
2301 * xmlClearParserCtxt:
2302 * @ctxt: an XML parser context
2303 *
2304 * Clear (release owned resources) and reinitialize a parser context
2305 */
2306
2307void
2308xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2309{
2310 xmlClearNodeInfoSeq(&ctxt->node_seq);
2311 xmlInitParserCtxt(ctxt);
2312}
2313
2314/**
2315 * xmlParserFindNodeInfo:
2316 * @ctxt: an XML parser context
2317 * @node: an XML node within the tree
2318 *
2319 * Find the parser node info struct for a given node
2320 *
2321 * Returns an xmlParserNodeInfo block pointer or NULL
2322 */
2323const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2324 const xmlNode* node)
2325{
2326 unsigned long pos;
2327
2328 /* Find position where node should be at */
2329 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2330 if ( ctx->node_seq.buffer[pos].node == node )
2331 return &ctx->node_seq.buffer[pos];
2332 else
2333 return NULL;
2334}
2335
2336
2337/**
2338 * xmlInitNodeInfoSeq:
2339 * @seq: a node info sequence pointer
2340 *
2341 * -- Initialize (set to initial state) node info sequence
2342 */
2343void
2344xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2345{
2346 seq->length = 0;
2347 seq->maximum = 0;
2348 seq->buffer = NULL;
2349}
2350
2351/**
2352 * xmlClearNodeInfoSeq:
2353 * @seq: a node info sequence pointer
2354 *
2355 * -- Clear (release memory and reinitialize) node
2356 * info sequence
2357 */
2358void
2359xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2360{
2361 if ( seq->buffer != NULL )
2362 xmlFree(seq->buffer);
2363 xmlInitNodeInfoSeq(seq);
2364}
2365
2366
2367/**
2368 * xmlParserFindNodeInfoIndex:
2369 * @seq: a node info sequence pointer
2370 * @node: an XML node pointer
2371 *
2372 *
2373 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2374 * the given node is or should be at in a sorted sequence
2375 *
2376 * Returns a long indicating the position of the record
2377 */
2378unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2379 const xmlNode* node)
2380{
2381 unsigned long upper, lower, middle;
2382 int found = 0;
2383
2384 /* Do a binary search for the key */
2385 lower = 1;
2386 upper = seq->length;
2387 middle = 0;
2388 while ( lower <= upper && !found) {
2389 middle = lower + (upper - lower) / 2;
2390 if ( node == seq->buffer[middle - 1].node )
2391 found = 1;
2392 else if ( node < seq->buffer[middle - 1].node )
2393 upper = middle - 1;
2394 else
2395 lower = middle + 1;
2396 }
2397
2398 /* Return position */
2399 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2400 return middle;
2401 else
2402 return middle - 1;
2403}
2404
2405
2406/**
2407 * xmlParserAddNodeInfo:
2408 * @ctxt: an XML parser context
2409 * @info: a node info sequence pointer
2410 *
2411 * Insert node info record into the sorted sequence
2412 */
2413void
2414xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2415 const xmlParserNodeInfo* info)
2416{
2417 unsigned long pos;
2418 static unsigned int block_size = 5;
2419
2420 /* Find pos and check to see if node is already in the sequence */
2421 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2422 if ( pos < ctxt->node_seq.length
2423 && ctxt->node_seq.buffer[pos].node == info->node ) {
2424 ctxt->node_seq.buffer[pos] = *info;
2425 }
2426
2427 /* Otherwise, we need to add new node to buffer */
2428 else {
2429 /* Expand buffer by 5 if needed */
2430 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2431 xmlParserNodeInfo* tmp_buffer;
2432 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2433 *(ctxt->node_seq.maximum + block_size));
2434
2435 if ( ctxt->node_seq.buffer == NULL )
2436 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2437 else
2438 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2439
2440 if ( tmp_buffer == NULL ) {
2441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2442 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2443 ctxt->errNo = XML_ERR_NO_MEMORY;
2444 return;
2445 }
2446 ctxt->node_seq.buffer = tmp_buffer;
2447 ctxt->node_seq.maximum += block_size;
2448 }
2449
2450 /* If position is not at end, move elements out of the way */
2451 if ( pos != ctxt->node_seq.length ) {
2452 unsigned long i;
2453
2454 for ( i = ctxt->node_seq.length; i > pos; i-- )
2455 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2456 }
2457
2458 /* Copy element and increase length */
2459 ctxt->node_seq.buffer[pos] = *info;
2460 ctxt->node_seq.length++;
2461 }
2462}
2463
2464/************************************************************************
2465 * *
2466 * Deprecated functions kept for compatibility *
2467 * *
2468 ************************************************************************/
2469
2470/*
2471 * xmlCheckLanguageID
2472 * @lang: pointer to the string value
2473 *
2474 * Checks that the value conforms to the LanguageID production:
2475 *
2476 * NOTE: this is somewhat deprecated, those productions were removed from
2477 * the XML Second edition.
2478 *
2479 * [33] LanguageID ::= Langcode ('-' Subcode)*
2480 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2481 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2482 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2483 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2484 * [38] Subcode ::= ([a-z] | [A-Z])+
2485 *
2486 * Returns 1 if correct 0 otherwise
2487 **/
2488int
2489xmlCheckLanguageID(const xmlChar *lang) {
2490 const xmlChar *cur = lang;
2491
2492 if (cur == NULL)
2493 return(0);
2494 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2495 ((cur[0] == 'I') && (cur[1] == '-'))) {
2496 /*
2497 * IANA code
2498 */
2499 cur += 2;
2500 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2501 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2502 cur++;
2503 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2504 ((cur[0] == 'X') && (cur[1] == '-'))) {
2505 /*
2506 * User code
2507 */
2508 cur += 2;
2509 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2510 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2511 cur++;
2512 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2513 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2514 /*
2515 * ISO639
2516 */
2517 cur++;
2518 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2519 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2520 cur++;
2521 else
2522 return(0);
2523 } else
2524 return(0);
2525 while (cur[0] != 0) { /* non input consuming */
2526 if (cur[0] != '-')
2527 return(0);
2528 cur++;
2529 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2530 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2531 cur++;
2532 else
2533 return(0);
2534 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2535 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2536 cur++;
2537 }
2538 return(1);
2539}
2540
2541/**
2542 * xmlDecodeEntities:
2543 * @ctxt: the parser context
2544 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2545 * @len: the len to decode (in bytes !), -1 for no size limit
2546 * @end: an end marker xmlChar, 0 if none
2547 * @end2: an end marker xmlChar, 0 if none
2548 * @end3: an end marker xmlChar, 0 if none
2549 *
2550 * This function is deprecated, we now always process entities content
2551 * through xmlStringDecodeEntities
2552 *
2553 * TODO: remove it in next major release.
2554 *
2555 * [67] Reference ::= EntityRef | CharRef
2556 *
2557 * [69] PEReference ::= '%' Name ';'
2558 *
2559 * Returns A newly allocated string with the substitution done. The caller
2560 * must deallocate it !
2561 */
2562xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002563xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2564 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002565#if 0
2566 xmlChar *buffer = NULL;
2567 unsigned int buffer_size = 0;
2568 unsigned int nbchars = 0;
2569
2570 xmlChar *current = NULL;
2571 xmlEntityPtr ent;
2572 unsigned int max = (unsigned int) len;
2573 int c,l;
2574#endif
2575
2576 static int deprecated = 0;
2577 if (!deprecated) {
2578 xmlGenericError(xmlGenericErrorContext,
2579 "xmlDecodeEntities() deprecated function reached\n");
2580 deprecated = 1;
2581 }
2582
2583#if 0
2584 if (ctxt->depth > 40) {
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Detected entity reference loop\n");
2588 ctxt->wellFormed = 0;
2589 ctxt->disableSAX = 1;
2590 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2591 return(NULL);
2592 }
2593
2594 /*
2595 * allocate a translation buffer.
2596 */
2597 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2598 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2599 if (buffer == NULL) {
2600 perror("xmlDecodeEntities: malloc failed");
2601 return(NULL);
2602 }
2603
2604 /*
2605 * Ok loop until we reach one of the ending char or a size limit.
2606 */
2607 GROW;
2608 c = CUR_CHAR(l);
2609 while ((nbchars < max) && (c != end) && /* NOTUSED */
2610 (c != end2) && (c != end3)) {
2611 GROW;
2612 if (c == 0) break;
2613 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2614 int val = xmlParseCharRef(ctxt);
2615 COPY_BUF(0,buffer,nbchars,val);
2616 NEXTL(l);
2617 } else if ((c == '&') && (ctxt->token != '&') &&
2618 (what & XML_SUBSTITUTE_REF)) {
2619 if (xmlParserDebugEntities)
2620 xmlGenericError(xmlGenericErrorContext,
2621 "decoding Entity Reference\n");
2622 ent = xmlParseEntityRef(ctxt);
2623 if ((ent != NULL) &&
2624 (ctxt->replaceEntities != 0)) {
2625 current = ent->content;
2626 while (*current != 0) { /* non input consuming loop */
2627 buffer[nbchars++] = *current++;
2628 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2629 growBuffer(buffer);
2630 }
2631 }
2632 } else if (ent != NULL) {
2633 const xmlChar *cur = ent->name;
2634
2635 buffer[nbchars++] = '&';
2636 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2637 growBuffer(buffer);
2638 }
2639 while (*cur != 0) { /* non input consuming loop */
2640 buffer[nbchars++] = *cur++;
2641 }
2642 buffer[nbchars++] = ';';
2643 }
2644 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2645 /*
2646 * a PEReference induce to switch the entity flow,
2647 * we break here to flush the current set of chars
2648 * parsed if any. We will be called back later.
2649 */
2650 if (xmlParserDebugEntities)
2651 xmlGenericError(xmlGenericErrorContext,
2652 "decoding PE Reference\n");
2653 if (nbchars != 0) break;
2654
2655 xmlParsePEReference(ctxt);
2656
2657 /*
2658 * Pop-up of finished entities.
2659 */
2660 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2661 xmlPopInput(ctxt);
2662
2663 break;
2664 } else {
2665 COPY_BUF(l,buffer,nbchars,c);
2666 NEXTL(l);
2667 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2668 growBuffer(buffer);
2669 }
2670 }
2671 c = CUR_CHAR(l);
2672 }
2673 buffer[nbchars++] = 0;
2674 return(buffer);
2675#endif
2676 return(NULL);
2677}
2678
2679/**
2680 * xmlNamespaceParseNCName:
2681 * @ctxt: an XML parser context
2682 *
2683 * parse an XML namespace name.
2684 *
2685 * TODO: this seems not in use anymore, the namespace handling is done on
2686 * top of the SAX interfaces, i.e. not on raw input.
2687 *
2688 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2689 *
2690 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2691 * CombiningChar | Extender
2692 *
2693 * Returns the namespace name or NULL
2694 */
2695
2696xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002697xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002698#if 0
2699 xmlChar buf[XML_MAX_NAMELEN + 5];
2700 int len = 0, l;
2701 int cur = CUR_CHAR(l);
2702#endif
2703
2704 static int deprecated = 0;
2705 if (!deprecated) {
2706 xmlGenericError(xmlGenericErrorContext,
2707 "xmlNamespaceParseNCName() deprecated function reached\n");
2708 deprecated = 1;
2709 }
2710
2711#if 0
2712 /* load first the value of the char !!! */
2713 GROW;
2714 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2715
2716xmlGenericError(xmlGenericErrorContext,
2717 "xmlNamespaceParseNCName: reached loop 3\n");
2718 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2719 (cur == '.') || (cur == '-') ||
2720 (cur == '_') ||
2721 (IS_COMBINING(cur)) ||
2722 (IS_EXTENDER(cur))) {
2723 COPY_BUF(l,buf,len,cur);
2724 NEXTL(l);
2725 cur = CUR_CHAR(l);
2726 if (len >= XML_MAX_NAMELEN) {
2727 xmlGenericError(xmlGenericErrorContext,
2728 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2729 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2730 (cur == '.') || (cur == '-') ||
2731 (cur == '_') ||
2732 (IS_COMBINING(cur)) ||
2733 (IS_EXTENDER(cur))) {
2734 NEXTL(l);
2735 cur = CUR_CHAR(l);
2736 }
2737 break;
2738 }
2739 }
2740 return(xmlStrndup(buf, len));
2741#endif
2742 return(NULL);
2743}
2744
2745/**
2746 * xmlNamespaceParseQName:
2747 * @ctxt: an XML parser context
2748 * @prefix: a xmlChar **
2749 *
2750 * TODO: this seems not in use anymore, the namespace handling is done on
2751 * top of the SAX interfaces, i.e. not on raw input.
2752 *
2753 * parse an XML qualified name
2754 *
2755 * [NS 5] QName ::= (Prefix ':')? LocalPart
2756 *
2757 * [NS 6] Prefix ::= NCName
2758 *
2759 * [NS 7] LocalPart ::= NCName
2760 *
2761 * Returns the local part, and prefix is updated
2762 * to get the Prefix if any.
2763 */
2764
2765xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002766xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002767
2768 static int deprecated = 0;
2769 if (!deprecated) {
2770 xmlGenericError(xmlGenericErrorContext,
2771 "xmlNamespaceParseQName() deprecated function reached\n");
2772 deprecated = 1;
2773 }
2774
2775#if 0
2776 xmlChar *ret = NULL;
2777
2778 *prefix = NULL;
2779 ret = xmlNamespaceParseNCName(ctxt);
2780 if (RAW == ':') {
2781 *prefix = ret;
2782 NEXT;
2783 ret = xmlNamespaceParseNCName(ctxt);
2784 }
2785
2786 return(ret);
2787#endif
2788 return(NULL);
2789}
2790
2791/**
2792 * xmlNamespaceParseNSDef:
2793 * @ctxt: an XML parser context
2794 *
2795 * parse a namespace prefix declaration
2796 *
2797 * TODO: this seems not in use anymore, the namespace handling is done on
2798 * top of the SAX interfaces, i.e. not on raw input.
2799 *
2800 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2801 *
2802 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2803 *
2804 * Returns the namespace name
2805 */
2806
2807xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002808xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002809 static int deprecated = 0;
2810 if (!deprecated) {
2811 xmlGenericError(xmlGenericErrorContext,
2812 "xmlNamespaceParseNSDef() deprecated function reached\n");
2813 deprecated = 1;
2814 }
2815 return(NULL);
2816#if 0
2817 xmlChar *name = NULL;
2818
2819 if ((RAW == 'x') && (NXT(1) == 'm') &&
2820 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2821 (NXT(4) == 's')) {
2822 SKIP(5);
2823 if (RAW == ':') {
2824 NEXT;
2825 name = xmlNamespaceParseNCName(ctxt);
2826 }
2827 }
2828 return(name);
2829#endif
2830}
2831
2832/**
2833 * xmlParseQuotedString:
2834 * @ctxt: an XML parser context
2835 *
2836 * Parse and return a string between quotes or doublequotes
2837 *
2838 * TODO: Deprecated, to be removed at next drop of binary compatibility
2839 *
2840 * Returns the string parser or NULL.
2841 */
2842xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002843xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002844 static int deprecated = 0;
2845 if (!deprecated) {
2846 xmlGenericError(xmlGenericErrorContext,
2847 "xmlParseQuotedString() deprecated function reached\n");
2848 deprecated = 1;
2849 }
2850 return(NULL);
2851
2852#if 0
2853 xmlChar *buf = NULL;
2854 int len = 0,l;
2855 int size = XML_PARSER_BUFFER_SIZE;
2856 int c;
2857
2858 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2859 if (buf == NULL) {
2860 xmlGenericError(xmlGenericErrorContext,
2861 "malloc of %d byte failed\n", size);
2862 return(NULL);
2863 }
2864xmlGenericError(xmlGenericErrorContext,
2865 "xmlParseQuotedString: reached loop 4\n");
2866 if (RAW == '"') {
2867 NEXT;
2868 c = CUR_CHAR(l);
2869 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2870 if (len + 5 >= size) {
2871 size *= 2;
2872 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2873 if (buf == NULL) {
2874 xmlGenericError(xmlGenericErrorContext,
2875 "realloc of %d byte failed\n", size);
2876 return(NULL);
2877 }
2878 }
2879 COPY_BUF(l,buf,len,c);
2880 NEXTL(l);
2881 c = CUR_CHAR(l);
2882 }
2883 if (c != '"') {
2884 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2886 ctxt->sax->error(ctxt->userData,
2887 "String not closed \"%.50s\"\n", buf);
2888 ctxt->wellFormed = 0;
2889 ctxt->disableSAX = 1;
2890 } else {
2891 NEXT;
2892 }
2893 } else if (RAW == '\''){
2894 NEXT;
2895 c = CUR;
2896 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2897 if (len + 1 >= size) {
2898 size *= 2;
2899 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2900 if (buf == NULL) {
2901 xmlGenericError(xmlGenericErrorContext,
2902 "realloc of %d byte failed\n", size);
2903 return(NULL);
2904 }
2905 }
2906 buf[len++] = c;
2907 NEXT;
2908 c = CUR;
2909 }
2910 if (RAW != '\'') {
2911 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2913 ctxt->sax->error(ctxt->userData,
2914 "String not closed \"%.50s\"\n", buf);
2915 ctxt->wellFormed = 0;
2916 ctxt->disableSAX = 1;
2917 } else {
2918 NEXT;
2919 }
2920 }
2921 return(buf);
2922#endif
2923}
2924
2925/**
2926 * xmlParseNamespace:
2927 * @ctxt: an XML parser context
2928 *
2929 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2930 *
2931 * This is what the older xml-name Working Draft specified, a bunch of
2932 * other stuff may still rely on it, so support is still here as
2933 * if it was declared on the root of the Tree:-(
2934 *
2935 * TODO: remove from library
2936 *
2937 * To be removed at next drop of binary compatibility
2938 */
2939
2940void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002941xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002942 static int deprecated = 0;
2943 if (!deprecated) {
2944 xmlGenericError(xmlGenericErrorContext,
2945 "xmlParseNamespace() deprecated function reached\n");
2946 deprecated = 1;
2947 }
2948
2949#if 0
2950 xmlChar *href = NULL;
2951 xmlChar *prefix = NULL;
2952 int garbage = 0;
2953
2954 /*
2955 * We just skipped "namespace" or "xml:namespace"
2956 */
2957 SKIP_BLANKS;
2958
2959xmlGenericError(xmlGenericErrorContext,
2960 "xmlParseNamespace: reached loop 5\n");
2961 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2962 /*
2963 * We can have "ns" or "prefix" attributes
2964 * Old encoding as 'href' or 'AS' attributes is still supported
2965 */
2966 if ((RAW == 'n') && (NXT(1) == 's')) {
2967 garbage = 0;
2968 SKIP(2);
2969 SKIP_BLANKS;
2970
2971 if (RAW != '=') continue;
2972 NEXT;
2973 SKIP_BLANKS;
2974
2975 href = xmlParseQuotedString(ctxt);
2976 SKIP_BLANKS;
2977 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2978 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2979 garbage = 0;
2980 SKIP(4);
2981 SKIP_BLANKS;
2982
2983 if (RAW != '=') continue;
2984 NEXT;
2985 SKIP_BLANKS;
2986
2987 href = xmlParseQuotedString(ctxt);
2988 SKIP_BLANKS;
2989 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2990 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2991 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2992 garbage = 0;
2993 SKIP(6);
2994 SKIP_BLANKS;
2995
2996 if (RAW != '=') continue;
2997 NEXT;
2998 SKIP_BLANKS;
2999
3000 prefix = xmlParseQuotedString(ctxt);
3001 SKIP_BLANKS;
3002 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3003 garbage = 0;
3004 SKIP(2);
3005 SKIP_BLANKS;
3006
3007 if (RAW != '=') continue;
3008 NEXT;
3009 SKIP_BLANKS;
3010
3011 prefix = xmlParseQuotedString(ctxt);
3012 SKIP_BLANKS;
3013 } else if ((RAW == '?') && (NXT(1) == '>')) {
3014 garbage = 0;
3015 NEXT;
3016 } else {
3017 /*
3018 * Found garbage when parsing the namespace
3019 */
3020 if (!garbage) {
3021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3022 ctxt->sax->error(ctxt->userData,
3023 "xmlParseNamespace found garbage\n");
3024 }
3025 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3026 ctxt->wellFormed = 0;
3027 ctxt->disableSAX = 1;
3028 NEXT;
3029 }
3030 }
3031
3032 MOVETO_ENDTAG(CUR_PTR);
3033 NEXT;
3034
3035 /*
3036 * Register the DTD.
3037 if (href != NULL)
3038 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3039 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3040 */
3041
3042 if (prefix != NULL) xmlFree(prefix);
3043 if (href != NULL) xmlFree(href);
3044#endif
3045}
3046
3047/**
3048 * xmlScanName:
3049 * @ctxt: an XML parser context
3050 *
3051 * Trickery: parse an XML name but without consuming the input flow
3052 * Needed for rollback cases. Used only when parsing entities references.
3053 *
3054 * TODO: seems deprecated now, only used in the default part of
3055 * xmlParserHandleReference
3056 *
3057 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3058 * CombiningChar | Extender
3059 *
3060 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3061 *
3062 * [6] Names ::= Name (S Name)*
3063 *
3064 * Returns the Name parsed or NULL
3065 */
3066
3067xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003068xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003069 static int deprecated = 0;
3070 if (!deprecated) {
3071 xmlGenericError(xmlGenericErrorContext,
3072 "xmlScanName() deprecated function reached\n");
3073 deprecated = 1;
3074 }
3075 return(NULL);
3076
3077#if 0
3078 xmlChar buf[XML_MAX_NAMELEN];
3079 int len = 0;
3080
3081 GROW;
3082 if (!IS_LETTER(RAW) && (RAW != '_') &&
3083 (RAW != ':')) {
3084 return(NULL);
3085 }
3086
3087
3088 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3089 (NXT(len) == '.') || (NXT(len) == '-') ||
3090 (NXT(len) == '_') || (NXT(len) == ':') ||
3091 (IS_COMBINING(NXT(len))) ||
3092 (IS_EXTENDER(NXT(len)))) {
3093 GROW;
3094 buf[len] = NXT(len);
3095 len++;
3096 if (len >= XML_MAX_NAMELEN) {
3097 xmlGenericError(xmlGenericErrorContext,
3098 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3099 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3100 (IS_DIGIT(NXT(len))) ||
3101 (NXT(len) == '.') || (NXT(len) == '-') ||
3102 (NXT(len) == '_') || (NXT(len) == ':') ||
3103 (IS_COMBINING(NXT(len))) ||
3104 (IS_EXTENDER(NXT(len))))
3105 len++;
3106 break;
3107 }
3108 }
3109 return(xmlStrndup(buf, len));
3110#endif
3111}
3112
3113/**
3114 * xmlParserHandleReference:
3115 * @ctxt: the parser context
3116 *
3117 * TODO: Remove, now deprecated ... the test is done directly in the
3118 * content parsing
3119 * routines.
3120 *
3121 * [67] Reference ::= EntityRef | CharRef
3122 *
3123 * [68] EntityRef ::= '&' Name ';'
3124 *
3125 * [ WFC: Entity Declared ]
3126 * the Name given in the entity reference must match that in an entity
3127 * declaration, except that well-formed documents need not declare any
3128 * of the following entities: amp, lt, gt, apos, quot.
3129 *
3130 * [ WFC: Parsed Entity ]
3131 * An entity reference must not contain the name of an unparsed entity
3132 *
3133 * [66] CharRef ::= '&#' [0-9]+ ';' |
3134 * '&#x' [0-9a-fA-F]+ ';'
3135 *
3136 * A PEReference may have been detectect in the current input stream
3137 * the handling is done accordingly to
3138 * http://www.w3.org/TR/REC-xml#entproc
3139 */
3140void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003141xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003142 static int deprecated = 0;
3143 if (!deprecated) {
3144 xmlGenericError(xmlGenericErrorContext,
3145 "xmlParserHandleReference() deprecated function reached\n");
3146 deprecated = 1;
3147 }
3148
3149#if 0
3150 xmlParserInputPtr input;
3151 xmlChar *name;
3152 xmlEntityPtr ent = NULL;
3153
3154 if (ctxt->token != 0) {
3155 return;
3156 }
3157 if (RAW != '&') return;
3158 GROW;
3159 if ((RAW == '&') && (NXT(1) == '#')) {
3160 switch(ctxt->instate) {
3161 case XML_PARSER_ENTITY_DECL:
3162 case XML_PARSER_PI:
3163 case XML_PARSER_CDATA_SECTION:
3164 case XML_PARSER_COMMENT:
3165 case XML_PARSER_SYSTEM_LITERAL:
3166 /* we just ignore it there */
3167 return;
3168 case XML_PARSER_START_TAG:
3169 return;
3170 case XML_PARSER_END_TAG:
3171 return;
3172 case XML_PARSER_EOF:
3173 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3176 ctxt->wellFormed = 0;
3177 ctxt->disableSAX = 1;
3178 return;
3179 case XML_PARSER_PROLOG:
3180 case XML_PARSER_START:
3181 case XML_PARSER_MISC:
3182 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3185 ctxt->wellFormed = 0;
3186 ctxt->disableSAX = 1;
3187 return;
3188 case XML_PARSER_EPILOG:
3189 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3191 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3192 ctxt->wellFormed = 0;
3193 ctxt->disableSAX = 1;
3194 return;
3195 case XML_PARSER_DTD:
3196 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3198 ctxt->sax->error(ctxt->userData,
3199 "CharRef are forbiden in DTDs!\n");
3200 ctxt->wellFormed = 0;
3201 ctxt->disableSAX = 1;
3202 return;
3203 case XML_PARSER_ENTITY_VALUE:
3204 /*
3205 * NOTE: in the case of entity values, we don't do the
3206 * substitution here since we need the literal
3207 * entity value to be able to save the internal
3208 * subset of the document.
3209 * This will be handled by xmlStringDecodeEntities
3210 */
3211 return;
3212 case XML_PARSER_CONTENT:
3213 return;
3214 case XML_PARSER_ATTRIBUTE_VALUE:
3215 /* ctxt->token = xmlParseCharRef(ctxt); */
3216 return;
3217 case XML_PARSER_IGNORE:
3218 return;
3219 }
3220 return;
3221 }
3222
3223 switch(ctxt->instate) {
3224 case XML_PARSER_CDATA_SECTION:
3225 return;
3226 case XML_PARSER_PI:
3227 case XML_PARSER_COMMENT:
3228 case XML_PARSER_SYSTEM_LITERAL:
3229 case XML_PARSER_CONTENT:
3230 return;
3231 case XML_PARSER_START_TAG:
3232 return;
3233 case XML_PARSER_END_TAG:
3234 return;
3235 case XML_PARSER_EOF:
3236 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3239 ctxt->wellFormed = 0;
3240 ctxt->disableSAX = 1;
3241 return;
3242 case XML_PARSER_PROLOG:
3243 case XML_PARSER_START:
3244 case XML_PARSER_MISC:
3245 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3248 ctxt->wellFormed = 0;
3249 ctxt->disableSAX = 1;
3250 return;
3251 case XML_PARSER_EPILOG:
3252 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 return;
3258 case XML_PARSER_ENTITY_VALUE:
3259 /*
3260 * NOTE: in the case of entity values, we don't do the
3261 * substitution here since we need the literal
3262 * entity value to be able to save the internal
3263 * subset of the document.
3264 * This will be handled by xmlStringDecodeEntities
3265 */
3266 return;
3267 case XML_PARSER_ATTRIBUTE_VALUE:
3268 /*
3269 * NOTE: in the case of attributes values, we don't do the
3270 * substitution here unless we are in a mode where
3271 * the parser is explicitely asked to substitute
3272 * entities. The SAX callback is called with values
3273 * without entity substitution.
3274 * This will then be handled by xmlStringDecodeEntities
3275 */
3276 return;
3277 case XML_PARSER_ENTITY_DECL:
3278 /*
3279 * we just ignore it there
3280 * the substitution will be done once the entity is referenced
3281 */
3282 return;
3283 case XML_PARSER_DTD:
3284 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3286 ctxt->sax->error(ctxt->userData,
3287 "Entity references are forbiden in DTDs!\n");
3288 ctxt->wellFormed = 0;
3289 ctxt->disableSAX = 1;
3290 return;
3291 case XML_PARSER_IGNORE:
3292 return;
3293 }
3294
3295/* TODO: this seems not reached anymore .... Verify ... */
3296xmlGenericError(xmlGenericErrorContext,
3297 "Reached deprecated section in xmlParserHandleReference()\n");
3298xmlGenericError(xmlGenericErrorContext,
3299 "Please forward the document to Daniel.Veillard@w3.org\n");
3300xmlGenericError(xmlGenericErrorContext,
3301 "indicating the version: %s, thanks !\n", xmlParserVersion);
3302 NEXT;
3303 name = xmlScanName(ctxt);
3304 if (name == NULL) {
3305 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3307 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3308 ctxt->wellFormed = 0;
3309 ctxt->disableSAX = 1;
3310 ctxt->token = '&';
3311 return;
3312 }
3313 if (NXT(xmlStrlen(name)) != ';') {
3314 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3316 ctxt->sax->error(ctxt->userData,
3317 "Entity reference: ';' expected\n");
3318 ctxt->wellFormed = 0;
3319 ctxt->disableSAX = 1;
3320 ctxt->token = '&';
3321 xmlFree(name);
3322 return;
3323 }
3324 SKIP(xmlStrlen(name) + 1);
3325 if (ctxt->sax != NULL) {
3326 if (ctxt->sax->getEntity != NULL)
3327 ent = ctxt->sax->getEntity(ctxt->userData, name);
3328 }
3329
3330 /*
3331 * [ WFC: Entity Declared ]
3332 * the Name given in the entity reference must match that in an entity
3333 * declaration, except that well-formed documents need not declare any
3334 * of the following entities: amp, lt, gt, apos, quot.
3335 */
3336 if (ent == NULL)
3337 ent = xmlGetPredefinedEntity(name);
3338 if (ent == NULL) {
3339 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3341 ctxt->sax->error(ctxt->userData,
3342 "Entity reference: entity %s not declared\n",
3343 name);
3344 ctxt->wellFormed = 0;
3345 ctxt->disableSAX = 1;
3346 xmlFree(name);
3347 return;
3348 }
3349
3350 /*
3351 * [ WFC: Parsed Entity ]
3352 * An entity reference must not contain the name of an unparsed entity
3353 */
3354 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3355 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357 ctxt->sax->error(ctxt->userData,
3358 "Entity reference to unparsed entity %s\n", name);
3359 ctxt->wellFormed = 0;
3360 ctxt->disableSAX = 1;
3361 }
3362
3363 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3364 ctxt->token = ent->content[0];
3365 xmlFree(name);
3366 return;
3367 }
3368 input = xmlNewEntityInputStream(ctxt, ent);
3369 xmlPushInput(ctxt, input);
3370 xmlFree(name);
3371#endif
3372 return;
3373}
3374
3375/**
3376 * xmlHandleEntity:
3377 * @ctxt: an XML parser context
3378 * @entity: an XML entity pointer.
3379 *
3380 * Default handling of defined entities, when should we define a new input
3381 * stream ? When do we just handle that as a set of chars ?
3382 *
3383 * OBSOLETE: to be removed at some point.
3384 */
3385
3386void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003387xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003388 static int deprecated = 0;
3389 if (!deprecated) {
3390 xmlGenericError(xmlGenericErrorContext,
3391 "xmlHandleEntity() deprecated function reached\n");
3392 deprecated = 1;
3393 }
3394
3395#if 0
3396 int len;
3397 xmlParserInputPtr input;
3398
3399 if (entity->content == NULL) {
3400 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3402 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3403 entity->name);
3404 ctxt->wellFormed = 0;
3405 ctxt->disableSAX = 1;
3406 return;
3407 }
3408 len = xmlStrlen(entity->content);
3409 if (len <= 2) goto handle_as_char;
3410
3411 /*
3412 * Redefine its content as an input stream.
3413 */
3414 input = xmlNewEntityInputStream(ctxt, entity);
3415 xmlPushInput(ctxt, input);
3416 return;
3417
3418handle_as_char:
3419 /*
3420 * Just handle the content as a set of chars.
3421 */
3422 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3423 (ctxt->sax->characters != NULL))
3424 ctxt->sax->characters(ctxt->userData, entity->content, len);
3425#endif
3426}
3427
3428/**
3429 * xmlNewGlobalNs:
3430 * @doc: the document carrying the namespace
3431 * @href: the URI associated
3432 * @prefix: the prefix for the namespace
3433 *
3434 * Creation of a Namespace, the old way using PI and without scoping
3435 * DEPRECATED !!!
3436 * It now create a namespace on the root element of the document if found.
3437 * Returns NULL this functionnality had been removed
3438 */
3439xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003440xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3441 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003442 static int deprecated = 0;
3443 if (!deprecated) {
3444 xmlGenericError(xmlGenericErrorContext,
3445 "xmlNewGlobalNs() deprecated function reached\n");
3446 deprecated = 1;
3447 }
3448 return(NULL);
3449#if 0
3450 xmlNodePtr root;
3451
3452 xmlNsPtr cur;
3453
3454 root = xmlDocGetRootElement(doc);
3455 if (root != NULL)
3456 return(xmlNewNs(root, href, prefix));
3457
3458 /*
3459 * if there is no root element yet, create an old Namespace type
3460 * and it will be moved to the root at save time.
3461 */
3462 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3463 if (cur == NULL) {
3464 xmlGenericError(xmlGenericErrorContext,
3465 "xmlNewGlobalNs : malloc failed\n");
3466 return(NULL);
3467 }
3468 memset(cur, 0, sizeof(xmlNs));
3469 cur->type = XML_GLOBAL_NAMESPACE;
3470
3471 if (href != NULL)
3472 cur->href = xmlStrdup(href);
3473 if (prefix != NULL)
3474 cur->prefix = xmlStrdup(prefix);
3475
3476 /*
3477 * Add it at the end to preserve parsing order ...
3478 */
3479 if (doc != NULL) {
3480 if (doc->oldNs == NULL) {
3481 doc->oldNs = cur;
3482 } else {
3483 xmlNsPtr prev = doc->oldNs;
3484
3485 while (prev->next != NULL) prev = prev->next;
3486 prev->next = cur;
3487 }
3488 }
3489
3490 return(NULL);
3491#endif
3492}
3493
3494/**
3495 * xmlUpgradeOldNs:
3496 * @doc: a document pointer
3497 *
3498 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3499 * DEPRECATED
3500 */
3501void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003502xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003503 static int deprecated = 0;
3504 if (!deprecated) {
3505 xmlGenericError(xmlGenericErrorContext,
3506 "xmlNewGlobalNs() deprecated function reached\n");
3507 deprecated = 1;
3508 }
3509#if 0
3510 xmlNsPtr cur;
3511
3512 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3513 if (doc->children == NULL) {
3514#ifdef DEBUG_TREE
3515 xmlGenericError(xmlGenericErrorContext,
3516 "xmlUpgradeOldNs: failed no root !\n");
3517#endif
3518 return;
3519 }
3520
3521 cur = doc->oldNs;
3522 while (cur->next != NULL) {
3523 cur->type = XML_LOCAL_NAMESPACE;
3524 cur = cur->next;
3525 }
3526 cur->type = XML_LOCAL_NAMESPACE;
3527 cur->next = doc->children->nsDef;
3528 doc->children->nsDef = doc->oldNs;
3529 doc->oldNs = NULL;
3530#endif
3531}
3532