blob: 0e4eafc80412048c7d253cf636bfcd325ab4aae2 [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +00007 */
8
9#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
13#include <config.h>
14#endif
15#include <stdio.h>
16#include <ctype.h>
17#include <string.h> /* for memset() only */
Seth Alvese7f12e61998-10-01 20:51:15 +000018#include <stdlib.h>
Daniel Veillard260a68f1998-08-13 03:39:55 +000019#include <sys/stat.h>
20#ifdef HAVE_FCNTL_H
21#include <fcntl.h>
22#endif
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_ZLIB_H
27#include <zlib.h>
28#endif
29
30#include "tree.h"
31#include "parser.h"
32#include "entities.h"
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000033#include "valid.h"
Daniel Veillard1e346af1999-02-22 10:33:01 +000034#include "parserInternals.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000035
36/************************************************************************
37 * *
38 * Parser stacks related functions and macros *
39 * *
40 ************************************************************************/
41/*
42 * Generic function for accessing stacks in the Parser Context
43 */
44
45#define PUSH_AND_POP(type, name) \
Daniel Veillard517752b1999-04-05 12:20:10 +000046extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +000047 if (ctxt->name##Nr >= ctxt->name##Max) { \
48 ctxt->name##Max *= 2; \
49 ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
50 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
51 if (ctxt->name##Tab == NULL) { \
52 fprintf(stderr, "realloc failed !\n"); \
53 exit(1); \
54 } \
55 } \
56 ctxt->name##Tab[ctxt->name##Nr] = value; \
57 ctxt->name = value; \
58 return(ctxt->name##Nr++); \
59} \
Daniel Veillard517752b1999-04-05 12:20:10 +000060extern type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +000061 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +000062 if (ctxt->name##Nr <= 0) return(0); \
63 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +000064 if (ctxt->name##Nr > 0) \
65 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
66 else \
67 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +000068 ret = ctxt->name##Tab[ctxt->name##Nr]; \
69 ctxt->name##Tab[ctxt->name##Nr] = 0; \
70 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +000071} \
72
73PUSH_AND_POP(xmlParserInputPtr, input)
74PUSH_AND_POP(xmlNodePtr, node)
75
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR  the current pointer to the CHAR to be parsed (an lvalue).
 *   CUR      the current CHAR value. It should be used internally by the
 *            parser only to compare against ASCII values.
 *   NXT(n)   the n'th next CHAR. Same as CUR, it should be used only
 *            to compare against ASCII based substrings.
 *   SKIP(n)  skip n CHARs without any line/column bookkeeping; must only
 *            be used to skip ASCII strings that were just matched.
 *   SKIP_BLANKS  advance with NEXT while the current CHAR is a blank.
 *
 * Clean macros, not dependent on an ASCII context.
 *
 *   CURRENT  the current char value.
 *   NEXT     skip to the next character, keeping ctxt->input->line and
 *            ctxt->input->col up to date. When the current CHAR is 0 it
 *            calls xmlPopInput() instead of advancing. Its value is the
 *            pointer to the CHAR that was current before the advance or,
 *            in the xmlPopInput() case, the current pointer of the input
 *            that is on top afterwards.
 *
 * CURRENT and NEXT are only defined when USE_UTF_8 is not set; the UTF-8
 * branch below is empty.
 *
 * SKIP, NXT and CUR_PTR are fully parenthesized so that they expand safely
 * inside larger expressions.
 */

#define CUR (*ctxt->input->cur)
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

#define SKIP_BLANKS 							\
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT ((*ctxt->input->cur) ?					\
                (((*(ctxt->input->cur) == '\n') ?			\
		    (ctxt->input->line++, ctxt->input->col = 1) :	\
		    (ctxt->input->col++)), ctxt->input->cur++) :	\
		(xmlPopInput(ctxt), ctxt->input->cur))
#else
#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +0000119
120
Daniel Veillard11e00581998-10-24 18:27:49 +0000121/**
122 * xmlPopInput:
123 * @ctxt: an XML parser context
124 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000125 * xmlPopInput: the current input pointed by ctxt->input came to an end
126 * pop it and return the next char.
127 *
128 * TODO A deallocation of the popped Input structure is needed
Daniel Veillard1e346af1999-02-22 10:33:01 +0000129 *
130 * Returns the current CHAR in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000131 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000132CHAR
133xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000134 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardbc50b591999-03-01 12:28:53 +0000135 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000136 return(CUR);
137}
138
Daniel Veillard11e00581998-10-24 18:27:49 +0000139/**
140 * xmlPushInput:
141 * @ctxt: an XML parser context
142 * @input: an XML parser input fragment (entity, XML fragment ...).
143 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000144 * xmlPushInput: switch to a new input stream which is stacked on top
145 * of the previous one(s).
146 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000147void
148xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000149 if (input == NULL) return;
150 inputPush(ctxt, input);
151}
152
Daniel Veillard11e00581998-10-24 18:27:49 +0000153/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000154 * xmlFreeInputStream:
155 * @input: an xmlParserInputPtr
156 *
157 * Free up an input stream.
158 */
159void
160xmlFreeInputStream(xmlParserInputPtr input) {
161 if (input == NULL) return;
162
Daniel Veillardbc50b591999-03-01 12:28:53 +0000163 if (input->filename != NULL) free((char *) input->filename);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000164 if ((input->free != NULL) && (input->base != NULL))
165 input->free((char *) input->base);
166 memset(input, -1, sizeof(xmlParserInput));
167 free(input);
168}
169
170/**
Daniel Veillard11e00581998-10-24 18:27:49 +0000171 * xmlNewEntityInputStream:
172 * @ctxt: an XML parser context
173 * @entity: an Entity pointer
174 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000175 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +0000176 * Returns the new input stream
Daniel Veillard260a68f1998-08-13 03:39:55 +0000177 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000178xmlParserInputPtr
179xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000180 xmlParserInputPtr input;
181
182 if (entity == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
184 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000185 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000186 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000187 }
188 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
190 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000191 "internal: xmlNewEntityInputStream entity->input = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000192 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000193 }
194 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
195 if (input == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
197 ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000198 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000199 }
200 input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
201 input->base = entity->content;
202 input->cur = entity->content;
203 input->line = 1;
204 input->col = 1;
Daniel Veillardd692aa41999-02-28 21:54:31 +0000205 input->free = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +0000206 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000207}
208
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000209/**
210 * xmlNewStringInputStream:
211 * @ctxt: an XML parser context
212 * @entity: an Entity pointer
213 *
214 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +0000215 * Returns the new input stream
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000216 */
217xmlParserInputPtr
218xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *string) {
219 xmlParserInputPtr input;
220
221 if (string == NULL) {
222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
223 ctxt->sax->error(ctxt,
224 "internal: xmlNewStringInputStream string = NULL\n");
225 return(NULL);
226 }
227 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
228 if (input == NULL) {
229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
230 ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
231 return(NULL);
232 }
233 input->filename = NULL;
234 input->base = string;
235 input->cur = string;
236 input->line = 1;
237 input->col = 1;
Daniel Veillardd692aa41999-02-28 21:54:31 +0000238 input->free = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000239 return(input);
240}
241
Daniel Veillard260a68f1998-08-13 03:39:55 +0000242
243/************************************************************************
244 * *
245 * Commodity functions to handle CHARs *
246 * *
247 ************************************************************************/
248
Daniel Veillard11e00581998-10-24 18:27:49 +0000249/**
250 * xmlStrndup:
251 * @cur: the input CHAR *
252 * @len: the len of @cur
253 *
254 * a strndup for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000255 *
256 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000257 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000258CHAR *
259xmlStrndup(const CHAR *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000260 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
261
262 if (ret == NULL) {
263 fprintf(stderr, "malloc of %d byte failed\n",
264 (len + 1) * sizeof(CHAR));
265 return(NULL);
266 }
267 memcpy(ret, cur, len * sizeof(CHAR));
268 ret[len] = 0;
269 return(ret);
270}
271
Daniel Veillard11e00581998-10-24 18:27:49 +0000272/**
273 * xmlStrdup:
274 * @cur: the input CHAR *
275 *
276 * a strdup for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000277 *
278 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000279 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000280CHAR *
281xmlStrdup(const CHAR *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000282 const CHAR *p = cur;
283
284 while (IS_CHAR(*p)) p++;
285 return(xmlStrndup(cur, p - cur));
286}
287
Daniel Veillard11e00581998-10-24 18:27:49 +0000288/**
289 * xmlCharStrndup:
290 * @cur: the input char *
291 * @len: the len of @cur
292 *
293 * a strndup for char's to CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000294 *
295 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000296 */
297
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000298CHAR *
299xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000300 int i;
301 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
302
303 if (ret == NULL) {
304 fprintf(stderr, "malloc of %d byte failed\n",
305 (len + 1) * sizeof(CHAR));
306 return(NULL);
307 }
308 for (i = 0;i < len;i++)
309 ret[i] = (CHAR) cur[i];
310 ret[len] = 0;
311 return(ret);
312}
313
Daniel Veillard11e00581998-10-24 18:27:49 +0000314/**
315 * xmlCharStrdup:
316 * @cur: the input char *
317 * @len: the len of @cur
318 *
319 * a strdup for char's to CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000320 *
321 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000322 */
323
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000324CHAR *
325xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000326 const char *p = cur;
327
328 while (*p != '\0') p++;
329 return(xmlCharStrndup(cur, p - cur));
330}
331
Daniel Veillard11e00581998-10-24 18:27:49 +0000332/**
333 * xmlStrcmp:
334 * @str1: the first CHAR *
335 * @str2: the second CHAR *
336 *
337 * a strcmp for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000338 *
339 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +0000340 */
341
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000342int
343xmlStrcmp(const CHAR *str1, const CHAR *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000344 register int tmp;
345
346 do {
347 tmp = *str1++ - *str2++;
348 if (tmp != 0) return(tmp);
349 } while ((*str1 != 0) && (*str2 != 0));
350 return (*str1 - *str2);
351}
352
Daniel Veillard11e00581998-10-24 18:27:49 +0000353/**
354 * xmlStrncmp:
355 * @str1: the first CHAR *
356 * @str2: the second CHAR *
357 * @len: the max comparison length
358 *
359 * a strncmp for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000360 *
361 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +0000362 */
363
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000364int
365xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000366 register int tmp;
367
368 if (len <= 0) return(0);
369 do {
370 tmp = *str1++ - *str2++;
371 if (tmp != 0) return(tmp);
372 len--;
373 if (len <= 0) return(0);
374 } while ((*str1 != 0) && (*str2 != 0));
375 return (*str1 - *str2);
376}
377
Daniel Veillard11e00581998-10-24 18:27:49 +0000378/**
379 * xmlStrchr:
380 * @str: the CHAR * array
381 * @val: the CHAR to search
382 *
383 * a strchr for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000384 *
385 * Returns the CHAR * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000386 */
387
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000388CHAR *
389xmlStrchr(const CHAR *str, CHAR val) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000390 while (*str != 0) {
391 if (*str == val) return((CHAR *) str);
392 str++;
393 }
394 return(NULL);
395}
396
Daniel Veillard11e00581998-10-24 18:27:49 +0000397/**
398 * xmlStrlen:
399 * @str: the CHAR * array
400 *
401 * lenght of a CHAR's string
Daniel Veillard1e346af1999-02-22 10:33:01 +0000402 *
403 * Returns the number of CHAR contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000404 */
405
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000406int
407xmlStrlen(const CHAR *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000408 int len = 0;
409
410 if (str == NULL) return(0);
411 while (*str != 0) {
412 str++;
413 len++;
414 }
415 return(len);
416}
417
Daniel Veillard11e00581998-10-24 18:27:49 +0000418/**
419 * xmlStrncat:
Daniel Veillard1e346af1999-02-22 10:33:01 +0000420 * @cur: the original CHAR * array
Daniel Veillard11e00581998-10-24 18:27:49 +0000421 * @add: the CHAR * array added
422 * @len: the length of @add
423 *
424 * a strncat for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000425 *
426 * Returns a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000427 */
428
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000429CHAR *
430xmlStrncat(CHAR *cur, const CHAR *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000431 int size;
432 CHAR *ret;
433
434 if ((add == NULL) || (len == 0))
435 return(cur);
436 if (cur == NULL)
437 return(xmlStrndup(add, len));
438
439 size = xmlStrlen(cur);
440 ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
441 if (ret == NULL) {
442 fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
443 (size + len + 1) * sizeof(CHAR));
444 return(cur);
445 }
446 memcpy(&ret[size], add, len * sizeof(CHAR));
447 ret[size + len] = 0;
448 return(ret);
449}
450
Daniel Veillard11e00581998-10-24 18:27:49 +0000451/**
452 * xmlStrcat:
Daniel Veillard1e346af1999-02-22 10:33:01 +0000453 * @cur: the original CHAR * array
Daniel Veillard11e00581998-10-24 18:27:49 +0000454 * @add: the CHAR * array added
455 *
456 * a strcat for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000457 *
458 * Returns a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000459 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000460CHAR *
461xmlStrcat(CHAR *cur, const CHAR *add) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000462 const CHAR *p = add;
463
464 if (add == NULL) return(cur);
465 if (cur == NULL)
466 return(xmlStrdup(add));
467
468 while (IS_CHAR(*p)) p++;
469 return(xmlStrncat(cur, add, p - add));
470}
471
472/************************************************************************
473 * *
474 * Commodity functions, cleanup needed ? *
475 * *
476 ************************************************************************/
477
Daniel Veillard11e00581998-10-24 18:27:49 +0000478/**
479 * areBlanks:
480 * @ctxt: an XML parser context
481 * @str: a CHAR *
482 * @len: the size of @str
483 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000484 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +0000485 *
486 * TODO: to be corrected accodingly to DTD information if available
Daniel Veillard1e346af1999-02-22 10:33:01 +0000487 *
488 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000489 */
490
491static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
492 int i;
493 xmlNodePtr lastChild;
494
495 for (i = 0;i < len;i++)
496 if (!(IS_BLANK(str[i]))) return(0);
497
498 if (CUR != '<') return(0);
Daniel Veillard517752b1999-04-05 12:20:10 +0000499 if (ctxt->node == NULL) return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000500 lastChild = xmlGetLastChild(ctxt->node);
501 if (lastChild == NULL) {
502 if (ctxt->node->content != NULL) return(0);
503 } else if (xmlNodeIsText(lastChild))
504 return(0);
505 return(1);
506}
507
Daniel Veillard11e00581998-10-24 18:27:49 +0000508/**
509 * xmlHandleEntity:
510 * @ctxt: an XML parser context
511 * @entity: an XML entity pointer.
512 *
513 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +0000514 * stream ? When do we just handle that as a set of chars ?
Daniel Veillard11e00581998-10-24 18:27:49 +0000515 * TODO: we should call the SAX handler here and have it resolve the issue
Daniel Veillard260a68f1998-08-13 03:39:55 +0000516 */
517
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000518void
519xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000520 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +0000521 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000522
523 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
525 ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +0000526 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000527 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000528 return;
529 }
530 len = xmlStrlen(entity->content);
531 if (len <= 2) goto handle_as_char;
532
533 /*
534 * Redefine its content as an input stream.
535 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000536 input = xmlNewEntityInputStream(ctxt, entity);
537 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000538 return;
539
540handle_as_char:
541 /*
542 * Just handle the content as a set of chars.
543 */
Daniel Veillard517752b1999-04-05 12:20:10 +0000544 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
545 ctxt->sax->characters(ctxt, entity->content, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000546
547}
548
549/*
 * Forward declarations for recursive behaviour.
551 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000552CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
553CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000554
555/************************************************************************
556 * *
557 * Extra stuff for namespace support *
558 * Relates to http://www.w3.org/TR/WD-xml-names *
559 * *
560 ************************************************************************/
561
Daniel Veillard11e00581998-10-24 18:27:49 +0000562/**
563 * xmlNamespaceParseNCName:
564 * @ctxt: an XML parser context
565 *
566 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000567 *
568 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
569 *
570 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
571 * CombiningChar | Extender
Daniel Veillard1e346af1999-02-22 10:33:01 +0000572 *
573 * Returns the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000574 */
575
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000576CHAR *
577xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000578 const CHAR *q;
579 CHAR *ret = NULL;
580
581 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
582 q = NEXT;
583
584 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
585 (CUR == '.') || (CUR == '-') ||
586 (CUR == '_') ||
587 (IS_COMBINING(CUR)) ||
588 (IS_EXTENDER(CUR)))
589 NEXT;
590
591 ret = xmlStrndup(q, CUR_PTR - q);
592
593 return(ret);
594}
595
Daniel Veillard11e00581998-10-24 18:27:49 +0000596/**
597 * xmlNamespaceParseQName:
598 * @ctxt: an XML parser context
599 * @prefix: a CHAR **
600 *
601 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +0000602 *
603 * [NS 5] QName ::= (Prefix ':')? LocalPart
604 *
605 * [NS 6] Prefix ::= NCName
606 *
607 * [NS 7] LocalPart ::= NCName
Daniel Veillard1e346af1999-02-22 10:33:01 +0000608 *
609 * Returns the function returns the local part, and prefix is updated
Daniel Veillard11e00581998-10-24 18:27:49 +0000610 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000611 */
612
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000613CHAR *
614xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000615 CHAR *ret = NULL;
616
617 *prefix = NULL;
618 ret = xmlNamespaceParseNCName(ctxt);
619 if (CUR == ':') {
620 *prefix = ret;
621 NEXT;
622 ret = xmlNamespaceParseNCName(ctxt);
623 }
624
625 return(ret);
626}
627
Daniel Veillard11e00581998-10-24 18:27:49 +0000628/**
Daniel Veillard517752b1999-04-05 12:20:10 +0000629 * xmlSplitQName:
630 * @name: an XML parser context
631 * @prefix: a CHAR **
632 *
633 * parse an XML qualified name string
634 *
635 * [NS 5] QName ::= (Prefix ':')? LocalPart
636 *
637 * [NS 6] Prefix ::= NCName
638 *
639 * [NS 7] LocalPart ::= NCName
640 *
641 * Returns the function returns the local part, and prefix is updated
642 * to get the Prefix if any.
643 */
644
645CHAR *
646xmlSplitQName(const CHAR *name, CHAR **prefix) {
647 CHAR *ret = NULL;
648 const CHAR *q;
649 const CHAR *cur = name;
650
651 *prefix = NULL;
652 if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
653 q = cur++;
654
655 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
656 (*cur == '.') || (*cur == '-') ||
657 (*cur == '_') ||
658 (IS_COMBINING(*cur)) ||
659 (IS_EXTENDER(*cur)))
660 cur++;
661
662 ret = xmlStrndup(q, cur - q);
663
664 if (*cur == ':') {
665 cur++;
666 if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
667 *prefix = ret;
668
669 q = cur++;
670
671 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
672 (*cur == '.') || (*cur == '-') ||
673 (*cur == '_') ||
674 (IS_COMBINING(*cur)) ||
675 (IS_EXTENDER(*cur)))
676 cur++;
677
678 ret = xmlStrndup(q, cur - q);
679 }
680
681 return(ret);
682}
683/**
Daniel Veillard11e00581998-10-24 18:27:49 +0000684 * xmlNamespaceParseNSDef:
685 * @ctxt: an XML parser context
686 *
687 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +0000688 *
689 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
690 *
691 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard1e346af1999-02-22 10:33:01 +0000692 *
693 * Returns the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +0000694 */
695
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000696CHAR *
697xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000698 CHAR *name = NULL;
699
700 if ((CUR == 'x') && (NXT(1) == 'm') &&
701 (NXT(2) == 'l') && (NXT(3) == 'n') &&
702 (NXT(4) == 's')) {
703 SKIP(5);
704 if (CUR == ':') {
705 NEXT;
706 name = xmlNamespaceParseNCName(ctxt);
707 }
708 }
709 return(name);
710}
711
Daniel Veillard11e00581998-10-24 18:27:49 +0000712/**
713 * xmlParseQuotedString:
714 * @ctxt: an XML parser context
715 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000716 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillard1e346af1999-02-22 10:33:01 +0000717 *
718 * Returns the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000719 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000720CHAR *
721xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000722 CHAR *ret = NULL;
723 const CHAR *q;
724
725 if (CUR == '"') {
726 NEXT;
727 q = CUR_PTR;
728 while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000729 if (CUR != '"') {
730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +0000731 ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000732 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000733 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000734 ret = xmlStrndup(q, CUR_PTR - q);
735 NEXT;
736 }
737 } else if (CUR == '\''){
738 NEXT;
739 q = CUR_PTR;
740 while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000741 if (CUR != '\'') {
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +0000743 ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000744 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000745 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000746 ret = xmlStrndup(q, CUR_PTR - q);
747 NEXT;
748 }
749 }
750 return(ret);
751}
752
Daniel Veillard11e00581998-10-24 18:27:49 +0000753/**
754 * xmlParseNamespace:
755 * @ctxt: an XML parser context
756 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000757 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
758 *
759 * This is what the older xml-name Working Draft specified, a bunch of
760 * other stuff may still rely on it, so support is still here as
761 * if ot was declared on the root of the Tree:-(
762 */
763
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000764void
765xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000766 CHAR *href = NULL;
767 CHAR *prefix = NULL;
768 int garbage = 0;
769
770 /*
771 * We just skipped "namespace" or "xml:namespace"
772 */
773 SKIP_BLANKS;
774
775 while (IS_CHAR(CUR) && (CUR != '>')) {
776 /*
777 * We can have "ns" or "prefix" attributes
778 * Old encoding as 'href' or 'AS' attributes is still supported
779 */
780 if ((CUR == 'n') && (NXT(1) == 's')) {
781 garbage = 0;
782 SKIP(2);
783 SKIP_BLANKS;
784
785 if (CUR != '=') continue;
786 NEXT;
787 SKIP_BLANKS;
788
789 href = xmlParseQuotedString(ctxt);
790 SKIP_BLANKS;
791 } else if ((CUR == 'h') && (NXT(1) == 'r') &&
792 (NXT(2) == 'e') && (NXT(3) == 'f')) {
793 garbage = 0;
794 SKIP(4);
795 SKIP_BLANKS;
796
797 if (CUR != '=') continue;
798 NEXT;
799 SKIP_BLANKS;
800
801 href = xmlParseQuotedString(ctxt);
802 SKIP_BLANKS;
803 } else if ((CUR == 'p') && (NXT(1) == 'r') &&
804 (NXT(2) == 'e') && (NXT(3) == 'f') &&
805 (NXT(4) == 'i') && (NXT(5) == 'x')) {
806 garbage = 0;
807 SKIP(6);
808 SKIP_BLANKS;
809
810 if (CUR != '=') continue;
811 NEXT;
812 SKIP_BLANKS;
813
814 prefix = xmlParseQuotedString(ctxt);
815 SKIP_BLANKS;
816 } else if ((CUR == 'A') && (NXT(1) == 'S')) {
817 garbage = 0;
818 SKIP(2);
819 SKIP_BLANKS;
820
821 if (CUR != '=') continue;
822 NEXT;
823 SKIP_BLANKS;
824
825 prefix = xmlParseQuotedString(ctxt);
826 SKIP_BLANKS;
827 } else if ((CUR == '?') && (NXT(1) == '>')) {
828 garbage = 0;
829 CUR_PTR ++;
830 } else {
831 /*
832 * Found garbage when parsing the namespace
833 */
834 if (!garbage)
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
836 ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000837 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000838 NEXT;
839 }
840 }
841
842 MOVETO_ENDTAG(CUR_PTR);
843 NEXT;
844
845 /*
846 * Register the DTD.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000847 if (href != NULL)
Daniel Veillard517752b1999-04-05 12:20:10 +0000848 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
849 ctxt->sax->globalNamespace(ctxt, href, prefix);
850 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000851
852 if (prefix != NULL) free(prefix);
853 if (href != NULL) free(href);
854}
855
856/************************************************************************
857 * *
858 * The parser itself *
859 * Relates to http://www.w3.org/TR/REC-xml *
860 * *
861 ************************************************************************/
862
Daniel Veillard11e00581998-10-24 18:27:49 +0000863/**
864 * xmlParseName:
865 * @ctxt: an XML parser context
866 *
867 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000868 *
869 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
870 * CombiningChar | Extender
871 *
872 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
873 *
874 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +0000875 *
876 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000877 */
878
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000879CHAR *
880xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000881 const CHAR *q;
882 CHAR *ret = NULL;
883
884 if (!IS_LETTER(CUR) && (CUR != '_') &&
885 (CUR != ':')) return(NULL);
886 q = NEXT;
887
888 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
889 (CUR == '.') || (CUR == '-') ||
890 (CUR == '_') || (CUR == ':') ||
891 (IS_COMBINING(CUR)) ||
892 (IS_EXTENDER(CUR)))
893 NEXT;
894
895 ret = xmlStrndup(q, CUR_PTR - q);
896
897 return(ret);
898}
899
Daniel Veillard11e00581998-10-24 18:27:49 +0000900/**
901 * xmlParseNmtoken:
902 * @ctxt: an XML parser context
903 *
904 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000905 *
906 * [7] Nmtoken ::= (NameChar)+
907 *
908 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +0000909 *
910 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000911 */
912
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000913CHAR *
914xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000915 const CHAR *q;
916 CHAR *ret = NULL;
917
918 q = NEXT;
919
920 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
921 (CUR == '.') || (CUR == '-') ||
922 (CUR == '_') || (CUR == ':') ||
923 (IS_COMBINING(CUR)) ||
924 (IS_EXTENDER(CUR)))
925 NEXT;
926
927 ret = xmlStrndup(q, CUR_PTR - q);
928
929 return(ret);
930}
931
Daniel Veillard11e00581998-10-24 18:27:49 +0000932/**
933 * xmlParseEntityValue:
934 * @ctxt: an XML parser context
935 *
936 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000937 *
938 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
939 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +0000940 *
941 * Returns the EntityValue parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000942 */
943
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000944CHAR *
945xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000946 CHAR *ret = NULL, *cur;
947 const CHAR *q;
948
949 if (CUR == '"') {
950 NEXT;
951
952 q = CUR_PTR;
953 while ((IS_CHAR(CUR)) && (CUR != '"')) {
954 if (CUR == '%') {
955 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +0000956 cur = xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000957 ret = xmlStrcat(ret, cur);
958 q = CUR_PTR;
959 } else if (CUR == '&') {
960 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +0000961 cur = xmlParseReference(ctxt);
962 if (cur != NULL) {
963 CHAR buf[2];
964 buf[0] = '&';
965 buf[1] = 0;
966 ret = xmlStrncat(ret, buf, 1);
967 ret = xmlStrcat(ret, cur);
968 buf[0] = ';';
969 buf[1] = 0;
970 ret = xmlStrncat(ret, buf, 1);
971 }
Daniel Veillard260a68f1998-08-13 03:39:55 +0000972 q = CUR_PTR;
973 } else
974 NEXT;
975 }
976 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
978 ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000979 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000980 } else {
981 ret = xmlStrncat(ret, q, CUR_PTR - q);
982 NEXT;
983 }
984 } else if (CUR == '\'') {
985 NEXT;
986 q = CUR_PTR;
987 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
988 if (CUR == '%') {
989 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +0000990 cur = xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000991 ret = xmlStrcat(ret, cur);
992 q = CUR_PTR;
993 } else if (CUR == '&') {
994 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +0000995 cur = xmlParseReference(ctxt);
996 if (cur != NULL) {
997 CHAR buf[2];
998 buf[0] = '&';
999 buf[1] = 0;
1000 ret = xmlStrncat(ret, buf, 1);
1001 ret = xmlStrcat(ret, cur);
1002 buf[0] = ';';
1003 buf[1] = 0;
1004 ret = xmlStrncat(ret, buf, 1);
1005 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001006 q = CUR_PTR;
1007 } else
1008 NEXT;
1009 }
1010 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1012 ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001013 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001014 } else {
1015 ret = xmlStrncat(ret, q, CUR_PTR - q);
1016 NEXT;
1017 }
1018 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1020 ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001021 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001022 }
1023
1024 return(ret);
1025}
1026
Daniel Veillard11e00581998-10-24 18:27:49 +00001027/**
1028 * xmlParseAttValue:
1029 * @ctxt: an XML parser context
1030 *
1031 * parse a value for an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00001032 *
1033 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1034 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00001035 *
1036 * Returns the AttValue parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001037 */
1038
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001039CHAR *
1040xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001041 CHAR *ret = NULL, *cur;
1042 const CHAR *q;
1043
1044 if (CUR == '"') {
1045 NEXT;
1046
1047 q = CUR_PTR;
1048 while ((IS_CHAR(CUR)) && (CUR != '"')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001049 if (CUR == '<') {
1050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1051 ctxt->sax->error(ctxt,
1052 "Unescaped '<' not allowed in attributes values\n");
1053 ctxt->wellFormed = 0;
1054 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001055 if (CUR == '&') {
1056 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001057 cur = xmlParseReference(ctxt);
1058 if (cur != NULL) {
1059 /*
1060 * Special case for '&amp;', we don't want to
1061 * resolve it here since it will break later
1062 * when searching entities in the string.
1063 */
1064 if ((cur[0] == '&') && (cur[1] == 0)) {
1065 CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1066 ret = xmlStrncat(ret, buf, 5);
1067 } else
1068 ret = xmlStrcat(ret, cur);
1069 free(cur);
1070 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001071 q = CUR_PTR;
1072 } else
1073 NEXT;
Daniel Veillardccb09631998-10-27 06:21:04 +00001074 /*
1075 * Pop out finished entity references.
1076 */
1077 while ((CUR == 0) && (ctxt->inputNr > 1)) {
1078 if (CUR_PTR != q)
1079 ret = xmlStrncat(ret, q, CUR_PTR - q);
1080 xmlPopInput(ctxt);
1081 q = CUR_PTR;
1082 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001083 }
1084 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1086 ctxt->sax->error(ctxt, "Unfinished AttValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001087 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001088 } else {
1089 ret = xmlStrncat(ret, q, CUR_PTR - q);
1090 NEXT;
1091 }
1092 } else if (CUR == '\'') {
1093 NEXT;
1094 q = CUR_PTR;
1095 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001096 if (CUR == '<') {
1097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1098 ctxt->sax->error(ctxt,
1099 "Unescaped '<' not allowed in attributes values\n");
1100 ctxt->wellFormed = 0;
1101 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001102 if (CUR == '&') {
1103 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001104 cur = xmlParseReference(ctxt);
1105 if (cur != NULL) {
1106 /*
1107 * Special case for '&amp;', we don't want to
1108 * resolve it here since it will break later
1109 * when searching entities in the string.
1110 */
1111 if ((cur[0] == '&') && (cur[1] == 0)) {
1112 CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1113 ret = xmlStrncat(ret, buf, 5);
1114 } else
1115 ret = xmlStrcat(ret, cur);
1116 free(cur);
1117 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001118 q = CUR_PTR;
1119 } else
1120 NEXT;
Daniel Veillardccb09631998-10-27 06:21:04 +00001121 /*
1122 * Pop out finished entity references.
1123 */
1124 while ((CUR == 0) && (ctxt->inputNr > 1)) {
1125 if (CUR_PTR != q)
1126 ret = xmlStrncat(ret, q, CUR_PTR - q);
1127 xmlPopInput(ctxt);
1128 q = CUR_PTR;
1129 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001130 }
1131 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1133 ctxt->sax->error(ctxt, "Unfinished AttValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001134 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001135 } else {
1136 ret = xmlStrncat(ret, q, CUR_PTR - q);
1137 NEXT;
1138 }
1139 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1141 ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001142 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001143 }
1144
1145 return(ret);
1146}
1147
Daniel Veillard11e00581998-10-24 18:27:49 +00001148/**
1149 * xmlParseSystemLiteral:
1150 * @ctxt: an XML parser context
1151 *
1152 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00001153 *
1154 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00001155 *
1156 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001157 */
1158
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001159CHAR *
1160xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001161 const CHAR *q;
1162 CHAR *ret = NULL;
1163
1164 if (CUR == '"') {
1165 NEXT;
1166 q = CUR_PTR;
1167 while ((IS_CHAR(CUR)) && (CUR != '"'))
1168 NEXT;
1169 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1171 ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001172 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001173 } else {
1174 ret = xmlStrndup(q, CUR_PTR - q);
1175 NEXT;
1176 }
1177 } else if (CUR == '\'') {
1178 NEXT;
1179 q = CUR_PTR;
1180 while ((IS_CHAR(CUR)) && (CUR != '\''))
1181 NEXT;
1182 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1184 ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001185 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001186 } else {
1187 ret = xmlStrndup(q, CUR_PTR - q);
1188 NEXT;
1189 }
1190 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1192 ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001193 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001194 }
1195
1196 return(ret);
1197}
1198
Daniel Veillard11e00581998-10-24 18:27:49 +00001199/**
1200 * xmlParsePubidLiteral:
1201 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00001202 *
Daniel Veillard11e00581998-10-24 18:27:49 +00001203 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00001204 *
1205 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1206 *
1207 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001208 */
1209
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001210CHAR *
1211xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001212 const CHAR *q;
1213 CHAR *ret = NULL;
1214 /*
1215 * Name ::= (Letter | '_') (NameChar)*
1216 */
1217 if (CUR == '"') {
1218 NEXT;
1219 q = CUR_PTR;
1220 while (IS_PUBIDCHAR(CUR)) NEXT;
1221 if (CUR != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1223 ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001224 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001225 } else {
1226 ret = xmlStrndup(q, CUR_PTR - q);
1227 NEXT;
1228 }
1229 } else if (CUR == '\'') {
1230 NEXT;
1231 q = CUR_PTR;
1232 while ((IS_LETTER(CUR)) && (CUR != '\''))
1233 NEXT;
1234 if (!IS_LETTER(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1236 ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001237 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001238 } else {
1239 ret = xmlStrndup(q, CUR_PTR - q);
1240 NEXT;
1241 }
1242 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1244 ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001245 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001246 }
1247
1248 return(ret);
1249}
1250
Daniel Veillard11e00581998-10-24 18:27:49 +00001251/**
1252 * xmlParseCharData:
1253 * @ctxt: an XML parser context
1254 * @cdata: int indicating whether we are within a CDATA section
1255 *
1256 * parse a CharData section.
1257 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001258 *
1259 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1260 */
1261
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001262void
1263xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001264 const CHAR *q;
1265
1266 q = CUR_PTR;
1267 while ((IS_CHAR(CUR)) && (CUR != '<') &&
1268 (CUR != '&')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001269 if ((CUR == ']') && (NXT(1) == ']') &&
1270 (NXT(2) == '>')) {
1271 if (cdata) break;
1272 else {
1273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1274 ctxt->sax->error(ctxt,
1275 "Sequence ']]>' not allowed in content\n");
1276 ctxt->wellFormed = 0;
1277 }
1278 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001279 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001280 }
1281 if (q == CUR_PTR) return;
1282
1283 /*
1284 * Ok the segment [q CUR_PTR] is to be consumed as chars.
1285 */
1286 if (ctxt->sax != NULL) {
Daniel Veillard517752b1999-04-05 12:20:10 +00001287 if (areBlanks(ctxt, q, CUR_PTR - q)) {
1288 if (ctxt->sax->ignorableWhitespace != NULL)
1289 ctxt->sax->ignorableWhitespace(ctxt, q, CUR_PTR - q);
1290 } else {
1291 if (ctxt->sax->characters != NULL)
1292 ctxt->sax->characters(ctxt, q, CUR_PTR - q);
1293 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001294 }
1295}
1296
Daniel Veillard11e00581998-10-24 18:27:49 +00001297/**
1298 * xmlParseExternalID:
1299 * @ctxt: an XML parser context
1300 * @publicID: a CHAR** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00001301 * @strict: indicate whether we should restrict parsing to only
1302 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00001303 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001304 * Parse an External ID or a Public ID
1305 *
1306 * NOTE: Productions [75] and [83] interract badly since [75] can generate
1307 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00001308 *
1309 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1310 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00001311 *
1312 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1313 *
1314 * Returns the function returns SystemLiteral and in the second
1315 * case publicID receives PubidLiteral, is strict is off
1316 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001317 */
1318
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001319CHAR *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001320xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001321 CHAR *URI = NULL;
1322
1323 if ((CUR == 'S') && (NXT(1) == 'Y') &&
1324 (NXT(2) == 'S') && (NXT(3) == 'T') &&
1325 (NXT(4) == 'E') && (NXT(5) == 'M')) {
1326 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001327 if (!IS_BLANK(CUR)) {
1328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1329 ctxt->sax->error(ctxt,
1330 "Space required after 'SYSTEM'\n");
1331 ctxt->wellFormed = 0;
1332 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001333 SKIP_BLANKS;
1334 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001335 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1337 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001338 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001339 ctxt->wellFormed = 0;
1340 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001341 } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1342 (NXT(2) == 'B') && (NXT(3) == 'L') &&
1343 (NXT(4) == 'I') && (NXT(5) == 'C')) {
1344 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001345 if (!IS_BLANK(CUR)) {
1346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1347 ctxt->sax->error(ctxt,
1348 "Space required after 'PUBLIC'\n");
1349 ctxt->wellFormed = 0;
1350 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001351 SKIP_BLANKS;
1352 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001353 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1355 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001356 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001357 ctxt->wellFormed = 0;
1358 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00001359 if (strict) {
1360 /*
1361 * We don't handle [83] so "S SystemLiteral" is required.
1362 */
1363 if (!IS_BLANK(CUR)) {
1364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1365 ctxt->sax->error(ctxt,
1366 "Space required after the Public Identifier\n");
1367 ctxt->wellFormed = 0;
1368 }
1369 } else {
1370 /*
1371 * We handle [83] so we return immediately, if
1372 * "S SystemLiteral" is not detected. From a purely parsing
1373 * point of view that's a nice mess.
1374 */
1375 const CHAR *ptr = CUR_PTR;
1376 if (!IS_BLANK(*ptr)) return(NULL);
1377
1378 while (IS_BLANK(*ptr)) ptr++;
1379 if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001380 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001381 SKIP_BLANKS;
1382 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001383 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1385 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001386 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001387 ctxt->wellFormed = 0;
1388 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001389 }
1390 return(URI);
1391}
1392
Daniel Veillard11e00581998-10-24 18:27:49 +00001393/**
1394 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00001395 * @ctxt: an XML parser context
1396 * @create: should we create a node, or just skip the content
Daniel Veillard11e00581998-10-24 18:27:49 +00001397 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001398 * Skip an XML (SGML) comment <!-- .... -->
1399 * This may or may not create a node (depending on the context)
1400 * The spec says that "For compatibility, the string "--" (double-hyphen)
1401 * must not occur within comments. "
1402 *
1403 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1404 */
Daniel Veillard517752b1999-04-05 12:20:10 +00001405void
Daniel Veillard1e346af1999-02-22 10:33:01 +00001406xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001407 const CHAR *q, *start;
1408 const CHAR *r;
1409 CHAR *val;
1410
1411 /*
1412 * Check that there is a comment right here.
1413 */
1414 if ((CUR != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00001415 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001416
1417 SKIP(4);
1418 start = q = CUR_PTR;
1419 NEXT;
1420 r = CUR_PTR;
1421 NEXT;
1422 while (IS_CHAR(CUR) &&
1423 ((CUR == ':') || (CUR != '>') ||
1424 (*r != '-') || (*q != '-'))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001425 if ((*r == '-') && (*q == '-')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1427 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001428 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001429 ctxt->wellFormed = 0;
1430 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001431 NEXT;r++;q++;
1432 }
1433 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1435 ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001436 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001437 } else {
1438 NEXT;
1439 if (create) {
1440 val = xmlStrndup(start, q - start);
Daniel Veillard517752b1999-04-05 12:20:10 +00001441 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1442 ctxt->sax->comment(ctxt, val);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001443 free(val);
1444 }
1445 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001446}
1447
Daniel Veillard11e00581998-10-24 18:27:49 +00001448/**
1449 * xmlParsePITarget:
1450 * @ctxt: an XML parser context
1451 *
1452 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00001453 *
1454 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00001455 *
1456 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001457 */
1458
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001459CHAR *
1460xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001461 CHAR *name;
1462
1463 name = xmlParseName(ctxt);
1464 if ((name != NULL) && (name[3] == 0) &&
1465 ((name[0] == 'x') || (name[0] == 'X')) &&
1466 ((name[1] == 'm') || (name[1] == 'M')) &&
1467 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1469 ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001470 return(NULL);
1471 }
1472 return(name);
1473}
1474
Daniel Veillard11e00581998-10-24 18:27:49 +00001475/**
1476 * xmlParsePI:
1477 * @ctxt: an XML parser context
1478 *
1479 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001480 *
1481 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00001482 *
1483 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001484 */
1485
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001486void
1487xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001488 CHAR *target;
1489
1490 if ((CUR == '<') && (NXT(1) == '?')) {
1491 /*
1492 * this is a Processing Instruction.
1493 */
1494 SKIP(2);
1495
1496 /*
1497 * Parse the target name and check for special support like
1498 * namespace.
1499 *
1500 * TODO : PI handling should be dynamically redefinable using an
1501 * API. Only namespace should be in the code IMHO ...
1502 */
1503 target = xmlParsePITarget(ctxt);
1504 if (target != NULL) {
Daniel Veillard517752b1999-04-05 12:20:10 +00001505 const CHAR *q = CUR_PTR;
1506
1507 while (IS_CHAR(CUR) &&
1508 ((CUR != '?') || (NXT(1) != '>')))
1509 NEXT;
1510 if (!IS_CHAR(CUR)) {
1511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1512 ctxt->sax->error(ctxt,
1513 "xmlParsePI: PI %s never end ...\n", target);
1514 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001515 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00001516 CHAR *data;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001517
Daniel Veillard517752b1999-04-05 12:20:10 +00001518 data = xmlStrndup(q, CUR_PTR - q);
1519 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001520
Daniel Veillard517752b1999-04-05 12:20:10 +00001521 /*
1522 * SAX: PI detected.
1523 */
1524 if ((ctxt->sax) &&
1525 (ctxt->sax->processingInstruction != NULL))
1526 ctxt->sax->processingInstruction(ctxt, target, data);
1527 free(data);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001528 }
1529 free(target);
1530 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1532 ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001533 ctxt->wellFormed = 0;
1534
Daniel Veillard260a68f1998-08-13 03:39:55 +00001535 /********* Should we try to complete parsing the PI ???
1536 while (IS_CHAR(CUR) &&
1537 (CUR != '?') && (CUR != '>'))
1538 NEXT;
1539 if (!IS_CHAR(CUR)) {
1540 fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1541 target);
1542 }
1543 ********************************************************/
1544 }
1545 }
1546}
1547
Daniel Veillard11e00581998-10-24 18:27:49 +00001548/**
1549 * xmlParseNotationDecl:
1550 * @ctxt: an XML parser context
1551 *
1552 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00001553 *
1554 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1555 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001556 * Hence there is actually 3 choices:
1557 * 'PUBLIC' S PubidLiteral
1558 * 'PUBLIC' S PubidLiteral S SystemLiteral
1559 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00001560 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001561 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00001562 */
1563
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001564void
1565xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001566 CHAR *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00001567 CHAR *Pubid;
1568 CHAR *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001569
1570 if ((CUR == '<') && (NXT(1) == '!') &&
1571 (NXT(2) == 'N') && (NXT(3) == 'O') &&
1572 (NXT(4) == 'T') && (NXT(5) == 'A') &&
1573 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00001574 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001575 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00001576 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard1e346af1999-02-22 10:33:01 +00001578 ctxt->sax->error(ctxt, "Space required after '<!NOTATION'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001579 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001580 return;
1581 }
1582 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00001583
1584 name = xmlParseName(ctxt);
1585 if (name == NULL) {
1586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1587 ctxt->sax->error(ctxt, "NOTATION: Name expected here\n");
1588 ctxt->wellFormed = 0;
1589 return;
1590 }
1591 if (!IS_BLANK(CUR)) {
1592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1593 ctxt->sax->error(ctxt,
1594 "Space required after the NOTATION name'\n");
1595 ctxt->wellFormed = 0;
1596 return;
1597 }
1598 SKIP_BLANKS;
1599
Daniel Veillard260a68f1998-08-13 03:39:55 +00001600 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00001601 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001602 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00001603 Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
1604 SKIP_BLANKS;
1605
1606 if (CUR == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001607 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00001608 if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1609 ctxt->sax->notationDecl(ctxt, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00001610 } else {
1611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1612 ctxt->sax->error(ctxt,
1613 "'>' required to close NOTATION declaration\n");
1614 ctxt->wellFormed = 0;
1615 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001616 free(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00001617 if (Systemid != NULL) free(Systemid);
1618 if (Pubid != NULL) free(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001619 }
1620}
1621
Daniel Veillard11e00581998-10-24 18:27:49 +00001622/**
1623 * xmlParseEntityDecl:
1624 * @ctxt: an XML parser context
1625 *
1626 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00001627 *
1628 * [70] EntityDecl ::= GEDecl | PEDecl
1629 *
1630 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1631 *
1632 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1633 *
1634 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1635 *
1636 * [74] PEDef ::= EntityValue | ExternalID
1637 *
1638 * [76] NDataDecl ::= S 'NDATA' S Name
1639 */
1640
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001641void
1642xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001643 CHAR *name = NULL;
1644 CHAR *value = NULL;
1645 CHAR *URI = NULL, *literal = NULL;
1646 CHAR *ndata = NULL;
1647 int isParameter = 0;
1648
1649 if ((CUR == '<') && (NXT(1) == '!') &&
1650 (NXT(2) == 'E') && (NXT(3) == 'N') &&
1651 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001652 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001653 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001654 if (!IS_BLANK(CUR)) {
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt, "Space required after '<!ENTITY'\n");
1657 ctxt->wellFormed = 0;
1658 }
1659 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001660
1661 if (CUR == '%') {
1662 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001663 if (!IS_BLANK(CUR)) {
1664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1665 ctxt->sax->error(ctxt, "Space required after '%'\n");
1666 ctxt->wellFormed = 0;
1667 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001668 SKIP_BLANKS;
1669 isParameter = 1;
1670 }
1671
1672 name = xmlParseName(ctxt);
1673 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001676 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001677 return;
1678 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001679 if (!IS_BLANK(CUR)) {
1680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1681 ctxt->sax->error(ctxt,
1682 "Space required after the entity name\n");
1683 ctxt->wellFormed = 0;
1684 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001685 SKIP_BLANKS;
1686
1687 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00001688 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00001689 */
1690 if (isParameter) {
1691 if ((CUR == '"') || (CUR == '\''))
1692 value = xmlParseEntityValue(ctxt);
1693 if (value) {
Daniel Veillard517752b1999-04-05 12:20:10 +00001694 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1695 ctxt->sax->entityDecl(ctxt, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001696 XML_INTERNAL_PARAMETER_ENTITY,
1697 NULL, NULL, value);
1698 }
1699 else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00001700 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001701 if (URI) {
Daniel Veillard517752b1999-04-05 12:20:10 +00001702 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1703 ctxt->sax->entityDecl(ctxt, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001704 XML_EXTERNAL_PARAMETER_ENTITY,
1705 literal, URI, NULL);
1706 }
1707 }
1708 } else {
1709 if ((CUR == '"') || (CUR == '\'')) {
1710 value = xmlParseEntityValue(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00001711 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1712 ctxt->sax->entityDecl(ctxt, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001713 XML_INTERNAL_GENERAL_ENTITY,
1714 NULL, NULL, value);
1715 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00001716 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001717 if ((CUR != '>') && (!IS_BLANK(CUR))) {
1718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1719 ctxt->sax->error(ctxt,
1720 "Space required before 'NDATA'\n");
1721 ctxt->wellFormed = 0;
1722 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001723 SKIP_BLANKS;
1724 if ((CUR == 'N') && (NXT(1) == 'D') &&
1725 (NXT(2) == 'A') && (NXT(3) == 'T') &&
1726 (NXT(4) == 'A')) {
1727 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001728 if (!IS_BLANK(CUR)) {
1729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730 ctxt->sax->error(ctxt,
1731 "Space required after 'NDATA'\n");
1732 ctxt->wellFormed = 0;
1733 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001734 SKIP_BLANKS;
1735 ndata = xmlParseName(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00001736 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1737 ctxt->sax->entityDecl(ctxt, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001738 XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1739 literal, URI, ndata);
1740 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00001741 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1742 ctxt->sax->entityDecl(ctxt, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001743 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1744 literal, URI, NULL);
1745 }
1746 }
1747 }
1748 SKIP_BLANKS;
1749 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001752 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001753 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001754 } else
1755 NEXT;
1756 if (name != NULL) free(name);
1757 if (value != NULL) free(value);
1758 if (URI != NULL) free(URI);
1759 if (literal != NULL) free(literal);
1760 if (ndata != NULL) free(ndata);
1761 }
1762}
1763
Daniel Veillard11e00581998-10-24 18:27:49 +00001764/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001765 * xmlParseDefaultDecl:
1766 * @ctxt: an XML parser context
1767 * @value: Receive a possible fixed default value for the attribute
1768 *
1769 * Parse an attribute default declaration
1770 *
1771 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
1772 *
1773 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
1774 * or XML_ATTRIBUTE_FIXED.
1775 */
1776
1777int
1778xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
1779 int val;
1780 CHAR *ret;
1781
1782 *value = NULL;
1783 if ((CUR == '#') && (NXT(1) == 'R') &&
1784 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
1785 (NXT(4) == 'U') && (NXT(5) == 'I') &&
1786 (NXT(6) == 'R') && (NXT(7) == 'E') &&
1787 (NXT(8) == 'D')) {
1788 SKIP(9);
1789 return(XML_ATTRIBUTE_REQUIRED);
1790 }
1791 if ((CUR == '#') && (NXT(1) == 'I') &&
1792 (NXT(2) == 'M') && (NXT(3) == 'P') &&
1793 (NXT(4) == 'L') && (NXT(5) == 'I') &&
1794 (NXT(6) == 'E') && (NXT(7) == 'D')) {
1795 SKIP(8);
1796 return(XML_ATTRIBUTE_IMPLIED);
1797 }
1798 val = XML_ATTRIBUTE_NONE;
1799 if ((CUR == '#') && (NXT(1) == 'F') &&
1800 (NXT(2) == 'I') && (NXT(3) == 'X') &&
1801 (NXT(4) == 'E') && (NXT(5) == 'D')) {
1802 SKIP(6);
1803 val = XML_ATTRIBUTE_FIXED;
1804 if (!IS_BLANK(CUR)) {
1805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1806 ctxt->sax->error(ctxt, "Space required after '#FIXED'\n");
1807 ctxt->wellFormed = 0;
1808 }
1809 SKIP_BLANKS;
1810 }
1811 ret = xmlParseAttValue(ctxt);
1812 if (ret == NULL) {
1813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1814 ctxt->sax->error(ctxt,
1815 "Attribute default value declaration error\n");
1816 ctxt->wellFormed = 0;
1817 } else
1818 *value = ret;
1819 return(val);
1820}
1821
1822/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00001823 * xmlParseNotationType:
1824 * @ctxt: an XML parser context
1825 *
1826 * parse an Notation attribute type.
1827 *
1828 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1829 *
1830 * Note: the leading 'NOTATION' S part has already being parsed...
1831 *
1832 * Returns: the notation attribute tree built while parsing
1833 */
1834
1835xmlEnumerationPtr
1836xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1837 CHAR *name;
1838 xmlEnumerationPtr ret = NULL, last = NULL, cur;
1839
1840 if (CUR != '(') {
1841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1842 ctxt->sax->error(ctxt, "'(' required to start 'NOTATION'\n");
1843 ctxt->wellFormed = 0;
1844 return(NULL);
1845 }
1846 do {
1847 NEXT;
1848 SKIP_BLANKS;
1849 name = xmlParseName(ctxt);
1850 if (name == NULL) {
1851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1852 ctxt->sax->error(ctxt,
1853 "Name expected in NOTATION declaration\n");
1854 ctxt->wellFormed = 0;
1855 return(ret);
1856 }
1857 cur = xmlCreateEnumeration(name);
1858 free(name);
1859 if (cur == NULL) return(ret);
1860 if (last == NULL) ret = last = cur;
1861 else {
1862 last->next = cur;
1863 last = cur;
1864 }
1865 SKIP_BLANKS;
1866 } while (CUR == '|');
1867 if (CUR != ')') {
1868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1869 ctxt->sax->error(ctxt,
1870 "')' required to finish NOTATION declaration\n");
1871 ctxt->wellFormed = 0;
1872 return(ret);
1873 }
1874 NEXT;
1875 return(ret);
1876}
1877
1878/**
1879 * xmlParseEnumerationType:
1880 * @ctxt: an XML parser context
1881 *
1882 * parse an Enumeration attribute type.
1883 *
1884 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1885 *
1886 * Returns: the enumeration attribute tree built while parsing
1887 */
1888
1889xmlEnumerationPtr
1890xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1891 CHAR *name;
1892 xmlEnumerationPtr ret = NULL, last = NULL, cur;
1893
1894 if (CUR != '(') {
1895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1896 ctxt->sax->error(ctxt,
1897 "'(' required to start ATTLIST enumeration\n");
1898 ctxt->wellFormed = 0;
1899 return(NULL);
1900 }
1901 do {
1902 NEXT;
1903 SKIP_BLANKS;
1904 name = xmlParseNmtoken(ctxt);
1905 if (name == NULL) {
1906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1907 ctxt->sax->error(ctxt,
1908 "NmToken expected in ATTLIST enumeration\n");
1909 ctxt->wellFormed = 0;
1910 return(ret);
1911 }
1912 cur = xmlCreateEnumeration(name);
1913 free(name);
1914 if (cur == NULL) return(ret);
1915 if (last == NULL) ret = last = cur;
1916 else {
1917 last->next = cur;
1918 last = cur;
1919 }
1920 SKIP_BLANKS;
1921 } while (CUR == '|');
1922 if (CUR != ')') {
1923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1924 ctxt->sax->error(ctxt,
1925 "')' required to finish ATTLIST enumeration\n");
1926 ctxt->wellFormed = 0;
1927 return(ret);
1928 }
1929 NEXT;
1930 return(ret);
1931}
1932
1933/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001934 * xmlParseEnumeratedType:
1935 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00001936 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00001937 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001938 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001939 *
1940 * [57] EnumeratedType ::= NotationType | Enumeration
1941 *
1942 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1943 *
Daniel Veillard11e00581998-10-24 18:27:49 +00001944 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001945 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00001946 */
1947
Daniel Veillard1e346af1999-02-22 10:33:01 +00001948int
1949xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1950 if ((CUR == 'N') && (NXT(1) == 'O') &&
1951 (NXT(2) == 'T') && (NXT(3) == 'A') &&
1952 (NXT(4) == 'T') && (NXT(5) == 'I') &&
1953 (NXT(6) == 'O') && (NXT(7) == 'N')) {
1954 SKIP(8);
1955 if (!IS_BLANK(CUR)) {
1956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1957 ctxt->sax->error(ctxt, "Space required after 'NOTATION'\n");
1958 ctxt->wellFormed = 0;
1959 return(0);
1960 }
1961 SKIP_BLANKS;
1962 *tree = xmlParseNotationType(ctxt);
1963 if (*tree == NULL) return(0);
1964 return(XML_ATTRIBUTE_NOTATION);
1965 }
1966 *tree = xmlParseEnumerationType(ctxt);
1967 if (*tree == NULL) return(0);
1968 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001969}
1970
Daniel Veillard11e00581998-10-24 18:27:49 +00001971/**
1972 * xmlParseAttributeType:
1973 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00001974 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00001975 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001976 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00001977 *
1978 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1979 *
1980 * [55] StringType ::= 'CDATA'
1981 *
1982 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1983 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00001984 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001985 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00001986 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001987int
Daniel Veillard1e346af1999-02-22 10:33:01 +00001988xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001989 if ((CUR == 'C') && (NXT(1) == 'D') &&
1990 (NXT(2) == 'A') && (NXT(3) == 'T') &&
1991 (NXT(4) == 'A')) {
1992 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00001993 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001994 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1995 (NXT(2) == 'R') && (NXT(3) == 'E') &&
1996 (NXT(4) == 'F')) {
1997 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001998 return(XML_ATTRIBUTE_IDREF);
Daniel Veillard1e346af1999-02-22 10:33:01 +00001999 } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2000 SKIP(2);
2001 return(XML_ATTRIBUTE_ID);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002002 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2003 (NXT(2) == 'R') && (NXT(3) == 'E') &&
2004 (NXT(4) == 'F') && (NXT(5) == 'S')) {
2005 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002006 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002007 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2008 (NXT(2) == 'T') && (NXT(3) == 'I') &&
2009 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2010 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002011 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002012 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2013 (NXT(2) == 'T') && (NXT(3) == 'I') &&
2014 (NXT(4) == 'T') && (NXT(5) == 'I') &&
2015 (NXT(6) == 'E') && (NXT(7) == 'S')) {
2016 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002017 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002018 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2019 (NXT(2) == 'T') && (NXT(3) == 'O') &&
2020 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00002021 (NXT(6) == 'N') && (NXT(7) == 'S')) {
2022 SKIP(8);
2023 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002024 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2025 (NXT(2) == 'T') && (NXT(3) == 'O') &&
2026 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00002027 (NXT(6) == 'N')) {
2028 SKIP(7);
2029 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002030 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00002031 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002032}
2033
Daniel Veillard11e00581998-10-24 18:27:49 +00002034/**
2035 * xmlParseAttributeListDecl:
2036 * @ctxt: an XML parser context
2037 *
2038 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00002039 *
2040 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2041 *
2042 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00002043 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002044 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002045void
2046xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002047 CHAR *elemName;
2048 CHAR *attrName;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002049 xmlEnumerationPtr tree = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002050
Daniel Veillard260a68f1998-08-13 03:39:55 +00002051 if ((CUR == '<') && (NXT(1) == '!') &&
2052 (NXT(2) == 'A') && (NXT(3) == 'T') &&
2053 (NXT(4) == 'T') && (NXT(5) == 'L') &&
2054 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002055 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002056 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002057 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002059 ctxt->sax->error(ctxt, "Space required after '<!ATTLIST'\n");
2060 ctxt->wellFormed = 0;
2061 }
2062 SKIP_BLANKS;
2063 elemName = xmlParseName(ctxt);
2064 if (elemName == NULL) {
2065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2066 ctxt->sax->error(ctxt, "ATTLIST: no name for Element\n");
2067 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002068 return;
2069 }
2070 SKIP_BLANKS;
2071 while (CUR != '>') {
2072 const CHAR *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002073 int type;
2074 int def;
2075 CHAR *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002076
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002077 attrName = xmlParseName(ctxt);
2078 if (attrName == NULL) {
2079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2080 ctxt->sax->error(ctxt, "ATTLIST: no name for Attribute\n");
2081 ctxt->wellFormed = 0;
2082 break;
2083 }
2084 if (!IS_BLANK(CUR)) {
2085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2086 ctxt->sax->error(ctxt,
2087 "Space required after the attribute name\n");
2088 ctxt->wellFormed = 0;
2089 break;
2090 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002091 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002092
Daniel Veillard1e346af1999-02-22 10:33:01 +00002093 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002094 if (type <= 0) break;
2095
2096 if (!IS_BLANK(CUR)) {
2097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2098 ctxt->sax->error(ctxt,
2099 "Space required after the attribute type\n");
2100 ctxt->wellFormed = 0;
2101 break;
2102 }
2103 SKIP_BLANKS;
2104
2105 def = xmlParseDefaultDecl(ctxt, &defaultValue);
2106 if (def <= 0) break;
2107
2108 if (CUR != '>') {
2109 if (!IS_BLANK(CUR)) {
2110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2111 ctxt->sax->error(ctxt,
2112 "Space required after the attribute default value\n");
2113 ctxt->wellFormed = 0;
2114 break;
2115 }
2116 SKIP_BLANKS;
2117 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002118 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2120 ctxt->sax->error(ctxt,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002121 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002122 break;
2123 }
Daniel Veillard517752b1999-04-05 12:20:10 +00002124 if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
2125 ctxt->sax->attributeDecl(ctxt, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002126 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002127 if (attrName != NULL)
2128 free(attrName);
2129 if (defaultValue != NULL)
2130 free(defaultValue);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002131 }
2132 if (CUR == '>')
2133 NEXT;
2134
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002135 free(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002136 }
2137}
2138
Daniel Veillard11e00581998-10-24 18:27:49 +00002139/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002140 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00002141 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002142 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002143 * parse the declaration for a Mixed Element content
2144 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00002145 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002146 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2147 * '(' S? '#PCDATA' S? ')'
2148 *
2149 * returns: the list of the xmlElementContentPtr describing the element choices
2150 */
2151xmlElementContentPtr
2152xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00002153 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002154 CHAR *elem = NULL;
2155
2156 if ((CUR == '#') && (NXT(1) == 'P') &&
2157 (NXT(2) == 'C') && (NXT(3) == 'D') &&
2158 (NXT(4) == 'A') && (NXT(5) == 'T') &&
2159 (NXT(6) == 'A')) {
2160 SKIP(7);
2161 SKIP_BLANKS;
Daniel Veillard3b9def11999-01-31 22:15:06 +00002162 if (CUR == ')') {
2163 NEXT;
2164 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2165 return(ret);
2166 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002167 if ((CUR == '(') || (CUR == '|')) {
2168 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2169 if (ret == NULL) return(NULL);
Daniel Veillard3b9def11999-01-31 22:15:06 +00002170 } /********** else {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2172 ctxt->sax->error(ctxt,
2173 "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2174 ctxt->wellFormed = 0;
2175 return(NULL);
Daniel Veillard3b9def11999-01-31 22:15:06 +00002176 } **********/
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002177 while (CUR == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00002178 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002179 if (elem == NULL) {
2180 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2181 if (ret == NULL) return(NULL);
2182 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00002183 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002184 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00002185 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2186 if (n == NULL) return(NULL);
2187 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2188 cur->c2 = n;
2189 cur = n;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002190 free(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002191 }
2192 SKIP_BLANKS;
2193 elem = xmlParseName(ctxt);
2194 if (elem == NULL) {
2195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2196 ctxt->sax->error(ctxt,
2197 "xmlParseElementMixedContentDecl : Name expected\n");
2198 ctxt->wellFormed = 0;
2199 xmlFreeElementContent(cur);
2200 return(NULL);
2201 }
2202 SKIP_BLANKS;
2203 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00002204 if ((CUR == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00002205 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002206 cur->c2 = xmlNewElementContent(elem,
2207 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002208 free(elem);
2209 }
Daniel Veillard1899e851999-02-01 12:18:54 +00002210 ret->ocur = XML_ELEMENT_CONTENT_MULT;
2211 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002212 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00002213 if (elem != NULL) free(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215 ctxt->sax->error(ctxt,
Daniel Veillard3b9def11999-01-31 22:15:06 +00002216 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002217 ctxt->wellFormed = 0;
2218 xmlFreeElementContent(ret);
2219 return(NULL);
2220 }
2221
2222 } else {
2223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2224 ctxt->sax->error(ctxt,
2225 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
2226 ctxt->wellFormed = 0;
2227 }
2228 return(ret);
2229}
2230
2231/**
2232 * xmlParseElementChildrenContentDecl:
2233 * @ctxt: an XML parser context
2234 *
2235 * parse the declaration for a Mixed Element content
2236 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
2237 *
2238 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002239 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2240 *
2241 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2242 *
2243 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2244 *
2245 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2246 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002247 * returns: the tree of xmlElementContentPtr describing the element
2248 * hierarchy.
2249 */
2250xmlElementContentPtr
2251xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
2252 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
2253 CHAR *elem;
2254 CHAR type = 0;
2255
2256 SKIP_BLANKS;
2257 if (CUR == '(') {
2258 /* Recurse on first child */
2259 NEXT;
2260 SKIP_BLANKS;
2261 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
2262 SKIP_BLANKS;
2263 } else {
2264 elem = xmlParseName(ctxt);
2265 if (elem == NULL) {
2266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2267 ctxt->sax->error(ctxt,
2268 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2269 ctxt->wellFormed = 0;
2270 return(NULL);
2271 }
2272 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2273 if (CUR == '?') {
2274 ret->ocur = XML_ELEMENT_CONTENT_OPT;
2275 NEXT;
2276 } else if (CUR == '*') {
2277 ret->ocur = XML_ELEMENT_CONTENT_MULT;
2278 NEXT;
2279 } else if (CUR == '+') {
2280 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2281 NEXT;
2282 } else {
2283 ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2284 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00002285 free(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002286 }
2287 SKIP_BLANKS;
2288 while (CUR != ')') {
2289 /*
2290 * Each loop we parse one separator and one element.
2291 */
2292 if (CUR == ',') {
2293 if (type == 0) type = CUR;
2294
2295 /*
2296 * Detect "Name | Name , Name" error
2297 */
2298 else if (type != CUR) {
2299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2300 ctxt->sax->error(ctxt,
2301 "xmlParseElementChildrenContentDecl : '%c' expected\n",
2302 type);
2303 ctxt->wellFormed = 0;
2304 xmlFreeElementContent(ret);
2305 return(NULL);
2306 }
Daniel Veillard1899e851999-02-01 12:18:54 +00002307 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002308
2309 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
2310 if (op == NULL) {
2311 xmlFreeElementContent(ret);
2312 return(NULL);
2313 }
2314 if (last == NULL) {
2315 op->c1 = ret;
2316 ret = cur = op;
2317 } else {
2318 cur->c2 = op;
2319 op->c1 = last;
2320 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00002321 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002322 }
2323 } else if (CUR == '|') {
2324 if (type == 0) type = CUR;
2325
2326 /*
2327 * Detect "Name , Name | Name" error
2328 */
2329 else if (type != CUR) {
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt,
2332 "xmlParseElementChildrenContentDecl : '%c' expected\n",
2333 type);
2334 ctxt->wellFormed = 0;
2335 xmlFreeElementContent(ret);
2336 return(NULL);
2337 }
Daniel Veillard1899e851999-02-01 12:18:54 +00002338 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002339
2340 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2341 if (op == NULL) {
2342 xmlFreeElementContent(ret);
2343 return(NULL);
2344 }
2345 if (last == NULL) {
2346 op->c1 = ret;
2347 ret = cur = op;
2348 } else {
2349 cur->c2 = op;
2350 op->c1 = last;
2351 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00002352 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002353 }
2354 } else {
2355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2356 ctxt->sax->error(ctxt,
2357 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
2358 ctxt->wellFormed = 0;
2359 xmlFreeElementContent(ret);
2360 return(NULL);
2361 }
2362 SKIP_BLANKS;
2363 if (CUR == '(') {
2364 /* Recurse on second child */
2365 NEXT;
2366 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00002367 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002368 SKIP_BLANKS;
2369 } else {
2370 elem = xmlParseName(ctxt);
2371 if (elem == NULL) {
2372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2373 ctxt->sax->error(ctxt,
2374 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2375 ctxt->wellFormed = 0;
2376 return(NULL);
2377 }
Daniel Veillard1899e851999-02-01 12:18:54 +00002378 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002379 free(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002380 }
2381 if (CUR == '?') {
2382 ret->ocur = XML_ELEMENT_CONTENT_OPT;
2383 NEXT;
2384 } else if (CUR == '*') {
2385 ret->ocur = XML_ELEMENT_CONTENT_MULT;
2386 NEXT;
2387 } else if (CUR == '+') {
2388 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2389 NEXT;
2390 } else {
2391 ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2392 }
2393 SKIP_BLANKS;
2394 }
Daniel Veillard1899e851999-02-01 12:18:54 +00002395 if ((cur != NULL) && (last != NULL)) {
2396 cur->c2 = last;
2397 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002398 NEXT;
2399 if (CUR == '?') {
2400 ret->ocur = XML_ELEMENT_CONTENT_OPT;
2401 NEXT;
2402 } else if (CUR == '*') {
2403 ret->ocur = XML_ELEMENT_CONTENT_MULT;
2404 NEXT;
2405 } else if (CUR == '+') {
2406 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2407 NEXT;
2408 } else {
2409 ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2410 }
2411 return(ret);
2412}
2413
2414/**
2415 * xmlParseElementContentDecl:
2416 * @ctxt: an XML parser context
2417 * @name: the name of the element being defined.
2418 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00002419 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002420 * parse the declaration for an Element content either Mixed or Children,
2421 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
2422 *
2423 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00002424 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002425 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00002426 */
2427
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002428int
2429xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
2430 xmlElementContentPtr *result) {
2431
2432 xmlElementContentPtr tree = NULL;
2433 int res;
2434
2435 *result = NULL;
2436
2437 if (CUR != '(') {
2438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2439 ctxt->sax->error(ctxt,
2440 "xmlParseElementContentDecl : '(' expected\n");
2441 ctxt->wellFormed = 0;
2442 return(-1);
2443 }
2444 NEXT;
2445 SKIP_BLANKS;
2446 if ((CUR == '#') && (NXT(1) == 'P') &&
2447 (NXT(2) == 'C') && (NXT(3) == 'D') &&
2448 (NXT(4) == 'A') && (NXT(5) == 'T') &&
2449 (NXT(6) == 'A')) {
2450 tree = xmlParseElementMixedContentDecl(ctxt);
2451 res = XML_ELEMENT_TYPE_MIXED;
2452 } else {
2453 tree = xmlParseElementChildrenContentDecl(ctxt);
2454 res = XML_ELEMENT_TYPE_ELEMENT;
2455 }
2456 SKIP_BLANKS;
2457 /****************************
2458 if (CUR != ')') {
2459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2460 ctxt->sax->error(ctxt,
2461 "xmlParseElementContentDecl : ')' expected\n");
2462 ctxt->wellFormed = 0;
2463 return(-1);
2464 }
2465 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00002466 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002467 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002468}
2469
Daniel Veillard11e00581998-10-24 18:27:49 +00002470/**
2471 * xmlParseElementDecl:
2472 * @ctxt: an XML parser context
2473 *
2474 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002475 *
2476 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2477 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002478 * TODO There is a check [ VC: Unique Element Type Declaration ]
Daniel Veillard1e346af1999-02-22 10:33:01 +00002479 *
2480 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00002481 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002482int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002483xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002484 CHAR *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002485 int ret = -1;
2486 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002487
2488 if ((CUR == '<') && (NXT(1) == '!') &&
2489 (NXT(2) == 'E') && (NXT(3) == 'L') &&
2490 (NXT(4) == 'E') && (NXT(5) == 'M') &&
2491 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002492 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002493 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002494 if (!IS_BLANK(CUR)) {
2495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2496 ctxt->sax->error(ctxt,
2497 "Space required after 'ELEMENT'\n");
2498 ctxt->wellFormed = 0;
2499 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002500 SKIP_BLANKS;
2501 name = xmlParseName(ctxt);
2502 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002504 ctxt->sax->error(ctxt,
2505 "xmlParseElementDecl: no name for Element\n");
2506 ctxt->wellFormed = 0;
2507 return(-1);
2508 }
2509 if (!IS_BLANK(CUR)) {
2510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2511 ctxt->sax->error(ctxt,
2512 "Space required after the element name\n");
2513 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002514 }
2515 SKIP_BLANKS;
2516 if ((CUR == 'E') && (NXT(1) == 'M') &&
2517 (NXT(2) == 'P') && (NXT(3) == 'T') &&
2518 (NXT(4) == 'Y')) {
2519 SKIP(5);
2520 /*
2521 * Element must always be empty.
2522 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002523 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002524 } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2525 (NXT(2) == 'Y')) {
2526 SKIP(3);
2527 /*
2528 * Element is a generic container.
2529 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002530 ret = XML_ELEMENT_TYPE_ANY;
2531 } else if (CUR == '(') {
2532 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002533 } else {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2535 ctxt->sax->error(ctxt,
2536 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
2537 ctxt->wellFormed = 0;
2538 if (name != NULL) free(name);
2539 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002540 }
2541 SKIP_BLANKS;
2542 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2544 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002545 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002546 ctxt->wellFormed = 0;
2547 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002548 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00002549 if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
2550 ctxt->sax->elementDecl(ctxt, name, ret, content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002551 }
2552 if (name != NULL) {
2553 free(name);
2554 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002555 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002556 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002557}
2558
Daniel Veillard11e00581998-10-24 18:27:49 +00002559/**
2560 * xmlParseMarkupDecl:
2561 * @ctxt: an XML parser context
2562 *
2563 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002564 *
2565 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2566 * NotationDecl | PI | Comment
2567 *
2568 * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2569 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002570void
2571xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002572 xmlParseElementDecl(ctxt);
2573 xmlParseAttributeListDecl(ctxt);
2574 xmlParseEntityDecl(ctxt);
2575 xmlParseNotationDecl(ctxt);
2576 xmlParsePI(ctxt);
2577 xmlParseComment(ctxt, 0);
2578}
2579
Daniel Veillard11e00581998-10-24 18:27:49 +00002580/**
2581 * xmlParseCharRef:
2582 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002583 *
2584 * parse Reference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002585 *
2586 * [66] CharRef ::= '&#' [0-9]+ ';' |
2587 * '&#x' [0-9a-fA-F]+ ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00002588 *
2589 * Returns the value parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00002590 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002591CHAR *
2592xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002593 int val = 0;
2594 CHAR buf[2];
2595
2596 if ((CUR == '&') && (NXT(1) == '#') &&
2597 (NXT(2) == 'x')) {
2598 SKIP(3);
2599 while (CUR != ';') {
2600 if ((CUR >= '0') && (CUR <= '9'))
2601 val = val * 16 + (CUR - '0');
2602 else if ((CUR >= 'a') && (CUR <= 'f'))
2603 val = val * 16 + (CUR - 'a') + 10;
2604 else if ((CUR >= 'A') && (CUR <= 'F'))
2605 val = val * 16 + (CUR - 'A') + 10;
2606 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00002608 ctxt->sax->error(ctxt,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002609 "xmlParseCharRef: invalid hexadecimal value\n");
2610 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002611 val = 0;
2612 break;
2613 }
Daniel Veillard845664d1998-08-13 04:43:19 +00002614 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002615 }
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002616 if (CUR == ';')
Daniel Veillard260a68f1998-08-13 03:39:55 +00002617 NEXT;
2618 } else if ((CUR == '&') && (NXT(1) == '#')) {
2619 SKIP(2);
2620 while (CUR != ';') {
2621 if ((CUR >= '0') && (CUR <= '9'))
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002622 val = val * 10 + (CUR - '0');
Daniel Veillard260a68f1998-08-13 03:39:55 +00002623 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00002625 ctxt->sax->error(ctxt,
2626 "xmlParseCharRef: invalid decimal value\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002627 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002628 val = 0;
2629 break;
2630 }
Daniel Veillard845664d1998-08-13 04:43:19 +00002631 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002632 }
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002633 if (CUR == ';')
Daniel Veillard260a68f1998-08-13 03:39:55 +00002634 NEXT;
2635 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2637 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002638 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002639 }
2640 /*
2641 * Check the value IS_CHAR ...
2642 */
2643 if (IS_CHAR(val)) {
2644 buf[0] = (CHAR) val;
2645 buf[1] = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00002646 return(xmlStrndup(buf, 1));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002647 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00002649 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n",
2650 val);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002651 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002652 }
2653 return(NULL);
2654}
2655
Daniel Veillard11e00581998-10-24 18:27:49 +00002656/**
2657 * xmlParseEntityRef:
2658 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002659 *
2660 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002661 *
2662 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00002663 *
2664 * Returns the entity ref string or NULL if directly as input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002665 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002666CHAR *
2667xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002668 CHAR *ret = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00002669 const CHAR *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002670 CHAR *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00002671 xmlEntityPtr ent = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00002672 xmlParserInputPtr input = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002673
Daniel Veillardccb09631998-10-27 06:21:04 +00002674 q = CUR_PTR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002675 if (CUR == '&') {
2676 NEXT;
2677 name = xmlParseName(ctxt);
2678 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2680 ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002681 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002682 } else {
2683 if (CUR == ';') {
2684 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002685 /*
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002686 * Well Formedness Constraint if:
2687 * - standalone
2688 * or
2689 * - no external subset and no external parameter entities
2690 * referenced
2691 * then
2692 * the entity referenced must have been declared
2693 *
Daniel Veillard517752b1999-04-05 12:20:10 +00002694 * TODO: to be double checked !!! This is wrong !
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002695 */
Daniel Veillard517752b1999-04-05 12:20:10 +00002696 if (ctxt->sax != NULL) {
2697 if (ctxt->sax->getEntity != NULL)
2698 ent = ctxt->sax->getEntity(ctxt, name);
2699
2700 if (((ctxt->sax->isStandalone != NULL) &&
2701 ctxt->sax->isStandalone(ctxt) == 1) ||
2702 (((ctxt->sax->hasInternalSubset == NULL) ||
2703 ctxt->sax->hasInternalSubset(ctxt) == 0) &&
2704 ((ctxt->sax->hasExternalSubset == NULL) ||
2705 ctxt->sax->hasExternalSubset(ctxt) == 0))) {
2706 if (ent == NULL) {
2707 if ((ctxt->sax != NULL) &&
2708 (ctxt->sax->error != NULL))
2709 ctxt->sax->error(ctxt,
2710 "Entity '%s' not defined\n", name);
2711 ctxt->wellFormed = 0;
2712 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002713 }
Daniel Veillard517752b1999-04-05 12:20:10 +00002714 } else
2715 ctxt->wellFormed = 0;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002716
2717 /*
2718 * Well Formedness Constraint :
2719 * The referenced entity must be a parsed entity.
2720 */
2721 if (ent != NULL) {
2722 switch (ent->type) {
2723 case XML_INTERNAL_PARAMETER_ENTITY:
2724 case XML_EXTERNAL_PARAMETER_ENTITY:
2725 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2726 ctxt->sax->error(ctxt,
2727 "Attempt to reference the parameter entity '%s'\n", name);
2728 ctxt->wellFormed = 0;
2729 break;
2730
2731 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2733 ctxt->sax->error(ctxt,
2734 "Attempt to reference unparsed entity '%s'\n", name);
2735 ctxt->wellFormed = 0;
2736 break;
2737 }
2738 }
2739
2740 /*
2741 * Well Formedness Constraint :
2742 * The referenced entity must not lead to recursion !
2743 */
2744
2745 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00002746 * We parsed the entity reference correctly, call SAX
2747 * interface for the proper behaviour:
2748 * - get a new input stream
2749 * - or keep the reference inline
Daniel Veillard260a68f1998-08-13 03:39:55 +00002750 */
Daniel Veillard517752b1999-04-05 12:20:10 +00002751 if ((ctxt->sax) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardccb09631998-10-27 06:21:04 +00002752 input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2753 if (input != NULL)
2754 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002755 else {
Daniel Veillardccb09631998-10-27 06:21:04 +00002756 ret = xmlStrndup(q, CUR_PTR - q);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002757 }
2758 } else {
2759 char cst[2] = { '&', 0 };
2760
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002762 ctxt->sax->error(ctxt,
2763 "xmlParseEntityRef: expecting ';'\n");
2764 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002765 ret = xmlStrndup(cst, 1);
2766 ret = xmlStrcat(ret, name);
2767 }
2768 free(name);
2769 }
2770 }
2771 return(ret);
2772}
2773
Daniel Veillard11e00581998-10-24 18:27:49 +00002774/**
2775 * xmlParseReference:
2776 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002777 *
2778 * parse Reference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002779 *
2780 * [67] Reference ::= EntityRef | CharRef
Daniel Veillard1e346af1999-02-22 10:33:01 +00002781 *
2782 * Returns the entity string or NULL if handled directly by pushing
Daniel Veillardccb09631998-10-27 06:21:04 +00002783 * the entity value as the input.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002784 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002785CHAR *
2786xmlParseReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002787 if ((CUR == '&') && (NXT(1) == '#')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002788 CHAR *val = xmlParseCharRef(ctxt);
2789 xmlParserInputPtr in;
2790
2791 if (val != NULL) {
2792 in = xmlNewStringInputStream(ctxt, val);
2793 xmlPushInput(ctxt, in);
2794 }
2795 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002796 } else if (CUR == '&') {
Daniel Veillardccb09631998-10-27 06:21:04 +00002797 return(xmlParseEntityRef(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002798 }
2799 return(NULL);
2800}
2801
Daniel Veillard11e00581998-10-24 18:27:49 +00002802/**
2803 * xmlParsePEReference:
2804 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002805 *
2806 * parse PEReference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002807 *
2808 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00002809 *
2810 * Returns the entity content or NULL if handled directly.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002811 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002812CHAR *
2813xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002814 CHAR *ret = NULL;
2815 CHAR *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00002816 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00002817 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002818
2819 if (CUR == '%') {
2820 NEXT;
2821 name = xmlParseName(ctxt);
2822 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2824 ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002825 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002826 } else {
2827 if (CUR == ';') {
2828 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00002829 if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
2830 entity = ctxt->sax->getEntity(ctxt, name);
2831 /* TODO !!!! Must check that it's of the proper type !!! */
Daniel Veillard260a68f1998-08-13 03:39:55 +00002832 if (entity == NULL) {
Daniel Veillard42dc9b31998-11-09 01:17:21 +00002833 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2834 ctxt->sax->warning(ctxt,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002835 "xmlParsePEReference: %%%s; not found\n", name);
Daniel Veillardccb09631998-10-27 06:21:04 +00002836 } else {
2837 input = xmlNewEntityInputStream(ctxt, entity);
2838 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002839 }
2840 } else {
Daniel Veillardccb09631998-10-27 06:21:04 +00002841 char cst[2] = { '%', 0 };
Daniel Veillard260a68f1998-08-13 03:39:55 +00002842
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002844 ctxt->sax->error(ctxt,
2845 "xmlParsePEReference: expecting ';'\n");
2846 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002847 ret = xmlStrndup(cst, 1);
2848 ret = xmlStrcat(ret, name);
2849 }
2850 free(name);
2851 }
2852 }
2853 return(ret);
2854}
2855
Daniel Veillard11e00581998-10-24 18:27:49 +00002856/**
2857 * xmlParseDocTypeDecl :
2858 * @ctxt: an XML parser context
2859 *
2860 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00002861 *
2862 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2863 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
2864 */
2865
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002866void
2867xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002868 CHAR *name;
2869 CHAR *ExternalID = NULL;
2870 CHAR *URI = NULL;
2871
2872 /*
2873 * We know that '<!DOCTYPE' has been detected.
2874 */
2875 SKIP(9);
2876
2877 SKIP_BLANKS;
2878
2879 /*
2880 * Parse the DOCTYPE name.
2881 */
2882 name = xmlParseName(ctxt);
2883 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2885 ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002886 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002887 }
2888
2889 SKIP_BLANKS;
2890
2891 /*
2892 * Check for SystemID and ExternalID
2893 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00002894 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002895 SKIP_BLANKS;
2896
Daniel Veillard517752b1999-04-05 12:20:10 +00002897 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
2898 ctxt->sax->internalSubset(ctxt, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002899
2900 /*
2901 * Is there any DTD definition ?
2902 */
2903 if (CUR == '[') {
2904 NEXT;
2905 /*
2906 * Parse the succession of Markup declarations and
2907 * PEReferences.
2908 * Subsequence (markupdecl | PEReference | S)*
2909 */
2910 while (CUR != ']') {
2911 const CHAR *check = CUR_PTR;
2912
2913 SKIP_BLANKS;
2914 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00002915 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002916
2917 if (CUR_PTR == check) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2919 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002920 "xmlParseDocTypeDecl: error detected in Markup declaration\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002921 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002922 break;
2923 }
2924 }
2925 if (CUR == ']') NEXT;
2926 }
2927
2928 /*
2929 * We should be at the end of the DOCTYPE declaration.
2930 */
2931 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2933 ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002934 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002935 /* We shouldn't try to resynchronize ... */
2936 }
2937 NEXT;
2938
2939 /*
2940 * Cleanup, since we don't use all those identifiers
2941 * TODO : the DOCTYPE if available should be stored !
2942 */
2943 if (URI != NULL) free(URI);
2944 if (ExternalID != NULL) free(ExternalID);
2945 if (name != NULL) free(name);
2946}
2947
Daniel Veillard11e00581998-10-24 18:27:49 +00002948/**
2949 * xmlParseAttribute:
2950 * @ctxt: an XML parser context
Daniel Veillard517752b1999-04-05 12:20:10 +00002951 * @value: a CHAR ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00002952 *
2953 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00002954 *
2955 * [41] Attribute ::= Name Eq AttValue
2956 *
2957 * [25] Eq ::= S? '=' S?
2958 *
2959 * With namespace:
2960 *
2961 * [NS 11] Attribute ::= QName Eq AttValue
2962 *
2963 * Also the case QName == xmlns:??? is handled independently as a namespace
2964 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00002965 *
Daniel Veillard517752b1999-04-05 12:20:10 +00002966 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002967 */
2968
Daniel Veillard517752b1999-04-05 12:20:10 +00002969CHAR *
2970xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002971 CHAR *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002972
Daniel Veillard517752b1999-04-05 12:20:10 +00002973 *value = NULL;
2974 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002975 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2977 ctxt->sax->error(ctxt, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002978 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00002979 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002980 }
2981
2982 /*
2983 * read the value
2984 */
2985 SKIP_BLANKS;
2986 if (CUR == '=') {
2987 NEXT;
2988 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00002989 val = xmlParseAttValue(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002990 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002992 ctxt->sax->error(ctxt,
2993 "Specification mandate value for attribute %s\n", name);
2994 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00002995 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002996 }
2997
Daniel Veillard517752b1999-04-05 12:20:10 +00002998 *value = val;
2999 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003000}
3001
Daniel Veillard11e00581998-10-24 18:27:49 +00003002/**
3003 * xmlParseStartTag:
3004 * @ctxt: an XML parser context
3005 *
3006 * parse a start of tag either for rule element or
3007 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003008 *
3009 * [40] STag ::= '<' Name (S Attribute)* S? '>'
3010 *
3011 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3012 *
3013 * With namespace:
3014 *
3015 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3016 *
3017 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
3018 */
3019
Daniel Veillard517752b1999-04-05 12:20:10 +00003020void
Daniel Veillard1e346af1999-02-22 10:33:01 +00003021xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003022 CHAR *name;
3023 CHAR *attname;
3024 CHAR *attvalue;
3025 const CHAR **atts = NULL;
3026 int nbatts = 0;
3027 int maxatts = 0;
3028 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003029
Daniel Veillard517752b1999-04-05 12:20:10 +00003030 if (CUR != '<') return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003031 NEXT;
3032
Daniel Veillard517752b1999-04-05 12:20:10 +00003033 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003034 if (name == NULL) {
3035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3036 ctxt->sax->error(ctxt,
3037 "xmlParseStartTag: invalid element name\n");
3038 ctxt->wellFormed = 0;
Daniel Veillard517752b1999-04-05 12:20:10 +00003039 return;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003040 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003041
3042 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00003043 * Now parse the attributes, it ends up with the ending
3044 *
3045 * (S Attribute)* S?
3046 */
3047 SKIP_BLANKS;
3048 while ((IS_CHAR(CUR)) &&
3049 (CUR != '>') &&
3050 ((CUR != '/') || (NXT(1) != '>'))) {
3051 const CHAR *q = CUR_PTR;
3052
Daniel Veillard517752b1999-04-05 12:20:10 +00003053 attname = xmlParseAttribute(ctxt, &attvalue);
3054 if ((attname != NULL) && (attvalue != NULL)) {
3055 /*
3056 * Well formedness requires at most one declaration of an attribute
3057 */
3058 for (i = 0; i < nbatts;i += 2) {
3059 if (!xmlStrcmp(atts[i], attname)) {
3060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3061 ctxt->sax->error(ctxt, "Attribute %s redefined\n",
3062 name);
3063 ctxt->wellFormed = 0;
3064 free(attname);
3065 free(attvalue);
3066 break;
3067 }
3068 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003069
Daniel Veillard517752b1999-04-05 12:20:10 +00003070 /*
3071 * Add the pair to atts
3072 */
3073 if (atts == NULL) {
3074 maxatts = 10;
3075 atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
3076 if (atts == NULL) {
3077 fprintf(stderr, "malloc of %d byte failed\n",
3078 maxatts * sizeof(CHAR *));
3079 return;
3080 }
3081 } else if (nbatts + 2 < maxatts) {
3082 maxatts *= 2;
3083 atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
3084 if (atts == NULL) {
3085 fprintf(stderr, "realloc of %d byte failed\n",
3086 maxatts * sizeof(CHAR *));
3087 return;
3088 }
3089 }
3090 atts[nbatts++] = attname;
3091 atts[nbatts++] = attvalue;
3092 atts[nbatts] = NULL;
3093 atts[nbatts + 1] = NULL;
3094 }
3095
3096 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003097 if (q == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3099 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003100 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003101 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003102 break;
3103 }
3104 }
3105
3106 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00003107 * SAX: Start of Element !
3108 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003109 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
3110 ctxt->sax->startElement(ctxt, name, atts);
3111
Daniel Veillardccb09631998-10-27 06:21:04 +00003112 free(name);
Daniel Veillard517752b1999-04-05 12:20:10 +00003113 if (atts != NULL) {
3114 for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
3115 free(atts);
3116 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003117}
3118
Daniel Veillard11e00581998-10-24 18:27:49 +00003119/**
3120 * xmlParseEndTag:
3121 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00003122 *
3123 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00003124 *
3125 * [42] ETag ::= '</' Name S? '>'
3126 *
3127 * With namespace
3128 *
Daniel Veillard517752b1999-04-05 12:20:10 +00003129 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00003130 */
3131
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003132void
Daniel Veillard517752b1999-04-05 12:20:10 +00003133xmlParseEndTag(xmlParserCtxtPtr ctxt) {
3134 CHAR *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003135
3136 if ((CUR != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3138 ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003139 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003140 return;
3141 }
3142 SKIP(2);
3143
Daniel Veillard517752b1999-04-05 12:20:10 +00003144 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003145
3146 /*
3147 * We should definitely be at the ending "S? '>'" part
3148 */
3149 SKIP_BLANKS;
3150 if ((!IS_CHAR(CUR)) || (CUR != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3152 ctxt->sax->error(ctxt, "End tag : expected '>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003153 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003154 } else
3155 NEXT;
3156
Daniel Veillard517752b1999-04-05 12:20:10 +00003157 /*
3158 * SAX: End of Tag
3159 */
3160 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3161 ctxt->sax->endElement(ctxt, name);
3162
3163 if (name != NULL)
3164 free(name);
3165
Daniel Veillard260a68f1998-08-13 03:39:55 +00003166 return;
3167}
3168
Daniel Veillard11e00581998-10-24 18:27:49 +00003169/**
3170 * xmlParseCDSect:
3171 * @ctxt: an XML parser context
3172 *
3173 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003174 *
3175 * [18] CDSect ::= CDStart CData CDEnd
3176 *
3177 * [19] CDStart ::= '<![CDATA['
3178 *
3179 * [20] Data ::= (Char* - (Char* ']]>' Char*))
3180 *
3181 * [21] CDEnd ::= ']]>'
3182 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003183void
3184xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003185 const CHAR *r, *s, *base;
3186
3187 if ((CUR == '<') && (NXT(1) == '!') &&
3188 (NXT(2) == '[') && (NXT(3) == 'C') &&
3189 (NXT(4) == 'D') && (NXT(5) == 'A') &&
3190 (NXT(6) == 'T') && (NXT(7) == 'A') &&
3191 (NXT(8) == '[')) {
3192 SKIP(9);
3193 } else
3194 return;
3195 base = CUR_PTR;
3196 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3198 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003199 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003200 return;
3201 }
3202 r = NEXT;
3203 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3205 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003206 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003207 return;
3208 }
3209 s = NEXT;
3210 while (IS_CHAR(CUR) &&
3211 ((*r != ']') || (*s != ']') || (CUR != '>'))) {
3212 r++;s++;NEXT;
3213 }
3214 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3216 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003217 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003218 return;
3219 }
3220
3221 /*
3222 * Ok the segment [base CUR_PTR] is to be consumed as chars.
3223 */
3224 if (ctxt->sax != NULL) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003225 if (areBlanks(ctxt, base, CUR_PTR - base)) {
3226 if (ctxt->sax->ignorableWhitespace != NULL)
3227 ctxt->sax->ignorableWhitespace(ctxt, base,
3228 (CUR_PTR - base) - 2);
3229 } else {
3230 if (ctxt->sax->characters != NULL)
3231 ctxt->sax->characters(ctxt, base, (CUR_PTR - base) - 2);
3232 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003233 }
3234}
3235
Daniel Veillard11e00581998-10-24 18:27:49 +00003236/**
3237 * xmlParseContent:
3238 * @ctxt: an XML parser context
3239 *
3240 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00003241 *
3242 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
3243 */
3244
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003245void
3246xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003247 while ((CUR != '<') || (NXT(1) != '/')) {
3248 const CHAR *test = CUR_PTR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003249
3250 /*
3251 * First case : a Processing Instruction.
3252 */
3253 if ((CUR == '<') && (NXT(1) == '?')) {
3254 xmlParsePI(ctxt);
3255 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003256
Daniel Veillard260a68f1998-08-13 03:39:55 +00003257 /*
3258 * Second case : a CDSection
3259 */
3260 else if ((CUR == '<') && (NXT(1) == '!') &&
3261 (NXT(2) == '[') && (NXT(3) == 'C') &&
3262 (NXT(4) == 'D') && (NXT(5) == 'A') &&
3263 (NXT(6) == 'T') && (NXT(7) == 'A') &&
3264 (NXT(8) == '[')) {
3265 xmlParseCDSect(ctxt);
3266 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003267
Daniel Veillard260a68f1998-08-13 03:39:55 +00003268 /*
3269 * Third case : a comment
3270 */
3271 else if ((CUR == '<') && (NXT(1) == '!') &&
3272 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003273 xmlParseComment(ctxt, 1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003274 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003275
Daniel Veillard260a68f1998-08-13 03:39:55 +00003276 /*
3277 * Fourth case : a sub-element.
3278 */
3279 else if (CUR == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00003280 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003281 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003282
Daniel Veillard260a68f1998-08-13 03:39:55 +00003283 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00003284 * Fifth case : a reference. If if has not been resolved,
3285 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00003286 */
3287 else if (CUR == '&') {
Daniel Veillardccb09631998-10-27 06:21:04 +00003288 CHAR *val = xmlParseReference(ctxt);
3289 if (val != NULL) {
3290 if (val[0] != '&') {
3291 /*
3292 * inline predefined entity.
3293 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003294 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3295 ctxt->sax->characters(ctxt, val, xmlStrlen(val));
Daniel Veillardccb09631998-10-27 06:21:04 +00003296 } else {
3297 /*
3298 * user defined entity, create a node.
3299 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003300 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL))
3301 ctxt->sax->reference(ctxt, val);
Daniel Veillardccb09631998-10-27 06:21:04 +00003302 }
3303 free(val);
3304 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003305 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003306
Daniel Veillard260a68f1998-08-13 03:39:55 +00003307 /*
3308 * Last case, text. Note that References are handled directly.
3309 */
3310 else {
3311 xmlParseCharData(ctxt, 0);
3312 }
3313
3314 /*
3315 * Pop-up of finished entities.
3316 */
Daniel Veillardbc50b591999-03-01 12:28:53 +00003317 while ((CUR == 0) && (ctxt->inputNr > 1))
3318 xmlPopInput(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003319
3320 if (test == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003322 ctxt->sax->error(ctxt,
3323 "detected an error in element content\n");
3324 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003325 break;
3326 }
3327 }
3328}
3329
Daniel Veillard11e00581998-10-24 18:27:49 +00003330/**
3331 * xmlParseElement:
3332 * @ctxt: an XML parser context
3333 *
3334 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00003335 *
3336 * [39] element ::= EmptyElemTag | STag content ETag
3337 *
3338 * [41] Attribute ::= Name Eq AttValue
3339 */
3340
Daniel Veillard517752b1999-04-05 12:20:10 +00003341void
Daniel Veillard1e346af1999-02-22 10:33:01 +00003342xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003343 const CHAR *openTag = CUR_PTR;
3344 xmlParserNodeInfo node_info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003345
3346 /* Capture start position */
3347 node_info.begin_pos = CUR_PTR - ctxt->input->base;
3348 node_info.begin_line = ctxt->input->line;
3349
Daniel Veillard517752b1999-04-05 12:20:10 +00003350 xmlParseStartTag(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003351
3352 /*
3353 * Check for an Empty Element.
3354 */
3355 if ((CUR == '/') && (NXT(1) == '>')) {
3356 SKIP(2);
Daniel Veillard517752b1999-04-05 12:20:10 +00003357 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3358 ctxt->sax->endElement(ctxt, NULL);
3359 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003360 }
3361 if (CUR == '>') NEXT;
3362 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard242590e1998-11-13 18:04:35 +00003364 ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",
3365 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003366 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003367
3368 /*
3369 * end of parsing of this node.
Daniel Veillard517752b1999-04-05 12:20:10 +00003370 * TODO !!!!!!!! check the macro in case of non DOM parsing
Daniel Veillard260a68f1998-08-13 03:39:55 +00003371 */
3372 nodePop(ctxt);
3373
Daniel Veillard517752b1999-04-05 12:20:10 +00003374 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003375 }
3376
3377 /*
3378 * Parse the content of the element:
3379 */
3380 xmlParseContent(ctxt);
3381 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard242590e1998-11-13 18:04:35 +00003383 ctxt->sax->error(ctxt,
3384 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003385 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003386
3387 /*
3388 * end of parsing of this node.
Daniel Veillard517752b1999-04-05 12:20:10 +00003389 * TODO !!!!!!!! check the macro in case of non DOM parsing
Daniel Veillard260a68f1998-08-13 03:39:55 +00003390 */
3391 nodePop(ctxt);
3392
Daniel Veillard517752b1999-04-05 12:20:10 +00003393 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003394 }
3395
3396 /*
3397 * parse the end of tag: '</' should be here.
3398 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003399 xmlParseEndTag(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003400}
3401
Daniel Veillard11e00581998-10-24 18:27:49 +00003402/**
3403 * xmlParseVersionNum:
3404 * @ctxt: an XML parser context
3405 *
3406 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003407 *
3408 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00003409 *
3410 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003411 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003412CHAR *
3413xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003414 const CHAR *q = CUR_PTR;
3415 CHAR *ret;
3416
3417 while (IS_CHAR(CUR) &&
3418 (((CUR >= 'a') && (CUR <= 'z')) ||
3419 ((CUR >= 'A') && (CUR <= 'Z')) ||
3420 ((CUR >= '0') && (CUR <= '9')) ||
3421 (CUR == '_') || (CUR == '.') ||
3422 (CUR == ':') || (CUR == '-'))) NEXT;
3423 ret = xmlStrndup(q, CUR_PTR - q);
3424 return(ret);
3425}
3426
Daniel Veillard11e00581998-10-24 18:27:49 +00003427/**
3428 * xmlParseVersionInfo:
3429 * @ctxt: an XML parser context
3430 *
3431 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003432 *
3433 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3434 *
3435 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00003436 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003437 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00003438 */
3439
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003440CHAR *
3441xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003442 CHAR *version = NULL;
3443 const CHAR *q;
3444
3445 if ((CUR == 'v') && (NXT(1) == 'e') &&
3446 (NXT(2) == 'r') && (NXT(3) == 's') &&
3447 (NXT(4) == 'i') && (NXT(5) == 'o') &&
3448 (NXT(6) == 'n')) {
3449 SKIP(7);
3450 SKIP_BLANKS;
3451 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3453 ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003454 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003455 return(NULL);
3456 }
3457 NEXT;
3458 SKIP_BLANKS;
3459 if (CUR == '"') {
3460 NEXT;
3461 q = CUR_PTR;
3462 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003463 if (CUR != '"') {
3464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3465 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003466 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003467 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003468 NEXT;
3469 } else if (CUR == '\''){
3470 NEXT;
3471 q = CUR_PTR;
3472 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003473 if (CUR != '\'') {
3474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3475 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003476 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003477 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003478 NEXT;
3479 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003481 ctxt->sax->error(ctxt,
3482 "xmlParseVersionInfo : expected ' or \"\n");
3483 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003484 }
3485 }
3486 return(version);
3487}
3488
Daniel Veillard11e00581998-10-24 18:27:49 +00003489/**
3490 * xmlParseEncName:
3491 * @ctxt: an XML parser context
3492 *
3493 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003494 *
3495 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00003496 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003497 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003498 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003499CHAR *
3500xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003501 const CHAR *q = CUR_PTR;
3502 CHAR *ret = NULL;
3503
3504 if (((CUR >= 'a') && (CUR <= 'z')) ||
3505 ((CUR >= 'A') && (CUR <= 'Z'))) {
3506 NEXT;
3507 while (IS_CHAR(CUR) &&
3508 (((CUR >= 'a') && (CUR <= 'z')) ||
3509 ((CUR >= 'A') && (CUR <= 'Z')) ||
3510 ((CUR >= '0') && (CUR <= '9')) ||
3511 (CUR == '-'))) NEXT;
3512 ret = xmlStrndup(q, CUR_PTR - q);
3513 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3515 ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003516 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003517 }
3518 return(ret);
3519}
3520
Daniel Veillard11e00581998-10-24 18:27:49 +00003521/**
3522 * xmlParseEncodingDecl:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003526 *
3527 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00003528 *
3529 * TODO: this should setup the conversion filters.
3530 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003531 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003532 */
3533
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003534CHAR *
3535xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003536 CHAR *encoding = NULL;
3537 const CHAR *q;
3538
3539 SKIP_BLANKS;
3540 if ((CUR == 'e') && (NXT(1) == 'n') &&
3541 (NXT(2) == 'c') && (NXT(3) == 'o') &&
3542 (NXT(4) == 'd') && (NXT(5) == 'i') &&
3543 (NXT(6) == 'n') && (NXT(7) == 'g')) {
3544 SKIP(8);
3545 SKIP_BLANKS;
3546 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3548 ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003549 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003550 return(NULL);
3551 }
3552 NEXT;
3553 SKIP_BLANKS;
3554 if (CUR == '"') {
3555 NEXT;
3556 q = CUR_PTR;
3557 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003558 if (CUR != '"') {
3559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3560 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003561 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003562 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003563 NEXT;
3564 } else if (CUR == '\''){
3565 NEXT;
3566 q = CUR_PTR;
3567 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003568 if (CUR != '\'') {
3569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3570 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003571 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003572 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003573 NEXT;
3574 } else if (CUR == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003576 ctxt->sax->error(ctxt,
3577 "xmlParseEncodingDecl : expected ' or \"\n");
3578 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003579 }
3580 }
3581 return(encoding);
3582}
3583
Daniel Veillard11e00581998-10-24 18:27:49 +00003584/**
3585 * xmlParseSDDecl:
3586 * @ctxt: an XML parser context
3587 *
3588 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003589 *
3590 * [32] SDDecl ::= S 'standalone' Eq
3591 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00003592 *
3593 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00003594 */
3595
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003596int
3597xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003598 int standalone = -1;
3599
3600 SKIP_BLANKS;
3601 if ((CUR == 's') && (NXT(1) == 't') &&
3602 (NXT(2) == 'a') && (NXT(3) == 'n') &&
3603 (NXT(4) == 'd') && (NXT(5) == 'a') &&
3604 (NXT(6) == 'l') && (NXT(7) == 'o') &&
3605 (NXT(8) == 'n') && (NXT(9) == 'e')) {
3606 SKIP(10);
3607 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003609 ctxt->sax->error(ctxt,
3610 "XML standalone declaration : expected '='\n");
3611 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003612 return(standalone);
3613 }
3614 NEXT;
3615 SKIP_BLANKS;
3616 if (CUR == '\''){
3617 NEXT;
3618 if ((CUR == 'n') && (NXT(1) == 'o')) {
3619 standalone = 0;
3620 SKIP(2);
3621 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3622 (NXT(2) == 's')) {
3623 standalone = 1;
3624 SKIP(3);
3625 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003628 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003629 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003630 if (CUR != '\'') {
3631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3632 ctxt->sax->error(ctxt, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003633 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003634 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003635 NEXT;
3636 } else if (CUR == '"'){
3637 NEXT;
3638 if ((CUR == 'n') && (NXT(1) == 'o')) {
3639 standalone = 0;
3640 SKIP(2);
3641 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3642 (NXT(2) == 's')) {
3643 standalone = 1;
3644 SKIP(3);
3645 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003647 ctxt->sax->error(ctxt,
3648 "standalone accepts only 'yes' or 'no'\n");
3649 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003650 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003651 if (CUR != '"') {
3652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3653 ctxt->sax->error(ctxt, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003654 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003655 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003656 NEXT;
3657 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3659 ctxt->sax->error(ctxt, "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003660 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003661 }
3662 }
3663 return(standalone);
3664}
3665
Daniel Veillard11e00581998-10-24 18:27:49 +00003666/**
3667 * xmlParseXMLDecl:
3668 * @ctxt: an XML parser context
3669 *
3670 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00003671 *
3672 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
3673 */
3674
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003675void
3676xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003677 CHAR *version;
3678
3679 /*
3680 * We know that '<?xml' is here.
3681 */
3682 SKIP(5);
3683
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003684 if (!IS_BLANK(CUR)) {
3685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3686 ctxt->sax->error(ctxt, "Blank needed after '<?xml'\n");
3687 ctxt->wellFormed = 0;
3688 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003689 SKIP_BLANKS;
3690
3691 /*
3692 * We should have the VersionInfo here.
3693 */
3694 version = xmlParseVersionInfo(ctxt);
3695 if (version == NULL)
3696 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00003697 ctxt->version = xmlStrdup(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003698 free(version);
3699
3700 /*
3701 * We may have the encoding declaration
3702 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003703 if (!IS_BLANK(CUR)) {
3704 if ((CUR == '?') && (NXT(1) == '>')) {
3705 SKIP(2);
3706 return;
3707 }
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt, "Blank needed here\n");
3710 ctxt->wellFormed = 0;
3711 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003712 ctxt->encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003713
3714 /*
3715 * We may have the standalone status.
3716 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003717 if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003718 if ((CUR == '?') && (NXT(1) == '>')) {
3719 SKIP(2);
3720 return;
3721 }
3722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3723 ctxt->sax->error(ctxt, "Blank needed here\n");
3724 ctxt->wellFormed = 0;
3725 }
3726 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00003727 ctxt->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003728
3729 SKIP_BLANKS;
3730 if ((CUR == '?') && (NXT(1) == '>')) {
3731 SKIP(2);
3732 } else if (CUR == '>') {
3733 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003736 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003737 NEXT;
3738 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3740 ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003741 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003742 MOVETO_ENDTAG(CUR_PTR);
3743 NEXT;
3744 }
3745}
3746
Daniel Veillard11e00581998-10-24 18:27:49 +00003747/**
3748 * xmlParseMisc:
3749 * @ctxt: an XML parser context
3750 *
3751 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003752 *
3753 * [27] Misc ::= Comment | PI | S
3754 */
3755
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003756void
3757xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003758 while (((CUR == '<') && (NXT(1) == '?')) ||
3759 ((CUR == '<') && (NXT(1) == '!') &&
3760 (NXT(2) == '-') && (NXT(3) == '-')) ||
3761 IS_BLANK(CUR)) {
3762 if ((CUR == '<') && (NXT(1) == '?')) {
3763 xmlParsePI(ctxt);
3764 } else if (IS_BLANK(CUR)) {
3765 NEXT;
3766 } else
3767 xmlParseComment(ctxt, 0);
3768 }
3769}
3770
Daniel Veillard11e00581998-10-24 18:27:49 +00003771/**
3772 * xmlParseDocument :
3773 * @ctxt: an XML parser context
3774 *
3775 * parse an XML document (and build a tree if using the standard SAX
3776 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00003777 *
3778 * [1] document ::= prolog element Misc*
3779 *
3780 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00003781 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003782 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00003783 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003784 */
3785
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003786int
3787xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003788 xmlDefaultSAXHandlerInit();
3789
3790 /*
3791 * SAX: beginning of the document processing.
3792 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003793 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard260a68f1998-08-13 03:39:55 +00003794 ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003795
3796 /*
3797 * We should check for encoding here and plug-in some
3798 * conversion code TODO !!!!
3799 */
3800
3801 /*
3802 * Wipe out everything which is before the first '<'
3803 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003804 if (IS_BLANK(CUR)) {
3805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3806 ctxt->sax->error(ctxt,
3807 "Extra spaces at the beginning of the document are not allowed\n");
3808 ctxt->wellFormed = 0;
3809 SKIP_BLANKS;
3810 }
3811
3812 if (CUR == 0) {
3813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3814 ctxt->sax->error(ctxt, "Document is empty\n");
3815 ctxt->wellFormed = 0;
3816 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003817
3818 /*
3819 * Check for the XMLDecl in the Prolog.
3820 */
3821 if ((CUR == '<') && (NXT(1) == '?') &&
3822 (NXT(2) == 'x') && (NXT(3) == 'm') &&
3823 (NXT(4) == 'l')) {
3824 xmlParseXMLDecl(ctxt);
3825 /* SKIP_EOL(cur); */
3826 SKIP_BLANKS;
3827 } else if ((CUR == '<') && (NXT(1) == '?') &&
3828 (NXT(2) == 'X') && (NXT(3) == 'M') &&
3829 (NXT(4) == 'L')) {
3830 /*
3831 * The first drafts were using <?XML and the final W3C REC
3832 * now use <?xml ...
3833 */
3834 xmlParseXMLDecl(ctxt);
3835 /* SKIP_EOL(cur); */
3836 SKIP_BLANKS;
3837 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00003838 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003839 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003840 if ((ctxt->sax) && (ctxt->sax->startDocument))
3841 ctxt->sax->startDocument(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003842
3843 /*
3844 * The Misc part of the Prolog
3845 */
3846 xmlParseMisc(ctxt);
3847
3848 /*
3849 * Then possibly doc type declaration(s) and more Misc
3850 * (doctypedecl Misc*)?
3851 */
3852 if ((CUR == '<') && (NXT(1) == '!') &&
3853 (NXT(2) == 'D') && (NXT(3) == 'O') &&
3854 (NXT(4) == 'C') && (NXT(5) == 'T') &&
3855 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
3856 (NXT(8) == 'E')) {
3857 xmlParseDocTypeDecl(ctxt);
3858 xmlParseMisc(ctxt);
3859 }
3860
3861 /*
3862 * Time to start parsing the tree itself
3863 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003864 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003865
3866 /*
3867 * The Misc part at the end
3868 */
3869 xmlParseMisc(ctxt);
3870
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003871 if (CUR != 0) {
3872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3873 ctxt->sax->error(ctxt,
3874 "Extra content at the end of the document\n");
3875 ctxt->wellFormed = 0;
3876 }
3877
Daniel Veillard260a68f1998-08-13 03:39:55 +00003878 /*
3879 * SAX: end of the document processing.
3880 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003881 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Daniel Veillard260a68f1998-08-13 03:39:55 +00003882 ctxt->sax->endDocument(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003883 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003884 return(0);
3885}
3886
Daniel Veillard11e00581998-10-24 18:27:49 +00003887/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00003888 * xmlCreateFileParserCtxt :
3889 * @cur: a pointer to an array of CHAR
3890 *
3891 * Create a parser context for an XML in-memory document.
3892 *
3893 * Returns the new parser context or NULL
3894 */
3895xmlParserCtxtPtr
3896xmlCreateDocParserCtxt(CHAR *cur) {
3897 xmlParserCtxtPtr ctxt;
3898 xmlParserInputPtr input;
3899
3900 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3901 if (ctxt == NULL) {
3902 perror("malloc");
3903 return(NULL);
3904 }
3905 xmlInitParserCtxt(ctxt);
3906 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3907 if (input == NULL) {
3908 perror("malloc");
3909 free(ctxt);
3910 return(NULL);
3911 }
3912
3913 input->filename = NULL;
3914 input->line = 1;
3915 input->col = 1;
3916 input->base = cur;
3917 input->cur = cur;
3918 input->free = NULL;
3919
3920 inputPush(ctxt, input);
3921 return(ctxt);
3922}
3923
3924/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003925 * xmlSAXParseDoc :
3926 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00003927 * @cur: a pointer to an array of CHAR
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003928 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
3929 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00003930 *
3931 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003932 * It use the given SAX function block to handle the parsing callback.
3933 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00003934 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003935 * Returns the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00003936 */
3937
Daniel Veillard1e346af1999-02-22 10:33:01 +00003938xmlDocPtr
3939xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003940 xmlDocPtr ret;
3941 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003942
3943 if (cur == NULL) return(NULL);
3944
Daniel Veillardd692aa41999-02-28 21:54:31 +00003945
3946 ctxt = xmlCreateDocParserCtxt(cur);
3947 if (ctxt == NULL) return(NULL);
Daniel Veillard242590e1998-11-13 18:04:35 +00003948 if (sax != NULL) ctxt->sax = sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003949
3950 xmlParseDocument(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00003951 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003952 else {
3953 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00003954 xmlFreeDoc(ctxt->myDoc);
3955 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003956 }
Daniel Veillardd692aa41999-02-28 21:54:31 +00003957 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003958
3959 return(ret);
3960}
3961
Daniel Veillard11e00581998-10-24 18:27:49 +00003962/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003963 * xmlParseDoc :
3964 * @cur: a pointer to an array of CHAR
3965 *
3966 * parse an XML in-memory document and build a tree.
3967 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003968 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003969 */
3970
Daniel Veillard1e346af1999-02-22 10:33:01 +00003971xmlDocPtr
3972xmlParseDoc(CHAR *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003973 return(xmlSAXParseDoc(NULL, cur, 0));
3974}
3975
3976/**
3977 * xmlRecoverDoc :
3978 * @cur: a pointer to an array of CHAR
3979 *
3980 * parse an XML in-memory document and build a tree.
3981 * In the case the document is not Well Formed, a tree is built anyway
3982 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003983 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003984 */
3985
Daniel Veillard1e346af1999-02-22 10:33:01 +00003986xmlDocPtr
3987xmlRecoverDoc(CHAR *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003988 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003989}
3990
3991/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00003992 * xmlCreateFileParserCtxt :
Daniel Veillard11e00581998-10-24 18:27:49 +00003993 * @filename: the filename
3994 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00003995 * Create a parser context for a file content.
3996 * Automatic support for ZLIB/Compress compressed document is provided
3997 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +00003998 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00003999 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004000 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00004001xmlParserCtxtPtr
4002xmlCreateFileParserCtxt(const char *filename)
4003{
4004 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004005#ifdef HAVE_ZLIB_H
4006 gzFile input;
4007#else
4008 int input;
4009#endif
4010 int res;
Daniel Veillard27271681998-10-30 06:39:40 +00004011 int len;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004012 struct stat buf;
4013 char *buffer;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004014 xmlParserInputPtr inputStream;
4015
4016 res = stat(filename, &buf);
4017 if (res < 0) return(NULL);
4018
4019#ifdef HAVE_ZLIB_H
Daniel Veillard27271681998-10-30 06:39:40 +00004020 len = (buf.st_size * 8) + 1000;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004021retry_bigger:
Daniel Veillard27271681998-10-30 06:39:40 +00004022 buffer = malloc(len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004023#else
Daniel Veillard27271681998-10-30 06:39:40 +00004024 len = buf.st_size + 100;
4025 buffer = malloc(len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004026#endif
4027 if (buffer == NULL) {
4028 perror("malloc");
4029 return(NULL);
4030 }
4031
Daniel Veillard27271681998-10-30 06:39:40 +00004032 memset(buffer, 0, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004033#ifdef HAVE_ZLIB_H
4034 input = gzopen (filename, "r");
4035 if (input == NULL) {
4036 fprintf (stderr, "Cannot read file %s :\n", filename);
4037 perror ("gzopen failed");
4038 return(NULL);
4039 }
4040#else
Daniel Veillard64068b31999-03-24 20:42:16 +00004041#ifdef WIN32
4042 input = _open (filename, O_RDONLY | _O_BINARY);
4043#else
Daniel Veillard260a68f1998-08-13 03:39:55 +00004044 input = open (filename, O_RDONLY);
Daniel Veillard64068b31999-03-24 20:42:16 +00004045#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +00004046 if (input < 0) {
4047 fprintf (stderr, "Cannot read file %s :\n", filename);
4048 perror ("open failed");
4049 return(NULL);
4050 }
4051#endif
4052#ifdef HAVE_ZLIB_H
Daniel Veillard27271681998-10-30 06:39:40 +00004053 res = gzread(input, buffer, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004054#else
4055 res = read(input, buffer, buf.st_size);
4056#endif
4057 if (res < 0) {
4058 fprintf (stderr, "Cannot read file %s :\n", filename);
4059#ifdef HAVE_ZLIB_H
4060 perror ("gzread failed");
4061#else
4062 perror ("read failed");
4063#endif
4064 return(NULL);
4065 }
4066#ifdef HAVE_ZLIB_H
4067 gzclose(input);
Daniel Veillard27271681998-10-30 06:39:40 +00004068 if (res >= len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004069 free(buffer);
Daniel Veillard27271681998-10-30 06:39:40 +00004070 len *= 2;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004071 goto retry_bigger;
4072 }
4073 buf.st_size = res;
4074#else
4075 close(input);
4076#endif
4077
4078 buffer[buf.st_size] = '\0';
4079
4080 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4081 if (ctxt == NULL) {
4082 perror("malloc");
4083 return(NULL);
4084 }
4085 xmlInitParserCtxt(ctxt);
4086 inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4087 if (inputStream == NULL) {
4088 perror("malloc");
4089 free(ctxt);
4090 return(NULL);
4091 }
4092
4093 inputStream->filename = strdup(filename);
4094 inputStream->line = 1;
4095 inputStream->col = 1;
4096
4097 /*
4098 * TODO : plug some encoding conversion routines here. !!!
4099 */
4100 inputStream->base = buffer;
4101 inputStream->cur = buffer;
Daniel Veillardd692aa41999-02-28 21:54:31 +00004102 inputStream->free = (xmlParserInputDeallocate) free;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004103
4104 inputPush(ctxt, inputStream);
Daniel Veillardd692aa41999-02-28 21:54:31 +00004105 return(ctxt);
4106}
4107
4108/**
4109 * xmlSAXParseFile :
4110 * @sax: the SAX handler block
4111 * @filename: the filename
4112 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4113 * documents
4114 *
4115 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4116 * compressed document is provided by default if found at compile-time.
4117 * It use the given SAX function block to handle the parsing callback.
4118 * If sax is NULL, fallback to the default DOM tree building routines.
4119 *
4120 * Returns the resulting document tree
4121 */
4122
4123xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
4124 int recovery) {
4125 xmlDocPtr ret;
4126 xmlParserCtxtPtr ctxt;
4127
4128 ctxt = xmlCreateFileParserCtxt(filename);
4129 if (ctxt == NULL) return(NULL);
4130 if (sax != NULL) ctxt->sax = sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004131
4132 xmlParseDocument(ctxt);
4133
Daniel Veillard517752b1999-04-05 12:20:10 +00004134 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004135 else {
4136 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00004137 xmlFreeDoc(ctxt->myDoc);
4138 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004139 }
Daniel Veillardd692aa41999-02-28 21:54:31 +00004140 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004141
4142 return(ret);
4143}
4144
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004145/**
4146 * xmlParseFile :
4147 * @filename: the filename
4148 *
4149 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4150 * compressed document is provided by default if found at compile-time.
4151 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004152 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004153 */
4154
4155xmlDocPtr xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004156 return(xmlSAXParseFile(NULL, filename, 0));
4157}
4158
4159/**
4160 * xmlRecoverFile :
4161 * @filename: the filename
4162 *
4163 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4164 * compressed document is provided by default if found at compile-time.
4165 * In the case the document is not Well Formed, a tree is built anyway
4166 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004167 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004168 */
4169
4170xmlDocPtr xmlRecoverFile(const char *filename) {
4171 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004172}
Daniel Veillard260a68f1998-08-13 03:39:55 +00004173
Daniel Veillard11e00581998-10-24 18:27:49 +00004174/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00004175 * xmlCreateMemoryParserCtxt :
Daniel Veillard1e346af1999-02-22 10:33:01 +00004176 * @buffer: an pointer to a char array
Daniel Veillard11e00581998-10-24 18:27:49 +00004177 * @size: the siwe of the array
4178 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00004179 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +00004180 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00004181 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004182 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00004183xmlParserCtxtPtr
4184xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004185 xmlParserCtxtPtr ctxt;
4186 xmlParserInputPtr input;
4187
4188 buffer[size - 1] = '\0';
4189
4190 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4191 if (ctxt == NULL) {
4192 perror("malloc");
4193 return(NULL);
4194 }
4195 xmlInitParserCtxt(ctxt);
4196 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4197 if (input == NULL) {
4198 perror("malloc");
Daniel Veillardccb09631998-10-27 06:21:04 +00004199 free(ctxt->nodeTab);
4200 free(ctxt->inputTab);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004201 free(ctxt);
4202 return(NULL);
4203 }
4204
4205 input->filename = NULL;
4206 input->line = 1;
4207 input->col = 1;
4208
4209 /*
4210 * TODO : plug some encoding conversion routines here. !!!
4211 */
4212 input->base = buffer;
4213 input->cur = buffer;
Daniel Veillardd692aa41999-02-28 21:54:31 +00004214 input->free = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004215
4216 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00004217 return(ctxt);
4218}
4219
4220/**
4221 * xmlSAXParseMemory :
4222 * @sax: the SAX handler block
4223 * @buffer: an pointer to a char array
4224 * @size: the siwe of the array
4225 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4226 * documents
4227 *
4228 * parse an XML in-memory block and use the given SAX function block
4229 * to handle the parsing callback. If sax is NULL, fallback to the default
4230 * DOM tree building routines.
4231 *
4232 * TODO : plug some encoding conversion routines here. !!!
4233 *
4234 * Returns the resulting document tree
4235 */
4236xmlDocPtr
4237xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
4238 xmlDocPtr ret;
4239 xmlParserCtxtPtr ctxt;
4240
4241 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
4242 if (ctxt == NULL) return(NULL);
4243 if (sax != NULL) ctxt->sax = sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004244
4245 xmlParseDocument(ctxt);
4246
Daniel Veillard517752b1999-04-05 12:20:10 +00004247 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004248 else {
4249 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00004250 xmlFreeDoc(ctxt->myDoc);
4251 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004252 }
Daniel Veillardd692aa41999-02-28 21:54:31 +00004253 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004254
4255 return(ret);
4256}
4257
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004258/**
4259 * xmlParseMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00004260 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004261 * @size: the size of the array
4262 *
4263 * parse an XML in-memory block and build a tree.
4264 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004265 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004266 */
4267
4268xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004269 return(xmlSAXParseMemory(NULL, buffer, size, 0));
4270}
4271
4272/**
4273 * xmlRecoverMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00004274 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004275 * @size: the size of the array
4276 *
4277 * parse an XML in-memory block and build a tree.
4278 * In the case the document is not Well Formed, a tree is built anyway
4279 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004280 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004281 */
4282
4283xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
4284 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004285}
Daniel Veillard260a68f1998-08-13 03:39:55 +00004286
Daniel Veillard11e00581998-10-24 18:27:49 +00004287/**
4288 * xmlInitParserCtxt:
4289 * @ctxt: an XML parser context
4290 *
4291 * Initialize a parser context
4292 */
4293
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004294void
4295xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004296{
Daniel Veillardd692aa41999-02-28 21:54:31 +00004297 /* Allocate the Input stack */
4298 ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
4299 ctxt->inputNr = 0;
4300 ctxt->inputMax = 5;
4301 ctxt->input = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00004302 ctxt->version = NULL;
4303 ctxt->encoding = NULL;
4304 ctxt->standalone = -1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004305
Daniel Veillardd692aa41999-02-28 21:54:31 +00004306 /* Allocate the Node stack */
4307 ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
4308 ctxt->nodeNr = 0;
4309 ctxt->nodeMax = 10;
4310 ctxt->node = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004311
Daniel Veillardd692aa41999-02-28 21:54:31 +00004312 ctxt->sax = &xmlDefaultSAXHandler;
Daniel Veillard517752b1999-04-05 12:20:10 +00004313 ctxt->myDoc = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00004314 ctxt->wellFormed = 1;
4315 ctxt->record_info = 0;
4316 xmlInitNodeInfoSeq(&ctxt->node_seq);
4317}
4318
4319/**
4320 * xmlFreeParserCtxt:
4321 * @ctxt: an XML parser context
4322 *
4323 * Free all the memory used by a parser context. However the parsed
Daniel Veillard517752b1999-04-05 12:20:10 +00004324 * document in ctxt->myDoc is not freed.
Daniel Veillardd692aa41999-02-28 21:54:31 +00004325 */
4326
4327void
4328xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
4329{
4330 xmlParserInputPtr input;
4331
4332 if (ctxt == NULL) return;
4333
4334 while ((input = inputPop(ctxt)) != NULL) {
4335 xmlFreeInputStream(input);
4336 }
4337
4338 if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
4339 if (ctxt->inputTab != NULL) free(ctxt->inputTab);
Daniel Veillard5099ae81999-04-21 20:12:07 +00004340 if (ctxt->version != NULL) free((char *) ctxt->version);
Daniel Veillardd692aa41999-02-28 21:54:31 +00004341 free(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004342}
4343
Daniel Veillard11e00581998-10-24 18:27:49 +00004344/**
4345 * xmlClearParserCtxt:
4346 * @ctxt: an XML parser context
4347 *
4348 * Clear (release owned resources) and reinitialize a parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00004349 */
Daniel Veillard11e00581998-10-24 18:27:49 +00004350
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004351void
4352xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004353{
4354 xmlClearNodeInfoSeq(&ctxt->node_seq);
4355 xmlInitParserCtxt(ctxt);
4356}
4357
4358
Daniel Veillard11e00581998-10-24 18:27:49 +00004359/**
4360 * xmlSetupParserForBuffer:
4361 * @ctxt: an XML parser context
4362 * @buffer: a CHAR * buffer
4363 * @filename: a file name
4364 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004365 * Setup the parser context to parse a new buffer; Clears any prior
4366 * contents from the parser context. The buffer parameter must not be
4367 * NULL, but the filename parameter can be
4368 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004369void
4370xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004371 const char* filename)
4372{
4373 xmlParserInputPtr input;
4374
4375 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4376 if (input == NULL) {
4377 perror("malloc");
4378 free(ctxt);
4379 exit(1);
4380 }
4381
4382 xmlClearParserCtxt(ctxt);
4383 if (input->filename != NULL)
4384 input->filename = strdup(filename);
4385 else
4386 input->filename = NULL;
4387 input->line = 1;
4388 input->col = 1;
4389 input->base = buffer;
4390 input->cur = buffer;
4391
4392 inputPush(ctxt, input);
4393}
4394
4395
Daniel Veillard11e00581998-10-24 18:27:49 +00004396/**
4397 * xmlParserFindNodeInfo:
4398 * @ctxt: an XML parser context
4399 * @node: an XML node within the tree
4400 *
4401 * Find the parser node info struct for a given node
4402 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004403 * Returns an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004404 */
4405const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
4406 const xmlNode* node)
4407{
4408 unsigned long pos;
4409
4410 /* Find position where node should be at */
4411 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
4412 if ( ctx->node_seq.buffer[pos].node == node )
4413 return &ctx->node_seq.buffer[pos];
4414 else
4415 return NULL;
4416}
4417
4418
Daniel Veillard11e00581998-10-24 18:27:49 +00004419/**
4420 * xmlInitNodeInfoSeq :
4421 * @seq: a node info sequence pointer
4422 *
4423 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00004424 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004425void
4426xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004427{
4428 seq->length = 0;
4429 seq->maximum = 0;
4430 seq->buffer = NULL;
4431}
4432
Daniel Veillard11e00581998-10-24 18:27:49 +00004433/**
4434 * xmlClearNodeInfoSeq :
4435 * @seq: a node info sequence pointer
4436 *
4437 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +00004438 * info sequence
4439 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004440void
4441xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004442{
4443 if ( seq->buffer != NULL )
4444 free(seq->buffer);
4445 xmlInitNodeInfoSeq(seq);
4446}
4447
4448
Daniel Veillard11e00581998-10-24 18:27:49 +00004449/**
4450 * xmlParserFindNodeInfoIndex:
4451 * @seq: a node info sequence pointer
4452 * @node: an XML node pointer
4453 *
4454 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004455 * xmlParserFindNodeInfoIndex : Find the index that the info record for
4456 * the given node is or should be at in a sorted sequence
Daniel Veillard1164e751999-02-16 16:29:17 +00004457 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004458 * Returns a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +00004459 */
4460unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
4461 const xmlNode* node)
4462{
4463 unsigned long upper, lower, middle;
4464 int found = 0;
4465
4466 /* Do a binary search for the key */
4467 lower = 1;
4468 upper = seq->length;
4469 middle = 0;
4470 while ( lower <= upper && !found) {
4471 middle = lower + (upper - lower) / 2;
4472 if ( node == seq->buffer[middle - 1].node )
4473 found = 1;
4474 else if ( node < seq->buffer[middle - 1].node )
4475 upper = middle - 1;
4476 else
4477 lower = middle + 1;
4478 }
4479
4480 /* Return position */
4481 if ( middle == 0 || seq->buffer[middle - 1].node < node )
4482 return middle;
4483 else
4484 return middle - 1;
4485}
4486
4487
Daniel Veillard11e00581998-10-24 18:27:49 +00004488/**
4489 * xmlParserAddNodeInfo:
4490 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00004491 * @info: a node info sequence pointer
Daniel Veillard11e00581998-10-24 18:27:49 +00004492 *
4493 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00004494 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004495void
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004496xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004497 const xmlParserNodeInfo* info)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004498{
4499 unsigned long pos;
4500 static unsigned int block_size = 5;
4501
4502 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004503 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
4504 if ( pos < ctxt->node_seq.length
4505 && ctxt->node_seq.buffer[pos].node == info->node ) {
4506 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004507 }
4508
4509 /* Otherwise, we need to add new node to buffer */
4510 else {
4511 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004512 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004513 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004514 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
4515 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +00004516
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004517 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard260a68f1998-08-13 03:39:55 +00004518 tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
4519 else
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004520 tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004521
4522 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00004524 ctxt->sax->error(ctxt, "Out of memory\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00004525 return;
4526 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004527 ctxt->node_seq.buffer = tmp_buffer;
4528 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004529 }
4530
4531 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004532 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004533 unsigned long i;
4534
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004535 for ( i = ctxt->node_seq.length; i > pos; i-- )
4536 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +00004537 }
4538
4539 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004540 ctxt->node_seq.buffer[pos] = *info;
4541 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004542 }
4543}