blob: 03cfa4e5fd243556e7a7c93010bb130076fde661 [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +00007 */
8
9#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
13#include <config.h>
14#endif
15#include <stdio.h>
16#include <ctype.h>
17#include <string.h> /* for memset() only */
Seth Alvese7f12e61998-10-01 20:51:15 +000018#include <stdlib.h>
Daniel Veillard260a68f1998-08-13 03:39:55 +000019#include <sys/stat.h>
20#ifdef HAVE_FCNTL_H
21#include <fcntl.h>
22#endif
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_ZLIB_H
27#include <zlib.h>
28#endif
29
30#include "tree.h"
31#include "parser.h"
32#include "entities.h"
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000033#include "valid.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000034
35/************************************************************************
36 * *
37 * Parser stacks related functions and macros *
38 * *
39 ************************************************************************/
40/*
41 * Generic function for accessing stacks in the Parser Context
42 */
43
44#define PUSH_AND_POP(type, name) \
45int name##Push(xmlParserCtxtPtr ctxt, type value) { \
46 if (ctxt->name##Nr >= ctxt->name##Max) { \
47 ctxt->name##Max *= 2; \
48 ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
49 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
50 if (ctxt->name##Tab == NULL) { \
51 fprintf(stderr, "realloc failed !\n"); \
52 exit(1); \
53 } \
54 } \
55 ctxt->name##Tab[ctxt->name##Nr] = value; \
56 ctxt->name = value; \
57 return(ctxt->name##Nr++); \
58} \
59type name##Pop(xmlParserCtxtPtr ctxt) { \
60 if (ctxt->name##Nr <= 0) return(0); \
61 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +000062 if (ctxt->name##Nr > 0) \
63 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
64 else \
65 ctxt->name = NULL; \
Daniel Veillard260a68f1998-08-13 03:39:55 +000066 return(ctxt->name); \
67} \
68
69PUSH_AND_POP(xmlParserInputPtr, input)
70PUSH_AND_POP(xmlNodePtr, node)
71
Daniel Veillard0ba4d531998-11-01 19:34:31 +000072/*
73 * Macros for accessing the content. Those should be used only by the parser,
74 * and not exported.
75 *
76 * Dirty macros, i.e. one need to make assumption on the context to use them
77 *
78 * CUR_PTR return the current pointer to the CHAR to be parsed.
79 * CUR returns the current CHAR value, i.e. a 8 bit value if compiled
80 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
81 * in UNICODE mode. This should be used internally by the parser
82 * only to compare to ASCII values otherwise it would break when
83 * running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next CHAR. Like CUR, it should be used only
 *           to compare against ASCII-based substrings.
86 * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
87 * strings within the parser.
88 *
89 * Clean macros, not dependent of an ASCII context.
90 *
91 * CURRENT Returns the current char value, with the full decoding of
92 * UTF-8 if we are using this mode. It returns an int.
93 * NEXT Skip to the next character, this does the proper decoding
94 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
95 * It returns the pointer to the current CHAR.
96 */
Daniel Veillard260a68f1998-08-13 03:39:55 +000097
/*
 * Raw accessors on the current input; they all expect a variable named
 * "ctxt" (the parser context) to be in scope at the point of use.
 * Every expansion is fully parenthesized so that the macros can be
 * embedded in larger expressions without precedence surprises;
 * CUR_PTR remains usable as an lvalue.
 */
#define CUR (*ctxt->input->cur)
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

/* Advance with NEXT for as long as the current CHAR is a blank. */
#define SKIP_BLANKS                                                     \
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
/*
 * NEXT: on a non-zero CHAR, update the line/column bookkeeping (a '\n'
 * starts a new line at column 1) and step one CHAR forward; on a zero
 * CHAR, call xmlPopInput() and continue from the resulting input.
 */
#define NEXT ((*ctxt->input->cur) ?                                     \
                (((*(ctxt->input->cur) == '\n') ?                       \
                    (ctxt->input->line++, ctxt->input->col = 1) :       \
                    (ctxt->input->col++)), ctxt->input->cur++) :        \
                (xmlPopInput(ctxt), ctxt->input->cur))
#else
#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +0000115
116
Daniel Veillard11e00581998-10-24 18:27:49 +0000117/**
118 * xmlPopInput:
119 * @ctxt: an XML parser context
120 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000121 * xmlPopInput: the current input pointed by ctxt->input came to an end
122 * pop it and return the next char.
123 *
124 * TODO A deallocation of the popped Input structure is needed
Daniel Veillard11e00581998-10-24 18:27:49 +0000125 * return values: the current CHAR in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000126 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000127CHAR
128xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000129 if (ctxt->inputNr == 1) return(0); /* End of main Input */
130 inputPop(ctxt);
131 return(CUR);
132}
133
Daniel Veillard11e00581998-10-24 18:27:49 +0000134/**
135 * xmlPushInput:
136 * @ctxt: an XML parser context
137 * @input: an XML parser input fragment (entity, XML fragment ...).
138 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000139 * xmlPushInput: switch to a new input stream which is stacked on top
140 * of the previous one(s).
141 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000142void
143xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000144 if (input == NULL) return;
145 inputPush(ctxt, input);
146}
147
Daniel Veillard11e00581998-10-24 18:27:49 +0000148/**
149 * xmlNewEntityInputStream:
150 * @ctxt: an XML parser context
151 * @entity: an Entity pointer
152 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000153 * Create a new input stream based on a memory buffer.
Daniel Veillardccb09631998-10-27 06:21:04 +0000154 * return vakues: the new input stream
Daniel Veillard260a68f1998-08-13 03:39:55 +0000155 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000156xmlParserInputPtr
157xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000158 xmlParserInputPtr input;
159
160 if (entity == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
162 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000163 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000164 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000165 }
166 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
168 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000169 "internal: xmlNewEntityInputStream entity->input = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000170 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000171 }
172 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
173 if (input == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
175 ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000176 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000177 }
178 input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
179 input->base = entity->content;
180 input->cur = entity->content;
181 input->line = 1;
182 input->col = 1;
Daniel Veillardccb09631998-10-27 06:21:04 +0000183 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000184}
185
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000186/**
187 * xmlNewStringInputStream:
188 * @ctxt: an XML parser context
189 * @entity: an Entity pointer
190 *
191 * Create a new input stream based on a memory buffer.
192 * return vakues: the new input stream
193 */
194xmlParserInputPtr
195xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *string) {
196 xmlParserInputPtr input;
197
198 if (string == NULL) {
199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
200 ctxt->sax->error(ctxt,
201 "internal: xmlNewStringInputStream string = NULL\n");
202 return(NULL);
203 }
204 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
205 if (input == NULL) {
206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
207 ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
208 return(NULL);
209 }
210 input->filename = NULL;
211 input->base = string;
212 input->cur = string;
213 input->line = 1;
214 input->col = 1;
215 return(input);
216}
217
Daniel Veillard260a68f1998-08-13 03:39:55 +0000218/*
219 * A few macros needed to help building the parser.
220 */
221
222#ifdef UNICODE
223/************************************************************************
224 * *
225 * UNICODE version of the macros. *
226 * *
227 ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The argument is evaluated several times: do not pass an expression
 * with side effects.
 */
#define IS_CHAR(c)                                                      \
    (((c) >= 0) && ((c) <= 0x10FFFF) &&                                 \
     (((c) < 0xD800) || ((c) > 0xDFFF)) &&                              \
     ((c) != 0xFFFE) && ((c) != 0xFFFF) &&                              \
     (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d)))

/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK is defined a second time, unconditionally, further down in
 * this file; the replacement list here is kept token for token the same
 * so that the second definition is a harmless redefinition.
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||  \
                     ((c) == 0x0D))
244
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * The table below lists the ranges and single code points one per
 * line, in increasing order. It can be regenerated from the text of
 * the production with vi:
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * The argument is evaluated many times: do not pass an expression with
 * side effects.
 */
#ifndef XML_RANGE
/* True when lo <= c <= hi. */
#define XML_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_BASECHAR(c)                                                  \
    (XML_RANGE(c, 0x0041, 0x005A) ||                                    \
     XML_RANGE(c, 0x0061, 0x007A) ||                                    \
     XML_RANGE(c, 0x00C0, 0x00D6) ||                                    \
     XML_RANGE(c, 0x00D8, 0x00F6) ||                                    \
     XML_RANGE(c, 0x00F8, 0x00FF) ||                                    \
     XML_RANGE(c, 0x0100, 0x0131) ||                                    \
     XML_RANGE(c, 0x0134, 0x013E) ||                                    \
     XML_RANGE(c, 0x0141, 0x0148) ||                                    \
     XML_RANGE(c, 0x014A, 0x017E) ||                                    \
     XML_RANGE(c, 0x0180, 0x01C3) ||                                    \
     XML_RANGE(c, 0x01CD, 0x01F0) ||                                    \
     XML_RANGE(c, 0x01F4, 0x01F5) ||                                    \
     XML_RANGE(c, 0x01FA, 0x0217) ||                                    \
     XML_RANGE(c, 0x0250, 0x02A8) ||                                    \
     XML_RANGE(c, 0x02BB, 0x02C1) ||                                    \
     ((c) == 0x0386) ||                                                 \
     XML_RANGE(c, 0x0388, 0x038A) ||                                    \
     ((c) == 0x038C) ||                                                 \
     XML_RANGE(c, 0x038E, 0x03A1) ||                                    \
     XML_RANGE(c, 0x03A3, 0x03CE) ||                                    \
     XML_RANGE(c, 0x03D0, 0x03D6) ||                                    \
     ((c) == 0x03DA) ||                                                 \
     ((c) == 0x03DC) ||                                                 \
     ((c) == 0x03DE) ||                                                 \
     ((c) == 0x03E0) ||                                                 \
     XML_RANGE(c, 0x03E2, 0x03F3) ||                                    \
     XML_RANGE(c, 0x0401, 0x040C) ||                                    \
     XML_RANGE(c, 0x040E, 0x044F) ||                                    \
     XML_RANGE(c, 0x0451, 0x045C) ||                                    \
     XML_RANGE(c, 0x045E, 0x0481) ||                                    \
     XML_RANGE(c, 0x0490, 0x04C4) ||                                    \
     XML_RANGE(c, 0x04C7, 0x04C8) ||                                    \
     XML_RANGE(c, 0x04CB, 0x04CC) ||                                    \
     XML_RANGE(c, 0x04D0, 0x04EB) ||                                    \
     XML_RANGE(c, 0x04EE, 0x04F5) ||                                    \
     XML_RANGE(c, 0x04F8, 0x04F9) ||                                    \
     XML_RANGE(c, 0x0531, 0x0556) ||                                    \
     ((c) == 0x0559) ||                                                 \
     XML_RANGE(c, 0x0561, 0x0586) ||                                    \
     XML_RANGE(c, 0x05D0, 0x05EA) ||                                    \
     XML_RANGE(c, 0x05F0, 0x05F2) ||                                    \
     XML_RANGE(c, 0x0621, 0x063A) ||                                    \
     XML_RANGE(c, 0x0641, 0x064A) ||                                    \
     XML_RANGE(c, 0x0671, 0x06B7) ||                                    \
     XML_RANGE(c, 0x06BA, 0x06BE) ||                                    \
     XML_RANGE(c, 0x06C0, 0x06CE) ||                                    \
     XML_RANGE(c, 0x06D0, 0x06D3) ||                                    \
     ((c) == 0x06D5) ||                                                 \
     XML_RANGE(c, 0x06E5, 0x06E6) ||                                    \
     XML_RANGE(c, 0x0905, 0x0939) ||                                    \
     ((c) == 0x093D) ||                                                 \
     XML_RANGE(c, 0x0958, 0x0961) ||                                    \
     XML_RANGE(c, 0x0985, 0x098C) ||                                    \
     XML_RANGE(c, 0x098F, 0x0990) ||                                    \
     XML_RANGE(c, 0x0993, 0x09A8) ||                                    \
     XML_RANGE(c, 0x09AA, 0x09B0) ||                                    \
     ((c) == 0x09B2) ||                                                 \
     XML_RANGE(c, 0x09B6, 0x09B9) ||                                    \
     XML_RANGE(c, 0x09DC, 0x09DD) ||                                    \
     XML_RANGE(c, 0x09DF, 0x09E1) ||                                    \
     XML_RANGE(c, 0x09F0, 0x09F1) ||                                    \
     XML_RANGE(c, 0x0A05, 0x0A0A) ||                                    \
     XML_RANGE(c, 0x0A0F, 0x0A10) ||                                    \
     XML_RANGE(c, 0x0A13, 0x0A28) ||                                    \
     XML_RANGE(c, 0x0A2A, 0x0A30) ||                                    \
     XML_RANGE(c, 0x0A32, 0x0A33) ||                                    \
     XML_RANGE(c, 0x0A35, 0x0A36) ||                                    \
     XML_RANGE(c, 0x0A38, 0x0A39) ||                                    \
     XML_RANGE(c, 0x0A59, 0x0A5C) ||                                    \
     ((c) == 0x0A5E) ||                                                 \
     XML_RANGE(c, 0x0A72, 0x0A74) ||                                    \
     XML_RANGE(c, 0x0A85, 0x0A8B) ||                                    \
     ((c) == 0x0A8D) ||                                                 \
     XML_RANGE(c, 0x0A8F, 0x0A91) ||                                    \
     XML_RANGE(c, 0x0A93, 0x0AA8) ||                                    \
     XML_RANGE(c, 0x0AAA, 0x0AB0) ||                                    \
     XML_RANGE(c, 0x0AB2, 0x0AB3) ||                                    \
     XML_RANGE(c, 0x0AB5, 0x0AB9) ||                                    \
     ((c) == 0x0ABD) ||                                                 \
     ((c) == 0x0AE0) ||                                                 \
     XML_RANGE(c, 0x0B05, 0x0B0C) ||                                    \
     XML_RANGE(c, 0x0B0F, 0x0B10) ||                                    \
     XML_RANGE(c, 0x0B13, 0x0B28) ||                                    \
     XML_RANGE(c, 0x0B2A, 0x0B30) ||                                    \
     XML_RANGE(c, 0x0B32, 0x0B33) ||                                    \
     XML_RANGE(c, 0x0B36, 0x0B39) ||                                    \
     ((c) == 0x0B3D) ||                                                 \
     XML_RANGE(c, 0x0B5C, 0x0B5D) ||                                    \
     XML_RANGE(c, 0x0B5F, 0x0B61) ||                                    \
     XML_RANGE(c, 0x0B85, 0x0B8A) ||                                    \
     XML_RANGE(c, 0x0B8E, 0x0B90) ||                                    \
     XML_RANGE(c, 0x0B92, 0x0B95) ||                                    \
     XML_RANGE(c, 0x0B99, 0x0B9A) ||                                    \
     ((c) == 0x0B9C) ||                                                 \
     XML_RANGE(c, 0x0B9E, 0x0B9F) ||                                    \
     XML_RANGE(c, 0x0BA3, 0x0BA4) ||                                    \
     XML_RANGE(c, 0x0BA8, 0x0BAA) ||                                    \
     XML_RANGE(c, 0x0BAE, 0x0BB5) ||                                    \
     XML_RANGE(c, 0x0BB7, 0x0BB9) ||                                    \
     XML_RANGE(c, 0x0C05, 0x0C0C) ||                                    \
     XML_RANGE(c, 0x0C0E, 0x0C10) ||                                    \
     XML_RANGE(c, 0x0C12, 0x0C28) ||                                    \
     XML_RANGE(c, 0x0C2A, 0x0C33) ||                                    \
     XML_RANGE(c, 0x0C35, 0x0C39) ||                                    \
     XML_RANGE(c, 0x0C60, 0x0C61) ||                                    \
     XML_RANGE(c, 0x0C85, 0x0C8C) ||                                    \
     XML_RANGE(c, 0x0C8E, 0x0C90) ||                                    \
     XML_RANGE(c, 0x0C92, 0x0CA8) ||                                    \
     XML_RANGE(c, 0x0CAA, 0x0CB3) ||                                    \
     XML_RANGE(c, 0x0CB5, 0x0CB9) ||                                    \
     ((c) == 0x0CDE) ||                                                 \
     XML_RANGE(c, 0x0CE0, 0x0CE1) ||                                    \
     XML_RANGE(c, 0x0D05, 0x0D0C) ||                                    \
     XML_RANGE(c, 0x0D0E, 0x0D10) ||                                    \
     XML_RANGE(c, 0x0D12, 0x0D28) ||                                    \
     XML_RANGE(c, 0x0D2A, 0x0D39) ||                                    \
     XML_RANGE(c, 0x0D60, 0x0D61) ||                                    \
     XML_RANGE(c, 0x0E01, 0x0E2E) ||                                    \
     ((c) == 0x0E30) ||                                                 \
     XML_RANGE(c, 0x0E32, 0x0E33) ||                                    \
     XML_RANGE(c, 0x0E40, 0x0E45) ||                                    \
     XML_RANGE(c, 0x0E81, 0x0E82) ||                                    \
     ((c) == 0x0E84) ||                                                 \
     XML_RANGE(c, 0x0E87, 0x0E88) ||                                    \
     ((c) == 0x0E8A) ||                                                 \
     ((c) == 0x0E8D) ||                                                 \
     XML_RANGE(c, 0x0E94, 0x0E97) ||                                    \
     XML_RANGE(c, 0x0E99, 0x0E9F) ||                                    \
     XML_RANGE(c, 0x0EA1, 0x0EA3) ||                                    \
     ((c) == 0x0EA5) ||                                                 \
     ((c) == 0x0EA7) ||                                                 \
     XML_RANGE(c, 0x0EAA, 0x0EAB) ||                                    \
     XML_RANGE(c, 0x0EAD, 0x0EAE) ||                                    \
     ((c) == 0x0EB0) ||                                                 \
     XML_RANGE(c, 0x0EB2, 0x0EB3) ||                                    \
     ((c) == 0x0EBD) ||                                                 \
     XML_RANGE(c, 0x0EC0, 0x0EC4) ||                                    \
     XML_RANGE(c, 0x0F40, 0x0F47) ||                                    \
     XML_RANGE(c, 0x0F49, 0x0F69) ||                                    \
     XML_RANGE(c, 0x10A0, 0x10C5) ||                                    \
     XML_RANGE(c, 0x10D0, 0x10F6) ||                                    \
     ((c) == 0x1100) ||                                                 \
     XML_RANGE(c, 0x1102, 0x1103) ||                                    \
     XML_RANGE(c, 0x1105, 0x1107) ||                                    \
     ((c) == 0x1109) ||                                                 \
     XML_RANGE(c, 0x110B, 0x110C) ||                                    \
     XML_RANGE(c, 0x110E, 0x1112) ||                                    \
     ((c) == 0x113C) ||                                                 \
     ((c) == 0x113E) ||                                                 \
     ((c) == 0x1140) ||                                                 \
     ((c) == 0x114C) ||                                                 \
     ((c) == 0x114E) ||                                                 \
     ((c) == 0x1150) ||                                                 \
     XML_RANGE(c, 0x1154, 0x1155) ||                                    \
     ((c) == 0x1159) ||                                                 \
     XML_RANGE(c, 0x115F, 0x1161) ||                                    \
     ((c) == 0x1163) ||                                                 \
     ((c) == 0x1165) ||                                                 \
     ((c) == 0x1167) ||                                                 \
     ((c) == 0x1169) ||                                                 \
     XML_RANGE(c, 0x116D, 0x116E) ||                                    \
     XML_RANGE(c, 0x1172, 0x1173) ||                                    \
     ((c) == 0x1175) ||                                                 \
     ((c) == 0x119E) ||                                                 \
     ((c) == 0x11A8) ||                                                 \
     ((c) == 0x11AB) ||                                                 \
     XML_RANGE(c, 0x11AE, 0x11AF) ||                                    \
     XML_RANGE(c, 0x11B7, 0x11B8) ||                                    \
     ((c) == 0x11BA) ||                                                 \
     XML_RANGE(c, 0x11BC, 0x11C2) ||                                    \
     ((c) == 0x11EB) ||                                                 \
     ((c) == 0x11F0) ||                                                 \
     ((c) == 0x11F9) ||                                                 \
     XML_RANGE(c, 0x1E00, 0x1E9B) ||                                    \
     XML_RANGE(c, 0x1EA0, 0x1EF9) ||                                    \
     XML_RANGE(c, 0x1F00, 0x1F15) ||                                    \
     XML_RANGE(c, 0x1F18, 0x1F1D) ||                                    \
     XML_RANGE(c, 0x1F20, 0x1F45) ||                                    \
     XML_RANGE(c, 0x1F48, 0x1F4D) ||                                    \
     XML_RANGE(c, 0x1F50, 0x1F57) ||                                    \
     ((c) == 0x1F59) ||                                                 \
     ((c) == 0x1F5B) ||                                                 \
     ((c) == 0x1F5D) ||                                                 \
     XML_RANGE(c, 0x1F5F, 0x1F7D) ||                                    \
     XML_RANGE(c, 0x1F80, 0x1FB4) ||                                    \
     XML_RANGE(c, 0x1FB6, 0x1FBC) ||                                    \
     ((c) == 0x1FBE) ||                                                 \
     XML_RANGE(c, 0x1FC2, 0x1FC4) ||                                    \
     XML_RANGE(c, 0x1FC6, 0x1FCC) ||                                    \
     XML_RANGE(c, 0x1FD0, 0x1FD3) ||                                    \
     XML_RANGE(c, 0x1FD6, 0x1FDB) ||                                    \
     XML_RANGE(c, 0x1FE0, 0x1FEC) ||                                    \
     XML_RANGE(c, 0x1FF2, 0x1FF4) ||                                    \
     XML_RANGE(c, 0x1FF6, 0x1FFC) ||                                    \
     ((c) == 0x2126) ||                                                 \
     XML_RANGE(c, 0x212A, 0x212B) ||                                    \
     ((c) == 0x212E) ||                                                 \
     XML_RANGE(c, 0x2180, 0x2182) ||                                    \
     XML_RANGE(c, 0x3041, 0x3094) ||                                    \
     XML_RANGE(c, 0x30A1, 0x30FA) ||                                    \
     XML_RANGE(c, 0x3105, 0x312C) ||                                    \
     XML_RANGE(c, 0xAC00, 0xD7A3))
456
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * One ten-code-point block of decimal digits per line (nine for the
 * 0x0BE7 block). The argument is evaluated several times.
 */
#ifndef XML_RANGE
/* True when lo <= c <= hi. */
#define XML_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_DIGIT(c)                                                     \
    (XML_RANGE(c, 0x0030, 0x0039) ||                                    \
     XML_RANGE(c, 0x0660, 0x0669) ||                                    \
     XML_RANGE(c, 0x06F0, 0x06F9) ||                                    \
     XML_RANGE(c, 0x0966, 0x096F) ||                                    \
     XML_RANGE(c, 0x09E6, 0x09EF) ||                                    \
     XML_RANGE(c, 0x0A66, 0x0A6F) ||                                    \
     XML_RANGE(c, 0x0AE6, 0x0AEF) ||                                    \
     XML_RANGE(c, 0x0B66, 0x0B6F) ||                                    \
     XML_RANGE(c, 0x0BE7, 0x0BEF) ||                                    \
     XML_RANGE(c, 0x0C66, 0x0C6F) ||                                    \
     XML_RANGE(c, 0x0CE6, 0x0CEF) ||                                    \
     XML_RANGE(c, 0x0D66, 0x0D6F) ||                                    \
     XML_RANGE(c, 0x0E50, 0x0E59) ||                                    \
     XML_RANGE(c, 0x0ED0, 0x0ED9) ||                                    \
     XML_RANGE(c, 0x0F20, 0x0F29))
476
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Ranges and single code points are listed one per line, in the order
 * of the production. The argument is evaluated many times: do not pass
 * an expression with side effects.
 */
#ifndef XML_RANGE
/* True when lo <= c <= hi. */
#define XML_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_COMBINING(c)                                                 \
    (XML_RANGE(c, 0x0300, 0x0345) ||                                    \
     XML_RANGE(c, 0x0360, 0x0361) ||                                    \
     XML_RANGE(c, 0x0483, 0x0486) ||                                    \
     XML_RANGE(c, 0x0591, 0x05A1) ||                                    \
     XML_RANGE(c, 0x05A3, 0x05B9) ||                                    \
     XML_RANGE(c, 0x05BB, 0x05BD) ||                                    \
     ((c) == 0x05BF) ||                                                 \
     XML_RANGE(c, 0x05C1, 0x05C2) ||                                    \
     ((c) == 0x05C4) ||                                                 \
     XML_RANGE(c, 0x064B, 0x0652) ||                                    \
     ((c) == 0x0670) ||                                                 \
     XML_RANGE(c, 0x06D6, 0x06DC) ||                                    \
     XML_RANGE(c, 0x06DD, 0x06DF) ||                                    \
     XML_RANGE(c, 0x06E0, 0x06E4) ||                                    \
     XML_RANGE(c, 0x06E7, 0x06E8) ||                                    \
     XML_RANGE(c, 0x06EA, 0x06ED) ||                                    \
     XML_RANGE(c, 0x0901, 0x0903) ||                                    \
     ((c) == 0x093C) ||                                                 \
     XML_RANGE(c, 0x093E, 0x094C) ||                                    \
     ((c) == 0x094D) ||                                                 \
     XML_RANGE(c, 0x0951, 0x0954) ||                                    \
     XML_RANGE(c, 0x0962, 0x0963) ||                                    \
     XML_RANGE(c, 0x0981, 0x0983) ||                                    \
     ((c) == 0x09BC) ||                                                 \
     ((c) == 0x09BE) ||                                                 \
     ((c) == 0x09BF) ||                                                 \
     XML_RANGE(c, 0x09C0, 0x09C4) ||                                    \
     XML_RANGE(c, 0x09C7, 0x09C8) ||                                    \
     XML_RANGE(c, 0x09CB, 0x09CD) ||                                    \
     ((c) == 0x09D7) ||                                                 \
     XML_RANGE(c, 0x09E2, 0x09E3) ||                                    \
     ((c) == 0x0A02) ||                                                 \
     ((c) == 0x0A3C) ||                                                 \
     ((c) == 0x0A3E) ||                                                 \
     ((c) == 0x0A3F) ||                                                 \
     XML_RANGE(c, 0x0A40, 0x0A42) ||                                    \
     XML_RANGE(c, 0x0A47, 0x0A48) ||                                    \
     XML_RANGE(c, 0x0A4B, 0x0A4D) ||                                    \
     XML_RANGE(c, 0x0A70, 0x0A71) ||                                    \
     XML_RANGE(c, 0x0A81, 0x0A83) ||                                    \
     ((c) == 0x0ABC) ||                                                 \
     XML_RANGE(c, 0x0ABE, 0x0AC5) ||                                    \
     XML_RANGE(c, 0x0AC7, 0x0AC9) ||                                    \
     XML_RANGE(c, 0x0ACB, 0x0ACD) ||                                    \
     XML_RANGE(c, 0x0B01, 0x0B03) ||                                    \
     ((c) == 0x0B3C) ||                                                 \
     XML_RANGE(c, 0x0B3E, 0x0B43) ||                                    \
     XML_RANGE(c, 0x0B47, 0x0B48) ||                                    \
     XML_RANGE(c, 0x0B4B, 0x0B4D) ||                                    \
     XML_RANGE(c, 0x0B56, 0x0B57) ||                                    \
     XML_RANGE(c, 0x0B82, 0x0B83) ||                                    \
     XML_RANGE(c, 0x0BBE, 0x0BC2) ||                                    \
     XML_RANGE(c, 0x0BC6, 0x0BC8) ||                                    \
     XML_RANGE(c, 0x0BCA, 0x0BCD) ||                                    \
     ((c) == 0x0BD7) ||                                                 \
     XML_RANGE(c, 0x0C01, 0x0C03) ||                                    \
     XML_RANGE(c, 0x0C3E, 0x0C44) ||                                    \
     XML_RANGE(c, 0x0C46, 0x0C48) ||                                    \
     XML_RANGE(c, 0x0C4A, 0x0C4D) ||                                    \
     XML_RANGE(c, 0x0C55, 0x0C56) ||                                    \
     XML_RANGE(c, 0x0C82, 0x0C83) ||                                    \
     XML_RANGE(c, 0x0CBE, 0x0CC4) ||                                    \
     XML_RANGE(c, 0x0CC6, 0x0CC8) ||                                    \
     XML_RANGE(c, 0x0CCA, 0x0CCD) ||                                    \
     XML_RANGE(c, 0x0CD5, 0x0CD6) ||                                    \
     XML_RANGE(c, 0x0D02, 0x0D03) ||                                    \
     XML_RANGE(c, 0x0D3E, 0x0D43) ||                                    \
     XML_RANGE(c, 0x0D46, 0x0D48) ||                                    \
     XML_RANGE(c, 0x0D4A, 0x0D4D) ||                                    \
     ((c) == 0x0D57) ||                                                 \
     ((c) == 0x0E31) ||                                                 \
     XML_RANGE(c, 0x0E34, 0x0E3A) ||                                    \
     XML_RANGE(c, 0x0E47, 0x0E4E) ||                                    \
     ((c) == 0x0EB1) ||                                                 \
     XML_RANGE(c, 0x0EB4, 0x0EB9) ||                                    \
     XML_RANGE(c, 0x0EBB, 0x0EBC) ||                                    \
     XML_RANGE(c, 0x0EC8, 0x0ECD) ||                                    \
     XML_RANGE(c, 0x0F18, 0x0F19) ||                                    \
     ((c) == 0x0F35) ||                                                 \
     ((c) == 0x0F37) ||                                                 \
     ((c) == 0x0F39) ||                                                 \
     ((c) == 0x0F3E) ||                                                 \
     ((c) == 0x0F3F) ||                                                 \
     XML_RANGE(c, 0x0F71, 0x0F84) ||                                    \
     XML_RANGE(c, 0x0F86, 0x0F8B) ||                                    \
     XML_RANGE(c, 0x0F90, 0x0F95) ||                                    \
     ((c) == 0x0F97) ||                                                 \
     XML_RANGE(c, 0x0F99, 0x0FAD) ||                                    \
     XML_RANGE(c, 0x0FB1, 0x0FB7) ||                                    \
     ((c) == 0x0FB9) ||                                                 \
     XML_RANGE(c, 0x20D0, 0x20DC) ||                                    \
     ((c) == 0x20E1) ||                                                 \
     XML_RANGE(c, 0x302A, 0x302F) ||                                    \
     ((c) == 0x3099) ||                                                 \
     ((c) == 0x309A))
576
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * The table follows the production above exactly. The argument is
 * evaluated several times: do not pass an expression with side effects.
 */
#define IS_EXTENDER(c)                                                  \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||               \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||              \
     ((c) == 0xec6) || ((c) == 0x3005) ||                               \
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||                            \
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||                            \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
589
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): the macro also accepts 0xF900-0xFA2D, which is not part
 * of the production quoted above - confirm whether this is intended.
 */
#define IS_IDEOGRAPHIC(c)                                               \
    (((c) == 0x3007) ||                                                 \
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||                            \
     (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||                            \
     (((c) >= 0xf900) && ((c) <= 0xfa2d)))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_IDEOGRAPHIC(c) || IS_BASECHAR(c))
603
604#else
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000605#ifndef USE_UTF_8
Daniel Veillard260a68f1998-08-13 03:39:55 +0000606/************************************************************************
607 * *
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000608 * 8bits / ISO-Latin version of the macros. *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000609 * *
610 ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The argument is evaluated several times: do not pass an expression
 * with side effects.
 */
#define IS_CHAR(c)                                                      \
    (((c) <= 0x10FFFF) &&                                               \
     (((c) < 0xD800) || ((c) > 0xDFFF)) &&                              \
     ((c) != 0xFFFE) && ((c) != 0xFFFF) &&                              \
     (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d)))
Daniel Veillard260a68f1998-08-13 03:39:55 +0000620
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bit subset of the production. The former clause
 * (c >= 0xaa) && (c <= 0x5b) could never be true and has been dropped;
 * the set of accepted values is unchanged.
 *
 * NOTE(review): 0xba is accepted although it does not appear in the
 * 8 bit part of the BaseChar production - confirm whether this is
 * intended.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)) ||                                \
      ((c) == 0xba))
632
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bit subset: only the ASCII digits 0x30-0x39.
 */
#define IS_DIGIT(c) (!(((c) < 0x30) || ((c) > 0x39)))

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bit subset: there is no ideographic character in this range.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))


/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bit subset: always false.
 */
#define IS_COMBINING(c) (0)

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bit subset: only #x00B7 applies.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
655
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000656#else /* USE_UTF_8 */
657/************************************************************************
658 * *
659 * 8bits / UTF-8 version of the macros. *
660 * *
661 ************************************************************************/
662
663TODO !!!
664#endif /* USE_UTF_8 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000665#endif /* !UNICODE */
666
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK is also defined in the UNICODE section earlier in this file;
 * the replacement list here is kept token for token the same so that
 * this definition is a harmless redefinition in that configuration.
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||  \
                     ((c) == 0x0D))

/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Alphanumerics are tested first, then the white space values, then the
 * punctuation set of the production. The argument is evaluated many
 * times: do not pass an expression with side effects.
 */
#define IS_PUBIDCHAR(c)                                                 \
    ((((c) >= 'a') && ((c) <= 'z')) ||                                  \
     (((c) >= 'A') && ((c) <= 'Z')) ||                                  \
     (((c) >= '0') && ((c) <= '9')) ||                                  \
     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||                 \
     ((c) == '-') || ((c) == '\'') ||                                   \
     ((c) == '(') || ((c) == ')') ||                                    \
     ((c) == '+') || ((c) == ',') || ((c) == '.') ||                    \
     ((c) == '/') || ((c) == ':') || ((c) == '=') ||                    \
     ((c) == '?') || ((c) == ';') || ((c) == '!') ||                    \
     ((c) == '*') || ((c) == '#') || ((c) == '@') ||                    \
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
688
/*
 * SKIP_EOL(p): if @p points at an end of line, move it past exactly one
 * line end; otherwise leave it untouched. CR (0x0D), LF (0x0A), CR LF
 * and LF CR are each treated as a single line end.
 *
 * @p must be a modifiable pointer lvalue; it is evaluated several
 * times. The macro expands to a single statement and must be followed
 * by a semicolon.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) {                                             \
            (p)++;                                                      \
            if (*(p) == 0x0A) (p)++;                                    \
        } else if (*(p) == 0x0A) {                                      \
            (p)++;                                                      \
            if (*(p) == 0x0D) (p)++;                                    \
        }                                                               \
    } while (0)
692
/*
 * MOVETO_ENDTAG(p): advance @p until it points at a '>' or at a value
 * rejected by IS_CHAR (which includes the terminating 0).
 * @p must be a modifiable pointer lvalue; it is evaluated several times.
 */
#define MOVETO_ENDTAG(p)                                                \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++
695
/*
 * MOVETO_STARTTAG(p): advance @p until it points at a '<' or at a value
 * rejected by IS_CHAR (which includes the terminating 0).
 * @p must be a modifiable pointer lvalue; it is evaluated several times.
 */
#define MOVETO_STARTTAG(p)                                              \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
698
699/************************************************************************
700 * *
701 * Commodity functions to handle CHARs *
702 * *
703 ************************************************************************/
704
Daniel Veillard11e00581998-10-24 18:27:49 +0000705/**
706 * xmlStrndup:
707 * @cur: the input CHAR *
708 * @len: the len of @cur
709 *
710 * a strndup for array of CHAR's
711 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000712 */
713
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000714CHAR *
715xmlStrndup(const CHAR *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000716 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
717
718 if (ret == NULL) {
719 fprintf(stderr, "malloc of %d byte failed\n",
720 (len + 1) * sizeof(CHAR));
721 return(NULL);
722 }
723 memcpy(ret, cur, len * sizeof(CHAR));
724 ret[len] = 0;
725 return(ret);
726}
727
Daniel Veillard11e00581998-10-24 18:27:49 +0000728/**
729 * xmlStrdup:
730 * @cur: the input CHAR *
731 *
732 * a strdup for array of CHAR's
733 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000734 */
735
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000736CHAR *
737xmlStrdup(const CHAR *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000738 const CHAR *p = cur;
739
740 while (IS_CHAR(*p)) p++;
741 return(xmlStrndup(cur, p - cur));
742}
743
Daniel Veillard11e00581998-10-24 18:27:49 +0000744/**
745 * xmlCharStrndup:
746 * @cur: the input char *
747 * @len: the len of @cur
748 *
749 * a strndup for char's to CHAR's
750 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000751 */
752
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000753CHAR *
754xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000755 int i;
756 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
757
758 if (ret == NULL) {
759 fprintf(stderr, "malloc of %d byte failed\n",
760 (len + 1) * sizeof(CHAR));
761 return(NULL);
762 }
763 for (i = 0;i < len;i++)
764 ret[i] = (CHAR) cur[i];
765 ret[len] = 0;
766 return(ret);
767}
768
Daniel Veillard11e00581998-10-24 18:27:49 +0000769/**
770 * xmlCharStrdup:
771 * @cur: the input char *
772 * @len: the len of @cur
773 *
774 * a strdup for char's to CHAR's
775 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000776 */
777
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000778CHAR *
779xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000780 const char *p = cur;
781
782 while (*p != '\0') p++;
783 return(xmlCharStrndup(cur, p - cur));
784}
785
Daniel Veillard11e00581998-10-24 18:27:49 +0000786/**
787 * xmlStrcmp:
788 * @str1: the first CHAR *
789 * @str2: the second CHAR *
790 *
791 * a strcmp for CHAR's
792 * return values: the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +0000793 */
794
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000795int
796xmlStrcmp(const CHAR *str1, const CHAR *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000797 register int tmp;
798
799 do {
800 tmp = *str1++ - *str2++;
801 if (tmp != 0) return(tmp);
802 } while ((*str1 != 0) && (*str2 != 0));
803 return (*str1 - *str2);
804}
805
Daniel Veillard11e00581998-10-24 18:27:49 +0000806/**
807 * xmlStrncmp:
808 * @str1: the first CHAR *
809 * @str2: the second CHAR *
810 * @len: the max comparison length
811 *
812 * a strncmp for CHAR's
813 * return values: the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +0000814 */
815
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000816int
817xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000818 register int tmp;
819
820 if (len <= 0) return(0);
821 do {
822 tmp = *str1++ - *str2++;
823 if (tmp != 0) return(tmp);
824 len--;
825 if (len <= 0) return(0);
826 } while ((*str1 != 0) && (*str2 != 0));
827 return (*str1 - *str2);
828}
829
Daniel Veillard11e00581998-10-24 18:27:49 +0000830/**
831 * xmlStrchr:
832 * @str: the CHAR * array
833 * @val: the CHAR to search
834 *
835 * a strchr for CHAR's
836 * return values: the CHAR * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000837 */
838
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000839CHAR *
840xmlStrchr(const CHAR *str, CHAR val) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000841 while (*str != 0) {
842 if (*str == val) return((CHAR *) str);
843 str++;
844 }
845 return(NULL);
846}
847
Daniel Veillard11e00581998-10-24 18:27:49 +0000848/**
849 * xmlStrlen:
850 * @str: the CHAR * array
851 *
852 * lenght of a CHAR's string
853 * return values: the number of CHAR contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000854 */
855
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000856int
857xmlStrlen(const CHAR *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000858 int len = 0;
859
860 if (str == NULL) return(0);
861 while (*str != 0) {
862 str++;
863 len++;
864 }
865 return(len);
866}
867
Daniel Veillard11e00581998-10-24 18:27:49 +0000868/**
869 * xmlStrncat:
870 * @first: the original CHAR * array
871 * @add: the CHAR * array added
872 * @len: the length of @add
873 *
874 * a strncat for array of CHAR's
875 * return values: a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000876 */
877
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000878CHAR *
879xmlStrncat(CHAR *cur, const CHAR *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000880 int size;
881 CHAR *ret;
882
883 if ((add == NULL) || (len == 0))
884 return(cur);
885 if (cur == NULL)
886 return(xmlStrndup(add, len));
887
888 size = xmlStrlen(cur);
889 ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
890 if (ret == NULL) {
891 fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
892 (size + len + 1) * sizeof(CHAR));
893 return(cur);
894 }
895 memcpy(&ret[size], add, len * sizeof(CHAR));
896 ret[size + len] = 0;
897 return(ret);
898}
899
Daniel Veillard11e00581998-10-24 18:27:49 +0000900/**
901 * xmlStrcat:
902 * @first: the original CHAR * array
903 * @add: the CHAR * array added
904 *
905 * a strcat for array of CHAR's
906 * return values: a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000907 */
908
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000909CHAR *
910xmlStrcat(CHAR *cur, const CHAR *add) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000911 const CHAR *p = add;
912
913 if (add == NULL) return(cur);
914 if (cur == NULL)
915 return(xmlStrdup(add));
916
917 while (IS_CHAR(*p)) p++;
918 return(xmlStrncat(cur, add, p - add));
919}
920
921/************************************************************************
922 * *
923 * Commodity functions, cleanup needed ? *
924 * *
925 ************************************************************************/
926
Daniel Veillard11e00581998-10-24 18:27:49 +0000927/**
928 * areBlanks:
929 * @ctxt: an XML parser context
930 * @str: a CHAR *
931 * @len: the size of @str
932 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000933 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +0000934 *
935 * TODO: to be corrected accodingly to DTD information if available
936 * return values: 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000937 */
938
939static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
940 int i;
941 xmlNodePtr lastChild;
942
943 for (i = 0;i < len;i++)
944 if (!(IS_BLANK(str[i]))) return(0);
945
946 if (CUR != '<') return(0);
947 lastChild = xmlGetLastChild(ctxt->node);
948 if (lastChild == NULL) {
949 if (ctxt->node->content != NULL) return(0);
950 } else if (xmlNodeIsText(lastChild))
951 return(0);
952 return(1);
953}
954
Daniel Veillard11e00581998-10-24 18:27:49 +0000955/**
956 * xmlHandleEntity:
957 * @ctxt: an XML parser context
958 * @entity: an XML entity pointer.
959 *
960 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +0000961 * stream ? When do we just handle that as a set of chars ?
Daniel Veillard11e00581998-10-24 18:27:49 +0000962 * TODO: we should call the SAX handler here and have it resolve the issue
Daniel Veillard260a68f1998-08-13 03:39:55 +0000963 */
964
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000965void
966xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000967 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +0000968 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000969
970 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
972 ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +0000973 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000974 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000975 return;
976 }
977 len = xmlStrlen(entity->content);
978 if (len <= 2) goto handle_as_char;
979
980 /*
981 * Redefine its content as an input stream.
982 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000983 input = xmlNewEntityInputStream(ctxt, entity);
984 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000985 return;
986
987handle_as_char:
988 /*
989 * Just handle the content as a set of chars.
990 */
991 if (ctxt->sax != NULL)
992 ctxt->sax->characters(ctxt, entity->content, 0, len);
993
994}
995
/*
 * Forward declarations for recursive behaviour.
 */
999xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00001000CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
1001CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001002
1003/************************************************************************
1004 * *
1005 * Extra stuff for namespace support *
1006 * Relates to http://www.w3.org/TR/WD-xml-names *
1007 * *
1008 ************************************************************************/
1009
Daniel Veillard11e00581998-10-24 18:27:49 +00001010/**
1011 * xmlNamespaceParseNCName:
1012 * @ctxt: an XML parser context
1013 *
1014 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001015 *
1016 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1017 *
1018 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1019 * CombiningChar | Extender
Daniel Veillard11e00581998-10-24 18:27:49 +00001020 * return values: the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001021 */
1022
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001023CHAR *
1024xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001025 const CHAR *q;
1026 CHAR *ret = NULL;
1027
1028 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1029 q = NEXT;
1030
1031 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1032 (CUR == '.') || (CUR == '-') ||
1033 (CUR == '_') ||
1034 (IS_COMBINING(CUR)) ||
1035 (IS_EXTENDER(CUR)))
1036 NEXT;
1037
1038 ret = xmlStrndup(q, CUR_PTR - q);
1039
1040 return(ret);
1041}
1042
Daniel Veillard11e00581998-10-24 18:27:49 +00001043/**
1044 * xmlNamespaceParseQName:
1045 * @ctxt: an XML parser context
1046 * @prefix: a CHAR **
1047 *
1048 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001049 *
1050 * [NS 5] QName ::= (Prefix ':')? LocalPart
1051 *
1052 * [NS 6] Prefix ::= NCName
1053 *
1054 * [NS 7] LocalPart ::= NCName
Daniel Veillard11e00581998-10-24 18:27:49 +00001055 * return values: the function returns the local part, and prefix is updated
1056 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001057 */
1058
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001059CHAR *
1060xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001061 CHAR *ret = NULL;
1062
1063 *prefix = NULL;
1064 ret = xmlNamespaceParseNCName(ctxt);
1065 if (CUR == ':') {
1066 *prefix = ret;
1067 NEXT;
1068 ret = xmlNamespaceParseNCName(ctxt);
1069 }
1070
1071 return(ret);
1072}
1073
Daniel Veillard11e00581998-10-24 18:27:49 +00001074/**
1075 * xmlNamespaceParseNSDef:
1076 * @ctxt: an XML parser context
1077 *
1078 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00001079 *
1080 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1081 *
1082 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard11e00581998-10-24 18:27:49 +00001083 * return values: the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001084 */
1085
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001086CHAR *
1087xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001088 CHAR *name = NULL;
1089
1090 if ((CUR == 'x') && (NXT(1) == 'm') &&
1091 (NXT(2) == 'l') && (NXT(3) == 'n') &&
1092 (NXT(4) == 's')) {
1093 SKIP(5);
1094 if (CUR == ':') {
1095 NEXT;
1096 name = xmlNamespaceParseNCName(ctxt);
1097 }
1098 }
1099 return(name);
1100}
1101
Daniel Veillard11e00581998-10-24 18:27:49 +00001102/**
1103 * xmlParseQuotedString:
1104 * @ctxt: an XML parser context
1105 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001106 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillard11e00581998-10-24 18:27:49 +00001107 * return values: the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001108 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001109CHAR *
1110xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001111 CHAR *ret = NULL;
1112 const CHAR *q;
1113
1114 if (CUR == '"') {
1115 NEXT;
1116 q = CUR_PTR;
1117 while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001118 if (CUR != '"') {
1119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00001120 ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001121 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001122 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001123 ret = xmlStrndup(q, CUR_PTR - q);
1124 NEXT;
1125 }
1126 } else if (CUR == '\''){
1127 NEXT;
1128 q = CUR_PTR;
1129 while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001130 if (CUR != '\'') {
1131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00001132 ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001133 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001134 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001135 ret = xmlStrndup(q, CUR_PTR - q);
1136 NEXT;
1137 }
1138 }
1139 return(ret);
1140}
1141
Daniel Veillard11e00581998-10-24 18:27:49 +00001142/**
1143 * xmlParseNamespace:
1144 * @ctxt: an XML parser context
1145 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001146 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1147 *
1148 * This is what the older xml-name Working Draft specified, a bunch of
1149 * other stuff may still rely on it, so support is still here as
1150 * if ot was declared on the root of the Tree:-(
1151 */
1152
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001153void
1154xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001155 CHAR *href = NULL;
1156 CHAR *prefix = NULL;
1157 int garbage = 0;
1158
1159 /*
1160 * We just skipped "namespace" or "xml:namespace"
1161 */
1162 SKIP_BLANKS;
1163
1164 while (IS_CHAR(CUR) && (CUR != '>')) {
1165 /*
1166 * We can have "ns" or "prefix" attributes
1167 * Old encoding as 'href' or 'AS' attributes is still supported
1168 */
1169 if ((CUR == 'n') && (NXT(1) == 's')) {
1170 garbage = 0;
1171 SKIP(2);
1172 SKIP_BLANKS;
1173
1174 if (CUR != '=') continue;
1175 NEXT;
1176 SKIP_BLANKS;
1177
1178 href = xmlParseQuotedString(ctxt);
1179 SKIP_BLANKS;
1180 } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1181 (NXT(2) == 'e') && (NXT(3) == 'f')) {
1182 garbage = 0;
1183 SKIP(4);
1184 SKIP_BLANKS;
1185
1186 if (CUR != '=') continue;
1187 NEXT;
1188 SKIP_BLANKS;
1189
1190 href = xmlParseQuotedString(ctxt);
1191 SKIP_BLANKS;
1192 } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1193 (NXT(2) == 'e') && (NXT(3) == 'f') &&
1194 (NXT(4) == 'i') && (NXT(5) == 'x')) {
1195 garbage = 0;
1196 SKIP(6);
1197 SKIP_BLANKS;
1198
1199 if (CUR != '=') continue;
1200 NEXT;
1201 SKIP_BLANKS;
1202
1203 prefix = xmlParseQuotedString(ctxt);
1204 SKIP_BLANKS;
1205 } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1206 garbage = 0;
1207 SKIP(2);
1208 SKIP_BLANKS;
1209
1210 if (CUR != '=') continue;
1211 NEXT;
1212 SKIP_BLANKS;
1213
1214 prefix = xmlParseQuotedString(ctxt);
1215 SKIP_BLANKS;
1216 } else if ((CUR == '?') && (NXT(1) == '>')) {
1217 garbage = 0;
1218 CUR_PTR ++;
1219 } else {
1220 /*
1221 * Found garbage when parsing the namespace
1222 */
1223 if (!garbage)
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1225 ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001226 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001227 NEXT;
1228 }
1229 }
1230
1231 MOVETO_ENDTAG(CUR_PTR);
1232 NEXT;
1233
1234 /*
1235 * Register the DTD.
1236 */
1237 if (href != NULL)
1238 xmlNewGlobalNs(ctxt->doc, href, prefix);
1239
1240 if (prefix != NULL) free(prefix);
1241 if (href != NULL) free(href);
1242}
1243
1244/************************************************************************
1245 * *
1246 * The parser itself *
1247 * Relates to http://www.w3.org/TR/REC-xml *
1248 * *
1249 ************************************************************************/
1250
Daniel Veillard11e00581998-10-24 18:27:49 +00001251/**
1252 * xmlParseName:
1253 * @ctxt: an XML parser context
1254 *
1255 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001256 *
1257 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1258 * CombiningChar | Extender
1259 *
1260 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1261 *
1262 * [6] Names ::= Name (S Name)*
Daniel Veillard11e00581998-10-24 18:27:49 +00001263 * return values: the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001264 */
1265
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001266CHAR *
1267xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001268 const CHAR *q;
1269 CHAR *ret = NULL;
1270
1271 if (!IS_LETTER(CUR) && (CUR != '_') &&
1272 (CUR != ':')) return(NULL);
1273 q = NEXT;
1274
1275 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1276 (CUR == '.') || (CUR == '-') ||
1277 (CUR == '_') || (CUR == ':') ||
1278 (IS_COMBINING(CUR)) ||
1279 (IS_EXTENDER(CUR)))
1280 NEXT;
1281
1282 ret = xmlStrndup(q, CUR_PTR - q);
1283
1284 return(ret);
1285}
1286
Daniel Veillard11e00581998-10-24 18:27:49 +00001287/**
1288 * xmlParseNmtoken:
1289 * @ctxt: an XML parser context
1290 *
1291 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001292 *
1293 * [7] Nmtoken ::= (NameChar)+
1294 *
1295 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard11e00581998-10-24 18:27:49 +00001296 * return values: the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001297 */
1298
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001299CHAR *
1300xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001301 const CHAR *q;
1302 CHAR *ret = NULL;
1303
1304 q = NEXT;
1305
1306 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1307 (CUR == '.') || (CUR == '-') ||
1308 (CUR == '_') || (CUR == ':') ||
1309 (IS_COMBINING(CUR)) ||
1310 (IS_EXTENDER(CUR)))
1311 NEXT;
1312
1313 ret = xmlStrndup(q, CUR_PTR - q);
1314
1315 return(ret);
1316}
1317
Daniel Veillard11e00581998-10-24 18:27:49 +00001318/**
1319 * xmlParseEntityValue:
1320 * @ctxt: an XML parser context
1321 *
1322 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001323 *
1324 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1325 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard11e00581998-10-24 18:27:49 +00001326 * return values: the EntityValue parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001327 */
1328
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001329CHAR *
1330xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001331 CHAR *ret = NULL, *cur;
1332 const CHAR *q;
1333
1334 if (CUR == '"') {
1335 NEXT;
1336
1337 q = CUR_PTR;
1338 while ((IS_CHAR(CUR)) && (CUR != '"')) {
1339 if (CUR == '%') {
1340 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001341 cur = xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001342 ret = xmlStrcat(ret, cur);
1343 q = CUR_PTR;
1344 } else if (CUR == '&') {
1345 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001346 cur = xmlParseReference(ctxt);
1347 if (cur != NULL) {
1348 CHAR buf[2];
1349 buf[0] = '&';
1350 buf[1] = 0;
1351 ret = xmlStrncat(ret, buf, 1);
1352 ret = xmlStrcat(ret, cur);
1353 buf[0] = ';';
1354 buf[1] = 0;
1355 ret = xmlStrncat(ret, buf, 1);
1356 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001357 q = CUR_PTR;
1358 } else
1359 NEXT;
1360 }
1361 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363 ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001364 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001365 } else {
1366 ret = xmlStrncat(ret, q, CUR_PTR - q);
1367 NEXT;
1368 }
1369 } else if (CUR == '\'') {
1370 NEXT;
1371 q = CUR_PTR;
1372 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1373 if (CUR == '%') {
1374 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001375 cur = xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001376 ret = xmlStrcat(ret, cur);
1377 q = CUR_PTR;
1378 } else if (CUR == '&') {
1379 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001380 cur = xmlParseReference(ctxt);
1381 if (cur != NULL) {
1382 CHAR buf[2];
1383 buf[0] = '&';
1384 buf[1] = 0;
1385 ret = xmlStrncat(ret, buf, 1);
1386 ret = xmlStrcat(ret, cur);
1387 buf[0] = ';';
1388 buf[1] = 0;
1389 ret = xmlStrncat(ret, buf, 1);
1390 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001391 q = CUR_PTR;
1392 } else
1393 NEXT;
1394 }
1395 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1397 ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001398 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001399 } else {
1400 ret = xmlStrncat(ret, q, CUR_PTR - q);
1401 NEXT;
1402 }
1403 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1405 ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001406 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001407 }
1408
1409 return(ret);
1410}
1411
Daniel Veillard11e00581998-10-24 18:27:49 +00001412/**
1413 * xmlParseAttValue:
1414 * @ctxt: an XML parser context
1415 *
1416 * parse a value for an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00001417 *
1418 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1419 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard11e00581998-10-24 18:27:49 +00001420 * return values: the AttValue parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001421 */
1422
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001423CHAR *
1424xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001425 CHAR *ret = NULL, *cur;
1426 const CHAR *q;
1427
1428 if (CUR == '"') {
1429 NEXT;
1430
1431 q = CUR_PTR;
1432 while ((IS_CHAR(CUR)) && (CUR != '"')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001433 if (CUR == '<') {
1434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1435 ctxt->sax->error(ctxt,
1436 "Unescaped '<' not allowed in attributes values\n");
1437 ctxt->wellFormed = 0;
1438 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001439 if (CUR == '&') {
1440 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001441 cur = xmlParseReference(ctxt);
1442 if (cur != NULL) {
1443 /*
1444 * Special case for '&amp;', we don't want to
1445 * resolve it here since it will break later
1446 * when searching entities in the string.
1447 */
1448 if ((cur[0] == '&') && (cur[1] == 0)) {
1449 CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1450 ret = xmlStrncat(ret, buf, 5);
1451 } else
1452 ret = xmlStrcat(ret, cur);
1453 free(cur);
1454 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001455 q = CUR_PTR;
1456 } else
1457 NEXT;
Daniel Veillardccb09631998-10-27 06:21:04 +00001458 /*
1459 * Pop out finished entity references.
1460 */
1461 while ((CUR == 0) && (ctxt->inputNr > 1)) {
1462 if (CUR_PTR != q)
1463 ret = xmlStrncat(ret, q, CUR_PTR - q);
1464 xmlPopInput(ctxt);
1465 q = CUR_PTR;
1466 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001467 }
1468 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1470 ctxt->sax->error(ctxt, "Unfinished AttValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001471 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001472 } else {
1473 ret = xmlStrncat(ret, q, CUR_PTR - q);
1474 NEXT;
1475 }
1476 } else if (CUR == '\'') {
1477 NEXT;
1478 q = CUR_PTR;
1479 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001480 if (CUR == '<') {
1481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1482 ctxt->sax->error(ctxt,
1483 "Unescaped '<' not allowed in attributes values\n");
1484 ctxt->wellFormed = 0;
1485 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001486 if (CUR == '&') {
1487 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001488 cur = xmlParseReference(ctxt);
1489 if (cur != NULL) {
1490 /*
1491 * Special case for '&amp;', we don't want to
1492 * resolve it here since it will break later
1493 * when searching entities in the string.
1494 */
1495 if ((cur[0] == '&') && (cur[1] == 0)) {
1496 CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1497 ret = xmlStrncat(ret, buf, 5);
1498 } else
1499 ret = xmlStrcat(ret, cur);
1500 free(cur);
1501 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001502 q = CUR_PTR;
1503 } else
1504 NEXT;
Daniel Veillardccb09631998-10-27 06:21:04 +00001505 /*
1506 * Pop out finished entity references.
1507 */
1508 while ((CUR == 0) && (ctxt->inputNr > 1)) {
1509 if (CUR_PTR != q)
1510 ret = xmlStrncat(ret, q, CUR_PTR - q);
1511 xmlPopInput(ctxt);
1512 q = CUR_PTR;
1513 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001514 }
1515 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1517 ctxt->sax->error(ctxt, "Unfinished AttValue\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001518 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001519 } else {
1520 ret = xmlStrncat(ret, q, CUR_PTR - q);
1521 NEXT;
1522 }
1523 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1525 ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001526 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001527 }
1528
1529 return(ret);
1530}
1531
Daniel Veillard11e00581998-10-24 18:27:49 +00001532/**
1533 * xmlParseSystemLiteral:
1534 * @ctxt: an XML parser context
1535 *
1536 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00001537 *
1538 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00001539 * return values: the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001540 */
1541
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001542CHAR *
1543xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001544 const CHAR *q;
1545 CHAR *ret = NULL;
1546
1547 if (CUR == '"') {
1548 NEXT;
1549 q = CUR_PTR;
1550 while ((IS_CHAR(CUR)) && (CUR != '"'))
1551 NEXT;
1552 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1554 ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001555 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001556 } else {
1557 ret = xmlStrndup(q, CUR_PTR - q);
1558 NEXT;
1559 }
1560 } else if (CUR == '\'') {
1561 NEXT;
1562 q = CUR_PTR;
1563 while ((IS_CHAR(CUR)) && (CUR != '\''))
1564 NEXT;
1565 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001566 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1567 ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001568 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001569 } else {
1570 ret = xmlStrndup(q, CUR_PTR - q);
1571 NEXT;
1572 }
1573 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575 ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001576 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001577 }
1578
1579 return(ret);
1580}
1581
Daniel Veillard11e00581998-10-24 18:27:49 +00001582/**
1583 * xmlParsePubidLiteral:
1584 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00001585 *
Daniel Veillard11e00581998-10-24 18:27:49 +00001586 * parse an XML public literal
1587 * return values: the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001588 */
1589
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001590CHAR *
1591xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001592 const CHAR *q;
1593 CHAR *ret = NULL;
1594 /*
1595 * Name ::= (Letter | '_') (NameChar)*
1596 */
1597 if (CUR == '"') {
1598 NEXT;
1599 q = CUR_PTR;
1600 while (IS_PUBIDCHAR(CUR)) NEXT;
1601 if (CUR != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001604 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001605 } else {
1606 ret = xmlStrndup(q, CUR_PTR - q);
1607 NEXT;
1608 }
1609 } else if (CUR == '\'') {
1610 NEXT;
1611 q = CUR_PTR;
1612 while ((IS_LETTER(CUR)) && (CUR != '\''))
1613 NEXT;
1614 if (!IS_LETTER(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616 ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001617 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001618 } else {
1619 ret = xmlStrndup(q, CUR_PTR - q);
1620 NEXT;
1621 }
1622 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624 ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001625 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001626 }
1627
1628 return(ret);
1629}
1630
Daniel Veillard11e00581998-10-24 18:27:49 +00001631/**
1632 * xmlParseCharData:
1633 * @ctxt: an XML parser context
1634 * @cdata: int indicating whether we are within a CDATA section
1635 *
1636 * parse a CharData section.
1637 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001638 *
1639 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
Daniel Veillard11e00581998-10-24 18:27:49 +00001640 * return values:
Daniel Veillard260a68f1998-08-13 03:39:55 +00001641 */
1642
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001643void
1644xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001645 const CHAR *q;
1646
1647 q = CUR_PTR;
1648 while ((IS_CHAR(CUR)) && (CUR != '<') &&
1649 (CUR != '&')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001650 if ((CUR == ']') && (NXT(1) == ']') &&
1651 (NXT(2) == '>')) {
1652 if (cdata) break;
1653 else {
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt,
1656 "Sequence ']]>' not allowed in content\n");
1657 ctxt->wellFormed = 0;
1658 }
1659 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001660 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001661 }
1662 if (q == CUR_PTR) return;
1663
1664 /*
1665 * Ok the segment [q CUR_PTR] is to be consumed as chars.
1666 */
1667 if (ctxt->sax != NULL) {
1668 if (areBlanks(ctxt, q, CUR_PTR - q))
1669 ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1670 else
1671 ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1672 }
1673}
1674
Daniel Veillard11e00581998-10-24 18:27:49 +00001675/**
1676 * xmlParseExternalID:
1677 * @ctxt: an XML parser context
1678 * @publicID: a CHAR** receiving PubidLiteral
1679 *
1680 * Parse an External ID
Daniel Veillard260a68f1998-08-13 03:39:55 +00001681 *
1682 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1683 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00001684 * return values: the function returns SystemLiteral and in the second
1685 * case publicID receives PubidLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00001686 */
1687
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001688CHAR *
1689xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001690 CHAR *URI = NULL;
1691
1692 if ((CUR == 'S') && (NXT(1) == 'Y') &&
1693 (NXT(2) == 'S') && (NXT(3) == 'T') &&
1694 (NXT(4) == 'E') && (NXT(5) == 'M')) {
1695 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001696 if (!IS_BLANK(CUR)) {
1697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698 ctxt->sax->error(ctxt,
1699 "Space required after 'SYSTEM'\n");
1700 ctxt->wellFormed = 0;
1701 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001702 SKIP_BLANKS;
1703 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001704 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001707 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001708 ctxt->wellFormed = 0;
1709 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001710 } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1711 (NXT(2) == 'B') && (NXT(3) == 'L') &&
1712 (NXT(4) == 'I') && (NXT(5) == 'C')) {
1713 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001714 if (!IS_BLANK(CUR)) {
1715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1716 ctxt->sax->error(ctxt,
1717 "Space required after 'PUBLIC'\n");
1718 ctxt->wellFormed = 0;
1719 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001720 SKIP_BLANKS;
1721 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001722 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1724 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001725 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001726 ctxt->wellFormed = 0;
1727 }
1728 if (!IS_BLANK(CUR)) {
1729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730 ctxt->sax->error(ctxt,
1731 "Space required after the Public Identifier\n");
1732 ctxt->wellFormed = 0;
1733 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001734 SKIP_BLANKS;
1735 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001736 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1738 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001739 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001740 ctxt->wellFormed = 0;
1741 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001742 }
1743 return(URI);
1744}
1745
Daniel Veillard11e00581998-10-24 18:27:49 +00001746/**
1747 * xmlParseComment:
1748 * @create: should we create a node
1749 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001750 * Skip an XML (SGML) comment <!-- .... -->
1751 * This may or may not create a node (depending on the context)
1752 * The spec says that "For compatibility, the string "--" (double-hyphen)
1753 * must not occur within comments. "
1754 *
1755 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
Daniel Veillard11e00581998-10-24 18:27:49 +00001756 *
1757 * TODO: this should call a SAX function which will handle (or not) the
1758 * creation of the comment !
1759 * return values:
Daniel Veillard260a68f1998-08-13 03:39:55 +00001760 */
1761xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1762 xmlNodePtr ret = NULL;
1763 const CHAR *q, *start;
1764 const CHAR *r;
1765 CHAR *val;
1766
1767 /*
1768 * Check that there is a comment right here.
1769 */
1770 if ((CUR != '<') || (NXT(1) != '!') ||
1771 (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1772
1773 SKIP(4);
1774 start = q = CUR_PTR;
1775 NEXT;
1776 r = CUR_PTR;
1777 NEXT;
1778 while (IS_CHAR(CUR) &&
1779 ((CUR == ':') || (CUR != '>') ||
1780 (*r != '-') || (*q != '-'))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001781 if ((*r == '-') && (*q == '-')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1783 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001784 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001785 ctxt->wellFormed = 0;
1786 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001787 NEXT;r++;q++;
1788 }
1789 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001792 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001793 } else {
1794 NEXT;
1795 if (create) {
1796 val = xmlStrndup(start, q - start);
Daniel Veillard0bef1311998-10-14 02:36:47 +00001797 ret = xmlNewDocComment(ctxt->doc, val);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001798 free(val);
1799 }
1800 }
1801 return(ret);
1802}
1803
Daniel Veillard11e00581998-10-24 18:27:49 +00001804/**
1805 * xmlParsePITarget:
1806 * @ctxt: an XML parser context
1807 *
1808 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00001809 *
1810 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard11e00581998-10-24 18:27:49 +00001811 * return values: the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001812 */
1813
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001814CHAR *
1815xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001816 CHAR *name;
1817
1818 name = xmlParseName(ctxt);
1819 if ((name != NULL) && (name[3] == 0) &&
1820 ((name[0] == 'x') || (name[0] == 'X')) &&
1821 ((name[1] == 'm') || (name[1] == 'M')) &&
1822 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1824 ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001825 return(NULL);
1826 }
1827 return(name);
1828}
1829
Daniel Veillard11e00581998-10-24 18:27:49 +00001830/**
1831 * xmlParsePI:
1832 * @ctxt: an XML parser context
1833 *
1834 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001835 *
1836 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard11e00581998-10-24 18:27:49 +00001837 * return values: the PI name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001838 */
1839
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001840void
1841xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001842 CHAR *target;
1843
1844 if ((CUR == '<') && (NXT(1) == '?')) {
1845 /*
1846 * this is a Processing Instruction.
1847 */
1848 SKIP(2);
1849
1850 /*
1851 * Parse the target name and check for special support like
1852 * namespace.
1853 *
1854 * TODO : PI handling should be dynamically redefinable using an
1855 * API. Only namespace should be in the code IMHO ...
1856 */
1857 target = xmlParsePITarget(ctxt);
1858 if (target != NULL) {
1859 /*
1860 * Support for the old Processing Instruction related to namespace.
1861 */
1862 if ((target[0] == 'n') && (target[1] == 'a') &&
1863 (target[2] == 'm') && (target[3] == 'e') &&
1864 (target[4] == 's') && (target[5] == 'p') &&
1865 (target[6] == 'a') && (target[7] == 'c') &&
1866 (target[8] == 'e')) {
1867 xmlParseNamespace(ctxt);
1868 } else if ((target[0] == 'x') && (target[1] == 'm') &&
1869 (target[2] == 'l') && (target[3] == ':') &&
1870 (target[4] == 'n') && (target[5] == 'a') &&
1871 (target[6] == 'm') && (target[7] == 'e') &&
1872 (target[8] == 's') && (target[9] == 'p') &&
1873 (target[10] == 'a') && (target[11] == 'c') &&
1874 (target[12] == 'e')) {
1875 xmlParseNamespace(ctxt);
1876 } else {
1877 const CHAR *q = CUR_PTR;
1878
1879 while (IS_CHAR(CUR) &&
1880 ((CUR != '?') || (NXT(1) != '>')))
1881 NEXT;
1882 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001884 ctxt->sax->error(ctxt,
1885 "xmlParsePI: PI %s never end ...\n", target);
1886 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001887 } else {
1888 CHAR *data;
1889
1890 data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1891 SKIP(2);
1892
1893 /*
1894 * SAX: PI detected.
1895 */
1896 if (ctxt->sax)
1897 ctxt->sax->processingInstruction(ctxt, target, data);
1898 /*
1899 * Unknown PI, ignore it !
1900 */
1901 else
1902 xmlParserWarning(ctxt,
1903 "xmlParsePI : skipping unknown PI %s\n",
1904 target);
1905 free(data);
1906 }
1907 }
1908 free(target);
1909 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1911 ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001912 ctxt->wellFormed = 0;
1913
Daniel Veillard260a68f1998-08-13 03:39:55 +00001914 /********* Should we try to complete parsing the PI ???
1915 while (IS_CHAR(CUR) &&
1916 (CUR != '?') && (CUR != '>'))
1917 NEXT;
1918 if (!IS_CHAR(CUR)) {
1919 fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1920 target);
1921 }
1922 ********************************************************/
1923 }
1924 }
1925}
1926
Daniel Veillard11e00581998-10-24 18:27:49 +00001927/**
1928 * xmlParseNotationDecl:
1929 * @ctxt: an XML parser context
1930 *
1931 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00001932 *
1933 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1934 *
1935 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1936 *
1937 * NOTE: Actually [75] and [83] interract badly since [75] can generate
1938 * 'PUBLIC' S PubidLiteral S SystemLiteral
1939 *
1940 * Hence there is actually 3 choices:
1941 * 'PUBLIC' S PubidLiteral
1942 * 'PUBLIC' S PubidLiteral S SystemLiteral
1943 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00001944 *
1945 * TODO: no handling of the values parsed !
Daniel Veillard260a68f1998-08-13 03:39:55 +00001946 */
1947
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001948void
1949xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001950 CHAR *name;
1951
1952 if ((CUR == '<') && (NXT(1) == '!') &&
1953 (NXT(2) == 'N') && (NXT(3) == 'O') &&
1954 (NXT(4) == 'T') && (NXT(5) == 'A') &&
1955 (NXT(6) == 'T') && (NXT(7) == 'I') &&
1956 (NXT(8) == 'O') && (NXT(9) == 'N') &&
1957 (IS_BLANK(NXT(10)))) {
1958 SKIP(10);
1959 SKIP_BLANKS;
1960
1961 name = xmlParseName(ctxt);
1962 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001965 "xmlParseAttributeListDecl: no name for Element\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001966 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001967 return;
1968 }
1969 SKIP_BLANKS;
1970 /*
1971 * TODO !!!
1972 */
1973 while ((IS_CHAR(CUR)) && (CUR != '>'))
1974 NEXT;
1975 free(name);
1976 }
1977}
1978
Daniel Veillard11e00581998-10-24 18:27:49 +00001979/**
1980 * xmlParseEntityDecl:
1981 * @ctxt: an XML parser context
1982 *
1983 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00001984 *
1985 * [70] EntityDecl ::= GEDecl | PEDecl
1986 *
1987 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1988 *
1989 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1990 *
1991 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1992 *
1993 * [74] PEDef ::= EntityValue | ExternalID
1994 *
1995 * [76] NDataDecl ::= S 'NDATA' S Name
1996 */
1997
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001998void
1999xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002000 CHAR *name = NULL;
2001 CHAR *value = NULL;
2002 CHAR *URI = NULL, *literal = NULL;
2003 CHAR *ndata = NULL;
2004 int isParameter = 0;
2005
2006 if ((CUR == '<') && (NXT(1) == '!') &&
2007 (NXT(2) == 'E') && (NXT(3) == 'N') &&
2008 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002009 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002010 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002011 if (!IS_BLANK(CUR)) {
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt, "Space required after '<!ENTITY'\n");
2014 ctxt->wellFormed = 0;
2015 }
2016 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002017
2018 if (CUR == '%') {
2019 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002020 if (!IS_BLANK(CUR)) {
2021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2022 ctxt->sax->error(ctxt, "Space required after '%'\n");
2023 ctxt->wellFormed = 0;
2024 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002025 SKIP_BLANKS;
2026 isParameter = 1;
2027 }
2028
2029 name = xmlParseName(ctxt);
2030 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032 ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002033 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002034 return;
2035 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002036 if (!IS_BLANK(CUR)) {
2037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038 ctxt->sax->error(ctxt,
2039 "Space required after the entity name\n");
2040 ctxt->wellFormed = 0;
2041 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002042 SKIP_BLANKS;
2043
2044 /*
2045 * TODO handle the various case of definitions...
2046 */
2047 if (isParameter) {
2048 if ((CUR == '"') || (CUR == '\''))
2049 value = xmlParseEntityValue(ctxt);
2050 if (value) {
2051 xmlAddDocEntity(ctxt->doc, name,
2052 XML_INTERNAL_PARAMETER_ENTITY,
2053 NULL, NULL, value);
2054 }
2055 else {
2056 URI = xmlParseExternalID(ctxt, &literal);
2057 if (URI) {
2058 xmlAddDocEntity(ctxt->doc, name,
2059 XML_EXTERNAL_PARAMETER_ENTITY,
2060 literal, URI, NULL);
2061 }
2062 }
2063 } else {
2064 if ((CUR == '"') || (CUR == '\'')) {
2065 value = xmlParseEntityValue(ctxt);
2066 xmlAddDocEntity(ctxt->doc, name,
2067 XML_INTERNAL_GENERAL_ENTITY,
2068 NULL, NULL, value);
2069 } else {
2070 URI = xmlParseExternalID(ctxt, &literal);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002071 if ((CUR != '>') && (!IS_BLANK(CUR))) {
2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073 ctxt->sax->error(ctxt,
2074 "Space required before 'NDATA'\n");
2075 ctxt->wellFormed = 0;
2076 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002077 SKIP_BLANKS;
2078 if ((CUR == 'N') && (NXT(1) == 'D') &&
2079 (NXT(2) == 'A') && (NXT(3) == 'T') &&
2080 (NXT(4) == 'A')) {
2081 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002082 if (!IS_BLANK(CUR)) {
2083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2084 ctxt->sax->error(ctxt,
2085 "Space required after 'NDATA'\n");
2086 ctxt->wellFormed = 0;
2087 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002088 SKIP_BLANKS;
2089 ndata = xmlParseName(ctxt);
2090 xmlAddDocEntity(ctxt->doc, name,
2091 XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2092 literal, URI, ndata);
2093 } else {
2094 xmlAddDocEntity(ctxt->doc, name,
2095 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2096 literal, URI, NULL);
2097 }
2098 }
2099 }
2100 SKIP_BLANKS;
2101 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002104 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002105 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002106 } else
2107 NEXT;
2108 if (name != NULL) free(name);
2109 if (value != NULL) free(value);
2110 if (URI != NULL) free(URI);
2111 if (literal != NULL) free(literal);
2112 if (ndata != NULL) free(ndata);
2113 }
2114}
2115
Daniel Veillard11e00581998-10-24 18:27:49 +00002116/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002117 * xmlParseDefaultDecl:
2118 * @ctxt: an XML parser context
2119 * @value: Receive a possible fixed default value for the attribute
2120 *
2121 * Parse an attribute default declaration
2122 *
2123 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
2124 *
2125 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
2126 * or XML_ATTRIBUTE_FIXED.
2127 */
2128
2129int
2130xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
2131 int val;
2132 CHAR *ret;
2133
2134 *value = NULL;
2135 if ((CUR == '#') && (NXT(1) == 'R') &&
2136 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
2137 (NXT(4) == 'U') && (NXT(5) == 'I') &&
2138 (NXT(6) == 'R') && (NXT(7) == 'E') &&
2139 (NXT(8) == 'D')) {
2140 SKIP(9);
2141 return(XML_ATTRIBUTE_REQUIRED);
2142 }
2143 if ((CUR == '#') && (NXT(1) == 'I') &&
2144 (NXT(2) == 'M') && (NXT(3) == 'P') &&
2145 (NXT(4) == 'L') && (NXT(5) == 'I') &&
2146 (NXT(6) == 'E') && (NXT(7) == 'D')) {
2147 SKIP(8);
2148 return(XML_ATTRIBUTE_IMPLIED);
2149 }
2150 val = XML_ATTRIBUTE_NONE;
2151 if ((CUR == '#') && (NXT(1) == 'F') &&
2152 (NXT(2) == 'I') && (NXT(3) == 'X') &&
2153 (NXT(4) == 'E') && (NXT(5) == 'D')) {
2154 SKIP(6);
2155 val = XML_ATTRIBUTE_FIXED;
2156 if (!IS_BLANK(CUR)) {
2157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2158 ctxt->sax->error(ctxt, "Space required after '#FIXED'\n");
2159 ctxt->wellFormed = 0;
2160 }
2161 SKIP_BLANKS;
2162 }
2163 ret = xmlParseAttValue(ctxt);
2164 if (ret == NULL) {
2165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2166 ctxt->sax->error(ctxt,
2167 "Attribute default value declaration error\n");
2168 ctxt->wellFormed = 0;
2169 } else
2170 *value = ret;
2171 return(val);
2172}
2173
2174/**
Daniel Veillard11e00581998-10-24 18:27:49 +00002175 * xmlParseEnumeratedType:
2176 * @ctxt: an XML parser context
2177 * @name: ???
2178 * @:
2179 *
2180 * parse and Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002181 *
2182 * [57] EnumeratedType ::= NotationType | Enumeration
2183 *
2184 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2185 *
2186 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
Daniel Veillard11e00581998-10-24 18:27:49 +00002187 *
2188 * TODO: not implemented !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00002189 */
2190
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002191void
2192xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002193 /*
2194 * TODO !!!
2195 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002196 fprintf(stderr, "Production [57] EnumeratedType not yet supported\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002197 while ((IS_CHAR(CUR)) && (CUR != '>'))
2198 NEXT;
2199}
2200
Daniel Veillard11e00581998-10-24 18:27:49 +00002201/**
2202 * xmlParseAttributeType:
2203 * @ctxt: an XML parser context
2204 * @name: ???
2205 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002206 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00002207 *
2208 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2209 *
2210 * [55] StringType ::= 'CDATA'
2211 *
2212 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2213 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00002214 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002215 * Returns: the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00002216 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002217int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002218xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002219 /* TODO !!! */
2220 if ((CUR == 'C') && (NXT(1) == 'D') &&
2221 (NXT(2) == 'A') && (NXT(3) == 'T') &&
2222 (NXT(4) == 'A')) {
2223 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002224 return(XML_ATTRIBUTE_STRING);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002225 } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2226 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002227 return(XML_ATTRIBUTE_ID);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002228 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2229 (NXT(2) == 'R') && (NXT(3) == 'E') &&
2230 (NXT(4) == 'F')) {
2231 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002232 return(XML_ATTRIBUTE_IDREF);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002233 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2234 (NXT(2) == 'R') && (NXT(3) == 'E') &&
2235 (NXT(4) == 'F') && (NXT(5) == 'S')) {
2236 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002237 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002238 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2239 (NXT(2) == 'T') && (NXT(3) == 'I') &&
2240 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2241 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002242 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002243 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2244 (NXT(2) == 'T') && (NXT(3) == 'I') &&
2245 (NXT(4) == 'T') && (NXT(5) == 'I') &&
2246 (NXT(6) == 'E') && (NXT(7) == 'S')) {
2247 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002248 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002249 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2250 (NXT(2) == 'T') && (NXT(3) == 'O') &&
2251 (NXT(4) == 'K') && (NXT(5) == 'E') &&
2252 (NXT(6) == 'N')) {
2253 SKIP(7);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002254 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002255 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2256 (NXT(2) == 'T') && (NXT(3) == 'O') &&
2257 (NXT(4) == 'K') && (NXT(5) == 'E') &&
2258 (NXT(6) == 'N') && (NXT(7) == 'S')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002259 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002260 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002261 xmlParseEnumeratedType(ctxt, name);
2262 return(XML_ATTRIBUTE_ENUMERATED);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002263}
2264
Daniel Veillard11e00581998-10-24 18:27:49 +00002265/**
2266 * xmlParseAttributeListDecl:
2267 * @ctxt: an XML parser context
2268 *
2269 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00002270 *
2271 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2272 *
2273 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00002274 *
2275 * TODO: not implemented !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00002276 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002277void
2278xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002279 CHAR *elemName;
2280 CHAR *attrName;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002281
2282 /* TODO !!! */
2283 if ((CUR == '<') && (NXT(1) == '!') &&
2284 (NXT(2) == 'A') && (NXT(3) == 'T') &&
2285 (NXT(4) == 'T') && (NXT(5) == 'L') &&
2286 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002287 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002288 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002289 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002291 ctxt->sax->error(ctxt, "Space required after '<!ATTLIST'\n");
2292 ctxt->wellFormed = 0;
2293 }
2294 SKIP_BLANKS;
2295 elemName = xmlParseName(ctxt);
2296 if (elemName == NULL) {
2297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2298 ctxt->sax->error(ctxt, "ATTLIST: no name for Element\n");
2299 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002300 return;
2301 }
2302 SKIP_BLANKS;
2303 while (CUR != '>') {
2304 const CHAR *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002305 int type;
2306 int def;
2307 CHAR *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002308
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002309 attrName = xmlParseName(ctxt);
2310 if (attrName == NULL) {
2311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2312 ctxt->sax->error(ctxt, "ATTLIST: no name for Attribute\n");
2313 ctxt->wellFormed = 0;
2314 break;
2315 }
2316 if (!IS_BLANK(CUR)) {
2317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2318 ctxt->sax->error(ctxt,
2319 "Space required after the attribute name\n");
2320 ctxt->wellFormed = 0;
2321 break;
2322 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002323 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002324
2325 type = xmlParseAttributeType(ctxt, attrName);
2326 if (type <= 0) break;
2327
2328 if (!IS_BLANK(CUR)) {
2329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330 ctxt->sax->error(ctxt,
2331 "Space required after the attribute type\n");
2332 ctxt->wellFormed = 0;
2333 break;
2334 }
2335 SKIP_BLANKS;
2336
2337 def = xmlParseDefaultDecl(ctxt, &defaultValue);
2338 if (def <= 0) break;
2339
2340 if (CUR != '>') {
2341 if (!IS_BLANK(CUR)) {
2342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2343 ctxt->sax->error(ctxt,
2344 "Space required after the attribute default value\n");
2345 ctxt->wellFormed = 0;
2346 break;
2347 }
2348 SKIP_BLANKS;
2349 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002350 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2352 ctxt->sax->error(ctxt,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002353 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002354 break;
2355 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002356 if (attrName != NULL)
2357 free(attrName);
2358 if (defaultValue != NULL)
2359 free(defaultValue);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002360 }
2361 if (CUR == '>')
2362 NEXT;
2363
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002364 free(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002365 }
2366}
2367
Daniel Veillard11e00581998-10-24 18:27:49 +00002368/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002369 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00002370 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002371 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002372 * parse the declaration for a Mixed Element content
2373 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00002374 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002375 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2376 * '(' S? '#PCDATA' S? ')'
2377 *
2378 * returns: the list of the xmlElementContentPtr describing the element choices
2379 */
2380xmlElementContentPtr
2381xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
2382 xmlElementContentPtr ret = NULL, cur = NULL;
2383 CHAR *elem = NULL;
2384
2385 if ((CUR == '#') && (NXT(1) == 'P') &&
2386 (NXT(2) == 'C') && (NXT(3) == 'D') &&
2387 (NXT(4) == 'A') && (NXT(5) == 'T') &&
2388 (NXT(6) == 'A')) {
2389 SKIP(7);
2390 SKIP_BLANKS;
Daniel Veillard3b9def11999-01-31 22:15:06 +00002391 if (CUR == ')') {
2392 NEXT;
2393 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2394 return(ret);
2395 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002396 if ((CUR == '(') || (CUR == '|')) {
2397 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2398 if (ret == NULL) return(NULL);
Daniel Veillard3b9def11999-01-31 22:15:06 +00002399 } /********** else {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2401 ctxt->sax->error(ctxt,
2402 "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2403 ctxt->wellFormed = 0;
2404 return(NULL);
Daniel Veillard3b9def11999-01-31 22:15:06 +00002405 } **********/
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002406 while (CUR == '|') {
2407 if (elem == NULL) {
2408 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2409 if (ret == NULL) return(NULL);
2410 ret->c1 = cur;
2411 } else {
2412 cur->c1 = xmlNewElementContent(elem,
2413 XML_ELEMENT_CONTENT_ELEMENT);
2414 cur->c2 = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2415 cur = cur->c2;
2416 if (cur == NULL) return(NULL);
2417 }
2418 SKIP_BLANKS;
2419 elem = xmlParseName(ctxt);
2420 if (elem == NULL) {
2421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2422 ctxt->sax->error(ctxt,
2423 "xmlParseElementMixedContentDecl : Name expected\n");
2424 ctxt->wellFormed = 0;
2425 xmlFreeElementContent(cur);
2426 return(NULL);
2427 }
2428 SKIP_BLANKS;
2429 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00002430 if ((CUR == ')') && (NXT(1) == '*')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002431 if (elem != NULL)
2432 cur->c2 = xmlNewElementContent(elem,
2433 XML_ELEMENT_CONTENT_ELEMENT);
2434 NEXT;
2435 } else {
2436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2437 ctxt->sax->error(ctxt,
Daniel Veillard3b9def11999-01-31 22:15:06 +00002438 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002439 ctxt->wellFormed = 0;
2440 xmlFreeElementContent(ret);
2441 return(NULL);
2442 }
2443
2444 } else {
2445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2446 ctxt->sax->error(ctxt,
2447 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
2448 ctxt->wellFormed = 0;
2449 }
2450 return(ret);
2451}
2452
2453/**
2454 * xmlParseElementChildrenContentDecl:
2455 * @ctxt: an XML parser context
2456 *
2457 * parse the declaration for a Mixed Element content
2458 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
2459 *
2460 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002461 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2462 *
2463 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2464 *
2465 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2466 *
2467 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2468 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002469 * returns: the tree of xmlElementContentPtr describing the element
2470 * hierarchy.
2471 */
2472xmlElementContentPtr
2473xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
2474 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
2475 CHAR *elem;
2476 CHAR type = 0;
2477
2478 SKIP_BLANKS;
2479 if (CUR == '(') {
2480 /* Recurse on first child */
2481 NEXT;
2482 SKIP_BLANKS;
2483 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
2484 SKIP_BLANKS;
2485 } else {
2486 elem = xmlParseName(ctxt);
2487 if (elem == NULL) {
2488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2489 ctxt->sax->error(ctxt,
2490 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2491 ctxt->wellFormed = 0;
2492 return(NULL);
2493 }
2494 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2495 if (CUR == '?') {
2496 ret->ocur = XML_ELEMENT_CONTENT_OPT;
2497 NEXT;
2498 } else if (CUR == '*') {
2499 ret->ocur = XML_ELEMENT_CONTENT_MULT;
2500 NEXT;
2501 } else if (CUR == '+') {
2502 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2503 NEXT;
2504 } else {
2505 ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2506 }
2507 }
2508 SKIP_BLANKS;
2509 while (CUR != ')') {
2510 /*
2511 * Each loop we parse one separator and one element.
2512 */
2513 if (CUR == ',') {
2514 if (type == 0) type = CUR;
2515
2516 /*
2517 * Detect "Name | Name , Name" error
2518 */
2519 else if (type != CUR) {
2520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2521 ctxt->sax->error(ctxt,
2522 "xmlParseElementChildrenContentDecl : '%c' expected\n",
2523 type);
2524 ctxt->wellFormed = 0;
2525 xmlFreeElementContent(ret);
2526 return(NULL);
2527 }
2528
2529 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
2530 if (op == NULL) {
2531 xmlFreeElementContent(ret);
2532 return(NULL);
2533 }
2534 if (last == NULL) {
2535 op->c1 = ret;
2536 ret = cur = op;
2537 } else {
2538 cur->c2 = op;
2539 op->c1 = last;
2540 cur =op;
2541 }
2542 } else if (CUR == '|') {
2543 if (type == 0) type = CUR;
2544
2545 /*
2546 * Detect "Name , Name | Name" error
2547 */
2548 else if (type != CUR) {
2549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2550 ctxt->sax->error(ctxt,
2551 "xmlParseElementChildrenContentDecl : '%c' expected\n",
2552 type);
2553 ctxt->wellFormed = 0;
2554 xmlFreeElementContent(ret);
2555 return(NULL);
2556 }
2557
2558 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2559 if (op == NULL) {
2560 xmlFreeElementContent(ret);
2561 return(NULL);
2562 }
2563 if (last == NULL) {
2564 op->c1 = ret;
2565 ret = cur = op;
2566 } else {
2567 cur->c2 = op;
2568 op->c1 = last;
2569 cur =op;
2570 }
2571 } else {
2572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2573 ctxt->sax->error(ctxt,
2574 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
2575 ctxt->wellFormed = 0;
2576 xmlFreeElementContent(ret);
2577 return(NULL);
2578 }
2579 SKIP_BLANKS;
2580 if (CUR == '(') {
2581 /* Recurse on second child */
2582 NEXT;
2583 SKIP_BLANKS;
2584 cur = xmlParseElementChildrenContentDecl(ctxt);
2585 SKIP_BLANKS;
2586 } else {
2587 elem = xmlParseName(ctxt);
2588 if (elem == NULL) {
2589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2590 ctxt->sax->error(ctxt,
2591 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2592 ctxt->wellFormed = 0;
2593 return(NULL);
2594 }
2595 cur = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2596 }
2597 if (CUR == '?') {
2598 ret->ocur = XML_ELEMENT_CONTENT_OPT;
2599 NEXT;
2600 } else if (CUR == '*') {
2601 ret->ocur = XML_ELEMENT_CONTENT_MULT;
2602 NEXT;
2603 } else if (CUR == '+') {
2604 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2605 NEXT;
2606 } else {
2607 ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2608 }
2609 SKIP_BLANKS;
2610 }
2611 NEXT;
2612 if (CUR == '?') {
2613 ret->ocur = XML_ELEMENT_CONTENT_OPT;
2614 NEXT;
2615 } else if (CUR == '*') {
2616 ret->ocur = XML_ELEMENT_CONTENT_MULT;
2617 NEXT;
2618 } else if (CUR == '+') {
2619 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2620 NEXT;
2621 } else {
2622 ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2623 }
2624 return(ret);
2625}
2626
2627/**
2628 * xmlParseElementContentDecl:
2629 * @ctxt: an XML parser context
2630 * @name: the name of the element being defined.
2631 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00002632 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002633 * parse the declaration for an Element content either Mixed or Children,
2634 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
2635 *
2636 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00002637 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002638 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00002639 */
2640
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002641int
2642xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
2643 xmlElementContentPtr *result) {
2644
2645 xmlElementContentPtr tree = NULL;
2646 int res;
2647
2648 *result = NULL;
2649
2650 if (CUR != '(') {
2651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2652 ctxt->sax->error(ctxt,
2653 "xmlParseElementContentDecl : '(' expected\n");
2654 ctxt->wellFormed = 0;
2655 return(-1);
2656 }
2657 NEXT;
2658 SKIP_BLANKS;
2659 if ((CUR == '#') && (NXT(1) == 'P') &&
2660 (NXT(2) == 'C') && (NXT(3) == 'D') &&
2661 (NXT(4) == 'A') && (NXT(5) == 'T') &&
2662 (NXT(6) == 'A')) {
2663 tree = xmlParseElementMixedContentDecl(ctxt);
2664 res = XML_ELEMENT_TYPE_MIXED;
2665 } else {
2666 tree = xmlParseElementChildrenContentDecl(ctxt);
2667 res = XML_ELEMENT_TYPE_ELEMENT;
2668 }
2669 SKIP_BLANKS;
2670 /****************************
2671 if (CUR != ')') {
2672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2673 ctxt->sax->error(ctxt,
2674 "xmlParseElementContentDecl : ')' expected\n");
2675 ctxt->wellFormed = 0;
2676 return(-1);
2677 }
2678 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00002679 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002680 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002681}
2682
Daniel Veillard11e00581998-10-24 18:27:49 +00002683/**
2684 * xmlParseElementDecl:
2685 * @ctxt: an XML parser context
2686 *
2687 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002688 *
2689 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2690 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002691 * TODO There is a check [ VC: Unique Element Type Declaration ]
2692 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002693int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002694xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002695 CHAR *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002696 int ret = -1;
2697 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002698
2699 if ((CUR == '<') && (NXT(1) == '!') &&
2700 (NXT(2) == 'E') && (NXT(3) == 'L') &&
2701 (NXT(4) == 'E') && (NXT(5) == 'M') &&
2702 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002703 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002704 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002705 if (!IS_BLANK(CUR)) {
2706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2707 ctxt->sax->error(ctxt,
2708 "Space required after 'ELEMENT'\n");
2709 ctxt->wellFormed = 0;
2710 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002711 SKIP_BLANKS;
2712 name = xmlParseName(ctxt);
2713 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002715 ctxt->sax->error(ctxt,
2716 "xmlParseElementDecl: no name for Element\n");
2717 ctxt->wellFormed = 0;
2718 return(-1);
2719 }
2720 if (!IS_BLANK(CUR)) {
2721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2722 ctxt->sax->error(ctxt,
2723 "Space required after the element name\n");
2724 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002725 }
2726 SKIP_BLANKS;
2727 if ((CUR == 'E') && (NXT(1) == 'M') &&
2728 (NXT(2) == 'P') && (NXT(3) == 'T') &&
2729 (NXT(4) == 'Y')) {
2730 SKIP(5);
2731 /*
2732 * Element must always be empty.
2733 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002734 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002735 } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2736 (NXT(2) == 'Y')) {
2737 SKIP(3);
2738 /*
2739 * Element is a generic container.
2740 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002741 ret = XML_ELEMENT_TYPE_ANY;
2742 } else if (CUR == '(') {
2743 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002744 } else {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2746 ctxt->sax->error(ctxt,
2747 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
2748 ctxt->wellFormed = 0;
2749 if (name != NULL) free(name);
2750 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002751 }
2752 SKIP_BLANKS;
2753 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2755 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002756 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002757 ctxt->wellFormed = 0;
2758 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002759 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002760 xmlAddElementDecl(ctxt->doc->intSubset, name, ret, content);
2761 }
2762 if (name != NULL) {
2763 free(name);
2764 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002765 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002766 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002767}
2768
Daniel Veillard11e00581998-10-24 18:27:49 +00002769/**
2770 * xmlParseMarkupDecl:
2771 * @ctxt: an XML parser context
2772 *
2773 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002774 *
2775 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2776 * NotationDecl | PI | Comment
2777 *
2778 * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2779 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002780void
2781xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002782 xmlParseElementDecl(ctxt);
2783 xmlParseAttributeListDecl(ctxt);
2784 xmlParseEntityDecl(ctxt);
2785 xmlParseNotationDecl(ctxt);
2786 xmlParsePI(ctxt);
2787 xmlParseComment(ctxt, 0);
2788}
2789
Daniel Veillard11e00581998-10-24 18:27:49 +00002790/**
2791 * xmlParseCharRef:
2792 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002793 *
2794 * parse Reference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002795 *
2796 * [66] CharRef ::= '&#' [0-9]+ ';' |
2797 * '&#x' [0-9a-fA-F]+ ';'
Daniel Veillard11e00581998-10-24 18:27:49 +00002798 * return values: the value parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00002799 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002800CHAR *
2801xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002802 int val = 0;
2803 CHAR buf[2];
2804
2805 if ((CUR == '&') && (NXT(1) == '#') &&
2806 (NXT(2) == 'x')) {
2807 SKIP(3);
2808 while (CUR != ';') {
2809 if ((CUR >= '0') && (CUR <= '9'))
2810 val = val * 16 + (CUR - '0');
2811 else if ((CUR >= 'a') && (CUR <= 'f'))
2812 val = val * 16 + (CUR - 'a') + 10;
2813 else if ((CUR >= 'A') && (CUR <= 'F'))
2814 val = val * 16 + (CUR - 'A') + 10;
2815 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00002817 ctxt->sax->error(ctxt,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002818 "xmlParseCharRef: invalid hexadecimal value\n");
2819 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002820 val = 0;
2821 break;
2822 }
Daniel Veillard845664d1998-08-13 04:43:19 +00002823 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002824 }
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002825 if (CUR == ';')
Daniel Veillard260a68f1998-08-13 03:39:55 +00002826 NEXT;
2827 } else if ((CUR == '&') && (NXT(1) == '#')) {
2828 SKIP(2);
2829 while (CUR != ';') {
2830 if ((CUR >= '0') && (CUR <= '9'))
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002831 val = val * 10 + (CUR - '0');
Daniel Veillard260a68f1998-08-13 03:39:55 +00002832 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00002834 ctxt->sax->error(ctxt,
2835 "xmlParseCharRef: invalid decimal value\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002836 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002837 val = 0;
2838 break;
2839 }
Daniel Veillard845664d1998-08-13 04:43:19 +00002840 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002841 }
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002842 if (CUR == ';')
Daniel Veillard260a68f1998-08-13 03:39:55 +00002843 NEXT;
2844 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2846 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002847 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002848 }
2849 /*
2850 * Check the value IS_CHAR ...
2851 */
2852 if (IS_CHAR(val)) {
2853 buf[0] = (CHAR) val;
2854 buf[1] = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00002855 return(xmlStrndup(buf, 1));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002856 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00002858 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n",
2859 val);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002860 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002861 }
2862 return(NULL);
2863}
2864
Daniel Veillard11e00581998-10-24 18:27:49 +00002865/**
2866 * xmlParseEntityRef:
2867 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002868 *
2869 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002870 *
2871 * [68] EntityRef ::= '&' Name ';'
Daniel Veillardccb09631998-10-27 06:21:04 +00002872 * return values: the entity ref string or NULL if directly as input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002873 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002874CHAR *
2875xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002876 CHAR *ret = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00002877 const CHAR *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002878 CHAR *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002879 xmlEntityPtr ent;
Daniel Veillardccb09631998-10-27 06:21:04 +00002880 xmlParserInputPtr input = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002881
Daniel Veillardccb09631998-10-27 06:21:04 +00002882 q = CUR_PTR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002883 if (CUR == '&') {
2884 NEXT;
2885 name = xmlParseName(ctxt);
2886 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002889 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002890 } else {
2891 if (CUR == ';') {
2892 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002893 /*
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002894 * Well Formedness Constraint if:
2895 * - standalone
2896 * or
2897 * - no external subset and no external parameter entities
2898 * referenced
2899 * then
2900 * the entity referenced must have been declared
2901 *
2902 * TODO: to be double checked !!!
2903 */
2904 ent = xmlGetDocEntity(ctxt->doc, name);
2905 if ((ctxt->doc->standalone) ||
2906 ((ctxt->doc->intSubset == NULL) &&
2907 (ctxt->doc->extSubset == NULL))) {
2908 if (ent == NULL) {
2909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2910 ctxt->sax->error(ctxt,
2911 "Entity '%s' not defined\n", name);
2912 ctxt->wellFormed = 0;
2913 }
2914 }
2915
2916 /*
2917 * Well Formedness Constraint :
2918 * The referenced entity must be a parsed entity.
2919 */
2920 if (ent != NULL) {
2921 switch (ent->type) {
2922 case XML_INTERNAL_PARAMETER_ENTITY:
2923 case XML_EXTERNAL_PARAMETER_ENTITY:
2924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2925 ctxt->sax->error(ctxt,
2926 "Attempt to reference the parameter entity '%s'\n", name);
2927 ctxt->wellFormed = 0;
2928 break;
2929
2930 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2932 ctxt->sax->error(ctxt,
2933 "Attempt to reference unparsed entity '%s'\n", name);
2934 ctxt->wellFormed = 0;
2935 break;
2936 }
2937 }
2938
2939 /*
2940 * Well Formedness Constraint :
2941 * The referenced entity must not lead to recursion !
2942 */
2943
2944 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00002945 * We parsed the entity reference correctly, call SAX
2946 * interface for the proper behaviour:
2947 * - get a new input stream
2948 * - or keep the reference inline
Daniel Veillard260a68f1998-08-13 03:39:55 +00002949 */
Daniel Veillardccb09631998-10-27 06:21:04 +00002950 if (ctxt->sax)
2951 input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2952 if (input != NULL)
2953 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002954 else {
Daniel Veillardccb09631998-10-27 06:21:04 +00002955 ret = xmlStrndup(q, CUR_PTR - q);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002956 }
2957 } else {
2958 char cst[2] = { '&', 0 };
2959
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002961 ctxt->sax->error(ctxt,
2962 "xmlParseEntityRef: expecting ';'\n");
2963 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002964 ret = xmlStrndup(cst, 1);
2965 ret = xmlStrcat(ret, name);
2966 }
2967 free(name);
2968 }
2969 }
2970 return(ret);
2971}
2972
Daniel Veillard11e00581998-10-24 18:27:49 +00002973/**
2974 * xmlParseReference:
2975 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002976 *
2977 * parse Reference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002978 *
2979 * [67] Reference ::= EntityRef | CharRef
Daniel Veillardccb09631998-10-27 06:21:04 +00002980 * return values: the entity string or NULL if handled directly by pushing
2981 * the entity value as the input.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002982 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002983CHAR *
2984xmlParseReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002985 if ((CUR == '&') && (NXT(1) == '#')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002986 CHAR *val = xmlParseCharRef(ctxt);
2987 xmlParserInputPtr in;
2988
2989 if (val != NULL) {
2990 in = xmlNewStringInputStream(ctxt, val);
2991 xmlPushInput(ctxt, in);
2992 }
2993 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002994 } else if (CUR == '&') {
Daniel Veillardccb09631998-10-27 06:21:04 +00002995 return(xmlParseEntityRef(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002996 }
2997 return(NULL);
2998}
2999
Daniel Veillard11e00581998-10-24 18:27:49 +00003000/**
3001 * xmlParsePEReference:
3002 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00003003 *
3004 * parse PEReference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00003005 *
3006 * [69] PEReference ::= '%' Name ';'
Daniel Veillardccb09631998-10-27 06:21:04 +00003007 * return values: the entity content or NULL if handled directly.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003008 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003009CHAR *
3010xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003011 CHAR *ret = NULL;
3012 CHAR *name;
3013 xmlEntityPtr entity;
Daniel Veillardccb09631998-10-27 06:21:04 +00003014 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003015
3016 if (CUR == '%') {
3017 NEXT;
3018 name = xmlParseName(ctxt);
3019 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021 ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003022 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003023 } else {
3024 if (CUR == ';') {
3025 NEXT;
3026 entity = xmlGetDtdEntity(ctxt->doc, name);
3027 if (entity == NULL) {
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003028 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3029 ctxt->sax->warning(ctxt,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003030 "xmlParsePEReference: %%%s; not found\n", name);
Daniel Veillardccb09631998-10-27 06:21:04 +00003031 } else {
3032 input = xmlNewEntityInputStream(ctxt, entity);
3033 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003034 }
3035 } else {
Daniel Veillardccb09631998-10-27 06:21:04 +00003036 char cst[2] = { '%', 0 };
Daniel Veillard260a68f1998-08-13 03:39:55 +00003037
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003039 ctxt->sax->error(ctxt,
3040 "xmlParsePEReference: expecting ';'\n");
3041 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003042 ret = xmlStrndup(cst, 1);
3043 ret = xmlStrcat(ret, name);
3044 }
3045 free(name);
3046 }
3047 }
3048 return(ret);
3049}
3050
Daniel Veillard11e00581998-10-24 18:27:49 +00003051/**
3052 * xmlParseDocTypeDecl :
3053 * @ctxt: an XML parser context
3054 *
3055 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003056 *
3057 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3058 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
3059 */
3060
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003061void
3062xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003063 xmlDtdPtr dtd;
3064 CHAR *name;
3065 CHAR *ExternalID = NULL;
3066 CHAR *URI = NULL;
3067
3068 /*
3069 * We know that '<!DOCTYPE' has been detected.
3070 */
3071 SKIP(9);
3072
3073 SKIP_BLANKS;
3074
3075 /*
3076 * Parse the DOCTYPE name.
3077 */
3078 name = xmlParseName(ctxt);
3079 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3081 ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003082 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003083 }
3084
3085 SKIP_BLANKS;
3086
3087 /*
3088 * Check for SystemID and ExternalID
3089 */
3090 URI = xmlParseExternalID(ctxt, &ExternalID);
3091 SKIP_BLANKS;
3092
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003093 dtd = xmlCreateIntSubset(ctxt->doc, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003094
3095 /*
3096 * Is there any DTD definition ?
3097 */
3098 if (CUR == '[') {
3099 NEXT;
3100 /*
3101 * Parse the succession of Markup declarations and
3102 * PEReferences.
3103 * Subsequence (markupdecl | PEReference | S)*
3104 */
3105 while (CUR != ']') {
3106 const CHAR *check = CUR_PTR;
3107
3108 SKIP_BLANKS;
3109 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00003110 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003111
3112 if (CUR_PTR == check) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3114 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003115 "xmlParseDocTypeDecl: error detected in Markup declaration\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003116 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003117 break;
3118 }
3119 }
3120 if (CUR == ']') NEXT;
3121 }
3122
3123 /*
3124 * We should be at the end of the DOCTYPE declaration.
3125 */
3126 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3128 ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003129 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003130 /* We shouldn't try to resynchronize ... */
3131 }
3132 NEXT;
3133
3134 /*
3135 * Cleanup, since we don't use all those identifiers
3136 * TODO : the DOCTYPE if available should be stored !
3137 */
3138 if (URI != NULL) free(URI);
3139 if (ExternalID != NULL) free(ExternalID);
3140 if (name != NULL) free(name);
3141}
3142
Daniel Veillard11e00581998-10-24 18:27:49 +00003143/**
3144 * xmlParseAttribute:
3145 * @ctxt: an XML parser context
3146 * @node: the node carrying the attribute
3147 *
3148 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00003149 *
3150 * [41] Attribute ::= Name Eq AttValue
3151 *
3152 * [25] Eq ::= S? '=' S?
3153 *
3154 * With namespace:
3155 *
3156 * [NS 11] Attribute ::= QName Eq AttValue
3157 *
3158 * Also the case QName == xmlns:??? is handled independently as a namespace
3159 * definition.
3160 */
3161
Daniel Veillardccb09631998-10-27 06:21:04 +00003162xmlAttrPtr xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003163 CHAR *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003164 CHAR *ns;
Daniel Veillardccb09631998-10-27 06:21:04 +00003165 CHAR *value = NULL;
3166 xmlAttrPtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003167
3168 name = xmlNamespaceParseQName(ctxt, &ns);
3169 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171 ctxt->sax->error(ctxt, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003172 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00003173 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003174 }
3175
3176 /*
3177 * read the value
3178 */
3179 SKIP_BLANKS;
3180 if (CUR == '=') {
3181 NEXT;
3182 SKIP_BLANKS;
3183 value = xmlParseAttValue(ctxt);
3184 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003186 ctxt->sax->error(ctxt,
3187 "Specification mandate value for attribute %s\n", name);
3188 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003189 }
3190
3191 /*
3192 * Check whether it's a namespace definition
3193 */
3194 if ((ns == NULL) &&
3195 (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
3196 (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
3197 /* a default namespace definition */
3198 xmlNewNs(node, value, NULL);
3199 if (name != NULL)
3200 free(name);
3201 if (value != NULL)
3202 free(value);
Daniel Veillardccb09631998-10-27 06:21:04 +00003203 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003204 }
3205 if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
3206 (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
3207 /* a standard namespace definition */
3208 xmlNewNs(node, value, name);
Daniel Veillardccb09631998-10-27 06:21:04 +00003209 free(ns);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003210 if (name != NULL)
3211 free(name);
3212 if (value != NULL)
3213 free(value);
Daniel Veillardccb09631998-10-27 06:21:04 +00003214 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003215 }
3216
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003217 /*
3218 * Well formedness requires at most one declaration of an attribute
3219 */
3220 if ((val = xmlGetProp(ctxt->node, name)) != NULL) {
3221 free(val);
3222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3223 ctxt->sax->error(ctxt, "Attribute %s redefined\n", name);
3224 ctxt->wellFormed = 0;
3225 ret = NULL;
3226 } else {
3227 ret = xmlNewProp(ctxt->node, name, NULL);
3228 if (ret != NULL)
3229 ret->val = xmlStringGetNodeList(ctxt->doc, value);
3230 }
Daniel Veillardccb09631998-10-27 06:21:04 +00003231
3232 if (ns != NULL)
3233 free(ns);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003234 if (value != NULL)
Daniel Veillardccb09631998-10-27 06:21:04 +00003235 free(value);
3236 free(name);
3237 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003238}
3239
Daniel Veillard11e00581998-10-24 18:27:49 +00003240/**
3241 * xmlParseStartTag:
3242 * @ctxt: an XML parser context
3243 *
3244 * parse a start of tag either for rule element or
3245 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003246 *
3247 * [40] STag ::= '<' Name (S Attribute)* S? '>'
3248 *
3249 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3250 *
3251 * With namespace:
3252 *
3253 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3254 *
3255 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard11e00581998-10-24 18:27:49 +00003256 *
3257 * return values: the XML new node or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003258 */
3259
3260xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
3261 CHAR *namespace, *name;
3262 xmlNsPtr ns = NULL;
3263 xmlNodePtr ret = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00003264 xmlNodePtr parent = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003265
3266 if (CUR != '<') return(NULL);
3267 NEXT;
3268
3269 name = xmlNamespaceParseQName(ctxt, &namespace);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003270 if (name == NULL) {
3271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3272 ctxt->sax->error(ctxt,
3273 "xmlParseStartTag: invalid element name\n");
3274 ctxt->wellFormed = 0;
3275 return(NULL);
3276 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003277
3278 /*
3279 * Note : the namespace resolution is deferred until the end of the
3280 * attributes parsing, since local namespace can be defined as
3281 * an attribute at this level.
3282 */
Daniel Veillard0bef1311998-10-14 02:36:47 +00003283 ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
Daniel Veillardccb09631998-10-27 06:21:04 +00003284 if (ret == NULL) {
3285 if (namespace != NULL)
3286 free(namespace);
3287 free(name);
3288 return(NULL);
3289 }
3290
3291 /*
3292 * We are parsing a new node.
3293 */
3294 nodePush(ctxt, ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003295
3296 /*
3297 * Now parse the attributes, it ends up with the ending
3298 *
3299 * (S Attribute)* S?
3300 */
3301 SKIP_BLANKS;
3302 while ((IS_CHAR(CUR)) &&
3303 (CUR != '>') &&
3304 ((CUR != '/') || (NXT(1) != '>'))) {
3305 const CHAR *q = CUR_PTR;
3306
3307 xmlParseAttribute(ctxt, ret);
3308 SKIP_BLANKS;
3309
3310 if (q == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3312 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003313 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003314 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003315 break;
3316 }
3317 }
3318
3319 /*
3320 * Search the namespace
3321 */
3322 ns = xmlSearchNs(ctxt->doc, ret, namespace);
3323 if (ns == NULL) /* ret still doesn't have a parent yet ! */
Daniel Veillardccb09631998-10-27 06:21:04 +00003324 ns = xmlSearchNs(ctxt->doc, parent, namespace);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003325 xmlSetNs(ret, ns);
3326 if (namespace != NULL)
3327 free(namespace);
3328
3329 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00003330 * SAX: Start of Element !
3331 */
3332 if (ctxt->sax != NULL)
3333 ctxt->sax->startElement(ctxt, name);
Daniel Veillardccb09631998-10-27 06:21:04 +00003334 free(name);
3335
3336 /*
3337 * Link the child element
3338 */
3339 if (ctxt->nodeNr < 2) return(ret);
3340 parent = ctxt->nodeTab[ctxt->nodeNr - 2];
3341 if (parent != NULL)
3342 xmlAddChild(parent, ctxt->node);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003343
3344 return(ret);
3345}
3346
Daniel Veillard11e00581998-10-24 18:27:49 +00003347/**
3348 * xmlParseEndTag:
3349 * @ctxt: an XML parser context
3350 * @nsPtr: the current node namespace definition
3351 * @tagPtr: CHAR** receive the tag value
3352 *
3353 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00003354 *
3355 * [42] ETag ::= '</' Name S? '>'
3356 *
3357 * With namespace
3358 *
3359 * [9] ETag ::= '</' QName S? '>'
Daniel Veillard11e00581998-10-24 18:27:49 +00003360 *
3361 * return values: tagPtr receive the tag name just read
Daniel Veillard260a68f1998-08-13 03:39:55 +00003362 */
3363
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003364void
3365xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003366 CHAR *namespace, *name;
3367 xmlNsPtr ns = NULL;
3368
3369 *nsPtr = NULL;
3370 *tagPtr = NULL;
3371
3372 if ((CUR != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3374 ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003375 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003376 return;
3377 }
3378 SKIP(2);
3379
3380 name = xmlNamespaceParseQName(ctxt, &namespace);
3381
3382 /*
3383 * Search the namespace
3384 */
3385 ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
3386 if (namespace != NULL)
3387 free(namespace);
3388
3389 *nsPtr = ns;
3390 *tagPtr = name;
3391
3392 /*
3393 * We should definitely be at the ending "S? '>'" part
3394 */
3395 SKIP_BLANKS;
3396 if ((!IS_CHAR(CUR)) || (CUR != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3398 ctxt->sax->error(ctxt, "End tag : expected '>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003399 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003400 } else
3401 NEXT;
3402
3403 return;
3404}
3405
Daniel Veillard11e00581998-10-24 18:27:49 +00003406/**
3407 * xmlParseCDSect:
3408 * @ctxt: an XML parser context
3409 *
3410 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003411 *
3412 * [18] CDSect ::= CDStart CData CDEnd
3413 *
3414 * [19] CDStart ::= '<![CDATA['
3415 *
3416 * [20] Data ::= (Char* - (Char* ']]>' Char*))
3417 *
3418 * [21] CDEnd ::= ']]>'
3419 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003420void
3421xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003422 const CHAR *r, *s, *base;
3423
3424 if ((CUR == '<') && (NXT(1) == '!') &&
3425 (NXT(2) == '[') && (NXT(3) == 'C') &&
3426 (NXT(4) == 'D') && (NXT(5) == 'A') &&
3427 (NXT(6) == 'T') && (NXT(7) == 'A') &&
3428 (NXT(8) == '[')) {
3429 SKIP(9);
3430 } else
3431 return;
3432 base = CUR_PTR;
3433 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3435 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003436 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003437 return;
3438 }
3439 r = NEXT;
3440 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3442 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003443 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003444 return;
3445 }
3446 s = NEXT;
3447 while (IS_CHAR(CUR) &&
3448 ((*r != ']') || (*s != ']') || (CUR != '>'))) {
3449 r++;s++;NEXT;
3450 }
3451 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3453 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003454 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003455 return;
3456 }
3457
3458 /*
3459 * Ok the segment [base CUR_PTR] is to be consumed as chars.
3460 */
3461 if (ctxt->sax != NULL) {
3462 if (areBlanks(ctxt, base, CUR_PTR - base))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003463 ctxt->sax->ignorableWhitespace(ctxt, base, 0, (CUR_PTR - base) - 2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003464 else
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003465 ctxt->sax->characters(ctxt, base, 0, (CUR_PTR - base) - 2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003466 }
3467}
3468
Daniel Veillard11e00581998-10-24 18:27:49 +00003469/**
3470 * xmlParseContent:
3471 * @ctxt: an XML parser context
3472 *
3473 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00003474 *
3475 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
3476 */
3477
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003478void
3479xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003480 xmlNodePtr ret = NULL;
3481
3482 while ((CUR != '<') || (NXT(1) != '/')) {
3483 const CHAR *test = CUR_PTR;
3484 ret = NULL;
3485
3486 /*
3487 * First case : a Processing Instruction.
3488 */
3489 if ((CUR == '<') && (NXT(1) == '?')) {
3490 xmlParsePI(ctxt);
3491 }
3492 /*
3493 * Second case : a CDSection
3494 */
3495 else if ((CUR == '<') && (NXT(1) == '!') &&
3496 (NXT(2) == '[') && (NXT(3) == 'C') &&
3497 (NXT(4) == 'D') && (NXT(5) == 'A') &&
3498 (NXT(6) == 'T') && (NXT(7) == 'A') &&
3499 (NXT(8) == '[')) {
3500 xmlParseCDSect(ctxt);
3501 }
3502 /*
3503 * Third case : a comment
3504 */
3505 else if ((CUR == '<') && (NXT(1) == '!') &&
3506 (NXT(2) == '-') && (NXT(3) == '-')) {
3507 ret = xmlParseComment(ctxt, 1);
3508 }
3509 /*
3510 * Fourth case : a sub-element.
3511 */
3512 else if (CUR == '<') {
3513 ret = xmlParseElement(ctxt);
3514 }
3515 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00003516 * Fifth case : a reference. If if has not been resolved,
3517 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00003518 */
3519 else if (CUR == '&') {
Daniel Veillardccb09631998-10-27 06:21:04 +00003520 CHAR *val = xmlParseReference(ctxt);
3521 if (val != NULL) {
3522 if (val[0] != '&') {
3523 /*
3524 * inline predefined entity.
3525 */
3526 if (ctxt->sax != NULL)
3527 ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
3528 } else {
3529 /*
3530 * user defined entity, create a node.
3531 */
3532 ret = xmlNewReference(ctxt->doc, val);
3533 xmlAddChild(ctxt->node, ret);
3534 }
3535 free(val);
3536 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003537 }
3538 /*
3539 * Last case, text. Note that References are handled directly.
3540 */
3541 else {
3542 xmlParseCharData(ctxt, 0);
3543 }
3544
3545 /*
3546 * Pop-up of finished entities.
3547 */
3548 while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
3549
3550 if (test == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003552 ctxt->sax->error(ctxt,
3553 "detected an error in element content\n");
3554 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003555 break;
3556 }
3557 }
3558}
3559
Daniel Veillard11e00581998-10-24 18:27:49 +00003560/**
3561 * xmlParseElement:
3562 * @ctxt: an XML parser context
3563 *
3564 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00003565 *
3566 * [39] element ::= EmptyElemTag | STag content ETag
3567 *
3568 * [41] Attribute ::= Name Eq AttValue
Daniel Veillard11e00581998-10-24 18:27:49 +00003569 * return values: the XML new node or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003570 */
3571
3572
3573xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
3574 xmlNodePtr ret;
3575 const CHAR *openTag = CUR_PTR;
3576 xmlParserNodeInfo node_info;
3577 CHAR *endTag;
3578 xmlNsPtr endNs;
3579
3580 /* Capture start position */
3581 node_info.begin_pos = CUR_PTR - ctxt->input->base;
3582 node_info.begin_line = ctxt->input->line;
3583
3584 ret = xmlParseStartTag(ctxt);
3585 if (ret == NULL) {
3586 return(NULL);
3587 }
3588
3589 /*
3590 * Check for an Empty Element.
3591 */
3592 if ((CUR == '/') && (NXT(1) == '>')) {
3593 SKIP(2);
3594 if (ctxt->sax != NULL)
3595 ctxt->sax->endElement(ctxt, ret->name);
3596
3597 /*
3598 * end of parsing of this node.
3599 */
3600 nodePop(ctxt);
3601
3602 return(ret);
3603 }
3604 if (CUR == '>') NEXT;
3605 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard242590e1998-11-13 18:04:35 +00003607 ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",
3608 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003609 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003610
3611 /*
3612 * end of parsing of this node.
3613 */
3614 nodePop(ctxt);
3615
3616 return(NULL);
3617 }
3618
3619 /*
3620 * Parse the content of the element:
3621 */
3622 xmlParseContent(ctxt);
3623 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard242590e1998-11-13 18:04:35 +00003625 ctxt->sax->error(ctxt,
3626 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003627 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003628
3629 /*
3630 * end of parsing of this node.
3631 */
3632 nodePop(ctxt);
3633
3634 return(NULL);
3635 }
3636
3637 /*
3638 * parse the end of tag: '</' should be here.
3639 */
3640 xmlParseEndTag(ctxt, &endNs, &endTag);
3641
3642 /*
3643 * Check that the Name in the ETag is the same as in the STag.
3644 */
3645 if (endNs != ret->ns) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3647 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003648 "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
3649 openTag, endTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003650 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003651 }
3652 if (endTag == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3654 ctxt->sax->error(ctxt, "The End tag has no name\n%.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003655 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003656 } else if (xmlStrcmp(ret->name, endTag)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3658 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003659 "Start and End tags don't use the same name\n%.30s\n%.30s\n",
3660 openTag, endTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003661 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003662 }
3663 /*
3664 * SAX: End of Tag
3665 */
3666 else if (ctxt->sax != NULL)
3667 ctxt->sax->endElement(ctxt, endTag);
3668
3669 if (endTag != NULL)
3670 free(endTag);
3671
3672 /* Capture end position and add node */
3673 if ( ret != NULL && ctxt->record_info ) {
3674 node_info.end_pos = CUR_PTR - ctxt->input->base;
3675 node_info.end_line = ctxt->input->line;
3676 node_info.node = ret;
3677 xmlParserAddNodeInfo(ctxt, &node_info);
3678 }
3679
3680 /*
3681 * end of parsing of this node.
3682 */
3683 nodePop(ctxt);
3684
3685 return(ret);
3686}
3687
Daniel Veillard11e00581998-10-24 18:27:49 +00003688/**
3689 * xmlParseVersionNum:
3690 * @ctxt: an XML parser context
3691 *
3692 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003693 *
3694 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard11e00581998-10-24 18:27:49 +00003695 * return values: the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003696 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003697CHAR *
3698xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003699 const CHAR *q = CUR_PTR;
3700 CHAR *ret;
3701
3702 while (IS_CHAR(CUR) &&
3703 (((CUR >= 'a') && (CUR <= 'z')) ||
3704 ((CUR >= 'A') && (CUR <= 'Z')) ||
3705 ((CUR >= '0') && (CUR <= '9')) ||
3706 (CUR == '_') || (CUR == '.') ||
3707 (CUR == ':') || (CUR == '-'))) NEXT;
3708 ret = xmlStrndup(q, CUR_PTR - q);
3709 return(ret);
3710}
3711
Daniel Veillard11e00581998-10-24 18:27:49 +00003712/**
3713 * xmlParseVersionInfo:
3714 * @ctxt: an XML parser context
3715 *
3716 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003717 *
3718 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3719 *
3720 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00003721 *
3722 * return values: the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00003723 */
3724
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003725CHAR *
3726xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003727 CHAR *version = NULL;
3728 const CHAR *q;
3729
3730 if ((CUR == 'v') && (NXT(1) == 'e') &&
3731 (NXT(2) == 'r') && (NXT(3) == 's') &&
3732 (NXT(4) == 'i') && (NXT(5) == 'o') &&
3733 (NXT(6) == 'n')) {
3734 SKIP(7);
3735 SKIP_BLANKS;
3736 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738 ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003739 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003740 return(NULL);
3741 }
3742 NEXT;
3743 SKIP_BLANKS;
3744 if (CUR == '"') {
3745 NEXT;
3746 q = CUR_PTR;
3747 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003748 if (CUR != '"') {
3749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3750 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003751 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003752 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003753 NEXT;
3754 } else if (CUR == '\''){
3755 NEXT;
3756 q = CUR_PTR;
3757 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003758 if (CUR != '\'') {
3759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3760 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003761 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003762 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003763 NEXT;
3764 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003766 ctxt->sax->error(ctxt,
3767 "xmlParseVersionInfo : expected ' or \"\n");
3768 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003769 }
3770 }
3771 return(version);
3772}
3773
Daniel Veillard11e00581998-10-24 18:27:49 +00003774/**
3775 * xmlParseEncName:
3776 * @ctxt: an XML parser context
3777 *
3778 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003779 *
3780 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00003781 *
3782 * return values: the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003783 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003784CHAR *
3785xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003786 const CHAR *q = CUR_PTR;
3787 CHAR *ret = NULL;
3788
3789 if (((CUR >= 'a') && (CUR <= 'z')) ||
3790 ((CUR >= 'A') && (CUR <= 'Z'))) {
3791 NEXT;
3792 while (IS_CHAR(CUR) &&
3793 (((CUR >= 'a') && (CUR <= 'z')) ||
3794 ((CUR >= 'A') && (CUR <= 'Z')) ||
3795 ((CUR >= '0') && (CUR <= '9')) ||
3796 (CUR == '-'))) NEXT;
3797 ret = xmlStrndup(q, CUR_PTR - q);
3798 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3800 ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003801 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003802 }
3803 return(ret);
3804}
3805
Daniel Veillard11e00581998-10-24 18:27:49 +00003806/**
3807 * xmlParseEncodingDecl:
3808 * @ctxt: an XML parser context
3809 *
3810 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003811 *
3812 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00003813 *
3814 * TODO: this should setup the conversion filters.
3815 *
3816 * return values: the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003817 */
3818
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003819CHAR *
3820xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003821 CHAR *encoding = NULL;
3822 const CHAR *q;
3823
3824 SKIP_BLANKS;
3825 if ((CUR == 'e') && (NXT(1) == 'n') &&
3826 (NXT(2) == 'c') && (NXT(3) == 'o') &&
3827 (NXT(4) == 'd') && (NXT(5) == 'i') &&
3828 (NXT(6) == 'n') && (NXT(7) == 'g')) {
3829 SKIP(8);
3830 SKIP_BLANKS;
3831 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3833 ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003834 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003835 return(NULL);
3836 }
3837 NEXT;
3838 SKIP_BLANKS;
3839 if (CUR == '"') {
3840 NEXT;
3841 q = CUR_PTR;
3842 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003843 if (CUR != '"') {
3844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3845 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003846 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003847 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003848 NEXT;
3849 } else if (CUR == '\''){
3850 NEXT;
3851 q = CUR_PTR;
3852 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003853 if (CUR != '\'') {
3854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3855 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003856 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003857 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003858 NEXT;
3859 } else if (CUR == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003861 ctxt->sax->error(ctxt,
3862 "xmlParseEncodingDecl : expected ' or \"\n");
3863 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003864 }
3865 }
3866 return(encoding);
3867}
3868
Daniel Veillard11e00581998-10-24 18:27:49 +00003869/**
3870 * xmlParseSDDecl:
3871 * @ctxt: an XML parser context
3872 *
3873 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003874 *
3875 * [32] SDDecl ::= S 'standalone' Eq
3876 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard11e00581998-10-24 18:27:49 +00003877 * return values: 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00003878 */
3879
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003880int
3881xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003882 int standalone = -1;
3883
3884 SKIP_BLANKS;
3885 if ((CUR == 's') && (NXT(1) == 't') &&
3886 (NXT(2) == 'a') && (NXT(3) == 'n') &&
3887 (NXT(4) == 'd') && (NXT(5) == 'a') &&
3888 (NXT(6) == 'l') && (NXT(7) == 'o') &&
3889 (NXT(8) == 'n') && (NXT(9) == 'e')) {
3890 SKIP(10);
3891 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003893 ctxt->sax->error(ctxt,
3894 "XML standalone declaration : expected '='\n");
3895 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003896 return(standalone);
3897 }
3898 NEXT;
3899 SKIP_BLANKS;
3900 if (CUR == '\''){
3901 NEXT;
3902 if ((CUR == 'n') && (NXT(1) == 'o')) {
3903 standalone = 0;
3904 SKIP(2);
3905 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3906 (NXT(2) == 's')) {
3907 standalone = 1;
3908 SKIP(3);
3909 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3911 ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003912 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003913 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003914 if (CUR != '\'') {
3915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3916 ctxt->sax->error(ctxt, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003917 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003918 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003919 NEXT;
3920 } else if (CUR == '"'){
3921 NEXT;
3922 if ((CUR == 'n') && (NXT(1) == 'o')) {
3923 standalone = 0;
3924 SKIP(2);
3925 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3926 (NXT(2) == 's')) {
3927 standalone = 1;
3928 SKIP(3);
3929 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003931 ctxt->sax->error(ctxt,
3932 "standalone accepts only 'yes' or 'no'\n");
3933 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003934 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003935 if (CUR != '"') {
3936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3937 ctxt->sax->error(ctxt, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003938 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003939 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003940 NEXT;
3941 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt, "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003944 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003945 }
3946 }
3947 return(standalone);
3948}
3949
Daniel Veillard11e00581998-10-24 18:27:49 +00003950/**
3951 * xmlParseXMLDecl:
3952 * @ctxt: an XML parser context
3953 *
3954 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00003955 *
3956 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
3957 */
3958
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003959void
3960xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003961 CHAR *version;
3962
3963 /*
3964 * We know that '<?xml' is here.
3965 */
3966 SKIP(5);
3967
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003968 if (!IS_BLANK(CUR)) {
3969 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3970 ctxt->sax->error(ctxt, "Blank needed after '<?xml'\n");
3971 ctxt->wellFormed = 0;
3972 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003973 SKIP_BLANKS;
3974
3975 /*
3976 * We should have the VersionInfo here.
3977 */
3978 version = xmlParseVersionInfo(ctxt);
3979 if (version == NULL)
3980 version = xmlCharStrdup(XML_DEFAULT_VERSION);
3981 ctxt->doc = xmlNewDoc(version);
3982 free(version);
3983
3984 /*
3985 * We may have the encoding declaration
3986 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003987 if (!IS_BLANK(CUR)) {
3988 if ((CUR == '?') && (NXT(1) == '>')) {
3989 SKIP(2);
3990 return;
3991 }
3992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3993 ctxt->sax->error(ctxt, "Blank needed here\n");
3994 ctxt->wellFormed = 0;
3995 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003996 ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
3997
3998 /*
3999 * We may have the standalone status.
4000 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004001 if ((ctxt->doc->encoding != NULL) && (!IS_BLANK(CUR))) {
4002 if ((CUR == '?') && (NXT(1) == '>')) {
4003 SKIP(2);
4004 return;
4005 }
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt, "Blank needed here\n");
4008 ctxt->wellFormed = 0;
4009 }
4010 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004011 ctxt->doc->standalone = xmlParseSDDecl(ctxt);
4012
4013 SKIP_BLANKS;
4014 if ((CUR == '?') && (NXT(1) == '>')) {
4015 SKIP(2);
4016 } else if (CUR == '>') {
4017 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4019 ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004020 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004021 NEXT;
4022 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4024 ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004025 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004026 MOVETO_ENDTAG(CUR_PTR);
4027 NEXT;
4028 }
4029}
4030
Daniel Veillard11e00581998-10-24 18:27:49 +00004031/**
4032 * xmlParseMisc:
4033 * @ctxt: an XML parser context
4034 *
4035 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004036 *
4037 * [27] Misc ::= Comment | PI | S
4038 */
4039
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004040void
4041xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004042 while (((CUR == '<') && (NXT(1) == '?')) ||
4043 ((CUR == '<') && (NXT(1) == '!') &&
4044 (NXT(2) == '-') && (NXT(3) == '-')) ||
4045 IS_BLANK(CUR)) {
4046 if ((CUR == '<') && (NXT(1) == '?')) {
4047 xmlParsePI(ctxt);
4048 } else if (IS_BLANK(CUR)) {
4049 NEXT;
4050 } else
4051 xmlParseComment(ctxt, 0);
4052 }
4053}
4054
Daniel Veillard11e00581998-10-24 18:27:49 +00004055/**
4056 * xmlParseDocument :
4057 * @ctxt: an XML parser context
4058 *
4059 * parse an XML document (and build a tree if using the standard SAX
4060 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00004061 *
4062 * [1] document ::= prolog element Misc*
4063 *
4064 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00004065 *
4066 * return values: 0, -1 in case of error. the parser context is augmented
4067 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004068 */
4069
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004070int
4071xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004072 xmlDefaultSAXHandlerInit();
4073
4074 /*
4075 * SAX: beginning of the document processing.
4076 */
4077 if (ctxt->sax)
4078 ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
4079 if (ctxt->sax)
4080 ctxt->sax->startDocument(ctxt);
4081
4082 /*
4083 * We should check for encoding here and plug-in some
4084 * conversion code TODO !!!!
4085 */
4086
4087 /*
4088 * Wipe out everything which is before the first '<'
4089 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004090 if (IS_BLANK(CUR)) {
4091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092 ctxt->sax->error(ctxt,
4093 "Extra spaces at the beginning of the document are not allowed\n");
4094 ctxt->wellFormed = 0;
4095 SKIP_BLANKS;
4096 }
4097
4098 if (CUR == 0) {
4099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4100 ctxt->sax->error(ctxt, "Document is empty\n");
4101 ctxt->wellFormed = 0;
4102 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004103
4104 /*
4105 * Check for the XMLDecl in the Prolog.
4106 */
4107 if ((CUR == '<') && (NXT(1) == '?') &&
4108 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4109 (NXT(4) == 'l')) {
4110 xmlParseXMLDecl(ctxt);
4111 /* SKIP_EOL(cur); */
4112 SKIP_BLANKS;
4113 } else if ((CUR == '<') && (NXT(1) == '?') &&
4114 (NXT(2) == 'X') && (NXT(3) == 'M') &&
4115 (NXT(4) == 'L')) {
4116 /*
4117 * The first drafts were using <?XML and the final W3C REC
4118 * now use <?xml ...
4119 */
4120 xmlParseXMLDecl(ctxt);
4121 /* SKIP_EOL(cur); */
4122 SKIP_BLANKS;
4123 } else {
4124 CHAR *version;
4125
4126 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4127 ctxt->doc = xmlNewDoc(version);
4128 free(version);
4129 }
4130
4131 /*
4132 * The Misc part of the Prolog
4133 */
4134 xmlParseMisc(ctxt);
4135
4136 /*
4137 * Then possibly doc type declaration(s) and more Misc
4138 * (doctypedecl Misc*)?
4139 */
4140 if ((CUR == '<') && (NXT(1) == '!') &&
4141 (NXT(2) == 'D') && (NXT(3) == 'O') &&
4142 (NXT(4) == 'C') && (NXT(5) == 'T') &&
4143 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
4144 (NXT(8) == 'E')) {
4145 xmlParseDocTypeDecl(ctxt);
4146 xmlParseMisc(ctxt);
4147 }
4148
4149 /*
4150 * Time to start parsing the tree itself
4151 */
4152 ctxt->doc->root = xmlParseElement(ctxt);
4153
4154 /*
4155 * The Misc part at the end
4156 */
4157 xmlParseMisc(ctxt);
4158
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004159 if (CUR != 0) {
4160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4161 ctxt->sax->error(ctxt,
4162 "Extra content at the end of the document\n");
4163 ctxt->wellFormed = 0;
4164 }
4165
Daniel Veillard260a68f1998-08-13 03:39:55 +00004166 /*
4167 * SAX: end of the document processing.
4168 */
4169 if (ctxt->sax)
4170 ctxt->sax->endDocument(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004171 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004172 return(0);
4173}
4174
Daniel Veillard11e00581998-10-24 18:27:49 +00004175/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004176 * xmlSAXParseDoc :
4177 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00004178 * @cur: a pointer to an array of CHAR
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004179 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4180 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00004181 *
4182 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004183 * It use the given SAX function block to handle the parsing callback.
4184 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00004185 *
4186 * return values: the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00004187 */
4188
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004189xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004190 xmlDocPtr ret;
4191 xmlParserCtxtPtr ctxt;
4192 xmlParserInputPtr input;
4193
4194 if (cur == NULL) return(NULL);
4195
4196 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4197 if (ctxt == NULL) {
4198 perror("malloc");
4199 return(NULL);
4200 }
4201 xmlInitParserCtxt(ctxt);
Daniel Veillard242590e1998-11-13 18:04:35 +00004202 if (sax != NULL) ctxt->sax = sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004203 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4204 if (input == NULL) {
4205 perror("malloc");
4206 free(ctxt);
4207 return(NULL);
4208 }
4209
4210 input->filename = NULL;
4211 input->line = 1;
4212 input->col = 1;
4213 input->base = cur;
4214 input->cur = cur;
4215
4216 inputPush(ctxt, input);
4217
4218
4219 xmlParseDocument(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004220 if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4221 else {
4222 ret = NULL;
4223 xmlFreeDoc(ctxt->doc);
4224 ctxt->doc = NULL;
4225 }
Daniel Veillardccb09631998-10-27 06:21:04 +00004226 free(ctxt->nodeTab);
4227 free(ctxt->inputTab);
4228 if (input->filename != NULL)
4229 free((char *)input->filename);
4230 free(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004231 free(ctxt);
4232
4233 return(ret);
4234}
4235
Daniel Veillard11e00581998-10-24 18:27:49 +00004236/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004237 * xmlParseDoc :
4238 * @cur: a pointer to an array of CHAR
4239 *
4240 * parse an XML in-memory document and build a tree.
4241 *
4242 * return values: the resulting document tree
4243 */
4244
4245xmlDocPtr xmlParseDoc(CHAR *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004246 return(xmlSAXParseDoc(NULL, cur, 0));
4247}
4248
4249/**
4250 * xmlRecoverDoc :
4251 * @cur: a pointer to an array of CHAR
4252 *
4253 * parse an XML in-memory document and build a tree.
4254 * In the case the document is not Well Formed, a tree is built anyway
4255 *
4256 * return values: the resulting document tree
4257 */
4258
4259xmlDocPtr xmlRecoverDoc(CHAR *cur) {
4260 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004261}
4262
4263/**
4264 * xmlSAXParseFile :
4265 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00004266 * @filename: the filename
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004267 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4268 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00004269 *
4270 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4271 * compressed document is provided by default if found at compile-time.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004272 * It use the given SAX function block to handle the parsing callback.
4273 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00004274 *
4275 * return values: the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00004276 */
4277
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004278xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
4279 int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004280 xmlDocPtr ret;
4281#ifdef HAVE_ZLIB_H
4282 gzFile input;
4283#else
4284 int input;
4285#endif
4286 int res;
Daniel Veillard27271681998-10-30 06:39:40 +00004287 int len;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004288 struct stat buf;
4289 char *buffer;
4290 xmlParserCtxtPtr ctxt;
4291 xmlParserInputPtr inputStream;
4292
4293 res = stat(filename, &buf);
4294 if (res < 0) return(NULL);
4295
4296#ifdef HAVE_ZLIB_H
Daniel Veillard27271681998-10-30 06:39:40 +00004297 len = (buf.st_size * 8) + 1000;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004298retry_bigger:
Daniel Veillard27271681998-10-30 06:39:40 +00004299 buffer = malloc(len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004300#else
Daniel Veillard27271681998-10-30 06:39:40 +00004301 len = buf.st_size + 100;
4302 buffer = malloc(len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004303#endif
4304 if (buffer == NULL) {
4305 perror("malloc");
4306 return(NULL);
4307 }
4308
Daniel Veillard27271681998-10-30 06:39:40 +00004309 memset(buffer, 0, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004310#ifdef HAVE_ZLIB_H
4311 input = gzopen (filename, "r");
4312 if (input == NULL) {
4313 fprintf (stderr, "Cannot read file %s :\n", filename);
4314 perror ("gzopen failed");
4315 return(NULL);
4316 }
4317#else
4318 input = open (filename, O_RDONLY);
4319 if (input < 0) {
4320 fprintf (stderr, "Cannot read file %s :\n", filename);
4321 perror ("open failed");
4322 return(NULL);
4323 }
4324#endif
4325#ifdef HAVE_ZLIB_H
Daniel Veillard27271681998-10-30 06:39:40 +00004326 res = gzread(input, buffer, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004327#else
4328 res = read(input, buffer, buf.st_size);
4329#endif
4330 if (res < 0) {
4331 fprintf (stderr, "Cannot read file %s :\n", filename);
4332#ifdef HAVE_ZLIB_H
4333 perror ("gzread failed");
4334#else
4335 perror ("read failed");
4336#endif
4337 return(NULL);
4338 }
4339#ifdef HAVE_ZLIB_H
4340 gzclose(input);
Daniel Veillard27271681998-10-30 06:39:40 +00004341 if (res >= len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004342 free(buffer);
Daniel Veillard27271681998-10-30 06:39:40 +00004343 len *= 2;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004344 goto retry_bigger;
4345 }
4346 buf.st_size = res;
4347#else
4348 close(input);
4349#endif
4350
4351 buffer[buf.st_size] = '\0';
4352
4353 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4354 if (ctxt == NULL) {
4355 perror("malloc");
4356 return(NULL);
4357 }
4358 xmlInitParserCtxt(ctxt);
Daniel Veillard242590e1998-11-13 18:04:35 +00004359 if (sax != NULL) ctxt->sax = sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004360 inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4361 if (inputStream == NULL) {
4362 perror("malloc");
4363 free(ctxt);
4364 return(NULL);
4365 }
4366
4367 inputStream->filename = strdup(filename);
4368 inputStream->line = 1;
4369 inputStream->col = 1;
4370
4371 /*
4372 * TODO : plug some encoding conversion routines here. !!!
4373 */
4374 inputStream->base = buffer;
4375 inputStream->cur = buffer;
4376
4377 inputPush(ctxt, inputStream);
4378
4379 xmlParseDocument(ctxt);
4380
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004381 if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4382 else {
4383 ret = NULL;
4384 xmlFreeDoc(ctxt->doc);
4385 ctxt->doc = NULL;
4386 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004387 free(buffer);
Daniel Veillardccb09631998-10-27 06:21:04 +00004388 free(ctxt->nodeTab);
4389 free(ctxt->inputTab);
4390 if (inputStream->filename != NULL)
4391 free((char *)inputStream->filename);
4392 free(inputStream);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004393 free(ctxt);
4394
4395 return(ret);
4396}
4397
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004398/**
4399 * xmlParseFile :
4400 * @filename: the filename
4401 *
4402 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4403 * compressed document is provided by default if found at compile-time.
4404 *
4405 * return values: the resulting document tree
4406 */
4407
4408xmlDocPtr xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004409 return(xmlSAXParseFile(NULL, filename, 0));
4410}
4411
4412/**
4413 * xmlRecoverFile :
4414 * @filename: the filename
4415 *
4416 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4417 * compressed document is provided by default if found at compile-time.
4418 * In the case the document is not Well Formed, a tree is built anyway
4419 *
4420 * return values: the resulting document tree
4421 */
4422
4423xmlDocPtr xmlRecoverFile(const char *filename) {
4424 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004425}
Daniel Veillard260a68f1998-08-13 03:39:55 +00004426
Daniel Veillard11e00581998-10-24 18:27:49 +00004427/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004428 * xmlSAXParseMemory :
4429 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00004430 * @cur: an pointer to a char array
4431 * @size: the siwe of the array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004432 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4433 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00004434 *
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004435 * parse an XML in-memory block and use the given SAX function block
4436 * to handle the parsing callback. If sax is NULL, fallback to the default
4437 * DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00004438 *
4439 * TODO : plug some encoding conversion routines here. !!!
4440 *
4441 * return values: the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00004442 */
Daniel Veillard11e00581998-10-24 18:27:49 +00004443
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004444xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size,
4445 int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004446 xmlDocPtr ret;
4447 xmlParserCtxtPtr ctxt;
4448 xmlParserInputPtr input;
4449
4450 buffer[size - 1] = '\0';
4451
4452 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4453 if (ctxt == NULL) {
4454 perror("malloc");
4455 return(NULL);
4456 }
4457 xmlInitParserCtxt(ctxt);
Daniel Veillard242590e1998-11-13 18:04:35 +00004458 if (sax != NULL) ctxt->sax = sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004459 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4460 if (input == NULL) {
4461 perror("malloc");
Daniel Veillardccb09631998-10-27 06:21:04 +00004462 free(ctxt->nodeTab);
4463 free(ctxt->inputTab);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004464 free(ctxt);
4465 return(NULL);
4466 }
4467
4468 input->filename = NULL;
4469 input->line = 1;
4470 input->col = 1;
4471
4472 /*
4473 * TODO : plug some encoding conversion routines here. !!!
4474 */
4475 input->base = buffer;
4476 input->cur = buffer;
4477
4478 inputPush(ctxt, input);
4479
4480 xmlParseDocument(ctxt);
4481
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004482 if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4483 else {
4484 ret = NULL;
4485 xmlFreeDoc(ctxt->doc);
4486 ctxt->doc = NULL;
4487 }
Daniel Veillardccb09631998-10-27 06:21:04 +00004488 free(ctxt->nodeTab);
4489 free(ctxt->inputTab);
4490 if (input->filename != NULL)
4491 free((char *)input->filename);
4492 free(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004493 free(ctxt);
4494
4495 return(ret);
4496}
4497
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004498/**
4499 * xmlParseMemory :
4500 * @cur: an pointer to a char array
4501 * @size: the size of the array
4502 *
4503 * parse an XML in-memory block and build a tree.
4504 *
4505 * return values: the resulting document tree
4506 */
4507
4508xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004509 return(xmlSAXParseMemory(NULL, buffer, size, 0));
4510}
4511
4512/**
4513 * xmlRecoverMemory :
4514 * @cur: an pointer to a char array
4515 * @size: the size of the array
4516 *
4517 * parse an XML in-memory block and build a tree.
4518 * In the case the document is not Well Formed, a tree is built anyway
4519 *
4520 * return values: the resulting document tree
4521 */
4522
4523xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
4524 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00004525}
Daniel Veillard260a68f1998-08-13 03:39:55 +00004526
Daniel Veillard11e00581998-10-24 18:27:49 +00004527/**
4528 * xmlInitParserCtxt:
4529 * @ctxt: an XML parser context
4530 *
4531 * Initialize a parser context
4532 */
4533
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004534void
4535xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004536{
4537 /* Allocate the Input stack */
4538 ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
4539 ctxt->inputNr = 0;
4540 ctxt->inputMax = 5;
4541 ctxt->input = NULL;
4542
4543 /* Allocate the Node stack */
4544 ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
4545 ctxt->nodeNr = 0;
4546 ctxt->nodeMax = 10;
4547 ctxt->node = NULL;
4548
4549 ctxt->sax = &xmlDefaultSAXHandler;
4550 ctxt->doc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004551 ctxt->wellFormed = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004552 ctxt->record_info = 0;
4553 xmlInitNodeInfoSeq(&ctxt->node_seq);
4554}
4555
Daniel Veillard11e00581998-10-24 18:27:49 +00004556/**
4557 * xmlClearParserCtxt:
4558 * @ctxt: an XML parser context
4559 *
4560 * Clear (release owned resources) and reinitialize a parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00004561 */
Daniel Veillard11e00581998-10-24 18:27:49 +00004562
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004563void
4564xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004565{
4566 xmlClearNodeInfoSeq(&ctxt->node_seq);
4567 xmlInitParserCtxt(ctxt);
4568}
4569
4570
Daniel Veillard11e00581998-10-24 18:27:49 +00004571/**
4572 * xmlSetupParserForBuffer:
4573 * @ctxt: an XML parser context
4574 * @buffer: a CHAR * buffer
4575 * @filename: a file name
4576 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004577 * Setup the parser context to parse a new buffer; Clears any prior
4578 * contents from the parser context. The buffer parameter must not be
4579 * NULL, but the filename parameter can be
4580 */
Daniel Veillard11e00581998-10-24 18:27:49 +00004581
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004582void
4583xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004584 const char* filename)
4585{
4586 xmlParserInputPtr input;
4587
4588 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4589 if (input == NULL) {
4590 perror("malloc");
4591 free(ctxt);
4592 exit(1);
4593 }
4594
4595 xmlClearParserCtxt(ctxt);
4596 if (input->filename != NULL)
4597 input->filename = strdup(filename);
4598 else
4599 input->filename = NULL;
4600 input->line = 1;
4601 input->col = 1;
4602 input->base = buffer;
4603 input->cur = buffer;
4604
4605 inputPush(ctxt, input);
4606}
4607
4608
Daniel Veillard11e00581998-10-24 18:27:49 +00004609/**
4610 * xmlParserFindNodeInfo:
4611 * @ctxt: an XML parser context
4612 * @node: an XML node within the tree
4613 *
4614 * Find the parser node info struct for a given node
4615 *
4616 * return values: an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004617 */
4618const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
4619 const xmlNode* node)
4620{
4621 unsigned long pos;
4622
4623 /* Find position where node should be at */
4624 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
4625 if ( ctx->node_seq.buffer[pos].node == node )
4626 return &ctx->node_seq.buffer[pos];
4627 else
4628 return NULL;
4629}
4630
4631
Daniel Veillard11e00581998-10-24 18:27:49 +00004632/**
4633 * xmlInitNodeInfoSeq :
4634 * @seq: a node info sequence pointer
4635 *
4636 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00004637 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004638void
4639xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004640{
4641 seq->length = 0;
4642 seq->maximum = 0;
4643 seq->buffer = NULL;
4644}
4645
Daniel Veillard11e00581998-10-24 18:27:49 +00004646/**
4647 * xmlClearNodeInfoSeq :
4648 * @seq: a node info sequence pointer
4649 *
4650 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +00004651 * info sequence
4652 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004653void
4654xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004655{
4656 if ( seq->buffer != NULL )
4657 free(seq->buffer);
4658 xmlInitNodeInfoSeq(seq);
4659}
4660
4661
Daniel Veillard11e00581998-10-24 18:27:49 +00004662/**
4663 * xmlParserFindNodeInfoIndex:
4664 * @seq: a node info sequence pointer
4665 * @node: an XML node pointer
4666 *
4667 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004668 * xmlParserFindNodeInfoIndex : Find the index that the info record for
4669 * the given node is or should be at in a sorted sequence
Daniel Veillard11e00581998-10-24 18:27:49 +00004670 * return values: a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +00004671 */
4672unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
4673 const xmlNode* node)
4674{
4675 unsigned long upper, lower, middle;
4676 int found = 0;
4677
4678 /* Do a binary search for the key */
4679 lower = 1;
4680 upper = seq->length;
4681 middle = 0;
4682 while ( lower <= upper && !found) {
4683 middle = lower + (upper - lower) / 2;
4684 if ( node == seq->buffer[middle - 1].node )
4685 found = 1;
4686 else if ( node < seq->buffer[middle - 1].node )
4687 upper = middle - 1;
4688 else
4689 lower = middle + 1;
4690 }
4691
4692 /* Return position */
4693 if ( middle == 0 || seq->buffer[middle - 1].node < node )
4694 return middle;
4695 else
4696 return middle - 1;
4697}
4698
4699
Daniel Veillard11e00581998-10-24 18:27:49 +00004700/**
4701 * xmlParserAddNodeInfo:
4702 * @ctxt: an XML parser context
4703 * @seq: a node info sequence pointer
4704 *
4705 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00004706 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004707void
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004708xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004709 const xmlParserNodeInfo* info)
4710{
4711 unsigned long pos;
4712 static unsigned int block_size = 5;
4713
4714 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004715 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
4716 if ( pos < ctxt->node_seq.length
4717 && ctxt->node_seq.buffer[pos].node == info->node ) {
4718 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004719 }
4720
4721 /* Otherwise, we need to add new node to buffer */
4722 else {
4723 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004724 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004725 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004726 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
4727 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +00004728
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004729 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard260a68f1998-08-13 03:39:55 +00004730 tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
4731 else
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004732 tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004733
4734 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard8cc0d1f1998-11-16 01:04:26 +00004736 ctxt->sax->error(ctxt, "Out of memory\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00004737 return;
4738 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004739 ctxt->node_seq.buffer = tmp_buffer;
4740 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004741 }
4742
4743 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004744 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004745 unsigned long i;
4746
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004747 for ( i = ctxt->node_seq.length; i > pos; i-- )
4748 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +00004749 }
4750
4751 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004752 ctxt->node_seq.buffer[pos] = *info;
4753 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004754 }
4755}