blob: 5f1c3fffdd068c58528b23a93485cbdfe2b13a6e [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
6 * $Id$
7 */
8
9#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
13#include <config.h>
14#endif
15#include <stdio.h>
16#include <ctype.h>
17#include <string.h> /* for memset() only */
Seth Alvese7f12e61998-10-01 20:51:15 +000018#include <stdlib.h>
Daniel Veillard260a68f1998-08-13 03:39:55 +000019#include <sys/stat.h>
20#ifdef HAVE_FCNTL_H
21#include <fcntl.h>
22#endif
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_ZLIB_H
27#include <zlib.h>
28#endif
29
30#include "tree.h"
31#include "parser.h"
32#include "entities.h"
33
34/************************************************************************
35 * *
36 * Parser stacks related functions and macros *
37 * *
38 ************************************************************************/
39/*
40 * Generic function for accessing stacks in the Parser Context
41 */
42
43#define PUSH_AND_POP(type, name) \
44int name##Push(xmlParserCtxtPtr ctxt, type value) { \
45 if (ctxt->name##Nr >= ctxt->name##Max) { \
46 ctxt->name##Max *= 2; \
47 ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
48 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
49 if (ctxt->name##Tab == NULL) { \
50 fprintf(stderr, "realloc failed !\n"); \
51 exit(1); \
52 } \
53 } \
54 ctxt->name##Tab[ctxt->name##Nr] = value; \
55 ctxt->name = value; \
56 return(ctxt->name##Nr++); \
57} \
58type name##Pop(xmlParserCtxtPtr ctxt) { \
59 if (ctxt->name##Nr <= 0) return(0); \
60 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +000061 if (ctxt->name##Nr > 0) \
62 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
63 else \
64 ctxt->name = NULL; \
Daniel Veillard260a68f1998-08-13 03:39:55 +000065 return(ctxt->name); \
66} \
67
68PUSH_AND_POP(xmlParserInputPtr, input)
69PUSH_AND_POP(xmlNodePtr, node)
70
/*
 * Macros for accessing the content. They are meant to be used only by the
 * parser and must not be exported.  All of them expect a variable named
 * `ctxt` (the parser context) to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions about the context to
 * use them:
 *
 *   CUR_PTR  the current pointer to the CHAR to be parsed.
 *   CUR      the current CHAR value, i.e. an 8 bit value if compiled
 *            in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *            in UNICODE mode. This should be used internally by the parser
 *            only to compare to ASCII values, otherwise it would break when
 *            running with UTF-8 encoding.
 *   NXT(n)   the n'th next CHAR. Like CUR, it should be used only
 *            to compare against ASCII based substrings.
 *   SKIP(n)  skips n CHARs; must also be used only to skip ASCII defined
 *            strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context:
 *
 *   CURRENT  the current char value (the UTF-8 decoding variant is not
 *            implemented yet, see the empty USE_UTF_8 branch below).
 *   NEXT     moves to the next character, keeping the line/column counters
 *            of the current input up to date.  At the end of the current
 *            input (a 0 CHAR) it calls xmlPopInput() instead of advancing.
 *            The value of the expression is a pointer into the input.
 *
 * SKIP, NXT and CUR_PTR are fully parenthesized so that they expand safely
 * inside larger expressions; SKIP_BLANKS is wrapped in do { } while (0) so
 * that it behaves as a single statement (use it as `SKIP_BLANKS;`).
 */

#define CUR (*ctxt->input->cur)
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

#define SKIP_BLANKS \
    do { \
        while (IS_BLANK(*(ctxt->input->cur))) NEXT; \
    } while (0)

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT ((*ctxt->input->cur) ? \
                (((*(ctxt->input->cur) == '\n') ? \
                    (ctxt->input->line++, ctxt->input->col = 1) : \
                    (ctxt->input->col++)), ctxt->input->cur++) : \
                (xmlPopInput(ctxt), ctxt->input->cur))
#else
/* UTF-8 decoding versions of CURRENT and NEXT are not implemented yet. */
#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +0000114
115
Daniel Veillard11e00581998-10-24 18:27:49 +0000116/**
117 * xmlPopInput:
118 * @ctxt: an XML parser context
119 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000120 * xmlPopInput: the current input pointed by ctxt->input came to an end
121 * pop it and return the next char.
122 *
123 * TODO A deallocation of the popped Input structure is needed
Daniel Veillard11e00581998-10-24 18:27:49 +0000124 * return values: the current CHAR in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000125 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000126CHAR
127xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000128 if (ctxt->inputNr == 1) return(0); /* End of main Input */
129 inputPop(ctxt);
130 return(CUR);
131}
132
Daniel Veillard11e00581998-10-24 18:27:49 +0000133/**
134 * xmlPushInput:
135 * @ctxt: an XML parser context
136 * @input: an XML parser input fragment (entity, XML fragment ...).
137 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000138 * xmlPushInput: switch to a new input stream which is stacked on top
139 * of the previous one(s).
140 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000141void
142xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000143 if (input == NULL) return;
144 inputPush(ctxt, input);
145}
146
Daniel Veillard11e00581998-10-24 18:27:49 +0000147/**
148 * xmlNewEntityInputStream:
149 * @ctxt: an XML parser context
150 * @entity: an Entity pointer
151 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000152 * Create a new input stream based on a memory buffer.
Daniel Veillardccb09631998-10-27 06:21:04 +0000153 * return vakues: the new input stream
Daniel Veillard260a68f1998-08-13 03:39:55 +0000154 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000155xmlParserInputPtr
156xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000157 xmlParserInputPtr input;
158
159 if (entity == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
161 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000162 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000163 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000164 }
165 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
167 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000168 "internal: xmlNewEntityInputStream entity->input = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000169 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000170 }
171 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
172 if (input == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
174 ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000175 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000176 }
177 input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
178 input->base = entity->content;
179 input->cur = entity->content;
180 input->line = 1;
181 input->col = 1;
Daniel Veillardccb09631998-10-27 06:21:04 +0000182 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000183}
184
185/*
186 * A few macros needed to help building the parser.
187 */
188
189#ifdef UNICODE
190/************************************************************************
191 * *
192 * UNICODE version of the macros. *
193 * *
194 ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 *
 * UNICODE build: non-zero when c is TAB, LF, CR, or any value from 0x20
 * upwards other than 0xFFFE and 0xFFFF.  The surrogate range named by the
 * production is not filtered out by this test.
 */
#define IS_CHAR(c) \
    (((c) >= 0x20) ? \
        (!(((c) == 0xFFFE) || ((c) == 0xFFFF))) : \
        (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d)))
203
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Non-zero when c is one of the four XML white-space characters.
 *
 * NOTE: IS_BLANK is defined a second time further down in this file, after
 * the UNICODE / 8-bit sections.  C only accepts a macro redefinition whose
 * replacement list is identical, so check that definition before changing
 * any token of this one.
 */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || ((c) == 0x0D))
209
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * UNICODE build.  The table below is a direct transcription of the
 * production: every [#xLO-#xHI] range becomes XML_IN_RANGE(c, 0xLO, 0xHI)
 * and every single #xV becomes ((c) == 0xV), in the order of the REC.
 *
 * VI is your friend for regenerating the original long form:
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_BASECHAR(c) \
    (XML_IN_RANGE(c, 0x0041, 0x005A) || XML_IN_RANGE(c, 0x0061, 0x007A) || \
     XML_IN_RANGE(c, 0x00C0, 0x00D6) || XML_IN_RANGE(c, 0x00D8, 0x00F6) || \
     XML_IN_RANGE(c, 0x00F8, 0x00FF) || XML_IN_RANGE(c, 0x0100, 0x0131) || \
     XML_IN_RANGE(c, 0x0134, 0x013E) || XML_IN_RANGE(c, 0x0141, 0x0148) || \
     XML_IN_RANGE(c, 0x014A, 0x017E) || XML_IN_RANGE(c, 0x0180, 0x01C3) || \
     XML_IN_RANGE(c, 0x01CD, 0x01F0) || XML_IN_RANGE(c, 0x01F4, 0x01F5) || \
     XML_IN_RANGE(c, 0x01FA, 0x0217) || XML_IN_RANGE(c, 0x0250, 0x02A8) || \
     XML_IN_RANGE(c, 0x02BB, 0x02C1) || ((c) == 0x0386) || \
     XML_IN_RANGE(c, 0x0388, 0x038A) || ((c) == 0x038C) || \
     XML_IN_RANGE(c, 0x038E, 0x03A1) || XML_IN_RANGE(c, 0x03A3, 0x03CE) || \
     XML_IN_RANGE(c, 0x03D0, 0x03D6) || ((c) == 0x03DA) || \
     ((c) == 0x03DC) || ((c) == 0x03DE) || ((c) == 0x03E0) || \
     XML_IN_RANGE(c, 0x03E2, 0x03F3) || XML_IN_RANGE(c, 0x0401, 0x040C) || \
     XML_IN_RANGE(c, 0x040E, 0x044F) || XML_IN_RANGE(c, 0x0451, 0x045C) || \
     XML_IN_RANGE(c, 0x045E, 0x0481) || XML_IN_RANGE(c, 0x0490, 0x04C4) || \
     XML_IN_RANGE(c, 0x04C7, 0x04C8) || XML_IN_RANGE(c, 0x04CB, 0x04CC) || \
     XML_IN_RANGE(c, 0x04D0, 0x04EB) || XML_IN_RANGE(c, 0x04EE, 0x04F5) || \
     XML_IN_RANGE(c, 0x04F8, 0x04F9) || XML_IN_RANGE(c, 0x0531, 0x0556) || \
     ((c) == 0x0559) || \
     XML_IN_RANGE(c, 0x0561, 0x0586) || XML_IN_RANGE(c, 0x05D0, 0x05EA) || \
     XML_IN_RANGE(c, 0x05F0, 0x05F2) || XML_IN_RANGE(c, 0x0621, 0x063A) || \
     XML_IN_RANGE(c, 0x0641, 0x064A) || XML_IN_RANGE(c, 0x0671, 0x06B7) || \
     XML_IN_RANGE(c, 0x06BA, 0x06BE) || XML_IN_RANGE(c, 0x06C0, 0x06CE) || \
     XML_IN_RANGE(c, 0x06D0, 0x06D3) || ((c) == 0x06D5) || \
     XML_IN_RANGE(c, 0x06E5, 0x06E6) || XML_IN_RANGE(c, 0x0905, 0x0939) || \
     ((c) == 0x093D) || \
     XML_IN_RANGE(c, 0x0958, 0x0961) || XML_IN_RANGE(c, 0x0985, 0x098C) || \
     XML_IN_RANGE(c, 0x098F, 0x0990) || XML_IN_RANGE(c, 0x0993, 0x09A8) || \
     XML_IN_RANGE(c, 0x09AA, 0x09B0) || ((c) == 0x09B2) || \
     XML_IN_RANGE(c, 0x09B6, 0x09B9) || XML_IN_RANGE(c, 0x09DC, 0x09DD) || \
     XML_IN_RANGE(c, 0x09DF, 0x09E1) || XML_IN_RANGE(c, 0x09F0, 0x09F1) || \
     XML_IN_RANGE(c, 0x0A05, 0x0A0A) || XML_IN_RANGE(c, 0x0A0F, 0x0A10) || \
     XML_IN_RANGE(c, 0x0A13, 0x0A28) || XML_IN_RANGE(c, 0x0A2A, 0x0A30) || \
     XML_IN_RANGE(c, 0x0A32, 0x0A33) || XML_IN_RANGE(c, 0x0A35, 0x0A36) || \
     XML_IN_RANGE(c, 0x0A38, 0x0A39) || XML_IN_RANGE(c, 0x0A59, 0x0A5C) || \
     ((c) == 0x0A5E) || \
     XML_IN_RANGE(c, 0x0A72, 0x0A74) || XML_IN_RANGE(c, 0x0A85, 0x0A8B) || \
     ((c) == 0x0A8D) || \
     XML_IN_RANGE(c, 0x0A8F, 0x0A91) || XML_IN_RANGE(c, 0x0A93, 0x0AA8) || \
     XML_IN_RANGE(c, 0x0AAA, 0x0AB0) || XML_IN_RANGE(c, 0x0AB2, 0x0AB3) || \
     XML_IN_RANGE(c, 0x0AB5, 0x0AB9) || ((c) == 0x0ABD) || \
     ((c) == 0x0AE0) || \
     XML_IN_RANGE(c, 0x0B05, 0x0B0C) || XML_IN_RANGE(c, 0x0B0F, 0x0B10) || \
     XML_IN_RANGE(c, 0x0B13, 0x0B28) || XML_IN_RANGE(c, 0x0B2A, 0x0B30) || \
     XML_IN_RANGE(c, 0x0B32, 0x0B33) || XML_IN_RANGE(c, 0x0B36, 0x0B39) || \
     ((c) == 0x0B3D) || \
     XML_IN_RANGE(c, 0x0B5C, 0x0B5D) || XML_IN_RANGE(c, 0x0B5F, 0x0B61) || \
     XML_IN_RANGE(c, 0x0B85, 0x0B8A) || XML_IN_RANGE(c, 0x0B8E, 0x0B90) || \
     XML_IN_RANGE(c, 0x0B92, 0x0B95) || XML_IN_RANGE(c, 0x0B99, 0x0B9A) || \
     ((c) == 0x0B9C) || \
     XML_IN_RANGE(c, 0x0B9E, 0x0B9F) || XML_IN_RANGE(c, 0x0BA3, 0x0BA4) || \
     XML_IN_RANGE(c, 0x0BA8, 0x0BAA) || XML_IN_RANGE(c, 0x0BAE, 0x0BB5) || \
     XML_IN_RANGE(c, 0x0BB7, 0x0BB9) || XML_IN_RANGE(c, 0x0C05, 0x0C0C) || \
     XML_IN_RANGE(c, 0x0C0E, 0x0C10) || XML_IN_RANGE(c, 0x0C12, 0x0C28) || \
     XML_IN_RANGE(c, 0x0C2A, 0x0C33) || XML_IN_RANGE(c, 0x0C35, 0x0C39) || \
     XML_IN_RANGE(c, 0x0C60, 0x0C61) || XML_IN_RANGE(c, 0x0C85, 0x0C8C) || \
     XML_IN_RANGE(c, 0x0C8E, 0x0C90) || XML_IN_RANGE(c, 0x0C92, 0x0CA8) || \
     XML_IN_RANGE(c, 0x0CAA, 0x0CB3) || XML_IN_RANGE(c, 0x0CB5, 0x0CB9) || \
     ((c) == 0x0CDE) || \
     XML_IN_RANGE(c, 0x0CE0, 0x0CE1) || XML_IN_RANGE(c, 0x0D05, 0x0D0C) || \
     XML_IN_RANGE(c, 0x0D0E, 0x0D10) || XML_IN_RANGE(c, 0x0D12, 0x0D28) || \
     XML_IN_RANGE(c, 0x0D2A, 0x0D39) || XML_IN_RANGE(c, 0x0D60, 0x0D61) || \
     XML_IN_RANGE(c, 0x0E01, 0x0E2E) || ((c) == 0x0E30) || \
     XML_IN_RANGE(c, 0x0E32, 0x0E33) || XML_IN_RANGE(c, 0x0E40, 0x0E45) || \
     XML_IN_RANGE(c, 0x0E81, 0x0E82) || ((c) == 0x0E84) || \
     XML_IN_RANGE(c, 0x0E87, 0x0E88) || ((c) == 0x0E8A) || \
     ((c) == 0x0E8D) || \
     XML_IN_RANGE(c, 0x0E94, 0x0E97) || XML_IN_RANGE(c, 0x0E99, 0x0E9F) || \
     XML_IN_RANGE(c, 0x0EA1, 0x0EA3) || ((c) == 0x0EA5) || \
     ((c) == 0x0EA7) || \
     XML_IN_RANGE(c, 0x0EAA, 0x0EAB) || XML_IN_RANGE(c, 0x0EAD, 0x0EAE) || \
     ((c) == 0x0EB0) || \
     XML_IN_RANGE(c, 0x0EB2, 0x0EB3) || ((c) == 0x0EBD) || \
     XML_IN_RANGE(c, 0x0EC0, 0x0EC4) || XML_IN_RANGE(c, 0x0F40, 0x0F47) || \
     XML_IN_RANGE(c, 0x0F49, 0x0F69) || XML_IN_RANGE(c, 0x10A0, 0x10C5) || \
     XML_IN_RANGE(c, 0x10D0, 0x10F6) || ((c) == 0x1100) || \
     XML_IN_RANGE(c, 0x1102, 0x1103) || XML_IN_RANGE(c, 0x1105, 0x1107) || \
     ((c) == 0x1109) || \
     XML_IN_RANGE(c, 0x110B, 0x110C) || XML_IN_RANGE(c, 0x110E, 0x1112) || \
     ((c) == 0x113C) || ((c) == 0x113E) || ((c) == 0x1140) || \
     ((c) == 0x114C) || ((c) == 0x114E) || ((c) == 0x1150) || \
     XML_IN_RANGE(c, 0x1154, 0x1155) || ((c) == 0x1159) || \
     XML_IN_RANGE(c, 0x115F, 0x1161) || ((c) == 0x1163) || \
     ((c) == 0x1165) || ((c) == 0x1167) || ((c) == 0x1169) || \
     XML_IN_RANGE(c, 0x116D, 0x116E) || XML_IN_RANGE(c, 0x1172, 0x1173) || \
     ((c) == 0x1175) || ((c) == 0x119E) || ((c) == 0x11A8) || \
     ((c) == 0x11AB) || \
     XML_IN_RANGE(c, 0x11AE, 0x11AF) || XML_IN_RANGE(c, 0x11B7, 0x11B8) || \
     ((c) == 0x11BA) || \
     XML_IN_RANGE(c, 0x11BC, 0x11C2) || ((c) == 0x11EB) || \
     ((c) == 0x11F0) || ((c) == 0x11F9) || \
     XML_IN_RANGE(c, 0x1E00, 0x1E9B) || XML_IN_RANGE(c, 0x1EA0, 0x1EF9) || \
     XML_IN_RANGE(c, 0x1F00, 0x1F15) || XML_IN_RANGE(c, 0x1F18, 0x1F1D) || \
     XML_IN_RANGE(c, 0x1F20, 0x1F45) || XML_IN_RANGE(c, 0x1F48, 0x1F4D) || \
     XML_IN_RANGE(c, 0x1F50, 0x1F57) || ((c) == 0x1F59) || \
     ((c) == 0x1F5B) || ((c) == 0x1F5D) || \
     XML_IN_RANGE(c, 0x1F5F, 0x1F7D) || XML_IN_RANGE(c, 0x1F80, 0x1FB4) || \
     XML_IN_RANGE(c, 0x1FB6, 0x1FBC) || ((c) == 0x1FBE) || \
     XML_IN_RANGE(c, 0x1FC2, 0x1FC4) || XML_IN_RANGE(c, 0x1FC6, 0x1FCC) || \
     XML_IN_RANGE(c, 0x1FD0, 0x1FD3) || XML_IN_RANGE(c, 0x1FD6, 0x1FDB) || \
     XML_IN_RANGE(c, 0x1FE0, 0x1FEC) || XML_IN_RANGE(c, 0x1FF2, 0x1FF4) || \
     XML_IN_RANGE(c, 0x1FF6, 0x1FFC) || ((c) == 0x2126) || \
     XML_IN_RANGE(c, 0x212A, 0x212B) || ((c) == 0x212E) || \
     XML_IN_RANGE(c, 0x2180, 0x2182) || XML_IN_RANGE(c, 0x3041, 0x3094) || \
     XML_IN_RANGE(c, 0x30A1, 0x30FA) || XML_IN_RANGE(c, 0x3105, 0x312C) || \
     XML_IN_RANGE(c, 0xAC00, 0xD7A3))
421
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * UNICODE build: non-zero when c falls in one of the fifteen digit ranges
 * listed by the production (ASCII 0-9 first, then the other scripts).
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_DIGIT(c) \
    (XML_IN_RANGE(c, 0x0030, 0x0039) || XML_IN_RANGE(c, 0x0660, 0x0669) || \
     XML_IN_RANGE(c, 0x06F0, 0x06F9) || XML_IN_RANGE(c, 0x0966, 0x096F) || \
     XML_IN_RANGE(c, 0x09E6, 0x09EF) || XML_IN_RANGE(c, 0x0A66, 0x0A6F) || \
     XML_IN_RANGE(c, 0x0AE6, 0x0AEF) || XML_IN_RANGE(c, 0x0B66, 0x0B6F) || \
     XML_IN_RANGE(c, 0x0BE7, 0x0BEF) || XML_IN_RANGE(c, 0x0C66, 0x0C6F) || \
     XML_IN_RANGE(c, 0x0CE6, 0x0CEF) || XML_IN_RANGE(c, 0x0D66, 0x0D6F) || \
     XML_IN_RANGE(c, 0x0E50, 0x0E59) || XML_IN_RANGE(c, 0x0ED0, 0x0ED9) || \
     XML_IN_RANGE(c, 0x0F20, 0x0F29))
441
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * UNICODE build.  Direct transcription of the production: ranges become
 * XML_IN_RANGE(c, lo, hi), single code points become ((c) == value), in
 * the order of the REC.
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_COMBINING(c) \
    (XML_IN_RANGE(c, 0x0300, 0x0345) || XML_IN_RANGE(c, 0x0360, 0x0361) || \
     XML_IN_RANGE(c, 0x0483, 0x0486) || XML_IN_RANGE(c, 0x0591, 0x05A1) || \
     XML_IN_RANGE(c, 0x05A3, 0x05B9) || XML_IN_RANGE(c, 0x05BB, 0x05BD) || \
     ((c) == 0x05BF) || \
     XML_IN_RANGE(c, 0x05C1, 0x05C2) || ((c) == 0x05C4) || \
     XML_IN_RANGE(c, 0x064B, 0x0652) || ((c) == 0x0670) || \
     XML_IN_RANGE(c, 0x06D6, 0x06DC) || XML_IN_RANGE(c, 0x06DD, 0x06DF) || \
     XML_IN_RANGE(c, 0x06E0, 0x06E4) || XML_IN_RANGE(c, 0x06E7, 0x06E8) || \
     XML_IN_RANGE(c, 0x06EA, 0x06ED) || XML_IN_RANGE(c, 0x0901, 0x0903) || \
     ((c) == 0x093C) || \
     XML_IN_RANGE(c, 0x093E, 0x094C) || ((c) == 0x094D) || \
     XML_IN_RANGE(c, 0x0951, 0x0954) || XML_IN_RANGE(c, 0x0962, 0x0963) || \
     XML_IN_RANGE(c, 0x0981, 0x0983) || ((c) == 0x09BC) || \
     ((c) == 0x09BE) || ((c) == 0x09BF) || \
     XML_IN_RANGE(c, 0x09C0, 0x09C4) || XML_IN_RANGE(c, 0x09C7, 0x09C8) || \
     XML_IN_RANGE(c, 0x09CB, 0x09CD) || ((c) == 0x09D7) || \
     XML_IN_RANGE(c, 0x09E2, 0x09E3) || ((c) == 0x0A02) || \
     ((c) == 0x0A3C) || ((c) == 0x0A3E) || ((c) == 0x0A3F) || \
     XML_IN_RANGE(c, 0x0A40, 0x0A42) || XML_IN_RANGE(c, 0x0A47, 0x0A48) || \
     XML_IN_RANGE(c, 0x0A4B, 0x0A4D) || XML_IN_RANGE(c, 0x0A70, 0x0A71) || \
     XML_IN_RANGE(c, 0x0A81, 0x0A83) || ((c) == 0x0ABC) || \
     XML_IN_RANGE(c, 0x0ABE, 0x0AC5) || XML_IN_RANGE(c, 0x0AC7, 0x0AC9) || \
     XML_IN_RANGE(c, 0x0ACB, 0x0ACD) || XML_IN_RANGE(c, 0x0B01, 0x0B03) || \
     ((c) == 0x0B3C) || \
     XML_IN_RANGE(c, 0x0B3E, 0x0B43) || XML_IN_RANGE(c, 0x0B47, 0x0B48) || \
     XML_IN_RANGE(c, 0x0B4B, 0x0B4D) || XML_IN_RANGE(c, 0x0B56, 0x0B57) || \
     XML_IN_RANGE(c, 0x0B82, 0x0B83) || XML_IN_RANGE(c, 0x0BBE, 0x0BC2) || \
     XML_IN_RANGE(c, 0x0BC6, 0x0BC8) || XML_IN_RANGE(c, 0x0BCA, 0x0BCD) || \
     ((c) == 0x0BD7) || \
     XML_IN_RANGE(c, 0x0C01, 0x0C03) || XML_IN_RANGE(c, 0x0C3E, 0x0C44) || \
     XML_IN_RANGE(c, 0x0C46, 0x0C48) || XML_IN_RANGE(c, 0x0C4A, 0x0C4D) || \
     XML_IN_RANGE(c, 0x0C55, 0x0C56) || XML_IN_RANGE(c, 0x0C82, 0x0C83) || \
     XML_IN_RANGE(c, 0x0CBE, 0x0CC4) || XML_IN_RANGE(c, 0x0CC6, 0x0CC8) || \
     XML_IN_RANGE(c, 0x0CCA, 0x0CCD) || XML_IN_RANGE(c, 0x0CD5, 0x0CD6) || \
     XML_IN_RANGE(c, 0x0D02, 0x0D03) || XML_IN_RANGE(c, 0x0D3E, 0x0D43) || \
     XML_IN_RANGE(c, 0x0D46, 0x0D48) || XML_IN_RANGE(c, 0x0D4A, 0x0D4D) || \
     ((c) == 0x0D57) || ((c) == 0x0E31) || \
     XML_IN_RANGE(c, 0x0E34, 0x0E3A) || XML_IN_RANGE(c, 0x0E47, 0x0E4E) || \
     ((c) == 0x0EB1) || \
     XML_IN_RANGE(c, 0x0EB4, 0x0EB9) || XML_IN_RANGE(c, 0x0EBB, 0x0EBC) || \
     XML_IN_RANGE(c, 0x0EC8, 0x0ECD) || XML_IN_RANGE(c, 0x0F18, 0x0F19) || \
     ((c) == 0x0F35) || ((c) == 0x0F37) || ((c) == 0x0F39) || \
     ((c) == 0x0F3E) || ((c) == 0x0F3F) || \
     XML_IN_RANGE(c, 0x0F71, 0x0F84) || XML_IN_RANGE(c, 0x0F86, 0x0F8B) || \
     XML_IN_RANGE(c, 0x0F90, 0x0F95) || ((c) == 0x0F97) || \
     XML_IN_RANGE(c, 0x0F99, 0x0FAD) || XML_IN_RANGE(c, 0x0FB1, 0x0FB7) || \
     ((c) == 0x0FB9) || \
     XML_IN_RANGE(c, 0x20D0, 0x20DC) || ((c) == 0x20E1) || \
     XML_IN_RANGE(c, 0x302A, 0x302F) || ((c) == 0x3099) || \
     ((c) == 0x309A))
541
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * UNICODE build: non-zero when c is one of the extender code points of the
 * production above.  The hiragana range starts at 0x309D as in the
 * production (0x309B and 0x309C are not extenders).
 */
#define IS_EXTENDER(c) \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
     ((c) == 0xec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309d) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
554
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * UNICODE build: non-zero when c is an ideographic character.
 *
 * NOTE(review): the 0xF900-0xFA2D range accepted below is not part of
 * production [86] as quoted above; confirm whether it is intentional.
 */
#define IS_IDEOGRAPHIC(c) \
    (((c) == 0x3007) || \
     (!((c) < 0x3021) && !((c) > 0x3029)) || \
     (!((c) < 0x4e00) && !((c) > 0x9fa5)) || \
     (!((c) < 0xf900) && !((c) > 0xfa2d)))
563
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * UNICODE build: a letter is anything accepted by IS_BASECHAR or
 * IS_IDEOGRAPHIC.
 */
#define IS_LETTER(c) \
    ((IS_BASECHAR(c)) || (IS_IDEOGRAPHIC(c)))
568
569#else
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000570#ifndef USE_UTF_8
Daniel Veillard260a68f1998-08-13 03:39:55 +0000571/************************************************************************
572 * *
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000573 * 8bits / ISO-Latin version of the macros. *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000574 * *
575 ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 *
 * 8 bit build: non-zero when c is TAB, LF, CR or any value from 0x20
 * upwards.
 */
#define IS_CHAR(c) \
    (((c) >= 0x20) || \
     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
584
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bit build: only the part of the table below 0x100 is relevant.
 *
 * The former clause (((c) >= 0xaa) && ((c) <= 0x5b)) could never be true
 * (empty range) and has been removed; the set of accepted values is
 * unchanged.
 *
 * NOTE(review): 0xba is accepted here but is not listed by the UNICODE
 * version of this macro; confirm which of the two is intended.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))
596
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bit build: only the ASCII digits 0x30-0x39 are accepted.
 */
#define IS_DIGIT(c) (!(((c) < 0x30) || ((c) > 0x39)))
601
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bit build: no ideographic test is applied, so a letter is exactly
 * what IS_BASECHAR accepts.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))
606
607
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bit build: always 0, the argument is not evaluated.
 */
#define IS_COMBINING(c) (0)
612
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bit build: 0xb7 is the only extender below 0x100.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
619
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000620#else /* USE_UTF_8 */
621/************************************************************************
622 * *
623 * 8bits / UTF-8 version of the macros. *
624 * *
625 ************************************************************************/
626
627TODO !!!
628#endif /* USE_UTF_8 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000629#endif /* !UNICODE */
630
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Non-zero when c is space, TAB, LF or CR.  The UNICODE section above may
 * already have defined IS_BLANK; the #ifndef keeps this common definition
 * from redefining it.
 */
#ifndef IS_BLANK
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || ((c) == 0x0D))
#endif
638
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Non-zero when c may appear in a public identifier: ASCII letters and
 * digits, space, CR, LF and the punctuation listed by the production.
 */
#define IS_PUBIDCHAR(c) \
    ((((c) >= '0') && ((c) <= '9')) || \
     (((c) >= 'A') && ((c) <= 'Z')) || \
     (((c) >= 'a') && ((c) <= 'z')) || \
     ((c) == 0x0A) || ((c) == 0x0D) || ((c) == 0x20) || \
     ((c) == '!') || ((c) == '#') || ((c) == '$') || ((c) == '%') || \
     ((c) == '\'') || ((c) == '(') || ((c) == ')') || ((c) == '*') || \
     ((c) == '+') || ((c) == ',') || ((c) == '-') || ((c) == '.') || \
     ((c) == '/') || ((c) == ':') || ((c) == ';') || ((c) == '=') || \
     ((c) == '?') || ((c) == '@') || ((c) == '_'))
652
/*
 * SKIP_EOL(p): advance the pointer p over one end-of-line sequence, if p
 * points at one.  A line end is CR (0x0D) optionally followed by LF (0x0A),
 * or LF optionally followed by CR.  p is left untouched otherwise.
 *
 * The tests use the real character codes 0x0D / 0x0A (13 and 10 decimal),
 * and the two cases are chained with `else` so that a single invocation
 * never consumes two consecutive line ends.
 *
 * The expansion is an if / else-if statement: use it as a statement of its
 * own, not as the unbraced body of another if.
 */
#define SKIP_EOL(p) \
    if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; } \
    else if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }
656
/*
 * MOVETO_ENDTAG(p): advance the pointer p until it points at a '>' or at
 * a value rejected by IS_CHAR (such as the terminating 0).
 * Every use of the argument is parenthesized so that an expression
 * argument is dereferenced as a whole.
 */
#define MOVETO_ENDTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++
659
/*
 * MOVETO_STARTTAG(p): advance the pointer p until it points at a '<' or at
 * a value rejected by IS_CHAR (such as the terminating 0).
 * Every use of the argument is parenthesized so that an expression
 * argument is dereferenced as a whole.
 */
#define MOVETO_STARTTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
662
663/************************************************************************
664 * *
665 * Commodity functions to handle CHARs *
666 * *
667 ************************************************************************/
668
Daniel Veillard11e00581998-10-24 18:27:49 +0000669/**
670 * xmlStrndup:
671 * @cur: the input CHAR *
672 * @len: the len of @cur
673 *
674 * a strndup for array of CHAR's
675 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000676 */
677
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000678CHAR *
679xmlStrndup(const CHAR *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000680 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
681
682 if (ret == NULL) {
683 fprintf(stderr, "malloc of %d byte failed\n",
684 (len + 1) * sizeof(CHAR));
685 return(NULL);
686 }
687 memcpy(ret, cur, len * sizeof(CHAR));
688 ret[len] = 0;
689 return(ret);
690}
691
Daniel Veillard11e00581998-10-24 18:27:49 +0000692/**
693 * xmlStrdup:
694 * @cur: the input CHAR *
695 *
696 * a strdup for array of CHAR's
697 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000698 */
699
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000700CHAR *
701xmlStrdup(const CHAR *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000702 const CHAR *p = cur;
703
704 while (IS_CHAR(*p)) p++;
705 return(xmlStrndup(cur, p - cur));
706}
707
Daniel Veillard11e00581998-10-24 18:27:49 +0000708/**
709 * xmlCharStrndup:
710 * @cur: the input char *
711 * @len: the len of @cur
712 *
713 * a strndup for char's to CHAR's
714 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000715 */
716
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000717CHAR *
718xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000719 int i;
720 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
721
722 if (ret == NULL) {
723 fprintf(stderr, "malloc of %d byte failed\n",
724 (len + 1) * sizeof(CHAR));
725 return(NULL);
726 }
727 for (i = 0;i < len;i++)
728 ret[i] = (CHAR) cur[i];
729 ret[len] = 0;
730 return(ret);
731}
732
Daniel Veillard11e00581998-10-24 18:27:49 +0000733/**
734 * xmlCharStrdup:
735 * @cur: the input char *
736 * @len: the len of @cur
737 *
738 * a strdup for char's to CHAR's
739 * return values: a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000740 */
741
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000742CHAR *
743xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000744 const char *p = cur;
745
746 while (*p != '\0') p++;
747 return(xmlCharStrndup(cur, p - cur));
748}
749
Daniel Veillard11e00581998-10-24 18:27:49 +0000750/**
751 * xmlStrcmp:
752 * @str1: the first CHAR *
753 * @str2: the second CHAR *
754 *
755 * a strcmp for CHAR's
756 * return values: the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +0000757 */
758
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000759int
760xmlStrcmp(const CHAR *str1, const CHAR *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000761 register int tmp;
762
763 do {
764 tmp = *str1++ - *str2++;
765 if (tmp != 0) return(tmp);
766 } while ((*str1 != 0) && (*str2 != 0));
767 return (*str1 - *str2);
768}
769
Daniel Veillard11e00581998-10-24 18:27:49 +0000770/**
771 * xmlStrncmp:
772 * @str1: the first CHAR *
773 * @str2: the second CHAR *
774 * @len: the max comparison length
775 *
776 * a strncmp for CHAR's
777 * return values: the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +0000778 */
779
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000780int
781xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000782 register int tmp;
783
784 if (len <= 0) return(0);
785 do {
786 tmp = *str1++ - *str2++;
787 if (tmp != 0) return(tmp);
788 len--;
789 if (len <= 0) return(0);
790 } while ((*str1 != 0) && (*str2 != 0));
791 return (*str1 - *str2);
792}
793
Daniel Veillard11e00581998-10-24 18:27:49 +0000794/**
795 * xmlStrchr:
796 * @str: the CHAR * array
797 * @val: the CHAR to search
798 *
799 * a strchr for CHAR's
800 * return values: the CHAR * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000801 */
802
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000803CHAR *
804xmlStrchr(const CHAR *str, CHAR val) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000805 while (*str != 0) {
806 if (*str == val) return((CHAR *) str);
807 str++;
808 }
809 return(NULL);
810}
811
Daniel Veillard11e00581998-10-24 18:27:49 +0000812/**
813 * xmlStrlen:
814 * @str: the CHAR * array
815 *
816 * lenght of a CHAR's string
817 * return values: the number of CHAR contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000818 */
819
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000820int
821xmlStrlen(const CHAR *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000822 int len = 0;
823
824 if (str == NULL) return(0);
825 while (*str != 0) {
826 str++;
827 len++;
828 }
829 return(len);
830}
831
Daniel Veillard11e00581998-10-24 18:27:49 +0000832/**
833 * xmlStrncat:
834 * @first: the original CHAR * array
835 * @add: the CHAR * array added
836 * @len: the length of @add
837 *
838 * a strncat for array of CHAR's
839 * return values: a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000840 */
841
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000842CHAR *
843xmlStrncat(CHAR *cur, const CHAR *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000844 int size;
845 CHAR *ret;
846
847 if ((add == NULL) || (len == 0))
848 return(cur);
849 if (cur == NULL)
850 return(xmlStrndup(add, len));
851
852 size = xmlStrlen(cur);
853 ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
854 if (ret == NULL) {
855 fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
856 (size + len + 1) * sizeof(CHAR));
857 return(cur);
858 }
859 memcpy(&ret[size], add, len * sizeof(CHAR));
860 ret[size + len] = 0;
861 return(ret);
862}
863
Daniel Veillard11e00581998-10-24 18:27:49 +0000864/**
865 * xmlStrcat:
866 * @first: the original CHAR * array
867 * @add: the CHAR * array added
868 *
869 * a strcat for array of CHAR's
870 * return values: a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000871 */
872
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000873CHAR *
874xmlStrcat(CHAR *cur, const CHAR *add) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000875 const CHAR *p = add;
876
877 if (add == NULL) return(cur);
878 if (cur == NULL)
879 return(xmlStrdup(add));
880
881 while (IS_CHAR(*p)) p++;
882 return(xmlStrncat(cur, add, p - add));
883}
884
885/************************************************************************
886 * *
887 * Commodity functions, cleanup needed ? *
888 * *
889 ************************************************************************/
890
Daniel Veillard11e00581998-10-24 18:27:49 +0000891/**
892 * areBlanks:
893 * @ctxt: an XML parser context
894 * @str: a CHAR *
895 * @len: the size of @str
896 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000897 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +0000898 *
899 * TODO: to be corrected accodingly to DTD information if available
900 * return values: 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000901 */
902
903static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
904 int i;
905 xmlNodePtr lastChild;
906
907 for (i = 0;i < len;i++)
908 if (!(IS_BLANK(str[i]))) return(0);
909
910 if (CUR != '<') return(0);
911 lastChild = xmlGetLastChild(ctxt->node);
912 if (lastChild == NULL) {
913 if (ctxt->node->content != NULL) return(0);
914 } else if (xmlNodeIsText(lastChild))
915 return(0);
916 return(1);
917}
918
Daniel Veillard11e00581998-10-24 18:27:49 +0000919/**
920 * xmlHandleEntity:
921 * @ctxt: an XML parser context
922 * @entity: an XML entity pointer.
923 *
924 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +0000925 * stream ? When do we just handle that as a set of chars ?
Daniel Veillard11e00581998-10-24 18:27:49 +0000926 * TODO: we should call the SAX handler here and have it resolve the issue
Daniel Veillard260a68f1998-08-13 03:39:55 +0000927 */
928
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000929void
930xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000931 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +0000932 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000933
934 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
936 ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +0000937 entity->name);
938 return;
939 }
940 len = xmlStrlen(entity->content);
941 if (len <= 2) goto handle_as_char;
942
943 /*
944 * Redefine its content as an input stream.
945 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000946 input = xmlNewEntityInputStream(ctxt, entity);
947 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000948 return;
949
950handle_as_char:
951 /*
952 * Just handle the content as a set of chars.
953 */
954 if (ctxt->sax != NULL)
955 ctxt->sax->characters(ctxt, entity->content, 0, len);
956
957}
958
/*
 * Forward declarations for recursive behaviour.
 */
962xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +0000963CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
964CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000965
966/************************************************************************
967 * *
968 * Extra stuff for namespace support *
969 * Relates to http://www.w3.org/TR/WD-xml-names *
970 * *
971 ************************************************************************/
972
Daniel Veillard11e00581998-10-24 18:27:49 +0000973/**
974 * xmlNamespaceParseNCName:
975 * @ctxt: an XML parser context
976 *
977 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000978 *
979 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
980 *
981 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
982 * CombiningChar | Extender
Daniel Veillard11e00581998-10-24 18:27:49 +0000983 * return values: the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000984 */
985
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000986CHAR *
987xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000988 const CHAR *q;
989 CHAR *ret = NULL;
990
991 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
992 q = NEXT;
993
994 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
995 (CUR == '.') || (CUR == '-') ||
996 (CUR == '_') ||
997 (IS_COMBINING(CUR)) ||
998 (IS_EXTENDER(CUR)))
999 NEXT;
1000
1001 ret = xmlStrndup(q, CUR_PTR - q);
1002
1003 return(ret);
1004}
1005
Daniel Veillard11e00581998-10-24 18:27:49 +00001006/**
1007 * xmlNamespaceParseQName:
1008 * @ctxt: an XML parser context
1009 * @prefix: a CHAR **
1010 *
1011 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001012 *
1013 * [NS 5] QName ::= (Prefix ':')? LocalPart
1014 *
1015 * [NS 6] Prefix ::= NCName
1016 *
1017 * [NS 7] LocalPart ::= NCName
Daniel Veillard11e00581998-10-24 18:27:49 +00001018 * return values: the function returns the local part, and prefix is updated
1019 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001020 */
1021
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001022CHAR *
1023xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001024 CHAR *ret = NULL;
1025
1026 *prefix = NULL;
1027 ret = xmlNamespaceParseNCName(ctxt);
1028 if (CUR == ':') {
1029 *prefix = ret;
1030 NEXT;
1031 ret = xmlNamespaceParseNCName(ctxt);
1032 }
1033
1034 return(ret);
1035}
1036
Daniel Veillard11e00581998-10-24 18:27:49 +00001037/**
1038 * xmlNamespaceParseNSDef:
1039 * @ctxt: an XML parser context
1040 *
1041 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00001042 *
1043 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1044 *
1045 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard11e00581998-10-24 18:27:49 +00001046 * return values: the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001047 */
1048
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001049CHAR *
1050xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001051 CHAR *name = NULL;
1052
1053 if ((CUR == 'x') && (NXT(1) == 'm') &&
1054 (NXT(2) == 'l') && (NXT(3) == 'n') &&
1055 (NXT(4) == 's')) {
1056 SKIP(5);
1057 if (CUR == ':') {
1058 NEXT;
1059 name = xmlNamespaceParseNCName(ctxt);
1060 }
1061 }
1062 return(name);
1063}
1064
Daniel Veillard11e00581998-10-24 18:27:49 +00001065/**
1066 * xmlParseQuotedString:
1067 * @ctxt: an XML parser context
1068 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001069 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillard11e00581998-10-24 18:27:49 +00001070 * return values: the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001071 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001072CHAR *
1073xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001074 CHAR *ret = NULL;
1075 const CHAR *q;
1076
1077 if (CUR == '"') {
1078 NEXT;
1079 q = CUR_PTR;
1080 while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001081 if (CUR != '"') {
1082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1083 ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q);
1084 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001085 ret = xmlStrndup(q, CUR_PTR - q);
1086 NEXT;
1087 }
1088 } else if (CUR == '\''){
1089 NEXT;
1090 q = CUR_PTR;
1091 while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001092 if (CUR != '\'') {
1093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1094 ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q);
1095 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001096 ret = xmlStrndup(q, CUR_PTR - q);
1097 NEXT;
1098 }
1099 }
1100 return(ret);
1101}
1102
Daniel Veillard11e00581998-10-24 18:27:49 +00001103/**
1104 * xmlParseNamespace:
1105 * @ctxt: an XML parser context
1106 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001107 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1108 *
1109 * This is what the older xml-name Working Draft specified, a bunch of
1110 * other stuff may still rely on it, so support is still here as
1111 * if ot was declared on the root of the Tree:-(
1112 */
1113
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001114void
1115xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001116 CHAR *href = NULL;
1117 CHAR *prefix = NULL;
1118 int garbage = 0;
1119
1120 /*
1121 * We just skipped "namespace" or "xml:namespace"
1122 */
1123 SKIP_BLANKS;
1124
1125 while (IS_CHAR(CUR) && (CUR != '>')) {
1126 /*
1127 * We can have "ns" or "prefix" attributes
1128 * Old encoding as 'href' or 'AS' attributes is still supported
1129 */
1130 if ((CUR == 'n') && (NXT(1) == 's')) {
1131 garbage = 0;
1132 SKIP(2);
1133 SKIP_BLANKS;
1134
1135 if (CUR != '=') continue;
1136 NEXT;
1137 SKIP_BLANKS;
1138
1139 href = xmlParseQuotedString(ctxt);
1140 SKIP_BLANKS;
1141 } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1142 (NXT(2) == 'e') && (NXT(3) == 'f')) {
1143 garbage = 0;
1144 SKIP(4);
1145 SKIP_BLANKS;
1146
1147 if (CUR != '=') continue;
1148 NEXT;
1149 SKIP_BLANKS;
1150
1151 href = xmlParseQuotedString(ctxt);
1152 SKIP_BLANKS;
1153 } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1154 (NXT(2) == 'e') && (NXT(3) == 'f') &&
1155 (NXT(4) == 'i') && (NXT(5) == 'x')) {
1156 garbage = 0;
1157 SKIP(6);
1158 SKIP_BLANKS;
1159
1160 if (CUR != '=') continue;
1161 NEXT;
1162 SKIP_BLANKS;
1163
1164 prefix = xmlParseQuotedString(ctxt);
1165 SKIP_BLANKS;
1166 } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1167 garbage = 0;
1168 SKIP(2);
1169 SKIP_BLANKS;
1170
1171 if (CUR != '=') continue;
1172 NEXT;
1173 SKIP_BLANKS;
1174
1175 prefix = xmlParseQuotedString(ctxt);
1176 SKIP_BLANKS;
1177 } else if ((CUR == '?') && (NXT(1) == '>')) {
1178 garbage = 0;
1179 CUR_PTR ++;
1180 } else {
1181 /*
1182 * Found garbage when parsing the namespace
1183 */
1184 if (!garbage)
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1186 ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001187 NEXT;
1188 }
1189 }
1190
1191 MOVETO_ENDTAG(CUR_PTR);
1192 NEXT;
1193
1194 /*
1195 * Register the DTD.
1196 */
1197 if (href != NULL)
1198 xmlNewGlobalNs(ctxt->doc, href, prefix);
1199
1200 if (prefix != NULL) free(prefix);
1201 if (href != NULL) free(href);
1202}
1203
1204/************************************************************************
1205 * *
1206 * The parser itself *
1207 * Relates to http://www.w3.org/TR/REC-xml *
1208 * *
1209 ************************************************************************/
1210
Daniel Veillard11e00581998-10-24 18:27:49 +00001211/**
1212 * xmlParseName:
1213 * @ctxt: an XML parser context
1214 *
1215 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001216 *
1217 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1218 * CombiningChar | Extender
1219 *
1220 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1221 *
1222 * [6] Names ::= Name (S Name)*
Daniel Veillard11e00581998-10-24 18:27:49 +00001223 * return values: the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001224 */
1225
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001226CHAR *
1227xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001228 const CHAR *q;
1229 CHAR *ret = NULL;
1230
1231 if (!IS_LETTER(CUR) && (CUR != '_') &&
1232 (CUR != ':')) return(NULL);
1233 q = NEXT;
1234
1235 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1236 (CUR == '.') || (CUR == '-') ||
1237 (CUR == '_') || (CUR == ':') ||
1238 (IS_COMBINING(CUR)) ||
1239 (IS_EXTENDER(CUR)))
1240 NEXT;
1241
1242 ret = xmlStrndup(q, CUR_PTR - q);
1243
1244 return(ret);
1245}
1246
Daniel Veillard11e00581998-10-24 18:27:49 +00001247/**
1248 * xmlParseNmtoken:
1249 * @ctxt: an XML parser context
1250 *
1251 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001252 *
1253 * [7] Nmtoken ::= (NameChar)+
1254 *
1255 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard11e00581998-10-24 18:27:49 +00001256 * return values: the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001257 */
1258
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001259CHAR *
1260xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001261 const CHAR *q;
1262 CHAR *ret = NULL;
1263
1264 q = NEXT;
1265
1266 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1267 (CUR == '.') || (CUR == '-') ||
1268 (CUR == '_') || (CUR == ':') ||
1269 (IS_COMBINING(CUR)) ||
1270 (IS_EXTENDER(CUR)))
1271 NEXT;
1272
1273 ret = xmlStrndup(q, CUR_PTR - q);
1274
1275 return(ret);
1276}
1277
Daniel Veillard11e00581998-10-24 18:27:49 +00001278/**
1279 * xmlParseEntityValue:
1280 * @ctxt: an XML parser context
1281 *
1282 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001283 *
1284 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1285 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard11e00581998-10-24 18:27:49 +00001286 * return values: the EntityValue parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001287 */
1288
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001289CHAR *
1290xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001291 CHAR *ret = NULL, *cur;
1292 const CHAR *q;
1293
1294 if (CUR == '"') {
1295 NEXT;
1296
1297 q = CUR_PTR;
1298 while ((IS_CHAR(CUR)) && (CUR != '"')) {
1299 if (CUR == '%') {
1300 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001301 cur = xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001302 ret = xmlStrcat(ret, cur);
1303 q = CUR_PTR;
1304 } else if (CUR == '&') {
1305 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001306 cur = xmlParseReference(ctxt);
1307 if (cur != NULL) {
1308 CHAR buf[2];
1309 buf[0] = '&';
1310 buf[1] = 0;
1311 ret = xmlStrncat(ret, buf, 1);
1312 ret = xmlStrcat(ret, cur);
1313 buf[0] = ';';
1314 buf[1] = 0;
1315 ret = xmlStrncat(ret, buf, 1);
1316 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001317 q = CUR_PTR;
1318 } else
1319 NEXT;
1320 }
1321 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1323 ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001324 } else {
1325 ret = xmlStrncat(ret, q, CUR_PTR - q);
1326 NEXT;
1327 }
1328 } else if (CUR == '\'') {
1329 NEXT;
1330 q = CUR_PTR;
1331 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1332 if (CUR == '%') {
1333 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001334 cur = xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001335 ret = xmlStrcat(ret, cur);
1336 q = CUR_PTR;
1337 } else if (CUR == '&') {
1338 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001339 cur = xmlParseReference(ctxt);
1340 if (cur != NULL) {
1341 CHAR buf[2];
1342 buf[0] = '&';
1343 buf[1] = 0;
1344 ret = xmlStrncat(ret, buf, 1);
1345 ret = xmlStrcat(ret, cur);
1346 buf[0] = ';';
1347 buf[1] = 0;
1348 ret = xmlStrncat(ret, buf, 1);
1349 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001350 q = CUR_PTR;
1351 } else
1352 NEXT;
1353 }
1354 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1356 ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001357 } else {
1358 ret = xmlStrncat(ret, q, CUR_PTR - q);
1359 NEXT;
1360 }
1361 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363 ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001364 }
1365
1366 return(ret);
1367}
1368
Daniel Veillard11e00581998-10-24 18:27:49 +00001369/**
1370 * xmlParseAttValue:
1371 * @ctxt: an XML parser context
1372 *
1373 * parse a value for an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00001374 *
1375 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1376 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard11e00581998-10-24 18:27:49 +00001377 * return values: the AttValue parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001378 */
1379
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001380CHAR *
1381xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001382 CHAR *ret = NULL, *cur;
1383 const CHAR *q;
1384
1385 if (CUR == '"') {
1386 NEXT;
1387
1388 q = CUR_PTR;
1389 while ((IS_CHAR(CUR)) && (CUR != '"')) {
1390 if (CUR == '&') {
1391 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001392 cur = xmlParseReference(ctxt);
1393 if (cur != NULL) {
1394 /*
1395 * Special case for '&amp;', we don't want to
1396 * resolve it here since it will break later
1397 * when searching entities in the string.
1398 */
1399 if ((cur[0] == '&') && (cur[1] == 0)) {
1400 CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1401 ret = xmlStrncat(ret, buf, 5);
1402 } else
1403 ret = xmlStrcat(ret, cur);
1404 free(cur);
1405 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001406 q = CUR_PTR;
1407 } else
1408 NEXT;
Daniel Veillardccb09631998-10-27 06:21:04 +00001409 /*
1410 * Pop out finished entity references.
1411 */
1412 while ((CUR == 0) && (ctxt->inputNr > 1)) {
1413 if (CUR_PTR != q)
1414 ret = xmlStrncat(ret, q, CUR_PTR - q);
1415 xmlPopInput(ctxt);
1416 q = CUR_PTR;
1417 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001418 }
1419 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1421 ctxt->sax->error(ctxt, "Unfinished AttValue\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001422 } else {
1423 ret = xmlStrncat(ret, q, CUR_PTR - q);
1424 NEXT;
1425 }
1426 } else if (CUR == '\'') {
1427 NEXT;
1428 q = CUR_PTR;
1429 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1430 if (CUR == '&') {
1431 ret = xmlStrncat(ret, q, CUR_PTR - q);
Daniel Veillardccb09631998-10-27 06:21:04 +00001432 cur = xmlParseReference(ctxt);
1433 if (cur != NULL) {
1434 /*
1435 * Special case for '&amp;', we don't want to
1436 * resolve it here since it will break later
1437 * when searching entities in the string.
1438 */
1439 if ((cur[0] == '&') && (cur[1] == 0)) {
1440 CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1441 ret = xmlStrncat(ret, buf, 5);
1442 } else
1443 ret = xmlStrcat(ret, cur);
1444 free(cur);
1445 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001446 q = CUR_PTR;
1447 } else
1448 NEXT;
Daniel Veillardccb09631998-10-27 06:21:04 +00001449 /*
1450 * Pop out finished entity references.
1451 */
1452 while ((CUR == 0) && (ctxt->inputNr > 1)) {
1453 if (CUR_PTR != q)
1454 ret = xmlStrncat(ret, q, CUR_PTR - q);
1455 xmlPopInput(ctxt);
1456 q = CUR_PTR;
1457 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001458 }
1459 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1461 ctxt->sax->error(ctxt, "Unfinished AttValue\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001462 } else {
1463 ret = xmlStrncat(ret, q, CUR_PTR - q);
1464 NEXT;
1465 }
1466 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1468 ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001469 }
1470
1471 return(ret);
1472}
1473
Daniel Veillard11e00581998-10-24 18:27:49 +00001474/**
1475 * xmlParseSystemLiteral:
1476 * @ctxt: an XML parser context
1477 *
1478 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00001479 *
1480 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00001481 * return values: the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001482 */
1483
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001484CHAR *
1485xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001486 const CHAR *q;
1487 CHAR *ret = NULL;
1488
1489 if (CUR == '"') {
1490 NEXT;
1491 q = CUR_PTR;
1492 while ((IS_CHAR(CUR)) && (CUR != '"'))
1493 NEXT;
1494 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1496 ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001497 } else {
1498 ret = xmlStrndup(q, CUR_PTR - q);
1499 NEXT;
1500 }
1501 } else if (CUR == '\'') {
1502 NEXT;
1503 q = CUR_PTR;
1504 while ((IS_CHAR(CUR)) && (CUR != '\''))
1505 NEXT;
1506 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508 ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001509 } else {
1510 ret = xmlStrndup(q, CUR_PTR - q);
1511 NEXT;
1512 }
1513 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1515 ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001516 }
1517
1518 return(ret);
1519}
1520
Daniel Veillard11e00581998-10-24 18:27:49 +00001521/**
1522 * xmlParsePubidLiteral:
1523 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00001524 *
Daniel Veillard11e00581998-10-24 18:27:49 +00001525 * parse an XML public literal
1526 * return values: the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001527 */
1528
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001529CHAR *
1530xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001531 const CHAR *q;
1532 CHAR *ret = NULL;
1533 /*
1534 * Name ::= (Letter | '_') (NameChar)*
1535 */
1536 if (CUR == '"') {
1537 NEXT;
1538 q = CUR_PTR;
1539 while (IS_PUBIDCHAR(CUR)) NEXT;
1540 if (CUR != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1542 ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001543 } else {
1544 ret = xmlStrndup(q, CUR_PTR - q);
1545 NEXT;
1546 }
1547 } else if (CUR == '\'') {
1548 NEXT;
1549 q = CUR_PTR;
1550 while ((IS_LETTER(CUR)) && (CUR != '\''))
1551 NEXT;
1552 if (!IS_LETTER(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1554 ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001555 } else {
1556 ret = xmlStrndup(q, CUR_PTR - q);
1557 NEXT;
1558 }
1559 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561 ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001562 }
1563
1564 return(ret);
1565}
1566
Daniel Veillard11e00581998-10-24 18:27:49 +00001567/**
1568 * xmlParseCharData:
1569 * @ctxt: an XML parser context
1570 * @cdata: int indicating whether we are within a CDATA section
1571 *
1572 * parse a CharData section.
1573 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001574 *
1575 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
Daniel Veillard11e00581998-10-24 18:27:49 +00001576 * return values:
Daniel Veillard260a68f1998-08-13 03:39:55 +00001577 */
1578
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001579void
1580xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001581 const CHAR *q;
1582
1583 q = CUR_PTR;
1584 while ((IS_CHAR(CUR)) && (CUR != '<') &&
1585 (CUR != '&')) {
1586 NEXT;
1587 if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1588 (NXT(2) == '>')) break;
1589 }
1590 if (q == CUR_PTR) return;
1591
1592 /*
1593 * Ok the segment [q CUR_PTR] is to be consumed as chars.
1594 */
1595 if (ctxt->sax != NULL) {
1596 if (areBlanks(ctxt, q, CUR_PTR - q))
1597 ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1598 else
1599 ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1600 }
1601}
1602
Daniel Veillard11e00581998-10-24 18:27:49 +00001603/**
1604 * xmlParseExternalID:
1605 * @ctxt: an XML parser context
1606 * @publicID: a CHAR** receiving PubidLiteral
1607 *
1608 * Parse an External ID
Daniel Veillard260a68f1998-08-13 03:39:55 +00001609 *
1610 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1611 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00001612 * return values: the function returns SystemLiteral and in the second
1613 * case publicID receives PubidLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00001614 */
1615
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001616CHAR *
1617xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001618 CHAR *URI = NULL;
1619
1620 if ((CUR == 'S') && (NXT(1) == 'Y') &&
1621 (NXT(2) == 'S') && (NXT(3) == 'T') &&
1622 (NXT(4) == 'E') && (NXT(5) == 'M')) {
1623 SKIP(6);
1624 SKIP_BLANKS;
1625 URI = xmlParseSystemLiteral(ctxt);
1626 if (URI == NULL)
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1628 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001629 "xmlParseExternalID: SYSTEM, no URI\n");
1630 } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1631 (NXT(2) == 'B') && (NXT(3) == 'L') &&
1632 (NXT(4) == 'I') && (NXT(5) == 'C')) {
1633 SKIP(6);
1634 SKIP_BLANKS;
1635 *publicID = xmlParsePubidLiteral(ctxt);
1636 if (*publicID == NULL)
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1638 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001639 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1640 SKIP_BLANKS;
1641 URI = xmlParseSystemLiteral(ctxt);
1642 if (URI == NULL)
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001645 "xmlParseExternalID: PUBLIC, no URI\n");
1646 }
1647 return(URI);
1648}
1649
Daniel Veillard11e00581998-10-24 18:27:49 +00001650/**
1651 * xmlParseComment:
1652 * @create: should we create a node
1653 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001654 * Skip an XML (SGML) comment <!-- .... -->
1655 * This may or may not create a node (depending on the context)
1656 * The spec says that "For compatibility, the string "--" (double-hyphen)
1657 * must not occur within comments. "
1658 *
1659 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
Daniel Veillard11e00581998-10-24 18:27:49 +00001660 *
1661 * TODO: this should call a SAX function which will handle (or not) the
1662 * creation of the comment !
1663 * return values:
Daniel Veillard260a68f1998-08-13 03:39:55 +00001664 */
1665xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1666 xmlNodePtr ret = NULL;
1667 const CHAR *q, *start;
1668 const CHAR *r;
1669 CHAR *val;
1670
1671 /*
1672 * Check that there is a comment right here.
1673 */
1674 if ((CUR != '<') || (NXT(1) != '!') ||
1675 (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1676
1677 SKIP(4);
1678 start = q = CUR_PTR;
1679 NEXT;
1680 r = CUR_PTR;
1681 NEXT;
1682 while (IS_CHAR(CUR) &&
1683 ((CUR == ':') || (CUR != '>') ||
1684 (*r != '-') || (*q != '-'))) {
1685 if ((*r == '-') && (*q == '-'))
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1687 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001688 "Comment must not contain '--' (double-hyphen)`\n");
1689 NEXT;r++;q++;
1690 }
1691 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1693 ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001694 } else {
1695 NEXT;
1696 if (create) {
1697 val = xmlStrndup(start, q - start);
Daniel Veillard0bef1311998-10-14 02:36:47 +00001698 ret = xmlNewDocComment(ctxt->doc, val);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001699 free(val);
1700 }
1701 }
1702 return(ret);
1703}
1704
Daniel Veillard11e00581998-10-24 18:27:49 +00001705/**
1706 * xmlParsePITarget:
1707 * @ctxt: an XML parser context
1708 *
1709 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00001710 *
1711 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard11e00581998-10-24 18:27:49 +00001712 * return values: the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001713 */
1714
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001715CHAR *
1716xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001717 CHAR *name;
1718
1719 name = xmlParseName(ctxt);
1720 if ((name != NULL) && (name[3] == 0) &&
1721 ((name[0] == 'x') || (name[0] == 'X')) &&
1722 ((name[1] == 'm') || (name[1] == 'M')) &&
1723 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1725 ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001726 return(NULL);
1727 }
1728 return(name);
1729}
1730
Daniel Veillard11e00581998-10-24 18:27:49 +00001731/**
1732 * xmlParsePI:
1733 * @ctxt: an XML parser context
1734 *
1735 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001736 *
1737 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard11e00581998-10-24 18:27:49 +00001738 * return values: the PI name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001739 */
1740
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001741void
1742xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001743 CHAR *target;
1744
1745 if ((CUR == '<') && (NXT(1) == '?')) {
1746 /*
1747 * this is a Processing Instruction.
1748 */
1749 SKIP(2);
1750
1751 /*
1752 * Parse the target name and check for special support like
1753 * namespace.
1754 *
1755 * TODO : PI handling should be dynamically redefinable using an
1756 * API. Only namespace should be in the code IMHO ...
1757 */
1758 target = xmlParsePITarget(ctxt);
1759 if (target != NULL) {
1760 /*
1761 * Support for the old Processing Instruction related to namespace.
1762 */
1763 if ((target[0] == 'n') && (target[1] == 'a') &&
1764 (target[2] == 'm') && (target[3] == 'e') &&
1765 (target[4] == 's') && (target[5] == 'p') &&
1766 (target[6] == 'a') && (target[7] == 'c') &&
1767 (target[8] == 'e')) {
1768 xmlParseNamespace(ctxt);
1769 } else if ((target[0] == 'x') && (target[1] == 'm') &&
1770 (target[2] == 'l') && (target[3] == ':') &&
1771 (target[4] == 'n') && (target[5] == 'a') &&
1772 (target[6] == 'm') && (target[7] == 'e') &&
1773 (target[8] == 's') && (target[9] == 'p') &&
1774 (target[10] == 'a') && (target[11] == 'c') &&
1775 (target[12] == 'e')) {
1776 xmlParseNamespace(ctxt);
1777 } else {
1778 const CHAR *q = CUR_PTR;
1779
1780 while (IS_CHAR(CUR) &&
1781 ((CUR != '?') || (NXT(1) != '>')))
1782 NEXT;
1783 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1785 ctxt->sax->error(ctxt, "xmlParsePI: PI %s never end ...\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00001786 target);
1787 } else {
1788 CHAR *data;
1789
1790 data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1791 SKIP(2);
1792
1793 /*
1794 * SAX: PI detected.
1795 */
1796 if (ctxt->sax)
1797 ctxt->sax->processingInstruction(ctxt, target, data);
1798 /*
1799 * Unknown PI, ignore it !
1800 */
1801 else
1802 xmlParserWarning(ctxt,
1803 "xmlParsePI : skipping unknown PI %s\n",
1804 target);
1805 free(data);
1806 }
1807 }
1808 free(target);
1809 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1811 ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001812 /********* Should we try to complete parsing the PI ???
1813 while (IS_CHAR(CUR) &&
1814 (CUR != '?') && (CUR != '>'))
1815 NEXT;
1816 if (!IS_CHAR(CUR)) {
1817 fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1818 target);
1819 }
1820 ********************************************************/
1821 }
1822 }
1823}
1824
Daniel Veillard11e00581998-10-24 18:27:49 +00001825/**
1826 * xmlParseNotationDecl:
1827 * @ctxt: an XML parser context
1828 *
1829 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00001830 *
1831 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1832 *
1833 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1834 *
1835 * NOTE: Actually [75] and [83] interract badly since [75] can generate
1836 * 'PUBLIC' S PubidLiteral S SystemLiteral
1837 *
1838 * Hence there is actually 3 choices:
1839 * 'PUBLIC' S PubidLiteral
1840 * 'PUBLIC' S PubidLiteral S SystemLiteral
1841 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00001842 *
1843 * TODO: no handling of the values parsed !
Daniel Veillard260a68f1998-08-13 03:39:55 +00001844 */
1845
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001846void
1847xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001848 CHAR *name;
1849
1850 if ((CUR == '<') && (NXT(1) == '!') &&
1851 (NXT(2) == 'N') && (NXT(3) == 'O') &&
1852 (NXT(4) == 'T') && (NXT(5) == 'A') &&
1853 (NXT(6) == 'T') && (NXT(7) == 'I') &&
1854 (NXT(8) == 'O') && (NXT(9) == 'N') &&
1855 (IS_BLANK(NXT(10)))) {
1856 SKIP(10);
1857 SKIP_BLANKS;
1858
1859 name = xmlParseName(ctxt);
1860 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1862 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001863 "xmlParseAttributeListDecl: no name for Element\n");
1864 return;
1865 }
1866 SKIP_BLANKS;
1867 /*
1868 * TODO !!!
1869 */
1870 while ((IS_CHAR(CUR)) && (CUR != '>'))
1871 NEXT;
1872 free(name);
1873 }
1874}
1875
Daniel Veillard11e00581998-10-24 18:27:49 +00001876/**
1877 * xmlParseEntityDecl:
1878 * @ctxt: an XML parser context
1879 *
1880 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00001881 *
1882 * [70] EntityDecl ::= GEDecl | PEDecl
1883 *
1884 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1885 *
1886 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1887 *
1888 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1889 *
1890 * [74] PEDef ::= EntityValue | ExternalID
1891 *
1892 * [76] NDataDecl ::= S 'NDATA' S Name
1893 */
1894
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001895void
1896xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001897 CHAR *name = NULL;
1898 CHAR *value = NULL;
1899 CHAR *URI = NULL, *literal = NULL;
1900 CHAR *ndata = NULL;
1901 int isParameter = 0;
1902
1903 if ((CUR == '<') && (NXT(1) == '!') &&
1904 (NXT(2) == 'E') && (NXT(3) == 'N') &&
1905 (NXT(4) == 'T') && (NXT(5) == 'I') &&
1906 (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1907 (IS_BLANK(NXT(8)))) {
1908 SKIP(8);
1909 SKIP_BLANKS;
1910
1911 if (CUR == '%') {
1912 NEXT;
1913 SKIP_BLANKS;
1914 isParameter = 1;
1915 }
1916
1917 name = xmlParseName(ctxt);
1918 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1920 ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00001921 return;
1922 }
1923 SKIP_BLANKS;
1924
1925 /*
1926 * TODO handle the various case of definitions...
1927 */
1928 if (isParameter) {
1929 if ((CUR == '"') || (CUR == '\''))
1930 value = xmlParseEntityValue(ctxt);
1931 if (value) {
1932 xmlAddDocEntity(ctxt->doc, name,
1933 XML_INTERNAL_PARAMETER_ENTITY,
1934 NULL, NULL, value);
1935 }
1936 else {
1937 URI = xmlParseExternalID(ctxt, &literal);
1938 if (URI) {
1939 xmlAddDocEntity(ctxt->doc, name,
1940 XML_EXTERNAL_PARAMETER_ENTITY,
1941 literal, URI, NULL);
1942 }
1943 }
1944 } else {
1945 if ((CUR == '"') || (CUR == '\'')) {
1946 value = xmlParseEntityValue(ctxt);
1947 xmlAddDocEntity(ctxt->doc, name,
1948 XML_INTERNAL_GENERAL_ENTITY,
1949 NULL, NULL, value);
1950 } else {
1951 URI = xmlParseExternalID(ctxt, &literal);
1952 SKIP_BLANKS;
1953 if ((CUR == 'N') && (NXT(1) == 'D') &&
1954 (NXT(2) == 'A') && (NXT(3) == 'T') &&
1955 (NXT(4) == 'A')) {
1956 SKIP(5);
1957 SKIP_BLANKS;
1958 ndata = xmlParseName(ctxt);
1959 xmlAddDocEntity(ctxt->doc, name,
1960 XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1961 literal, URI, ndata);
1962 } else {
1963 xmlAddDocEntity(ctxt->doc, name,
1964 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1965 literal, URI, NULL);
1966 }
1967 }
1968 }
1969 SKIP_BLANKS;
1970 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1972 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001973 "xmlParseEntityDecl: entity %s not terminated\n", name);
1974 } else
1975 NEXT;
1976 if (name != NULL) free(name);
1977 if (value != NULL) free(value);
1978 if (URI != NULL) free(URI);
1979 if (literal != NULL) free(literal);
1980 if (ndata != NULL) free(ndata);
1981 }
1982}
1983
Daniel Veillard11e00581998-10-24 18:27:49 +00001984/**
1985 * xmlParseEnumeratedType:
1986 * @ctxt: an XML parser context
1987 * @name: ???
1988 * @:
1989 *
1990 * parse and Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001991 *
1992 * [57] EnumeratedType ::= NotationType | Enumeration
1993 *
1994 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1995 *
1996 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
Daniel Veillard11e00581998-10-24 18:27:49 +00001997 *
1998 * TODO: not implemented !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00001999 */
2000
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002001void
2002xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002003 /*
2004 * TODO !!!
2005 */
2006 while ((IS_CHAR(CUR)) && (CUR != '>'))
2007 NEXT;
2008}
2009
Daniel Veillard11e00581998-10-24 18:27:49 +00002010/**
2011 * xmlParseAttributeType:
2012 * @ctxt: an XML parser context
2013 * @name: ???
2014 *
2015 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00002016 *
2017 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2018 *
2019 * [55] StringType ::= 'CDATA'
2020 *
2021 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2022 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00002023 *
2024 * TODO: not implemented !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00002025 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002026void
2027xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002028 /* TODO !!! */
2029 if ((CUR == 'C') && (NXT(1) == 'D') &&
2030 (NXT(2) == 'A') && (NXT(3) == 'T') &&
2031 (NXT(4) == 'A')) {
2032 SKIP(5);
2033 } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2034 SKIP(2);
2035 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2036 (NXT(2) == 'R') && (NXT(3) == 'E') &&
2037 (NXT(4) == 'F')) {
2038 SKIP(5);
2039 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2040 (NXT(2) == 'R') && (NXT(3) == 'E') &&
2041 (NXT(4) == 'F') && (NXT(5) == 'S')) {
2042 SKIP(6);
2043 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2044 (NXT(2) == 'T') && (NXT(3) == 'I') &&
2045 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2046 SKIP(6);
2047 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2048 (NXT(2) == 'T') && (NXT(3) == 'I') &&
2049 (NXT(4) == 'T') && (NXT(5) == 'I') &&
2050 (NXT(6) == 'E') && (NXT(7) == 'S')) {
2051 SKIP(8);
2052 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2053 (NXT(2) == 'T') && (NXT(3) == 'O') &&
2054 (NXT(4) == 'K') && (NXT(5) == 'E') &&
2055 (NXT(6) == 'N')) {
2056 SKIP(7);
2057 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2058 (NXT(2) == 'T') && (NXT(3) == 'O') &&
2059 (NXT(4) == 'K') && (NXT(5) == 'E') &&
2060 (NXT(6) == 'N') && (NXT(7) == 'S')) {
2061 } else {
2062 xmlParseEnumeratedType(ctxt, name);
2063 }
2064}
2065
Daniel Veillard11e00581998-10-24 18:27:49 +00002066/**
2067 * xmlParseAttributeListDecl:
2068 * @ctxt: an XML parser context
2069 *
2070 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00002071 *
2072 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2073 *
2074 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00002075 *
2076 * TODO: not implemented !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00002077 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002078void
2079xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002080 CHAR *name;
2081
2082 /* TODO !!! */
2083 if ((CUR == '<') && (NXT(1) == '!') &&
2084 (NXT(2) == 'A') && (NXT(3) == 'T') &&
2085 (NXT(4) == 'T') && (NXT(5) == 'L') &&
2086 (NXT(6) == 'I') && (NXT(7) == 'S') &&
2087 (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
2088 SKIP(9);
2089 SKIP_BLANKS;
2090 name = xmlParseName(ctxt);
2091 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2093 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002094 "xmlParseAttributeListDecl: no name for Element\n");
2095 return;
2096 }
2097 SKIP_BLANKS;
2098 while (CUR != '>') {
2099 const CHAR *check = CUR_PTR;
2100
2101 xmlParseAttributeType(ctxt, name);
2102 SKIP_BLANKS;
2103 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002106 "xmlParseAttributeListDecl: detected error\n");
2107 break;
2108 }
2109 }
2110 if (CUR == '>')
2111 NEXT;
2112
2113 free(name);
2114 }
2115}
2116
Daniel Veillard11e00581998-10-24 18:27:49 +00002117/**
2118 * xmlParseElementContentDecl:
2119 * @ctxt: an XML parser context
2120 * @name: ???
2121 *
2122 * parse the declaration for an Element content
2123 * either Mixed or Children, the cases EMPTY and ANY being handled
Daniel Veillard260a68f1998-08-13 03:39:55 +00002124 *
2125 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2126 *
2127 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2128 *
2129 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2130 *
2131 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2132 *
2133 * or
2134 *
2135 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2136 * '(' S? '#PCDATA' S? ')'
Daniel Veillard11e00581998-10-24 18:27:49 +00002137 *
2138 * TODO: not implemented !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00002139 */
2140
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002141void
2142xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002143 /*
2144 * TODO This has to be parsed correctly, currently we just skip until
2145 * we reach the first '>'.
2146 * !!!
2147 */
2148 while ((IS_CHAR(CUR)) && (CUR != '>'))
2149 NEXT;
2150}
2151
Daniel Veillard11e00581998-10-24 18:27:49 +00002152/**
2153 * xmlParseElementDecl:
2154 * @ctxt: an XML parser context
2155 *
2156 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002157 *
2158 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2159 *
2160 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
2161 *
2162 * TODO There is a check [ VC: Unique Element Type Declaration ]
2163 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002164void
2165xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002166 CHAR *name;
2167
2168 if ((CUR == '<') && (NXT(1) == '!') &&
2169 (NXT(2) == 'E') && (NXT(3) == 'L') &&
2170 (NXT(4) == 'E') && (NXT(5) == 'M') &&
2171 (NXT(6) == 'E') && (NXT(7) == 'N') &&
2172 (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
2173 SKIP(9);
2174 SKIP_BLANKS;
2175 name = xmlParseName(ctxt);
2176 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2178 ctxt->sax->error(ctxt, "xmlParseElementDecl: no name for Element\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002179 return;
2180 }
2181 SKIP_BLANKS;
2182 if ((CUR == 'E') && (NXT(1) == 'M') &&
2183 (NXT(2) == 'P') && (NXT(3) == 'T') &&
2184 (NXT(4) == 'Y')) {
2185 SKIP(5);
2186 /*
2187 * Element must always be empty.
2188 */
2189 } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2190 (NXT(2) == 'Y')) {
2191 SKIP(3);
2192 /*
2193 * Element is a generic container.
2194 */
2195 } else {
2196 xmlParseElementContentDecl(ctxt, name);
2197 }
2198 SKIP_BLANKS;
2199 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2201 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002202 "xmlParseElementDecl: expected '>' at the end\n");
2203 } else
2204 NEXT;
2205 }
2206}
2207
Daniel Veillard11e00581998-10-24 18:27:49 +00002208/**
2209 * xmlParseMarkupDecl:
2210 * @ctxt: an XML parser context
2211 *
2212 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002213 *
2214 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2215 * NotationDecl | PI | Comment
2216 *
2217 * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2218 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002219void
2220xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002221 xmlParseElementDecl(ctxt);
2222 xmlParseAttributeListDecl(ctxt);
2223 xmlParseEntityDecl(ctxt);
2224 xmlParseNotationDecl(ctxt);
2225 xmlParsePI(ctxt);
2226 xmlParseComment(ctxt, 0);
2227}
2228
Daniel Veillard11e00581998-10-24 18:27:49 +00002229/**
2230 * xmlParseCharRef:
2231 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002232 *
2233 * parse Reference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002234 *
2235 * [66] CharRef ::= '&#' [0-9]+ ';' |
2236 * '&#x' [0-9a-fA-F]+ ';'
Daniel Veillard11e00581998-10-24 18:27:49 +00002237 * return values: the value parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00002238 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002239CHAR *
2240xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002241 int val = 0;
2242 CHAR buf[2];
2243
2244 if ((CUR == '&') && (NXT(1) == '#') &&
2245 (NXT(2) == 'x')) {
2246 SKIP(3);
2247 while (CUR != ';') {
2248 if ((CUR >= '0') && (CUR <= '9'))
2249 val = val * 16 + (CUR - '0');
2250 else if ((CUR >= 'a') && (CUR <= 'f'))
2251 val = val * 16 + (CUR - 'a') + 10;
2252 else if ((CUR >= 'A') && (CUR <= 'F'))
2253 val = val * 16 + (CUR - 'A') + 10;
2254 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2256 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002257 val = 0;
2258 break;
2259 }
Daniel Veillard845664d1998-08-13 04:43:19 +00002260 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002261 }
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002262 if (CUR == ';')
Daniel Veillard260a68f1998-08-13 03:39:55 +00002263 NEXT;
2264 } else if ((CUR == '&') && (NXT(1) == '#')) {
2265 SKIP(2);
2266 while (CUR != ';') {
2267 if ((CUR >= '0') && (CUR <= '9'))
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002268 val = val * 10 + (CUR - '0');
Daniel Veillard260a68f1998-08-13 03:39:55 +00002269 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2271 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002272 val = 0;
2273 break;
2274 }
Daniel Veillard845664d1998-08-13 04:43:19 +00002275 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002276 }
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002277 if (CUR == ';')
Daniel Veillard260a68f1998-08-13 03:39:55 +00002278 NEXT;
2279 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2281 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002282 }
2283 /*
2284 * Check the value IS_CHAR ...
2285 */
2286 if (IS_CHAR(val)) {
2287 buf[0] = (CHAR) val;
2288 buf[1] = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00002289 return(xmlStrndup(buf, 1));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002290 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2292 ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002293 }
2294 return(NULL);
2295}
2296
Daniel Veillard11e00581998-10-24 18:27:49 +00002297/**
2298 * xmlParseEntityRef:
2299 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002300 *
2301 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002302 *
2303 * [68] EntityRef ::= '&' Name ';'
Daniel Veillardccb09631998-10-27 06:21:04 +00002304 * return values: the entity ref string or NULL if directly as input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002305 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002306CHAR *
2307xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002308 CHAR *ret = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00002309 const CHAR *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002310 CHAR *name;
Daniel Veillardccb09631998-10-27 06:21:04 +00002311 xmlParserInputPtr input = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002312
Daniel Veillardccb09631998-10-27 06:21:04 +00002313 q = CUR_PTR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002314 if (CUR == '&') {
2315 NEXT;
2316 name = xmlParseName(ctxt);
2317 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2319 ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002320 } else {
2321 if (CUR == ';') {
2322 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002323 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00002324 * We parsed the entity reference correctly, call SAX
2325 * interface for the proper behaviour:
2326 * - get a new input stream
2327 * - or keep the reference inline
Daniel Veillard260a68f1998-08-13 03:39:55 +00002328 */
Daniel Veillardccb09631998-10-27 06:21:04 +00002329 if (ctxt->sax)
2330 input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2331 if (input != NULL)
2332 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002333 else {
Daniel Veillardccb09631998-10-27 06:21:04 +00002334 ret = xmlStrndup(q, CUR_PTR - q);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002335 }
2336 } else {
2337 char cst[2] = { '&', 0 };
2338
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2340 ctxt->sax->error(ctxt, "xmlParseEntityRef: expecting ';'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002341 ret = xmlStrndup(cst, 1);
2342 ret = xmlStrcat(ret, name);
2343 }
2344 free(name);
2345 }
2346 }
2347 return(ret);
2348}
2349
Daniel Veillard11e00581998-10-24 18:27:49 +00002350/**
2351 * xmlParseReference:
2352 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002353 *
2354 * parse Reference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002355 *
2356 * [67] Reference ::= EntityRef | CharRef
Daniel Veillardccb09631998-10-27 06:21:04 +00002357 * return values: the entity string or NULL if handled directly by pushing
2358 * the entity value as the input.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002359 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002360CHAR *
2361xmlParseReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002362 if ((CUR == '&') && (NXT(1) == '#')) {
Daniel Veillardccb09631998-10-27 06:21:04 +00002363 return(xmlParseCharRef(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002364 } else if (CUR == '&') {
Daniel Veillardccb09631998-10-27 06:21:04 +00002365 return(xmlParseEntityRef(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002366 }
2367 return(NULL);
2368}
2369
Daniel Veillard11e00581998-10-24 18:27:49 +00002370/**
2371 * xmlParsePEReference:
2372 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002373 *
2374 * parse PEReference declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002375 *
2376 * [69] PEReference ::= '%' Name ';'
Daniel Veillardccb09631998-10-27 06:21:04 +00002377 * return values: the entity content or NULL if handled directly.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002378 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002379CHAR *
2380xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002381 CHAR *ret = NULL;
2382 CHAR *name;
2383 xmlEntityPtr entity;
Daniel Veillardccb09631998-10-27 06:21:04 +00002384 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002385
2386 if (CUR == '%') {
2387 NEXT;
2388 name = xmlParseName(ctxt);
2389 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2391 ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002392 } else {
2393 if (CUR == ';') {
2394 NEXT;
2395 entity = xmlGetDtdEntity(ctxt->doc, name);
2396 if (entity == NULL) {
Daniel Veillard42dc9b31998-11-09 01:17:21 +00002397 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2398 ctxt->sax->warning(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002399 "xmlParsePEReference: %%%s; not found\n");
Daniel Veillardccb09631998-10-27 06:21:04 +00002400 } else {
2401 input = xmlNewEntityInputStream(ctxt, entity);
2402 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002403 }
2404 } else {
Daniel Veillardccb09631998-10-27 06:21:04 +00002405 char cst[2] = { '%', 0 };
Daniel Veillard260a68f1998-08-13 03:39:55 +00002406
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2408 ctxt->sax->error(ctxt, "xmlParsePEReference: expecting ';'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002409 ret = xmlStrndup(cst, 1);
2410 ret = xmlStrcat(ret, name);
2411 }
2412 free(name);
2413 }
2414 }
2415 return(ret);
2416}
2417
Daniel Veillard11e00581998-10-24 18:27:49 +00002418/**
2419 * xmlParseDocTypeDecl :
2420 * @ctxt: an XML parser context
2421 *
2422 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00002423 *
2424 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2425 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
2426 */
2427
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002428void
2429xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002430 xmlDtdPtr dtd;
2431 CHAR *name;
2432 CHAR *ExternalID = NULL;
2433 CHAR *URI = NULL;
2434
2435 /*
2436 * We know that '<!DOCTYPE' has been detected.
2437 */
2438 SKIP(9);
2439
2440 SKIP_BLANKS;
2441
2442 /*
2443 * Parse the DOCTYPE name.
2444 */
2445 name = xmlParseName(ctxt);
2446 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448 ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002449 }
2450
2451 SKIP_BLANKS;
2452
2453 /*
2454 * Check for SystemID and ExternalID
2455 */
2456 URI = xmlParseExternalID(ctxt, &ExternalID);
2457 SKIP_BLANKS;
2458
2459 dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
2460
2461 /*
2462 * Is there any DTD definition ?
2463 */
2464 if (CUR == '[') {
2465 NEXT;
2466 /*
2467 * Parse the succession of Markup declarations and
2468 * PEReferences.
2469 * Subsequence (markupdecl | PEReference | S)*
2470 */
2471 while (CUR != ']') {
2472 const CHAR *check = CUR_PTR;
2473
2474 SKIP_BLANKS;
2475 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00002476 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002477
2478 if (CUR_PTR == check) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2480 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002481 "xmlParseDocTypeDecl: error detected in Markup declaration\n");
2482 break;
2483 }
2484 }
2485 if (CUR == ']') NEXT;
2486 }
2487
2488 /*
2489 * We should be at the end of the DOCTYPE declaration.
2490 */
2491 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2493 ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002494 /* We shouldn't try to resynchronize ... */
2495 }
2496 NEXT;
2497
2498 /*
2499 * Cleanup, since we don't use all those identifiers
2500 * TODO : the DOCTYPE if available should be stored !
2501 */
2502 if (URI != NULL) free(URI);
2503 if (ExternalID != NULL) free(ExternalID);
2504 if (name != NULL) free(name);
2505}
2506
Daniel Veillard11e00581998-10-24 18:27:49 +00002507/**
2508 * xmlParseAttribute:
2509 * @ctxt: an XML parser context
2510 * @node: the node carrying the attribute
2511 *
2512 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00002513 *
2514 * [41] Attribute ::= Name Eq AttValue
2515 *
2516 * [25] Eq ::= S? '=' S?
2517 *
2518 * With namespace:
2519 *
2520 * [NS 11] Attribute ::= QName Eq AttValue
2521 *
2522 * Also the case QName == xmlns:??? is handled independently as a namespace
2523 * definition.
2524 */
2525
Daniel Veillardccb09631998-10-27 06:21:04 +00002526xmlAttrPtr xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
2527 CHAR *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002528 CHAR *ns;
Daniel Veillardccb09631998-10-27 06:21:04 +00002529 CHAR *value = NULL;
2530 xmlAttrPtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002531
2532 name = xmlNamespaceParseQName(ctxt, &ns);
2533 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2535 ctxt->sax->error(ctxt, "error parsing attribute name\n");
Daniel Veillardccb09631998-10-27 06:21:04 +00002536 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002537 }
2538
2539 /*
2540 * read the value
2541 */
2542 SKIP_BLANKS;
2543 if (CUR == '=') {
2544 NEXT;
2545 SKIP_BLANKS;
2546 value = xmlParseAttValue(ctxt);
2547 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2549 ctxt->sax->error(ctxt, "Specification mandate value for attribute %s\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00002550 name);
2551 }
2552
2553 /*
2554 * Check whether it's a namespace definition
2555 */
2556 if ((ns == NULL) &&
2557 (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
2558 (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
2559 /* a default namespace definition */
2560 xmlNewNs(node, value, NULL);
2561 if (name != NULL)
2562 free(name);
2563 if (value != NULL)
2564 free(value);
Daniel Veillardccb09631998-10-27 06:21:04 +00002565 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002566 }
2567 if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
2568 (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
2569 /* a standard namespace definition */
2570 xmlNewNs(node, value, name);
Daniel Veillardccb09631998-10-27 06:21:04 +00002571 free(ns);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002572 if (name != NULL)
2573 free(name);
2574 if (value != NULL)
2575 free(value);
Daniel Veillardccb09631998-10-27 06:21:04 +00002576 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002577 }
2578
Daniel Veillardccb09631998-10-27 06:21:04 +00002579 ret = xmlNewProp(ctxt->node, name, NULL);
2580 if (ret != NULL)
2581 ret->val = xmlStringGetNodeList(ctxt->doc, value);
2582
2583 if (ns != NULL)
2584 free(ns);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002585 if (value != NULL)
Daniel Veillardccb09631998-10-27 06:21:04 +00002586 free(value);
2587 free(name);
2588 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002589}
2590
Daniel Veillard11e00581998-10-24 18:27:49 +00002591/**
2592 * xmlParseStartTag:
2593 * @ctxt: an XML parser context
2594 *
2595 * parse a start of tag either for rule element or
2596 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002597 *
2598 * [40] STag ::= '<' Name (S Attribute)* S? '>'
2599 *
2600 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
2601 *
2602 * With namespace:
2603 *
2604 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
2605 *
2606 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard11e00581998-10-24 18:27:49 +00002607 *
2608 * return values: the XML new node or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002609 */
2610
2611xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
2612 CHAR *namespace, *name;
2613 xmlNsPtr ns = NULL;
2614 xmlNodePtr ret = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00002615 xmlNodePtr parent = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002616
2617 if (CUR != '<') return(NULL);
2618 NEXT;
2619
2620 name = xmlNamespaceParseQName(ctxt, &namespace);
Daniel Veillardccb09631998-10-27 06:21:04 +00002621 if (name == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002622
2623 /*
2624 * Note : the namespace resolution is deferred until the end of the
2625 * attributes parsing, since local namespace can be defined as
2626 * an attribute at this level.
2627 */
Daniel Veillard0bef1311998-10-14 02:36:47 +00002628 ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
Daniel Veillardccb09631998-10-27 06:21:04 +00002629 if (ret == NULL) {
2630 if (namespace != NULL)
2631 free(namespace);
2632 free(name);
2633 return(NULL);
2634 }
2635
2636 /*
2637 * We are parsing a new node.
2638 */
2639 nodePush(ctxt, ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002640
2641 /*
2642 * Now parse the attributes, it ends up with the ending
2643 *
2644 * (S Attribute)* S?
2645 */
2646 SKIP_BLANKS;
2647 while ((IS_CHAR(CUR)) &&
2648 (CUR != '>') &&
2649 ((CUR != '/') || (NXT(1) != '>'))) {
2650 const CHAR *q = CUR_PTR;
2651
2652 xmlParseAttribute(ctxt, ret);
2653 SKIP_BLANKS;
2654
2655 if (q == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2657 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002658 "xmlParseStartTag: problem parsing attributes\n");
2659 break;
2660 }
2661 }
2662
2663 /*
2664 * Search the namespace
2665 */
2666 ns = xmlSearchNs(ctxt->doc, ret, namespace);
2667 if (ns == NULL) /* ret still doesn't have a parent yet ! */
Daniel Veillardccb09631998-10-27 06:21:04 +00002668 ns = xmlSearchNs(ctxt->doc, parent, namespace);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002669 xmlSetNs(ret, ns);
2670 if (namespace != NULL)
2671 free(namespace);
2672
2673 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00002674 * SAX: Start of Element !
2675 */
2676 if (ctxt->sax != NULL)
2677 ctxt->sax->startElement(ctxt, name);
Daniel Veillardccb09631998-10-27 06:21:04 +00002678 free(name);
2679
2680 /*
2681 * Link the child element
2682 */
2683 if (ctxt->nodeNr < 2) return(ret);
2684 parent = ctxt->nodeTab[ctxt->nodeNr - 2];
2685 if (parent != NULL)
2686 xmlAddChild(parent, ctxt->node);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002687
2688 return(ret);
2689}
2690
Daniel Veillard11e00581998-10-24 18:27:49 +00002691/**
2692 * xmlParseEndTag:
2693 * @ctxt: an XML parser context
2694 * @nsPtr: the current node namespace definition
2695 * @tagPtr: CHAR** receive the tag value
2696 *
2697 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00002698 *
2699 * [42] ETag ::= '</' Name S? '>'
2700 *
2701 * With namespace
2702 *
2703 * [9] ETag ::= '</' QName S? '>'
Daniel Veillard11e00581998-10-24 18:27:49 +00002704 *
2705 * return values: tagPtr receive the tag name just read
Daniel Veillard260a68f1998-08-13 03:39:55 +00002706 */
2707
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002708void
2709xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002710 CHAR *namespace, *name;
2711 xmlNsPtr ns = NULL;
2712
2713 *nsPtr = NULL;
2714 *tagPtr = NULL;
2715
2716 if ((CUR != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2718 ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002719 return;
2720 }
2721 SKIP(2);
2722
2723 name = xmlNamespaceParseQName(ctxt, &namespace);
2724
2725 /*
2726 * Search the namespace
2727 */
2728 ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2729 if (namespace != NULL)
2730 free(namespace);
2731
2732 *nsPtr = ns;
2733 *tagPtr = name;
2734
2735 /*
2736 * We should definitely be at the ending "S? '>'" part
2737 */
2738 SKIP_BLANKS;
2739 if ((!IS_CHAR(CUR)) || (CUR != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2741 ctxt->sax->error(ctxt, "End tag : expected '>'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002742 } else
2743 NEXT;
2744
2745 return;
2746}
2747
Daniel Veillard11e00581998-10-24 18:27:49 +00002748/**
2749 * xmlParseCDSect:
2750 * @ctxt: an XML parser context
2751 *
2752 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002753 *
2754 * [18] CDSect ::= CDStart CData CDEnd
2755 *
2756 * [19] CDStart ::= '<![CDATA['
2757 *
2758 * [20] Data ::= (Char* - (Char* ']]>' Char*))
2759 *
2760 * [21] CDEnd ::= ']]>'
2761 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002762void
2763xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002764 const CHAR *r, *s, *base;
2765
2766 if ((CUR == '<') && (NXT(1) == '!') &&
2767 (NXT(2) == '[') && (NXT(3) == 'C') &&
2768 (NXT(4) == 'D') && (NXT(5) == 'A') &&
2769 (NXT(6) == 'T') && (NXT(7) == 'A') &&
2770 (NXT(8) == '[')) {
2771 SKIP(9);
2772 } else
2773 return;
2774 base = CUR_PTR;
2775 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2777 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002778 return;
2779 }
2780 r = NEXT;
2781 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002784 return;
2785 }
2786 s = NEXT;
2787 while (IS_CHAR(CUR) &&
2788 ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2789 r++;s++;NEXT;
2790 }
2791 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2793 ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002794 return;
2795 }
2796
2797 /*
2798 * Ok the segment [base CUR_PTR] is to be consumed as chars.
2799 */
2800 if (ctxt->sax != NULL) {
2801 if (areBlanks(ctxt, base, CUR_PTR - base))
2802 ctxt->sax->ignorableWhitespace(ctxt, base, 0, CUR_PTR - base);
2803 else
2804 ctxt->sax->characters(ctxt, base, 0, CUR_PTR - base);
2805 }
2806}
2807
Daniel Veillard11e00581998-10-24 18:27:49 +00002808/**
2809 * xmlParseContent:
2810 * @ctxt: an XML parser context
2811 *
2812 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00002813 *
2814 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
2815 */
2816
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002817void
2818xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002819 xmlNodePtr ret = NULL;
2820
2821 while ((CUR != '<') || (NXT(1) != '/')) {
2822 const CHAR *test = CUR_PTR;
2823 ret = NULL;
2824
2825 /*
2826 * First case : a Processing Instruction.
2827 */
2828 if ((CUR == '<') && (NXT(1) == '?')) {
2829 xmlParsePI(ctxt);
2830 }
2831 /*
2832 * Second case : a CDSection
2833 */
2834 else if ((CUR == '<') && (NXT(1) == '!') &&
2835 (NXT(2) == '[') && (NXT(3) == 'C') &&
2836 (NXT(4) == 'D') && (NXT(5) == 'A') &&
2837 (NXT(6) == 'T') && (NXT(7) == 'A') &&
2838 (NXT(8) == '[')) {
2839 xmlParseCDSect(ctxt);
2840 }
2841 /*
2842 * Third case : a comment
2843 */
2844 else if ((CUR == '<') && (NXT(1) == '!') &&
2845 (NXT(2) == '-') && (NXT(3) == '-')) {
2846 ret = xmlParseComment(ctxt, 1);
2847 }
2848 /*
2849 * Fourth case : a sub-element.
2850 */
2851 else if (CUR == '<') {
2852 ret = xmlParseElement(ctxt);
2853 }
2854 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00002855 * Fifth case : a reference. If if has not been resolved,
2856 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00002857 */
2858 else if (CUR == '&') {
Daniel Veillardccb09631998-10-27 06:21:04 +00002859 CHAR *val = xmlParseReference(ctxt);
2860 if (val != NULL) {
2861 if (val[0] != '&') {
2862 /*
2863 * inline predefined entity.
2864 */
2865 if (ctxt->sax != NULL)
2866 ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
2867 } else {
2868 /*
2869 * user defined entity, create a node.
2870 */
2871 ret = xmlNewReference(ctxt->doc, val);
2872 xmlAddChild(ctxt->node, ret);
2873 }
2874 free(val);
2875 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002876 }
2877 /*
2878 * Last case, text. Note that References are handled directly.
2879 */
2880 else {
2881 xmlParseCharData(ctxt, 0);
2882 }
2883
2884 /*
2885 * Pop-up of finished entities.
2886 */
2887 while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
2888
2889 if (test == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2891 ctxt->sax->error(ctxt, "detected an error in element content\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002892 break;
2893 }
2894 }
2895}
2896
Daniel Veillard11e00581998-10-24 18:27:49 +00002897/**
2898 * xmlParseElement:
2899 * @ctxt: an XML parser context
2900 *
2901 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00002902 *
2903 * [39] element ::= EmptyElemTag | STag content ETag
2904 *
2905 * [41] Attribute ::= Name Eq AttValue
Daniel Veillard11e00581998-10-24 18:27:49 +00002906 * return values: the XML new node or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002907 */
2908
2909
2910xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
2911 xmlNodePtr ret;
2912 const CHAR *openTag = CUR_PTR;
2913 xmlParserNodeInfo node_info;
2914 CHAR *endTag;
2915 xmlNsPtr endNs;
2916
2917 /* Capture start position */
2918 node_info.begin_pos = CUR_PTR - ctxt->input->base;
2919 node_info.begin_line = ctxt->input->line;
2920
2921 ret = xmlParseStartTag(ctxt);
2922 if (ret == NULL) {
2923 return(NULL);
2924 }
2925
2926 /*
2927 * Check for an Empty Element.
2928 */
2929 if ((CUR == '/') && (NXT(1) == '>')) {
2930 SKIP(2);
2931 if (ctxt->sax != NULL)
2932 ctxt->sax->endElement(ctxt, ret->name);
2933
2934 /*
2935 * end of parsing of this node.
2936 */
2937 nodePop(ctxt);
2938
2939 return(ret);
2940 }
2941 if (CUR == '>') NEXT;
2942 else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2944 ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002945
2946 /*
2947 * end of parsing of this node.
2948 */
2949 nodePop(ctxt);
2950
2951 return(NULL);
2952 }
2953
2954 /*
2955 * Parse the content of the element:
2956 */
2957 xmlParseContent(ctxt);
2958 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2960 ctxt->sax->error(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00002961 openTag);
2962
2963 /*
2964 * end of parsing of this node.
2965 */
2966 nodePop(ctxt);
2967
2968 return(NULL);
2969 }
2970
2971 /*
2972 * parse the end of tag: '</' should be here.
2973 */
2974 xmlParseEndTag(ctxt, &endNs, &endTag);
2975
2976 /*
2977 * Check that the Name in the ETag is the same as in the STag.
2978 */
2979 if (endNs != ret->ns) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2981 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002982 "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
2983 openTag, endTag);
2984 }
2985 if (endTag == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt, "The End tag has no name\n%.30s\n", openTag);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002988 } else if (xmlStrcmp(ret->name, endTag)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2990 ctxt->sax->error(ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002991 "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2992 openTag, endTag);
2993 }
2994 /*
2995 * SAX: End of Tag
2996 */
2997 else if (ctxt->sax != NULL)
2998 ctxt->sax->endElement(ctxt, endTag);
2999
3000 if (endTag != NULL)
3001 free(endTag);
3002
3003 /* Capture end position and add node */
3004 if ( ret != NULL && ctxt->record_info ) {
3005 node_info.end_pos = CUR_PTR - ctxt->input->base;
3006 node_info.end_line = ctxt->input->line;
3007 node_info.node = ret;
3008 xmlParserAddNodeInfo(ctxt, &node_info);
3009 }
3010
3011 /*
3012 * end of parsing of this node.
3013 */
3014 nodePop(ctxt);
3015
3016 return(ret);
3017}
3018
Daniel Veillard11e00581998-10-24 18:27:49 +00003019/**
3020 * xmlParseVersionNum:
3021 * @ctxt: an XML parser context
3022 *
3023 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003024 *
3025 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard11e00581998-10-24 18:27:49 +00003026 * return values: the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003027 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003028CHAR *
3029xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003030 const CHAR *q = CUR_PTR;
3031 CHAR *ret;
3032
3033 while (IS_CHAR(CUR) &&
3034 (((CUR >= 'a') && (CUR <= 'z')) ||
3035 ((CUR >= 'A') && (CUR <= 'Z')) ||
3036 ((CUR >= '0') && (CUR <= '9')) ||
3037 (CUR == '_') || (CUR == '.') ||
3038 (CUR == ':') || (CUR == '-'))) NEXT;
3039 ret = xmlStrndup(q, CUR_PTR - q);
3040 return(ret);
3041}
3042
Daniel Veillard11e00581998-10-24 18:27:49 +00003043/**
3044 * xmlParseVersionInfo:
3045 * @ctxt: an XML parser context
3046 *
3047 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003048 *
3049 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3050 *
3051 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00003052 *
3053 * return values: the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00003054 */
3055
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003056CHAR *
3057xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003058 CHAR *version = NULL;
3059 const CHAR *q;
3060
3061 if ((CUR == 'v') && (NXT(1) == 'e') &&
3062 (NXT(2) == 'r') && (NXT(3) == 's') &&
3063 (NXT(4) == 'i') && (NXT(5) == 'o') &&
3064 (NXT(6) == 'n')) {
3065 SKIP(7);
3066 SKIP_BLANKS;
3067 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3069 ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003070 return(NULL);
3071 }
3072 NEXT;
3073 SKIP_BLANKS;
3074 if (CUR == '"') {
3075 NEXT;
3076 q = CUR_PTR;
3077 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003078 if (CUR != '"') {
3079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3080 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3081 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003082 NEXT;
3083 } else if (CUR == '\''){
3084 NEXT;
3085 q = CUR_PTR;
3086 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003087 if (CUR != '\'') {
3088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3089 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3090 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003091 NEXT;
3092 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003095 }
3096 }
3097 return(version);
3098}
3099
Daniel Veillard11e00581998-10-24 18:27:49 +00003100/**
3101 * xmlParseEncName:
3102 * @ctxt: an XML parser context
3103 *
3104 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003105 *
3106 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00003107 *
3108 * return values: the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003109 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003110CHAR *
3111xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003112 const CHAR *q = CUR_PTR;
3113 CHAR *ret = NULL;
3114
3115 if (((CUR >= 'a') && (CUR <= 'z')) ||
3116 ((CUR >= 'A') && (CUR <= 'Z'))) {
3117 NEXT;
3118 while (IS_CHAR(CUR) &&
3119 (((CUR >= 'a') && (CUR <= 'z')) ||
3120 ((CUR >= 'A') && (CUR <= 'Z')) ||
3121 ((CUR >= '0') && (CUR <= '9')) ||
3122 (CUR == '-'))) NEXT;
3123 ret = xmlStrndup(q, CUR_PTR - q);
3124 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3126 ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003127 }
3128 return(ret);
3129}
3130
Daniel Veillard11e00581998-10-24 18:27:49 +00003131/**
3132 * xmlParseEncodingDecl:
3133 * @ctxt: an XML parser context
3134 *
3135 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003136 *
3137 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00003138 *
3139 * TODO: this should setup the conversion filters.
3140 *
3141 * return values: the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003142 */
3143
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003144CHAR *
3145xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003146 CHAR *encoding = NULL;
3147 const CHAR *q;
3148
3149 SKIP_BLANKS;
3150 if ((CUR == 'e') && (NXT(1) == 'n') &&
3151 (NXT(2) == 'c') && (NXT(3) == 'o') &&
3152 (NXT(4) == 'd') && (NXT(5) == 'i') &&
3153 (NXT(6) == 'n') && (NXT(7) == 'g')) {
3154 SKIP(8);
3155 SKIP_BLANKS;
3156 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3158 ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003159 return(NULL);
3160 }
3161 NEXT;
3162 SKIP_BLANKS;
3163 if (CUR == '"') {
3164 NEXT;
3165 q = CUR_PTR;
3166 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003167 if (CUR != '"') {
3168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3169 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3170 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003171 NEXT;
3172 } else if (CUR == '\''){
3173 NEXT;
3174 q = CUR_PTR;
3175 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003176 if (CUR != '\'') {
3177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3178 ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3179 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003180 NEXT;
3181 } else if (CUR == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3183 ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003184 }
3185 }
3186 return(encoding);
3187}
3188
Daniel Veillard11e00581998-10-24 18:27:49 +00003189/**
3190 * xmlParseSDDecl:
3191 * @ctxt: an XML parser context
3192 *
3193 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003194 *
3195 * [32] SDDecl ::= S 'standalone' Eq
3196 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard11e00581998-10-24 18:27:49 +00003197 * return values: 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00003198 */
3199
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003200int
3201xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003202 int standalone = -1;
3203
3204 SKIP_BLANKS;
3205 if ((CUR == 's') && (NXT(1) == 't') &&
3206 (NXT(2) == 'a') && (NXT(3) == 'n') &&
3207 (NXT(4) == 'd') && (NXT(5) == 'a') &&
3208 (NXT(6) == 'l') && (NXT(7) == 'o') &&
3209 (NXT(8) == 'n') && (NXT(9) == 'e')) {
3210 SKIP(10);
3211 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3213 ctxt->sax->error(ctxt, "XML standalone declaration : expected '='\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003214 return(standalone);
3215 }
3216 NEXT;
3217 SKIP_BLANKS;
3218 if (CUR == '\''){
3219 NEXT;
3220 if ((CUR == 'n') && (NXT(1) == 'o')) {
3221 standalone = 0;
3222 SKIP(2);
3223 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3224 (NXT(2) == 's')) {
3225 standalone = 1;
3226 SKIP(3);
3227 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003230 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003231 if (CUR != '\'') {
3232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3233 ctxt->sax->error(ctxt, "String not closed\n");
3234 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003235 NEXT;
3236 } else if (CUR == '"'){
3237 NEXT;
3238 if ((CUR == 'n') && (NXT(1) == 'o')) {
3239 standalone = 0;
3240 SKIP(2);
3241 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3242 (NXT(2) == 's')) {
3243 standalone = 1;
3244 SKIP(3);
3245 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003248 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003249 if (CUR != '"') {
3250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3251 ctxt->sax->error(ctxt, "String not closed\n");
3252 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00003253 NEXT;
3254 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt, "Standalone value not found\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003257 }
3258 }
3259 return(standalone);
3260}
3261
Daniel Veillard11e00581998-10-24 18:27:49 +00003262/**
3263 * xmlParseXMLDecl:
3264 * @ctxt: an XML parser context
3265 *
3266 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00003267 *
3268 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
3269 */
3270
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003271void
3272xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003273 CHAR *version;
3274
3275 /*
3276 * We know that '<?xml' is here.
3277 */
3278 SKIP(5);
3279
3280 SKIP_BLANKS;
3281
3282 /*
3283 * We should have the VersionInfo here.
3284 */
3285 version = xmlParseVersionInfo(ctxt);
3286 if (version == NULL)
3287 version = xmlCharStrdup(XML_DEFAULT_VERSION);
3288 ctxt->doc = xmlNewDoc(version);
3289 free(version);
3290
3291 /*
3292 * We may have the encoding declaration
3293 */
3294 ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
3295
3296 /*
3297 * We may have the standalone status.
3298 */
3299 ctxt->doc->standalone = xmlParseSDDecl(ctxt);
3300
3301 SKIP_BLANKS;
3302 if ((CUR == '?') && (NXT(1) == '>')) {
3303 SKIP(2);
3304 } else if (CUR == '>') {
3305 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3307 ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003308 NEXT;
3309 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003312 MOVETO_ENDTAG(CUR_PTR);
3313 NEXT;
3314 }
3315}
3316
Daniel Veillard11e00581998-10-24 18:27:49 +00003317/**
3318 * xmlParseMisc:
3319 * @ctxt: an XML parser context
3320 *
3321 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003322 *
3323 * [27] Misc ::= Comment | PI | S
3324 */
3325
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003326void
3327xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003328 while (((CUR == '<') && (NXT(1) == '?')) ||
3329 ((CUR == '<') && (NXT(1) == '!') &&
3330 (NXT(2) == '-') && (NXT(3) == '-')) ||
3331 IS_BLANK(CUR)) {
3332 if ((CUR == '<') && (NXT(1) == '?')) {
3333 xmlParsePI(ctxt);
3334 } else if (IS_BLANK(CUR)) {
3335 NEXT;
3336 } else
3337 xmlParseComment(ctxt, 0);
3338 }
3339}
3340
Daniel Veillard11e00581998-10-24 18:27:49 +00003341/**
3342 * xmlParseDocument :
3343 * @ctxt: an XML parser context
3344 *
3345 * parse an XML document (and build a tree if using the standard SAX
3346 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00003347 *
3348 * [1] document ::= prolog element Misc*
3349 *
3350 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00003351 *
3352 * return values: 0, -1 in case of error. the parser context is augmented
3353 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003354 */
3355
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003356int
3357xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003358 xmlDefaultSAXHandlerInit();
3359
3360 /*
3361 * SAX: beginning of the document processing.
3362 */
3363 if (ctxt->sax)
3364 ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
3365 if (ctxt->sax)
3366 ctxt->sax->startDocument(ctxt);
3367
3368 /*
3369 * We should check for encoding here and plug-in some
3370 * conversion code TODO !!!!
3371 */
3372
3373 /*
3374 * Wipe out everything which is before the first '<'
3375 */
3376 SKIP_BLANKS;
3377
3378 /*
3379 * Check for the XMLDecl in the Prolog.
3380 */
3381 if ((CUR == '<') && (NXT(1) == '?') &&
3382 (NXT(2) == 'x') && (NXT(3) == 'm') &&
3383 (NXT(4) == 'l')) {
3384 xmlParseXMLDecl(ctxt);
3385 /* SKIP_EOL(cur); */
3386 SKIP_BLANKS;
3387 } else if ((CUR == '<') && (NXT(1) == '?') &&
3388 (NXT(2) == 'X') && (NXT(3) == 'M') &&
3389 (NXT(4) == 'L')) {
3390 /*
3391 * The first drafts were using <?XML and the final W3C REC
3392 * now use <?xml ...
3393 */
3394 xmlParseXMLDecl(ctxt);
3395 /* SKIP_EOL(cur); */
3396 SKIP_BLANKS;
3397 } else {
3398 CHAR *version;
3399
3400 version = xmlCharStrdup(XML_DEFAULT_VERSION);
3401 ctxt->doc = xmlNewDoc(version);
3402 free(version);
3403 }
3404
3405 /*
3406 * The Misc part of the Prolog
3407 */
3408 xmlParseMisc(ctxt);
3409
3410 /*
3411 * Then possibly doc type declaration(s) and more Misc
3412 * (doctypedecl Misc*)?
3413 */
3414 if ((CUR == '<') && (NXT(1) == '!') &&
3415 (NXT(2) == 'D') && (NXT(3) == 'O') &&
3416 (NXT(4) == 'C') && (NXT(5) == 'T') &&
3417 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
3418 (NXT(8) == 'E')) {
3419 xmlParseDocTypeDecl(ctxt);
3420 xmlParseMisc(ctxt);
3421 }
3422
3423 /*
3424 * Time to start parsing the tree itself
3425 */
3426 ctxt->doc->root = xmlParseElement(ctxt);
3427
3428 /*
3429 * The Misc part at the end
3430 */
3431 xmlParseMisc(ctxt);
3432
3433 /*
3434 * SAX: end of the document processing.
3435 */
3436 if (ctxt->sax)
3437 ctxt->sax->endDocument(ctxt);
3438 return(0);
3439}
3440
Daniel Veillard11e00581998-10-24 18:27:49 +00003441/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003442 * xmlSAXParseDoc :
3443 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00003444 * @cur: a pointer to an array of CHAR
3445 *
3446 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003447 * It use the given SAX function block to handle the parsing callback.
3448 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00003449 *
3450 * return values: the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00003451 */
3452
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003453xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003454 xmlDocPtr ret;
3455 xmlParserCtxtPtr ctxt;
3456 xmlParserInputPtr input;
3457
3458 if (cur == NULL) return(NULL);
3459
3460 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3461 if (ctxt == NULL) {
3462 perror("malloc");
3463 return(NULL);
3464 }
3465 xmlInitParserCtxt(ctxt);
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003466 if (sax != NULL) ctxt->sax == sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003467 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3468 if (input == NULL) {
3469 perror("malloc");
3470 free(ctxt);
3471 return(NULL);
3472 }
3473
3474 input->filename = NULL;
3475 input->line = 1;
3476 input->col = 1;
3477 input->base = cur;
3478 input->cur = cur;
3479
3480 inputPush(ctxt, input);
3481
3482
3483 xmlParseDocument(ctxt);
3484 ret = ctxt->doc;
Daniel Veillardccb09631998-10-27 06:21:04 +00003485 free(ctxt->nodeTab);
3486 free(ctxt->inputTab);
3487 if (input->filename != NULL)
3488 free((char *)input->filename);
3489 free(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003490 free(ctxt);
3491
3492 return(ret);
3493}
3494
Daniel Veillard11e00581998-10-24 18:27:49 +00003495/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003496 * xmlParseDoc :
3497 * @cur: a pointer to an array of CHAR
3498 *
3499 * parse an XML in-memory document and build a tree.
3500 *
3501 * return values: the resulting document tree
3502 */
3503
3504xmlDocPtr xmlParseDoc(CHAR *cur) {
3505 return(xmlSAXParseDoc(NULL, cur));
3506}
3507
3508/**
3509 * xmlSAXParseFile :
3510 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00003511 * @filename: the filename
3512 *
3513 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
3514 * compressed document is provided by default if found at compile-time.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003515 * It use the given SAX function block to handle the parsing callback.
3516 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00003517 *
3518 * return values: the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00003519 */
3520
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003521xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003522 xmlDocPtr ret;
3523#ifdef HAVE_ZLIB_H
3524 gzFile input;
3525#else
3526 int input;
3527#endif
3528 int res;
Daniel Veillard27271681998-10-30 06:39:40 +00003529 int len;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003530 struct stat buf;
3531 char *buffer;
3532 xmlParserCtxtPtr ctxt;
3533 xmlParserInputPtr inputStream;
3534
3535 res = stat(filename, &buf);
3536 if (res < 0) return(NULL);
3537
3538#ifdef HAVE_ZLIB_H
Daniel Veillard27271681998-10-30 06:39:40 +00003539 len = (buf.st_size * 8) + 1000;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003540retry_bigger:
Daniel Veillard27271681998-10-30 06:39:40 +00003541 buffer = malloc(len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003542#else
Daniel Veillard27271681998-10-30 06:39:40 +00003543 len = buf.st_size + 100;
3544 buffer = malloc(len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003545#endif
3546 if (buffer == NULL) {
3547 perror("malloc");
3548 return(NULL);
3549 }
3550
Daniel Veillard27271681998-10-30 06:39:40 +00003551 memset(buffer, 0, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003552#ifdef HAVE_ZLIB_H
3553 input = gzopen (filename, "r");
3554 if (input == NULL) {
3555 fprintf (stderr, "Cannot read file %s :\n", filename);
3556 perror ("gzopen failed");
3557 return(NULL);
3558 }
3559#else
3560 input = open (filename, O_RDONLY);
3561 if (input < 0) {
3562 fprintf (stderr, "Cannot read file %s :\n", filename);
3563 perror ("open failed");
3564 return(NULL);
3565 }
3566#endif
3567#ifdef HAVE_ZLIB_H
Daniel Veillard27271681998-10-30 06:39:40 +00003568 res = gzread(input, buffer, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003569#else
3570 res = read(input, buffer, buf.st_size);
3571#endif
3572 if (res < 0) {
3573 fprintf (stderr, "Cannot read file %s :\n", filename);
3574#ifdef HAVE_ZLIB_H
3575 perror ("gzread failed");
3576#else
3577 perror ("read failed");
3578#endif
3579 return(NULL);
3580 }
3581#ifdef HAVE_ZLIB_H
3582 gzclose(input);
Daniel Veillard27271681998-10-30 06:39:40 +00003583 if (res >= len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003584 free(buffer);
Daniel Veillard27271681998-10-30 06:39:40 +00003585 len *= 2;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003586 goto retry_bigger;
3587 }
3588 buf.st_size = res;
3589#else
3590 close(input);
3591#endif
3592
3593 buffer[buf.st_size] = '\0';
3594
3595 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3596 if (ctxt == NULL) {
3597 perror("malloc");
3598 return(NULL);
3599 }
3600 xmlInitParserCtxt(ctxt);
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003601 if (sax != NULL) ctxt->sax == sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003602 inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3603 if (inputStream == NULL) {
3604 perror("malloc");
3605 free(ctxt);
3606 return(NULL);
3607 }
3608
3609 inputStream->filename = strdup(filename);
3610 inputStream->line = 1;
3611 inputStream->col = 1;
3612
3613 /*
3614 * TODO : plug some encoding conversion routines here. !!!
3615 */
3616 inputStream->base = buffer;
3617 inputStream->cur = buffer;
3618
3619 inputPush(ctxt, inputStream);
3620
3621 xmlParseDocument(ctxt);
3622
3623 ret = ctxt->doc;
3624 free(buffer);
Daniel Veillardccb09631998-10-27 06:21:04 +00003625 free(ctxt->nodeTab);
3626 free(ctxt->inputTab);
3627 if (inputStream->filename != NULL)
3628 free((char *)inputStream->filename);
3629 free(inputStream);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003630 free(ctxt);
3631
3632 return(ret);
3633}
3634
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003635/**
3636 * xmlParseFile :
3637 * @filename: the filename
3638 *
3639 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
3640 * compressed document is provided by default if found at compile-time.
3641 *
3642 * return values: the resulting document tree
3643 */
3644
3645xmlDocPtr xmlParseFile(const char *filename) {
3646 return(xmlSAXParseFile(NULL, filename));
3647}
Daniel Veillard260a68f1998-08-13 03:39:55 +00003648
Daniel Veillard11e00581998-10-24 18:27:49 +00003649/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003650 * xmlSAXParseMemory :
3651 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00003652 * @cur: an pointer to a char array
3653 * @size: the siwe of the array
3654 *
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003655 * parse an XML in-memory block and use the given SAX function block
3656 * to handle the parsing callback. If sax is NULL, fallback to the default
3657 * DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00003658 *
3659 * TODO : plug some encoding conversion routines here. !!!
3660 *
3661 * return values: the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00003662 */
Daniel Veillard11e00581998-10-24 18:27:49 +00003663
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003664xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003665 xmlDocPtr ret;
3666 xmlParserCtxtPtr ctxt;
3667 xmlParserInputPtr input;
3668
3669 buffer[size - 1] = '\0';
3670
3671 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3672 if (ctxt == NULL) {
3673 perror("malloc");
3674 return(NULL);
3675 }
3676 xmlInitParserCtxt(ctxt);
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003677 if (sax != NULL) ctxt->sax == sax;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003678 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3679 if (input == NULL) {
3680 perror("malloc");
Daniel Veillardccb09631998-10-27 06:21:04 +00003681 free(ctxt->nodeTab);
3682 free(ctxt->inputTab);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003683 free(ctxt);
3684 return(NULL);
3685 }
3686
3687 input->filename = NULL;
3688 input->line = 1;
3689 input->col = 1;
3690
3691 /*
3692 * TODO : plug some encoding conversion routines here. !!!
3693 */
3694 input->base = buffer;
3695 input->cur = buffer;
3696
3697 inputPush(ctxt, input);
3698
3699 xmlParseDocument(ctxt);
3700
3701 ret = ctxt->doc;
Daniel Veillardccb09631998-10-27 06:21:04 +00003702 free(ctxt->nodeTab);
3703 free(ctxt->inputTab);
3704 if (input->filename != NULL)
3705 free((char *)input->filename);
3706 free(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003707 free(ctxt);
3708
3709 return(ret);
3710}
3711
Daniel Veillard42dc9b31998-11-09 01:17:21 +00003712/**
3713 * xmlParseMemory :
3714 * @cur: an pointer to a char array
3715 * @size: the size of the array
3716 *
3717 * parse an XML in-memory block and build a tree.
3718 *
3719 * return values: the resulting document tree
3720 */
3721
3722xmlDocPtr xmlParseMemory(char *buffer, int size) {
3723 return(xmlSAXParseMemory(NULL, buffer, size));
3724}
Daniel Veillard260a68f1998-08-13 03:39:55 +00003725
Daniel Veillard11e00581998-10-24 18:27:49 +00003726/**
3727 * xmlInitParserCtxt:
3728 * @ctxt: an XML parser context
3729 *
3730 * Initialize a parser context
3731 */
3732
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003733void
3734xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
Daniel Veillard260a68f1998-08-13 03:39:55 +00003735{
3736 /* Allocate the Input stack */
3737 ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
3738 ctxt->inputNr = 0;
3739 ctxt->inputMax = 5;
3740 ctxt->input = NULL;
3741
3742 /* Allocate the Node stack */
3743 ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
3744 ctxt->nodeNr = 0;
3745 ctxt->nodeMax = 10;
3746 ctxt->node = NULL;
3747
3748 ctxt->sax = &xmlDefaultSAXHandler;
3749 ctxt->doc = NULL;
3750 ctxt->record_info = 0;
3751 xmlInitNodeInfoSeq(&ctxt->node_seq);
3752}
3753
Daniel Veillard11e00581998-10-24 18:27:49 +00003754/**
3755 * xmlClearParserCtxt:
3756 * @ctxt: an XML parser context
3757 *
3758 * Clear (release owned resources) and reinitialize a parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00003759 */
Daniel Veillard11e00581998-10-24 18:27:49 +00003760
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003761void
3762xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
Daniel Veillard260a68f1998-08-13 03:39:55 +00003763{
3764 xmlClearNodeInfoSeq(&ctxt->node_seq);
3765 xmlInitParserCtxt(ctxt);
3766}
3767
3768
Daniel Veillard11e00581998-10-24 18:27:49 +00003769/**
3770 * xmlSetupParserForBuffer:
3771 * @ctxt: an XML parser context
3772 * @buffer: a CHAR * buffer
3773 * @filename: a file name
3774 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003775 * Setup the parser context to parse a new buffer; Clears any prior
3776 * contents from the parser context. The buffer parameter must not be
3777 * NULL, but the filename parameter can be
3778 */
Daniel Veillard11e00581998-10-24 18:27:49 +00003779
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003780void
3781xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003782 const char* filename)
3783{
3784 xmlParserInputPtr input;
3785
3786 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3787 if (input == NULL) {
3788 perror("malloc");
3789 free(ctxt);
3790 exit(1);
3791 }
3792
3793 xmlClearParserCtxt(ctxt);
3794 if (input->filename != NULL)
3795 input->filename = strdup(filename);
3796 else
3797 input->filename = NULL;
3798 input->line = 1;
3799 input->col = 1;
3800 input->base = buffer;
3801 input->cur = buffer;
3802
3803 inputPush(ctxt, input);
3804}
3805
3806
Daniel Veillard11e00581998-10-24 18:27:49 +00003807/**
3808 * xmlParserFindNodeInfo:
3809 * @ctxt: an XML parser context
3810 * @node: an XML node within the tree
3811 *
3812 * Find the parser node info struct for a given node
3813 *
3814 * return values: an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003815 */
3816const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
3817 const xmlNode* node)
3818{
3819 unsigned long pos;
3820
3821 /* Find position where node should be at */
3822 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3823 if ( ctx->node_seq.buffer[pos].node == node )
3824 return &ctx->node_seq.buffer[pos];
3825 else
3826 return NULL;
3827}
3828
3829
Daniel Veillard11e00581998-10-24 18:27:49 +00003830/**
3831 * xmlInitNodeInfoSeq :
3832 * @seq: a node info sequence pointer
3833 *
3834 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00003835 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003836void
3837xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00003838{
3839 seq->length = 0;
3840 seq->maximum = 0;
3841 seq->buffer = NULL;
3842}
3843
Daniel Veillard11e00581998-10-24 18:27:49 +00003844/**
3845 * xmlClearNodeInfoSeq :
3846 * @seq: a node info sequence pointer
3847 *
3848 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +00003849 * info sequence
3850 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003851void
3852xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00003853{
3854 if ( seq->buffer != NULL )
3855 free(seq->buffer);
3856 xmlInitNodeInfoSeq(seq);
3857}
3858
3859
Daniel Veillard11e00581998-10-24 18:27:49 +00003860/**
3861 * xmlParserFindNodeInfoIndex:
3862 * @seq: a node info sequence pointer
3863 * @node: an XML node pointer
3864 *
3865 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003866 * xmlParserFindNodeInfoIndex : Find the index that the info record for
3867 * the given node is or should be at in a sorted sequence
Daniel Veillard11e00581998-10-24 18:27:49 +00003868 * return values: a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +00003869 */
3870unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
3871 const xmlNode* node)
3872{
3873 unsigned long upper, lower, middle;
3874 int found = 0;
3875
3876 /* Do a binary search for the key */
3877 lower = 1;
3878 upper = seq->length;
3879 middle = 0;
3880 while ( lower <= upper && !found) {
3881 middle = lower + (upper - lower) / 2;
3882 if ( node == seq->buffer[middle - 1].node )
3883 found = 1;
3884 else if ( node < seq->buffer[middle - 1].node )
3885 upper = middle - 1;
3886 else
3887 lower = middle + 1;
3888 }
3889
3890 /* Return position */
3891 if ( middle == 0 || seq->buffer[middle - 1].node < node )
3892 return middle;
3893 else
3894 return middle - 1;
3895}
3896
3897
Daniel Veillard11e00581998-10-24 18:27:49 +00003898/**
3899 * xmlParserAddNodeInfo:
3900 * @ctxt: an XML parser context
3901 * @seq: a node info sequence pointer
3902 *
3903 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00003904 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003905void
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003906xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003907 const xmlParserNodeInfo* info)
3908{
3909 unsigned long pos;
3910 static unsigned int block_size = 5;
3911
3912 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003913 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
3914 if ( pos < ctxt->node_seq.length
3915 && ctxt->node_seq.buffer[pos].node == info->node ) {
3916 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003917 }
3918
3919 /* Otherwise, we need to add new node to buffer */
3920 else {
3921 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003922 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003923 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003924 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
3925 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003926
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003927 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard260a68f1998-08-13 03:39:55 +00003928 tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3929 else
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003930 tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003931
3932 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3934 ctxt->sax->error(ctxt, "Out of memory");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003935 return;
3936 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003937 ctxt->node_seq.buffer = tmp_buffer;
3938 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003939 }
3940
3941 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003942 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003943 unsigned long i;
3944
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003945 for ( i = ctxt->node_seq.length; i > pos; i-- )
3946 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +00003947 }
3948
3949 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003950 ctxt->node_seq.buffer[pos] = *info;
3951 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003952 }
3953}