blob: 1b3b384ee7d1c9f02d0fa5b1c37e0f512e3cdfc3 [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
6 * $Id$
7 */
8
9#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
13#include <config.h>
14#endif
15#include <stdio.h>
16#include <ctype.h>
17#include <string.h> /* for memset() only */
18#include <malloc.h>
19#include <sys/stat.h>
20#ifdef HAVE_FCNTL_H
21#include <fcntl.h>
22#endif
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_ZLIB_H
27#include <zlib.h>
28#endif
29
30#include "tree.h"
31#include "parser.h"
32#include "entities.h"
33
34/************************************************************************
35 * *
36 * Parser stacks related functions and macros *
37 * *
38 ************************************************************************/
39/*
40 * Generic function for accessing stacks in the Parser Context
41 */
42
43#define PUSH_AND_POP(type, name) \
44int name##Push(xmlParserCtxtPtr ctxt, type value) { \
45 if (ctxt->name##Nr >= ctxt->name##Max) { \
46 ctxt->name##Max *= 2; \
47 ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
48 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
49 if (ctxt->name##Tab == NULL) { \
50 fprintf(stderr, "realloc failed !\n"); \
51 exit(1); \
52 } \
53 } \
54 ctxt->name##Tab[ctxt->name##Nr] = value; \
55 ctxt->name = value; \
56 return(ctxt->name##Nr++); \
57} \
58type name##Pop(xmlParserCtxtPtr ctxt) { \
59 if (ctxt->name##Nr <= 0) return(0); \
60 ctxt->name##Nr--; \
61 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
62 return(ctxt->name); \
63} \
64
65PUSH_AND_POP(xmlParserInputPtr, input)
66PUSH_AND_POP(xmlNodePtr, node)
67
68/*************
69#define CUR (*(ctxt->input->cur) ? *(ctxt->input->cur) : xmlPopInput(ctxt))
70#define NEXT (((*(ctxt->input->cur) == '\n') ? \
71 (ctxt->input->line++, ctxt->input->col = 1) : \
72 (ctxt->input->col++)), ctxt->input->cur++)
73 *************/
74
/*
 * Cursor access macros. All of them expect a variable named ctxt to be
 * in scope and work on ctxt->input.
 *
 *   CUR         the character at the current position
 *   NEXT        advance by one character, keeping line/col up to date;
 *               when the current character is 0 nothing is skipped and
 *               xmlPopInput() is called instead. The value of the
 *               expression is the position before advancing, or the
 *               current position after the call to xmlPopInput().
 *   CUR_PTR     the current position itself (usable as an lvalue)
 *   NXT(val)    the character val positions ahead, no bound checking
 *   SKIP(val)   advance by val characters; line/col are not updated and
 *               the skipped characters are not examined
 *   SKIP_BLANKS advance over a run of IS_BLANK() characters
 *
 * Every expression macro is fully parenthesized so that it can be used
 * inside larger expressions.
 */
#define CUR (*(ctxt->input->cur))
#define NEXT ((*(ctxt->input->cur)) ? \
    ((((*(ctxt->input->cur)) == '\n') ? \
        (ctxt->input->line++, ctxt->input->col = 1) : \
        (ctxt->input->col++)), ctxt->input->cur++) : \
    (xmlPopInput(ctxt), ctxt->input->cur))

#define CUR_PTR (ctxt->input->cur)

#define NXT(val) (ctxt->input->cur[(val)])

#define SKIP(val) (ctxt->input->cur += (val))
#define SKIP_BLANKS \
    while (IS_BLANK(*(ctxt->input->cur))) NEXT
89
90
91/*
92 * xmlPopInput: the current input pointed by ctxt->input came to an end
93 * pop it and return the next char.
94 *
95 * TODO A deallocation of the popped Input structure is needed
96 */
97CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
98 if (ctxt->inputNr == 1) return(0); /* End of main Input */
99 inputPop(ctxt);
100 return(CUR);
101}
102
103/*
104 * xmlPushInput: switch to a new input stream which is stacked on top
105 * of the previous one(s).
106 */
107void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
108 if (input == NULL) return;
109 inputPush(ctxt, input);
110}
111
112/*
113 * Create a new input stream based on a memory buffer.
114 */
115void xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
116 xmlParserInputPtr input;
117
118 if (entity == NULL) {
119 xmlParserError(ctxt,
120 "internal: xmlNewEntityInputStream entity = NULL\n");
121 return;
122 }
123 if (entity->content == NULL) {
124 xmlParserError(ctxt,
125 "internal: xmlNewEntityInputStream entity->input = NULL\n");
126 return;
127 }
128 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
129 if (input == NULL) {
130 xmlParserError(ctxt, "malloc: couldn't allocate a new input stream\n");
131 return;
132 }
133 input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
134 input->base = entity->content;
135 input->cur = entity->content;
136 input->line = 1;
137 input->col = 1;
138 xmlPushInput(ctxt, input);
139}
140
141/*
142 * A few macros needed to help building the parser.
143 */
144
145#ifdef UNICODE
146/************************************************************************
147 * *
148 * UNICODE version of the macros. *
149 * *
150 ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *            | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The test accepts tab, line feed, carriage return and every value from
 * 0x20 upward except 0xFFFE and 0xFFFF.
 * NOTE(review): values in the surrogate blocks are accepted by this
 * macro although the production excludes them.
 */
#define IS_CHAR(c) \
    ((((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)) || \
     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
159
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * True for a single white space character: space, tab, line feed or
 * carriage return.
 * IS_BLANK is defined a second time, outside of the UNICODE conditional,
 * further down in this file; both definitions have to stay identical
 * token for token, otherwise the second one is an incompatible macro
 * redefinition when UNICODE is set.
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
 ((c) == 0x0D))
165
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * True when c is one of the code points or falls in one of the ranges
 * listed below. The list is kept in increasing code point order, one
 * range or single value per line; the argument is evaluated many times,
 * so it must not have side effects.
 *
 * The table was generated from the text of the REC with the following
 * substitutions (VI is your friend !):
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
 */
#define IS_BASECHAR(c) \
 ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
 (((c) >= 0x0061) && ((c) <= 0x007A)) || \
 (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
 (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
 (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
 (((c) >= 0x0100) && ((c) <= 0x0131)) || \
 (((c) >= 0x0134) && ((c) <= 0x013E)) || \
 (((c) >= 0x0141) && ((c) <= 0x0148)) || \
 (((c) >= 0x014A) && ((c) <= 0x017E)) || \
 (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
 (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
 (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
 (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
 (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
 (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
 ((c) == 0x0386) || \
 (((c) >= 0x0388) && ((c) <= 0x038A)) || \
 ((c) == 0x038C) || \
 (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
 (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
 (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
 ((c) == 0x03DA) || \
 ((c) == 0x03DC) || \
 ((c) == 0x03DE) || \
 ((c) == 0x03E0) || \
 (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
 (((c) >= 0x0401) && ((c) <= 0x040C)) || \
 (((c) >= 0x040E) && ((c) <= 0x044F)) || \
 (((c) >= 0x0451) && ((c) <= 0x045C)) || \
 (((c) >= 0x045E) && ((c) <= 0x0481)) || \
 (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
 (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
 (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
 (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
 (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
 (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
 (((c) >= 0x0531) && ((c) <= 0x0556)) || \
 ((c) == 0x0559) || \
 (((c) >= 0x0561) && ((c) <= 0x0586)) || \
 (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
 (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
 (((c) >= 0x0621) && ((c) <= 0x063A)) || \
 (((c) >= 0x0641) && ((c) <= 0x064A)) || \
 (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
 (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
 (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
 (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
 ((c) == 0x06D5) || \
 (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
 (((c) >= 0x0905) && ((c) <= 0x0939)) || \
 ((c) == 0x093D) || \
 (((c) >= 0x0958) && ((c) <= 0x0961)) || \
 (((c) >= 0x0985) && ((c) <= 0x098C)) || \
 (((c) >= 0x098F) && ((c) <= 0x0990)) || \
 (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
 (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
 ((c) == 0x09B2) || \
 (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
 (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
 (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
 (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
 (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
 (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
 (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
 (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
 (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
 (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
 (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
 (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
 ((c) == 0x0A5E) || \
 (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
 (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
 ((c) == 0x0A8D) || \
 (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
 (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
 (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
 (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
 (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
 ((c) == 0x0ABD) || \
 ((c) == 0x0AE0) || \
 (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
 (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
 (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
 (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
 (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
 (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
 ((c) == 0x0B3D) || \
 (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
 (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
 (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
 (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
 (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
 (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
 ((c) == 0x0B9C) || \
 (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
 (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
 (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
 (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
 (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
 (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
 (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
 (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
 (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
 (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
 (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
 (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
 (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
 (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
 (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
 (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
 ((c) == 0x0CDE) || \
 (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
 (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
 (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
 (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
 (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
 (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
 (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
 ((c) == 0x0E30) || \
 (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
 (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
 (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
 ((c) == 0x0E84) || \
 (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
 ((c) == 0x0E8A) || \
 ((c) == 0x0E8D) || \
 (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
 (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
 (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
 ((c) == 0x0EA5) || \
 ((c) == 0x0EA7) || \
 (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
 (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
 ((c) == 0x0EB0) || \
 (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
 ((c) == 0x0EBD) || \
 (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
 (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
 (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
 (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
 (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
 ((c) == 0x1100) || \
 (((c) >= 0x1102) && ((c) <= 0x1103)) || \
 (((c) >= 0x1105) && ((c) <= 0x1107)) || \
 ((c) == 0x1109) || \
 (((c) >= 0x110B) && ((c) <= 0x110C)) || \
 (((c) >= 0x110E) && ((c) <= 0x1112)) || \
 ((c) == 0x113C) || \
 ((c) == 0x113E) || \
 ((c) == 0x1140) || \
 ((c) == 0x114C) || \
 ((c) == 0x114E) || \
 ((c) == 0x1150) || \
 (((c) >= 0x1154) && ((c) <= 0x1155)) || \
 ((c) == 0x1159) || \
 (((c) >= 0x115F) && ((c) <= 0x1161)) || \
 ((c) == 0x1163) || \
 ((c) == 0x1165) || \
 ((c) == 0x1167) || \
 ((c) == 0x1169) || \
 (((c) >= 0x116D) && ((c) <= 0x116E)) || \
 (((c) >= 0x1172) && ((c) <= 0x1173)) || \
 ((c) == 0x1175) || \
 ((c) == 0x119E) || \
 ((c) == 0x11A8) || \
 ((c) == 0x11AB) || \
 (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
 (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
 ((c) == 0x11BA) || \
 (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
 ((c) == 0x11EB) || \
 ((c) == 0x11F0) || \
 ((c) == 0x11F9) || \
 (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
 (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
 (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
 (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
 (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
 (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
 (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
 ((c) == 0x1F59) || \
 ((c) == 0x1F5B) || \
 ((c) == 0x1F5D) || \
 (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
 (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
 (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
 ((c) == 0x1FBE) || \
 (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
 (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
 (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
 (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
 (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
 (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
 (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
 ((c) == 0x2126) || \
 (((c) >= 0x212A) && ((c) <= 0x212B)) || \
 ((c) == 0x212E) || \
 (((c) >= 0x2180) && ((c) <= 0x2182)) || \
 (((c) >= 0x3041) && ((c) <= 0x3094)) || \
 (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
 (((c) >= 0x3105) && ((c) <= 0x312C)) || \
 (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
377
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * True when c lies in one of the fifteen ranges of ten decimal digits
 * listed below, starting with the ASCII digits 0x30-0x39.
 */
#define IS_DIGIT(c) \
    (((0x0030 <= (c)) && ((c) <= 0x0039)) || \
     ((0x0660 <= (c)) && ((c) <= 0x0669)) || \
     ((0x06F0 <= (c)) && ((c) <= 0x06F9)) || \
     ((0x0966 <= (c)) && ((c) <= 0x096F)) || \
     ((0x09E6 <= (c)) && ((c) <= 0x09EF)) || \
     ((0x0A66 <= (c)) && ((c) <= 0x0A6F)) || \
     ((0x0AE6 <= (c)) && ((c) <= 0x0AEF)) || \
     ((0x0B66 <= (c)) && ((c) <= 0x0B6F)) || \
     ((0x0BE7 <= (c)) && ((c) <= 0x0BEF)) || \
     ((0x0C66 <= (c)) && ((c) <= 0x0C6F)) || \
     ((0x0CE6 <= (c)) && ((c) <= 0x0CEF)) || \
     ((0x0D66 <= (c)) && ((c) <= 0x0D6F)) || \
     ((0x0E50 <= (c)) && ((c) <= 0x0E59)) || \
     ((0x0ED0 <= (c)) && ((c) <= 0x0ED9)) || \
     ((0x0F20 <= (c)) && ((c) <= 0x0F29)))
397
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * True when c is one of the code points or falls in one of the ranges
 * listed below, kept in increasing code point order with one range or
 * single value per line. The argument is evaluated many times, so it
 * must not have side effects.
 */
#define IS_COMBINING(c) \
 ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
 (((c) >= 0x0360) && ((c) <= 0x0361)) || \
 (((c) >= 0x0483) && ((c) <= 0x0486)) || \
 (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
 (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
 (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
 ((c) == 0x05BF) || \
 (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
 ((c) == 0x05C4) || \
 (((c) >= 0x064B) && ((c) <= 0x0652)) || \
 ((c) == 0x0670) || \
 (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
 (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
 (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
 (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
 (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
 (((c) >= 0x0901) && ((c) <= 0x0903)) || \
 ((c) == 0x093C) || \
 (((c) >= 0x093E) && ((c) <= 0x094C)) || \
 ((c) == 0x094D) || \
 (((c) >= 0x0951) && ((c) <= 0x0954)) || \
 (((c) >= 0x0962) && ((c) <= 0x0963)) || \
 (((c) >= 0x0981) && ((c) <= 0x0983)) || \
 ((c) == 0x09BC) || \
 ((c) == 0x09BE) || \
 ((c) == 0x09BF) || \
 (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
 (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
 (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
 ((c) == 0x09D7) || \
 (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
 ((c) == 0x0A02) || \
 ((c) == 0x0A3C) || \
 ((c) == 0x0A3E) || \
 ((c) == 0x0A3F) || \
 (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
 (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
 (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
 (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
 (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
 ((c) == 0x0ABC) || \
 (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
 (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
 (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
 (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
 ((c) == 0x0B3C) || \
 (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
 (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
 (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
 (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
 (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
 (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
 (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
 (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
 ((c) == 0x0BD7) || \
 (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
 (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
 (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
 (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
 (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
 (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
 (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
 (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
 (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
 (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
 (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
 (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
 (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
 (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
 ((c) == 0x0D57) || \
 ((c) == 0x0E31) || \
 (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
 (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
 ((c) == 0x0EB1) || \
 (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
 (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
 (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
 (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
 ((c) == 0x0F35) || \
 ((c) == 0x0F37) || \
 ((c) == 0x0F39) || \
 ((c) == 0x0F3E) || \
 ((c) == 0x0F3F) || \
 (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
 (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
 (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
 ((c) == 0x0F97) || \
 (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
 (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
 ((c) == 0x0FB9) || \
 (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
 ((c) == 0x20E1) || \
 (((c) >= 0x302A) && ((c) <= 0x302F)) || \
 ((c) == 0x3099) || \
 ((c) == 0x309A))
497
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * True when c is one of the extender characters of the production
 * above. The argument is evaluated several times, so it must not have
 * side effects.
 */
#define IS_EXTENDER(c) \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
     ((c) == 0xec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309d) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
510
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * True when c belongs to one of the two ranges or is the single code
 * point of the production above; nothing outside of the production is
 * accepted.
 */
#define IS_IDEOGRAPHIC(c) \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
     (((c) >= 0x3021) && ((c) <= 0x3029)) || \
     ((c) == 0x3007))
519
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * A letter is anything accepted by IS_BASECHAR() or, failing that, by
 * IS_IDEOGRAPHIC().
 */
#define IS_LETTER(c) \
    (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
524
525#else
526/************************************************************************
527 * *
528 * 8bits / ASCII version of the macros. *
529 * *
530 ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *            | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * 8 bits version: tab, line feed, carriage return and every value from
 * 0x20 upward are accepted. 0 is never a Char, which lets the parsing
 * loops use IS_CHAR() as their end of input test.
 */
#define IS_CHAR(c) \
    (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
539
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bits version: the ASCII letters, the three blocks of accented
 * letters 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF, plus 0xBA.
 *
 * The former clause (c >= 0xaa) && (c <= 0x5b) could never be true and
 * has been dropped; the set of accepted values is unchanged.
 * NOTE(review): 0xBA is accepted here although it does not appear in
 * the UNICODE version of this macro; kept as is for compatibility.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))
551
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bits version: only the ten ASCII digits.
 */
#define IS_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39))
556
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bits version: there is no ideographic character, a letter is
 * whatever IS_BASECHAR() accepts.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))
561
562
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bits version: no value is treated as a combining character, the
 * macro is always false and its argument is not evaluated.
 */
#define IS_COMBINING(c) (0)
567
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bits version: 0xB7 is the only extender below 0x100.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
574
575#endif /* !UNICODE */
576
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * True for a single white space character: space, tab, line feed or
 * carriage return. This definition is shared by the UNICODE and the
 * 8 bits builds; the UNICODE section above carries a copy of it, so the
 * two have to stay identical token for token.
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
 ((c) == 0x0D))
584
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * True when c may appear in a public identifier: ASCII letters and
 * digits first, then the three white space characters, then the
 * punctuation set of the production.
 */
#define IS_PUBIDCHAR(c) \
    ((((c) >= 'a') && ((c) <= 'z')) || \
     (((c) >= 'A') && ((c) <= 'Z')) || \
     (((c) >= '0') && ((c) <= '9')) || \
     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || \
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || \
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || \
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || \
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || \
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
598
/*
 * SKIP_EOL(p): move the pointer p past one end of line sequence, if it
 * points to one: CR, LF, CR LF or LF CR. p is left untouched otherwise.
 *
 * The carriage return and line feed are tested as 0x0D and 0x0A (the
 * values 0x13 and 0x10 used previously were the decimal codes written
 * as hexadecimal), and at most one sequence is skipped.
 *
 * p must be a modifiable lvalue and is evaluated several times. The
 * macro expands to an if/else statement, so enclose it in braces when
 * it is the body of another if.
 */
#define SKIP_EOL(p) \
    if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; } \
    else if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }
602
/*
 * MOVETO_ENDTAG(p): advance the pointer p until it points to a '>' or
 * to something which is not an IS_CHAR() character. p must be a
 * modifiable lvalue.
 */
#define MOVETO_ENDTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++
605
/*
 * MOVETO_STARTTAG(p): advance the pointer p until it points to a '<' or
 * to something which is not an IS_CHAR() character. p must be a
 * modifiable lvalue.
 */
#define MOVETO_STARTTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
608
609/************************************************************************
610 * *
611 * Commodity functions to handle CHARs *
612 * *
613 ************************************************************************/
614
615/*
616 * xmlStrndup : a strndup for array of CHAR's
617 */
618
619CHAR *xmlStrndup(const CHAR *cur, int len) {
620 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
621
622 if (ret == NULL) {
623 fprintf(stderr, "malloc of %d byte failed\n",
624 (len + 1) * sizeof(CHAR));
625 return(NULL);
626 }
627 memcpy(ret, cur, len * sizeof(CHAR));
628 ret[len] = 0;
629 return(ret);
630}
631
632/*
633 * xmlStrdup : a strdup for CHAR's
634 */
635
636CHAR *xmlStrdup(const CHAR *cur) {
637 const CHAR *p = cur;
638
639 while (IS_CHAR(*p)) p++;
640 return(xmlStrndup(cur, p - cur));
641}
642
643/*
644 * xmlCharStrndup : a strndup for char's to CHAR's
645 */
646
647CHAR *xmlCharStrndup(const char *cur, int len) {
648 int i;
649 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
650
651 if (ret == NULL) {
652 fprintf(stderr, "malloc of %d byte failed\n",
653 (len + 1) * sizeof(CHAR));
654 return(NULL);
655 }
656 for (i = 0;i < len;i++)
657 ret[i] = (CHAR) cur[i];
658 ret[len] = 0;
659 return(ret);
660}
661
662/*
663 * xmlCharStrdup : a strdup for char's to CHAR's
664 */
665
666CHAR *xmlCharStrdup(const char *cur) {
667 const char *p = cur;
668
669 while (*p != '\0') p++;
670 return(xmlCharStrndup(cur, p - cur));
671}
672
673/*
674 * xmlStrcmp : a strcmp for CHAR's
675 */
676
677int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
678 register int tmp;
679
680 do {
681 tmp = *str1++ - *str2++;
682 if (tmp != 0) return(tmp);
683 } while ((*str1 != 0) && (*str2 != 0));
684 return (*str1 - *str2);
685}
686
687/*
688 * xmlStrncmp : a strncmp for CHAR's
689 */
690
691int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
692 register int tmp;
693
694 if (len <= 0) return(0);
695 do {
696 tmp = *str1++ - *str2++;
697 if (tmp != 0) return(tmp);
698 len--;
699 if (len <= 0) return(0);
700 } while ((*str1 != 0) && (*str2 != 0));
701 return (*str1 - *str2);
702}
703
704/*
705 * xmlStrchr : a strchr for CHAR's
706 */
707
708CHAR *xmlStrchr(const CHAR *str, CHAR val) {
709 while (*str != 0) {
710 if (*str == val) return((CHAR *) str);
711 str++;
712 }
713 return(NULL);
714}
715
716/*
717 * xmlStrlen : lenght of a CHAR's string
718 */
719
720int xmlStrlen(const CHAR *str) {
721 int len = 0;
722
723 if (str == NULL) return(0);
724 while (*str != 0) {
725 str++;
726 len++;
727 }
728 return(len);
729}
730
731/*
732 * xmlStrncat : a strncat for array of CHAR's
733 */
734
735CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len) {
736 int size;
737 CHAR *ret;
738
739 if ((add == NULL) || (len == 0))
740 return(cur);
741 if (cur == NULL)
742 return(xmlStrndup(add, len));
743
744 size = xmlStrlen(cur);
745 ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
746 if (ret == NULL) {
747 fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
748 (size + len + 1) * sizeof(CHAR));
749 return(cur);
750 }
751 memcpy(&ret[size], add, len * sizeof(CHAR));
752 ret[size + len] = 0;
753 return(ret);
754}
755
756/*
757 * xmlStrcat : a strcat for CHAR's
758 */
759
760CHAR *xmlStrcat(CHAR *cur, const CHAR *add) {
761 const CHAR *p = add;
762
763 if (add == NULL) return(cur);
764 if (cur == NULL)
765 return(xmlStrdup(add));
766
767 while (IS_CHAR(*p)) p++;
768 return(xmlStrncat(cur, add, p - add));
769}
770
771/************************************************************************
772 * *
773 * Commodity functions, cleanup needed ? *
774 * *
775 ************************************************************************/
776
777/*
778 * Is this a sequence of blank chars that one can ignore ?
779 */
780
781static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
782 int i;
783 xmlNodePtr lastChild;
784
785 for (i = 0;i < len;i++)
786 if (!(IS_BLANK(str[i]))) return(0);
787
788 if (CUR != '<') return(0);
789 lastChild = xmlGetLastChild(ctxt->node);
790 if (lastChild == NULL) {
791 if (ctxt->node->content != NULL) return(0);
792 } else if (xmlNodeIsText(lastChild))
793 return(0);
794 return(1);
795}
796
797/*
798 * Handling of defined entities, when should we define a new input
799 * stream ? When do we just handle that as a set of chars ?
800 */
801
802void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
803 int len;
804
805 if (entity->content == NULL) {
806 xmlParserError(ctxt, "xmlHandleEntity %s: content == NULL\n",
807 entity->name);
808 return;
809 }
810 len = xmlStrlen(entity->content);
811 if (len <= 2) goto handle_as_char;
812
813 /*
814 * Redefine its content as an input stream.
815 */
816 xmlNewEntityInputStream(ctxt, entity);
817 return;
818
819handle_as_char:
820 /*
821 * Just handle the content as a set of chars.
822 */
823 if (ctxt->sax != NULL)
824 ctxt->sax->characters(ctxt, entity->content, 0, len);
825
826}
827
/*
 * Forward definitions needed for the recursive behaviour of the parser.
 */
831xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
832CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt, int inLine);
833CHAR *xmlParseReference(xmlParserCtxtPtr ctxt, int inLine);
834
835/************************************************************************
836 * *
837 * Extra stuff for namespace support *
838 * Relates to http://www.w3.org/TR/WD-xml-names *
839 * *
840 ************************************************************************/
841
842/*
843 * xmlNamespaceParseNCName : parse an XML namespace name.
844 *
845 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
846 *
847 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
848 * CombiningChar | Extender
849 */
850
851CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
852 const CHAR *q;
853 CHAR *ret = NULL;
854
855 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
856 q = NEXT;
857
858 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
859 (CUR == '.') || (CUR == '-') ||
860 (CUR == '_') ||
861 (IS_COMBINING(CUR)) ||
862 (IS_EXTENDER(CUR)))
863 NEXT;
864
865 ret = xmlStrndup(q, CUR_PTR - q);
866
867 return(ret);
868}
869
870/*
871 * xmlNamespaceParseQName : parse an XML qualified name
872 *
873 * [NS 5] QName ::= (Prefix ':')? LocalPart
874 *
875 * [NS 6] Prefix ::= NCName
876 *
877 * [NS 7] LocalPart ::= NCName
878 */
879
880CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
881 CHAR *ret = NULL;
882
883 *prefix = NULL;
884 ret = xmlNamespaceParseNCName(ctxt);
885 if (CUR == ':') {
886 *prefix = ret;
887 NEXT;
888 ret = xmlNamespaceParseNCName(ctxt);
889 }
890
891 return(ret);
892}
893
894/*
895 * xmlNamespaceParseNSDef : parse a namespace prefix declaration
896 *
897 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
898 *
899 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
900 */
901
902CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
903 CHAR *name = NULL;
904
905 if ((CUR == 'x') && (NXT(1) == 'm') &&
906 (NXT(2) == 'l') && (NXT(3) == 'n') &&
907 (NXT(4) == 's')) {
908 SKIP(5);
909 if (CUR == ':') {
910 NEXT;
911 name = xmlNamespaceParseNCName(ctxt);
912 }
913 }
914 return(name);
915}
916
917/*
918 * [OLD] Parse and return a string between quotes or doublequotes
919 */
920CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
921 CHAR *ret = NULL;
922 const CHAR *q;
923
924 if (CUR == '"') {
925 NEXT;
926 q = CUR_PTR;
927 while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
928 if (CUR != '"')
929 xmlParserError(ctxt, "String not closed\"%.50s\n", q);
930 else {
931 ret = xmlStrndup(q, CUR_PTR - q);
932 NEXT;
933 }
934 } else if (CUR == '\''){
935 NEXT;
936 q = CUR_PTR;
937 while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
938 if (CUR != '\'')
939 xmlParserError(ctxt, "String not closed\"%.50s\n", q);
940 else {
941 ret = xmlStrndup(q, CUR_PTR - q);
942 NEXT;
943 }
944 }
945 return(ret);
946}
947
948/*
949 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
950 *
951 * This is what the older xml-name Working Draft specified, a bunch of
952 * other stuff may still rely on it, so support is still here as
953 * if ot was declared on the root of the Tree:-(
954 */
955
956void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
957 CHAR *href = NULL;
958 CHAR *prefix = NULL;
959 int garbage = 0;
960
961 /*
962 * We just skipped "namespace" or "xml:namespace"
963 */
964 SKIP_BLANKS;
965
966 while (IS_CHAR(CUR) && (CUR != '>')) {
967 /*
968 * We can have "ns" or "prefix" attributes
969 * Old encoding as 'href' or 'AS' attributes is still supported
970 */
971 if ((CUR == 'n') && (NXT(1) == 's')) {
972 garbage = 0;
973 SKIP(2);
974 SKIP_BLANKS;
975
976 if (CUR != '=') continue;
977 NEXT;
978 SKIP_BLANKS;
979
980 href = xmlParseQuotedString(ctxt);
981 SKIP_BLANKS;
982 } else if ((CUR == 'h') && (NXT(1) == 'r') &&
983 (NXT(2) == 'e') && (NXT(3) == 'f')) {
984 garbage = 0;
985 SKIP(4);
986 SKIP_BLANKS;
987
988 if (CUR != '=') continue;
989 NEXT;
990 SKIP_BLANKS;
991
992 href = xmlParseQuotedString(ctxt);
993 SKIP_BLANKS;
994 } else if ((CUR == 'p') && (NXT(1) == 'r') &&
995 (NXT(2) == 'e') && (NXT(3) == 'f') &&
996 (NXT(4) == 'i') && (NXT(5) == 'x')) {
997 garbage = 0;
998 SKIP(6);
999 SKIP_BLANKS;
1000
1001 if (CUR != '=') continue;
1002 NEXT;
1003 SKIP_BLANKS;
1004
1005 prefix = xmlParseQuotedString(ctxt);
1006 SKIP_BLANKS;
1007 } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1008 garbage = 0;
1009 SKIP(2);
1010 SKIP_BLANKS;
1011
1012 if (CUR != '=') continue;
1013 NEXT;
1014 SKIP_BLANKS;
1015
1016 prefix = xmlParseQuotedString(ctxt);
1017 SKIP_BLANKS;
1018 } else if ((CUR == '?') && (NXT(1) == '>')) {
1019 garbage = 0;
1020 CUR_PTR ++;
1021 } else {
1022 /*
1023 * Found garbage when parsing the namespace
1024 */
1025 if (!garbage)
1026 xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1027 NEXT;
1028 }
1029 }
1030
1031 MOVETO_ENDTAG(CUR_PTR);
1032 NEXT;
1033
1034 /*
1035 * Register the DTD.
1036 */
1037 if (href != NULL)
1038 xmlNewGlobalNs(ctxt->doc, href, prefix);
1039
1040 if (prefix != NULL) free(prefix);
1041 if (href != NULL) free(href);
1042}
1043
1044/************************************************************************
1045 * *
1046 * The parser itself *
1047 * Relates to http://www.w3.org/TR/REC-xml *
1048 * *
1049 ************************************************************************/
1050
1051/*
1052 * xmlParseName : parse an XML name.
1053 *
1054 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1055 * CombiningChar | Extender
1056 *
1057 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1058 *
1059 * [6] Names ::= Name (S Name)*
1060 */
1061
1062CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1063 const CHAR *q;
1064 CHAR *ret = NULL;
1065
1066 if (!IS_LETTER(CUR) && (CUR != '_') &&
1067 (CUR != ':')) return(NULL);
1068 q = NEXT;
1069
1070 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1071 (CUR == '.') || (CUR == '-') ||
1072 (CUR == '_') || (CUR == ':') ||
1073 (IS_COMBINING(CUR)) ||
1074 (IS_EXTENDER(CUR)))
1075 NEXT;
1076
1077 ret = xmlStrndup(q, CUR_PTR - q);
1078
1079 return(ret);
1080}
1081
1082/*
1083 * xmlParseNmtoken : parse an XML Nmtoken.
1084 *
1085 * [7] Nmtoken ::= (NameChar)+
1086 *
1087 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1088 */
1089
1090CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1091 const CHAR *q;
1092 CHAR *ret = NULL;
1093
1094 q = NEXT;
1095
1096 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1097 (CUR == '.') || (CUR == '-') ||
1098 (CUR == '_') || (CUR == ':') ||
1099 (IS_COMBINING(CUR)) ||
1100 (IS_EXTENDER(CUR)))
1101 NEXT;
1102
1103 ret = xmlStrndup(q, CUR_PTR - q);
1104
1105 return(ret);
1106}
1107
1108/*
1109 * xmlParseEntityValue : parse a value for ENTITY decl.
1110 *
1111 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1112 * "'" ([^%&'] | PEReference | Reference)* "'"
1113 */
1114
1115CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1116 CHAR *ret = NULL, *cur;
1117 const CHAR *q;
1118
1119 if (CUR == '"') {
1120 NEXT;
1121
1122 q = CUR_PTR;
1123 while ((IS_CHAR(CUR)) && (CUR != '"')) {
1124 if (CUR == '%') {
1125 ret = xmlStrncat(ret, q, CUR_PTR - q);
1126 cur = xmlParsePEReference(ctxt, 1);
1127 ret = xmlStrcat(ret, cur);
1128 q = CUR_PTR;
1129 } else if (CUR == '&') {
1130 ret = xmlStrncat(ret, q, CUR_PTR - q);
1131 cur = xmlParseReference(ctxt, 1);
1132 ret = xmlStrcat(ret, cur);
1133 q = CUR_PTR;
1134 } else
1135 NEXT;
1136 }
1137 if (!IS_CHAR(CUR)) {
1138 xmlParserError(ctxt, "Unfinished EntityValue\n");
1139 } else {
1140 ret = xmlStrncat(ret, q, CUR_PTR - q);
1141 NEXT;
1142 }
1143 } else if (CUR == '\'') {
1144 NEXT;
1145 q = CUR_PTR;
1146 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1147 if (CUR == '%') {
1148 ret = xmlStrncat(ret, q, CUR_PTR - q);
1149 cur = xmlParsePEReference(ctxt, 1);
1150 ret = xmlStrcat(ret, cur);
1151 q = CUR_PTR;
1152 } else if (CUR == '&') {
1153 ret = xmlStrncat(ret, q, CUR_PTR - q);
1154 cur = xmlParseReference(ctxt, 1);
1155 ret = xmlStrcat(ret, cur);
1156 q = CUR_PTR;
1157 } else
1158 NEXT;
1159 }
1160 if (!IS_CHAR(CUR)) {
1161 xmlParserError(ctxt, "Unfinished EntityValue\n");
1162 } else {
1163 ret = xmlStrncat(ret, q, CUR_PTR - q);
1164 NEXT;
1165 }
1166 } else {
1167 xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1168 }
1169
1170 return(ret);
1171}
1172
1173/*
1174 * xmlParseAttValue : parse a value for an attribute
1175 *
1176 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1177 * "'" ([^<&'] | Reference)* "'"
1178 */
1179
1180CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1181 CHAR *ret = NULL, *cur;
1182 const CHAR *q;
1183
1184 if (CUR == '"') {
1185 NEXT;
1186
1187 q = CUR_PTR;
1188 while ((IS_CHAR(CUR)) && (CUR != '"')) {
1189 if (CUR == '&') {
1190 ret = xmlStrncat(ret, q, CUR_PTR - q);
1191 cur = xmlParseReference(ctxt, 1);
1192 ret = xmlStrcat(ret, cur);
1193 q = CUR_PTR;
1194 } else
1195 NEXT;
1196 }
1197 if (!IS_CHAR(CUR)) {
1198 xmlParserError(ctxt, "Unfinished AttValue\n");
1199 } else {
1200 ret = xmlStrncat(ret, q, CUR_PTR - q);
1201 NEXT;
1202 }
1203 } else if (CUR == '\'') {
1204 NEXT;
1205 q = CUR_PTR;
1206 while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1207 if (CUR == '&') {
1208 ret = xmlStrncat(ret, q, CUR_PTR - q);
1209 cur = xmlParseReference(ctxt, 1);
1210 ret = xmlStrcat(ret, cur);
1211 q = CUR_PTR;
1212 } else
1213 NEXT;
1214 }
1215 if (!IS_CHAR(CUR)) {
1216 xmlParserError(ctxt, "Unfinished AttValue\n");
1217 } else {
1218 ret = xmlStrncat(ret, q, CUR_PTR - q);
1219 NEXT;
1220 }
1221 } else {
1222 xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1223 }
1224
1225 return(ret);
1226}
1227
1228/*
1229 * xmlParseSystemLiteral : parse an XML Literal
1230 *
1231 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1232 */
1233
1234CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1235 const CHAR *q;
1236 CHAR *ret = NULL;
1237
1238 if (CUR == '"') {
1239 NEXT;
1240 q = CUR_PTR;
1241 while ((IS_CHAR(CUR)) && (CUR != '"'))
1242 NEXT;
1243 if (!IS_CHAR(CUR)) {
1244 xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1245 } else {
1246 ret = xmlStrndup(q, CUR_PTR - q);
1247 NEXT;
1248 }
1249 } else if (CUR == '\'') {
1250 NEXT;
1251 q = CUR_PTR;
1252 while ((IS_CHAR(CUR)) && (CUR != '\''))
1253 NEXT;
1254 if (!IS_CHAR(CUR)) {
1255 xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1256 } else {
1257 ret = xmlStrndup(q, CUR_PTR - q);
1258 NEXT;
1259 }
1260 } else {
1261 xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1262 }
1263
1264 return(ret);
1265}
1266
1267/*
1268 * xmlParsePubidLiteral: parse an XML public literal
1269 *
1270 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1271 */
1272
1273CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1274 const CHAR *q;
1275 CHAR *ret = NULL;
1276 /*
1277 * Name ::= (Letter | '_') (NameChar)*
1278 */
1279 if (CUR == '"') {
1280 NEXT;
1281 q = CUR_PTR;
1282 while (IS_PUBIDCHAR(CUR)) NEXT;
1283 if (CUR != '"') {
1284 xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1285 } else {
1286 ret = xmlStrndup(q, CUR_PTR - q);
1287 NEXT;
1288 }
1289 } else if (CUR == '\'') {
1290 NEXT;
1291 q = CUR_PTR;
1292 while ((IS_LETTER(CUR)) && (CUR != '\''))
1293 NEXT;
1294 if (!IS_LETTER(CUR)) {
1295 xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1296 } else {
1297 ret = xmlStrndup(q, CUR_PTR - q);
1298 NEXT;
1299 }
1300 } else {
1301 xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1302 }
1303
1304 return(ret);
1305}
1306
1307/*
1308 * xmlParseCharData: parse a CharData section.
1309 * if we are within a CDATA section ']]>' marks an end of section.
1310 *
1311 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1312 */
1313
1314void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1315 const CHAR *q;
1316
1317 q = CUR_PTR;
1318 while ((IS_CHAR(CUR)) && (CUR != '<') &&
1319 (CUR != '&')) {
1320 NEXT;
1321 if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1322 (NXT(2) == '>')) break;
1323 }
1324 if (q == CUR_PTR) return;
1325
1326 /*
1327 * Ok the segment [q CUR_PTR] is to be consumed as chars.
1328 */
1329 if (ctxt->sax != NULL) {
1330 if (areBlanks(ctxt, q, CUR_PTR - q))
1331 ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1332 else
1333 ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1334 }
1335}
1336
1337/*
1338 * xmlParseExternalID: Parse an External ID
1339 *
1340 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1341 * | 'PUBLIC' S PubidLiteral S SystemLiteral
1342 */
1343
1344CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1345 CHAR *URI = NULL;
1346
1347 if ((CUR == 'S') && (NXT(1) == 'Y') &&
1348 (NXT(2) == 'S') && (NXT(3) == 'T') &&
1349 (NXT(4) == 'E') && (NXT(5) == 'M')) {
1350 SKIP(6);
1351 SKIP_BLANKS;
1352 URI = xmlParseSystemLiteral(ctxt);
1353 if (URI == NULL)
1354 xmlParserError(ctxt,
1355 "xmlParseExternalID: SYSTEM, no URI\n");
1356 } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1357 (NXT(2) == 'B') && (NXT(3) == 'L') &&
1358 (NXT(4) == 'I') && (NXT(5) == 'C')) {
1359 SKIP(6);
1360 SKIP_BLANKS;
1361 *publicID = xmlParsePubidLiteral(ctxt);
1362 if (*publicID == NULL)
1363 xmlParserError(ctxt,
1364 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1365 SKIP_BLANKS;
1366 URI = xmlParseSystemLiteral(ctxt);
1367 if (URI == NULL)
1368 xmlParserError(ctxt,
1369 "xmlParseExternalID: PUBLIC, no URI\n");
1370 }
1371 return(URI);
1372}
1373
1374/*
1375 * Skip an XML (SGML) comment <!-- .... -->
1376 * This may or may not create a node (depending on the context)
1377 * The spec says that "For compatibility, the string "--" (double-hyphen)
1378 * must not occur within comments. "
1379 *
1380 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1381 */
1382xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1383 xmlNodePtr ret = NULL;
1384 const CHAR *q, *start;
1385 const CHAR *r;
1386 CHAR *val;
1387
1388 /*
1389 * Check that there is a comment right here.
1390 */
1391 if ((CUR != '<') || (NXT(1) != '!') ||
1392 (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1393
1394 SKIP(4);
1395 start = q = CUR_PTR;
1396 NEXT;
1397 r = CUR_PTR;
1398 NEXT;
1399 while (IS_CHAR(CUR) &&
1400 ((CUR == ':') || (CUR != '>') ||
1401 (*r != '-') || (*q != '-'))) {
1402 if ((*r == '-') && (*q == '-'))
1403 xmlParserError(ctxt,
1404 "Comment must not contain '--' (double-hyphen)`\n");
1405 NEXT;r++;q++;
1406 }
1407 if (!IS_CHAR(CUR)) {
1408 xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1409 } else {
1410 NEXT;
1411 if (create) {
1412 val = xmlStrndup(start, q - start);
1413 ret = xmlNewComment(val);
1414 free(val);
1415 }
1416 }
1417 return(ret);
1418}
1419
1420/*
1421 * xmlParsePITarget: parse the name of a PI
1422 *
1423 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1424 */
1425
1426CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1427 CHAR *name;
1428
1429 name = xmlParseName(ctxt);
1430 if ((name != NULL) && (name[3] == 0) &&
1431 ((name[0] == 'x') || (name[0] == 'X')) &&
1432 ((name[1] == 'm') || (name[1] == 'M')) &&
1433 ((name[2] == 'l') || (name[2] == 'L'))) {
1434 xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1435 return(NULL);
1436 }
1437 return(name);
1438}
1439
1440/*
1441 * xmlParsePI: parse an XML Processing Instruction.
1442 *
1443 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1444 */
1445
1446void xmlParsePI(xmlParserCtxtPtr ctxt) {
1447 CHAR *target;
1448
1449 if ((CUR == '<') && (NXT(1) == '?')) {
1450 /*
1451 * this is a Processing Instruction.
1452 */
1453 SKIP(2);
1454
1455 /*
1456 * Parse the target name and check for special support like
1457 * namespace.
1458 *
1459 * TODO : PI handling should be dynamically redefinable using an
1460 * API. Only namespace should be in the code IMHO ...
1461 */
1462 target = xmlParsePITarget(ctxt);
1463 if (target != NULL) {
1464 /*
1465 * Support for the old Processing Instruction related to namespace.
1466 */
1467 if ((target[0] == 'n') && (target[1] == 'a') &&
1468 (target[2] == 'm') && (target[3] == 'e') &&
1469 (target[4] == 's') && (target[5] == 'p') &&
1470 (target[6] == 'a') && (target[7] == 'c') &&
1471 (target[8] == 'e')) {
1472 xmlParseNamespace(ctxt);
1473 } else if ((target[0] == 'x') && (target[1] == 'm') &&
1474 (target[2] == 'l') && (target[3] == ':') &&
1475 (target[4] == 'n') && (target[5] == 'a') &&
1476 (target[6] == 'm') && (target[7] == 'e') &&
1477 (target[8] == 's') && (target[9] == 'p') &&
1478 (target[10] == 'a') && (target[11] == 'c') &&
1479 (target[12] == 'e')) {
1480 xmlParseNamespace(ctxt);
1481 } else {
1482 const CHAR *q = CUR_PTR;
1483
1484 while (IS_CHAR(CUR) &&
1485 ((CUR != '?') || (NXT(1) != '>')))
1486 NEXT;
1487 if (!IS_CHAR(CUR)) {
1488 xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1489 target);
1490 } else {
1491 CHAR *data;
1492
1493 data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1494 SKIP(2);
1495
1496 /*
1497 * SAX: PI detected.
1498 */
1499 if (ctxt->sax)
1500 ctxt->sax->processingInstruction(ctxt, target, data);
1501 /*
1502 * Unknown PI, ignore it !
1503 */
1504 else
1505 xmlParserWarning(ctxt,
1506 "xmlParsePI : skipping unknown PI %s\n",
1507 target);
1508 free(data);
1509 }
1510 }
1511 free(target);
1512 } else {
1513 xmlParserError(ctxt, "xmlParsePI : no target name\n");
1514 /********* Should we try to complete parsing the PI ???
1515 while (IS_CHAR(CUR) &&
1516 (CUR != '?') && (CUR != '>'))
1517 NEXT;
1518 if (!IS_CHAR(CUR)) {
1519 fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1520 target);
1521 }
1522 ********************************************************/
1523 }
1524 }
1525}
1526
1527/*
1528 * xmlParseNotationDecl: parse a notation declaration
1529 *
1530 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1531 *
1532 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1533 *
1534 * NOTE: Actually [75] and [83] interract badly since [75] can generate
1535 * 'PUBLIC' S PubidLiteral S SystemLiteral
1536 *
1537 * Hence there is actually 3 choices:
1538 * 'PUBLIC' S PubidLiteral
1539 * 'PUBLIC' S PubidLiteral S SystemLiteral
1540 * and 'SYSTEM' S SystemLiteral
1541 */
1542
1543void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1544 CHAR *name;
1545
1546 if ((CUR == '<') && (NXT(1) == '!') &&
1547 (NXT(2) == 'N') && (NXT(3) == 'O') &&
1548 (NXT(4) == 'T') && (NXT(5) == 'A') &&
1549 (NXT(6) == 'T') && (NXT(7) == 'I') &&
1550 (NXT(8) == 'O') && (NXT(9) == 'N') &&
1551 (IS_BLANK(NXT(10)))) {
1552 SKIP(10);
1553 SKIP_BLANKS;
1554
1555 name = xmlParseName(ctxt);
1556 if (name == NULL) {
1557 xmlParserError(ctxt,
1558 "xmlParseAttributeListDecl: no name for Element\n");
1559 return;
1560 }
1561 SKIP_BLANKS;
1562 /*
1563 * TODO !!!
1564 */
1565 while ((IS_CHAR(CUR)) && (CUR != '>'))
1566 NEXT;
1567 free(name);
1568 }
1569}
1570
1571/*
1572 * xmlParseEntityDecl: parse <!ENTITY declarations
1573 *
1574 * [70] EntityDecl ::= GEDecl | PEDecl
1575 *
1576 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1577 *
1578 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1579 *
1580 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1581 *
1582 * [74] PEDef ::= EntityValue | ExternalID
1583 *
1584 * [76] NDataDecl ::= S 'NDATA' S Name
1585 */
1586
1587void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1588 CHAR *name = NULL;
1589 CHAR *value = NULL;
1590 CHAR *URI = NULL, *literal = NULL;
1591 CHAR *ndata = NULL;
1592 int isParameter = 0;
1593
1594 if ((CUR == '<') && (NXT(1) == '!') &&
1595 (NXT(2) == 'E') && (NXT(3) == 'N') &&
1596 (NXT(4) == 'T') && (NXT(5) == 'I') &&
1597 (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1598 (IS_BLANK(NXT(8)))) {
1599 SKIP(8);
1600 SKIP_BLANKS;
1601
1602 if (CUR == '%') {
1603 NEXT;
1604 SKIP_BLANKS;
1605 isParameter = 1;
1606 }
1607
1608 name = xmlParseName(ctxt);
1609 if (name == NULL) {
1610 xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1611 return;
1612 }
1613 SKIP_BLANKS;
1614
1615 /*
1616 * TODO handle the various case of definitions...
1617 */
1618 if (isParameter) {
1619 if ((CUR == '"') || (CUR == '\''))
1620 value = xmlParseEntityValue(ctxt);
1621 if (value) {
1622 xmlAddDocEntity(ctxt->doc, name,
1623 XML_INTERNAL_PARAMETER_ENTITY,
1624 NULL, NULL, value);
1625 }
1626 else {
1627 URI = xmlParseExternalID(ctxt, &literal);
1628 if (URI) {
1629 xmlAddDocEntity(ctxt->doc, name,
1630 XML_EXTERNAL_PARAMETER_ENTITY,
1631 literal, URI, NULL);
1632 }
1633 }
1634 } else {
1635 if ((CUR == '"') || (CUR == '\'')) {
1636 value = xmlParseEntityValue(ctxt);
1637 xmlAddDocEntity(ctxt->doc, name,
1638 XML_INTERNAL_GENERAL_ENTITY,
1639 NULL, NULL, value);
1640 } else {
1641 URI = xmlParseExternalID(ctxt, &literal);
1642 SKIP_BLANKS;
1643 if ((CUR == 'N') && (NXT(1) == 'D') &&
1644 (NXT(2) == 'A') && (NXT(3) == 'T') &&
1645 (NXT(4) == 'A')) {
1646 SKIP(5);
1647 SKIP_BLANKS;
1648 ndata = xmlParseName(ctxt);
1649 xmlAddDocEntity(ctxt->doc, name,
1650 XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1651 literal, URI, ndata);
1652 } else {
1653 xmlAddDocEntity(ctxt->doc, name,
1654 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1655 literal, URI, NULL);
1656 }
1657 }
1658 }
1659 SKIP_BLANKS;
1660 if (CUR != '>') {
1661 xmlParserError(ctxt,
1662 "xmlParseEntityDecl: entity %s not terminated\n", name);
1663 } else
1664 NEXT;
1665 if (name != NULL) free(name);
1666 if (value != NULL) free(value);
1667 if (URI != NULL) free(URI);
1668 if (literal != NULL) free(literal);
1669 if (ndata != NULL) free(ndata);
1670 }
1671}
1672
1673/*
1674 * xmlParseEnumeratedType: parse and Enumerated attribute type.
1675 *
1676 * [57] EnumeratedType ::= NotationType | Enumeration
1677 *
1678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1679 *
1680 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1681 */
1682
1683void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1684 /*
1685 * TODO !!!
1686 */
1687 while ((IS_CHAR(CUR)) && (CUR != '>'))
1688 NEXT;
1689}
1690
1691/*
1692 * xmlParseAttributeType: parse the Attribute list def for an element
1693 *
1694 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1695 *
1696 * [55] StringType ::= 'CDATA'
1697 *
1698 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1699 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1700 */
1701void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1702 /* TODO !!! */
1703 if ((CUR == 'C') && (NXT(1) == 'D') &&
1704 (NXT(2) == 'A') && (NXT(3) == 'T') &&
1705 (NXT(4) == 'A')) {
1706 SKIP(5);
1707 } else if ((CUR == 'I') && (NXT(1) == 'D')) {
1708 SKIP(2);
1709 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1710 (NXT(2) == 'R') && (NXT(3) == 'E') &&
1711 (NXT(4) == 'F')) {
1712 SKIP(5);
1713 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1714 (NXT(2) == 'R') && (NXT(3) == 'E') &&
1715 (NXT(4) == 'F') && (NXT(5) == 'S')) {
1716 SKIP(6);
1717 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1718 (NXT(2) == 'T') && (NXT(3) == 'I') &&
1719 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
1720 SKIP(6);
1721 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1722 (NXT(2) == 'T') && (NXT(3) == 'I') &&
1723 (NXT(4) == 'T') && (NXT(5) == 'I') &&
1724 (NXT(6) == 'E') && (NXT(7) == 'S')) {
1725 SKIP(8);
1726 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1727 (NXT(2) == 'T') && (NXT(3) == 'O') &&
1728 (NXT(4) == 'K') && (NXT(5) == 'E') &&
1729 (NXT(6) == 'N')) {
1730 SKIP(7);
1731 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1732 (NXT(2) == 'T') && (NXT(3) == 'O') &&
1733 (NXT(4) == 'K') && (NXT(5) == 'E') &&
1734 (NXT(6) == 'N') && (NXT(7) == 'S')) {
1735 } else {
1736 xmlParseEnumeratedType(ctxt, name);
1737 }
1738}
1739
1740/*
1741 * xmlParseAttributeListDecl: parse the Attribute list def for an element
1742 *
1743 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1744 *
1745 * [53] AttDef ::= S Name S AttType S DefaultDecl
1746 */
1747void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1748 CHAR *name;
1749
1750 /* TODO !!! */
1751 if ((CUR == '<') && (NXT(1) == '!') &&
1752 (NXT(2) == 'A') && (NXT(3) == 'T') &&
1753 (NXT(4) == 'T') && (NXT(5) == 'L') &&
1754 (NXT(6) == 'I') && (NXT(7) == 'S') &&
1755 (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1756 SKIP(9);
1757 SKIP_BLANKS;
1758 name = xmlParseName(ctxt);
1759 if (name == NULL) {
1760 xmlParserError(ctxt,
1761 "xmlParseAttributeListDecl: no name for Element\n");
1762 return;
1763 }
1764 SKIP_BLANKS;
1765 while (CUR != '>') {
1766 const CHAR *check = CUR_PTR;
1767
1768 xmlParseAttributeType(ctxt, name);
1769 SKIP_BLANKS;
1770 if (check == CUR_PTR) {
1771 xmlParserError(ctxt,
1772 "xmlParseAttributeListDecl: detected error\n");
1773 break;
1774 }
1775 }
1776 if (CUR == '>')
1777 NEXT;
1778
1779 free(name);
1780 }
1781}
1782
1783/*
1784 * xmlParseElementContentDecl: parse the declaration for an Element content
1785 * either Mixed or Children, the cases EMPTY and ANY being handled
1786 * int xmlParseElementDecl.
1787 *
1788 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1789 *
1790 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1791 *
1792 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1793 *
1794 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1795 *
1796 * or
1797 *
1798 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1799 * '(' S? '#PCDATA' S? ')'
1800 */
1801
1802void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1803 /*
1804 * TODO This has to be parsed correctly, currently we just skip until
1805 * we reach the first '>'.
1806 * !!!
1807 */
1808 while ((IS_CHAR(CUR)) && (CUR != '>'))
1809 NEXT;
1810}
1811
1812/*
1813 * xmlParseElementDecl: parse an Element declaration.
1814 *
1815 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1816 *
1817 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1818 *
1819 * TODO There is a check [ VC: Unique Element Type Declaration ]
1820 */
1821void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1822 CHAR *name;
1823
1824 if ((CUR == '<') && (NXT(1) == '!') &&
1825 (NXT(2) == 'E') && (NXT(3) == 'L') &&
1826 (NXT(4) == 'E') && (NXT(5) == 'M') &&
1827 (NXT(6) == 'E') && (NXT(7) == 'N') &&
1828 (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1829 SKIP(9);
1830 SKIP_BLANKS;
1831 name = xmlParseName(ctxt);
1832 if (name == NULL) {
1833 xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1834 return;
1835 }
1836 SKIP_BLANKS;
1837 if ((CUR == 'E') && (NXT(1) == 'M') &&
1838 (NXT(2) == 'P') && (NXT(3) == 'T') &&
1839 (NXT(4) == 'Y')) {
1840 SKIP(5);
1841 /*
1842 * Element must always be empty.
1843 */
1844 } else if ((CUR == 'A') && (NXT(1) == 'N') &&
1845 (NXT(2) == 'Y')) {
1846 SKIP(3);
1847 /*
1848 * Element is a generic container.
1849 */
1850 } else {
1851 xmlParseElementContentDecl(ctxt, name);
1852 }
1853 SKIP_BLANKS;
1854 if (CUR != '>') {
1855 xmlParserError(ctxt,
1856 "xmlParseElementDecl: expected '>' at the end\n");
1857 } else
1858 NEXT;
1859 }
1860}
1861
1862/*
1863 * xmlParseMarkupDecl: parse Markup declarations
1864 *
1865 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1866 * NotationDecl | PI | Comment
1867 *
1868 * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1869 */
1870void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1871 xmlParseElementDecl(ctxt);
1872 xmlParseAttributeListDecl(ctxt);
1873 xmlParseEntityDecl(ctxt);
1874 xmlParseNotationDecl(ctxt);
1875 xmlParsePI(ctxt);
1876 xmlParseComment(ctxt, 0);
1877}
1878
1879/*
1880 * xmlParseCharRef: parse Reference declarations
1881 *
1882 * [66] CharRef ::= '&#' [0-9]+ ';' |
1883 * '&#x' [0-9a-fA-F]+ ';'
1884 */
1885CHAR *xmlParseCharRef(xmlParserCtxtPtr ctxt, int inLine) {
1886 int val = 0;
1887 CHAR buf[2];
1888
1889 if ((CUR == '&') && (NXT(1) == '#') &&
1890 (NXT(2) == 'x')) {
1891 SKIP(3);
1892 while (CUR != ';') {
1893 if ((CUR >= '0') && (CUR <= '9'))
1894 val = val * 16 + (CUR - '0');
1895 else if ((CUR >= 'a') && (CUR <= 'f'))
1896 val = val * 16 + (CUR - 'a') + 10;
1897 else if ((CUR >= 'A') && (CUR <= 'F'))
1898 val = val * 16 + (CUR - 'A') + 10;
1899 else {
1900 xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1901 val = 0;
1902 break;
1903 }
Daniel Veillard845664d1998-08-13 04:43:19 +00001904 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001905 }
1906 if (CUR != ';')
1907 NEXT;
1908 } else if ((CUR == '&') && (NXT(1) == '#')) {
1909 SKIP(2);
1910 while (CUR != ';') {
1911 if ((CUR >= '0') && (CUR <= '9'))
1912 val = val * 16 + (CUR - '0');
1913 else {
1914 xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1915 val = 0;
1916 break;
1917 }
Daniel Veillard845664d1998-08-13 04:43:19 +00001918 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001919 }
1920 if (CUR != ';')
1921 NEXT;
1922 } else {
1923 xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1924 }
1925 /*
1926 * Check the value IS_CHAR ...
1927 */
1928 if (IS_CHAR(val)) {
1929 buf[0] = (CHAR) val;
1930 buf[1] = 0;
1931 if (inLine)
1932 return(xmlStrndup(buf, 1));
1933 else if (ctxt->sax != NULL)
1934 ctxt->sax->characters(ctxt, buf, 0, 1);
1935 } else {
1936 xmlParserError(ctxt, "xmlParseCharRef: invalid value");
1937 }
1938 return(NULL);
1939}
1940
1941/*
1942 * xmlParseEntityRef: parse ENTITY references declarations
1943 *
1944 * [68] EntityRef ::= '&' Name ';'
1945 */
1946CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt, int inLine) {
1947 CHAR *ret = NULL;
1948 CHAR *name;
1949 xmlEntityPtr entity;
1950
1951 if (CUR == '&') {
1952 NEXT;
1953 name = xmlParseName(ctxt);
1954 if (name == NULL) {
1955 xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1956 } else {
1957 if (CUR == ';') {
1958 NEXT;
1959 entity = xmlGetDocEntity(ctxt->doc, name);
1960 if (entity == NULL) {
1961 /* TODO !!! Create a reference ! */
1962 xmlParserWarning(ctxt,
1963 "xmlParseEntityRef: &%s; not found\n", name);
1964 }
1965 /*
1966 * If we can get the content, push the entity content
1967 * as the next input stream.
1968 */
1969 else {
1970 switch (entity->type) {
1971 case XML_INTERNAL_PARAMETER_ENTITY:
1972 case XML_EXTERNAL_PARAMETER_ENTITY:
1973 xmlParserError(ctxt,
1974 "internal: xmlGetDtdEntity returned a general entity\n");
1975 break;
1976 case XML_INTERNAL_GENERAL_ENTITY:
1977 if (inLine)
1978 ret = entity->content;
1979 else
1980 xmlHandleEntity(ctxt, entity);
1981 break;
1982 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1983 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1984 xmlParserWarning(ctxt,
1985 "xmlParseEntityRef: external entity &%s; not supported\n",
1986 name);
1987 break;
1988 default:
1989 xmlParserError(ctxt,
1990 "internal: xmlParseEntityRef: unknown entity type %d\n",
1991 entity->type);
1992 }
1993 }
1994 } else {
1995 char cst[2] = { '&', 0 };
1996
1997 xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1998 ret = xmlStrndup(cst, 1);
1999 ret = xmlStrcat(ret, name);
2000 }
2001 free(name);
2002 }
2003 }
2004 return(ret);
2005}
2006
2007/*
2008 * xmlParseReference: parse Reference declarations
2009 *
2010 * [67] Reference ::= EntityRef | CharRef
2011 */
2012CHAR *xmlParseReference(xmlParserCtxtPtr ctxt, int inLine) {
2013 if ((CUR == '&') && (NXT(1) == '#')) {
2014 return(xmlParseCharRef(ctxt, inLine));
2015 } else if (CUR == '&') {
2016 return(xmlParseEntityRef(ctxt, inLine));
2017 }
2018 return(NULL);
2019}
2020
2021/*
2022 * xmlParsePEReference: parse PEReference declarations
2023 *
2024 * [69] PEReference ::= '%' Name ';'
2025 */
2026CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt, int inLine) {
2027 CHAR *ret = NULL;
2028 CHAR *name;
2029 xmlEntityPtr entity;
2030
2031 if (CUR == '%') {
2032 NEXT;
2033 name = xmlParseName(ctxt);
2034 if (name == NULL) {
2035 xmlParserError(ctxt, "xmlParsePEReference: no name\n");
2036 } else {
2037 if (CUR == ';') {
2038 NEXT;
2039 entity = xmlGetDtdEntity(ctxt->doc, name);
2040 if (entity == NULL) {
2041 xmlParserWarning(ctxt,
2042 "xmlParsePEReference: %%%s; not found\n");
2043 }
2044 /*
2045 * If we can get the content, push the entity content
2046 * as the next input stream.
2047 */
2048 else {
2049 switch (entity->type) {
2050 case XML_INTERNAL_PARAMETER_ENTITY:
2051 if (inLine)
2052 ret = entity->content;
2053 else
2054 xmlNewEntityInputStream(ctxt, entity);
2055 break;
2056 case XML_EXTERNAL_PARAMETER_ENTITY:
2057 xmlParserWarning(ctxt,
2058 "xmlParsePEReference: external entity %%%s; not supported\n");
2059 break;
2060 case XML_INTERNAL_GENERAL_ENTITY:
2061 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2062 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2063 xmlParserError(ctxt,
2064 "internal: xmlGetDtdEntity returned a general entity\n");
2065 break;
2066 default:
2067 xmlParserError(ctxt,
2068 "internal: xmlParsePEReference: unknown entity type %d\n",
2069 entity->type);
2070 }
2071 }
2072 } else {
2073 char cst[2] = { '&', 0 };
2074
2075 xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
2076 ret = xmlStrndup(cst, 1);
2077 ret = xmlStrcat(ret, name);
2078 }
2079 free(name);
2080 }
2081 }
2082 return(ret);
2083}
2084
2085/*
2086 * xmlParseDocTypeDecl : parse a DOCTYPE declaration
2087 *
2088 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2089 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
2090 */
2091
2092void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
2093 xmlDtdPtr dtd;
2094 CHAR *name;
2095 CHAR *ExternalID = NULL;
2096 CHAR *URI = NULL;
2097
2098 /*
2099 * We know that '<!DOCTYPE' has been detected.
2100 */
2101 SKIP(9);
2102
2103 SKIP_BLANKS;
2104
2105 /*
2106 * Parse the DOCTYPE name.
2107 */
2108 name = xmlParseName(ctxt);
2109 if (name == NULL) {
2110 xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
2111 }
2112
2113 SKIP_BLANKS;
2114
2115 /*
2116 * Check for SystemID and ExternalID
2117 */
2118 URI = xmlParseExternalID(ctxt, &ExternalID);
2119 SKIP_BLANKS;
2120
2121 dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
2122
2123 /*
2124 * Is there any DTD definition ?
2125 */
2126 if (CUR == '[') {
2127 NEXT;
2128 /*
2129 * Parse the succession of Markup declarations and
2130 * PEReferences.
2131 * Subsequence (markupdecl | PEReference | S)*
2132 */
2133 while (CUR != ']') {
2134 const CHAR *check = CUR_PTR;
2135
2136 SKIP_BLANKS;
2137 xmlParseMarkupDecl(ctxt);
2138 xmlParsePEReference(ctxt, 0);
2139
2140 if (CUR_PTR == check) {
2141 xmlParserError(ctxt,
2142 "xmlParseDocTypeDecl: error detected in Markup declaration\n");
2143 break;
2144 }
2145 }
2146 if (CUR == ']') NEXT;
2147 }
2148
2149 /*
2150 * We should be at the end of the DOCTYPE declaration.
2151 */
2152 if (CUR != '>') {
2153 xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
2154 /* We shouldn't try to resynchronize ... */
2155 }
2156 NEXT;
2157
2158 /*
2159 * Cleanup, since we don't use all those identifiers
2160 * TODO : the DOCTYPE if available should be stored !
2161 */
2162 if (URI != NULL) free(URI);
2163 if (ExternalID != NULL) free(ExternalID);
2164 if (name != NULL) free(name);
2165}
2166
2167/*
2168 * xmlParseAttribute: parse a start of tag.
2169 *
2170 * [41] Attribute ::= Name Eq AttValue
2171 *
2172 * [25] Eq ::= S? '=' S?
2173 *
2174 * With namespace:
2175 *
2176 * [NS 11] Attribute ::= QName Eq AttValue
2177 *
2178 * Also the case QName == xmlns:??? is handled independently as a namespace
2179 * definition.
2180 */
2181
2182void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
2183 CHAR *name, *value = NULL;
2184 CHAR *ns;
2185
2186 name = xmlNamespaceParseQName(ctxt, &ns);
2187 if (name == NULL) {
2188 xmlParserError(ctxt, "error parsing attribute name\n");
2189 return;
2190 }
2191
2192 /*
2193 * read the value
2194 */
2195 SKIP_BLANKS;
2196 if (CUR == '=') {
2197 NEXT;
2198 SKIP_BLANKS;
2199 value = xmlParseAttValue(ctxt);
2200 } else {
2201 xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
2202 name);
2203 }
2204
2205 /*
2206 * Check whether it's a namespace definition
2207 */
2208 if ((ns == NULL) &&
2209 (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
2210 (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
2211 /* a default namespace definition */
2212 xmlNewNs(node, value, NULL);
2213 if (name != NULL)
2214 free(name);
2215 if (value != NULL)
2216 free(value);
2217 return;
2218 }
2219 if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
2220 (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
2221 /* a standard namespace definition */
2222 xmlNewNs(node, value, name);
2223 if (name != NULL)
2224 free(name);
2225 if (value != NULL)
2226 free(value);
2227 return;
2228 }
2229
2230 /*
2231 * Add the attribute to the node.
2232 */
2233 if (name != NULL) {
2234 xmlNewProp(node, name, value);
2235 free(name);
2236 }
2237 if (value != NULL)
2238 free(value);
2239}
2240
2241/*
2242 * xmlParseStartTag: parse a start of tag either for rule element or
2243 * EmptyElement. In both case we don't parse the tag closing chars.
2244 *
2245 * [40] STag ::= '<' Name (S Attribute)* S? '>'
2246 *
2247 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
2248 *
2249 * With namespace:
2250 *
2251 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
2252 *
2253 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
2254 */
2255
2256xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
2257 CHAR *namespace, *name;
2258 xmlNsPtr ns = NULL;
2259 xmlNodePtr ret = NULL;
2260
2261 if (CUR != '<') return(NULL);
2262 NEXT;
2263
2264 name = xmlNamespaceParseQName(ctxt, &namespace);
2265
2266 /*
2267 * Note : the namespace resolution is deferred until the end of the
2268 * attributes parsing, since local namespace can be defined as
2269 * an attribute at this level.
2270 */
2271 ret = xmlNewNode(ns, name, NULL);
2272
2273 /*
2274 * Now parse the attributes, it ends up with the ending
2275 *
2276 * (S Attribute)* S?
2277 */
2278 SKIP_BLANKS;
2279 while ((IS_CHAR(CUR)) &&
2280 (CUR != '>') &&
2281 ((CUR != '/') || (NXT(1) != '>'))) {
2282 const CHAR *q = CUR_PTR;
2283
2284 xmlParseAttribute(ctxt, ret);
2285 SKIP_BLANKS;
2286
2287 if (q == CUR_PTR) {
2288 xmlParserError(ctxt,
2289 "xmlParseStartTag: problem parsing attributes\n");
2290 break;
2291 }
2292 }
2293
2294 /*
2295 * Search the namespace
2296 */
2297 ns = xmlSearchNs(ctxt->doc, ret, namespace);
2298 if (ns == NULL) /* ret still doesn't have a parent yet ! */
2299 ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2300 xmlSetNs(ret, ns);
2301 if (namespace != NULL)
2302 free(namespace);
2303
2304 /*
2305 * We are parsing a new node.
2306 */
2307 nodePush(ctxt, ret);
2308
2309 /*
2310 * SAX: Start of Element !
2311 */
2312 if (ctxt->sax != NULL)
2313 ctxt->sax->startElement(ctxt, name);
2314
2315 return(ret);
2316}
2317
2318/*
2319 * xmlParseEndTag: parse an end of tag
2320 *
2321 * [42] ETag ::= '</' Name S? '>'
2322 *
2323 * With namespace
2324 *
2325 * [9] ETag ::= '</' QName S? '>'
2326 */
2327
2328void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
2329 CHAR *namespace, *name;
2330 xmlNsPtr ns = NULL;
2331
2332 *nsPtr = NULL;
2333 *tagPtr = NULL;
2334
2335 if ((CUR != '<') || (NXT(1) != '/')) {
2336 xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
2337 return;
2338 }
2339 SKIP(2);
2340
2341 name = xmlNamespaceParseQName(ctxt, &namespace);
2342
2343 /*
2344 * Search the namespace
2345 */
2346 ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2347 if (namespace != NULL)
2348 free(namespace);
2349
2350 *nsPtr = ns;
2351 *tagPtr = name;
2352
2353 /*
2354 * We should definitely be at the ending "S? '>'" part
2355 */
2356 SKIP_BLANKS;
2357 if ((!IS_CHAR(CUR)) || (CUR != '>')) {
2358 xmlParserError(ctxt, "End tag : expected '>'\n");
2359 } else
2360 NEXT;
2361
2362 return;
2363}
2364
2365/*
2366 * xmlParseCDSect: escaped pure raw content.
2367 *
2368 * [18] CDSect ::= CDStart CData CDEnd
2369 *
2370 * [19] CDStart ::= '<![CDATA['
2371 *
2372 * [20] Data ::= (Char* - (Char* ']]>' Char*))
2373 *
2374 * [21] CDEnd ::= ']]>'
2375 */
2376void xmlParseCDSect(xmlParserCtxtPtr ctxt) {
2377 const CHAR *r, *s, *base;
2378
2379 if ((CUR == '<') && (NXT(1) == '!') &&
2380 (NXT(2) == '[') && (NXT(3) == 'C') &&
2381 (NXT(4) == 'D') && (NXT(5) == 'A') &&
2382 (NXT(6) == 'T') && (NXT(7) == 'A') &&
2383 (NXT(8) == '[')) {
2384 SKIP(9);
2385 } else
2386 return;
2387 base = CUR_PTR;
2388 if (!IS_CHAR(CUR)) {
2389 xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
2390 return;
2391 }
2392 r = NEXT;
2393 if (!IS_CHAR(CUR)) {
2394 xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
2395 return;
2396 }
2397 s = NEXT;
2398 while (IS_CHAR(CUR) &&
2399 ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2400 r++;s++;NEXT;
2401 }
2402 if (!IS_CHAR(CUR)) {
2403 xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
2404 return;
2405 }
2406
2407 /*
2408 * Ok the segment [base CUR_PTR] is to be consumed as chars.
2409 */
2410 if (ctxt->sax != NULL) {
2411 if (areBlanks(ctxt, base, CUR_PTR - base))
2412 ctxt->sax->ignorableWhitespace(ctxt, base, 0, CUR_PTR - base);
2413 else
2414 ctxt->sax->characters(ctxt, base, 0, CUR_PTR - base);
2415 }
2416}
2417
2418/*
2419 * xmlParseContent: a content is
2420 * (element | PCData | Reference | CDSect | PI | Comment)
2421 *
2422 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
2423 */
2424
2425void xmlParseContent(xmlParserCtxtPtr ctxt) {
2426 xmlNodePtr ret = NULL;
2427
2428 while ((CUR != '<') || (NXT(1) != '/')) {
2429 const CHAR *test = CUR_PTR;
2430 ret = NULL;
2431
2432 /*
2433 * First case : a Processing Instruction.
2434 */
2435 if ((CUR == '<') && (NXT(1) == '?')) {
2436 xmlParsePI(ctxt);
2437 }
2438 /*
2439 * Second case : a CDSection
2440 */
2441 else if ((CUR == '<') && (NXT(1) == '!') &&
2442 (NXT(2) == '[') && (NXT(3) == 'C') &&
2443 (NXT(4) == 'D') && (NXT(5) == 'A') &&
2444 (NXT(6) == 'T') && (NXT(7) == 'A') &&
2445 (NXT(8) == '[')) {
2446 xmlParseCDSect(ctxt);
2447 }
2448 /*
2449 * Third case : a comment
2450 */
2451 else if ((CUR == '<') && (NXT(1) == '!') &&
2452 (NXT(2) == '-') && (NXT(3) == '-')) {
2453 ret = xmlParseComment(ctxt, 1);
2454 }
2455 /*
2456 * Fourth case : a sub-element.
2457 */
2458 else if (CUR == '<') {
2459 ret = xmlParseElement(ctxt);
2460 }
2461 /*
2462 * Fifth case : a reference.
2463 */
2464 else if (CUR == '&') {
2465 xmlParseReference(ctxt, 0);
2466 }
2467 /*
2468 * Last case, text. Note that References are handled directly.
2469 */
2470 else {
2471 xmlParseCharData(ctxt, 0);
2472 }
2473
2474 /*
2475 * Pop-up of finished entities.
2476 */
2477 while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
2478
2479 if (test == CUR_PTR) {
2480 xmlParserError(ctxt, "detected an error in element content\n");
2481 break;
2482 }
2483 }
2484}
2485
2486/*
2487 * xmlParseElement: parse an XML element
2488 *
2489 * [39] element ::= EmptyElemTag | STag content ETag
2490 *
2491 * [41] Attribute ::= Name Eq AttValue
2492 */
2493
2494
2495xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
2496 xmlNodePtr ret;
2497 const CHAR *openTag = CUR_PTR;
2498 xmlParserNodeInfo node_info;
2499 CHAR *endTag;
2500 xmlNsPtr endNs;
2501
2502 /* Capture start position */
2503 node_info.begin_pos = CUR_PTR - ctxt->input->base;
2504 node_info.begin_line = ctxt->input->line;
2505
2506 ret = xmlParseStartTag(ctxt);
2507 if (ret == NULL) {
2508 return(NULL);
2509 }
2510
2511 /*
2512 * Check for an Empty Element.
2513 */
2514 if ((CUR == '/') && (NXT(1) == '>')) {
2515 SKIP(2);
2516 if (ctxt->sax != NULL)
2517 ctxt->sax->endElement(ctxt, ret->name);
2518
2519 /*
2520 * end of parsing of this node.
2521 */
2522 nodePop(ctxt);
2523
2524 return(ret);
2525 }
2526 if (CUR == '>') NEXT;
2527 else {
2528 xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
2529
2530 /*
2531 * end of parsing of this node.
2532 */
2533 nodePop(ctxt);
2534
2535 return(NULL);
2536 }
2537
2538 /*
2539 * Parse the content of the element:
2540 */
2541 xmlParseContent(ctxt);
2542 if (!IS_CHAR(CUR)) {
2543 xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2544 openTag);
2545
2546 /*
2547 * end of parsing of this node.
2548 */
2549 nodePop(ctxt);
2550
2551 return(NULL);
2552 }
2553
2554 /*
2555 * parse the end of tag: '</' should be here.
2556 */
2557 xmlParseEndTag(ctxt, &endNs, &endTag);
2558
2559 /*
2560 * Check that the Name in the ETag is the same as in the STag.
2561 */
2562 if (endNs != ret->ns) {
2563 xmlParserError(ctxt,
2564 "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
2565 openTag, endTag);
2566 }
2567 if (endTag == NULL ) {
2568 xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
2569 } else if (xmlStrcmp(ret->name, endTag)) {
2570 xmlParserError(ctxt,
2571 "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2572 openTag, endTag);
2573 }
2574 /*
2575 * SAX: End of Tag
2576 */
2577 else if (ctxt->sax != NULL)
2578 ctxt->sax->endElement(ctxt, endTag);
2579
2580 if (endTag != NULL)
2581 free(endTag);
2582
2583 /* Capture end position and add node */
2584 if ( ret != NULL && ctxt->record_info ) {
2585 node_info.end_pos = CUR_PTR - ctxt->input->base;
2586 node_info.end_line = ctxt->input->line;
2587 node_info.node = ret;
2588 xmlParserAddNodeInfo(ctxt, &node_info);
2589 }
2590
2591 /*
2592 * end of parsing of this node.
2593 */
2594 nodePop(ctxt);
2595
2596 return(ret);
2597}
2598
2599/*
2600 * xmlParseVersionNum: parse the XML version value.
2601 *
2602 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2603 */
2604CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
2605 const CHAR *q = CUR_PTR;
2606 CHAR *ret;
2607
2608 while (IS_CHAR(CUR) &&
2609 (((CUR >= 'a') && (CUR <= 'z')) ||
2610 ((CUR >= 'A') && (CUR <= 'Z')) ||
2611 ((CUR >= '0') && (CUR <= '9')) ||
2612 (CUR == '_') || (CUR == '.') ||
2613 (CUR == ':') || (CUR == '-'))) NEXT;
2614 ret = xmlStrndup(q, CUR_PTR - q);
2615 return(ret);
2616}
2617
2618/*
2619 * xmlParseVersionInfo: parse the XML version.
2620 *
2621 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2622 *
2623 * [25] Eq ::= S? '=' S?
2624 */
2625
2626CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2627 CHAR *version = NULL;
2628 const CHAR *q;
2629
2630 if ((CUR == 'v') && (NXT(1) == 'e') &&
2631 (NXT(2) == 'r') && (NXT(3) == 's') &&
2632 (NXT(4) == 'i') && (NXT(5) == 'o') &&
2633 (NXT(6) == 'n')) {
2634 SKIP(7);
2635 SKIP_BLANKS;
2636 if (CUR != '=') {
2637 xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2638 return(NULL);
2639 }
2640 NEXT;
2641 SKIP_BLANKS;
2642 if (CUR == '"') {
2643 NEXT;
2644 q = CUR_PTR;
2645 version = xmlParseVersionNum(ctxt);
2646 if (CUR != '"')
2647 xmlParserError(ctxt, "String not closed\n%.50s\n", q);
2648 else
2649 NEXT;
2650 } else if (CUR == '\''){
2651 NEXT;
2652 q = CUR_PTR;
2653 version = xmlParseVersionNum(ctxt);
2654 if (CUR != '\'')
2655 xmlParserError(ctxt, "String not closed\n%.50s\n", q);
2656 else
2657 NEXT;
2658 } else {
2659 xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
2660 }
2661 }
2662 return(version);
2663}
2664
2665/*
2666 * xmlParseEncName: parse the XML encoding name
2667 *
2668 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2669 */
2670CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
2671 const CHAR *q = CUR_PTR;
2672 CHAR *ret = NULL;
2673
2674 if (((CUR >= 'a') && (CUR <= 'z')) ||
2675 ((CUR >= 'A') && (CUR <= 'Z'))) {
2676 NEXT;
2677 while (IS_CHAR(CUR) &&
2678 (((CUR >= 'a') && (CUR <= 'z')) ||
2679 ((CUR >= 'A') && (CUR <= 'Z')) ||
2680 ((CUR >= '0') && (CUR <= '9')) ||
2681 (CUR == '-'))) NEXT;
2682 ret = xmlStrndup(q, CUR_PTR - q);
2683 } else {
2684 xmlParserError(ctxt, "Invalid XML encoding name\n");
2685 }
2686 return(ret);
2687}
2688
2689/*
2690 * xmlParseEncodingDecl: parse the XML encoding declaration
2691 *
2692 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2693 */
2694
2695CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2696 CHAR *encoding = NULL;
2697 const CHAR *q;
2698
2699 SKIP_BLANKS;
2700 if ((CUR == 'e') && (NXT(1) == 'n') &&
2701 (NXT(2) == 'c') && (NXT(3) == 'o') &&
2702 (NXT(4) == 'd') && (NXT(5) == 'i') &&
2703 (NXT(6) == 'n') && (NXT(7) == 'g')) {
2704 SKIP(8);
2705 SKIP_BLANKS;
2706 if (CUR != '=') {
2707 xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
2708 return(NULL);
2709 }
2710 NEXT;
2711 SKIP_BLANKS;
2712 if (CUR == '"') {
2713 NEXT;
2714 q = CUR_PTR;
2715 encoding = xmlParseEncName(ctxt);
2716 if (CUR != '"')
2717 xmlParserError(ctxt, "String not closed\n%.50s\n", q);
2718 else
2719 NEXT;
2720 } else if (CUR == '\''){
2721 NEXT;
2722 q = CUR_PTR;
2723 encoding = xmlParseEncName(ctxt);
2724 if (CUR != '\'')
2725 xmlParserError(ctxt, "String not closed\n%.50s\n", q);
2726 else
2727 NEXT;
2728 } else if (CUR == '"'){
2729 xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
2730 }
2731 }
2732 return(encoding);
2733}
2734
2735/*
2736 * xmlParseSDDecl: parse the XML standalone declaration
2737 *
2738 * [32] SDDecl ::= S 'standalone' Eq
2739 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2740 */
2741
2742int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2743 int standalone = -1;
2744
2745 SKIP_BLANKS;
2746 if ((CUR == 's') && (NXT(1) == 't') &&
2747 (NXT(2) == 'a') && (NXT(3) == 'n') &&
2748 (NXT(4) == 'd') && (NXT(5) == 'a') &&
2749 (NXT(6) == 'l') && (NXT(7) == 'o') &&
2750 (NXT(8) == 'n') && (NXT(9) == 'e')) {
2751 SKIP(10);
2752 if (CUR != '=') {
2753 xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
2754 return(standalone);
2755 }
2756 NEXT;
2757 SKIP_BLANKS;
2758 if (CUR == '\''){
2759 NEXT;
2760 if ((CUR == 'n') && (NXT(1) == 'o')) {
2761 standalone = 0;
2762 SKIP(2);
2763 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2764 (NXT(2) == 's')) {
2765 standalone = 1;
2766 SKIP(3);
2767 } else {
2768 xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
2769 }
2770 if (CUR != '\'')
2771 xmlParserError(ctxt, "String not closed\n");
2772 else
2773 NEXT;
2774 } else if (CUR == '"'){
2775 NEXT;
2776 if ((CUR == 'n') && (NXT(1) == 'o')) {
2777 standalone = 0;
2778 SKIP(2);
2779 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2780 (NXT(2) == 's')) {
2781 standalone = 1;
2782 SKIP(3);
2783 } else {
2784 xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
2785 }
2786 if (CUR != '"')
2787 xmlParserError(ctxt, "String not closed\n");
2788 else
2789 NEXT;
2790 } else {
2791 xmlParserError(ctxt, "Standalone value not found\n");
2792 }
2793 }
2794 return(standalone);
2795}
2796
2797/*
2798 * xmlParseXMLDecl: parse an XML declaration header
2799 *
2800 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
2801 */
2802
2803void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
2804 CHAR *version;
2805
2806 /*
2807 * We know that '<?xml' is here.
2808 */
2809 SKIP(5);
2810
2811 SKIP_BLANKS;
2812
2813 /*
2814 * We should have the VersionInfo here.
2815 */
2816 version = xmlParseVersionInfo(ctxt);
2817 if (version == NULL)
2818 version = xmlCharStrdup(XML_DEFAULT_VERSION);
2819 ctxt->doc = xmlNewDoc(version);
2820 free(version);
2821
2822 /*
2823 * We may have the encoding declaration
2824 */
2825 ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
2826
2827 /*
2828 * We may have the standalone status.
2829 */
2830 ctxt->doc->standalone = xmlParseSDDecl(ctxt);
2831
2832 SKIP_BLANKS;
2833 if ((CUR == '?') && (NXT(1) == '>')) {
2834 SKIP(2);
2835 } else if (CUR == '>') {
2836 /* Deprecated old WD ... */
2837 xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
2838 NEXT;
2839 } else {
2840 xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
2841 MOVETO_ENDTAG(CUR_PTR);
2842 NEXT;
2843 }
2844}
2845
2846/*
2847 * xmlParseMisc: parse an XML Misc* optionnal field.
2848 * Misc*
2849 *
2850 * [27] Misc ::= Comment | PI | S
2851 */
2852
2853void xmlParseMisc(xmlParserCtxtPtr ctxt) {
2854 while (((CUR == '<') && (NXT(1) == '?')) ||
2855 ((CUR == '<') && (NXT(1) == '!') &&
2856 (NXT(2) == '-') && (NXT(3) == '-')) ||
2857 IS_BLANK(CUR)) {
2858 if ((CUR == '<') && (NXT(1) == '?')) {
2859 xmlParsePI(ctxt);
2860 } else if (IS_BLANK(CUR)) {
2861 NEXT;
2862 } else
2863 xmlParseComment(ctxt, 0);
2864 }
2865}
2866
2867/*
2868 * xmlParseDocument : parse an XML document and build a tree.
2869 *
2870 * [1] document ::= prolog element Misc*
2871 *
2872 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
2873 */
2874
2875int xmlParseDocument(xmlParserCtxtPtr ctxt) {
2876 xmlDefaultSAXHandlerInit();
2877
2878 /*
2879 * SAX: beginning of the document processing.
2880 */
2881 if (ctxt->sax)
2882 ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
2883 if (ctxt->sax)
2884 ctxt->sax->startDocument(ctxt);
2885
2886 /*
2887 * We should check for encoding here and plug-in some
2888 * conversion code TODO !!!!
2889 */
2890
2891 /*
2892 * Wipe out everything which is before the first '<'
2893 */
2894 SKIP_BLANKS;
2895
2896 /*
2897 * Check for the XMLDecl in the Prolog.
2898 */
2899 if ((CUR == '<') && (NXT(1) == '?') &&
2900 (NXT(2) == 'x') && (NXT(3) == 'm') &&
2901 (NXT(4) == 'l')) {
2902 xmlParseXMLDecl(ctxt);
2903 /* SKIP_EOL(cur); */
2904 SKIP_BLANKS;
2905 } else if ((CUR == '<') && (NXT(1) == '?') &&
2906 (NXT(2) == 'X') && (NXT(3) == 'M') &&
2907 (NXT(4) == 'L')) {
2908 /*
2909 * The first drafts were using <?XML and the final W3C REC
2910 * now use <?xml ...
2911 */
2912 xmlParseXMLDecl(ctxt);
2913 /* SKIP_EOL(cur); */
2914 SKIP_BLANKS;
2915 } else {
2916 CHAR *version;
2917
2918 version = xmlCharStrdup(XML_DEFAULT_VERSION);
2919 ctxt->doc = xmlNewDoc(version);
2920 free(version);
2921 }
2922
2923 /*
2924 * The Misc part of the Prolog
2925 */
2926 xmlParseMisc(ctxt);
2927
2928 /*
2929 * Then possibly doc type declaration(s) and more Misc
2930 * (doctypedecl Misc*)?
2931 */
2932 if ((CUR == '<') && (NXT(1) == '!') &&
2933 (NXT(2) == 'D') && (NXT(3) == 'O') &&
2934 (NXT(4) == 'C') && (NXT(5) == 'T') &&
2935 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
2936 (NXT(8) == 'E')) {
2937 xmlParseDocTypeDecl(ctxt);
2938 xmlParseMisc(ctxt);
2939 }
2940
2941 /*
2942 * Time to start parsing the tree itself
2943 */
2944 ctxt->doc->root = xmlParseElement(ctxt);
2945
2946 /*
2947 * The Misc part at the end
2948 */
2949 xmlParseMisc(ctxt);
2950
2951 /*
2952 * SAX: end of the document processing.
2953 */
2954 if (ctxt->sax)
2955 ctxt->sax->endDocument(ctxt);
2956 return(0);
2957}
2958
2959/*
2960 * xmlParseDoc : parse an XML in-memory document and build a tree.
2961 */
2962
2963xmlDocPtr xmlParseDoc(CHAR *cur) {
2964 xmlDocPtr ret;
2965 xmlParserCtxtPtr ctxt;
2966 xmlParserInputPtr input;
2967
2968 if (cur == NULL) return(NULL);
2969
2970 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2971 if (ctxt == NULL) {
2972 perror("malloc");
2973 return(NULL);
2974 }
2975 xmlInitParserCtxt(ctxt);
2976 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2977 if (input == NULL) {
2978 perror("malloc");
2979 free(ctxt);
2980 return(NULL);
2981 }
2982
2983 input->filename = NULL;
2984 input->line = 1;
2985 input->col = 1;
2986 input->base = cur;
2987 input->cur = cur;
2988
2989 inputPush(ctxt, input);
2990
2991
2992 xmlParseDocument(ctxt);
2993 ret = ctxt->doc;
2994 free(ctxt);
2995
2996 return(ret);
2997}
2998
2999/*
3000 * xmlParseFile : parse an XML file and build a tree.
3001 */
3002
3003xmlDocPtr xmlParseFile(const char *filename) {
3004 xmlDocPtr ret;
3005#ifdef HAVE_ZLIB_H
3006 gzFile input;
3007#else
3008 int input;
3009#endif
3010 int res;
3011 struct stat buf;
3012 char *buffer;
3013 xmlParserCtxtPtr ctxt;
3014 xmlParserInputPtr inputStream;
3015
3016 res = stat(filename, &buf);
3017 if (res < 0) return(NULL);
3018
3019#ifdef HAVE_ZLIB_H
3020retry_bigger:
3021 buffer = malloc((buf.st_size * 20) + 100);
3022#else
3023 buffer = malloc(buf.st_size + 100);
3024#endif
3025 if (buffer == NULL) {
3026 perror("malloc");
3027 return(NULL);
3028 }
3029
3030 memset(buffer, 0, sizeof(buffer));
3031#ifdef HAVE_ZLIB_H
3032 input = gzopen (filename, "r");
3033 if (input == NULL) {
3034 fprintf (stderr, "Cannot read file %s :\n", filename);
3035 perror ("gzopen failed");
3036 return(NULL);
3037 }
3038#else
3039 input = open (filename, O_RDONLY);
3040 if (input < 0) {
3041 fprintf (stderr, "Cannot read file %s :\n", filename);
3042 perror ("open failed");
3043 return(NULL);
3044 }
3045#endif
3046#ifdef HAVE_ZLIB_H
3047 res = gzread(input, buffer, 20 * buf.st_size);
3048#else
3049 res = read(input, buffer, buf.st_size);
3050#endif
3051 if (res < 0) {
3052 fprintf (stderr, "Cannot read file %s :\n", filename);
3053#ifdef HAVE_ZLIB_H
3054 perror ("gzread failed");
3055#else
3056 perror ("read failed");
3057#endif
3058 return(NULL);
3059 }
3060#ifdef HAVE_ZLIB_H
3061 gzclose(input);
Daniel Veillard70120ff1998-09-22 00:24:21 +00003062 if (res >= 20 * buf.st_size + 20) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003063 free(buffer);
3064 buf.st_size *= 2;
3065 goto retry_bigger;
3066 }
3067 buf.st_size = res;
3068#else
3069 close(input);
3070#endif
3071
3072 buffer[buf.st_size] = '\0';
3073
3074 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3075 if (ctxt == NULL) {
3076 perror("malloc");
3077 return(NULL);
3078 }
3079 xmlInitParserCtxt(ctxt);
3080 inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3081 if (inputStream == NULL) {
3082 perror("malloc");
3083 free(ctxt);
3084 return(NULL);
3085 }
3086
3087 inputStream->filename = strdup(filename);
3088 inputStream->line = 1;
3089 inputStream->col = 1;
3090
3091 /*
3092 * TODO : plug some encoding conversion routines here. !!!
3093 */
3094 inputStream->base = buffer;
3095 inputStream->cur = buffer;
3096
3097 inputPush(ctxt, inputStream);
3098
3099 xmlParseDocument(ctxt);
3100
3101 ret = ctxt->doc;
3102 free(buffer);
3103 free(ctxt);
3104
3105 return(ret);
3106}
3107
3108
3109/*
3110 * xmlParseMemory : parse an XML memory block and build a tree.
3111 */
3112xmlDocPtr xmlParseMemory(char *buffer, int size) {
3113 xmlDocPtr ret;
3114 xmlParserCtxtPtr ctxt;
3115 xmlParserInputPtr input;
3116
3117 buffer[size - 1] = '\0';
3118
3119 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3120 if (ctxt == NULL) {
3121 perror("malloc");
3122 return(NULL);
3123 }
3124 xmlInitParserCtxt(ctxt);
3125 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3126 if (input == NULL) {
3127 perror("malloc");
3128 free(ctxt);
3129 return(NULL);
3130 }
3131
3132 input->filename = NULL;
3133 input->line = 1;
3134 input->col = 1;
3135
3136 /*
3137 * TODO : plug some encoding conversion routines here. !!!
3138 */
3139 input->base = buffer;
3140 input->cur = buffer;
3141
3142 inputPush(ctxt, input);
3143
3144 xmlParseDocument(ctxt);
3145
3146 ret = ctxt->doc;
3147 free(ctxt);
3148
3149 return(ret);
3150}
3151
3152
3153/* Initialize parser context */
3154void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
3155{
3156 /* Allocate the Input stack */
3157 ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
3158 ctxt->inputNr = 0;
3159 ctxt->inputMax = 5;
3160 ctxt->input = NULL;
3161
3162 /* Allocate the Node stack */
3163 ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
3164 ctxt->nodeNr = 0;
3165 ctxt->nodeMax = 10;
3166 ctxt->node = NULL;
3167
3168 ctxt->sax = &xmlDefaultSAXHandler;
3169 ctxt->doc = NULL;
3170 ctxt->record_info = 0;
3171 xmlInitNodeInfoSeq(&ctxt->node_seq);
3172}
3173
3174
3175/*
3176 * Clear (release owned resources) and reinitialize context
3177 */
3178void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
3179{
3180 xmlClearNodeInfoSeq(&ctxt->node_seq);
3181 xmlInitParserCtxt(ctxt);
3182}
3183
3184
3185/*
3186 * Setup the parser context to parse a new buffer; Clears any prior
3187 * contents from the parser context. The buffer parameter must not be
3188 * NULL, but the filename parameter can be
3189 */
3190void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
3191 const char* filename)
3192{
3193 xmlParserInputPtr input;
3194
3195 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3196 if (input == NULL) {
3197 perror("malloc");
3198 free(ctxt);
3199 exit(1);
3200 }
3201
3202 xmlClearParserCtxt(ctxt);
3203 if (input->filename != NULL)
3204 input->filename = strdup(filename);
3205 else
3206 input->filename = NULL;
3207 input->line = 1;
3208 input->col = 1;
3209 input->base = buffer;
3210 input->cur = buffer;
3211
3212 inputPush(ctxt, input);
3213}
3214
3215
3216/*
3217 * xmlParserFindNodeInfo : Find the parser node info struct for a given node
3218 */
3219const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
3220 const xmlNode* node)
3221{
3222 unsigned long pos;
3223
3224 /* Find position where node should be at */
3225 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3226 if ( ctx->node_seq.buffer[pos].node == node )
3227 return &ctx->node_seq.buffer[pos];
3228 else
3229 return NULL;
3230}
3231
3232
3233/*
3234 * xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
3235 */
3236void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3237{
3238 seq->length = 0;
3239 seq->maximum = 0;
3240 seq->buffer = NULL;
3241}
3242
3243/*
3244 * xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
3245 * info sequence
3246 */
3247void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3248{
3249 if ( seq->buffer != NULL )
3250 free(seq->buffer);
3251 xmlInitNodeInfoSeq(seq);
3252}
3253
3254
3255/*
3256 * xmlParserFindNodeInfoIndex : Find the index that the info record for
3257 * the given node is or should be at in a sorted sequence
3258 */
3259unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
3260 const xmlNode* node)
3261{
3262 unsigned long upper, lower, middle;
3263 int found = 0;
3264
3265 /* Do a binary search for the key */
3266 lower = 1;
3267 upper = seq->length;
3268 middle = 0;
3269 while ( lower <= upper && !found) {
3270 middle = lower + (upper - lower) / 2;
3271 if ( node == seq->buffer[middle - 1].node )
3272 found = 1;
3273 else if ( node < seq->buffer[middle - 1].node )
3274 upper = middle - 1;
3275 else
3276 lower = middle + 1;
3277 }
3278
3279 /* Return position */
3280 if ( middle == 0 || seq->buffer[middle - 1].node < node )
3281 return middle;
3282 else
3283 return middle - 1;
3284}
3285
3286
3287/*
3288 * xmlParserAddNodeInfo : Insert node info record into sorted sequence
3289 */
3290void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
3291 const xmlParserNodeInfo* info)
3292{
3293 unsigned long pos;
3294 static unsigned int block_size = 5;
3295
3296 /* Find pos and check to see if node is already in the sequence */
3297 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
3298 if ( pos < ctx->node_seq.length
3299 && ctx->node_seq.buffer[pos].node == info->node ) {
3300 ctx->node_seq.buffer[pos] = *info;
3301 }
3302
3303 /* Otherwise, we need to add new node to buffer */
3304 else {
3305 /* Expand buffer by 5 if needed */
3306 if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
3307 xmlParserNodeInfo* tmp_buffer;
3308 unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
3309 *(ctx->node_seq.maximum + block_size));
3310
3311 if ( ctx->node_seq.buffer == NULL )
3312 tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3313 else
3314 tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
3315
3316 if ( tmp_buffer == NULL ) {
3317 xmlParserError(ctx, "Out of memory");
3318 return;
3319 }
3320 ctx->node_seq.buffer = tmp_buffer;
3321 ctx->node_seq.maximum += block_size;
3322 }
3323
3324 /* If position is not at end, move elements out of the way */
3325 if ( pos != ctx->node_seq.length ) {
3326 unsigned long i;
3327
3328 for ( i = ctx->node_seq.length; i > pos; i-- )
3329 ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
3330 }
3331
3332 /* Copy element and increase length */
3333 ctx->node_seq.buffer[pos] = *info;
3334 ctx->node_seq.length++;
3335 }
3336}