Daniel Veillard | 01791d5 | 1998-07-24 19:24:09 +0000 | [diff] [blame] | 1 | /* |
| 2 | * parser.c : an XML 1.0 non-verifying parser |
| 3 | * |
| 4 | * See Copyright for the status of this software. |
| 5 | * |
| 6 | * $Id$ |
| 7 | */ |
| 8 | |
| 9 | #include <config.h> |
| 10 | #include <stdio.h> |
| 11 | #include <ctype.h> |
| 12 | #include <string.h> /* for memset() only */ |
| 13 | #include <malloc.h> |
| 14 | #include <sys/stat.h> |
| 15 | #ifdef HAVE_FCNTL_H |
| 16 | #include <fcntl.h> |
| 17 | #endif |
| 18 | #ifdef HAVE_UNISTD_H |
| 19 | #include <unistd.h> |
| 20 | #endif |
| 21 | #ifdef HAVE_ZLIB_H |
| 22 | #include <zlib.h> |
| 23 | #endif |
| 24 | |
| 25 | #include "xml_tree.h" |
| 26 | #include "xml_parser.h" |
| 27 | #include "xml_entities.h" |
| 28 | |
| 29 | /* |
| 30 | * A few macros needed to help building the parser. |
| 31 | */ |
| 32 | |
#ifdef UNICODE
/*
 * UNICODE version of the macros. Incomplete now TODO !!!!
 */

/* Char: tab, LF, CR, or anything from 0x20 up except 0xFFFE and 0xFFFF. */
#define IS_CHAR(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))

/* Advance p over blanks (CR included); 0x3000 is accepted as well. */
#define SKIP_BLANKS(p) \
    while ((*(p) == 0x20) || (*(p) == 0x09) || (*(p) == 0x0a) || \
           (*(p) == 0x0d) || (*(p) == 0x3000)) (p)++;

/*
 * I'm too lazy to complete this one TODO !!!!
 * The former "(c) >= 0xaa && (c) <= 0x5b" clause could never be true and
 * has been removed; the set of accepted values is unchanged.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))

/* I'm too lazy to complete this one TODO !!!! */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/* I'm too lazy to complete this one TODO !!!! */
#define IS_COMBINING(c) 0

#define IS_IGNORABLE(c) \
    ((((c) >= 0x200c) && ((c) <= 0x200f)) || \
     (((c) >= 0x202a) && ((c) <= 0x202e)) || \
     (((c) >= 0x206a) && ((c) <= 0x206f)) || \
     ((c) == 0xfeff))

/* The "||" after the 0x3005 test was missing, breaking every use. */
#define IS_EXTENDER(c) \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
     ((c) == 0xec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309b) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)) || \
     (((c) >= 0xff70) && ((c) <= 0xff9e)) || \
     ((c) == 0xff9f))

#define IS_IDEOGRAPHIC(c) \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
     (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \
     (((c) >= 0x3021) && ((c) <= 0x3029)) || \
     ((c) == 0x3007))

#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

/* S ::= (#x20 | #x9 | #xD | #xA)+ : CR is a blank too. */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
#else
/*
 * 8bits / ASCII version of the macros.
 */
#define IS_CHAR(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))

/*
 * The former "(c) >= 0xaa && (c) <= 0x5b" clause could never be true and
 * has been removed; the set of accepted values is unchanged.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))

#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

#define IS_LETTER(c) IS_BASECHAR(c)

#define IS_COMBINING(c) 0

#define IS_IGNORABLE(c) 0

#define IS_EXTENDER(c) ((c) == 0xb7)

/* S ::= (#x20 | #x9 | #xD | #xA)+ : CR is a blank too. */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
#endif
| 115 | |
| 116 | |
/*
 * Skip a single end of line: CR, LF, CR LF or LF CR.
 * CR is 0x0d (decimal 13) and LF is 0x0a (decimal 10); the previous
 * version compared against 0x13 and 0x10. The else keeps a CR LF pair
 * followed by another LF from being consumed as one line end.
 */
#define SKIP_EOL(p) \
    do { \
        if (*(p) == 0x0d) { (p)++; if (*(p) == 0x0a) (p)++; } \
        else if (*(p) == 0x0a) { (p)++; if (*(p) == 0x0d) (p)++; } \
    } while (0)

/*
 * Advance p while it points to a blank. The UNICODE build provides its
 * own definition, which must not be redefined here.
 */
#ifndef SKIP_BLANKS
#define SKIP_BLANKS(p) \
    while (IS_BLANK(*(p))) (p)++;
#endif

/* Advance p up to the next '>' or to the first invalid char. */
#define MOVETO_ENDTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;

/* Advance p up to the next '<' or to the first invalid char. */
#define MOVETO_STARTTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
| 129 | |
/*
 * Forward declaration needed for the recursive behaviour:
 * xmlParseContent() calls xmlParseElement(), which in turn calls
 * xmlParseContent() for the content of the element.
 */
xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
| 134 | |
| 135 | /* |
| 136 | * xmlHandleData : this routine represent's the specific application |
| 137 | * behaviour when reading a piece of text. |
| 138 | * |
| 139 | * For example in WebDav, any piece made only of blanks is eliminated |
| 140 | */ |
| 141 | |
| 142 | CHAR *xmlHandleData(CHAR *in) { |
| 143 | CHAR *cur; |
| 144 | |
| 145 | if (in == NULL) return(NULL); |
| 146 | cur = in; |
| 147 | while (IS_CHAR(*cur)) { |
| 148 | if (!IS_BLANK(*cur)) goto not_blank; |
| 149 | cur++; |
| 150 | } |
| 151 | free(in); |
| 152 | return(NULL); |
| 153 | |
| 154 | not_blank: |
| 155 | return(in); |
| 156 | } |
| 157 | |
| 158 | /* |
| 159 | * xmlStrndup : a strdup for array of CHAR's |
| 160 | */ |
| 161 | |
| 162 | CHAR *xmlStrndup(const CHAR *cur, int len) { |
| 163 | CHAR *ret = malloc((len + 1) * sizeof(CHAR)); |
| 164 | |
| 165 | if (ret == NULL) { |
| 166 | fprintf(stderr, "malloc of %d byte failed\n", |
| 167 | (len + 1) * sizeof(CHAR)); |
| 168 | return(NULL); |
| 169 | } |
| 170 | memcpy(ret, cur, len * sizeof(CHAR)); |
| 171 | ret[len] = 0; |
| 172 | return(ret); |
| 173 | } |
| 174 | |
| 175 | /* |
| 176 | * xmlStrdup : a strdup for CHAR's |
| 177 | */ |
| 178 | |
| 179 | CHAR *xmlStrdup(const CHAR *cur) { |
| 180 | const CHAR *p = cur; |
| 181 | |
| 182 | while (IS_CHAR(*p)) p++; |
| 183 | return(xmlStrndup(cur, p - cur)); |
| 184 | } |
| 185 | |
| 186 | /* |
| 187 | * xmlStrcmp : a strcmp for CHAR's |
| 188 | */ |
| 189 | |
| 190 | int xmlStrcmp(const CHAR *str1, const CHAR *str2) { |
| 191 | register int tmp; |
| 192 | |
| 193 | do { |
| 194 | tmp = *str1++ - *str2++; |
| 195 | if (tmp != 0) return(tmp); |
| 196 | } while ((*str1 != 0) && (*str2 != 0)); |
| 197 | return (*str1 - *str2); |
| 198 | } |
| 199 | |
| 200 | /* |
| 201 | * xmlStrncmp : a strncmp for CHAR's |
| 202 | */ |
| 203 | |
| 204 | int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) { |
| 205 | register int tmp; |
| 206 | |
| 207 | if (len <= 0) return(0); |
| 208 | do { |
| 209 | tmp = *str1++ - *str2++; |
| 210 | if (tmp != 0) return(tmp); |
| 211 | len--; |
| 212 | if (len <= 0) return(0); |
| 213 | } while ((*str1 != 0) && (*str2 != 0)); |
| 214 | return (*str1 - *str2); |
| 215 | } |
| 216 | |
| 217 | /* |
| 218 | * xmlStrchr : a strchr for CHAR's |
| 219 | */ |
| 220 | |
| 221 | CHAR *xmlStrchr(const CHAR *str, CHAR val) { |
| 222 | while (*str != 0) { |
| 223 | if (*str == val) return((CHAR *) str); |
| 224 | str++; |
| 225 | } |
| 226 | return(NULL); |
| 227 | } |
| 228 | |
| 229 | /* |
| 230 | * xmlParseName : parse an XML name. |
| 231 | */ |
| 232 | |
| 233 | CHAR *xmlParseName(xmlParserCtxtPtr ctxt) { |
| 234 | const CHAR *q; |
| 235 | CHAR *ret = NULL; |
| 236 | |
| 237 | /* |
| 238 | * Name ::= (Letter | '_') (NameChar)* |
| 239 | */ |
| 240 | if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL); |
| 241 | q = ctxt->cur++; |
| 242 | while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) || |
| 243 | (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') || (ctxt->cur[0] == '_') || |
| 244 | (ctxt->cur[0] == ':') || |
| 245 | (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) || |
| 246 | (IS_EXTENDER(ctxt->cur[0]))) |
| 247 | ctxt->cur++; |
| 248 | |
| 249 | ret = xmlStrndup(q, ctxt->cur - q); |
| 250 | |
| 251 | return(ret); |
| 252 | } |
| 253 | |
| 254 | /* |
| 255 | * Parse and return a string between quotes or doublequotes |
| 256 | */ |
| 257 | CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) { |
| 258 | CHAR *ret = NULL; |
| 259 | const CHAR *q; |
| 260 | |
| 261 | if (ctxt->cur[0] == '"') { |
| 262 | ctxt->cur++; |
| 263 | q = ctxt->cur; |
| 264 | while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++; |
| 265 | if (ctxt->cur[0] != '"') |
| 266 | fprintf(stderr, "String not closed \"%.50s\n", q); |
| 267 | else { |
| 268 | ret = xmlStrndup(q, ctxt->cur - q); |
| 269 | ctxt->cur++; |
| 270 | } |
| 271 | } else if (ctxt->cur[0] == '\''){ |
| 272 | ctxt->cur++; |
| 273 | q = ctxt->cur; |
| 274 | while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++; |
| 275 | if (ctxt->cur[0] != '\'') |
| 276 | fprintf(stderr, "String not closed '%.50s\n", q); |
| 277 | else { |
| 278 | ret = xmlStrndup(q, ctxt->cur - q); |
| 279 | ctxt->cur++; |
| 280 | } |
| 281 | } |
| 282 | return(ret); |
| 283 | } |
| 284 | |
| 285 | /* |
| 286 | * Skip an XML (SGML) comment <!-- .... --> |
| 287 | * |
| 288 | * TODO !!!! Save the comment in the tree !!! |
| 289 | */ |
| 290 | void xmlParserSkipComment(xmlParserCtxtPtr ctxt) { |
| 291 | const CHAR *q, *start; |
| 292 | const CHAR *r; |
| 293 | |
| 294 | /* |
| 295 | * An extra check may avoid errors and isn't that costly ! |
| 296 | */ |
| 297 | if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') || |
| 298 | (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return; |
| 299 | |
| 300 | ctxt->cur += 4; |
| 301 | start = q = ctxt->cur; |
| 302 | ctxt->cur++; |
| 303 | r = ctxt->cur; |
| 304 | ctxt->cur++; |
| 305 | while (IS_CHAR(ctxt->cur[0]) && |
| 306 | ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') || |
| 307 | (*r != '-') || (*q != '-'))) { |
| 308 | ctxt->cur++;r++;q++; |
| 309 | } |
| 310 | if (!IS_CHAR(ctxt->cur[0])) { |
| 311 | fprintf(stderr, "Comment not terminated <!--%.50s\n", start); |
| 312 | ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */ |
| 313 | } else { |
| 314 | ctxt->cur++; |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | /* |
| 319 | * xmlParseNamespace: parse specific '<?namespace ...' constructs. |
| 320 | */ |
| 321 | |
| 322 | void xmlParseNamespace(xmlParserCtxtPtr ctxt) { |
| 323 | CHAR *href = NULL; |
| 324 | CHAR *AS = NULL; |
| 325 | int garbage = 0; |
| 326 | |
| 327 | /* |
| 328 | * We just skipped "namespace" or "xml:namespace" |
| 329 | */ |
| 330 | SKIP_BLANKS(ctxt->cur); |
| 331 | |
| 332 | while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) { |
| 333 | /* |
| 334 | * We can have "ns" or "prefix" attributes |
| 335 | * Old encoding as 'href' or 'AS' attributes is still supported |
| 336 | */ |
| 337 | if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) { |
| 338 | garbage = 0; |
| 339 | ctxt->cur += 2; |
| 340 | SKIP_BLANKS(ctxt->cur); |
| 341 | |
| 342 | if (ctxt->cur[0] != '=') continue; |
| 343 | ctxt->cur++; |
| 344 | SKIP_BLANKS(ctxt->cur); |
| 345 | |
| 346 | href = xmlParseQuotedString(ctxt); |
| 347 | SKIP_BLANKS(ctxt->cur); |
| 348 | } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') && |
| 349 | (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) { |
| 350 | garbage = 0; |
| 351 | ctxt->cur += 4; |
| 352 | SKIP_BLANKS(ctxt->cur); |
| 353 | |
| 354 | if (ctxt->cur[0] != '=') continue; |
| 355 | ctxt->cur++; |
| 356 | SKIP_BLANKS(ctxt->cur); |
| 357 | |
| 358 | href = xmlParseQuotedString(ctxt); |
| 359 | SKIP_BLANKS(ctxt->cur); |
| 360 | } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') && |
| 361 | (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') && |
| 362 | (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) { |
| 363 | garbage = 0; |
| 364 | ctxt->cur += 6; |
| 365 | SKIP_BLANKS(ctxt->cur); |
| 366 | |
| 367 | if (ctxt->cur[0] != '=') continue; |
| 368 | ctxt->cur++; |
| 369 | SKIP_BLANKS(ctxt->cur); |
| 370 | |
| 371 | AS = xmlParseQuotedString(ctxt); |
| 372 | SKIP_BLANKS(ctxt->cur); |
| 373 | } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) { |
| 374 | garbage = 0; |
| 375 | ctxt->cur += 2; |
| 376 | SKIP_BLANKS(ctxt->cur); |
| 377 | |
| 378 | if (ctxt->cur[0] != '=') continue; |
| 379 | ctxt->cur++; |
| 380 | SKIP_BLANKS(ctxt->cur); |
| 381 | |
| 382 | AS = xmlParseQuotedString(ctxt); |
| 383 | SKIP_BLANKS(ctxt->cur); |
| 384 | } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) { |
| 385 | garbage = 0; |
| 386 | ctxt->cur ++; |
| 387 | } else { |
| 388 | /* |
| 389 | * Found garbage when parsing the namespace |
| 390 | */ |
| 391 | if (!garbage) fprintf(stderr, |
| 392 | "\nxmlParseNamespace found garbage: "); |
| 393 | fprintf(stderr, "%c", ctxt->cur[0]); |
| 394 | ctxt->cur++; |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | MOVETO_ENDTAG(ctxt->cur); |
| 399 | ctxt->cur++; |
| 400 | |
| 401 | /* |
| 402 | * Register the DTD. |
| 403 | */ |
| 404 | if (href != NULL) |
| 405 | xmlNewDtd(ctxt->doc, href, AS); |
| 406 | |
| 407 | if (AS != NULL) free(AS); |
| 408 | if (href != NULL) free(href); |
| 409 | } |
| 410 | |
| 411 | /* |
| 412 | * xmlParsePI: parse an XML Processing Instruction. |
| 413 | */ |
| 414 | |
| 415 | void xmlParsePI(xmlParserCtxtPtr ctxt) { |
| 416 | if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) { |
| 417 | /* |
| 418 | * this is a Processing Instruction. |
| 419 | */ |
| 420 | ctxt->cur += 2; |
| 421 | |
| 422 | /* |
| 423 | * Special for WebDav, support for the Processing Instruction |
| 424 | * '<?namespace ...' contruct in the header of the XML document. |
| 425 | */ |
| 426 | if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'a') && |
| 427 | (ctxt->cur[2] == 'm') && (ctxt->cur[3] == 'e') && |
| 428 | (ctxt->cur[4] == 's') && (ctxt->cur[5] == 'p') && |
| 429 | (ctxt->cur[6] == 'a') && (ctxt->cur[7] == 'c') && |
| 430 | (ctxt->cur[8] == 'e')) { |
| 431 | ctxt->cur += 9; |
| 432 | xmlParseNamespace(ctxt); |
| 433 | } else if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') && |
| 434 | (ctxt->cur[2] == 'l') && (ctxt->cur[3] == ':') && |
| 435 | (ctxt->cur[4] == 'n') && (ctxt->cur[5] == 'a') && |
| 436 | (ctxt->cur[6] == 'm') && (ctxt->cur[7] == 'e') && |
| 437 | (ctxt->cur[8] == 's') && (ctxt->cur[9] == 'p') && |
| 438 | (ctxt->cur[10] == 'a') && (ctxt->cur[11] == 'c') && |
| 439 | (ctxt->cur[12] == 'e')) { |
| 440 | ctxt->cur += 13; |
| 441 | xmlParseNamespace(ctxt); |
| 442 | } else { |
| 443 | /* Unknown PI, ignore it ! */ |
| 444 | fprintf(stderr, "xmlParsePI : skipping unknown PI %30s\n", |
| 445 | ctxt->cur); |
| 446 | MOVETO_ENDTAG(ctxt->cur); |
| 447 | ctxt->cur++; |
| 448 | } |
| 449 | } |
| 450 | } |
| 451 | |
| 452 | /* |
| 453 | * xmlParseAttribute: parse a start of tag. |
| 454 | * |
| 455 | * Attribute ::= Name Eq AttValue |
| 456 | */ |
| 457 | |
| 458 | void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) { |
| 459 | const CHAR *q; |
| 460 | CHAR *name, *value = NULL; |
| 461 | |
| 462 | if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) { |
| 463 | return; |
| 464 | } |
| 465 | q = ctxt->cur++; |
| 466 | while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) || |
| 467 | (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') || |
| 468 | (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') || |
| 469 | (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) || |
| 470 | (IS_EXTENDER(ctxt->cur[0]))) |
| 471 | ctxt->cur++; |
| 472 | name = xmlStrndup(q, ctxt->cur - q); |
| 473 | |
| 474 | /* |
| 475 | * We should have the equal, we are laxist here and allow attributes |
| 476 | * without values and extra spaces. |
| 477 | */ |
| 478 | SKIP_BLANKS(ctxt->cur); |
| 479 | if (ctxt->cur[0] == '=') { |
| 480 | ctxt->cur++; |
| 481 | SKIP_BLANKS(ctxt->cur); |
| 482 | if ((ctxt->cur[0] != '\'') && (ctxt->cur[0] != '"')) { |
| 483 | fprintf(stderr, "Quotes were expected for attribute value %.20s\n", |
| 484 | q); |
| 485 | } else |
| 486 | value = xmlParseQuotedString(ctxt); |
| 487 | } |
| 488 | |
| 489 | /* |
| 490 | * Add the attribute to the node. |
| 491 | */ |
| 492 | if (name != NULL) { |
| 493 | xmlNewProp(node, name, value); |
| 494 | free(name); |
| 495 | } |
| 496 | if ( value != NULL ) |
| 497 | free(value); |
| 498 | } |
| 499 | |
| 500 | /* |
| 501 | * xmlParseStartTag: parse a start of tag. |
| 502 | */ |
| 503 | |
| 504 | xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) { |
| 505 | const CHAR *q; |
| 506 | CHAR *ns, *name; |
| 507 | xmlDtdPtr dtd = NULL; |
| 508 | xmlNodePtr ret = NULL; |
| 509 | |
| 510 | /* |
| 511 | * Theorically one should just parse a Name, but with the addition |
| 512 | * of the namespace needed for WebDav, it's a bit more complicated |
| 513 | * since the element name may be prefixed by a namespace prefix. |
| 514 | * |
| 515 | * QName ::= (NSPart ':')? LocalPart |
| 516 | * NSPart ::= Name |
| 517 | * LocalPart ::= Name |
| 518 | * STag ::= '<' QName (S Attribute)* S? '>' |
| 519 | * |
| 520 | * instead of : |
| 521 | * |
| 522 | * STag ::= '<' QName (S Attribute)* S? '>' |
| 523 | */ |
| 524 | if (ctxt->cur[0] != '<') return(NULL); |
| 525 | ctxt->cur++; |
| 526 | |
| 527 | if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL); |
| 528 | q = ctxt->cur++; |
| 529 | while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) || |
| 530 | (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') || |
| 531 | (ctxt->cur[0] == '_') || |
| 532 | (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) || |
| 533 | (IS_EXTENDER(ctxt->cur[0]))) |
| 534 | ctxt->cur++; |
| 535 | |
| 536 | if (ctxt->cur[0] == ':') { |
| 537 | ns = xmlStrndup(q, ctxt->cur - q); |
| 538 | |
| 539 | ctxt->cur++; /* skip the column */ |
| 540 | if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) { |
| 541 | fprintf(stderr, |
| 542 | "Start tag : no element name after namespace identifier %.20s\n", |
| 543 | q); |
| 544 | free(ns); |
| 545 | return(NULL); |
| 546 | } |
| 547 | q = ctxt->cur++; |
| 548 | while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) || |
| 549 | (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') || |
| 550 | (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') || |
| 551 | (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) || |
| 552 | (IS_EXTENDER(ctxt->cur[0]))) |
| 553 | ctxt->cur++; |
| 554 | name = xmlStrndup(q, ctxt->cur - q); |
| 555 | |
| 556 | /* |
| 557 | * Search the DTD associated to ns. |
| 558 | */ |
| 559 | dtd = xmlSearchDtd(ctxt->doc, ns); |
| 560 | if (dtd == NULL) |
| 561 | fprintf(stderr, "Start tag : Couldn't find namespace %s\n", ns); |
| 562 | free(ns); |
| 563 | } else |
| 564 | name = xmlStrndup(q, ctxt->cur - q); |
| 565 | |
| 566 | ret = xmlNewNode(dtd, name, NULL); |
| 567 | |
| 568 | /* |
| 569 | * Now parse the attributes, it ends up with the ending |
| 570 | * |
| 571 | * (S Attribute)* S? |
| 572 | */ |
| 573 | SKIP_BLANKS(ctxt->cur); |
| 574 | while ((IS_CHAR(ctxt->cur[0])) && |
| 575 | (ctxt->cur[0] != '>') && |
| 576 | ((ctxt->cur[0] != '/') || (ctxt->cur[1] != '>'))) { |
| 577 | if (IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_')) |
| 578 | xmlParseAttribute(ctxt, ret); |
| 579 | else { |
| 580 | /* We should warn TODO !!! */ |
| 581 | ctxt->cur++; |
| 582 | } |
| 583 | SKIP_BLANKS(ctxt->cur); |
| 584 | } |
| 585 | |
| 586 | return(ret); |
| 587 | } |
| 588 | |
| 589 | /* |
| 590 | * xmlParseEndTag: parse an end of tag, note that the '</' part has |
| 591 | * already been read. |
| 592 | */ |
| 593 | |
| 594 | void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) { |
| 595 | const CHAR *q; |
| 596 | CHAR *ns, *name; |
| 597 | xmlDtdPtr dtd = NULL; |
| 598 | |
| 599 | *dtdPtr = NULL; |
| 600 | *tagPtr = NULL; |
| 601 | |
| 602 | /* |
| 603 | * Theorically one should just parse a Name, but with the addition |
| 604 | * of the namespace needed for WebDav, it's a bit more complicated |
| 605 | * since the element name may be prefixed by a namespace prefix. |
| 606 | * |
| 607 | * QName ::= (NSPart ':')? LocalPart |
| 608 | * NSPart ::= Name |
| 609 | * LocalPart ::= Name |
| 610 | * ETag ::= '</' QName S? '>' |
| 611 | * |
| 612 | * instead of : |
| 613 | * |
| 614 | * ETag ::= '</' Name S? '>' |
| 615 | */ |
| 616 | if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return; |
| 617 | q = ctxt->cur++; |
| 618 | while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) || |
| 619 | (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') || |
| 620 | (ctxt->cur[0] == '_') || |
| 621 | (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) || |
| 622 | (IS_EXTENDER(ctxt->cur[0]))) |
| 623 | ctxt->cur++; |
| 624 | |
| 625 | if (ctxt->cur[0] == ':') { |
| 626 | ns = xmlStrndup(q, ctxt->cur - q); |
| 627 | |
| 628 | ctxt->cur++; /* skip the column */ |
| 629 | if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) { |
| 630 | fprintf(stderr, |
| 631 | "End tag : no element name after namespace identifier %.20s\n", |
| 632 | q); |
| 633 | free(ns); |
| 634 | return; |
| 635 | } |
| 636 | q = ctxt->cur++; |
| 637 | while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) || |
| 638 | (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') || |
| 639 | (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') || |
| 640 | (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) || |
| 641 | (IS_EXTENDER(ctxt->cur[0]))) |
| 642 | ctxt->cur++; |
| 643 | name = xmlStrndup(q, ctxt->cur - q); |
| 644 | |
| 645 | /* |
| 646 | * Search the DTD associated to ns. |
| 647 | */ |
| 648 | dtd = xmlSearchDtd(ctxt->doc, ns); |
| 649 | if (dtd == NULL) |
| 650 | fprintf(stderr, "End tag : Couldn't find namespace %s\n", ns); |
| 651 | free(ns); |
| 652 | } else |
| 653 | name = xmlStrndup(q, ctxt->cur - q); |
| 654 | |
| 655 | *dtdPtr = dtd; |
| 656 | *tagPtr = name; |
| 657 | |
| 658 | /* |
| 659 | * We should definitely be at the ending "S? '>'" part |
| 660 | */ |
| 661 | SKIP_BLANKS(ctxt->cur); |
| 662 | if ((!IS_CHAR(ctxt->cur[0])) || (ctxt->cur[0] != '>')) { |
| 663 | fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur); |
| 664 | /* |
| 665 | * Note : skipping to the next '>' is probably otherkill, |
| 666 | * especially in case the '>' is hust missing. |
| 667 | * |
| 668 | * Otherwise add: |
| 669 | * MOVETO_ENDTAG(ctxt->cur); |
| 670 | */ |
| 671 | } else |
| 672 | ctxt->cur++; |
| 673 | |
| 674 | return; |
| 675 | } |
| 676 | |
| 677 | /* |
| 678 | * xmlParseCDSect: escaped pure raw content. |
| 679 | */ |
| 680 | CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) { |
| 681 | const CHAR *r, *s, *base; |
| 682 | CHAR *ret; |
| 683 | |
| 684 | base = ctxt->cur; |
| 685 | if (!IS_CHAR(ctxt->cur[0])) { |
| 686 | fprintf(stderr, "CData section not finished : %.20s\n", base); |
| 687 | return(NULL); |
| 688 | } |
| 689 | r = ctxt->cur++; |
| 690 | if (!IS_CHAR(ctxt->cur[0])) { |
| 691 | fprintf(stderr, "CData section not finished : %.20s\n", base); |
| 692 | return(NULL); |
| 693 | } |
| 694 | s = ctxt->cur++; |
| 695 | while (IS_CHAR(ctxt->cur[0]) && |
| 696 | ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) { |
| 697 | r++;s++;ctxt->cur++; |
| 698 | } |
| 699 | if (!IS_CHAR(ctxt->cur[0])) { |
| 700 | fprintf(stderr, "CData section not finished : %.20s\n", base); |
| 701 | return(NULL); |
| 702 | } |
| 703 | ret = xmlStrndup(base, ctxt->cur-base); |
| 704 | |
| 705 | return(ret); |
| 706 | } |
| 707 | |
| 708 | /* |
| 709 | * xmlParseContent: a content is |
| 710 | * (element | PCData | Reference | CDSect | PI | Comment) |
| 711 | * |
| 712 | * element : starts by '<' |
| 713 | * PCData : any CHAR but '&' or '<' |
| 714 | * Reference : starts by '&' |
| 715 | * CDSect : starts by '<![CDATA[' |
| 716 | * PI : starts by '<?' |
| 717 | */ |
| 718 | |
| 719 | xmlNodePtr xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) { |
| 720 | const CHAR *q; |
| 721 | CHAR *data = NULL; |
| 722 | xmlNodePtr ret = NULL; |
| 723 | |
| 724 | /* |
| 725 | * First case : a Processing Instruction. |
| 726 | */ |
| 727 | if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) { |
| 728 | xmlParsePI(ctxt); |
| 729 | } |
| 730 | /* |
| 731 | * Second case : a CDSection |
| 732 | */ |
| 733 | if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') && |
| 734 | (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') && |
| 735 | (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') && |
| 736 | (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') && |
| 737 | (ctxt->cur[8] == '[')) { |
| 738 | ctxt->cur += 9; |
| 739 | data = xmlParseCDSect(ctxt); |
| 740 | } |
| 741 | /* |
| 742 | * Third case : a sub-element. |
| 743 | */ |
| 744 | else if (ctxt->cur[0] == '<') { |
| 745 | ret = xmlParseElement(ctxt); |
| 746 | } |
| 747 | /* |
| 748 | * Last case, text. Note that References are handled directly. |
| 749 | */ |
| 750 | else { |
| 751 | q = ctxt->cur; |
| 752 | while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++; |
| 753 | |
| 754 | if (!IS_CHAR(ctxt->cur[0])) { |
| 755 | fprintf(stderr, "Truncated content : %.50s\n", q); |
| 756 | return(NULL); |
| 757 | } |
| 758 | |
| 759 | /* |
| 760 | * Do the Entities decoding... |
| 761 | */ |
| 762 | data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q)); |
| 763 | } |
| 764 | |
| 765 | /* |
| 766 | * Handle the data if any. If there is no child |
| 767 | * add it as content, otherwise create a new node of type text. |
| 768 | */ |
| 769 | if (data != NULL) |
| 770 | data = xmlHandleData(data); |
| 771 | if (data != NULL) { |
| 772 | if (node->childs == NULL) |
| 773 | xmlNodeSetContent(node, data); |
| 774 | else |
| 775 | ret = xmlNewText(data); |
| 776 | free(data); |
| 777 | } |
| 778 | |
| 779 | return(ret); |
| 780 | } |
| 781 | |
| 782 | /* |
| 783 | * xmlParseElement: parse an XML element |
| 784 | */ |
| 785 | |
| 786 | xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) { |
| 787 | xmlNodePtr ret, child; |
| 788 | const CHAR *openTag = ctxt->cur; |
| 789 | const CHAR *closeTag = ctxt->cur; |
| 790 | |
| 791 | ret = xmlParseStartTag(ctxt); |
| 792 | if (ret == NULL) { |
| 793 | return(NULL); |
| 794 | } |
| 795 | |
| 796 | /* |
| 797 | * Check for an Empty Element. |
| 798 | */ |
| 799 | if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) { |
| 800 | ctxt->cur += 2; |
| 801 | return(ret); |
| 802 | } |
| 803 | if (ctxt->cur[0] == '>') ctxt->cur++; |
| 804 | else { |
| 805 | fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag); |
| 806 | return(NULL); |
| 807 | } |
| 808 | |
| 809 | /* |
| 810 | * Parse the content of the element: |
| 811 | * (element | PCData | Reference | CDSect | PI | Comment) * |
| 812 | * |
| 813 | * element : starts by '<' |
| 814 | * PCData : any CHAR but '&' or '<' |
| 815 | * Reference : starts by '&' |
| 816 | * CDSect : starts by '<![CDATA[' |
| 817 | * PI : starts by '<?' |
| 818 | * |
| 819 | * The loop stops upon detection of an end of tag '</' |
| 820 | */ |
| 821 | while ((IS_CHAR(ctxt->cur[0])) && |
| 822 | ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/'))) { |
| 823 | child = xmlParseContent(ctxt, ret); |
| 824 | if (child != NULL) |
| 825 | xmlAddChild(ret, child); |
| 826 | } |
| 827 | if (!IS_CHAR(ctxt->cur[0])) { |
| 828 | fprintf(stderr, "Premature end of data in tag %.30s\n", openTag); |
| 829 | return(NULL); |
| 830 | } |
| 831 | |
| 832 | /* |
| 833 | * parse the end of tag : '</' has been detected. |
| 834 | */ |
| 835 | ctxt->cur += 2; |
| 836 | if (ctxt->cur[0] == '>') ctxt->cur++; /* simplified closing </> */ |
| 837 | else { |
| 838 | CHAR *endTag; |
| 839 | xmlDtdPtr endDtd; |
| 840 | |
| 841 | xmlParseEndTag(ctxt, &endDtd, &endTag); |
| 842 | |
| 843 | /* |
| 844 | * Check that the Name in the ETag is the same as in the STag. |
| 845 | */ |
| 846 | if (endDtd != ret->dtd) { |
| 847 | fprintf(stderr, "Start and End tags don't use the same DTD:\n"); |
| 848 | fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag); |
| 849 | } |
| 850 | if (strcmp(ret->name, endTag)) { |
| 851 | fprintf(stderr, "Start and End tags don't use the same name:\n"); |
| 852 | fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag); |
| 853 | } |
| 854 | |
| 855 | if ( endTag != NULL ) |
| 856 | free(endTag); |
| 857 | } |
| 858 | |
| 859 | return(ret); |
| 860 | } |
| 861 | |
| 862 | /* |
| 863 | * xmlParseXMLDecl: parse an XML declaration header |
| 864 | */ |
| 865 | |
| 866 | void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { |
| 867 | CHAR *version; |
| 868 | |
| 869 | /* |
| 870 | * We know that '<?xml' is here. |
| 871 | */ |
| 872 | ctxt->cur += 5; |
| 873 | |
| 874 | /* |
| 875 | * Parse the version info |
| 876 | */ |
| 877 | SKIP_BLANKS(ctxt->cur); |
| 878 | |
| 879 | /* |
| 880 | * We should have 'version=' here ! |
| 881 | */ |
| 882 | if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') && |
| 883 | (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') && |
| 884 | (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') && |
| 885 | (ctxt->cur[6] == 'n') && (ctxt->cur[7] == '=')) { |
| 886 | ctxt->cur += 8; |
| 887 | version = xmlParseQuotedString(ctxt); |
| 888 | if (version == NULL) |
| 889 | ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION); |
| 890 | else { |
| 891 | ctxt->doc = xmlNewDoc(version); |
| 892 | free(version); |
| 893 | } |
| 894 | } else { |
| 895 | ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION); |
| 896 | } |
| 897 | |
| 898 | /* |
| 899 | * We should check for Required Markup Declaration TODO !!!! |
| 900 | */ |
| 901 | MOVETO_ENDTAG(ctxt->cur); |
| 902 | ctxt->cur++; |
| 903 | |
| 904 | } |
| 905 | |
| 906 | /* |
| 907 | * xmlParseMisc: parse an XML Misc optionnal field. |
| 908 | * (Comment | PI | S)* |
| 909 | */ |
| 910 | |
| 911 | void xmlParseMisc(xmlParserCtxtPtr ctxt) { |
| 912 | while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) || |
| 913 | ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') && |
| 914 | (ctxt->cur[2] == '-') && (ctxt->cur[2] == '-')) || |
| 915 | IS_BLANK(ctxt->cur[0])) { |
| 916 | if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) { |
| 917 | xmlParsePI(ctxt); |
| 918 | } else if (IS_BLANK(ctxt->cur[0])) { |
| 919 | ctxt->cur++; |
| 920 | } else |
| 921 | xmlParserSkipComment(ctxt); |
| 922 | } |
| 923 | } |
| 924 | |
| 925 | /* |
| 926 | * xmlParseDocument : parse an XML document and build a tree. |
| 927 | */ |
| 928 | |
| 929 | int xmlParseDocument(xmlParserCtxtPtr ctxt) { |
| 930 | /* |
| 931 | * We should check for encoding here and plug-in some |
| 932 | * conversion code TODO !!!! |
| 933 | */ |
| 934 | |
| 935 | /* |
| 936 | * Wipe out everything which is before the first '<' |
| 937 | */ |
| 938 | SKIP_BLANKS(ctxt->cur); |
| 939 | |
| 940 | /* |
| 941 | * Check for the XMLDecl in the Prolog. |
| 942 | */ |
| 943 | if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') && |
| 944 | (ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') && |
| 945 | (ctxt->cur[4] == 'l')) { |
| 946 | xmlParseXMLDecl(ctxt); |
| 947 | /* SKIP_EOL(cur); */ |
| 948 | SKIP_BLANKS(ctxt->cur); |
| 949 | } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') && |
| 950 | (ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') && |
| 951 | (ctxt->cur[4] == 'L')) { |
| 952 | /* |
| 953 | * The first drafts were using <?XML and the final W3C REC |
| 954 | * now use <?xml ... |
| 955 | */ |
| 956 | xmlParseXMLDecl(ctxt); |
| 957 | /* SKIP_EOL(cur); */ |
| 958 | SKIP_BLANKS(ctxt->cur); |
| 959 | } else { |
| 960 | ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION); |
| 961 | } |
| 962 | |
| 963 | /* |
| 964 | * The Misc part of the Prolog |
| 965 | * (Comment | PI | S) * |
| 966 | */ |
| 967 | xmlParseMisc(ctxt); |
| 968 | |
| 969 | /* |
| 970 | * Time to start parsing |
| 971 | */ |
| 972 | ctxt->doc->root = xmlParseElement(ctxt); |
| 973 | |
| 974 | return(0); |
| 975 | } |
| 976 | |
| 977 | /* |
| 978 | * xmlParseDoc : parse an XML in-memory document and build a tree. |
| 979 | */ |
| 980 | |
| 981 | xmlDocPtr xmlParseDoc(CHAR *cur) { |
| 982 | xmlDocPtr ret; |
| 983 | xmlParserCtxtPtr ctxt; |
| 984 | |
| 985 | if (cur == NULL) return(NULL); |
| 986 | |
| 987 | ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt)); |
| 988 | if (ctxt == NULL) { |
| 989 | perror("malloc"); |
| 990 | return(NULL); |
| 991 | } |
| 992 | |
| 993 | xmlInitParserCtxt(ctxt); |
| 994 | ctxt->base = cur; |
| 995 | ctxt->cur = cur; |
| 996 | |
| 997 | xmlParseDocument(ctxt); |
| 998 | ret = ctxt->doc; |
| 999 | free(ctxt->nodes); |
| 1000 | free(ctxt); |
| 1001 | |
| 1002 | return(ret); |
| 1003 | } |
| 1004 | |
| 1005 | /* |
| 1006 | * xmlParseFile : parse an XML file and build a tree. |
| 1007 | */ |
| 1008 | |
| 1009 | xmlDocPtr xmlParseFile(const char *filename) { |
| 1010 | xmlDocPtr ret; |
| 1011 | #ifdef HAVE_ZLIB_H |
| 1012 | gzFile input; |
| 1013 | #else |
| 1014 | int input; |
| 1015 | #endif |
| 1016 | int res; |
| 1017 | struct stat buf; |
| 1018 | char *buffer; |
| 1019 | xmlParserCtxtPtr ctxt; |
| 1020 | |
| 1021 | res = stat(filename, &buf); |
| 1022 | if (res < 0) return(NULL); |
| 1023 | |
| 1024 | #ifdef HAVE_ZLIB_H |
| 1025 | retry_bigger: |
| 1026 | buffer = malloc((buf.st_size * 20) + 100); |
| 1027 | #else |
| 1028 | buffer = malloc(buf.st_size + 100); |
| 1029 | #endif |
| 1030 | if (buffer == NULL) { |
| 1031 | perror("malloc"); |
| 1032 | return(NULL); |
| 1033 | } |
| 1034 | |
| 1035 | memset(buffer, 0, sizeof(buffer)); |
| 1036 | #ifdef HAVE_ZLIB_H |
| 1037 | input = gzopen (filename, "r"); |
| 1038 | if (input == NULL) { |
| 1039 | fprintf (stderr, "Cannot read file %s :\n", filename); |
| 1040 | perror ("gzopen failed"); |
| 1041 | return(NULL); |
| 1042 | } |
| 1043 | #else |
| 1044 | input = open (filename, O_RDONLY); |
| 1045 | if (input < 0) { |
| 1046 | fprintf (stderr, "Cannot read file %s :\n", filename); |
| 1047 | perror ("open failed"); |
| 1048 | return(NULL); |
| 1049 | } |
| 1050 | #endif |
| 1051 | #ifdef HAVE_ZLIB_H |
| 1052 | res = gzread(input, buffer, 20 * buf.st_size); |
| 1053 | #else |
| 1054 | res = read(input, buffer, buf.st_size); |
| 1055 | #endif |
| 1056 | if (res < 0) { |
| 1057 | fprintf (stderr, "Cannot read file %s :\n", filename); |
| 1058 | #ifdef HAVE_ZLIB_H |
| 1059 | perror ("gzread failed"); |
| 1060 | #else |
| 1061 | perror ("read failed"); |
| 1062 | #endif |
| 1063 | return(NULL); |
| 1064 | } |
| 1065 | #ifdef HAVE_ZLIB_H |
| 1066 | gzclose(input); |
| 1067 | if (res >= 20 * buf.st_size) { |
| 1068 | free(buffer); |
| 1069 | buf.st_size *= 2; |
| 1070 | goto retry_bigger; |
| 1071 | } |
| 1072 | buf.st_size = res; |
| 1073 | #else |
| 1074 | close(input); |
| 1075 | #endif |
| 1076 | |
| 1077 | |
| 1078 | ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt)); |
| 1079 | if (ctxt == NULL) { |
| 1080 | perror("malloc"); |
| 1081 | return(NULL); |
| 1082 | } |
| 1083 | buffer[buf.st_size] = '\0'; |
| 1084 | |
| 1085 | xmlInitParserCtxt(ctxt); |
| 1086 | ctxt->filename = filename; |
| 1087 | ctxt->base = buffer; |
| 1088 | ctxt->cur = buffer; |
| 1089 | |
| 1090 | xmlParseDocument(ctxt); |
| 1091 | ret = ctxt->doc; |
| 1092 | free(buffer); |
| 1093 | free(ctxt->nodes); |
| 1094 | free(ctxt); |
| 1095 | |
| 1096 | return(ret); |
| 1097 | } |
| 1098 | |
| 1099 | /* |
| 1100 | * xmlParseFile : parse an XML memory block and build a tree. |
| 1101 | */ |
| 1102 | |
| 1103 | xmlDocPtr xmlParseMemory(char *buffer, int size) { |
| 1104 | xmlDocPtr ret; |
| 1105 | xmlParserCtxtPtr ctxt; |
| 1106 | |
| 1107 | ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt)); |
| 1108 | if (ctxt == NULL) { |
| 1109 | perror("malloc"); |
| 1110 | return(NULL); |
| 1111 | } |
| 1112 | |
| 1113 | buffer[size - 1] = '\0'; |
| 1114 | |
| 1115 | xmlInitParserCtxt(ctxt); |
| 1116 | ctxt->base = buffer; |
| 1117 | ctxt->cur = buffer; |
| 1118 | |
| 1119 | xmlParseDocument(ctxt); |
| 1120 | ret = ctxt->doc; |
| 1121 | free(ctxt->nodes); |
| 1122 | free(ctxt); |
| 1123 | |
| 1124 | return(ret); |
| 1125 | } |
| 1126 | |
| 1127 | |
| 1128 | |
| 1129 | |
| 1130 | /* Initialize parser context */ |
| 1131 | void xmlInitParserCtxt(xmlParserCtxtPtr ctxt) |
| 1132 | { |
| 1133 | int i; |
| 1134 | |
| 1135 | ctxt->filename = NULL; |
| 1136 | ctxt->base = NULL; |
| 1137 | ctxt->cur = NULL; |
| 1138 | ctxt->line = 1; |
| 1139 | ctxt->col = 1; |
| 1140 | ctxt->doc = NULL; |
| 1141 | ctxt->depth = 0; |
| 1142 | ctxt->max_depth = 10; |
| 1143 | ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr)); |
| 1144 | if (ctxt->nodes == NULL) { |
| 1145 | fprintf(stderr, "malloc of %d byte failed\n", |
| 1146 | ctxt->max_depth * sizeof(xmlNodePtr)); |
| 1147 | ctxt->max_depth = 0; |
| 1148 | } else { |
| 1149 | for (i = 0;i < ctxt->max_depth;i++) |
| 1150 | ctxt->nodes[i] = NULL; |
| 1151 | } |
| 1152 | } |
| 1153 | |
| 1154 | |
| 1155 | /* |
| 1156 | * Clear (release owned resources) and reinitialize context |
| 1157 | */ |
| 1158 | void xmlClearParserCtxt(xmlParserCtxtPtr ctx) |
| 1159 | { |
| 1160 | xmlInitParserCtxt(ctx); |
| 1161 | } |
| 1162 | |
| 1163 | |
| 1164 | /* |
| 1165 | * Setup the parser context to parse a new buffer; Clears any prior |
| 1166 | * contents from the parser context. The buffer parameter must not be |
| 1167 | * NULL, but the filename parameter can be |
| 1168 | */ |
| 1169 | void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer, |
| 1170 | const char* filename) |
| 1171 | { |
| 1172 | xmlClearParserCtxt(ctxt); |
| 1173 | ctxt->base = buffer; |
| 1174 | ctxt->cur = buffer; |
| 1175 | ctxt->filename = filename; |
| 1176 | } |
| 1177 | |
| 1178 | |
| 1179 | |
| 1180 | void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg) |
| 1181 | { |
| 1182 | fputs(msg, stderr); |
| 1183 | } |