blob: dd77210fea675887c70662998fa2bbcb61e1515d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#include <string.h>
14
15#include <libxml/xmlmemory.h>
16#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000017#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <libxml/xmlerror.h>
19
20/************************************************************************
21 * *
 *		Macros to differentiate various character types		*
23 * directly extracted from RFC 2396 *
24 * *
25 ************************************************************************/
26
/*
 * Character-class predicates transcribed from the RFC 2396 grammar.
 *
 * Macros taking "x" test a single character value; macros taking "p" test
 * the character(s) at a pointer, because an escaped sequence ("%" hex hex)
 * spans three characters.  Arguments may be evaluated more than once.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9" */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A" .. "F" | "a" .. "f" */
#define IS_HEX(x)							\
    ((IS_DIGIT(x)) ||							\
     (((x) >= 'a') && ((x) <= 'f')) ||					\
     (((x) >= 'A') && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)							\
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||			\
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||			\
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_RESERVED(x)							\
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||			\
     ((x) == ':') || ((x) == '@') || ((x) == '&') ||			\
     ((x) == '=') || ((x) == '+') || ((x) == '$') ||			\
     ((x) == ','))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/* escaped = "%" hex hex */
#define IS_ESCAPED(p)							\
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p)						\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||		\
     ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||		\
     ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_PCHAR(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
     ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
     ((*(p) == ',')))

/* scheme = alpha *( alpha | digit | "+" | "-" | "." ) */
#define IS_SCHEME(x)							\
    ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||					\
     ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
     ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
     ((*(p) == ',')))

/* uric = reserved | unreserved | escaped */
#define IS_URIC(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p))))
157
/*
 * Advance the pointer lvalue @p to the next character: by 3 when it sits
 * on a '%' (start of an escaped "%XX" sequence), by 1 otherwise.
 * The argument is fully parenthesized so that expressions such as
 * NEXT(*pp) advance the intended object; @p is evaluated more than once.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
163
164/*
165 * Productions from the spec.
166 *
167 * authority = server | reg_name
168 * reg_name = 1*( unreserved | escaped | "$" | "," |
169 * ";" | ":" | "@" | "&" | "=" | "+" )
170 *
171 * path = [ abs_path | opaque_part ]
172 */
173
174/************************************************************************
175 * *
176 * Generic URI structure functions *
177 * *
178 ************************************************************************/
179
180/**
181 * xmlCreateURI:
182 *
183 * Simply creates an empty xmlURI
184 *
185 * Returns the new structure or NULL in case of error
186 */
187xmlURIPtr
188xmlCreateURI(void) {
189 xmlURIPtr ret;
190
191 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
192 if (ret == NULL) {
193 xmlGenericError(xmlGenericErrorContext,
194 "xmlCreateURI: out of memory\n");
195 return(NULL);
196 }
197 memset(ret, 0, sizeof(xmlURI));
198 return(ret);
199}
200
201/**
202 * xmlSaveUri:
203 * @uri: pointer to an xmlURI
204 *
205 * Save the URI as an escaped string
206 *
207 * Returns a new string (to be deallocated by caller)
208 */
209xmlChar *
210xmlSaveUri(xmlURIPtr uri) {
211 xmlChar *ret = NULL;
212 const char *p;
213 int len;
214 int max;
215
216 if (uri == NULL) return(NULL);
217
218
219 max = 80;
220 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
221 if (ret == NULL) {
222 xmlGenericError(xmlGenericErrorContext,
223 "xmlSaveUri: out of memory\n");
224 return(NULL);
225 }
226 len = 0;
227
228 if (uri->scheme != NULL) {
229 p = uri->scheme;
230 while (*p != 0) {
231 if (len >= max) {
232 max *= 2;
233 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
234 if (ret == NULL) {
235 xmlGenericError(xmlGenericErrorContext,
236 "xmlSaveUri: out of memory\n");
237 return(NULL);
238 }
239 }
240 ret[len++] = *p++;
241 }
242 if (len >= max) {
243 max *= 2;
244 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
245 if (ret == NULL) {
246 xmlGenericError(xmlGenericErrorContext,
247 "xmlSaveUri: out of memory\n");
248 return(NULL);
249 }
250 }
251 ret[len++] = ':';
252 }
253 if (uri->opaque != NULL) {
254 p = uri->opaque;
255 while (*p != 0) {
256 if (len + 3 >= max) {
257 max *= 2;
258 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
259 if (ret == NULL) {
260 xmlGenericError(xmlGenericErrorContext,
261 "xmlSaveUri: out of memory\n");
262 return(NULL);
263 }
264 }
265 if ((IS_UNRESERVED(*(p))) ||
266 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
267 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
268 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
269 ret[len++] = *p++;
270 else {
271 int val = *(unsigned char *)p++;
272 int hi = val / 0x10, lo = val % 0x10;
273 ret[len++] = '%';
274 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
275 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
276 }
277 }
278 if (len >= max) {
279 max *= 2;
280 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
281 if (ret == NULL) {
282 xmlGenericError(xmlGenericErrorContext,
283 "xmlSaveUri: out of memory\n");
284 return(NULL);
285 }
286 }
287 ret[len++] = 0;
288 } else {
289 if (uri->server != NULL) {
290 if (len + 3 >= max) {
291 max *= 2;
292 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
293 if (ret == NULL) {
294 xmlGenericError(xmlGenericErrorContext,
295 "xmlSaveUri: out of memory\n");
296 return(NULL);
297 }
298 }
299 ret[len++] = '/';
300 ret[len++] = '/';
301 if (uri->user != NULL) {
302 p = uri->user;
303 while (*p != 0) {
304 if (len + 3 >= max) {
305 max *= 2;
306 ret = (xmlChar *) xmlRealloc(ret,
307 (max + 1) * sizeof(xmlChar));
308 if (ret == NULL) {
309 xmlGenericError(xmlGenericErrorContext,
310 "xmlSaveUri: out of memory\n");
311 return(NULL);
312 }
313 }
314 if ((IS_UNRESERVED(*(p))) ||
315 ((*(p) == ';')) || ((*(p) == ':')) ||
316 ((*(p) == '&')) || ((*(p) == '=')) ||
317 ((*(p) == '+')) || ((*(p) == '$')) ||
318 ((*(p) == ',')))
319 ret[len++] = *p++;
320 else {
321 int val = *(unsigned char *)p++;
322 int hi = val / 0x10, lo = val % 0x10;
323 ret[len++] = '%';
324 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
325 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
326 }
327 }
328 if (len + 3 >= max) {
329 max *= 2;
330 ret = (xmlChar *) xmlRealloc(ret,
331 (max + 1) * sizeof(xmlChar));
332 if (ret == NULL) {
333 xmlGenericError(xmlGenericErrorContext,
334 "xmlSaveUri: out of memory\n");
335 return(NULL);
336 }
337 }
338 ret[len++] = '@';
339 }
340 p = uri->server;
341 while (*p != 0) {
342 if (len >= max) {
343 max *= 2;
344 ret = (xmlChar *) xmlRealloc(ret,
345 (max + 1) * sizeof(xmlChar));
346 if (ret == NULL) {
347 xmlGenericError(xmlGenericErrorContext,
348 "xmlSaveUri: out of memory\n");
349 return(NULL);
350 }
351 }
352 ret[len++] = *p++;
353 }
354 if (uri->port > 0) {
355 if (len + 10 >= max) {
356 max *= 2;
357 ret = (xmlChar *) xmlRealloc(ret,
358 (max + 1) * sizeof(xmlChar));
359 if (ret == NULL) {
360 xmlGenericError(xmlGenericErrorContext,
361 "xmlSaveUri: out of memory\n");
362 return(NULL);
363 }
364 }
365 len += sprintf((char *) &ret[len], ":%d", uri->port);
366 }
367 } else if (uri->authority != NULL) {
368 if (len + 3 >= max) {
369 max *= 2;
370 ret = (xmlChar *) xmlRealloc(ret,
371 (max + 1) * sizeof(xmlChar));
372 if (ret == NULL) {
373 xmlGenericError(xmlGenericErrorContext,
374 "xmlSaveUri: out of memory\n");
375 return(NULL);
376 }
377 }
378 ret[len++] = '/';
379 ret[len++] = '/';
380 p = uri->authority;
381 while (*p != 0) {
382 if (len + 3 >= max) {
383 max *= 2;
384 ret = (xmlChar *) xmlRealloc(ret,
385 (max + 1) * sizeof(xmlChar));
386 if (ret == NULL) {
387 xmlGenericError(xmlGenericErrorContext,
388 "xmlSaveUri: out of memory\n");
389 return(NULL);
390 }
391 }
392 if ((IS_UNRESERVED(*(p))) ||
393 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
394 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
395 ((*(p) == '=')) || ((*(p) == '+')))
396 ret[len++] = *p++;
397 else {
398 int val = *(unsigned char *)p++;
399 int hi = val / 0x10, lo = val % 0x10;
400 ret[len++] = '%';
401 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
402 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
403 }
404 }
405 } else if (uri->scheme != NULL) {
406 if (len + 3 >= max) {
407 max *= 2;
408 ret = (xmlChar *) xmlRealloc(ret,
409 (max + 1) * sizeof(xmlChar));
410 if (ret == NULL) {
411 xmlGenericError(xmlGenericErrorContext,
412 "xmlSaveUri: out of memory\n");
413 return(NULL);
414 }
415 }
416 ret[len++] = '/';
417 ret[len++] = '/';
418 }
419 if (uri->path != NULL) {
420 p = uri->path;
421 while (*p != 0) {
422 if (len + 3 >= max) {
423 max *= 2;
424 ret = (xmlChar *) xmlRealloc(ret,
425 (max + 1) * sizeof(xmlChar));
426 if (ret == NULL) {
427 xmlGenericError(xmlGenericErrorContext,
428 "xmlSaveUri: out of memory\n");
429 return(NULL);
430 }
431 }
432 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
433 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
434 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
435 ((*(p) == ',')))
436 ret[len++] = *p++;
437 else {
438 int val = *(unsigned char *)p++;
439 int hi = val / 0x10, lo = val % 0x10;
440 ret[len++] = '%';
441 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
442 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
443 }
444 }
445 }
446 if (uri->query != NULL) {
447 if (len + 3 >= max) {
448 max *= 2;
449 ret = (xmlChar *) xmlRealloc(ret,
450 (max + 1) * sizeof(xmlChar));
451 if (ret == NULL) {
452 xmlGenericError(xmlGenericErrorContext,
453 "xmlSaveUri: out of memory\n");
454 return(NULL);
455 }
456 }
457 ret[len++] = '?';
458 p = uri->query;
459 while (*p != 0) {
460 if (len + 3 >= max) {
461 max *= 2;
462 ret = (xmlChar *) xmlRealloc(ret,
463 (max + 1) * sizeof(xmlChar));
464 if (ret == NULL) {
465 xmlGenericError(xmlGenericErrorContext,
466 "xmlSaveUri: out of memory\n");
467 return(NULL);
468 }
469 }
470 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
471 ret[len++] = *p++;
472 else {
473 int val = *(unsigned char *)p++;
474 int hi = val / 0x10, lo = val % 0x10;
475 ret[len++] = '%';
476 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
477 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
478 }
479 }
480 }
481 if (uri->fragment != NULL) {
482 if (len + 3 >= max) {
483 max *= 2;
484 ret = (xmlChar *) xmlRealloc(ret,
485 (max + 1) * sizeof(xmlChar));
486 if (ret == NULL) {
487 xmlGenericError(xmlGenericErrorContext,
488 "xmlSaveUri: out of memory\n");
489 return(NULL);
490 }
491 }
492 ret[len++] = '#';
493 p = uri->fragment;
494 while (*p != 0) {
495 if (len + 3 >= max) {
496 max *= 2;
497 ret = (xmlChar *) xmlRealloc(ret,
498 (max + 1) * sizeof(xmlChar));
499 if (ret == NULL) {
500 xmlGenericError(xmlGenericErrorContext,
501 "xmlSaveUri: out of memory\n");
502 return(NULL);
503 }
504 }
505 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
506 ret[len++] = *p++;
507 else {
508 int val = *(unsigned char *)p++;
509 int hi = val / 0x10, lo = val % 0x10;
510 ret[len++] = '%';
511 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
512 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
513 }
514 }
515 }
516 if (len >= max) {
517 max *= 2;
518 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
519 if (ret == NULL) {
520 xmlGenericError(xmlGenericErrorContext,
521 "xmlSaveUri: out of memory\n");
522 return(NULL);
523 }
524 }
525 ret[len++] = 0;
526 }
527 return(ret);
528}
529
530/**
531 * xmlPrintURI:
532 * @stream: a FILE* for the output
533 * @uri: pointer to an xmlURI
534 *
535 * Prints the URI in the stream @steam.
536 */
537void
538xmlPrintURI(FILE *stream, xmlURIPtr uri) {
539 xmlChar *out;
540
541 out = xmlSaveUri(uri);
542 if (out != NULL) {
543 fprintf(stream, "%s", out);
544 xmlFree(out);
545 }
546}
547
548/**
549 * xmlCleanURI:
550 * @uri: pointer to an xmlURI
551 *
552 * Make sure the xmlURI struct is free of content
553 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000554static void
Owen Taylor3473f882001-02-23 17:55:21 +0000555xmlCleanURI(xmlURIPtr uri) {
556 if (uri == NULL) return;
557
558 if (uri->scheme != NULL) xmlFree(uri->scheme);
559 uri->scheme = NULL;
560 if (uri->server != NULL) xmlFree(uri->server);
561 uri->server = NULL;
562 if (uri->user != NULL) xmlFree(uri->user);
563 uri->user = NULL;
564 if (uri->path != NULL) xmlFree(uri->path);
565 uri->path = NULL;
566 if (uri->fragment != NULL) xmlFree(uri->fragment);
567 uri->fragment = NULL;
568 if (uri->opaque != NULL) xmlFree(uri->opaque);
569 uri->opaque = NULL;
570 if (uri->authority != NULL) xmlFree(uri->authority);
571 uri->authority = NULL;
572 if (uri->query != NULL) xmlFree(uri->query);
573 uri->query = NULL;
574}
575
576/**
577 * xmlFreeURI:
578 * @uri: pointer to an xmlURI
579 *
580 * Free up the xmlURI struct
581 */
582void
583xmlFreeURI(xmlURIPtr uri) {
584 if (uri == NULL) return;
585
586 if (uri->scheme != NULL) xmlFree(uri->scheme);
587 if (uri->server != NULL) xmlFree(uri->server);
588 if (uri->user != NULL) xmlFree(uri->user);
589 if (uri->path != NULL) xmlFree(uri->path);
590 if (uri->fragment != NULL) xmlFree(uri->fragment);
591 if (uri->opaque != NULL) xmlFree(uri->opaque);
592 if (uri->authority != NULL) xmlFree(uri->authority);
593 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000594 xmlFree(uri);
595}
596
597/************************************************************************
598 * *
599 * Helper functions *
600 * *
601 ************************************************************************/
602
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  While handling (c) and (d), runs
 * of "/" that follow a segment are also collapsed into a single "/".
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "out" never gets ahead of "cur", so the in-place copy is safe.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), the string is collapsed
     * every time something is removed, so a single "current position"
     * pointer is enough.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment. */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the current segment is "..", or if the next segment _isn't_
         * "..", keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy() must not be used. */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* If there are no previous segments, then keep going from here. */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        /* Testing cur[0] first stops the scan at the terminator instead
         * of reading past it once the whole string has been consumed.
         */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
922
923/**
924 * xmlURIUnescapeString:
925 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000926 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Owen Taylor3473f882001-02-23 17:55:21 +0000927 * @target: optionnal destination buffer
928 *
929 * Unescaping routine, does not do validity checks !
930 * Output is direct unsigned char translation of %XX values (no encoding)
931 *
932 * Returns an copy of the string, but unescaped
933 */
934char *
935xmlURIUnescapeString(const char *str, int len, char *target) {
936 char *ret, *out;
937 const char *in;
938
939 if (str == NULL)
940 return(NULL);
941 if (len <= 0) len = strlen(str);
942 if (len <= 0) return(NULL);
943
944 if (target == NULL) {
945 ret = (char *) xmlMalloc(len + 1);
946 if (ret == NULL) {
947 xmlGenericError(xmlGenericErrorContext,
948 "xmlURIUnescapeString: out of memory\n");
949 return(NULL);
950 }
951 } else
952 ret = target;
953 in = str;
954 out = ret;
955 while(len > 0) {
956 if (*in == '%') {
957 in++;
958 if ((*in >= '0') && (*in <= '9'))
959 *out = (*in - '0');
960 else if ((*in >= 'a') && (*in <= 'f'))
961 *out = (*in - 'a') + 10;
962 else if ((*in >= 'A') && (*in <= 'F'))
963 *out = (*in - 'A') + 10;
964 in++;
965 if ((*in >= '0') && (*in <= '9'))
966 *out = *out * 16 + (*in - '0');
967 else if ((*in >= 'a') && (*in <= 'f'))
968 *out = *out * 16 + (*in - 'a') + 10;
969 else if ((*in >= 'A') && (*in <= 'F'))
970 *out = *out * 16 + (*in - 'A') + 10;
971 in++;
972 len -= 3;
973 out++;
974 } else {
975 *out++ = *in++;
976 len--;
977 }
978 }
979 *out = 0;
980 return(ret);
981}
982
983/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000984 * xmlURIEscapeStr:
985 * @str: string to escape
986 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000987 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000988 * This routine escapes a string to hex, ignoring reserved characters (a-z)
989 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000990 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000991 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000992 */
993xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000994xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
995 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000996 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000997
Owen Taylor3473f882001-02-23 17:55:21 +0000998 unsigned int len, out;
999
1000 if (str == NULL)
1001 return(NULL);
1002 len = xmlStrlen(str);
1003 if (len <= 0) return(NULL);
1004
1005 len += 20;
1006 ret = (xmlChar *) xmlMalloc(len);
1007 if (ret == NULL) {
1008 xmlGenericError(xmlGenericErrorContext,
1009 "xmlURIEscape: out of memory\n");
1010 return(NULL);
1011 }
1012 in = (const xmlChar *) str;
1013 out = 0;
1014 while(*in != 0) {
1015 if (len - out <= 3) {
1016 len += 20;
1017 ret = (xmlChar *) xmlRealloc(ret, len);
1018 if (ret == NULL) {
1019 xmlGenericError(xmlGenericErrorContext,
1020 "xmlURIEscape: out of memory\n");
1021 return(NULL);
1022 }
1023 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001024
1025 ch = *in;
1026
1027 if ( (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch)) ) {
Owen Taylor3473f882001-02-23 17:55:21 +00001028 unsigned char val;
1029 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001030 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001031 if (val <= 9)
1032 ret[out++] = '0' + val;
1033 else
1034 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001035 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001036 if (val <= 9)
1037 ret[out++] = '0' + val;
1038 else
1039 ret[out++] = 'A' + val - 0xA;
1040 in++;
1041 } else {
1042 ret[out++] = *in++;
1043 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001044
Owen Taylor3473f882001-02-23 17:55:21 +00001045 }
1046 ret[out] = 0;
1047 return(ret);
1048}
1049
Daniel Veillard8514c672001-05-23 10:29:12 +00001050/**
1051 * xmlURIEscape:
1052 * @str: the string of the URI to escape
1053 *
1054 * Escaping routine, does not do validity checks !
1055 * It will try to escape the chars needing this, but this is heuristic
1056 * based it's impossible to be sure.
1057 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001058 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001059 *
1060 * 25 May 2001
1061 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1062 * according to RFC2396.
1063 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001064 */
1065xmlChar *
1066xmlURIEscape(const xmlChar *str) {
Daniel Veillard6278fb52001-05-25 07:38:41 +00001067 xmlChar *ret, *segment = NULL;
1068 xmlURIPtr uri;
Daniel Veillard8514c672001-05-23 10:29:12 +00001069
Daniel Veillard6278fb52001-05-25 07:38:41 +00001070#define NULLCHK(p) if(!p) { \
1071 xmlGenericError(xmlGenericErrorContext, \
1072 "xmlURIEscape: out of memory\n"); \
1073 return NULL; }
1074
1075 uri = xmlParseURI( (const char *) str);
1076
1077 if(!uri)
1078 return NULL;
1079
1080 ret = NULL;
1081
1082 if(uri->scheme) {
1083 segment = xmlURIEscapeStr( BAD_CAST uri->scheme, BAD_CAST "+-.");
1084 NULLCHK(segment)
1085 xmlStrcat(ret, segment);
1086 xmlStrcat(ret, BAD_CAST ":");
1087 xmlFree(segment);
1088 }
1089
1090 if(uri->authority) {
1091 segment = xmlURIEscapeStr( BAD_CAST uri->authority, BAD_CAST "/?;:@");
1092 NULLCHK(segment)
1093 xmlStrcat(ret, BAD_CAST "//");
1094 xmlStrcat(ret, segment);
1095 xmlFree(segment);
1096 }
1097
1098 if(uri->user) {
1099 segment = xmlURIEscapeStr( BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1100 NULLCHK(segment)
1101 xmlStrcat(ret, segment);
1102 xmlStrcat(ret, BAD_CAST "@");
1103 xmlFree(segment);
1104 }
1105
1106 if(uri->server) {
1107 segment = xmlURIEscapeStr( BAD_CAST uri->server, BAD_CAST "/?;:@");
1108 NULLCHK(segment)
1109 xmlStrcat(ret, BAD_CAST "//");
1110 xmlStrcat(ret, segment);
1111 xmlFree(segment);
1112 }
1113
1114 if(uri->port) {
1115 xmlChar port[10];
Daniel Veillarde95e2392001-06-06 10:46:28 +00001116 snprintf((char *) segment, 10, "%d", uri->port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001117 xmlStrcat(ret, BAD_CAST ":");
1118 xmlStrcat(ret, port);
1119 xmlFree(segment);
1120 }
1121
1122 if(uri->path) {
1123 segment = xmlURIEscapeStr( BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1124 NULLCHK(segment)
1125 xmlStrcat(ret, segment);
1126 xmlFree(segment);
1127 }
1128
1129 if(uri->query) {
1130 segment = xmlURIEscapeStr( BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1131 NULLCHK(segment)
1132 xmlStrcat(ret, BAD_CAST "?");
1133 xmlStrcat(ret, segment);
1134 xmlFree(segment);
1135 }
1136
1137 if(uri->opaque) {
1138 segment = xmlURIEscapeStr( BAD_CAST uri->opaque, BAD_CAST "");
1139 NULLCHK(segment)
1140 xmlStrcat(ret, segment);
1141 xmlStrcat(ret, BAD_CAST ":");
1142 xmlFree(segment);
1143 }
1144
1145 if(uri->fragment) {
1146 segment = xmlURIEscapeStr( BAD_CAST uri->fragment, BAD_CAST "#");
1147 NULLCHK(segment)
1148 xmlStrcat(ret, BAD_CAST "#");
1149 xmlStrcat(ret, segment);
1150 xmlFree(segment);
1151 }
1152
1153#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001154
1155 return(ret);
1156}
1157
Owen Taylor3473f882001-02-23 17:55:21 +00001158/************************************************************************
1159 * *
1160 * Escaped URI parsing *
1161 * *
1162 ************************************************************************/
1163
1164/**
1165 * xmlParseURIFragment:
1166 * @uri: pointer to an URI structure
1167 * @str: pointer to the string to analyze
1168 *
1169 * Parse an URI fragment string and fills in the appropriate fields
1170 * of the @uri structure.
1171 *
1172 * fragment = *uric
1173 *
1174 * Returns 0 or the error code
1175 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001176static int
Owen Taylor3473f882001-02-23 17:55:21 +00001177xmlParseURIFragment(xmlURIPtr uri, const char **str) {
1178 const char *cur = *str;
1179
1180 if (str == NULL) return(-1);
1181
1182 while (IS_URIC(cur)) NEXT(cur);
1183 if (uri != NULL) {
1184 if (uri->fragment != NULL) xmlFree(uri->fragment);
1185 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1186 }
1187 *str = cur;
1188 return(0);
1189}
1190
1191/**
1192 * xmlParseURIQuery:
1193 * @uri: pointer to an URI structure
1194 * @str: pointer to the string to analyze
1195 *
1196 * Parse the query part of an URI
1197 *
1198 * query = *uric
1199 *
1200 * Returns 0 or the error code
1201 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001202static int
Owen Taylor3473f882001-02-23 17:55:21 +00001203xmlParseURIQuery(xmlURIPtr uri, const char **str) {
1204 const char *cur = *str;
1205
1206 if (str == NULL) return(-1);
1207
1208 while (IS_URIC(cur)) NEXT(cur);
1209 if (uri != NULL) {
1210 if (uri->query != NULL) xmlFree(uri->query);
1211 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1212 }
1213 *str = cur;
1214 return(0);
1215}
1216
1217/**
1218 * xmlParseURIScheme:
1219 * @uri: pointer to an URI structure
1220 * @str: pointer to the string to analyze
1221 *
1222 * Parse an URI scheme
1223 *
1224 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1225 *
1226 * Returns 0 or the error code
1227 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001228static int
Owen Taylor3473f882001-02-23 17:55:21 +00001229xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1230 const char *cur;
1231
1232 if (str == NULL)
1233 return(-1);
1234
1235 cur = *str;
1236 if (!IS_ALPHA(*cur))
1237 return(2);
1238 cur++;
1239 while (IS_SCHEME(*cur)) cur++;
1240 if (uri != NULL) {
1241 if (uri->scheme != NULL) xmlFree(uri->scheme);
1242 /* !!! strndup */
1243 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1244 }
1245 *str = cur;
1246 return(0);
1247}
1248
1249/**
1250 * xmlParseURIOpaquePart:
1251 * @uri: pointer to an URI structure
1252 * @str: pointer to the string to analyze
1253 *
1254 * Parse an URI opaque part
1255 *
1256 * opaque_part = uric_no_slash *uric
1257 *
1258 * Returns 0 or the error code
1259 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001260static int
Owen Taylor3473f882001-02-23 17:55:21 +00001261xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
1262 const char *cur;
1263
1264 if (str == NULL)
1265 return(-1);
1266
1267 cur = *str;
1268 if (!IS_URIC_NO_SLASH(cur)) {
1269 return(3);
1270 }
1271 NEXT(cur);
1272 while (IS_URIC(cur)) NEXT(cur);
1273 if (uri != NULL) {
1274 if (uri->opaque != NULL) xmlFree(uri->opaque);
1275 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1276 }
1277 *str = cur;
1278 return(0);
1279}
1280
1281/**
1282 * xmlParseURIServer:
1283 * @uri: pointer to an URI structure
1284 * @str: pointer to the string to analyze
1285 *
1286 * Parse a server subpart of an URI, it's a finer grain analysis
1287 * of the authority part.
1288 *
1289 * server = [ [ userinfo "@" ] hostport ]
1290 * userinfo = *( unreserved | escaped |
1291 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1292 * hostport = host [ ":" port ]
1293 * host = hostname | IPv4address
1294 * hostname = *( domainlabel "." ) toplabel [ "." ]
1295 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1296 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1297 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1298 * port = *digit
1299 *
1300 * Returns 0 or the error code
1301 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001302static int
Owen Taylor3473f882001-02-23 17:55:21 +00001303xmlParseURIServer(xmlURIPtr uri, const char **str) {
1304 const char *cur;
1305 const char *host, *tmp;
1306
1307 if (str == NULL)
1308 return(-1);
1309
1310 cur = *str;
1311
1312 /*
1313 * is there an userinfo ?
1314 */
1315 while (IS_USERINFO(cur)) NEXT(cur);
1316 if (*cur == '@') {
1317 if (uri != NULL) {
1318 if (uri->user != NULL) xmlFree(uri->user);
1319 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1320 }
1321 cur++;
1322 } else {
1323 if (uri != NULL) {
1324 if (uri->user != NULL) xmlFree(uri->user);
1325 uri->user = NULL;
1326 }
1327 cur = *str;
1328 }
1329 /*
1330 * This can be empty in the case where there is no server
1331 */
1332 host = cur;
1333 if (*cur == '/') {
1334 if (uri != NULL) {
1335 if (uri->authority != NULL) xmlFree(uri->authority);
1336 uri->authority = NULL;
1337 if (uri->server != NULL) xmlFree(uri->server);
1338 uri->server = NULL;
1339 uri->port = 0;
1340 }
1341 return(0);
1342 }
1343 /*
1344 * host part of hostport can derive either an IPV4 address
1345 * or an unresolved name. Check the IP first, it easier to detect
1346 * errors if wrong one
1347 */
1348 if (IS_DIGIT(*cur)) {
1349 while(IS_DIGIT(*cur)) cur++;
1350 if (*cur != '.')
1351 goto host_name;
1352 cur++;
1353 if (!IS_DIGIT(*cur))
1354 goto host_name;
1355 while(IS_DIGIT(*cur)) cur++;
1356 if (*cur != '.')
1357 goto host_name;
1358 cur++;
1359 if (!IS_DIGIT(*cur))
1360 goto host_name;
1361 while(IS_DIGIT(*cur)) cur++;
1362 if (*cur != '.')
1363 goto host_name;
1364 cur++;
1365 if (!IS_DIGIT(*cur))
1366 goto host_name;
1367 while(IS_DIGIT(*cur)) cur++;
1368 if (uri != NULL) {
1369 if (uri->authority != NULL) xmlFree(uri->authority);
1370 uri->authority = NULL;
1371 if (uri->server != NULL) xmlFree(uri->server);
1372 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1373 }
1374 goto host_done;
1375 }
1376host_name:
1377 /*
1378 * the hostname production as-is is a parser nightmare.
1379 * simplify it to
1380 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1381 * and just make sure the last label starts with a non numeric char.
1382 */
1383 if (!IS_ALPHANUM(*cur))
1384 return(6);
1385 while (IS_ALPHANUM(*cur)) {
1386 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1387 if (*cur == '.')
1388 cur++;
1389 }
1390 tmp = cur;
1391 tmp--;
1392 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1393 tmp++;
1394 if (!IS_ALPHA(*tmp))
1395 return(7);
1396 if (uri != NULL) {
1397 if (uri->authority != NULL) xmlFree(uri->authority);
1398 uri->authority = NULL;
1399 if (uri->server != NULL) xmlFree(uri->server);
1400 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1401 }
1402
1403host_done:
1404
1405 /*
1406 * finish by checking for a port presence.
1407 */
1408 if (*cur == ':') {
1409 cur++;
1410 if (IS_DIGIT(*cur)) {
1411 if (uri != NULL)
1412 uri->port = 0;
1413 while (IS_DIGIT(*cur)) {
1414 if (uri != NULL)
1415 uri->port = uri->port * 10 + (*cur - '0');
1416 cur++;
1417 }
1418 }
1419 }
1420 *str = cur;
1421 return(0);
1422}
1423
1424/**
1425 * xmlParseURIRelSegment:
1426 * @uri: pointer to an URI structure
1427 * @str: pointer to the string to analyze
1428 *
1429 * Parse an URI relative segment
1430 *
1431 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1432 * "+" | "$" | "," )
1433 *
1434 * Returns 0 or the error code
1435 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001436static int
Owen Taylor3473f882001-02-23 17:55:21 +00001437xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
1438 const char *cur;
1439
1440 if (str == NULL)
1441 return(-1);
1442
1443 cur = *str;
1444 if (!IS_SEGMENT(cur)) {
1445 return(3);
1446 }
1447 NEXT(cur);
1448 while (IS_SEGMENT(cur)) NEXT(cur);
1449 if (uri != NULL) {
1450 if (uri->path != NULL) xmlFree(uri->path);
1451 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1452 }
1453 *str = cur;
1454 return(0);
1455}
1456
1457/**
1458 * xmlParseURIPathSegments:
1459 * @uri: pointer to an URI structure
1460 * @str: pointer to the string to analyze
1461 * @slash: should we add a leading slash
1462 *
1463 * Parse an URI set of path segments
1464 *
1465 * path_segments = segment *( "/" segment )
1466 * segment = *pchar *( ";" param )
1467 * param = *pchar
1468 *
1469 * Returns 0 or the error code
1470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001471static int
Owen Taylor3473f882001-02-23 17:55:21 +00001472xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
1473 const char *cur;
1474
1475 if (str == NULL)
1476 return(-1);
1477
1478 cur = *str;
1479
1480 do {
1481 while (IS_PCHAR(cur)) NEXT(cur);
1482 if (*cur == ';') {
1483 cur++;
1484 while (IS_PCHAR(cur)) NEXT(cur);
1485 }
1486 if (*cur != '/') break;
1487 cur++;
1488 } while (1);
1489 if (uri != NULL) {
1490 int len, len2 = 0;
1491 char *path;
1492
1493 /*
1494 * Concat the set of path segments to the current path
1495 */
1496 len = cur - *str;
1497 if (slash)
1498 len++;
1499
1500 if (uri->path != NULL) {
1501 len2 = strlen(uri->path);
1502 len += len2;
1503 }
1504 path = (char *) xmlMalloc(len + 1);
1505 if (path == NULL) {
1506 xmlGenericError(xmlGenericErrorContext,
1507 "xmlParseURIPathSegments: out of memory\n");
1508 *str = cur;
1509 return(-1);
1510 }
1511 if (uri->path != NULL)
1512 memcpy(path, uri->path, len2);
1513 if (slash) {
1514 path[len2] = '/';
1515 len2++;
1516 }
1517 path[len2] = 0;
1518 if (cur - *str > 0)
1519 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1520 if (uri->path != NULL)
1521 xmlFree(uri->path);
1522 uri->path = path;
1523 }
1524 *str = cur;
1525 return(0);
1526}
1527
1528/**
1529 * xmlParseURIAuthority:
1530 * @uri: pointer to an URI structure
1531 * @str: pointer to the string to analyze
1532 *
1533 * Parse the authority part of an URI.
1534 *
1535 * authority = server | reg_name
1536 * server = [ [ userinfo "@" ] hostport ]
1537 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1538 * "@" | "&" | "=" | "+" )
1539 *
1540 * Note : this is completely ambiguous since reg_name is allowed to
1541 * use the full set of chars in use by server:
1542 *
1543 * 3.2.1. Registry-based Naming Authority
1544 *
1545 * The structure of a registry-based naming authority is specific
1546 * to the URI scheme, but constrained to the allowed characters
1547 * for an authority component.
1548 *
1549 * Returns 0 or the error code
1550 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001551static int
Owen Taylor3473f882001-02-23 17:55:21 +00001552xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1553 const char *cur;
1554 int ret;
1555
1556 if (str == NULL)
1557 return(-1);
1558
1559 cur = *str;
1560
1561 /*
1562 * try first to parse it as a server string.
1563 */
1564 ret = xmlParseURIServer(uri, str);
1565 if (ret == 0)
1566 return(0);
1567
1568 /*
1569 * failed, fallback to reg_name
1570 */
1571 if (!IS_REG_NAME(cur)) {
1572 return(5);
1573 }
1574 NEXT(cur);
1575 while (IS_REG_NAME(cur)) NEXT(cur);
1576 if (uri != NULL) {
1577 if (uri->server != NULL) xmlFree(uri->server);
1578 uri->server = NULL;
1579 if (uri->user != NULL) xmlFree(uri->user);
1580 uri->user = NULL;
1581 if (uri->authority != NULL) xmlFree(uri->authority);
1582 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1583 }
1584 *str = cur;
1585 return(0);
1586}
1587
1588/**
1589 * xmlParseURIHierPart:
1590 * @uri: pointer to an URI structure
1591 * @str: pointer to the string to analyze
1592 *
1593 * Parse an URI hirarchical part
1594 *
1595 * hier_part = ( net_path | abs_path ) [ "?" query ]
1596 * abs_path = "/" path_segments
1597 * net_path = "//" authority [ abs_path ]
1598 *
1599 * Returns 0 or the error code
1600 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001601static int
Owen Taylor3473f882001-02-23 17:55:21 +00001602xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1603 int ret;
1604 const char *cur;
1605
1606 if (str == NULL)
1607 return(-1);
1608
1609 cur = *str;
1610
1611 if ((cur[0] == '/') && (cur[1] == '/')) {
1612 cur += 2;
1613 ret = xmlParseURIAuthority(uri, &cur);
1614 if (ret != 0)
1615 return(ret);
1616 if (cur[0] == '/') {
1617 cur++;
1618 ret = xmlParseURIPathSegments(uri, &cur, 1);
1619 }
1620 } else if (cur[0] == '/') {
1621 cur++;
1622 ret = xmlParseURIPathSegments(uri, &cur, 1);
1623 } else {
1624 return(4);
1625 }
1626 if (ret != 0)
1627 return(ret);
1628 if (*cur == '?') {
1629 cur++;
1630 ret = xmlParseURIQuery(uri, &cur);
1631 if (ret != 0)
1632 return(ret);
1633 }
1634 *str = cur;
1635 return(0);
1636}
1637
1638/**
1639 * xmlParseAbsoluteURI:
1640 * @uri: pointer to an URI structure
1641 * @str: pointer to the string to analyze
1642 *
1643 * Parse an URI reference string and fills in the appropriate fields
1644 * of the @uri structure
1645 *
1646 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1647 *
1648 * Returns 0 or the error code
1649 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001650static int
Owen Taylor3473f882001-02-23 17:55:21 +00001651xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1652 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001653 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001654
1655 if (str == NULL)
1656 return(-1);
1657
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001658 cur = *str;
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660 ret = xmlParseURIScheme(uri, str);
1661 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001662 if (**str != ':') {
1663 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001664 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001665 }
Owen Taylor3473f882001-02-23 17:55:21 +00001666 (*str)++;
1667 if (**str == '/')
1668 return(xmlParseURIHierPart(uri, str));
1669 return(xmlParseURIOpaquePart(uri, str));
1670}
1671
1672/**
1673 * xmlParseRelativeURI:
1674 * @uri: pointer to an URI structure
1675 * @str: pointer to the string to analyze
1676 *
1677 * Parse an relative URI string and fills in the appropriate fields
1678 * of the @uri structure
1679 *
1680 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1681 * abs_path = "/" path_segments
1682 * net_path = "//" authority [ abs_path ]
1683 * rel_path = rel_segment [ abs_path ]
1684 *
1685 * Returns 0 or the error code
1686 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001687static int
Owen Taylor3473f882001-02-23 17:55:21 +00001688xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1689 int ret = 0;
1690 const char *cur;
1691
1692 if (str == NULL)
1693 return(-1);
1694
1695 cur = *str;
1696 if ((cur[0] == '/') && (cur[1] == '/')) {
1697 cur += 2;
1698 ret = xmlParseURIAuthority(uri, &cur);
1699 if (ret != 0)
1700 return(ret);
1701 if (cur[0] == '/') {
1702 cur++;
1703 ret = xmlParseURIPathSegments(uri, &cur, 1);
1704 }
1705 } else if (cur[0] == '/') {
1706 cur++;
1707 ret = xmlParseURIPathSegments(uri, &cur, 1);
1708 } else if (cur[0] != '#' && cur[0] != '?') {
1709 ret = xmlParseURIRelSegment(uri, &cur);
1710 if (ret != 0)
1711 return(ret);
1712 if (cur[0] == '/') {
1713 cur++;
1714 ret = xmlParseURIPathSegments(uri, &cur, 1);
1715 }
1716 }
1717 if (ret != 0)
1718 return(ret);
1719 if (*cur == '?') {
1720 cur++;
1721 ret = xmlParseURIQuery(uri, &cur);
1722 if (ret != 0)
1723 return(ret);
1724 }
1725 *str = cur;
1726 return(ret);
1727}
1728
1729/**
1730 * xmlParseURIReference:
1731 * @uri: pointer to an URI structure
1732 * @str: the string to analyze
1733 *
1734 * Parse an URI reference string and fills in the appropriate fields
1735 * of the @uri structure
1736 *
1737 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1738 *
1739 * Returns 0 or the error code
1740 */
1741int
1742xmlParseURIReference(xmlURIPtr uri, const char *str) {
1743 int ret;
1744 const char *tmp = str;
1745
1746 if (str == NULL)
1747 return(-1);
1748 xmlCleanURI(uri);
1749
1750 /*
1751 * Try first to parse aboslute refs, then fallback to relative if
1752 * it fails.
1753 */
1754 ret = xmlParseAbsoluteURI(uri, &str);
1755 if (ret != 0) {
1756 xmlCleanURI(uri);
1757 str = tmp;
1758 ret = xmlParseRelativeURI(uri, &str);
1759 }
1760 if (ret != 0) {
1761 xmlCleanURI(uri);
1762 return(ret);
1763 }
1764
1765 if (*str == '#') {
1766 str++;
1767 ret = xmlParseURIFragment(uri, &str);
1768 if (ret != 0) return(ret);
1769 }
1770 if (*str != 0) {
1771 xmlCleanURI(uri);
1772 return(1);
1773 }
1774 return(0);
1775}
1776
1777/**
1778 * xmlParseURI:
1779 * @str: the URI string to analyze
1780 *
1781 * Parse an URI
1782 *
1783 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1784 *
1785 * Returns a newly build xmlURIPtr or NULL in case of error
1786 */
1787xmlURIPtr
1788xmlParseURI(const char *str) {
1789 xmlURIPtr uri;
1790 int ret;
1791
1792 if (str == NULL)
1793 return(NULL);
1794 uri = xmlCreateURI();
1795 if (uri != NULL) {
1796 ret = xmlParseURIReference(uri, str);
1797 if (ret) {
1798 xmlFreeURI(uri);
1799 return(NULL);
1800 }
1801 }
1802 return(uri);
1803}
1804
1805/************************************************************************
1806 * *
1807 * Public functions *
1808 * *
1809 ************************************************************************/
1810
1811/**
1812 * xmlBuildURI:
1813 * @URI: the URI instance found in the document
1814 * @base: the base value
1815 *
1816 * Computes he final URI of the reference done by checking that
1817 * the given URI is valid, and building the final URI using the
1818 * base URI. This is processed according to section 5.2 of the
1819 * RFC 2396
1820 *
1821 * 5.2. Resolving Relative References to Absolute Form
1822 *
1823 * Returns a new URI string (to be freed by the caller) or NULL in case
1824 * of error.
1825 */
1826xmlChar *
1827xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1828 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001829 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001830 xmlURIPtr ref = NULL;
1831 xmlURIPtr bas = NULL;
1832 xmlURIPtr res = NULL;
1833
1834 /*
1835 * 1) The URI reference is parsed into the potential four components and
1836 * fragment identifier, as described in Section 4.3.
1837 *
1838 * NOTE that a completely empty URI is treated by modern browsers
1839 * as a reference to "." rather than as a synonym for the current
1840 * URI. Should we do that here?
1841 */
1842 if (URI == NULL)
1843 ret = -1;
1844 else {
1845 if (*URI) {
1846 ref = xmlCreateURI();
1847 if (ref == NULL)
1848 goto done;
1849 ret = xmlParseURIReference(ref, (const char *) URI);
1850 }
1851 else
1852 ret = 0;
1853 }
1854 if (ret != 0)
1855 goto done;
1856 if (base == NULL)
1857 ret = -1;
1858 else {
1859 bas = xmlCreateURI();
1860 if (bas == NULL)
1861 goto done;
1862 ret = xmlParseURIReference(bas, (const char *) base);
1863 }
1864 if (ret != 0) {
1865 if (ref)
1866 val = xmlSaveUri(ref);
1867 goto done;
1868 }
1869 if (ref == NULL) {
1870 /*
1871 * the base fragment must be ignored
1872 */
1873 if (bas->fragment != NULL) {
1874 xmlFree(bas->fragment);
1875 bas->fragment = NULL;
1876 }
1877 val = xmlSaveUri(bas);
1878 goto done;
1879 }
1880
1881 /*
1882 * 2) If the path component is empty and the scheme, authority, and
1883 * query components are undefined, then it is a reference to the
1884 * current document and we are done. Otherwise, the reference URI's
1885 * query and fragment components are defined as found (or not found)
1886 * within the URI reference and not inherited from the base URI.
1887 *
1888 * NOTE that in modern browsers, the parsing differs from the above
1889 * in the following aspect: the query component is allowed to be
1890 * defined while still treating this as a reference to the current
1891 * document.
1892 */
1893 res = xmlCreateURI();
1894 if (res == NULL)
1895 goto done;
1896 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1897 ((ref->authority == NULL) && (ref->server == NULL))) {
1898 if (bas->scheme != NULL)
1899 res->scheme = xmlMemStrdup(bas->scheme);
1900 if (bas->authority != NULL)
1901 res->authority = xmlMemStrdup(bas->authority);
1902 else if (bas->server != NULL) {
1903 res->server = xmlMemStrdup(bas->server);
1904 if (bas->user != NULL)
1905 res->user = xmlMemStrdup(bas->user);
1906 res->port = bas->port;
1907 }
1908 if (bas->path != NULL)
1909 res->path = xmlMemStrdup(bas->path);
1910 if (ref->query != NULL)
1911 res->query = xmlMemStrdup(ref->query);
1912 else if (bas->query != NULL)
1913 res->query = xmlMemStrdup(bas->query);
1914 if (ref->fragment != NULL)
1915 res->fragment = xmlMemStrdup(ref->fragment);
1916 goto step_7;
1917 }
1918
1919 if (ref->query != NULL)
1920 res->query = xmlMemStrdup(ref->query);
1921 if (ref->fragment != NULL)
1922 res->fragment = xmlMemStrdup(ref->fragment);
1923
1924 /*
1925 * 3) If the scheme component is defined, indicating that the reference
1926 * starts with a scheme name, then the reference is interpreted as an
1927 * absolute URI and we are done. Otherwise, the reference URI's
1928 * scheme is inherited from the base URI's scheme component.
1929 */
1930 if (ref->scheme != NULL) {
1931 val = xmlSaveUri(ref);
1932 goto done;
1933 }
1934 if (bas->scheme != NULL)
1935 res->scheme = xmlMemStrdup(bas->scheme);
1936
1937 /*
1938 * 4) If the authority component is defined, then the reference is a
1939 * network-path and we skip to step 7. Otherwise, the reference
1940 * URI's authority is inherited from the base URI's authority
1941 * component, which will also be undefined if the URI scheme does not
1942 * use an authority component.
1943 */
1944 if ((ref->authority != NULL) || (ref->server != NULL)) {
1945 if (ref->authority != NULL)
1946 res->authority = xmlMemStrdup(ref->authority);
1947 else {
1948 res->server = xmlMemStrdup(ref->server);
1949 if (ref->user != NULL)
1950 res->user = xmlMemStrdup(ref->user);
1951 res->port = ref->port;
1952 }
1953 if (ref->path != NULL)
1954 res->path = xmlMemStrdup(ref->path);
1955 goto step_7;
1956 }
1957 if (bas->authority != NULL)
1958 res->authority = xmlMemStrdup(bas->authority);
1959 else if (bas->server != NULL) {
1960 res->server = xmlMemStrdup(bas->server);
1961 if (bas->user != NULL)
1962 res->user = xmlMemStrdup(bas->user);
1963 res->port = bas->port;
1964 }
1965
1966 /*
1967 * 5) If the path component begins with a slash character ("/"), then
1968 * the reference is an absolute-path and we skip to step 7.
1969 */
1970 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1971 res->path = xmlMemStrdup(ref->path);
1972 goto step_7;
1973 }
1974
1975
1976 /*
1977 * 6) If this step is reached, then we are resolving a relative-path
1978 * reference. The relative path needs to be merged with the base
1979 * URI's path. Although there are many ways to do this, we will
1980 * describe a simple method using a separate string buffer.
1981 *
1982 * Allocate a buffer large enough for the result string.
1983 */
1984 len = 2; /* extra / and 0 */
1985 if (ref->path != NULL)
1986 len += strlen(ref->path);
1987 if (bas->path != NULL)
1988 len += strlen(bas->path);
1989 res->path = (char *) xmlMalloc(len);
1990 if (res->path == NULL) {
1991 xmlGenericError(xmlGenericErrorContext,
1992 "xmlBuildURI: out of memory\n");
1993 goto done;
1994 }
1995 res->path[0] = 0;
1996
1997 /*
1998 * a) All but the last segment of the base URI's path component is
1999 * copied to the buffer. In other words, any characters after the
2000 * last (right-most) slash character, if any, are excluded.
2001 */
2002 cur = 0;
2003 out = 0;
2004 if (bas->path != NULL) {
2005 while (bas->path[cur] != 0) {
2006 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2007 cur++;
2008 if (bas->path[cur] == 0)
2009 break;
2010
2011 cur++;
2012 while (out < cur) {
2013 res->path[out] = bas->path[out];
2014 out++;
2015 }
2016 }
2017 }
2018 res->path[out] = 0;
2019
2020 /*
2021 * b) The reference's path component is appended to the buffer
2022 * string.
2023 */
2024 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002025 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002026 /*
2027 * Ensure the path includes a '/'
2028 */
2029 if ((out == 0) && (bas->server != NULL))
2030 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002031 while (ref->path[indx] != 0) {
2032 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002033 }
2034 }
2035 res->path[out] = 0;
2036
2037 /*
2038 * Steps c) to h) are really path normalization steps
2039 */
2040 xmlNormalizeURIPath(res->path);
2041
2042step_7:
2043
2044 /*
2045 * 7) The resulting URI components, including any inherited from the
2046 * base URI, are recombined to give the absolute form of the URI
2047 * reference.
2048 */
2049 val = xmlSaveUri(res);
2050
2051done:
2052 if (ref != NULL)
2053 xmlFreeURI(ref);
2054 if (bas != NULL)
2055 xmlFreeURI(bas);
2056 if (res != NULL)
2057 xmlFreeURI(res);
2058 return(val);
2059}
2060
2061