blob: 183bef331025c1178906d92a88c02ee146d5532e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
8 * Daniel.Veillard@w3.org
9 */
10
11#ifdef WIN32
12#define INCLUDE_WINSOCK
13#include "win32config.h"
14#else
15#include "config.h"
16#endif
17
18#include <stdio.h>
19#include <string.h>
20
21#include <libxml/xmlmemory.h>
22#include <libxml/uri.h>
23#include <libxml/xmlerror.h>
24
25/************************************************************************
26 * *
27 * Macros to differentiate various character types *
28 * directly extracted from RFC 2396 *
29 * *
30 ************************************************************************/
31
/*
 * Character classes of RFC 2396.
 *
 * Macros taking "x" classify a single character value; macros taking "p"
 * receive a pointer to the current character because the "escaped"
 * production needs to look at the two characters following a '%'.
 * Arguments may be evaluated several times: do not pass expressions with
 * side effects.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | ... | "9"
 */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) (IS_DIGIT(x) ||					\
    (('a' <= (x)) && ((x) <= 'f')) ||					\
    (('A' <= (x)) && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') ||			\
    ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
    ((x) == ':') || ((x) == '@') || ((x) == '&') ||			\
    ((x) == '=') || ((x) == '+') || ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The tests short-circuit, so nothing past a string terminator is read.
 */
#define IS_ESCAPED(p) (((p)[0] == '%') && IS_HEX((p)[1]) && IS_HEX((p)[2]))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
    (*(p) == ':') || (*(p) == '@') || (*(p) == '&') ||			\
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || (*(p) == ','))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 *
 * i.e. pchar plus ";" and "?"
 */
#define IS_URIC_NO_SLASH(p) (IS_PCHAR(p) || (*(p) == ';') || (*(p) == '?'))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
    (*(p) == ';') || (*(p) == '@') || (*(p) == '&') ||			\
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || (*(p) == ','))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */
#define IS_SCHEME(x) (IS_ALPHANUM(x) ||					\
    ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * i.e. pchar plus ";"
 */
#define IS_REG_NAME(p) (IS_PCHAR(p) || (*(p) == ';'))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
    (*(p) == ';') || (*(p) == ':') || (*(p) == '&') ||			\
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || (*(p) == ','))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)) ||		\
    IS_ESCAPED(p))
162
/*
 * Skip to next pointer char, handle escaped sequences: the pointer is
 * advanced by 3 when it designates a '%' and by 1 otherwise.
 *
 * The '%' case does not check the two following characters; callers are
 * expected to have validated the position first (e.g. with IS_ESCAPED).
 * The argument must be a modifiable pointer lvalue and is evaluated more
 * than once. It is fully parenthesized so that lvalue expressions such as
 * *pp advance the intended object.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
168
169/*
170 * Productions from the spec.
171 *
172 * authority = server | reg_name
173 * reg_name = 1*( unreserved | escaped | "$" | "," |
174 * ";" | ":" | "@" | "&" | "=" | "+" )
175 *
176 * path = [ abs_path | opaque_part ]
177 */
178
179/************************************************************************
180 * *
181 * Generic URI structure functions *
182 * *
183 ************************************************************************/
184
185/**
186 * xmlCreateURI:
187 *
188 * Simply creates an empty xmlURI
189 *
190 * Returns the new structure or NULL in case of error
191 */
192xmlURIPtr
193xmlCreateURI(void) {
194 xmlURIPtr ret;
195
196 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
197 if (ret == NULL) {
198 xmlGenericError(xmlGenericErrorContext,
199 "xmlCreateURI: out of memory\n");
200 return(NULL);
201 }
202 memset(ret, 0, sizeof(xmlURI));
203 return(ret);
204}
205
206/**
207 * xmlSaveUri:
208 * @uri: pointer to an xmlURI
209 *
210 * Save the URI as an escaped string
211 *
212 * Returns a new string (to be deallocated by caller)
213 */
214xmlChar *
215xmlSaveUri(xmlURIPtr uri) {
216 xmlChar *ret = NULL;
217 const char *p;
218 int len;
219 int max;
220
221 if (uri == NULL) return(NULL);
222
223
224 max = 80;
225 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
226 if (ret == NULL) {
227 xmlGenericError(xmlGenericErrorContext,
228 "xmlSaveUri: out of memory\n");
229 return(NULL);
230 }
231 len = 0;
232
233 if (uri->scheme != NULL) {
234 p = uri->scheme;
235 while (*p != 0) {
236 if (len >= max) {
237 max *= 2;
238 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
239 if (ret == NULL) {
240 xmlGenericError(xmlGenericErrorContext,
241 "xmlSaveUri: out of memory\n");
242 return(NULL);
243 }
244 }
245 ret[len++] = *p++;
246 }
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = ':';
257 }
258 if (uri->opaque != NULL) {
259 p = uri->opaque;
260 while (*p != 0) {
261 if (len + 3 >= max) {
262 max *= 2;
263 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
264 if (ret == NULL) {
265 xmlGenericError(xmlGenericErrorContext,
266 "xmlSaveUri: out of memory\n");
267 return(NULL);
268 }
269 }
270 if ((IS_UNRESERVED(*(p))) ||
271 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
272 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
273 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
274 ret[len++] = *p++;
275 else {
276 int val = *(unsigned char *)p++;
277 int hi = val / 0x10, lo = val % 0x10;
278 ret[len++] = '%';
279 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
280 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
281 }
282 }
283 if (len >= max) {
284 max *= 2;
285 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
286 if (ret == NULL) {
287 xmlGenericError(xmlGenericErrorContext,
288 "xmlSaveUri: out of memory\n");
289 return(NULL);
290 }
291 }
292 ret[len++] = 0;
293 } else {
294 if (uri->server != NULL) {
295 if (len + 3 >= max) {
296 max *= 2;
297 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
298 if (ret == NULL) {
299 xmlGenericError(xmlGenericErrorContext,
300 "xmlSaveUri: out of memory\n");
301 return(NULL);
302 }
303 }
304 ret[len++] = '/';
305 ret[len++] = '/';
306 if (uri->user != NULL) {
307 p = uri->user;
308 while (*p != 0) {
309 if (len + 3 >= max) {
310 max *= 2;
311 ret = (xmlChar *) xmlRealloc(ret,
312 (max + 1) * sizeof(xmlChar));
313 if (ret == NULL) {
314 xmlGenericError(xmlGenericErrorContext,
315 "xmlSaveUri: out of memory\n");
316 return(NULL);
317 }
318 }
319 if ((IS_UNRESERVED(*(p))) ||
320 ((*(p) == ';')) || ((*(p) == ':')) ||
321 ((*(p) == '&')) || ((*(p) == '=')) ||
322 ((*(p) == '+')) || ((*(p) == '$')) ||
323 ((*(p) == ',')))
324 ret[len++] = *p++;
325 else {
326 int val = *(unsigned char *)p++;
327 int hi = val / 0x10, lo = val % 0x10;
328 ret[len++] = '%';
329 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
330 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
331 }
332 }
333 if (len + 3 >= max) {
334 max *= 2;
335 ret = (xmlChar *) xmlRealloc(ret,
336 (max + 1) * sizeof(xmlChar));
337 if (ret == NULL) {
338 xmlGenericError(xmlGenericErrorContext,
339 "xmlSaveUri: out of memory\n");
340 return(NULL);
341 }
342 }
343 ret[len++] = '@';
344 }
345 p = uri->server;
346 while (*p != 0) {
347 if (len >= max) {
348 max *= 2;
349 ret = (xmlChar *) xmlRealloc(ret,
350 (max + 1) * sizeof(xmlChar));
351 if (ret == NULL) {
352 xmlGenericError(xmlGenericErrorContext,
353 "xmlSaveUri: out of memory\n");
354 return(NULL);
355 }
356 }
357 ret[len++] = *p++;
358 }
359 if (uri->port > 0) {
360 if (len + 10 >= max) {
361 max *= 2;
362 ret = (xmlChar *) xmlRealloc(ret,
363 (max + 1) * sizeof(xmlChar));
364 if (ret == NULL) {
365 xmlGenericError(xmlGenericErrorContext,
366 "xmlSaveUri: out of memory\n");
367 return(NULL);
368 }
369 }
370 len += sprintf((char *) &ret[len], ":%d", uri->port);
371 }
372 } else if (uri->authority != NULL) {
373 if (len + 3 >= max) {
374 max *= 2;
375 ret = (xmlChar *) xmlRealloc(ret,
376 (max + 1) * sizeof(xmlChar));
377 if (ret == NULL) {
378 xmlGenericError(xmlGenericErrorContext,
379 "xmlSaveUri: out of memory\n");
380 return(NULL);
381 }
382 }
383 ret[len++] = '/';
384 ret[len++] = '/';
385 p = uri->authority;
386 while (*p != 0) {
387 if (len + 3 >= max) {
388 max *= 2;
389 ret = (xmlChar *) xmlRealloc(ret,
390 (max + 1) * sizeof(xmlChar));
391 if (ret == NULL) {
392 xmlGenericError(xmlGenericErrorContext,
393 "xmlSaveUri: out of memory\n");
394 return(NULL);
395 }
396 }
397 if ((IS_UNRESERVED(*(p))) ||
398 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
399 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
400 ((*(p) == '=')) || ((*(p) == '+')))
401 ret[len++] = *p++;
402 else {
403 int val = *(unsigned char *)p++;
404 int hi = val / 0x10, lo = val % 0x10;
405 ret[len++] = '%';
406 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
407 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
408 }
409 }
410 } else if (uri->scheme != NULL) {
411 if (len + 3 >= max) {
412 max *= 2;
413 ret = (xmlChar *) xmlRealloc(ret,
414 (max + 1) * sizeof(xmlChar));
415 if (ret == NULL) {
416 xmlGenericError(xmlGenericErrorContext,
417 "xmlSaveUri: out of memory\n");
418 return(NULL);
419 }
420 }
421 ret[len++] = '/';
422 ret[len++] = '/';
423 }
424 if (uri->path != NULL) {
425 p = uri->path;
426 while (*p != 0) {
427 if (len + 3 >= max) {
428 max *= 2;
429 ret = (xmlChar *) xmlRealloc(ret,
430 (max + 1) * sizeof(xmlChar));
431 if (ret == NULL) {
432 xmlGenericError(xmlGenericErrorContext,
433 "xmlSaveUri: out of memory\n");
434 return(NULL);
435 }
436 }
437 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
438 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
439 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
440 ((*(p) == ',')))
441 ret[len++] = *p++;
442 else {
443 int val = *(unsigned char *)p++;
444 int hi = val / 0x10, lo = val % 0x10;
445 ret[len++] = '%';
446 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
447 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
448 }
449 }
450 }
451 if (uri->query != NULL) {
452 if (len + 3 >= max) {
453 max *= 2;
454 ret = (xmlChar *) xmlRealloc(ret,
455 (max + 1) * sizeof(xmlChar));
456 if (ret == NULL) {
457 xmlGenericError(xmlGenericErrorContext,
458 "xmlSaveUri: out of memory\n");
459 return(NULL);
460 }
461 }
462 ret[len++] = '?';
463 p = uri->query;
464 while (*p != 0) {
465 if (len + 3 >= max) {
466 max *= 2;
467 ret = (xmlChar *) xmlRealloc(ret,
468 (max + 1) * sizeof(xmlChar));
469 if (ret == NULL) {
470 xmlGenericError(xmlGenericErrorContext,
471 "xmlSaveUri: out of memory\n");
472 return(NULL);
473 }
474 }
475 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
476 ret[len++] = *p++;
477 else {
478 int val = *(unsigned char *)p++;
479 int hi = val / 0x10, lo = val % 0x10;
480 ret[len++] = '%';
481 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
482 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
483 }
484 }
485 }
486 if (uri->fragment != NULL) {
487 if (len + 3 >= max) {
488 max *= 2;
489 ret = (xmlChar *) xmlRealloc(ret,
490 (max + 1) * sizeof(xmlChar));
491 if (ret == NULL) {
492 xmlGenericError(xmlGenericErrorContext,
493 "xmlSaveUri: out of memory\n");
494 return(NULL);
495 }
496 }
497 ret[len++] = '#';
498 p = uri->fragment;
499 while (*p != 0) {
500 if (len + 3 >= max) {
501 max *= 2;
502 ret = (xmlChar *) xmlRealloc(ret,
503 (max + 1) * sizeof(xmlChar));
504 if (ret == NULL) {
505 xmlGenericError(xmlGenericErrorContext,
506 "xmlSaveUri: out of memory\n");
507 return(NULL);
508 }
509 }
510 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
511 ret[len++] = *p++;
512 else {
513 int val = *(unsigned char *)p++;
514 int hi = val / 0x10, lo = val % 0x10;
515 ret[len++] = '%';
516 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
517 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
518 }
519 }
520 }
521 if (len >= max) {
522 max *= 2;
523 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
524 if (ret == NULL) {
525 xmlGenericError(xmlGenericErrorContext,
526 "xmlSaveUri: out of memory\n");
527 return(NULL);
528 }
529 }
530 ret[len++] = 0;
531 }
532 return(ret);
533}
534
535/**
536 * xmlPrintURI:
537 * @stream: a FILE* for the output
538 * @uri: pointer to an xmlURI
539 *
540 * Prints the URI in the stream @steam.
541 */
542void
543xmlPrintURI(FILE *stream, xmlURIPtr uri) {
544 xmlChar *out;
545
546 out = xmlSaveUri(uri);
547 if (out != NULL) {
548 fprintf(stream, "%s", out);
549 xmlFree(out);
550 }
551}
552
553/**
554 * xmlCleanURI:
555 * @uri: pointer to an xmlURI
556 *
557 * Make sure the xmlURI struct is free of content
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static void
Owen Taylor3473f882001-02-23 17:55:21 +0000560xmlCleanURI(xmlURIPtr uri) {
561 if (uri == NULL) return;
562
563 if (uri->scheme != NULL) xmlFree(uri->scheme);
564 uri->scheme = NULL;
565 if (uri->server != NULL) xmlFree(uri->server);
566 uri->server = NULL;
567 if (uri->user != NULL) xmlFree(uri->user);
568 uri->user = NULL;
569 if (uri->path != NULL) xmlFree(uri->path);
570 uri->path = NULL;
571 if (uri->fragment != NULL) xmlFree(uri->fragment);
572 uri->fragment = NULL;
573 if (uri->opaque != NULL) xmlFree(uri->opaque);
574 uri->opaque = NULL;
575 if (uri->authority != NULL) xmlFree(uri->authority);
576 uri->authority = NULL;
577 if (uri->query != NULL) xmlFree(uri->query);
578 uri->query = NULL;
579}
580
581/**
582 * xmlFreeURI:
583 * @uri: pointer to an xmlURI
584 *
585 * Free up the xmlURI struct
586 */
587void
588xmlFreeURI(xmlURIPtr uri) {
589 if (uri == NULL) return;
590
591 if (uri->scheme != NULL) xmlFree(uri->scheme);
592 if (uri->server != NULL) xmlFree(uri->server);
593 if (uri->user != NULL) xmlFree(uri->user);
594 if (uri->path != NULL) xmlFree(uri->path);
595 if (uri->fragment != NULL) xmlFree(uri->fragment);
596 if (uri->opaque != NULL) xmlFree(uri->opaque);
597 if (uri->authority != NULL) xmlFree(uri->authority);
598 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000599 MEM_CLEANUP(uri, sizeof(xmlURI));
Owen Taylor3473f882001-02-23 17:55:21 +0000600 xmlFree(uri);
601}
602
603/************************************************************************
604 * *
605 * Helper functions *
606 * *
607 ************************************************************************/
608
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * Leading "/" characters are always preserved. Leading ".." segments of
 * an absolute path (step g) are discarded; those of a relative path are
 * kept.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* also keep the "/" which ends the segment */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the current segment is "..", or if the next segment _isn't_
         * "..", keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap: strcpy() would be undefined. */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* Look for the last character of a previous segment.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /* Only "/" characters in front of us: there is no previous
         * segment, keep going from here.  Reaching the start of the buffer
         * on a non-"/" character means the previous segment is a one
         * character first segment, which must be re-examined too.
         */
        if ((segp == path) && (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        /* the cur[0] test stops the scan on the string terminator */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
921
922/**
923 * xmlURIUnescapeString:
924 * @str: the string to unescape
925 * @len: the lenght in bytes to unescape (or <= 0 to indicate full string)
926 * @target: optionnal destination buffer
927 *
928 * Unescaping routine, does not do validity checks !
929 * Output is direct unsigned char translation of %XX values (no encoding)
930 *
931 * Returns an copy of the string, but unescaped
932 */
933char *
934xmlURIUnescapeString(const char *str, int len, char *target) {
935 char *ret, *out;
936 const char *in;
937
938 if (str == NULL)
939 return(NULL);
940 if (len <= 0) len = strlen(str);
941 if (len <= 0) return(NULL);
942
943 if (target == NULL) {
944 ret = (char *) xmlMalloc(len + 1);
945 if (ret == NULL) {
946 xmlGenericError(xmlGenericErrorContext,
947 "xmlURIUnescapeString: out of memory\n");
948 return(NULL);
949 }
950 } else
951 ret = target;
952 in = str;
953 out = ret;
954 while(len > 0) {
955 if (*in == '%') {
956 in++;
957 if ((*in >= '0') && (*in <= '9'))
958 *out = (*in - '0');
959 else if ((*in >= 'a') && (*in <= 'f'))
960 *out = (*in - 'a') + 10;
961 else if ((*in >= 'A') && (*in <= 'F'))
962 *out = (*in - 'A') + 10;
963 in++;
964 if ((*in >= '0') && (*in <= '9'))
965 *out = *out * 16 + (*in - '0');
966 else if ((*in >= 'a') && (*in <= 'f'))
967 *out = *out * 16 + (*in - 'a') + 10;
968 else if ((*in >= 'A') && (*in <= 'F'))
969 *out = *out * 16 + (*in - 'A') + 10;
970 in++;
971 len -= 3;
972 out++;
973 } else {
974 *out++ = *in++;
975 len--;
976 }
977 }
978 *out = 0;
979 return(ret);
980}
981
982/**
983 * xmlURIEscape:
984 * @str: the string of the URI to escape
985 *
986 * Escaping routine, does not do validity checks !
987 * It will try to escape the chars needing this, but this is heuristic
988 * based it's impossible to be sure.
989 *
Daniel Veillard146c9122001-03-22 15:22:27 +0000990 * TODO: make the proper implementation of this function by calling
991 * xmlParseURIReference() and escaping each section accordingly
992 * to the rules (c.f. bug 51876)
993 *
Owen Taylor3473f882001-02-23 17:55:21 +0000994 * Returns an copy of the string, but escaped
995 */
996xmlChar *
997xmlURIEscape(const xmlChar *str) {
998 xmlChar *ret;
999 const xmlChar *in;
1000 unsigned int len, out;
1001
1002 if (str == NULL)
1003 return(NULL);
1004 len = xmlStrlen(str);
1005 if (len <= 0) return(NULL);
1006
1007 len += 20;
1008 ret = (xmlChar *) xmlMalloc(len);
1009 if (ret == NULL) {
1010 xmlGenericError(xmlGenericErrorContext,
1011 "xmlURIEscape: out of memory\n");
1012 return(NULL);
1013 }
1014 in = (const xmlChar *) str;
1015 out = 0;
1016 while(*in != 0) {
1017 if (len - out <= 3) {
1018 len += 20;
1019 ret = (xmlChar *) xmlRealloc(ret, len);
1020 if (ret == NULL) {
1021 xmlGenericError(xmlGenericErrorContext,
1022 "xmlURIEscape: out of memory\n");
1023 return(NULL);
1024 }
1025 }
1026 if ((!IS_UNRESERVED(*in)) && (*in != ':') && (*in != '/') &&
1027 (*in != '?') && (*in != '#')) {
1028 unsigned char val;
1029 ret[out++] = '%';
1030 val = *in >> 4;
1031 if (val <= 9)
1032 ret[out++] = '0' + val;
1033 else
1034 ret[out++] = 'A' + val - 0xA;
1035 val = *in & 0xF;
1036 if (val <= 9)
1037 ret[out++] = '0' + val;
1038 else
1039 ret[out++] = 'A' + val - 0xA;
1040 in++;
1041 } else {
1042 ret[out++] = *in++;
1043 }
1044 }
1045 ret[out] = 0;
1046 return(ret);
1047}
1048
1049/************************************************************************
1050 * *
1051 * Escaped URI parsing *
1052 * *
1053 ************************************************************************/
1054
1055/**
1056 * xmlParseURIFragment:
1057 * @uri: pointer to an URI structure
1058 * @str: pointer to the string to analyze
1059 *
1060 * Parse an URI fragment string and fills in the appropriate fields
1061 * of the @uri structure.
1062 *
1063 * fragment = *uric
1064 *
1065 * Returns 0 or the error code
1066 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001067static int
Owen Taylor3473f882001-02-23 17:55:21 +00001068xmlParseURIFragment(xmlURIPtr uri, const char **str) {
1069 const char *cur = *str;
1070
1071 if (str == NULL) return(-1);
1072
1073 while (IS_URIC(cur)) NEXT(cur);
1074 if (uri != NULL) {
1075 if (uri->fragment != NULL) xmlFree(uri->fragment);
1076 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1077 }
1078 *str = cur;
1079 return(0);
1080}
1081
1082/**
1083 * xmlParseURIQuery:
1084 * @uri: pointer to an URI structure
1085 * @str: pointer to the string to analyze
1086 *
1087 * Parse the query part of an URI
1088 *
1089 * query = *uric
1090 *
1091 * Returns 0 or the error code
1092 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001093static int
Owen Taylor3473f882001-02-23 17:55:21 +00001094xmlParseURIQuery(xmlURIPtr uri, const char **str) {
1095 const char *cur = *str;
1096
1097 if (str == NULL) return(-1);
1098
1099 while (IS_URIC(cur)) NEXT(cur);
1100 if (uri != NULL) {
1101 if (uri->query != NULL) xmlFree(uri->query);
1102 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1103 }
1104 *str = cur;
1105 return(0);
1106}
1107
1108/**
1109 * xmlParseURIScheme:
1110 * @uri: pointer to an URI structure
1111 * @str: pointer to the string to analyze
1112 *
1113 * Parse an URI scheme
1114 *
1115 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1116 *
1117 * Returns 0 or the error code
1118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001119static int
Owen Taylor3473f882001-02-23 17:55:21 +00001120xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1121 const char *cur;
1122
1123 if (str == NULL)
1124 return(-1);
1125
1126 cur = *str;
1127 if (!IS_ALPHA(*cur))
1128 return(2);
1129 cur++;
1130 while (IS_SCHEME(*cur)) cur++;
1131 if (uri != NULL) {
1132 if (uri->scheme != NULL) xmlFree(uri->scheme);
1133 /* !!! strndup */
1134 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1135 }
1136 *str = cur;
1137 return(0);
1138}
1139
1140/**
1141 * xmlParseURIOpaquePart:
1142 * @uri: pointer to an URI structure
1143 * @str: pointer to the string to analyze
1144 *
1145 * Parse an URI opaque part
1146 *
1147 * opaque_part = uric_no_slash *uric
1148 *
1149 * Returns 0 or the error code
1150 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001151static int
Owen Taylor3473f882001-02-23 17:55:21 +00001152xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
1153 const char *cur;
1154
1155 if (str == NULL)
1156 return(-1);
1157
1158 cur = *str;
1159 if (!IS_URIC_NO_SLASH(cur)) {
1160 return(3);
1161 }
1162 NEXT(cur);
1163 while (IS_URIC(cur)) NEXT(cur);
1164 if (uri != NULL) {
1165 if (uri->opaque != NULL) xmlFree(uri->opaque);
1166 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1167 }
1168 *str = cur;
1169 return(0);
1170}
1171
1172/**
1173 * xmlParseURIServer:
1174 * @uri: pointer to an URI structure
1175 * @str: pointer to the string to analyze
1176 *
1177 * Parse a server subpart of an URI, it's a finer grain analysis
1178 * of the authority part.
1179 *
1180 * server = [ [ userinfo "@" ] hostport ]
1181 * userinfo = *( unreserved | escaped |
1182 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1183 * hostport = host [ ":" port ]
1184 * host = hostname | IPv4address
1185 * hostname = *( domainlabel "." ) toplabel [ "." ]
1186 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1187 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1188 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1189 * port = *digit
1190 *
1191 * Returns 0 or the error code
1192 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001193static int
Owen Taylor3473f882001-02-23 17:55:21 +00001194xmlParseURIServer(xmlURIPtr uri, const char **str) {
1195 const char *cur;
1196 const char *host, *tmp;
1197
1198 if (str == NULL)
1199 return(-1);
1200
1201 cur = *str;
1202
1203 /*
1204 * is there an userinfo ?
1205 */
1206 while (IS_USERINFO(cur)) NEXT(cur);
1207 if (*cur == '@') {
1208 if (uri != NULL) {
1209 if (uri->user != NULL) xmlFree(uri->user);
1210 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1211 }
1212 cur++;
1213 } else {
1214 if (uri != NULL) {
1215 if (uri->user != NULL) xmlFree(uri->user);
1216 uri->user = NULL;
1217 }
1218 cur = *str;
1219 }
1220 /*
1221 * This can be empty in the case where there is no server
1222 */
1223 host = cur;
1224 if (*cur == '/') {
1225 if (uri != NULL) {
1226 if (uri->authority != NULL) xmlFree(uri->authority);
1227 uri->authority = NULL;
1228 if (uri->server != NULL) xmlFree(uri->server);
1229 uri->server = NULL;
1230 uri->port = 0;
1231 }
1232 return(0);
1233 }
1234 /*
1235 * host part of hostport can derive either an IPV4 address
1236 * or an unresolved name. Check the IP first, it easier to detect
1237 * errors if wrong one
1238 */
1239 if (IS_DIGIT(*cur)) {
1240 while(IS_DIGIT(*cur)) cur++;
1241 if (*cur != '.')
1242 goto host_name;
1243 cur++;
1244 if (!IS_DIGIT(*cur))
1245 goto host_name;
1246 while(IS_DIGIT(*cur)) cur++;
1247 if (*cur != '.')
1248 goto host_name;
1249 cur++;
1250 if (!IS_DIGIT(*cur))
1251 goto host_name;
1252 while(IS_DIGIT(*cur)) cur++;
1253 if (*cur != '.')
1254 goto host_name;
1255 cur++;
1256 if (!IS_DIGIT(*cur))
1257 goto host_name;
1258 while(IS_DIGIT(*cur)) cur++;
1259 if (uri != NULL) {
1260 if (uri->authority != NULL) xmlFree(uri->authority);
1261 uri->authority = NULL;
1262 if (uri->server != NULL) xmlFree(uri->server);
1263 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1264 }
1265 goto host_done;
1266 }
1267host_name:
1268 /*
1269 * the hostname production as-is is a parser nightmare.
1270 * simplify it to
1271 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1272 * and just make sure the last label starts with a non numeric char.
1273 */
1274 if (!IS_ALPHANUM(*cur))
1275 return(6);
1276 while (IS_ALPHANUM(*cur)) {
1277 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1278 if (*cur == '.')
1279 cur++;
1280 }
1281 tmp = cur;
1282 tmp--;
1283 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1284 tmp++;
1285 if (!IS_ALPHA(*tmp))
1286 return(7);
1287 if (uri != NULL) {
1288 if (uri->authority != NULL) xmlFree(uri->authority);
1289 uri->authority = NULL;
1290 if (uri->server != NULL) xmlFree(uri->server);
1291 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1292 }
1293
1294host_done:
1295
1296 /*
1297 * finish by checking for a port presence.
1298 */
1299 if (*cur == ':') {
1300 cur++;
1301 if (IS_DIGIT(*cur)) {
1302 if (uri != NULL)
1303 uri->port = 0;
1304 while (IS_DIGIT(*cur)) {
1305 if (uri != NULL)
1306 uri->port = uri->port * 10 + (*cur - '0');
1307 cur++;
1308 }
1309 }
1310 }
1311 *str = cur;
1312 return(0);
1313}
1314
1315/**
1316 * xmlParseURIRelSegment:
1317 * @uri: pointer to an URI structure
1318 * @str: pointer to the string to analyze
1319 *
1320 * Parse an URI relative segment
1321 *
1322 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1323 * "+" | "$" | "," )
1324 *
1325 * Returns 0 or the error code
1326 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001327static int
Owen Taylor3473f882001-02-23 17:55:21 +00001328xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
1329 const char *cur;
1330
1331 if (str == NULL)
1332 return(-1);
1333
1334 cur = *str;
1335 if (!IS_SEGMENT(cur)) {
1336 return(3);
1337 }
1338 NEXT(cur);
1339 while (IS_SEGMENT(cur)) NEXT(cur);
1340 if (uri != NULL) {
1341 if (uri->path != NULL) xmlFree(uri->path);
1342 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1343 }
1344 *str = cur;
1345 return(0);
1346}
1347
1348/**
1349 * xmlParseURIPathSegments:
1350 * @uri: pointer to an URI structure
1351 * @str: pointer to the string to analyze
1352 * @slash: should we add a leading slash
1353 *
1354 * Parse an URI set of path segments
1355 *
1356 * path_segments = segment *( "/" segment )
1357 * segment = *pchar *( ";" param )
1358 * param = *pchar
1359 *
1360 * Returns 0 or the error code
1361 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001362static int
Owen Taylor3473f882001-02-23 17:55:21 +00001363xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
1364 const char *cur;
1365
1366 if (str == NULL)
1367 return(-1);
1368
1369 cur = *str;
1370
1371 do {
1372 while (IS_PCHAR(cur)) NEXT(cur);
1373 if (*cur == ';') {
1374 cur++;
1375 while (IS_PCHAR(cur)) NEXT(cur);
1376 }
1377 if (*cur != '/') break;
1378 cur++;
1379 } while (1);
1380 if (uri != NULL) {
1381 int len, len2 = 0;
1382 char *path;
1383
1384 /*
1385 * Concat the set of path segments to the current path
1386 */
1387 len = cur - *str;
1388 if (slash)
1389 len++;
1390
1391 if (uri->path != NULL) {
1392 len2 = strlen(uri->path);
1393 len += len2;
1394 }
1395 path = (char *) xmlMalloc(len + 1);
1396 if (path == NULL) {
1397 xmlGenericError(xmlGenericErrorContext,
1398 "xmlParseURIPathSegments: out of memory\n");
1399 *str = cur;
1400 return(-1);
1401 }
1402 if (uri->path != NULL)
1403 memcpy(path, uri->path, len2);
1404 if (slash) {
1405 path[len2] = '/';
1406 len2++;
1407 }
1408 path[len2] = 0;
1409 if (cur - *str > 0)
1410 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1411 if (uri->path != NULL)
1412 xmlFree(uri->path);
1413 uri->path = path;
1414 }
1415 *str = cur;
1416 return(0);
1417}
1418
1419/**
1420 * xmlParseURIAuthority:
1421 * @uri: pointer to an URI structure
1422 * @str: pointer to the string to analyze
1423 *
1424 * Parse the authority part of an URI.
1425 *
1426 * authority = server | reg_name
1427 * server = [ [ userinfo "@" ] hostport ]
1428 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1429 * "@" | "&" | "=" | "+" )
1430 *
1431 * Note : this is completely ambiguous since reg_name is allowed to
1432 * use the full set of chars in use by server:
1433 *
1434 * 3.2.1. Registry-based Naming Authority
1435 *
1436 * The structure of a registry-based naming authority is specific
1437 * to the URI scheme, but constrained to the allowed characters
1438 * for an authority component.
1439 *
1440 * Returns 0 or the error code
1441 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001442static int
Owen Taylor3473f882001-02-23 17:55:21 +00001443xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1444 const char *cur;
1445 int ret;
1446
1447 if (str == NULL)
1448 return(-1);
1449
1450 cur = *str;
1451
1452 /*
1453 * try first to parse it as a server string.
1454 */
1455 ret = xmlParseURIServer(uri, str);
1456 if (ret == 0)
1457 return(0);
1458
1459 /*
1460 * failed, fallback to reg_name
1461 */
1462 if (!IS_REG_NAME(cur)) {
1463 return(5);
1464 }
1465 NEXT(cur);
1466 while (IS_REG_NAME(cur)) NEXT(cur);
1467 if (uri != NULL) {
1468 if (uri->server != NULL) xmlFree(uri->server);
1469 uri->server = NULL;
1470 if (uri->user != NULL) xmlFree(uri->user);
1471 uri->user = NULL;
1472 if (uri->authority != NULL) xmlFree(uri->authority);
1473 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1474 }
1475 *str = cur;
1476 return(0);
1477}
1478
1479/**
1480 * xmlParseURIHierPart:
1481 * @uri: pointer to an URI structure
1482 * @str: pointer to the string to analyze
1483 *
1484 * Parse an URI hirarchical part
1485 *
1486 * hier_part = ( net_path | abs_path ) [ "?" query ]
1487 * abs_path = "/" path_segments
1488 * net_path = "//" authority [ abs_path ]
1489 *
1490 * Returns 0 or the error code
1491 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001492static int
Owen Taylor3473f882001-02-23 17:55:21 +00001493xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1494 int ret;
1495 const char *cur;
1496
1497 if (str == NULL)
1498 return(-1);
1499
1500 cur = *str;
1501
1502 if ((cur[0] == '/') && (cur[1] == '/')) {
1503 cur += 2;
1504 ret = xmlParseURIAuthority(uri, &cur);
1505 if (ret != 0)
1506 return(ret);
1507 if (cur[0] == '/') {
1508 cur++;
1509 ret = xmlParseURIPathSegments(uri, &cur, 1);
1510 }
1511 } else if (cur[0] == '/') {
1512 cur++;
1513 ret = xmlParseURIPathSegments(uri, &cur, 1);
1514 } else {
1515 return(4);
1516 }
1517 if (ret != 0)
1518 return(ret);
1519 if (*cur == '?') {
1520 cur++;
1521 ret = xmlParseURIQuery(uri, &cur);
1522 if (ret != 0)
1523 return(ret);
1524 }
1525 *str = cur;
1526 return(0);
1527}
1528
1529/**
1530 * xmlParseAbsoluteURI:
1531 * @uri: pointer to an URI structure
1532 * @str: pointer to the string to analyze
1533 *
1534 * Parse an URI reference string and fills in the appropriate fields
1535 * of the @uri structure
1536 *
1537 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1538 *
1539 * Returns 0 or the error code
1540 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001541static int
Owen Taylor3473f882001-02-23 17:55:21 +00001542xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1543 int ret;
1544
1545 if (str == NULL)
1546 return(-1);
1547
1548 ret = xmlParseURIScheme(uri, str);
1549 if (ret != 0) return(ret);
1550 if (**str != ':')
1551 return(1);
1552 (*str)++;
1553 if (**str == '/')
1554 return(xmlParseURIHierPart(uri, str));
1555 return(xmlParseURIOpaquePart(uri, str));
1556}
1557
1558/**
1559 * xmlParseRelativeURI:
1560 * @uri: pointer to an URI structure
1561 * @str: pointer to the string to analyze
1562 *
1563 * Parse an relative URI string and fills in the appropriate fields
1564 * of the @uri structure
1565 *
1566 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1567 * abs_path = "/" path_segments
1568 * net_path = "//" authority [ abs_path ]
1569 * rel_path = rel_segment [ abs_path ]
1570 *
1571 * Returns 0 or the error code
1572 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001573static int
Owen Taylor3473f882001-02-23 17:55:21 +00001574xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1575 int ret = 0;
1576 const char *cur;
1577
1578 if (str == NULL)
1579 return(-1);
1580
1581 cur = *str;
1582 if ((cur[0] == '/') && (cur[1] == '/')) {
1583 cur += 2;
1584 ret = xmlParseURIAuthority(uri, &cur);
1585 if (ret != 0)
1586 return(ret);
1587 if (cur[0] == '/') {
1588 cur++;
1589 ret = xmlParseURIPathSegments(uri, &cur, 1);
1590 }
1591 } else if (cur[0] == '/') {
1592 cur++;
1593 ret = xmlParseURIPathSegments(uri, &cur, 1);
1594 } else if (cur[0] != '#' && cur[0] != '?') {
1595 ret = xmlParseURIRelSegment(uri, &cur);
1596 if (ret != 0)
1597 return(ret);
1598 if (cur[0] == '/') {
1599 cur++;
1600 ret = xmlParseURIPathSegments(uri, &cur, 1);
1601 }
1602 }
1603 if (ret != 0)
1604 return(ret);
1605 if (*cur == '?') {
1606 cur++;
1607 ret = xmlParseURIQuery(uri, &cur);
1608 if (ret != 0)
1609 return(ret);
1610 }
1611 *str = cur;
1612 return(ret);
1613}
1614
1615/**
1616 * xmlParseURIReference:
1617 * @uri: pointer to an URI structure
1618 * @str: the string to analyze
1619 *
1620 * Parse an URI reference string and fills in the appropriate fields
1621 * of the @uri structure
1622 *
1623 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1624 *
1625 * Returns 0 or the error code
1626 */
1627int
1628xmlParseURIReference(xmlURIPtr uri, const char *str) {
1629 int ret;
1630 const char *tmp = str;
1631
1632 if (str == NULL)
1633 return(-1);
1634 xmlCleanURI(uri);
1635
1636 /*
1637 * Try first to parse aboslute refs, then fallback to relative if
1638 * it fails.
1639 */
1640 ret = xmlParseAbsoluteURI(uri, &str);
1641 if (ret != 0) {
1642 xmlCleanURI(uri);
1643 str = tmp;
1644 ret = xmlParseRelativeURI(uri, &str);
1645 }
1646 if (ret != 0) {
1647 xmlCleanURI(uri);
1648 return(ret);
1649 }
1650
1651 if (*str == '#') {
1652 str++;
1653 ret = xmlParseURIFragment(uri, &str);
1654 if (ret != 0) return(ret);
1655 }
1656 if (*str != 0) {
1657 xmlCleanURI(uri);
1658 return(1);
1659 }
1660 return(0);
1661}
1662
1663/**
1664 * xmlParseURI:
1665 * @str: the URI string to analyze
1666 *
1667 * Parse an URI
1668 *
1669 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1670 *
1671 * Returns a newly build xmlURIPtr or NULL in case of error
1672 */
1673xmlURIPtr
1674xmlParseURI(const char *str) {
1675 xmlURIPtr uri;
1676 int ret;
1677
1678 if (str == NULL)
1679 return(NULL);
1680 uri = xmlCreateURI();
1681 if (uri != NULL) {
1682 ret = xmlParseURIReference(uri, str);
1683 if (ret) {
1684 xmlFreeURI(uri);
1685 return(NULL);
1686 }
1687 }
1688 return(uri);
1689}
1690
1691/************************************************************************
1692 * *
1693 * Public functions *
1694 * *
1695 ************************************************************************/
1696
1697/**
1698 * xmlBuildURI:
1699 * @URI: the URI instance found in the document
1700 * @base: the base value
1701 *
1702 * Computes he final URI of the reference done by checking that
1703 * the given URI is valid, and building the final URI using the
1704 * base URI. This is processed according to section 5.2 of the
1705 * RFC 2396
1706 *
1707 * 5.2. Resolving Relative References to Absolute Form
1708 *
1709 * Returns a new URI string (to be freed by the caller) or NULL in case
1710 * of error.
1711 */
1712xmlChar *
1713xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1714 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001715 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001716 xmlURIPtr ref = NULL;
1717 xmlURIPtr bas = NULL;
1718 xmlURIPtr res = NULL;
1719
1720 /*
1721 * 1) The URI reference is parsed into the potential four components and
1722 * fragment identifier, as described in Section 4.3.
1723 *
1724 * NOTE that a completely empty URI is treated by modern browsers
1725 * as a reference to "." rather than as a synonym for the current
1726 * URI. Should we do that here?
1727 */
1728 if (URI == NULL)
1729 ret = -1;
1730 else {
1731 if (*URI) {
1732 ref = xmlCreateURI();
1733 if (ref == NULL)
1734 goto done;
1735 ret = xmlParseURIReference(ref, (const char *) URI);
1736 }
1737 else
1738 ret = 0;
1739 }
1740 if (ret != 0)
1741 goto done;
1742 if (base == NULL)
1743 ret = -1;
1744 else {
1745 bas = xmlCreateURI();
1746 if (bas == NULL)
1747 goto done;
1748 ret = xmlParseURIReference(bas, (const char *) base);
1749 }
1750 if (ret != 0) {
1751 if (ref)
1752 val = xmlSaveUri(ref);
1753 goto done;
1754 }
1755 if (ref == NULL) {
1756 /*
1757 * the base fragment must be ignored
1758 */
1759 if (bas->fragment != NULL) {
1760 xmlFree(bas->fragment);
1761 bas->fragment = NULL;
1762 }
1763 val = xmlSaveUri(bas);
1764 goto done;
1765 }
1766
1767 /*
1768 * 2) If the path component is empty and the scheme, authority, and
1769 * query components are undefined, then it is a reference to the
1770 * current document and we are done. Otherwise, the reference URI's
1771 * query and fragment components are defined as found (or not found)
1772 * within the URI reference and not inherited from the base URI.
1773 *
1774 * NOTE that in modern browsers, the parsing differs from the above
1775 * in the following aspect: the query component is allowed to be
1776 * defined while still treating this as a reference to the current
1777 * document.
1778 */
1779 res = xmlCreateURI();
1780 if (res == NULL)
1781 goto done;
1782 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1783 ((ref->authority == NULL) && (ref->server == NULL))) {
1784 if (bas->scheme != NULL)
1785 res->scheme = xmlMemStrdup(bas->scheme);
1786 if (bas->authority != NULL)
1787 res->authority = xmlMemStrdup(bas->authority);
1788 else if (bas->server != NULL) {
1789 res->server = xmlMemStrdup(bas->server);
1790 if (bas->user != NULL)
1791 res->user = xmlMemStrdup(bas->user);
1792 res->port = bas->port;
1793 }
1794 if (bas->path != NULL)
1795 res->path = xmlMemStrdup(bas->path);
1796 if (ref->query != NULL)
1797 res->query = xmlMemStrdup(ref->query);
1798 else if (bas->query != NULL)
1799 res->query = xmlMemStrdup(bas->query);
1800 if (ref->fragment != NULL)
1801 res->fragment = xmlMemStrdup(ref->fragment);
1802 goto step_7;
1803 }
1804
1805 if (ref->query != NULL)
1806 res->query = xmlMemStrdup(ref->query);
1807 if (ref->fragment != NULL)
1808 res->fragment = xmlMemStrdup(ref->fragment);
1809
1810 /*
1811 * 3) If the scheme component is defined, indicating that the reference
1812 * starts with a scheme name, then the reference is interpreted as an
1813 * absolute URI and we are done. Otherwise, the reference URI's
1814 * scheme is inherited from the base URI's scheme component.
1815 */
1816 if (ref->scheme != NULL) {
1817 val = xmlSaveUri(ref);
1818 goto done;
1819 }
1820 if (bas->scheme != NULL)
1821 res->scheme = xmlMemStrdup(bas->scheme);
1822
1823 /*
1824 * 4) If the authority component is defined, then the reference is a
1825 * network-path and we skip to step 7. Otherwise, the reference
1826 * URI's authority is inherited from the base URI's authority
1827 * component, which will also be undefined if the URI scheme does not
1828 * use an authority component.
1829 */
1830 if ((ref->authority != NULL) || (ref->server != NULL)) {
1831 if (ref->authority != NULL)
1832 res->authority = xmlMemStrdup(ref->authority);
1833 else {
1834 res->server = xmlMemStrdup(ref->server);
1835 if (ref->user != NULL)
1836 res->user = xmlMemStrdup(ref->user);
1837 res->port = ref->port;
1838 }
1839 if (ref->path != NULL)
1840 res->path = xmlMemStrdup(ref->path);
1841 goto step_7;
1842 }
1843 if (bas->authority != NULL)
1844 res->authority = xmlMemStrdup(bas->authority);
1845 else if (bas->server != NULL) {
1846 res->server = xmlMemStrdup(bas->server);
1847 if (bas->user != NULL)
1848 res->user = xmlMemStrdup(bas->user);
1849 res->port = bas->port;
1850 }
1851
1852 /*
1853 * 5) If the path component begins with a slash character ("/"), then
1854 * the reference is an absolute-path and we skip to step 7.
1855 */
1856 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1857 res->path = xmlMemStrdup(ref->path);
1858 goto step_7;
1859 }
1860
1861
1862 /*
1863 * 6) If this step is reached, then we are resolving a relative-path
1864 * reference. The relative path needs to be merged with the base
1865 * URI's path. Although there are many ways to do this, we will
1866 * describe a simple method using a separate string buffer.
1867 *
1868 * Allocate a buffer large enough for the result string.
1869 */
1870 len = 2; /* extra / and 0 */
1871 if (ref->path != NULL)
1872 len += strlen(ref->path);
1873 if (bas->path != NULL)
1874 len += strlen(bas->path);
1875 res->path = (char *) xmlMalloc(len);
1876 if (res->path == NULL) {
1877 xmlGenericError(xmlGenericErrorContext,
1878 "xmlBuildURI: out of memory\n");
1879 goto done;
1880 }
1881 res->path[0] = 0;
1882
1883 /*
1884 * a) All but the last segment of the base URI's path component is
1885 * copied to the buffer. In other words, any characters after the
1886 * last (right-most) slash character, if any, are excluded.
1887 */
1888 cur = 0;
1889 out = 0;
1890 if (bas->path != NULL) {
1891 while (bas->path[cur] != 0) {
1892 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1893 cur++;
1894 if (bas->path[cur] == 0)
1895 break;
1896
1897 cur++;
1898 while (out < cur) {
1899 res->path[out] = bas->path[out];
1900 out++;
1901 }
1902 }
1903 }
1904 res->path[out] = 0;
1905
1906 /*
1907 * b) The reference's path component is appended to the buffer
1908 * string.
1909 */
1910 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001911 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001912 /*
1913 * Ensure the path includes a '/'
1914 */
1915 if ((out == 0) && (bas->server != NULL))
1916 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001917 while (ref->path[indx] != 0) {
1918 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00001919 }
1920 }
1921 res->path[out] = 0;
1922
1923 /*
1924 * Steps c) to h) are really path normalization steps
1925 */
1926 xmlNormalizeURIPath(res->path);
1927
1928step_7:
1929
1930 /*
1931 * 7) The resulting URI components, including any inherited from the
1932 * base URI, are recombined to give the absolute form of the URI
1933 * reference.
1934 */
1935 val = xmlSaveUri(res);
1936
1937done:
1938 if (ref != NULL)
1939 xmlFreeURI(ref);
1940 if (bas != NULL)
1941 xmlFreeURI(bas);
1942 if (res != NULL)
1943 xmlFreeURI(res);
1944 return(val);
1945}
1946
1947