/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFC 2396
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
21/************************************************************************
22 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000023 * Macros to differentiate various character type *
Owen Taylor3473f882001-02-23 17:55:21 +000024 * directly extracted from RFC 2396 *
25 * *
26 ************************************************************************/
27
/*
 * Character classes from RFC 2396. Macros taking "x" test a single
 * character value; macros taking "p" test the character(s) a pointer
 * designates (needed for the three-byte "%XX" escape). Arguments may be
 * evaluated more than once, so they must be free of side effects.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9" */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A" .. "F" | "a" .. "f" */
#define IS_HEX(x)							\
    (IS_DIGIT(x) ||							\
     (('a' <= (x)) && ((x) <= 'f')) ||					\
     (('A' <= (x)) && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)							\
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||			\
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||			\
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_RESERVED(x)							\
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||			\
     ((x) == ':') || ((x) == '@') || ((x) == '&') ||			\
     ((x) == '=') || ((x) == '+') || ((x) == '$') ||			\
     ((x) == ','))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/* escaped = "%" hex hex */
#define IS_ESCAPED(p)							\
    ((*(p) == '%') && IS_HEX((p)[1]) && IS_HEX((p)[2]))

/* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_PCHAR(p)							\
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||				\
     (*(p) == ':') || (*(p) == '@') || (*(p) == '&') ||		\
     (*(p) == '=') || (*(p) == '+') || (*(p) == '$') ||		\
     (*(p) == ','))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 * i.e. pchar plus ";" and "?".
 */
#define IS_URIC_NO_SLASH(p)						\
    (IS_PCHAR(p) || (*(p) == ';') || (*(p) == '?'))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)							\
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||				\
     (*(p) == ';') || (*(p) == '@') || (*(p) == '&') ||		\
     (*(p) == '=') || (*(p) == '+') || (*(p) == '$') ||		\
     (*(p) == ','))

/* scheme = alpha *( alpha | digit | "+" | "-" | "." ) */
#define IS_SCHEME(x)							\
    (IS_ALPHA(x) || IS_DIGIT(x) ||					\
     ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 * i.e. pchar plus ";".
 */
#define IS_REG_NAME(p) (IS_PCHAR(p) || (*(p) == ';'))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)							\
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||				\
     (*(p) == ';') || (*(p) == ':') || (*(p) == '&') ||		\
     (*(p) == '=') || (*(p) == '+') || (*(p) == '$') ||		\
     (*(p) == ','))

/* uric = reserved | unreserved | escaped */
#define IS_URIC(p)							\
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) || IS_RESERVED(*(p)))

/* unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" */
#define IS_UNWISE(p)							\
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||		\
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||		\
     (*(p) == ']') || (*(p) == '`'))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000167
/*
 * Advance a character pointer to the next URI character: a "%XX" escape
 * counts as one character and is skipped as three bytes, anything else
 * as a single byte. No check is made that two bytes really follow the
 * '%'; callers are expected to have validated the escape beforehand.
 *
 * The argument is parenthesized so that expressions such as NEXT(*pp)
 * advance the pointer they designate rather than a neighbouring object.
 * It is still evaluated more than once and must be side-effect free.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
173
174/*
175 * Productions from the spec.
176 *
177 * authority = server | reg_name
178 * reg_name = 1*( unreserved | escaped | "$" | "," |
179 * ";" | ":" | "@" | "&" | "=" | "+" )
180 *
181 * path = [ abs_path | opaque_part ]
182 */
183
184/************************************************************************
185 * *
186 * Generic URI structure functions *
187 * *
188 ************************************************************************/
189
190/**
191 * xmlCreateURI:
192 *
193 * Simply creates an empty xmlURI
194 *
195 * Returns the new structure or NULL in case of error
196 */
197xmlURIPtr
198xmlCreateURI(void) {
199 xmlURIPtr ret;
200
201 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
202 if (ret == NULL) {
203 xmlGenericError(xmlGenericErrorContext,
204 "xmlCreateURI: out of memory\n");
205 return(NULL);
206 }
207 memset(ret, 0, sizeof(xmlURI));
208 return(ret);
209}
210
211/**
212 * xmlSaveUri:
213 * @uri: pointer to an xmlURI
214 *
215 * Save the URI as an escaped string
216 *
217 * Returns a new string (to be deallocated by caller)
218 */
219xmlChar *
220xmlSaveUri(xmlURIPtr uri) {
221 xmlChar *ret = NULL;
222 const char *p;
223 int len;
224 int max;
225
226 if (uri == NULL) return(NULL);
227
228
229 max = 80;
230 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
231 if (ret == NULL) {
232 xmlGenericError(xmlGenericErrorContext,
233 "xmlSaveUri: out of memory\n");
234 return(NULL);
235 }
236 len = 0;
237
238 if (uri->scheme != NULL) {
239 p = uri->scheme;
240 while (*p != 0) {
241 if (len >= max) {
242 max *= 2;
243 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
244 if (ret == NULL) {
245 xmlGenericError(xmlGenericErrorContext,
246 "xmlSaveUri: out of memory\n");
247 return(NULL);
248 }
249 }
250 ret[len++] = *p++;
251 }
252 if (len >= max) {
253 max *= 2;
254 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
255 if (ret == NULL) {
256 xmlGenericError(xmlGenericErrorContext,
257 "xmlSaveUri: out of memory\n");
258 return(NULL);
259 }
260 }
261 ret[len++] = ':';
262 }
263 if (uri->opaque != NULL) {
264 p = uri->opaque;
265 while (*p != 0) {
266 if (len + 3 >= max) {
267 max *= 2;
268 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
269 if (ret == NULL) {
270 xmlGenericError(xmlGenericErrorContext,
271 "xmlSaveUri: out of memory\n");
272 return(NULL);
273 }
274 }
275 if ((IS_UNRESERVED(*(p))) ||
276 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
277 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
278 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
279 ret[len++] = *p++;
280 else {
281 int val = *(unsigned char *)p++;
282 int hi = val / 0x10, lo = val % 0x10;
283 ret[len++] = '%';
284 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
285 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
286 }
287 }
288 if (len >= max) {
289 max *= 2;
290 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
291 if (ret == NULL) {
292 xmlGenericError(xmlGenericErrorContext,
293 "xmlSaveUri: out of memory\n");
294 return(NULL);
295 }
296 }
297 ret[len++] = 0;
298 } else {
299 if (uri->server != NULL) {
300 if (len + 3 >= max) {
301 max *= 2;
302 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
303 if (ret == NULL) {
304 xmlGenericError(xmlGenericErrorContext,
305 "xmlSaveUri: out of memory\n");
306 return(NULL);
307 }
308 }
309 ret[len++] = '/';
310 ret[len++] = '/';
311 if (uri->user != NULL) {
312 p = uri->user;
313 while (*p != 0) {
314 if (len + 3 >= max) {
315 max *= 2;
316 ret = (xmlChar *) xmlRealloc(ret,
317 (max + 1) * sizeof(xmlChar));
318 if (ret == NULL) {
319 xmlGenericError(xmlGenericErrorContext,
320 "xmlSaveUri: out of memory\n");
321 return(NULL);
322 }
323 }
324 if ((IS_UNRESERVED(*(p))) ||
325 ((*(p) == ';')) || ((*(p) == ':')) ||
326 ((*(p) == '&')) || ((*(p) == '=')) ||
327 ((*(p) == '+')) || ((*(p) == '$')) ||
328 ((*(p) == ',')))
329 ret[len++] = *p++;
330 else {
331 int val = *(unsigned char *)p++;
332 int hi = val / 0x10, lo = val % 0x10;
333 ret[len++] = '%';
334 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
335 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
336 }
337 }
338 if (len + 3 >= max) {
339 max *= 2;
340 ret = (xmlChar *) xmlRealloc(ret,
341 (max + 1) * sizeof(xmlChar));
342 if (ret == NULL) {
343 xmlGenericError(xmlGenericErrorContext,
344 "xmlSaveUri: out of memory\n");
345 return(NULL);
346 }
347 }
348 ret[len++] = '@';
349 }
350 p = uri->server;
351 while (*p != 0) {
352 if (len >= max) {
353 max *= 2;
354 ret = (xmlChar *) xmlRealloc(ret,
355 (max + 1) * sizeof(xmlChar));
356 if (ret == NULL) {
357 xmlGenericError(xmlGenericErrorContext,
358 "xmlSaveUri: out of memory\n");
359 return(NULL);
360 }
361 }
362 ret[len++] = *p++;
363 }
364 if (uri->port > 0) {
365 if (len + 10 >= max) {
366 max *= 2;
367 ret = (xmlChar *) xmlRealloc(ret,
368 (max + 1) * sizeof(xmlChar));
369 if (ret == NULL) {
370 xmlGenericError(xmlGenericErrorContext,
371 "xmlSaveUri: out of memory\n");
372 return(NULL);
373 }
374 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000375 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000376 }
377 } else if (uri->authority != NULL) {
378 if (len + 3 >= max) {
379 max *= 2;
380 ret = (xmlChar *) xmlRealloc(ret,
381 (max + 1) * sizeof(xmlChar));
382 if (ret == NULL) {
383 xmlGenericError(xmlGenericErrorContext,
384 "xmlSaveUri: out of memory\n");
385 return(NULL);
386 }
387 }
388 ret[len++] = '/';
389 ret[len++] = '/';
390 p = uri->authority;
391 while (*p != 0) {
392 if (len + 3 >= max) {
393 max *= 2;
394 ret = (xmlChar *) xmlRealloc(ret,
395 (max + 1) * sizeof(xmlChar));
396 if (ret == NULL) {
397 xmlGenericError(xmlGenericErrorContext,
398 "xmlSaveUri: out of memory\n");
399 return(NULL);
400 }
401 }
402 if ((IS_UNRESERVED(*(p))) ||
403 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
404 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
405 ((*(p) == '=')) || ((*(p) == '+')))
406 ret[len++] = *p++;
407 else {
408 int val = *(unsigned char *)p++;
409 int hi = val / 0x10, lo = val % 0x10;
410 ret[len++] = '%';
411 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
412 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
413 }
414 }
415 } else if (uri->scheme != NULL) {
416 if (len + 3 >= max) {
417 max *= 2;
418 ret = (xmlChar *) xmlRealloc(ret,
419 (max + 1) * sizeof(xmlChar));
420 if (ret == NULL) {
421 xmlGenericError(xmlGenericErrorContext,
422 "xmlSaveUri: out of memory\n");
423 return(NULL);
424 }
425 }
426 ret[len++] = '/';
427 ret[len++] = '/';
428 }
429 if (uri->path != NULL) {
430 p = uri->path;
431 while (*p != 0) {
432 if (len + 3 >= max) {
433 max *= 2;
434 ret = (xmlChar *) xmlRealloc(ret,
435 (max + 1) * sizeof(xmlChar));
436 if (ret == NULL) {
437 xmlGenericError(xmlGenericErrorContext,
438 "xmlSaveUri: out of memory\n");
439 return(NULL);
440 }
441 }
442 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
443 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
444 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
445 ((*(p) == ',')))
446 ret[len++] = *p++;
447 else {
448 int val = *(unsigned char *)p++;
449 int hi = val / 0x10, lo = val % 0x10;
450 ret[len++] = '%';
451 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
452 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
453 }
454 }
455 }
456 if (uri->query != NULL) {
457 if (len + 3 >= max) {
458 max *= 2;
459 ret = (xmlChar *) xmlRealloc(ret,
460 (max + 1) * sizeof(xmlChar));
461 if (ret == NULL) {
462 xmlGenericError(xmlGenericErrorContext,
463 "xmlSaveUri: out of memory\n");
464 return(NULL);
465 }
466 }
467 ret[len++] = '?';
468 p = uri->query;
469 while (*p != 0) {
470 if (len + 3 >= max) {
471 max *= 2;
472 ret = (xmlChar *) xmlRealloc(ret,
473 (max + 1) * sizeof(xmlChar));
474 if (ret == NULL) {
475 xmlGenericError(xmlGenericErrorContext,
476 "xmlSaveUri: out of memory\n");
477 return(NULL);
478 }
479 }
480 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
481 ret[len++] = *p++;
482 else {
483 int val = *(unsigned char *)p++;
484 int hi = val / 0x10, lo = val % 0x10;
485 ret[len++] = '%';
486 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
487 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
488 }
489 }
490 }
491 if (uri->fragment != NULL) {
492 if (len + 3 >= max) {
493 max *= 2;
494 ret = (xmlChar *) xmlRealloc(ret,
495 (max + 1) * sizeof(xmlChar));
496 if (ret == NULL) {
497 xmlGenericError(xmlGenericErrorContext,
498 "xmlSaveUri: out of memory\n");
499 return(NULL);
500 }
501 }
502 ret[len++] = '#';
503 p = uri->fragment;
504 while (*p != 0) {
505 if (len + 3 >= max) {
506 max *= 2;
507 ret = (xmlChar *) xmlRealloc(ret,
508 (max + 1) * sizeof(xmlChar));
509 if (ret == NULL) {
510 xmlGenericError(xmlGenericErrorContext,
511 "xmlSaveUri: out of memory\n");
512 return(NULL);
513 }
514 }
515 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
516 ret[len++] = *p++;
517 else {
518 int val = *(unsigned char *)p++;
519 int hi = val / 0x10, lo = val % 0x10;
520 ret[len++] = '%';
521 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
522 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
523 }
524 }
525 }
526 if (len >= max) {
527 max *= 2;
528 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
529 if (ret == NULL) {
530 xmlGenericError(xmlGenericErrorContext,
531 "xmlSaveUri: out of memory\n");
532 return(NULL);
533 }
534 }
535 ret[len++] = 0;
536 }
537 return(ret);
538}
539
540/**
541 * xmlPrintURI:
542 * @stream: a FILE* for the output
543 * @uri: pointer to an xmlURI
544 *
545 * Prints the URI in the stream @steam.
546 */
547void
548xmlPrintURI(FILE *stream, xmlURIPtr uri) {
549 xmlChar *out;
550
551 out = xmlSaveUri(uri);
552 if (out != NULL) {
553 fprintf(stream, "%s", out);
554 xmlFree(out);
555 }
556}
557
558/**
559 * xmlCleanURI:
560 * @uri: pointer to an xmlURI
561 *
562 * Make sure the xmlURI struct is free of content
563 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000564static void
Owen Taylor3473f882001-02-23 17:55:21 +0000565xmlCleanURI(xmlURIPtr uri) {
566 if (uri == NULL) return;
567
568 if (uri->scheme != NULL) xmlFree(uri->scheme);
569 uri->scheme = NULL;
570 if (uri->server != NULL) xmlFree(uri->server);
571 uri->server = NULL;
572 if (uri->user != NULL) xmlFree(uri->user);
573 uri->user = NULL;
574 if (uri->path != NULL) xmlFree(uri->path);
575 uri->path = NULL;
576 if (uri->fragment != NULL) xmlFree(uri->fragment);
577 uri->fragment = NULL;
578 if (uri->opaque != NULL) xmlFree(uri->opaque);
579 uri->opaque = NULL;
580 if (uri->authority != NULL) xmlFree(uri->authority);
581 uri->authority = NULL;
582 if (uri->query != NULL) xmlFree(uri->query);
583 uri->query = NULL;
584}
585
586/**
587 * xmlFreeURI:
588 * @uri: pointer to an xmlURI
589 *
590 * Free up the xmlURI struct
591 */
592void
593xmlFreeURI(xmlURIPtr uri) {
594 if (uri == NULL) return;
595
596 if (uri->scheme != NULL) xmlFree(uri->scheme);
597 if (uri->server != NULL) xmlFree(uri->server);
598 if (uri->user != NULL) xmlFree(uri->user);
599 if (uri->path != NULL) xmlFree(uri->path);
600 if (uri->fragment != NULL) xmlFree(uri->fragment);
601 if (uri->opaque != NULL) xmlFree(uri->opaque);
602 if (uri->authority != NULL) xmlFree(uri->authority);
603 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000604 xmlFree(uri);
605}
606
607/************************************************************************
608 * *
609 * Helper functions *
610 * *
611 ************************************************************************/
612
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" inside the path are
 * collapsed to a single "/" as well; leading "/" characters are kept.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize '//': keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy() must not be used. */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* Step back over the "/" that precedes the current position.  If
         * that reaches the start of the buffer there is no previous
         * segment, so keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp" is just past the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        /* Testing cur[0] stops the scan at the terminator after a
         * trailing "/..". */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000796
797/**
798 * xmlURIUnescapeString:
799 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000800 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000801 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000802 *
803 * Unescaping routine, does not do validity checks !
804 * Output is direct unsigned char translation of %XX values (no encoding)
805 *
806 * Returns an copy of the string, but unescaped
807 */
808char *
809xmlURIUnescapeString(const char *str, int len, char *target) {
810 char *ret, *out;
811 const char *in;
812
813 if (str == NULL)
814 return(NULL);
815 if (len <= 0) len = strlen(str);
816 if (len <= 0) return(NULL);
817
818 if (target == NULL) {
819 ret = (char *) xmlMalloc(len + 1);
820 if (ret == NULL) {
821 xmlGenericError(xmlGenericErrorContext,
822 "xmlURIUnescapeString: out of memory\n");
823 return(NULL);
824 }
825 } else
826 ret = target;
827 in = str;
828 out = ret;
829 while(len > 0) {
830 if (*in == '%') {
831 in++;
832 if ((*in >= '0') && (*in <= '9'))
833 *out = (*in - '0');
834 else if ((*in >= 'a') && (*in <= 'f'))
835 *out = (*in - 'a') + 10;
836 else if ((*in >= 'A') && (*in <= 'F'))
837 *out = (*in - 'A') + 10;
838 in++;
839 if ((*in >= '0') && (*in <= '9'))
840 *out = *out * 16 + (*in - '0');
841 else if ((*in >= 'a') && (*in <= 'f'))
842 *out = *out * 16 + (*in - 'a') + 10;
843 else if ((*in >= 'A') && (*in <= 'F'))
844 *out = *out * 16 + (*in - 'A') + 10;
845 in++;
846 len -= 3;
847 out++;
848 } else {
849 *out++ = *in++;
850 len--;
851 }
852 }
853 *out = 0;
854 return(ret);
855}
856
857/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000858 * xmlURIEscapeStr:
859 * @str: string to escape
860 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000861 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000862 * This routine escapes a string to hex, ignoring reserved characters (a-z)
863 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000864 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000865 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000866 */
867xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000868xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
869 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000870 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000871
Owen Taylor3473f882001-02-23 17:55:21 +0000872 unsigned int len, out;
873
874 if (str == NULL)
875 return(NULL);
876 len = xmlStrlen(str);
877 if (len <= 0) return(NULL);
878
879 len += 20;
880 ret = (xmlChar *) xmlMalloc(len);
881 if (ret == NULL) {
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000884 return(NULL);
885 }
886 in = (const xmlChar *) str;
887 out = 0;
888 while(*in != 0) {
889 if (len - out <= 3) {
890 len += 20;
891 ret = (xmlChar *) xmlRealloc(ret, len);
892 if (ret == NULL) {
893 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000894 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000895 return(NULL);
896 }
897 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000898
899 ch = *in;
900
Daniel Veillardeb475a32002-04-14 22:00:22 +0000901 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000902 unsigned char val;
903 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000904 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000905 if (val <= 9)
906 ret[out++] = '0' + val;
907 else
908 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000909 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000910 if (val <= 9)
911 ret[out++] = '0' + val;
912 else
913 ret[out++] = 'A' + val - 0xA;
914 in++;
915 } else {
916 ret[out++] = *in++;
917 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000918
Owen Taylor3473f882001-02-23 17:55:21 +0000919 }
920 ret[out] = 0;
921 return(ret);
922}
923
Daniel Veillard8514c672001-05-23 10:29:12 +0000924/**
925 * xmlURIEscape:
926 * @str: the string of the URI to escape
927 *
928 * Escaping routine, does not do validity checks !
929 * It will try to escape the chars needing this, but this is heuristic
930 * based it's impossible to be sure.
931 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000932 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000933 *
934 * 25 May 2001
935 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
936 * according to RFC2396.
937 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000938 */
939xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000940xmlURIEscape(const xmlChar * str)
941{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000942 xmlChar *ret, *segment = NULL;
943 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000944 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000945
Daniel Veillard6278fb52001-05-25 07:38:41 +0000946#define NULLCHK(p) if(!p) { \
947 xmlGenericError(xmlGenericErrorContext, \
948 "xmlURIEscape: out of memory\n"); \
949 return NULL; }
950
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000951 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000952 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000953
954 uri = xmlCreateURI();
955 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000956 /*
957 * Allow escaping errors in the unescaped form
958 */
959 uri->cleanup = 1;
960 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000961 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000962 xmlFreeURI(uri);
963 return (NULL);
964 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000965 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000966
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000967 if (!uri)
968 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000969
970 ret = NULL;
971
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000972 if (uri->scheme) {
973 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
974 NULLCHK(segment)
975 ret = xmlStrcat(ret, segment);
976 ret = xmlStrcat(ret, BAD_CAST ":");
977 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000978 }
979
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000980 if (uri->authority) {
981 segment =
982 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
983 NULLCHK(segment)
984 ret = xmlStrcat(ret, BAD_CAST "//");
985 ret = xmlStrcat(ret, segment);
986 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000987 }
988
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000989 if (uri->user) {
990 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
991 NULLCHK(segment)
992 ret = xmlStrcat(ret, segment);
993 ret = xmlStrcat(ret, BAD_CAST "@");
994 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000995 }
996
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000997 if (uri->server) {
998 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
999 NULLCHK(segment)
1000 ret = xmlStrcat(ret, BAD_CAST "//");
1001 ret = xmlStrcat(ret, segment);
1002 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001003 }
1004
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001005 if (uri->port) {
1006 xmlChar port[10];
1007
Daniel Veillard43d3f612001-11-10 11:57:23 +00001008 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001009 ret = xmlStrcat(ret, BAD_CAST ":");
1010 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001011 }
1012
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001013 if (uri->path) {
1014 segment =
1015 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1016 NULLCHK(segment)
1017 ret = xmlStrcat(ret, segment);
1018 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001019 }
1020
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001021 if (uri->query) {
1022 segment =
1023 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1024 NULLCHK(segment)
1025 ret = xmlStrcat(ret, BAD_CAST "?");
1026 ret = xmlStrcat(ret, segment);
1027 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001028 }
1029
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001030 if (uri->opaque) {
1031 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1032 NULLCHK(segment)
1033 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001034 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001035 }
1036
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001037 if (uri->fragment) {
1038 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1039 NULLCHK(segment)
1040 ret = xmlStrcat(ret, BAD_CAST "#");
1041 ret = xmlStrcat(ret, segment);
1042 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001043 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001044
1045 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001046#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001047
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001048 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001049}
1050
Owen Taylor3473f882001-02-23 17:55:21 +00001051/************************************************************************
1052 * *
1053 * Escaped URI parsing *
1054 * *
1055 ************************************************************************/
1056
1057/**
1058 * xmlParseURIFragment:
1059 * @uri: pointer to an URI structure
1060 * @str: pointer to the string to analyze
1061 *
1062 * Parse an URI fragment string and fills in the appropriate fields
1063 * of the @uri structure.
1064 *
1065 * fragment = *uric
1066 *
1067 * Returns 0 or the error code
1068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001070xmlParseURIFragment(xmlURIPtr uri, const char **str)
1071{
Owen Taylor3473f882001-02-23 17:55:21 +00001072 const char *cur = *str;
1073
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001074 if (str == NULL)
1075 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001076
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001077 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1078 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001079 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001080 if (uri->fragment != NULL)
1081 xmlFree(uri->fragment);
1082 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001083 }
1084 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001085 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001086}
1087
1088/**
1089 * xmlParseURIQuery:
1090 * @uri: pointer to an URI structure
1091 * @str: pointer to the string to analyze
1092 *
1093 * Parse the query part of an URI
1094 *
1095 * query = *uric
1096 *
1097 * Returns 0 or the error code
1098 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001099static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001100xmlParseURIQuery(xmlURIPtr uri, const char **str)
1101{
Owen Taylor3473f882001-02-23 17:55:21 +00001102 const char *cur = *str;
1103
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001104 if (str == NULL)
1105 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001106
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001107 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1108 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001109 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001110 if (uri->query != NULL)
1111 xmlFree(uri->query);
1112 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001113 }
1114 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001115 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001116}
1117
1118/**
1119 * xmlParseURIScheme:
1120 * @uri: pointer to an URI structure
1121 * @str: pointer to the string to analyze
1122 *
1123 * Parse an URI scheme
1124 *
1125 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1126 *
1127 * Returns 0 or the error code
1128 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001129static int
Owen Taylor3473f882001-02-23 17:55:21 +00001130xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1131 const char *cur;
1132
1133 if (str == NULL)
1134 return(-1);
1135
1136 cur = *str;
1137 if (!IS_ALPHA(*cur))
1138 return(2);
1139 cur++;
1140 while (IS_SCHEME(*cur)) cur++;
1141 if (uri != NULL) {
1142 if (uri->scheme != NULL) xmlFree(uri->scheme);
1143 /* !!! strndup */
1144 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1145 }
1146 *str = cur;
1147 return(0);
1148}
1149
1150/**
1151 * xmlParseURIOpaquePart:
1152 * @uri: pointer to an URI structure
1153 * @str: pointer to the string to analyze
1154 *
1155 * Parse an URI opaque part
1156 *
1157 * opaque_part = uric_no_slash *uric
1158 *
1159 * Returns 0 or the error code
1160 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001161static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001162xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1163{
Owen Taylor3473f882001-02-23 17:55:21 +00001164 const char *cur;
1165
1166 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001167 return (-1);
1168
Owen Taylor3473f882001-02-23 17:55:21 +00001169 cur = *str;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001170 if (!(IS_URIC_NO_SLASH(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001171 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001172 }
1173 NEXT(cur);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001174 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1175 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001177 if (uri->opaque != NULL)
1178 xmlFree(uri->opaque);
1179 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001180 }
1181 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001182 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001183}
1184
1185/**
1186 * xmlParseURIServer:
1187 * @uri: pointer to an URI structure
1188 * @str: pointer to the string to analyze
1189 *
1190 * Parse a server subpart of an URI, it's a finer grain analysis
1191 * of the authority part.
1192 *
1193 * server = [ [ userinfo "@" ] hostport ]
1194 * userinfo = *( unreserved | escaped |
1195 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1196 * hostport = host [ ":" port ]
1197 * host = hostname | IPv4address
1198 * hostname = *( domainlabel "." ) toplabel [ "." ]
1199 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1200 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1201 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1202 * port = *digit
1203 *
1204 * Returns 0 or the error code
1205 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001206static int
Owen Taylor3473f882001-02-23 17:55:21 +00001207xmlParseURIServer(xmlURIPtr uri, const char **str) {
1208 const char *cur;
1209 const char *host, *tmp;
1210
1211 if (str == NULL)
1212 return(-1);
1213
1214 cur = *str;
1215
1216 /*
1217 * is there an userinfo ?
1218 */
1219 while (IS_USERINFO(cur)) NEXT(cur);
1220 if (*cur == '@') {
1221 if (uri != NULL) {
1222 if (uri->user != NULL) xmlFree(uri->user);
1223 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1224 }
1225 cur++;
1226 } else {
1227 if (uri != NULL) {
1228 if (uri->user != NULL) xmlFree(uri->user);
1229 uri->user = NULL;
1230 }
1231 cur = *str;
1232 }
1233 /*
1234 * This can be empty in the case where there is no server
1235 */
1236 host = cur;
1237 if (*cur == '/') {
1238 if (uri != NULL) {
1239 if (uri->authority != NULL) xmlFree(uri->authority);
1240 uri->authority = NULL;
1241 if (uri->server != NULL) xmlFree(uri->server);
1242 uri->server = NULL;
1243 uri->port = 0;
1244 }
1245 return(0);
1246 }
1247 /*
1248 * host part of hostport can derive either an IPV4 address
1249 * or an unresolved name. Check the IP first, it easier to detect
1250 * errors if wrong one
1251 */
1252 if (IS_DIGIT(*cur)) {
1253 while(IS_DIGIT(*cur)) cur++;
1254 if (*cur != '.')
1255 goto host_name;
1256 cur++;
1257 if (!IS_DIGIT(*cur))
1258 goto host_name;
1259 while(IS_DIGIT(*cur)) cur++;
1260 if (*cur != '.')
1261 goto host_name;
1262 cur++;
1263 if (!IS_DIGIT(*cur))
1264 goto host_name;
1265 while(IS_DIGIT(*cur)) cur++;
1266 if (*cur != '.')
1267 goto host_name;
1268 cur++;
1269 if (!IS_DIGIT(*cur))
1270 goto host_name;
1271 while(IS_DIGIT(*cur)) cur++;
1272 if (uri != NULL) {
1273 if (uri->authority != NULL) xmlFree(uri->authority);
1274 uri->authority = NULL;
1275 if (uri->server != NULL) xmlFree(uri->server);
1276 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1277 }
1278 goto host_done;
1279 }
1280host_name:
1281 /*
1282 * the hostname production as-is is a parser nightmare.
1283 * simplify it to
1284 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1285 * and just make sure the last label starts with a non numeric char.
1286 */
1287 if (!IS_ALPHANUM(*cur))
1288 return(6);
1289 while (IS_ALPHANUM(*cur)) {
1290 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1291 if (*cur == '.')
1292 cur++;
1293 }
1294 tmp = cur;
1295 tmp--;
1296 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1297 tmp++;
1298 if (!IS_ALPHA(*tmp))
1299 return(7);
1300 if (uri != NULL) {
1301 if (uri->authority != NULL) xmlFree(uri->authority);
1302 uri->authority = NULL;
1303 if (uri->server != NULL) xmlFree(uri->server);
1304 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1305 }
1306
1307host_done:
1308
1309 /*
1310 * finish by checking for a port presence.
1311 */
1312 if (*cur == ':') {
1313 cur++;
1314 if (IS_DIGIT(*cur)) {
1315 if (uri != NULL)
1316 uri->port = 0;
1317 while (IS_DIGIT(*cur)) {
1318 if (uri != NULL)
1319 uri->port = uri->port * 10 + (*cur - '0');
1320 cur++;
1321 }
1322 }
1323 }
1324 *str = cur;
1325 return(0);
1326}
1327
1328/**
1329 * xmlParseURIRelSegment:
1330 * @uri: pointer to an URI structure
1331 * @str: pointer to the string to analyze
1332 *
1333 * Parse an URI relative segment
1334 *
1335 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1336 * "+" | "$" | "," )
1337 *
1338 * Returns 0 or the error code
1339 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001340static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001341xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1342{
Owen Taylor3473f882001-02-23 17:55:21 +00001343 const char *cur;
1344
1345 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001346 return (-1);
1347
Owen Taylor3473f882001-02-23 17:55:21 +00001348 cur = *str;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001349 if (!(IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
1350 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001351 }
1352 NEXT(cur);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001353 while (IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1354 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001355 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001356 if (uri->path != NULL)
1357 xmlFree(uri->path);
1358 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001359 }
1360 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001361 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001362}
1363
1364/**
1365 * xmlParseURIPathSegments:
1366 * @uri: pointer to an URI structure
1367 * @str: pointer to the string to analyze
1368 * @slash: should we add a leading slash
1369 *
1370 * Parse an URI set of path segments
1371 *
1372 * path_segments = segment *( "/" segment )
1373 * segment = *pchar *( ";" param )
1374 * param = *pchar
1375 *
1376 * Returns 0 or the error code
1377 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001378static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001379xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1380{
Owen Taylor3473f882001-02-23 17:55:21 +00001381 const char *cur;
1382
1383 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001384 return (-1);
1385
Owen Taylor3473f882001-02-23 17:55:21 +00001386 cur = *str;
1387
1388 do {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001389 while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1390 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001391 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001392 cur++;
1393 while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1394 NEXT(cur);
1395 }
1396 if (*cur != '/')
1397 break;
1398 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001399 } while (1);
1400 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001401 int len, len2 = 0;
1402 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001403
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001404 /*
1405 * Concat the set of path segments to the current path
1406 */
1407 len = cur - *str;
1408 if (slash)
1409 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001410
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001411 if (uri->path != NULL) {
1412 len2 = strlen(uri->path);
1413 len += len2;
1414 }
Owen Taylor3473f882001-02-23 17:55:21 +00001415 path = (char *) xmlMalloc(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001416 if (path == NULL) {
1417 xmlGenericError(xmlGenericErrorContext,
1418 "xmlParseURIPathSegments: out of memory\n");
1419 *str = cur;
1420 return (-1);
1421 }
1422 if (uri->path != NULL)
1423 memcpy(path, uri->path, len2);
1424 if (slash) {
1425 path[len2] = '/';
1426 len2++;
1427 }
1428 path[len2] = 0;
1429 if (cur - *str > 0)
1430 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1431 if (uri->path != NULL)
1432 xmlFree(uri->path);
1433 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001434 }
1435 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001436 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001437}
1438
1439/**
1440 * xmlParseURIAuthority:
1441 * @uri: pointer to an URI structure
1442 * @str: pointer to the string to analyze
1443 *
1444 * Parse the authority part of an URI.
1445 *
1446 * authority = server | reg_name
1447 * server = [ [ userinfo "@" ] hostport ]
1448 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1449 * "@" | "&" | "=" | "+" )
1450 *
1451 * Note : this is completely ambiguous since reg_name is allowed to
1452 * use the full set of chars in use by server:
1453 *
1454 * 3.2.1. Registry-based Naming Authority
1455 *
1456 * The structure of a registry-based naming authority is specific
1457 * to the URI scheme, but constrained to the allowed characters
1458 * for an authority component.
1459 *
1460 * Returns 0 or the error code
1461 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001462static int
Owen Taylor3473f882001-02-23 17:55:21 +00001463xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1464 const char *cur;
1465 int ret;
1466
1467 if (str == NULL)
1468 return(-1);
1469
1470 cur = *str;
1471
1472 /*
1473 * try first to parse it as a server string.
1474 */
1475 ret = xmlParseURIServer(uri, str);
1476 if (ret == 0)
1477 return(0);
1478
1479 /*
1480 * failed, fallback to reg_name
1481 */
1482 if (!IS_REG_NAME(cur)) {
1483 return(5);
1484 }
1485 NEXT(cur);
1486 while (IS_REG_NAME(cur)) NEXT(cur);
1487 if (uri != NULL) {
1488 if (uri->server != NULL) xmlFree(uri->server);
1489 uri->server = NULL;
1490 if (uri->user != NULL) xmlFree(uri->user);
1491 uri->user = NULL;
1492 if (uri->authority != NULL) xmlFree(uri->authority);
1493 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1494 }
1495 *str = cur;
1496 return(0);
1497}
1498
1499/**
1500 * xmlParseURIHierPart:
1501 * @uri: pointer to an URI structure
1502 * @str: pointer to the string to analyze
1503 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001504 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001505 *
1506 * hier_part = ( net_path | abs_path ) [ "?" query ]
1507 * abs_path = "/" path_segments
1508 * net_path = "//" authority [ abs_path ]
1509 *
1510 * Returns 0 or the error code
1511 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001512static int
Owen Taylor3473f882001-02-23 17:55:21 +00001513xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1514 int ret;
1515 const char *cur;
1516
1517 if (str == NULL)
1518 return(-1);
1519
1520 cur = *str;
1521
1522 if ((cur[0] == '/') && (cur[1] == '/')) {
1523 cur += 2;
1524 ret = xmlParseURIAuthority(uri, &cur);
1525 if (ret != 0)
1526 return(ret);
1527 if (cur[0] == '/') {
1528 cur++;
1529 ret = xmlParseURIPathSegments(uri, &cur, 1);
1530 }
1531 } else if (cur[0] == '/') {
1532 cur++;
1533 ret = xmlParseURIPathSegments(uri, &cur, 1);
1534 } else {
1535 return(4);
1536 }
1537 if (ret != 0)
1538 return(ret);
1539 if (*cur == '?') {
1540 cur++;
1541 ret = xmlParseURIQuery(uri, &cur);
1542 if (ret != 0)
1543 return(ret);
1544 }
1545 *str = cur;
1546 return(0);
1547}
1548
1549/**
1550 * xmlParseAbsoluteURI:
1551 * @uri: pointer to an URI structure
1552 * @str: pointer to the string to analyze
1553 *
1554 * Parse an URI reference string and fills in the appropriate fields
1555 * of the @uri structure
1556 *
1557 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1558 *
1559 * Returns 0 or the error code
1560 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001561static int
Owen Taylor3473f882001-02-23 17:55:21 +00001562xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1563 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001564 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001565
1566 if (str == NULL)
1567 return(-1);
1568
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001569 cur = *str;
1570
Owen Taylor3473f882001-02-23 17:55:21 +00001571 ret = xmlParseURIScheme(uri, str);
1572 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001573 if (**str != ':') {
1574 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001575 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001576 }
Owen Taylor3473f882001-02-23 17:55:21 +00001577 (*str)++;
1578 if (**str == '/')
1579 return(xmlParseURIHierPart(uri, str));
1580 return(xmlParseURIOpaquePart(uri, str));
1581}
1582
1583/**
1584 * xmlParseRelativeURI:
1585 * @uri: pointer to an URI structure
1586 * @str: pointer to the string to analyze
1587 *
1588 * Parse an relative URI string and fills in the appropriate fields
1589 * of the @uri structure
1590 *
1591 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1592 * abs_path = "/" path_segments
1593 * net_path = "//" authority [ abs_path ]
1594 * rel_path = rel_segment [ abs_path ]
1595 *
1596 * Returns 0 or the error code
1597 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001598static int
Owen Taylor3473f882001-02-23 17:55:21 +00001599xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1600 int ret = 0;
1601 const char *cur;
1602
1603 if (str == NULL)
1604 return(-1);
1605
1606 cur = *str;
1607 if ((cur[0] == '/') && (cur[1] == '/')) {
1608 cur += 2;
1609 ret = xmlParseURIAuthority(uri, &cur);
1610 if (ret != 0)
1611 return(ret);
1612 if (cur[0] == '/') {
1613 cur++;
1614 ret = xmlParseURIPathSegments(uri, &cur, 1);
1615 }
1616 } else if (cur[0] == '/') {
1617 cur++;
1618 ret = xmlParseURIPathSegments(uri, &cur, 1);
1619 } else if (cur[0] != '#' && cur[0] != '?') {
1620 ret = xmlParseURIRelSegment(uri, &cur);
1621 if (ret != 0)
1622 return(ret);
1623 if (cur[0] == '/') {
1624 cur++;
1625 ret = xmlParseURIPathSegments(uri, &cur, 1);
1626 }
1627 }
1628 if (ret != 0)
1629 return(ret);
1630 if (*cur == '?') {
1631 cur++;
1632 ret = xmlParseURIQuery(uri, &cur);
1633 if (ret != 0)
1634 return(ret);
1635 }
1636 *str = cur;
1637 return(ret);
1638}
1639
1640/**
1641 * xmlParseURIReference:
1642 * @uri: pointer to an URI structure
1643 * @str: the string to analyze
1644 *
1645 * Parse an URI reference string and fills in the appropriate fields
1646 * of the @uri structure
1647 *
1648 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1649 *
1650 * Returns 0 or the error code
1651 */
1652int
1653xmlParseURIReference(xmlURIPtr uri, const char *str) {
1654 int ret;
1655 const char *tmp = str;
1656
1657 if (str == NULL)
1658 return(-1);
1659 xmlCleanURI(uri);
1660
1661 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001662 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001663 * it fails.
1664 */
1665 ret = xmlParseAbsoluteURI(uri, &str);
1666 if (ret != 0) {
1667 xmlCleanURI(uri);
1668 str = tmp;
1669 ret = xmlParseRelativeURI(uri, &str);
1670 }
1671 if (ret != 0) {
1672 xmlCleanURI(uri);
1673 return(ret);
1674 }
1675
1676 if (*str == '#') {
1677 str++;
1678 ret = xmlParseURIFragment(uri, &str);
1679 if (ret != 0) return(ret);
1680 }
1681 if (*str != 0) {
1682 xmlCleanURI(uri);
1683 return(1);
1684 }
1685 return(0);
1686}
1687
1688/**
1689 * xmlParseURI:
1690 * @str: the URI string to analyze
1691 *
1692 * Parse an URI
1693 *
1694 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1695 *
1696 * Returns a newly build xmlURIPtr or NULL in case of error
1697 */
1698xmlURIPtr
1699xmlParseURI(const char *str) {
1700 xmlURIPtr uri;
1701 int ret;
1702
1703 if (str == NULL)
1704 return(NULL);
1705 uri = xmlCreateURI();
1706 if (uri != NULL) {
1707 ret = xmlParseURIReference(uri, str);
1708 if (ret) {
1709 xmlFreeURI(uri);
1710 return(NULL);
1711 }
1712 }
1713 return(uri);
1714}
1715
1716/************************************************************************
1717 * *
1718 * Public functions *
1719 * *
1720 ************************************************************************/
1721
1722/**
1723 * xmlBuildURI:
1724 * @URI: the URI instance found in the document
1725 * @base: the base value
1726 *
1727 * Computes he final URI of the reference done by checking that
1728 * the given URI is valid, and building the final URI using the
1729 * base URI. This is processed according to section 5.2 of the
1730 * RFC 2396
1731 *
1732 * 5.2. Resolving Relative References to Absolute Form
1733 *
1734 * Returns a new URI string (to be freed by the caller) or NULL in case
1735 * of error.
1736 */
1737xmlChar *
1738xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1739 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001740 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001741 xmlURIPtr ref = NULL;
1742 xmlURIPtr bas = NULL;
1743 xmlURIPtr res = NULL;
1744
1745 /*
1746 * 1) The URI reference is parsed into the potential four components and
1747 * fragment identifier, as described in Section 4.3.
1748 *
1749 * NOTE that a completely empty URI is treated by modern browsers
1750 * as a reference to "." rather than as a synonym for the current
1751 * URI. Should we do that here?
1752 */
1753 if (URI == NULL)
1754 ret = -1;
1755 else {
1756 if (*URI) {
1757 ref = xmlCreateURI();
1758 if (ref == NULL)
1759 goto done;
1760 ret = xmlParseURIReference(ref, (const char *) URI);
1761 }
1762 else
1763 ret = 0;
1764 }
1765 if (ret != 0)
1766 goto done;
1767 if (base == NULL)
1768 ret = -1;
1769 else {
1770 bas = xmlCreateURI();
1771 if (bas == NULL)
1772 goto done;
1773 ret = xmlParseURIReference(bas, (const char *) base);
1774 }
1775 if (ret != 0) {
1776 if (ref)
1777 val = xmlSaveUri(ref);
1778 goto done;
1779 }
1780 if (ref == NULL) {
1781 /*
1782 * the base fragment must be ignored
1783 */
1784 if (bas->fragment != NULL) {
1785 xmlFree(bas->fragment);
1786 bas->fragment = NULL;
1787 }
1788 val = xmlSaveUri(bas);
1789 goto done;
1790 }
1791
1792 /*
1793 * 2) If the path component is empty and the scheme, authority, and
1794 * query components are undefined, then it is a reference to the
1795 * current document and we are done. Otherwise, the reference URI's
1796 * query and fragment components are defined as found (or not found)
1797 * within the URI reference and not inherited from the base URI.
1798 *
1799 * NOTE that in modern browsers, the parsing differs from the above
1800 * in the following aspect: the query component is allowed to be
1801 * defined while still treating this as a reference to the current
1802 * document.
1803 */
1804 res = xmlCreateURI();
1805 if (res == NULL)
1806 goto done;
1807 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1808 ((ref->authority == NULL) && (ref->server == NULL))) {
1809 if (bas->scheme != NULL)
1810 res->scheme = xmlMemStrdup(bas->scheme);
1811 if (bas->authority != NULL)
1812 res->authority = xmlMemStrdup(bas->authority);
1813 else if (bas->server != NULL) {
1814 res->server = xmlMemStrdup(bas->server);
1815 if (bas->user != NULL)
1816 res->user = xmlMemStrdup(bas->user);
1817 res->port = bas->port;
1818 }
1819 if (bas->path != NULL)
1820 res->path = xmlMemStrdup(bas->path);
1821 if (ref->query != NULL)
1822 res->query = xmlMemStrdup(ref->query);
1823 else if (bas->query != NULL)
1824 res->query = xmlMemStrdup(bas->query);
1825 if (ref->fragment != NULL)
1826 res->fragment = xmlMemStrdup(ref->fragment);
1827 goto step_7;
1828 }
1829
1830 if (ref->query != NULL)
1831 res->query = xmlMemStrdup(ref->query);
1832 if (ref->fragment != NULL)
1833 res->fragment = xmlMemStrdup(ref->fragment);
1834
1835 /*
1836 * 3) If the scheme component is defined, indicating that the reference
1837 * starts with a scheme name, then the reference is interpreted as an
1838 * absolute URI and we are done. Otherwise, the reference URI's
1839 * scheme is inherited from the base URI's scheme component.
1840 */
1841 if (ref->scheme != NULL) {
1842 val = xmlSaveUri(ref);
1843 goto done;
1844 }
1845 if (bas->scheme != NULL)
1846 res->scheme = xmlMemStrdup(bas->scheme);
1847
1848 /*
1849 * 4) If the authority component is defined, then the reference is a
1850 * network-path and we skip to step 7. Otherwise, the reference
1851 * URI's authority is inherited from the base URI's authority
1852 * component, which will also be undefined if the URI scheme does not
1853 * use an authority component.
1854 */
1855 if ((ref->authority != NULL) || (ref->server != NULL)) {
1856 if (ref->authority != NULL)
1857 res->authority = xmlMemStrdup(ref->authority);
1858 else {
1859 res->server = xmlMemStrdup(ref->server);
1860 if (ref->user != NULL)
1861 res->user = xmlMemStrdup(ref->user);
1862 res->port = ref->port;
1863 }
1864 if (ref->path != NULL)
1865 res->path = xmlMemStrdup(ref->path);
1866 goto step_7;
1867 }
1868 if (bas->authority != NULL)
1869 res->authority = xmlMemStrdup(bas->authority);
1870 else if (bas->server != NULL) {
1871 res->server = xmlMemStrdup(bas->server);
1872 if (bas->user != NULL)
1873 res->user = xmlMemStrdup(bas->user);
1874 res->port = bas->port;
1875 }
1876
1877 /*
1878 * 5) If the path component begins with a slash character ("/"), then
1879 * the reference is an absolute-path and we skip to step 7.
1880 */
1881 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1882 res->path = xmlMemStrdup(ref->path);
1883 goto step_7;
1884 }
1885
1886
1887 /*
1888 * 6) If this step is reached, then we are resolving a relative-path
1889 * reference. The relative path needs to be merged with the base
1890 * URI's path. Although there are many ways to do this, we will
1891 * describe a simple method using a separate string buffer.
1892 *
1893 * Allocate a buffer large enough for the result string.
1894 */
1895 len = 2; /* extra / and 0 */
1896 if (ref->path != NULL)
1897 len += strlen(ref->path);
1898 if (bas->path != NULL)
1899 len += strlen(bas->path);
1900 res->path = (char *) xmlMalloc(len);
1901 if (res->path == NULL) {
1902 xmlGenericError(xmlGenericErrorContext,
1903 "xmlBuildURI: out of memory\n");
1904 goto done;
1905 }
1906 res->path[0] = 0;
1907
1908 /*
1909 * a) All but the last segment of the base URI's path component is
1910 * copied to the buffer. In other words, any characters after the
1911 * last (right-most) slash character, if any, are excluded.
1912 */
1913 cur = 0;
1914 out = 0;
1915 if (bas->path != NULL) {
1916 while (bas->path[cur] != 0) {
1917 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1918 cur++;
1919 if (bas->path[cur] == 0)
1920 break;
1921
1922 cur++;
1923 while (out < cur) {
1924 res->path[out] = bas->path[out];
1925 out++;
1926 }
1927 }
1928 }
1929 res->path[out] = 0;
1930
1931 /*
1932 * b) The reference's path component is appended to the buffer
1933 * string.
1934 */
1935 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001936 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001937 /*
1938 * Ensure the path includes a '/'
1939 */
1940 if ((out == 0) && (bas->server != NULL))
1941 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001942 while (ref->path[indx] != 0) {
1943 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00001944 }
1945 }
1946 res->path[out] = 0;
1947
1948 /*
1949 * Steps c) to h) are really path normalization steps
1950 */
1951 xmlNormalizeURIPath(res->path);
1952
1953step_7:
1954
1955 /*
1956 * 7) The resulting URI components, including any inherited from the
1957 * base URI, are recombined to give the absolute form of the URI
1958 * reference.
1959 */
1960 val = xmlSaveUri(res);
1961
1962done:
1963 if (ref != NULL)
1964 xmlFreeURI(ref);
1965 if (bas != NULL)
1966 xmlFreeURI(bas);
1967 if (res != NULL)
1968 xmlFreeURI(res);
1969 return(val);
1970}
1971
1972