blob: a20eec98f3261d1d9b83b6f540a6c97e99e485cb [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#include <string.h>
14
15#include <libxml/xmlmemory.h>
16#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000017#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <libxml/xmlerror.h>
19
/************************************************************************
 *                                                                      *
 *          Macros to differentiate various character types             *
 *             directly extracted from RFC 2396                         *
 *                                                                      *
 ************************************************************************/

/*
 * Macros taking "x" test a single character value; macros taking "p"
 * test the character(s) found at pointer p.  All of them may evaluate
 * their argument several times, so the argument must be free of side
 * effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_UPALPHA(x) || IS_LOWALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_DIGIT(x) || IS_ALPHA(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x)                                                       \
    (IS_DIGIT(x) ||                                                     \
     (('A' <= (x)) && ((x) <= 'F')) ||                                  \
     (('a' <= (x)) && ((x) <= 'f')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_MARK(x) || IS_ALPHANUM(x))

/*
 * escaped = "%" hex hex
 *
 * The tests short-circuit left to right, so (p)[1] and (p)[2] are only
 * read when the preceding characters matched.
 */
#define IS_ESCAPED(p)                                                   \
    (((p)[0] == '%') && IS_HEX((p)[1]) && IS_HEX((p)[2]))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p)                                                     \
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||                            \
     (*(p) == ':') || (*(p) == '@') || (*(p) == '&') ||                 \
     (*(p) == '=') || (*(p) == '+') || (*(p) == '$') ||                 \
     (*(p) == ','))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 *
 * i.e. pchar plus ";" and "?".
 */
#define IS_URIC_NO_SLASH(p)                                             \
    (IS_PCHAR(p) || (*(p) == ';') || (*(p) == '?'))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)                                                   \
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||                            \
     (*(p) == ';') || (*(p) == '@') || (*(p) == '&') ||                 \
     (*(p) == '=') || (*(p) == '+') || (*(p) == '$') ||                 \
     (*(p) == ','))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */
#define IS_SCHEME(x)                                                    \
    (IS_ALPHANUM(x) || ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * i.e. pchar plus ";".
 */
#define IS_REG_NAME(p) (IS_PCHAR(p) || (*(p) == ';'))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)                                                  \
    (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||                            \
     (*(p) == ';') || (*(p) == ':') || (*(p) == '&') ||                 \
     (*(p) == '=') || (*(p) == '+') || (*(p) == '$') ||                 \
     (*(p) == ','))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p)                                                      \
    (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)) || IS_ESCAPED(p))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000166
/*
 * Advance pointer p to the next character: by three positions when it
 * points at a '%' (skipping a whole "%XX" escape sequence), by one
 * otherwise.  The macro does not check that two characters follow the
 * '%'; callers test the position (e.g. with IS_ESCAPED) first.
 *
 * The argument is fully parenthesized so that expressions such as
 * NEXT(*pp) advance *pp and not pp.  It is evaluated more than once and
 * must therefore be free of side effects.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
172
173/*
174 * Productions from the spec.
175 *
176 * authority = server | reg_name
177 * reg_name = 1*( unreserved | escaped | "$" | "," |
178 * ";" | ":" | "@" | "&" | "=" | "+" )
179 *
180 * path = [ abs_path | opaque_part ]
181 */
182
183/************************************************************************
184 * *
185 * Generic URI structure functions *
186 * *
187 ************************************************************************/
188
189/**
190 * xmlCreateURI:
191 *
192 * Simply creates an empty xmlURI
193 *
194 * Returns the new structure or NULL in case of error
195 */
196xmlURIPtr
197xmlCreateURI(void) {
198 xmlURIPtr ret;
199
200 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
201 if (ret == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "xmlCreateURI: out of memory\n");
204 return(NULL);
205 }
206 memset(ret, 0, sizeof(xmlURI));
207 return(ret);
208}
209
210/**
211 * xmlSaveUri:
212 * @uri: pointer to an xmlURI
213 *
214 * Save the URI as an escaped string
215 *
216 * Returns a new string (to be deallocated by caller)
217 */
218xmlChar *
219xmlSaveUri(xmlURIPtr uri) {
220 xmlChar *ret = NULL;
221 const char *p;
222 int len;
223 int max;
224
225 if (uri == NULL) return(NULL);
226
227
228 max = 80;
229 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
230 if (ret == NULL) {
231 xmlGenericError(xmlGenericErrorContext,
232 "xmlSaveUri: out of memory\n");
233 return(NULL);
234 }
235 len = 0;
236
237 if (uri->scheme != NULL) {
238 p = uri->scheme;
239 while (*p != 0) {
240 if (len >= max) {
241 max *= 2;
242 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
243 if (ret == NULL) {
244 xmlGenericError(xmlGenericErrorContext,
245 "xmlSaveUri: out of memory\n");
246 return(NULL);
247 }
248 }
249 ret[len++] = *p++;
250 }
251 if (len >= max) {
252 max *= 2;
253 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
254 if (ret == NULL) {
255 xmlGenericError(xmlGenericErrorContext,
256 "xmlSaveUri: out of memory\n");
257 return(NULL);
258 }
259 }
260 ret[len++] = ':';
261 }
262 if (uri->opaque != NULL) {
263 p = uri->opaque;
264 while (*p != 0) {
265 if (len + 3 >= max) {
266 max *= 2;
267 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
268 if (ret == NULL) {
269 xmlGenericError(xmlGenericErrorContext,
270 "xmlSaveUri: out of memory\n");
271 return(NULL);
272 }
273 }
274 if ((IS_UNRESERVED(*(p))) ||
275 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
276 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
277 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
278 ret[len++] = *p++;
279 else {
280 int val = *(unsigned char *)p++;
281 int hi = val / 0x10, lo = val % 0x10;
282 ret[len++] = '%';
283 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
284 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
285 }
286 }
287 if (len >= max) {
288 max *= 2;
289 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
290 if (ret == NULL) {
291 xmlGenericError(xmlGenericErrorContext,
292 "xmlSaveUri: out of memory\n");
293 return(NULL);
294 }
295 }
296 ret[len++] = 0;
297 } else {
298 if (uri->server != NULL) {
299 if (len + 3 >= max) {
300 max *= 2;
301 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
302 if (ret == NULL) {
303 xmlGenericError(xmlGenericErrorContext,
304 "xmlSaveUri: out of memory\n");
305 return(NULL);
306 }
307 }
308 ret[len++] = '/';
309 ret[len++] = '/';
310 if (uri->user != NULL) {
311 p = uri->user;
312 while (*p != 0) {
313 if (len + 3 >= max) {
314 max *= 2;
315 ret = (xmlChar *) xmlRealloc(ret,
316 (max + 1) * sizeof(xmlChar));
317 if (ret == NULL) {
318 xmlGenericError(xmlGenericErrorContext,
319 "xmlSaveUri: out of memory\n");
320 return(NULL);
321 }
322 }
323 if ((IS_UNRESERVED(*(p))) ||
324 ((*(p) == ';')) || ((*(p) == ':')) ||
325 ((*(p) == '&')) || ((*(p) == '=')) ||
326 ((*(p) == '+')) || ((*(p) == '$')) ||
327 ((*(p) == ',')))
328 ret[len++] = *p++;
329 else {
330 int val = *(unsigned char *)p++;
331 int hi = val / 0x10, lo = val % 0x10;
332 ret[len++] = '%';
333 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
334 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
335 }
336 }
337 if (len + 3 >= max) {
338 max *= 2;
339 ret = (xmlChar *) xmlRealloc(ret,
340 (max + 1) * sizeof(xmlChar));
341 if (ret == NULL) {
342 xmlGenericError(xmlGenericErrorContext,
343 "xmlSaveUri: out of memory\n");
344 return(NULL);
345 }
346 }
347 ret[len++] = '@';
348 }
349 p = uri->server;
350 while (*p != 0) {
351 if (len >= max) {
352 max *= 2;
353 ret = (xmlChar *) xmlRealloc(ret,
354 (max + 1) * sizeof(xmlChar));
355 if (ret == NULL) {
356 xmlGenericError(xmlGenericErrorContext,
357 "xmlSaveUri: out of memory\n");
358 return(NULL);
359 }
360 }
361 ret[len++] = *p++;
362 }
363 if (uri->port > 0) {
364 if (len + 10 >= max) {
365 max *= 2;
366 ret = (xmlChar *) xmlRealloc(ret,
367 (max + 1) * sizeof(xmlChar));
368 if (ret == NULL) {
369 xmlGenericError(xmlGenericErrorContext,
370 "xmlSaveUri: out of memory\n");
371 return(NULL);
372 }
373 }
374 len += sprintf((char *) &ret[len], ":%d", uri->port);
375 }
376 } else if (uri->authority != NULL) {
377 if (len + 3 >= max) {
378 max *= 2;
379 ret = (xmlChar *) xmlRealloc(ret,
380 (max + 1) * sizeof(xmlChar));
381 if (ret == NULL) {
382 xmlGenericError(xmlGenericErrorContext,
383 "xmlSaveUri: out of memory\n");
384 return(NULL);
385 }
386 }
387 ret[len++] = '/';
388 ret[len++] = '/';
389 p = uri->authority;
390 while (*p != 0) {
391 if (len + 3 >= max) {
392 max *= 2;
393 ret = (xmlChar *) xmlRealloc(ret,
394 (max + 1) * sizeof(xmlChar));
395 if (ret == NULL) {
396 xmlGenericError(xmlGenericErrorContext,
397 "xmlSaveUri: out of memory\n");
398 return(NULL);
399 }
400 }
401 if ((IS_UNRESERVED(*(p))) ||
402 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
403 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
404 ((*(p) == '=')) || ((*(p) == '+')))
405 ret[len++] = *p++;
406 else {
407 int val = *(unsigned char *)p++;
408 int hi = val / 0x10, lo = val % 0x10;
409 ret[len++] = '%';
410 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
411 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
412 }
413 }
414 } else if (uri->scheme != NULL) {
415 if (len + 3 >= max) {
416 max *= 2;
417 ret = (xmlChar *) xmlRealloc(ret,
418 (max + 1) * sizeof(xmlChar));
419 if (ret == NULL) {
420 xmlGenericError(xmlGenericErrorContext,
421 "xmlSaveUri: out of memory\n");
422 return(NULL);
423 }
424 }
425 ret[len++] = '/';
426 ret[len++] = '/';
427 }
428 if (uri->path != NULL) {
429 p = uri->path;
430 while (*p != 0) {
431 if (len + 3 >= max) {
432 max *= 2;
433 ret = (xmlChar *) xmlRealloc(ret,
434 (max + 1) * sizeof(xmlChar));
435 if (ret == NULL) {
436 xmlGenericError(xmlGenericErrorContext,
437 "xmlSaveUri: out of memory\n");
438 return(NULL);
439 }
440 }
441 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
442 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
443 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
444 ((*(p) == ',')))
445 ret[len++] = *p++;
446 else {
447 int val = *(unsigned char *)p++;
448 int hi = val / 0x10, lo = val % 0x10;
449 ret[len++] = '%';
450 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
451 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
452 }
453 }
454 }
455 if (uri->query != NULL) {
456 if (len + 3 >= max) {
457 max *= 2;
458 ret = (xmlChar *) xmlRealloc(ret,
459 (max + 1) * sizeof(xmlChar));
460 if (ret == NULL) {
461 xmlGenericError(xmlGenericErrorContext,
462 "xmlSaveUri: out of memory\n");
463 return(NULL);
464 }
465 }
466 ret[len++] = '?';
467 p = uri->query;
468 while (*p != 0) {
469 if (len + 3 >= max) {
470 max *= 2;
471 ret = (xmlChar *) xmlRealloc(ret,
472 (max + 1) * sizeof(xmlChar));
473 if (ret == NULL) {
474 xmlGenericError(xmlGenericErrorContext,
475 "xmlSaveUri: out of memory\n");
476 return(NULL);
477 }
478 }
479 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
480 ret[len++] = *p++;
481 else {
482 int val = *(unsigned char *)p++;
483 int hi = val / 0x10, lo = val % 0x10;
484 ret[len++] = '%';
485 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
486 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
487 }
488 }
489 }
490 if (uri->fragment != NULL) {
491 if (len + 3 >= max) {
492 max *= 2;
493 ret = (xmlChar *) xmlRealloc(ret,
494 (max + 1) * sizeof(xmlChar));
495 if (ret == NULL) {
496 xmlGenericError(xmlGenericErrorContext,
497 "xmlSaveUri: out of memory\n");
498 return(NULL);
499 }
500 }
501 ret[len++] = '#';
502 p = uri->fragment;
503 while (*p != 0) {
504 if (len + 3 >= max) {
505 max *= 2;
506 ret = (xmlChar *) xmlRealloc(ret,
507 (max + 1) * sizeof(xmlChar));
508 if (ret == NULL) {
509 xmlGenericError(xmlGenericErrorContext,
510 "xmlSaveUri: out of memory\n");
511 return(NULL);
512 }
513 }
514 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
515 ret[len++] = *p++;
516 else {
517 int val = *(unsigned char *)p++;
518 int hi = val / 0x10, lo = val % 0x10;
519 ret[len++] = '%';
520 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
521 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
522 }
523 }
524 }
525 if (len >= max) {
526 max *= 2;
527 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
528 if (ret == NULL) {
529 xmlGenericError(xmlGenericErrorContext,
530 "xmlSaveUri: out of memory\n");
531 return(NULL);
532 }
533 }
534 ret[len++] = 0;
535 }
536 return(ret);
537}
538
539/**
540 * xmlPrintURI:
541 * @stream: a FILE* for the output
542 * @uri: pointer to an xmlURI
543 *
544 * Prints the URI in the stream @steam.
545 */
546void
547xmlPrintURI(FILE *stream, xmlURIPtr uri) {
548 xmlChar *out;
549
550 out = xmlSaveUri(uri);
551 if (out != NULL) {
552 fprintf(stream, "%s", out);
553 xmlFree(out);
554 }
555}
556
557/**
558 * xmlCleanURI:
559 * @uri: pointer to an xmlURI
560 *
561 * Make sure the xmlURI struct is free of content
562 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000563static void
Owen Taylor3473f882001-02-23 17:55:21 +0000564xmlCleanURI(xmlURIPtr uri) {
565 if (uri == NULL) return;
566
567 if (uri->scheme != NULL) xmlFree(uri->scheme);
568 uri->scheme = NULL;
569 if (uri->server != NULL) xmlFree(uri->server);
570 uri->server = NULL;
571 if (uri->user != NULL) xmlFree(uri->user);
572 uri->user = NULL;
573 if (uri->path != NULL) xmlFree(uri->path);
574 uri->path = NULL;
575 if (uri->fragment != NULL) xmlFree(uri->fragment);
576 uri->fragment = NULL;
577 if (uri->opaque != NULL) xmlFree(uri->opaque);
578 uri->opaque = NULL;
579 if (uri->authority != NULL) xmlFree(uri->authority);
580 uri->authority = NULL;
581 if (uri->query != NULL) xmlFree(uri->query);
582 uri->query = NULL;
583}
584
585/**
586 * xmlFreeURI:
587 * @uri: pointer to an xmlURI
588 *
589 * Free up the xmlURI struct
590 */
591void
592xmlFreeURI(xmlURIPtr uri) {
593 if (uri == NULL) return;
594
595 if (uri->scheme != NULL) xmlFree(uri->scheme);
596 if (uri->server != NULL) xmlFree(uri->server);
597 if (uri->user != NULL) xmlFree(uri->user);
598 if (uri->path != NULL) xmlFree(uri->path);
599 if (uri->fragment != NULL) xmlFree(uri->fragment);
600 if (uri->opaque != NULL) xmlFree(uri->opaque);
601 if (uri->authority != NULL) xmlFree(uri->authority);
602 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000603 xmlFree(uri);
604}
605
606/************************************************************************
607 * *
608 * Helper functions *
609 * *
610 ************************************************************************/
611
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of "/" between two segments
 * are collapsed to a single "/" along the way.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is the read position, "out" the write position; at the top
     * of the loop "cur" always points at the start of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy() must not be used. */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        /*
         * The cur[0] test stops the scan once a trailing "/.." has moved
         * "cur" onto the terminating 0, so nothing past the end of the
         * string is read.
         */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000795
796/**
797 * xmlURIUnescapeString:
798 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000799 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Owen Taylor3473f882001-02-23 17:55:21 +0000800 * @target: optionnal destination buffer
801 *
802 * Unescaping routine, does not do validity checks !
803 * Output is direct unsigned char translation of %XX values (no encoding)
804 *
805 * Returns an copy of the string, but unescaped
806 */
807char *
808xmlURIUnescapeString(const char *str, int len, char *target) {
809 char *ret, *out;
810 const char *in;
811
812 if (str == NULL)
813 return(NULL);
814 if (len <= 0) len = strlen(str);
815 if (len <= 0) return(NULL);
816
817 if (target == NULL) {
818 ret = (char *) xmlMalloc(len + 1);
819 if (ret == NULL) {
820 xmlGenericError(xmlGenericErrorContext,
821 "xmlURIUnescapeString: out of memory\n");
822 return(NULL);
823 }
824 } else
825 ret = target;
826 in = str;
827 out = ret;
828 while(len > 0) {
829 if (*in == '%') {
830 in++;
831 if ((*in >= '0') && (*in <= '9'))
832 *out = (*in - '0');
833 else if ((*in >= 'a') && (*in <= 'f'))
834 *out = (*in - 'a') + 10;
835 else if ((*in >= 'A') && (*in <= 'F'))
836 *out = (*in - 'A') + 10;
837 in++;
838 if ((*in >= '0') && (*in <= '9'))
839 *out = *out * 16 + (*in - '0');
840 else if ((*in >= 'a') && (*in <= 'f'))
841 *out = *out * 16 + (*in - 'a') + 10;
842 else if ((*in >= 'A') && (*in <= 'F'))
843 *out = *out * 16 + (*in - 'A') + 10;
844 in++;
845 len -= 3;
846 out++;
847 } else {
848 *out++ = *in++;
849 len--;
850 }
851 }
852 *out = 0;
853 return(ret);
854}
855
856/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000857 * xmlURIEscapeStr:
858 * @str: string to escape
859 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000860 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000861 * This routine escapes a string to hex, ignoring reserved characters (a-z)
862 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000863 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000864 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000865 */
866xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000867xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
868 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000869 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000870
Owen Taylor3473f882001-02-23 17:55:21 +0000871 unsigned int len, out;
872
873 if (str == NULL)
874 return(NULL);
875 len = xmlStrlen(str);
876 if (len <= 0) return(NULL);
877
878 len += 20;
879 ret = (xmlChar *) xmlMalloc(len);
880 if (ret == NULL) {
881 xmlGenericError(xmlGenericErrorContext,
882 "xmlURIEscape: out of memory\n");
883 return(NULL);
884 }
885 in = (const xmlChar *) str;
886 out = 0;
887 while(*in != 0) {
888 if (len - out <= 3) {
889 len += 20;
890 ret = (xmlChar *) xmlRealloc(ret, len);
891 if (ret == NULL) {
892 xmlGenericError(xmlGenericErrorContext,
893 "xmlURIEscape: out of memory\n");
894 return(NULL);
895 }
896 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000897
898 ch = *in;
899
900 if ( (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch)) ) {
Owen Taylor3473f882001-02-23 17:55:21 +0000901 unsigned char val;
902 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000903 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000904 if (val <= 9)
905 ret[out++] = '0' + val;
906 else
907 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000908 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000909 if (val <= 9)
910 ret[out++] = '0' + val;
911 else
912 ret[out++] = 'A' + val - 0xA;
913 in++;
914 } else {
915 ret[out++] = *in++;
916 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000917
Owen Taylor3473f882001-02-23 17:55:21 +0000918 }
919 ret[out] = 0;
920 return(ret);
921}
922
Daniel Veillard8514c672001-05-23 10:29:12 +0000923/**
924 * xmlURIEscape:
925 * @str: the string of the URI to escape
926 *
927 * Escaping routine, does not do validity checks !
928 * It will try to escape the chars needing this, but this is heuristic
929 * based it's impossible to be sure.
930 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000931 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000932 *
933 * 25 May 2001
934 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
935 * according to RFC2396.
936 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000937 */
938xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000939xmlURIEscape(const xmlChar * str)
940{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000941 xmlChar *ret, *segment = NULL;
942 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000943 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000944
Daniel Veillard6278fb52001-05-25 07:38:41 +0000945#define NULLCHK(p) if(!p) { \
946 xmlGenericError(xmlGenericErrorContext, \
947 "xmlURIEscape: out of memory\n"); \
948 return NULL; }
949
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000950 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000951 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000952
953 uri = xmlCreateURI();
954 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000955 /*
956 * Allow escaping errors in the unescaped form
957 */
958 uri->cleanup = 1;
959 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000960 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000961 xmlFreeURI(uri);
962 return (NULL);
963 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000964 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000965
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000966 if (!uri)
967 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000968
969 ret = NULL;
970
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000971 if (uri->scheme) {
972 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
973 NULLCHK(segment)
974 ret = xmlStrcat(ret, segment);
975 ret = xmlStrcat(ret, BAD_CAST ":");
976 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000977 }
978
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000979 if (uri->authority) {
980 segment =
981 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
982 NULLCHK(segment)
983 ret = xmlStrcat(ret, BAD_CAST "//");
984 ret = xmlStrcat(ret, segment);
985 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000986 }
987
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000988 if (uri->user) {
989 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
990 NULLCHK(segment)
991 ret = xmlStrcat(ret, segment);
992 ret = xmlStrcat(ret, BAD_CAST "@");
993 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000994 }
995
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000996 if (uri->server) {
997 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
998 NULLCHK(segment)
999 ret = xmlStrcat(ret, BAD_CAST "//");
1000 ret = xmlStrcat(ret, segment);
1001 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001002 }
1003
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001004 if (uri->port) {
1005 xmlChar port[10];
1006
Daniel Veillard43d3f612001-11-10 11:57:23 +00001007 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001008 ret = xmlStrcat(ret, BAD_CAST ":");
1009 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001010 }
1011
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001012 if (uri->path) {
1013 segment =
1014 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1015 NULLCHK(segment)
1016 ret = xmlStrcat(ret, segment);
1017 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001018 }
1019
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001020 if (uri->query) {
1021 segment =
1022 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1023 NULLCHK(segment)
1024 ret = xmlStrcat(ret, BAD_CAST "?");
1025 ret = xmlStrcat(ret, segment);
1026 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001027 }
1028
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001029 if (uri->opaque) {
1030 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1031 NULLCHK(segment)
1032 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001033 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001034 }
1035
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001036 if (uri->fragment) {
1037 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1038 NULLCHK(segment)
1039 ret = xmlStrcat(ret, BAD_CAST "#");
1040 ret = xmlStrcat(ret, segment);
1041 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001042 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001043
1044 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001045#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001046
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001047 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001048}
1049
Owen Taylor3473f882001-02-23 17:55:21 +00001050/************************************************************************
1051 * *
1052 * Escaped URI parsing *
1053 * *
1054 ************************************************************************/
1055
1056/**
1057 * xmlParseURIFragment:
1058 * @uri: pointer to an URI structure
1059 * @str: pointer to the string to analyze
1060 *
1061 * Parse an URI fragment string and fills in the appropriate fields
1062 * of the @uri structure.
1063 *
1064 * fragment = *uric
1065 *
1066 * Returns 0 or the error code
1067 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001068static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001069xmlParseURIFragment(xmlURIPtr uri, const char **str)
1070{
Owen Taylor3473f882001-02-23 17:55:21 +00001071 const char *cur = *str;
1072
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001073 if (str == NULL)
1074 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001075
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001076 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1077 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001079 if (uri->fragment != NULL)
1080 xmlFree(uri->fragment);
1081 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
1083 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001084 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001085}
1086
1087/**
1088 * xmlParseURIQuery:
1089 * @uri: pointer to an URI structure
1090 * @str: pointer to the string to analyze
1091 *
1092 * Parse the query part of an URI
1093 *
1094 * query = *uric
1095 *
1096 * Returns 0 or the error code
1097 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001098static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001099xmlParseURIQuery(xmlURIPtr uri, const char **str)
1100{
Owen Taylor3473f882001-02-23 17:55:21 +00001101 const char *cur = *str;
1102
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001103 if (str == NULL)
1104 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001105
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001106 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1107 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001108 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001109 if (uri->query != NULL)
1110 xmlFree(uri->query);
1111 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001112 }
1113 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001114 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001115}
1116
1117/**
1118 * xmlParseURIScheme:
1119 * @uri: pointer to an URI structure
1120 * @str: pointer to the string to analyze
1121 *
1122 * Parse an URI scheme
1123 *
1124 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1125 *
1126 * Returns 0 or the error code
1127 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001128static int
Owen Taylor3473f882001-02-23 17:55:21 +00001129xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1130 const char *cur;
1131
1132 if (str == NULL)
1133 return(-1);
1134
1135 cur = *str;
1136 if (!IS_ALPHA(*cur))
1137 return(2);
1138 cur++;
1139 while (IS_SCHEME(*cur)) cur++;
1140 if (uri != NULL) {
1141 if (uri->scheme != NULL) xmlFree(uri->scheme);
1142 /* !!! strndup */
1143 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1144 }
1145 *str = cur;
1146 return(0);
1147}
1148
1149/**
1150 * xmlParseURIOpaquePart:
1151 * @uri: pointer to an URI structure
1152 * @str: pointer to the string to analyze
1153 *
1154 * Parse an URI opaque part
1155 *
1156 * opaque_part = uric_no_slash *uric
1157 *
1158 * Returns 0 or the error code
1159 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001160static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001161xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1162{
Owen Taylor3473f882001-02-23 17:55:21 +00001163 const char *cur;
1164
1165 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001166 return (-1);
1167
Owen Taylor3473f882001-02-23 17:55:21 +00001168 cur = *str;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001169 if (!(IS_URIC_NO_SLASH(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001170 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001171 }
1172 NEXT(cur);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001173 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1174 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001175 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001176 if (uri->opaque != NULL)
1177 xmlFree(uri->opaque);
1178 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001179 }
1180 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001181 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001182}
1183
1184/**
1185 * xmlParseURIServer:
1186 * @uri: pointer to an URI structure
1187 * @str: pointer to the string to analyze
1188 *
1189 * Parse a server subpart of an URI, it's a finer grain analysis
1190 * of the authority part.
1191 *
1192 * server = [ [ userinfo "@" ] hostport ]
1193 * userinfo = *( unreserved | escaped |
1194 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1195 * hostport = host [ ":" port ]
1196 * host = hostname | IPv4address
1197 * hostname = *( domainlabel "." ) toplabel [ "." ]
1198 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1199 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1200 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1201 * port = *digit
1202 *
1203 * Returns 0 or the error code
1204 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001205static int
Owen Taylor3473f882001-02-23 17:55:21 +00001206xmlParseURIServer(xmlURIPtr uri, const char **str) {
1207 const char *cur;
1208 const char *host, *tmp;
1209
1210 if (str == NULL)
1211 return(-1);
1212
1213 cur = *str;
1214
1215 /*
1216 * is there an userinfo ?
1217 */
1218 while (IS_USERINFO(cur)) NEXT(cur);
1219 if (*cur == '@') {
1220 if (uri != NULL) {
1221 if (uri->user != NULL) xmlFree(uri->user);
1222 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1223 }
1224 cur++;
1225 } else {
1226 if (uri != NULL) {
1227 if (uri->user != NULL) xmlFree(uri->user);
1228 uri->user = NULL;
1229 }
1230 cur = *str;
1231 }
1232 /*
1233 * This can be empty in the case where there is no server
1234 */
1235 host = cur;
1236 if (*cur == '/') {
1237 if (uri != NULL) {
1238 if (uri->authority != NULL) xmlFree(uri->authority);
1239 uri->authority = NULL;
1240 if (uri->server != NULL) xmlFree(uri->server);
1241 uri->server = NULL;
1242 uri->port = 0;
1243 }
1244 return(0);
1245 }
1246 /*
1247 * host part of hostport can derive either an IPV4 address
1248 * or an unresolved name. Check the IP first, it easier to detect
1249 * errors if wrong one
1250 */
1251 if (IS_DIGIT(*cur)) {
1252 while(IS_DIGIT(*cur)) cur++;
1253 if (*cur != '.')
1254 goto host_name;
1255 cur++;
1256 if (!IS_DIGIT(*cur))
1257 goto host_name;
1258 while(IS_DIGIT(*cur)) cur++;
1259 if (*cur != '.')
1260 goto host_name;
1261 cur++;
1262 if (!IS_DIGIT(*cur))
1263 goto host_name;
1264 while(IS_DIGIT(*cur)) cur++;
1265 if (*cur != '.')
1266 goto host_name;
1267 cur++;
1268 if (!IS_DIGIT(*cur))
1269 goto host_name;
1270 while(IS_DIGIT(*cur)) cur++;
1271 if (uri != NULL) {
1272 if (uri->authority != NULL) xmlFree(uri->authority);
1273 uri->authority = NULL;
1274 if (uri->server != NULL) xmlFree(uri->server);
1275 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1276 }
1277 goto host_done;
1278 }
1279host_name:
1280 /*
1281 * the hostname production as-is is a parser nightmare.
1282 * simplify it to
1283 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1284 * and just make sure the last label starts with a non numeric char.
1285 */
1286 if (!IS_ALPHANUM(*cur))
1287 return(6);
1288 while (IS_ALPHANUM(*cur)) {
1289 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1290 if (*cur == '.')
1291 cur++;
1292 }
1293 tmp = cur;
1294 tmp--;
1295 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1296 tmp++;
1297 if (!IS_ALPHA(*tmp))
1298 return(7);
1299 if (uri != NULL) {
1300 if (uri->authority != NULL) xmlFree(uri->authority);
1301 uri->authority = NULL;
1302 if (uri->server != NULL) xmlFree(uri->server);
1303 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1304 }
1305
1306host_done:
1307
1308 /*
1309 * finish by checking for a port presence.
1310 */
1311 if (*cur == ':') {
1312 cur++;
1313 if (IS_DIGIT(*cur)) {
1314 if (uri != NULL)
1315 uri->port = 0;
1316 while (IS_DIGIT(*cur)) {
1317 if (uri != NULL)
1318 uri->port = uri->port * 10 + (*cur - '0');
1319 cur++;
1320 }
1321 }
1322 }
1323 *str = cur;
1324 return(0);
1325}
1326
1327/**
1328 * xmlParseURIRelSegment:
1329 * @uri: pointer to an URI structure
1330 * @str: pointer to the string to analyze
1331 *
1332 * Parse an URI relative segment
1333 *
1334 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1335 * "+" | "$" | "," )
1336 *
1337 * Returns 0 or the error code
1338 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001339static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001340xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1341{
Owen Taylor3473f882001-02-23 17:55:21 +00001342 const char *cur;
1343
1344 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001345 return (-1);
1346
Owen Taylor3473f882001-02-23 17:55:21 +00001347 cur = *str;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001348 if (!(IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
1349 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001350 }
1351 NEXT(cur);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001352 while (IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1353 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001354 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001355 if (uri->path != NULL)
1356 xmlFree(uri->path);
1357 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001358 }
1359 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001360 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001361}
1362
1363/**
1364 * xmlParseURIPathSegments:
1365 * @uri: pointer to an URI structure
1366 * @str: pointer to the string to analyze
1367 * @slash: should we add a leading slash
1368 *
1369 * Parse an URI set of path segments
1370 *
1371 * path_segments = segment *( "/" segment )
1372 * segment = *pchar *( ";" param )
1373 * param = *pchar
1374 *
1375 * Returns 0 or the error code
1376 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001377static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001378xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1379{
Owen Taylor3473f882001-02-23 17:55:21 +00001380 const char *cur;
1381
1382 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001383 return (-1);
1384
Owen Taylor3473f882001-02-23 17:55:21 +00001385 cur = *str;
1386
1387 do {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001388 while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1389 NEXT(cur);
1390 if (*cur == ';') {
1391 cur++;
1392 while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1393 NEXT(cur);
1394 }
1395 if (*cur != '/')
1396 break;
1397 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001398 } while (1);
1399 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001400 int len, len2 = 0;
1401 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001402
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001403 /*
1404 * Concat the set of path segments to the current path
1405 */
1406 len = cur - *str;
1407 if (slash)
1408 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001409
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001410 if (uri->path != NULL) {
1411 len2 = strlen(uri->path);
1412 len += len2;
1413 }
Owen Taylor3473f882001-02-23 17:55:21 +00001414 path = (char *) xmlMalloc(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001415 if (path == NULL) {
1416 xmlGenericError(xmlGenericErrorContext,
1417 "xmlParseURIPathSegments: out of memory\n");
1418 *str = cur;
1419 return (-1);
1420 }
1421 if (uri->path != NULL)
1422 memcpy(path, uri->path, len2);
1423 if (slash) {
1424 path[len2] = '/';
1425 len2++;
1426 }
1427 path[len2] = 0;
1428 if (cur - *str > 0)
1429 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1430 if (uri->path != NULL)
1431 xmlFree(uri->path);
1432 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001433 }
1434 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001435 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001436}
1437
1438/**
1439 * xmlParseURIAuthority:
1440 * @uri: pointer to an URI structure
1441 * @str: pointer to the string to analyze
1442 *
1443 * Parse the authority part of an URI.
1444 *
1445 * authority = server | reg_name
1446 * server = [ [ userinfo "@" ] hostport ]
1447 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1448 * "@" | "&" | "=" | "+" )
1449 *
1450 * Note : this is completely ambiguous since reg_name is allowed to
1451 * use the full set of chars in use by server:
1452 *
1453 * 3.2.1. Registry-based Naming Authority
1454 *
1455 * The structure of a registry-based naming authority is specific
1456 * to the URI scheme, but constrained to the allowed characters
1457 * for an authority component.
1458 *
1459 * Returns 0 or the error code
1460 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001461static int
Owen Taylor3473f882001-02-23 17:55:21 +00001462xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1463 const char *cur;
1464 int ret;
1465
1466 if (str == NULL)
1467 return(-1);
1468
1469 cur = *str;
1470
1471 /*
1472 * try first to parse it as a server string.
1473 */
1474 ret = xmlParseURIServer(uri, str);
1475 if (ret == 0)
1476 return(0);
1477
1478 /*
1479 * failed, fallback to reg_name
1480 */
1481 if (!IS_REG_NAME(cur)) {
1482 return(5);
1483 }
1484 NEXT(cur);
1485 while (IS_REG_NAME(cur)) NEXT(cur);
1486 if (uri != NULL) {
1487 if (uri->server != NULL) xmlFree(uri->server);
1488 uri->server = NULL;
1489 if (uri->user != NULL) xmlFree(uri->user);
1490 uri->user = NULL;
1491 if (uri->authority != NULL) xmlFree(uri->authority);
1492 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1493 }
1494 *str = cur;
1495 return(0);
1496}
1497
1498/**
1499 * xmlParseURIHierPart:
1500 * @uri: pointer to an URI structure
1501 * @str: pointer to the string to analyze
1502 *
1503 * Parse an URI hirarchical part
1504 *
1505 * hier_part = ( net_path | abs_path ) [ "?" query ]
1506 * abs_path = "/" path_segments
1507 * net_path = "//" authority [ abs_path ]
1508 *
1509 * Returns 0 or the error code
1510 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001511static int
Owen Taylor3473f882001-02-23 17:55:21 +00001512xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1513 int ret;
1514 const char *cur;
1515
1516 if (str == NULL)
1517 return(-1);
1518
1519 cur = *str;
1520
1521 if ((cur[0] == '/') && (cur[1] == '/')) {
1522 cur += 2;
1523 ret = xmlParseURIAuthority(uri, &cur);
1524 if (ret != 0)
1525 return(ret);
1526 if (cur[0] == '/') {
1527 cur++;
1528 ret = xmlParseURIPathSegments(uri, &cur, 1);
1529 }
1530 } else if (cur[0] == '/') {
1531 cur++;
1532 ret = xmlParseURIPathSegments(uri, &cur, 1);
1533 } else {
1534 return(4);
1535 }
1536 if (ret != 0)
1537 return(ret);
1538 if (*cur == '?') {
1539 cur++;
1540 ret = xmlParseURIQuery(uri, &cur);
1541 if (ret != 0)
1542 return(ret);
1543 }
1544 *str = cur;
1545 return(0);
1546}
1547
1548/**
1549 * xmlParseAbsoluteURI:
1550 * @uri: pointer to an URI structure
1551 * @str: pointer to the string to analyze
1552 *
1553 * Parse an URI reference string and fills in the appropriate fields
1554 * of the @uri structure
1555 *
1556 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1557 *
1558 * Returns 0 or the error code
1559 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001560static int
Owen Taylor3473f882001-02-23 17:55:21 +00001561xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1562 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001563 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001564
1565 if (str == NULL)
1566 return(-1);
1567
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001568 cur = *str;
1569
Owen Taylor3473f882001-02-23 17:55:21 +00001570 ret = xmlParseURIScheme(uri, str);
1571 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001572 if (**str != ':') {
1573 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001574 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001575 }
Owen Taylor3473f882001-02-23 17:55:21 +00001576 (*str)++;
1577 if (**str == '/')
1578 return(xmlParseURIHierPart(uri, str));
1579 return(xmlParseURIOpaquePart(uri, str));
1580}
1581
1582/**
1583 * xmlParseRelativeURI:
1584 * @uri: pointer to an URI structure
1585 * @str: pointer to the string to analyze
1586 *
1587 * Parse an relative URI string and fills in the appropriate fields
1588 * of the @uri structure
1589 *
1590 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1591 * abs_path = "/" path_segments
1592 * net_path = "//" authority [ abs_path ]
1593 * rel_path = rel_segment [ abs_path ]
1594 *
1595 * Returns 0 or the error code
1596 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001597static int
Owen Taylor3473f882001-02-23 17:55:21 +00001598xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1599 int ret = 0;
1600 const char *cur;
1601
1602 if (str == NULL)
1603 return(-1);
1604
1605 cur = *str;
1606 if ((cur[0] == '/') && (cur[1] == '/')) {
1607 cur += 2;
1608 ret = xmlParseURIAuthority(uri, &cur);
1609 if (ret != 0)
1610 return(ret);
1611 if (cur[0] == '/') {
1612 cur++;
1613 ret = xmlParseURIPathSegments(uri, &cur, 1);
1614 }
1615 } else if (cur[0] == '/') {
1616 cur++;
1617 ret = xmlParseURIPathSegments(uri, &cur, 1);
1618 } else if (cur[0] != '#' && cur[0] != '?') {
1619 ret = xmlParseURIRelSegment(uri, &cur);
1620 if (ret != 0)
1621 return(ret);
1622 if (cur[0] == '/') {
1623 cur++;
1624 ret = xmlParseURIPathSegments(uri, &cur, 1);
1625 }
1626 }
1627 if (ret != 0)
1628 return(ret);
1629 if (*cur == '?') {
1630 cur++;
1631 ret = xmlParseURIQuery(uri, &cur);
1632 if (ret != 0)
1633 return(ret);
1634 }
1635 *str = cur;
1636 return(ret);
1637}
1638
1639/**
1640 * xmlParseURIReference:
1641 * @uri: pointer to an URI structure
1642 * @str: the string to analyze
1643 *
1644 * Parse an URI reference string and fills in the appropriate fields
1645 * of the @uri structure
1646 *
1647 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1648 *
1649 * Returns 0 or the error code
1650 */
1651int
1652xmlParseURIReference(xmlURIPtr uri, const char *str) {
1653 int ret;
1654 const char *tmp = str;
1655
1656 if (str == NULL)
1657 return(-1);
1658 xmlCleanURI(uri);
1659
1660 /*
1661 * Try first to parse aboslute refs, then fallback to relative if
1662 * it fails.
1663 */
1664 ret = xmlParseAbsoluteURI(uri, &str);
1665 if (ret != 0) {
1666 xmlCleanURI(uri);
1667 str = tmp;
1668 ret = xmlParseRelativeURI(uri, &str);
1669 }
1670 if (ret != 0) {
1671 xmlCleanURI(uri);
1672 return(ret);
1673 }
1674
1675 if (*str == '#') {
1676 str++;
1677 ret = xmlParseURIFragment(uri, &str);
1678 if (ret != 0) return(ret);
1679 }
1680 if (*str != 0) {
1681 xmlCleanURI(uri);
1682 return(1);
1683 }
1684 return(0);
1685}
1686
1687/**
1688 * xmlParseURI:
1689 * @str: the URI string to analyze
1690 *
1691 * Parse an URI
1692 *
1693 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1694 *
1695 * Returns a newly build xmlURIPtr or NULL in case of error
1696 */
1697xmlURIPtr
1698xmlParseURI(const char *str) {
1699 xmlURIPtr uri;
1700 int ret;
1701
1702 if (str == NULL)
1703 return(NULL);
1704 uri = xmlCreateURI();
1705 if (uri != NULL) {
1706 ret = xmlParseURIReference(uri, str);
1707 if (ret) {
1708 xmlFreeURI(uri);
1709 return(NULL);
1710 }
1711 }
1712 return(uri);
1713}
1714
1715/************************************************************************
1716 * *
1717 * Public functions *
1718 * *
1719 ************************************************************************/
1720
1721/**
1722 * xmlBuildURI:
1723 * @URI: the URI instance found in the document
1724 * @base: the base value
1725 *
1726 * Computes he final URI of the reference done by checking that
1727 * the given URI is valid, and building the final URI using the
1728 * base URI. This is processed according to section 5.2 of the
1729 * RFC 2396
1730 *
1731 * 5.2. Resolving Relative References to Absolute Form
1732 *
1733 * Returns a new URI string (to be freed by the caller) or NULL in case
1734 * of error.
1735 */
1736xmlChar *
1737xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1738 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001739 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 xmlURIPtr ref = NULL;
1741 xmlURIPtr bas = NULL;
1742 xmlURIPtr res = NULL;
1743
1744 /*
1745 * 1) The URI reference is parsed into the potential four components and
1746 * fragment identifier, as described in Section 4.3.
1747 *
1748 * NOTE that a completely empty URI is treated by modern browsers
1749 * as a reference to "." rather than as a synonym for the current
1750 * URI. Should we do that here?
1751 */
1752 if (URI == NULL)
1753 ret = -1;
1754 else {
1755 if (*URI) {
1756 ref = xmlCreateURI();
1757 if (ref == NULL)
1758 goto done;
1759 ret = xmlParseURIReference(ref, (const char *) URI);
1760 }
1761 else
1762 ret = 0;
1763 }
1764 if (ret != 0)
1765 goto done;
1766 if (base == NULL)
1767 ret = -1;
1768 else {
1769 bas = xmlCreateURI();
1770 if (bas == NULL)
1771 goto done;
1772 ret = xmlParseURIReference(bas, (const char *) base);
1773 }
1774 if (ret != 0) {
1775 if (ref)
1776 val = xmlSaveUri(ref);
1777 goto done;
1778 }
1779 if (ref == NULL) {
1780 /*
1781 * the base fragment must be ignored
1782 */
1783 if (bas->fragment != NULL) {
1784 xmlFree(bas->fragment);
1785 bas->fragment = NULL;
1786 }
1787 val = xmlSaveUri(bas);
1788 goto done;
1789 }
1790
1791 /*
1792 * 2) If the path component is empty and the scheme, authority, and
1793 * query components are undefined, then it is a reference to the
1794 * current document and we are done. Otherwise, the reference URI's
1795 * query and fragment components are defined as found (or not found)
1796 * within the URI reference and not inherited from the base URI.
1797 *
1798 * NOTE that in modern browsers, the parsing differs from the above
1799 * in the following aspect: the query component is allowed to be
1800 * defined while still treating this as a reference to the current
1801 * document.
1802 */
1803 res = xmlCreateURI();
1804 if (res == NULL)
1805 goto done;
1806 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1807 ((ref->authority == NULL) && (ref->server == NULL))) {
1808 if (bas->scheme != NULL)
1809 res->scheme = xmlMemStrdup(bas->scheme);
1810 if (bas->authority != NULL)
1811 res->authority = xmlMemStrdup(bas->authority);
1812 else if (bas->server != NULL) {
1813 res->server = xmlMemStrdup(bas->server);
1814 if (bas->user != NULL)
1815 res->user = xmlMemStrdup(bas->user);
1816 res->port = bas->port;
1817 }
1818 if (bas->path != NULL)
1819 res->path = xmlMemStrdup(bas->path);
1820 if (ref->query != NULL)
1821 res->query = xmlMemStrdup(ref->query);
1822 else if (bas->query != NULL)
1823 res->query = xmlMemStrdup(bas->query);
1824 if (ref->fragment != NULL)
1825 res->fragment = xmlMemStrdup(ref->fragment);
1826 goto step_7;
1827 }
1828
1829 if (ref->query != NULL)
1830 res->query = xmlMemStrdup(ref->query);
1831 if (ref->fragment != NULL)
1832 res->fragment = xmlMemStrdup(ref->fragment);
1833
1834 /*
1835 * 3) If the scheme component is defined, indicating that the reference
1836 * starts with a scheme name, then the reference is interpreted as an
1837 * absolute URI and we are done. Otherwise, the reference URI's
1838 * scheme is inherited from the base URI's scheme component.
1839 */
1840 if (ref->scheme != NULL) {
1841 val = xmlSaveUri(ref);
1842 goto done;
1843 }
1844 if (bas->scheme != NULL)
1845 res->scheme = xmlMemStrdup(bas->scheme);
1846
1847 /*
1848 * 4) If the authority component is defined, then the reference is a
1849 * network-path and we skip to step 7. Otherwise, the reference
1850 * URI's authority is inherited from the base URI's authority
1851 * component, which will also be undefined if the URI scheme does not
1852 * use an authority component.
1853 */
1854 if ((ref->authority != NULL) || (ref->server != NULL)) {
1855 if (ref->authority != NULL)
1856 res->authority = xmlMemStrdup(ref->authority);
1857 else {
1858 res->server = xmlMemStrdup(ref->server);
1859 if (ref->user != NULL)
1860 res->user = xmlMemStrdup(ref->user);
1861 res->port = ref->port;
1862 }
1863 if (ref->path != NULL)
1864 res->path = xmlMemStrdup(ref->path);
1865 goto step_7;
1866 }
1867 if (bas->authority != NULL)
1868 res->authority = xmlMemStrdup(bas->authority);
1869 else if (bas->server != NULL) {
1870 res->server = xmlMemStrdup(bas->server);
1871 if (bas->user != NULL)
1872 res->user = xmlMemStrdup(bas->user);
1873 res->port = bas->port;
1874 }
1875
1876 /*
1877 * 5) If the path component begins with a slash character ("/"), then
1878 * the reference is an absolute-path and we skip to step 7.
1879 */
1880 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1881 res->path = xmlMemStrdup(ref->path);
1882 goto step_7;
1883 }
1884
1885
1886 /*
1887 * 6) If this step is reached, then we are resolving a relative-path
1888 * reference. The relative path needs to be merged with the base
1889 * URI's path. Although there are many ways to do this, we will
1890 * describe a simple method using a separate string buffer.
1891 *
1892 * Allocate a buffer large enough for the result string.
1893 */
1894 len = 2; /* extra / and 0 */
1895 if (ref->path != NULL)
1896 len += strlen(ref->path);
1897 if (bas->path != NULL)
1898 len += strlen(bas->path);
1899 res->path = (char *) xmlMalloc(len);
1900 if (res->path == NULL) {
1901 xmlGenericError(xmlGenericErrorContext,
1902 "xmlBuildURI: out of memory\n");
1903 goto done;
1904 }
1905 res->path[0] = 0;
1906
1907 /*
1908 * a) All but the last segment of the base URI's path component is
1909 * copied to the buffer. In other words, any characters after the
1910 * last (right-most) slash character, if any, are excluded.
1911 */
1912 cur = 0;
1913 out = 0;
1914 if (bas->path != NULL) {
1915 while (bas->path[cur] != 0) {
1916 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1917 cur++;
1918 if (bas->path[cur] == 0)
1919 break;
1920
1921 cur++;
1922 while (out < cur) {
1923 res->path[out] = bas->path[out];
1924 out++;
1925 }
1926 }
1927 }
1928 res->path[out] = 0;
1929
1930 /*
1931 * b) The reference's path component is appended to the buffer
1932 * string.
1933 */
1934 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001935 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001936 /*
1937 * Ensure the path includes a '/'
1938 */
1939 if ((out == 0) && (bas->server != NULL))
1940 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001941 while (ref->path[indx] != 0) {
1942 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00001943 }
1944 }
1945 res->path[out] = 0;
1946
1947 /*
1948 * Steps c) to h) are really path normalization steps
1949 */
1950 xmlNormalizeURIPath(res->path);
1951
1952step_7:
1953
1954 /*
1955 * 7) The resulting URI components, including any inherited from the
1956 * base URI, are recombined to give the absolute form of the URI
1957 * reference.
1958 */
1959 val = xmlSaveUri(res);
1960
1961done:
1962 if (ref != NULL)
1963 xmlFreeURI(ref);
1964 if (bas != NULL)
1965 xmlFreeURI(bas);
1966 if (res != NULL)
1967 xmlFreeURI(res);
1968 return(val);
1969}
1970
1971