blob: 28401c8c26e418d831bdae61e4206d41ef371d33 [file] [log] [blame]
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 3986, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
Daniel Veillardd7af5552008-08-04 15:29:44 +000021static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000022
/*
 * Character classes of the old RFC 2396 grammar, kept for the legacy
 * handling code.
 *
 * Every macro below takes a character VALUE, except IS_UNWISE which takes
 * a POINTER to the character.  Arguments may be evaluated several times,
 * so they must not have side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any IS_DIGIT inherited from an earlier header is dropped first so that
 * the definition used in this file is the one below.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike its neighbours this macro takes a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
90
/*
 * Advance the pointer lvalue p to the next character: three bytes when it
 * currently points at a '%' (the start of a "%XX" escape), one byte
 * otherwise.
 *
 * The two bytes following '%' are NOT validated here; the parsing loops
 * in this file test the escape (ISA_PCT_ENCODED) before stepping.
 * p is evaluated more than once, so it must be a side-effect-free lvalue.
 * The argument is fully parenthesized so that expressions such as *pp
 * update the pointer that was tested and not some other object.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
96
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Convenience wrapper around xmlStrndup() for plain char strings: casts
 * the source to (const xmlChar *) and the result back to (char *).
 * The whole expansion is parenthesized so the cast cannot bind to the
 * wrong operand when the macro is used inside a larger expression.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
108
/************************************************************************
 *                                                                      *
 *                         RFC 3986 parser                              *
 *                                                                      *
 ************************************************************************/

/*
 * Character classes of the RFC 3986 grammar.
 *
 * All ISA_* macros take a POINTER to the character to test and may
 * evaluate it several times, so the argument must not have side effects.
 */

#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                  / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||        \
       ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||         \
       ((*(p) == ';')) || ((*(p) == '=')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The && chain stops at the first byte that does not match, so the string
 * terminator is never read past.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
164
165/**
166 * xmlParse3986Scheme:
167 * @uri: pointer to an URI structure
168 * @str: pointer to the string to analyze
169 *
170 * Parse an URI scheme
171 *
172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173 *
174 * Returns 0 or the error code
175 */
176static int
177xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178 const char *cur;
179
180 if (str == NULL)
181 return(-1);
182
183 cur = *str;
184 if (!ISA_ALPHA(cur))
185 return(2);
186 cur++;
187 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189 if (uri != NULL) {
190 if (uri->scheme != NULL) xmlFree(uri->scheme);
191 uri->scheme = STRNDUP(*str, cur - *str);
192 }
193 *str = cur;
194 return(0);
195}
196
197/**
198 * xmlParse3986Fragment:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
201 *
202 * Parse the query part of an URI
203 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000204 * fragment = *( pchar / "/" / "?" )
205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206 * in the fragment identifier but this is used very broadly for
207 * xpointer scheme selection, so we are allowing it here to not break
208 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000209 *
210 * Returns 0 or the error code
211 */
212static int
213xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214{
215 const char *cur;
216
217 if (str == NULL)
218 return (-1);
219
220 cur = *str;
221
222 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000223 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000224 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225 NEXT(cur);
226 if (uri != NULL) {
227 if (uri->fragment != NULL)
228 xmlFree(uri->fragment);
229 if (uri->cleanup & 2)
230 uri->fragment = STRNDUP(*str, cur - *str);
231 else
232 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233 }
234 *str = cur;
235 return (0);
236}
237
238/**
239 * xmlParse3986Query:
240 * @uri: pointer to an URI structure
241 * @str: pointer to the string to analyze
242 *
243 * Parse the query part of an URI
244 *
245 * query = *uric
246 *
247 * Returns 0 or the error code
248 */
249static int
250xmlParse3986Query(xmlURIPtr uri, const char **str)
251{
252 const char *cur;
253
254 if (str == NULL)
255 return (-1);
256
257 cur = *str;
258
259 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261 NEXT(cur);
262 if (uri != NULL) {
263 if (uri->query != NULL)
264 xmlFree(uri->query);
265 if (uri->cleanup & 2)
266 uri->query = STRNDUP(*str, cur - *str);
267 else
268 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269
270 /* Save the raw bytes of the query as well.
271 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272 */
273 if (uri->query_raw != NULL)
274 xmlFree (uri->query_raw);
275 uri->query_raw = STRNDUP (*str, cur - *str);
276 }
277 *str = cur;
278 return (0);
279}
280
281/**
282 * xmlParse3986Port:
283 * @uri: pointer to an URI structure
284 * @str: the string to analyze
285 *
286 * Parse a port part and fills in the appropriate fields
287 * of the @uri structure
288 *
289 * port = *DIGIT
290 *
291 * Returns 0 or the error code
292 */
293static int
294xmlParse3986Port(xmlURIPtr uri, const char **str)
295{
296 const char *cur = *str;
297
298 if (ISA_DIGIT(cur)) {
299 if (uri != NULL)
300 uri->port = 0;
301 while (ISA_DIGIT(cur)) {
302 if (uri != NULL)
303 uri->port = uri->port * 10 + (*cur - '0');
304 cur++;
305 }
306 *str = cur;
307 return(0);
308 }
309 return(1);
310}
311
312/**
313 * xmlParse3986Userinfo:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
317 * Parse an user informations part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326{
327 const char *cur;
328
329 cur = *str;
330 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331 ISA_SUB_DELIM(cur) || (*cur == ':'))
332 NEXT(cur);
333 if (*cur == '@') {
334 if (uri != NULL) {
335 if (uri->user != NULL) xmlFree(uri->user);
336 if (uri->cleanup & 2)
337 uri->user = STRNDUP(*str, cur - *str);
338 else
339 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340 }
341 *str = cur;
342 return(0);
343 }
344 return(1);
345}
346
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  At most three characters are consumed; whatever
 * follows them is left for the caller to check.  Values 256 to 259 are
 * rejected: the last digit of a "25x" octet must be in the range 0-5.
 *
 * Returns 0 if found and skipped, 1 otherwise (*@str is then unchanged)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        len = 1;
    } else {
        /* cur[1] is a digit, hence not the terminator: cur[2] is readable */
        int third_is_digit = (cur[2] >= '0') && (cur[2] <= '9');

        if (!third_is_digit) {
            /* two digits: 10-99, a leading zero is not allowed */
            if (cur[0] == '0')
                return(1);
            len = 2;
        } else if (cur[0] == '1') {
            /* 100-199 */
            len = 3;
        } else if ((cur[0] == '2') && (cur[1] <= '4')) {
            /* 200-249 */
            len = 3;
        } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
            /* 250-255 */
            len = 3;
        } else {
            return(1);
        }
    }

    *str = cur + len;
    return(0);
}
384/**
385 * xmlParse3986Host:
386 * @uri: pointer to an URI structure
387 * @str: the string to analyze
388 *
389 * Parse an host part and fills in the appropriate fields
390 * of the @uri structure
391 *
392 * host = IP-literal / IPv4address / reg-name
393 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
394 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
395 * reg-name = *( unreserved / pct-encoded / sub-delims )
396 *
397 * Returns 0 or the error code
398 */
399static int
400xmlParse3986Host(xmlURIPtr uri, const char **str)
401{
402 const char *cur = *str;
403 const char *host;
404
405 host = cur;
406 /*
407 * IPv6 and future adressing scheme are enclosed between brackets
408 */
409 if (*cur == '[') {
410 cur++;
411 while ((*cur != ']') && (*cur != 0))
412 cur++;
413 if (*cur != ']')
414 return(1);
415 cur++;
416 goto found;
417 }
418 /*
419 * try to parse an IPv4
420 */
421 if (ISA_DIGIT(cur)) {
422 if (xmlParse3986DecOctet(&cur) != 0)
423 goto not_ipv4;
424 if (*cur != '.')
425 goto not_ipv4;
426 cur++;
427 if (xmlParse3986DecOctet(&cur) != 0)
428 goto not_ipv4;
429 if (*cur != '.')
430 goto not_ipv4;
431 if (xmlParse3986DecOctet(&cur) != 0)
432 goto not_ipv4;
433 if (*cur != '.')
434 goto not_ipv4;
435 if (xmlParse3986DecOctet(&cur) != 0)
436 goto not_ipv4;
437 goto found;
438not_ipv4:
439 cur = *str;
440 }
441 /*
442 * then this should be a hostname which can be empty
443 */
444 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445 NEXT(cur);
446found:
447 if (uri != NULL) {
448 if (uri->authority != NULL) xmlFree(uri->authority);
449 uri->authority = NULL;
450 if (uri->server != NULL) xmlFree(uri->server);
451 if (cur != host) {
452 if (uri->cleanup & 2)
453 uri->server = STRNDUP(host, cur - host);
454 else
455 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456 } else
457 uri->server = NULL;
458 }
459 *str = cur;
460 return(0);
461}
462
463/**
464 * xmlParse3986Authority:
465 * @uri: pointer to an URI structure
466 * @str: the string to analyze
467 *
468 * Parse an authority part and fills in the appropriate fields
469 * of the @uri structure
470 *
471 * authority = [ userinfo "@" ] host [ ":" port ]
472 *
473 * Returns 0 or the error code
474 */
475static int
476xmlParse3986Authority(xmlURIPtr uri, const char **str)
477{
478 const char *cur;
479 int ret;
480
481 cur = *str;
482 /*
483 * try to parse an userinfo and check for the trailing @
484 */
485 ret = xmlParse3986Userinfo(uri, &cur);
486 if ((ret != 0) || (*cur != '@'))
487 cur = *str;
488 else
489 cur++;
490 ret = xmlParse3986Host(uri, &cur);
491 if (ret != 0) return(ret);
492 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000493 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000494 ret = xmlParse3986Port(uri, &cur);
495 if (ret != 0) return(ret);
496 }
497 *str = cur;
498 return(0);
499}
500
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character
 * @empty: allow an empty segment
 *
 * Skip a path segment; nothing is stored, only *@str is moved.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which is
 * equal to @forbid (pass 0 to forbid nothing).  When @empty is 0 at least
 * one character must be consumed, so a segment starting with the
 * forbidden character is an error and not a silent empty match.
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);

    /* nothing consumed: only acceptable for a possibly-empty segment */
    if ((cur == *str) && (!empty))
        return(1);

    *str = cur;
    return (0);
}
533
534/**
535 * xmlParse3986PathAbEmpty:
536 * @uri: pointer to an URI structure
537 * @str: the string to analyze
538 *
539 * Parse an path absolute or empty and fills in the appropriate fields
540 * of the @uri structure
541 *
542 * path-abempty = *( "/" segment )
543 *
544 * Returns 0 or the error code
545 */
546static int
547xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548{
549 const char *cur;
550 int ret;
551
552 cur = *str;
553
554 while (*cur == '/') {
555 cur++;
556 ret = xmlParse3986Segment(&cur, 0, 1);
557 if (ret != 0) return(ret);
558 }
559 if (uri != NULL) {
560 if (uri->path != NULL) xmlFree(uri->path);
561 if (uri->cleanup & 2)
562 uri->path = STRNDUP(*str, cur - *str);
563 else
564 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
565 }
566 *str = cur;
567 return (0);
568}
569
570/**
571 * xmlParse3986PathAbsolute:
572 * @uri: pointer to an URI structure
573 * @str: the string to analyze
574 *
575 * Parse an path absolute and fills in the appropriate fields
576 * of the @uri structure
577 *
578 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
579 *
580 * Returns 0 or the error code
581 */
582static int
583xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
584{
585 const char *cur;
586 int ret;
587
588 cur = *str;
589
590 if (*cur != '/')
591 return(1);
592 cur++;
593 ret = xmlParse3986Segment(&cur, 0, 0);
594 if (ret == 0) {
595 while (*cur == '/') {
596 cur++;
597 ret = xmlParse3986Segment(&cur, 0, 1);
598 if (ret != 0) return(ret);
599 }
600 }
601 if (uri != NULL) {
602 if (uri->path != NULL) xmlFree(uri->path);
603 if (uri->cleanup & 2)
604 uri->path = STRNDUP(*str, cur - *str);
605 else
606 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
607 }
608 *str = cur;
609 return (0);
610}
611
612/**
613 * xmlParse3986PathRootless:
614 * @uri: pointer to an URI structure
615 * @str: the string to analyze
616 *
617 * Parse an path without root and fills in the appropriate fields
618 * of the @uri structure
619 *
620 * path-rootless = segment-nz *( "/" segment )
621 *
622 * Returns 0 or the error code
623 */
624static int
625xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
626{
627 const char *cur;
628 int ret;
629
630 cur = *str;
631
632 ret = xmlParse3986Segment(&cur, 0, 0);
633 if (ret != 0) return(ret);
634 while (*cur == '/') {
635 cur++;
636 ret = xmlParse3986Segment(&cur, 0, 1);
637 if (ret != 0) return(ret);
638 }
639 if (uri != NULL) {
640 if (uri->path != NULL) xmlFree(uri->path);
641 if (uri->cleanup & 2)
642 uri->path = STRNDUP(*str, cur - *str);
643 else
644 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645 }
646 *str = cur;
647 return (0);
648}
649
650/**
651 * xmlParse3986PathNoScheme:
652 * @uri: pointer to an URI structure
653 * @str: the string to analyze
654 *
655 * Parse an path which is not a scheme and fills in the appropriate fields
656 * of the @uri structure
657 *
658 * path-noscheme = segment-nz-nc *( "/" segment )
659 *
660 * Returns 0 or the error code
661 */
662static int
663xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
664{
665 const char *cur;
666 int ret;
667
668 cur = *str;
669
670 ret = xmlParse3986Segment(&cur, ':', 0);
671 if (ret != 0) return(ret);
672 while (*cur == '/') {
673 cur++;
674 ret = xmlParse3986Segment(&cur, 0, 1);
675 if (ret != 0) return(ret);
676 }
677 if (uri != NULL) {
678 if (uri->path != NULL) xmlFree(uri->path);
679 if (uri->cleanup & 2)
680 uri->path = STRNDUP(*str, cur - *str);
681 else
682 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
683 }
684 *str = cur;
685 return (0);
686}
687
688/**
689 * xmlParse3986HierPart:
690 * @uri: pointer to an URI structure
691 * @str: the string to analyze
692 *
693 * Parse an hierarchical part and fills in the appropriate fields
694 * of the @uri structure
695 *
696 * hier-part = "//" authority path-abempty
697 * / path-absolute
698 * / path-rootless
699 * / path-empty
700 *
701 * Returns 0 or the error code
702 */
703static int
704xmlParse3986HierPart(xmlURIPtr uri, const char **str)
705{
706 const char *cur;
707 int ret;
708
709 cur = *str;
710
711 if ((*cur == '/') && (*(cur + 1) == '/')) {
712 cur += 2;
713 ret = xmlParse3986Authority(uri, &cur);
714 if (ret != 0) return(ret);
715 ret = xmlParse3986PathAbEmpty(uri, &cur);
716 if (ret != 0) return(ret);
717 *str = cur;
718 return(0);
719 } else if (*cur == '/') {
720 ret = xmlParse3986PathAbsolute(uri, &cur);
721 if (ret != 0) return(ret);
722 } else if (ISA_PCHAR(cur)) {
723 ret = xmlParse3986PathRootless(uri, &cur);
724 if (ret != 0) return(ret);
725 } else {
726 /* path-empty is effectively empty */
727 if (uri != NULL) {
728 if (uri->path != NULL) xmlFree(uri->path);
729 uri->path = NULL;
730 }
731 }
732 *str = cur;
733 return (0);
734}
735
736/**
737 * xmlParse3986RelativeRef:
738 * @uri: pointer to an URI structure
739 * @str: the string to analyze
740 *
741 * Parse an URI string and fills in the appropriate fields
742 * of the @uri structure
743 *
744 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
745 * relative-part = "//" authority path-abempty
746 * / path-absolute
747 * / path-noscheme
748 * / path-empty
749 *
750 * Returns 0 or the error code
751 */
752static int
753xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
754 int ret;
755
756 if ((*str == '/') && (*(str + 1) == '/')) {
757 str += 2;
758 ret = xmlParse3986Authority(uri, &str);
759 if (ret != 0) return(ret);
760 ret = xmlParse3986PathAbEmpty(uri, &str);
761 if (ret != 0) return(ret);
762 } else if (*str == '/') {
763 ret = xmlParse3986PathAbsolute(uri, &str);
764 if (ret != 0) return(ret);
765 } else if (ISA_PCHAR(str)) {
766 ret = xmlParse3986PathNoScheme(uri, &str);
767 if (ret != 0) return(ret);
768 } else {
769 /* path-empty is effectively empty */
770 if (uri != NULL) {
771 if (uri->path != NULL) xmlFree(uri->path);
772 uri->path = NULL;
773 }
774 }
775
776 if (*str == '?') {
777 str++;
778 ret = xmlParse3986Query(uri, &str);
779 if (ret != 0) return(ret);
780 }
781 if (*str == '#') {
782 str++;
783 ret = xmlParse3986Fragment(uri, &str);
784 if (ret != 0) return(ret);
785 }
786 if (*str != 0) {
787 xmlCleanURI(uri);
788 return(1);
789 }
790 return(0);
791}
792
793
794/**
795 * xmlParse3986URI:
796 * @uri: pointer to an URI structure
797 * @str: the string to analyze
798 *
799 * Parse an URI string and fills in the appropriate fields
800 * of the @uri structure
801 *
802 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
803 *
804 * Returns 0 or the error code
805 */
806static int
807xmlParse3986URI(xmlURIPtr uri, const char *str) {
808 int ret;
809
810 ret = xmlParse3986Scheme(uri, &str);
811 if (ret != 0) return(ret);
812 if (*str != ':') {
813 return(1);
814 }
815 str++;
816 ret = xmlParse3986HierPart(uri, &str);
817 if (ret != 0) return(ret);
818 if (*str == '?') {
819 str++;
820 ret = xmlParse3986Query(uri, &str);
821 if (ret != 0) return(ret);
822 }
823 if (*str == '#') {
824 str++;
825 ret = xmlParse3986Fragment(uri, &str);
826 if (ret != 0) return(ret);
827 }
828 if (*str != 0) {
829 xmlCleanURI(uri);
830 return(1);
831 }
832 return(0);
833}
834
835/**
836 * xmlParse3986URIReference:
837 * @uri: pointer to an URI structure
838 * @str: the string to analyze
839 *
840 * Parse an URI reference string and fills in the appropriate fields
841 * of the @uri structure
842 *
843 * URI-reference = URI / relative-ref
844 *
845 * Returns 0 or the error code
846 */
847static int
848xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
849 int ret;
850
851 if (str == NULL)
852 return(-1);
853 xmlCleanURI(uri);
854
855 /*
856 * Try first to parse absolute refs, then fallback to relative if
857 * it fails.
858 */
859 ret = xmlParse3986URI(uri, str);
860 if (ret != 0) {
861 xmlCleanURI(uri);
862 ret = xmlParse3986RelativeRef(uri, str);
863 if (ret != 0) {
864 xmlCleanURI(uri);
865 return(ret);
866 }
867 }
868 return(0);
869}
870
871/**
872 * xmlParseURI:
873 * @str: the URI string to analyze
874 *
875 * Parse an URI based on RFC 3986
876 *
877 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
878 *
879 * Returns a newly built xmlURIPtr or NULL in case of error
880 */
881xmlURIPtr
882xmlParseURI(const char *str) {
883 xmlURIPtr uri;
884 int ret;
885
886 if (str == NULL)
887 return(NULL);
888 uri = xmlCreateURI();
889 if (uri != NULL) {
890 ret = xmlParse3986URIReference(uri, str);
891 if (ret) {
892 xmlFreeURI(uri);
893 return(NULL);
894 }
895 }
896 return(uri);
897}
898
899/**
900 * xmlParseURIReference:
901 * @uri: pointer to an URI structure
902 * @str: the string to analyze
903 *
904 * Parse an URI reference string based on RFC 3986 and fills in the
905 * appropriate fields of the @uri structure
906 *
907 * URI-reference = URI / relative-ref
908 *
909 * Returns 0 or the error code
910 */
911int
912xmlParseURIReference(xmlURIPtr uri, const char *str) {
913 return(xmlParse3986URIReference(uri, str));
914}
915
916/**
917 * xmlParseURIRaw:
918 * @str: the URI string to analyze
919 * @raw: if 1 unescaping of URI pieces are disabled
920 *
921 * Parse an URI but allows to keep intact the original fragments.
922 *
923 * URI-reference = URI / relative-ref
924 *
925 * Returns a newly built xmlURIPtr or NULL in case of error
926 */
927xmlURIPtr
928xmlParseURIRaw(const char *str, int raw) {
929 xmlURIPtr uri;
930 int ret;
931
932 if (str == NULL)
933 return(NULL);
934 uri = xmlCreateURI();
935 if (uri != NULL) {
936 if (raw) {
937 uri->cleanup |= 2;
938 }
939 ret = xmlParseURIReference(uri, str);
940 if (ret) {
941 xmlFreeURI(uri);
942 return(NULL);
943 }
944 }
945 return(uri);
946}
947
948/************************************************************************
949 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000950 * Generic URI structure functions *
951 * *
952 ************************************************************************/
953
954/**
955 * xmlCreateURI:
956 *
957 * Simply creates an empty xmlURI
958 *
959 * Returns the new structure or NULL in case of error
960 */
961xmlURIPtr
962xmlCreateURI(void) {
963 xmlURIPtr ret;
964
965 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
966 if (ret == NULL) {
967 xmlGenericError(xmlGenericErrorContext,
968 "xmlCreateURI: out of memory\n");
969 return(NULL);
970 }
971 memset(ret, 0, sizeof(xmlURI));
972 return(ret);
973}
974
975/**
976 * xmlSaveUri:
977 * @uri: pointer to an xmlURI
978 *
979 * Save the URI as an escaped string
980 *
981 * Returns a new string (to be deallocated by caller)
982 */
983xmlChar *
984xmlSaveUri(xmlURIPtr uri) {
985 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +0000986 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 const char *p;
988 int len;
989 int max;
990
991 if (uri == NULL) return(NULL);
992
993
994 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000995 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000996 if (ret == NULL) {
997 xmlGenericError(xmlGenericErrorContext,
998 "xmlSaveUri: out of memory\n");
999 return(NULL);
1000 }
1001 len = 0;
1002
1003 if (uri->scheme != NULL) {
1004 p = uri->scheme;
1005 while (*p != 0) {
1006 if (len >= max) {
1007 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001008 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1009 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001010 xmlGenericError(xmlGenericErrorContext,
1011 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001012 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001013 return(NULL);
1014 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001015 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001016 }
1017 ret[len++] = *p++;
1018 }
1019 if (len >= max) {
1020 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001021 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1022 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001023 xmlGenericError(xmlGenericErrorContext,
1024 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001025 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001026 return(NULL);
1027 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001028 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001029 }
1030 ret[len++] = ':';
1031 }
1032 if (uri->opaque != NULL) {
1033 p = uri->opaque;
1034 while (*p != 0) {
1035 if (len + 3 >= max) {
1036 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001037 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001039 xmlGenericError(xmlGenericErrorContext,
1040 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001041 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001042 return(NULL);
1043 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001044 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001045 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001046 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001047 ret[len++] = *p++;
1048 else {
1049 int val = *(unsigned char *)p++;
1050 int hi = val / 0x10, lo = val % 0x10;
1051 ret[len++] = '%';
1052 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1053 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1054 }
1055 }
Owen Taylor3473f882001-02-23 17:55:21 +00001056 } else {
1057 if (uri->server != NULL) {
1058 if (len + 3 >= max) {
1059 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001060 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1061 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001062 xmlGenericError(xmlGenericErrorContext,
1063 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001064 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001065 return(NULL);
1066 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001067 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001068 }
1069 ret[len++] = '/';
1070 ret[len++] = '/';
1071 if (uri->user != NULL) {
1072 p = uri->user;
1073 while (*p != 0) {
1074 if (len + 3 >= max) {
1075 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001076 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001077 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001078 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001081 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001082 return(NULL);
1083 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001084 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001085 }
1086 if ((IS_UNRESERVED(*(p))) ||
1087 ((*(p) == ';')) || ((*(p) == ':')) ||
1088 ((*(p) == '&')) || ((*(p) == '=')) ||
1089 ((*(p) == '+')) || ((*(p) == '$')) ||
1090 ((*(p) == ',')))
1091 ret[len++] = *p++;
1092 else {
1093 int val = *(unsigned char *)p++;
1094 int hi = val / 0x10, lo = val % 0x10;
1095 ret[len++] = '%';
1096 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1097 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1098 }
1099 }
1100 if (len + 3 >= max) {
1101 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001102 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001103 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001104 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001105 xmlGenericError(xmlGenericErrorContext,
1106 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001107 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001108 return(NULL);
1109 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001110 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001111 }
1112 ret[len++] = '@';
1113 }
1114 p = uri->server;
1115 while (*p != 0) {
1116 if (len >= max) {
1117 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001118 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001119 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001120 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001121 xmlGenericError(xmlGenericErrorContext,
1122 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001123 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001124 return(NULL);
1125 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001126 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001127 }
1128 ret[len++] = *p++;
1129 }
1130 if (uri->port > 0) {
1131 if (len + 10 >= max) {
1132 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001133 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001134 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001135 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001136 xmlGenericError(xmlGenericErrorContext,
1137 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001138 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001139 return(NULL);
1140 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001141 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001142 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +00001143 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001144 }
1145 } else if (uri->authority != NULL) {
1146 if (len + 3 >= max) {
1147 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001148 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001149 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001150 if (temp == NULL) {
1151 xmlGenericError(xmlGenericErrorContext,
1152 "xmlSaveUri: out of memory\n");
1153 xmlFree(ret);
1154 return(NULL);
1155 }
1156 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001157 }
1158 ret[len++] = '/';
1159 ret[len++] = '/';
1160 p = uri->authority;
1161 while (*p != 0) {
1162 if (len + 3 >= max) {
1163 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001164 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001165 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001166 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001167 xmlGenericError(xmlGenericErrorContext,
1168 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001169 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001170 return(NULL);
1171 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001172 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001173 }
1174 if ((IS_UNRESERVED(*(p))) ||
1175 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1176 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1177 ((*(p) == '=')) || ((*(p) == '+')))
1178 ret[len++] = *p++;
1179 else {
1180 int val = *(unsigned char *)p++;
1181 int hi = val / 0x10, lo = val % 0x10;
1182 ret[len++] = '%';
1183 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1184 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1185 }
1186 }
1187 } else if (uri->scheme != NULL) {
1188 if (len + 3 >= max) {
1189 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001190 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001191 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001192 if (temp == NULL) {
1193 xmlGenericError(xmlGenericErrorContext,
1194 "xmlSaveUri: out of memory\n");
1195 xmlFree(ret);
1196 return(NULL);
1197 }
1198 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001199 }
1200 ret[len++] = '/';
1201 ret[len++] = '/';
1202 }
1203 if (uri->path != NULL) {
1204 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001205 /*
1206 * the colon in file:///d: should not be escaped or
1207 * Windows accesses fail later.
1208 */
1209 if ((uri->scheme != NULL) &&
1210 (p[0] == '/') &&
1211 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001214 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001215 if (len + 3 >= max) {
1216 max *= 2;
1217 ret = (xmlChar *) xmlRealloc(ret,
1218 (max + 1) * sizeof(xmlChar));
1219 if (ret == NULL) {
1220 xmlGenericError(xmlGenericErrorContext,
1221 "xmlSaveUri: out of memory\n");
1222 return(NULL);
1223 }
1224 }
1225 ret[len++] = *p++;
1226 ret[len++] = *p++;
1227 ret[len++] = *p++;
1228 }
Owen Taylor3473f882001-02-23 17:55:21 +00001229 while (*p != 0) {
1230 if (len + 3 >= max) {
1231 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001232 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001233 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001234 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001235 xmlGenericError(xmlGenericErrorContext,
1236 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001237 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001238 return(NULL);
1239 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001240 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001241 }
1242 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1243 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1244 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1245 ((*(p) == ',')))
1246 ret[len++] = *p++;
1247 else {
1248 int val = *(unsigned char *)p++;
1249 int hi = val / 0x10, lo = val % 0x10;
1250 ret[len++] = '%';
1251 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1252 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1253 }
1254 }
1255 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001256 if (uri->query_raw != NULL) {
1257 if (len + 1 >= max) {
1258 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001259 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillarda1413b82007-04-26 08:33:28 +00001260 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001261 if (temp == NULL) {
1262 xmlGenericError(xmlGenericErrorContext,
1263 "xmlSaveUri: out of memory\n");
1264 xmlFree(ret);
1265 return(NULL);
1266 }
1267 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001268 }
1269 ret[len++] = '?';
1270 p = uri->query_raw;
1271 while (*p != 0) {
1272 if (len + 1 >= max) {
1273 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001274 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillarda1413b82007-04-26 08:33:28 +00001275 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001276 if (temp == NULL) {
Daniel Veillarda1413b82007-04-26 08:33:28 +00001277 xmlGenericError(xmlGenericErrorContext,
1278 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001279 xmlFree(ret);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001280 return(NULL);
1281 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001282 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001283 }
1284 ret[len++] = *p++;
1285 }
1286 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001287 if (len + 3 >= max) {
1288 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001289 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001290 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001291 if (temp == NULL) {
1292 xmlGenericError(xmlGenericErrorContext,
1293 "xmlSaveUri: out of memory\n");
1294 xmlFree(ret);
1295 return(NULL);
1296 }
1297 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001298 }
1299 ret[len++] = '?';
1300 p = uri->query;
1301 while (*p != 0) {
1302 if (len + 3 >= max) {
1303 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001304 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001305 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001306 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001307 xmlGenericError(xmlGenericErrorContext,
1308 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001309 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001310 return(NULL);
1311 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001312 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001313 }
1314 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1315 ret[len++] = *p++;
1316 else {
1317 int val = *(unsigned char *)p++;
1318 int hi = val / 0x10, lo = val % 0x10;
1319 ret[len++] = '%';
1320 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1321 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1322 }
1323 }
1324 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001325 }
1326 if (uri->fragment != NULL) {
1327 if (len + 3 >= max) {
1328 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001329 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001330 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001331 if (temp == NULL) {
1332 xmlGenericError(xmlGenericErrorContext,
1333 "xmlSaveUri: out of memory\n");
1334 xmlFree(ret);
1335 return(NULL);
Daniel Veillardd7af5552008-08-04 15:29:44 +00001336 }
1337 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001338 }
1339 ret[len++] = '#';
1340 p = uri->fragment;
1341 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001342 if (len + 3 >= max) {
1343 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001344 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001345 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001346 if (temp == NULL) {
1347 xmlGenericError(xmlGenericErrorContext,
1348 "xmlSaveUri: out of memory\n");
1349 xmlFree(ret);
1350 return(NULL);
1351 }
1352 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001353 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001354 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1355 ret[len++] = *p++;
1356 else {
1357 int val = *(unsigned char *)p++;
1358 int hi = val / 0x10, lo = val % 0x10;
1359 ret[len++] = '%';
1360 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1361 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001362 }
1363 }
Owen Taylor3473f882001-02-23 17:55:21 +00001364 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001365 if (len >= max) {
1366 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001367 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1368 if (temp == NULL) {
1369 xmlGenericError(xmlGenericErrorContext,
1370 "xmlSaveUri: out of memory\n");
1371 xmlFree(ret);
1372 return(NULL);
1373 }
1374 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001375 }
1376 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001377 return(ret);
1378}
1379
1380/**
1381 * xmlPrintURI:
1382 * @stream: a FILE* for the output
1383 * @uri: pointer to an xmlURI
1384 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001385 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001386 */
1387void
1388xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1389 xmlChar *out;
1390
1391 out = xmlSaveUri(uri);
1392 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001393 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001394 xmlFree(out);
1395 }
1396}
1397
1398/**
1399 * xmlCleanURI:
1400 * @uri: pointer to an xmlURI
1401 *
1402 * Make sure the xmlURI struct is free of content
1403 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001404static void
Owen Taylor3473f882001-02-23 17:55:21 +00001405xmlCleanURI(xmlURIPtr uri) {
1406 if (uri == NULL) return;
1407
1408 if (uri->scheme != NULL) xmlFree(uri->scheme);
1409 uri->scheme = NULL;
1410 if (uri->server != NULL) xmlFree(uri->server);
1411 uri->server = NULL;
1412 if (uri->user != NULL) xmlFree(uri->user);
1413 uri->user = NULL;
1414 if (uri->path != NULL) xmlFree(uri->path);
1415 uri->path = NULL;
1416 if (uri->fragment != NULL) xmlFree(uri->fragment);
1417 uri->fragment = NULL;
1418 if (uri->opaque != NULL) xmlFree(uri->opaque);
1419 uri->opaque = NULL;
1420 if (uri->authority != NULL) xmlFree(uri->authority);
1421 uri->authority = NULL;
1422 if (uri->query != NULL) xmlFree(uri->query);
1423 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001424 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1425 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001426}
1427
1428/**
1429 * xmlFreeURI:
1430 * @uri: pointer to an xmlURI
1431 *
1432 * Free up the xmlURI struct
1433 */
1434void
1435xmlFreeURI(xmlURIPtr uri) {
1436 if (uri == NULL) return;
1437
1438 if (uri->scheme != NULL) xmlFree(uri->scheme);
1439 if (uri->server != NULL) xmlFree(uri->server);
1440 if (uri->user != NULL) xmlFree(uri->user);
1441 if (uri->path != NULL) xmlFree(uri->path);
1442 if (uri->fragment != NULL) xmlFree(uri->fragment);
1443 if (uri->opaque != NULL) xmlFree(uri->opaque);
1444 if (uri->authority != NULL) xmlFree(uri->authority);
1445 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001446 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001447 xmlFree(uri);
1448}
1449
1450/************************************************************************
1451 * *
1452 * Helper functions *
1453 * *
1454 ************************************************************************/
1455
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The normalized string is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize '//' : keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * calling strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Walk backwards over the slashes that precede "cur".  When the
         * loop stops, "segp" is either the last character of the previous
         * segment, or it is "path" itself.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /* There is no previous segment only if "cur" is already the start
         * of the buffer, or if everything in front of it is slashes.
         * Merely reaching "path" is not enough: a one-character first
         * segment (as in "a/b/../..") also ends exactly at "path" and must
         * still be revisited.
         */
        if ((segp == cur) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only for paths starting
     * with '/').
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001643
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of '0'-'9', 'a'-'f' or 'A'-'F', 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1651
Owen Taylor3473f882001-02-23 17:55:21 +00001652/**
1653 * xmlURIUnescapeString:
1654 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +00001655 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001656 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001657 *
Daniel Veillarda44294f2007-04-24 08:57:54 +00001658 * Unescaping routine, but does not check that the string is an URI. The
1659 * output is a direct unsigned char translation of %XX values (no encoding)
Daniel Veillard79187652007-04-24 10:19:52 +00001660 * Note that the length of the result can only be smaller or same size as
1661 * the input string.
Owen Taylor3473f882001-02-23 17:55:21 +00001662 *
Daniel Veillard79187652007-04-24 10:19:52 +00001663 * Returns a copy of the string, but unescaped, will return NULL only in case
1664 * of error
Owen Taylor3473f882001-02-23 17:55:21 +00001665 */
1666char *
1667xmlURIUnescapeString(const char *str, int len, char *target) {
1668 char *ret, *out;
1669 const char *in;
1670
1671 if (str == NULL)
1672 return(NULL);
1673 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +00001674 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001675
1676 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001677 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 if (ret == NULL) {
1679 xmlGenericError(xmlGenericErrorContext,
1680 "xmlURIUnescapeString: out of memory\n");
1681 return(NULL);
1682 }
1683 } else
1684 ret = target;
1685 in = str;
1686 out = ret;
1687 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +00001688 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001689 in++;
1690 if ((*in >= '0') && (*in <= '9'))
1691 *out = (*in - '0');
1692 else if ((*in >= 'a') && (*in <= 'f'))
1693 *out = (*in - 'a') + 10;
1694 else if ((*in >= 'A') && (*in <= 'F'))
1695 *out = (*in - 'A') + 10;
1696 in++;
1697 if ((*in >= '0') && (*in <= '9'))
1698 *out = *out * 16 + (*in - '0');
1699 else if ((*in >= 'a') && (*in <= 'f'))
1700 *out = *out * 16 + (*in - 'a') + 10;
1701 else if ((*in >= 'A') && (*in <= 'F'))
1702 *out = *out * 16 + (*in - 'A') + 10;
1703 in++;
1704 len -= 3;
1705 out++;
1706 } else {
1707 *out++ = *in++;
1708 len--;
1709 }
1710 }
1711 *out = 0;
1712 return(ret);
1713}
1714
1715/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001716 * xmlURIEscapeStr:
1717 * @str: string to escape
1718 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001719 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001720 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1721 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001722 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001723 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001724 */
1725xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001726xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1727 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001728 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001729 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +00001730
Owen Taylor3473f882001-02-23 17:55:21 +00001731 unsigned int len, out;
1732
1733 if (str == NULL)
1734 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001735 if (str[0] == 0)
1736 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001737 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001738 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001739
1740 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001741 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001742 if (ret == NULL) {
1743 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001744 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001745 return(NULL);
1746 }
1747 in = (const xmlChar *) str;
1748 out = 0;
1749 while(*in != 0) {
1750 if (len - out <= 3) {
1751 len += 20;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001752 temp = (xmlChar *) xmlRealloc(ret, len);
1753 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001754 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001755 "xmlURIEscapeStr: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001756 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001757 return(NULL);
1758 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001759 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001760 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001761
1762 ch = *in;
1763
Daniel Veillardeb475a32002-04-14 22:00:22 +00001764 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001765 unsigned char val;
1766 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001767 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001768 if (val <= 9)
1769 ret[out++] = '0' + val;
1770 else
1771 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001772 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001773 if (val <= 9)
1774 ret[out++] = '0' + val;
1775 else
1776 ret[out++] = 'A' + val - 0xA;
1777 in++;
1778 } else {
1779 ret[out++] = *in++;
1780 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001781
Owen Taylor3473f882001-02-23 17:55:21 +00001782 }
1783 ret[out] = 0;
1784 return(ret);
1785}
1786
Daniel Veillard8514c672001-05-23 10:29:12 +00001787/**
1788 * xmlURIEscape:
1789 * @str: the string of the URI to escape
1790 *
1791 * Escaping routine, does not do validity checks !
1792 * It will try to escape the chars needing this, but this is heuristic
1793 * based it's impossible to be sure.
1794 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001795 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001796 *
1797 * 25 May 2001
1798 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1799 * according to RFC2396.
1800 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001801 */
1802xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001803xmlURIEscape(const xmlChar * str)
1804{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001805 xmlChar *ret, *segment = NULL;
1806 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001807 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001808
Daniel Veillard6278fb52001-05-25 07:38:41 +00001809#define NULLCHK(p) if(!p) { \
1810 xmlGenericError(xmlGenericErrorContext, \
1811 "xmlURIEscape: out of memory\n"); \
Daniel Veillarded86dc22008-04-24 11:58:41 +00001812 xmlFreeURI(uri); \
1813 return NULL; } \
Daniel Veillard6278fb52001-05-25 07:38:41 +00001814
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001815 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001816 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001817
1818 uri = xmlCreateURI();
1819 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001820 /*
1821 * Allow escaping errors in the unescaped form
1822 */
1823 uri->cleanup = 1;
1824 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001825 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001826 xmlFreeURI(uri);
1827 return (NULL);
1828 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001829 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001830
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001831 if (!uri)
1832 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001833
1834 ret = NULL;
1835
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001836 if (uri->scheme) {
1837 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1838 NULLCHK(segment)
1839 ret = xmlStrcat(ret, segment);
1840 ret = xmlStrcat(ret, BAD_CAST ":");
1841 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001842 }
1843
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001844 if (uri->authority) {
1845 segment =
1846 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1847 NULLCHK(segment)
1848 ret = xmlStrcat(ret, BAD_CAST "//");
1849 ret = xmlStrcat(ret, segment);
1850 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001851 }
1852
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001853 if (uri->user) {
1854 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1855 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001856 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001857 ret = xmlStrcat(ret, segment);
1858 ret = xmlStrcat(ret, BAD_CAST "@");
1859 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001860 }
1861
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001862 if (uri->server) {
1863 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1864 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001865 if (uri->user == NULL)
Daniel Veillardd7af5552008-08-04 15:29:44 +00001866 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001867 ret = xmlStrcat(ret, segment);
1868 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001869 }
1870
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001871 if (uri->port) {
1872 xmlChar port[10];
1873
Daniel Veillard43d3f612001-11-10 11:57:23 +00001874 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001875 ret = xmlStrcat(ret, BAD_CAST ":");
1876 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001877 }
1878
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001879 if (uri->path) {
1880 segment =
1881 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1882 NULLCHK(segment)
1883 ret = xmlStrcat(ret, segment);
1884 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001885 }
1886
Daniel Veillarda1413b82007-04-26 08:33:28 +00001887 if (uri->query_raw) {
1888 ret = xmlStrcat(ret, BAD_CAST "?");
1889 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1890 }
1891 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001892 segment =
1893 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1894 NULLCHK(segment)
1895 ret = xmlStrcat(ret, BAD_CAST "?");
1896 ret = xmlStrcat(ret, segment);
1897 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001898 }
1899
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001900 if (uri->opaque) {
1901 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1902 NULLCHK(segment)
1903 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001904 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001905 }
1906
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001907 if (uri->fragment) {
1908 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1909 NULLCHK(segment)
1910 ret = xmlStrcat(ret, BAD_CAST "#");
1911 ret = xmlStrcat(ret, segment);
1912 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001913 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001914
1915 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001916#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001917
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001918 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001919}
1920
Owen Taylor3473f882001-02-23 17:55:21 +00001921/************************************************************************
1922 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001923 * Public functions *
1924 * *
1925 ************************************************************************/
1926
1927/**
1928 * xmlBuildURI:
1929 * @URI: the URI instance found in the document
1930 * @base: the base value
1931 *
 * Computes the final URI of the reference done by checking that
1933 * the given URI is valid, and building the final URI using the
1934 * base URI. This is processed according to section 5.2 of the
1935 * RFC 2396
1936 *
1937 * 5.2. Resolving Relative References to Absolute Form
1938 *
1939 * Returns a new URI string (to be freed by the caller) or NULL in case
1940 * of error.
1941 */
1942xmlChar *
1943xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1944 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001945 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlURIPtr ref = NULL;
1947 xmlURIPtr bas = NULL;
1948 xmlURIPtr res = NULL;
1949
1950 /*
1951 * 1) The URI reference is parsed into the potential four components and
1952 * fragment identifier, as described in Section 4.3.
1953 *
1954 * NOTE that a completely empty URI is treated by modern browsers
1955 * as a reference to "." rather than as a synonym for the current
1956 * URI. Should we do that here?
1957 */
1958 if (URI == NULL)
1959 ret = -1;
1960 else {
1961 if (*URI) {
1962 ref = xmlCreateURI();
1963 if (ref == NULL)
1964 goto done;
1965 ret = xmlParseURIReference(ref, (const char *) URI);
1966 }
1967 else
1968 ret = 0;
1969 }
1970 if (ret != 0)
1971 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001972 if ((ref != NULL) && (ref->scheme != NULL)) {
1973 /*
1974 * The URI is absolute don't modify.
1975 */
1976 val = xmlStrdup(URI);
1977 goto done;
1978 }
Owen Taylor3473f882001-02-23 17:55:21 +00001979 if (base == NULL)
1980 ret = -1;
1981 else {
1982 bas = xmlCreateURI();
1983 if (bas == NULL)
1984 goto done;
1985 ret = xmlParseURIReference(bas, (const char *) base);
1986 }
1987 if (ret != 0) {
1988 if (ref)
1989 val = xmlSaveUri(ref);
1990 goto done;
1991 }
1992 if (ref == NULL) {
1993 /*
1994 * the base fragment must be ignored
1995 */
1996 if (bas->fragment != NULL) {
1997 xmlFree(bas->fragment);
1998 bas->fragment = NULL;
1999 }
2000 val = xmlSaveUri(bas);
2001 goto done;
2002 }
2003
2004 /*
2005 * 2) If the path component is empty and the scheme, authority, and
2006 * query components are undefined, then it is a reference to the
2007 * current document and we are done. Otherwise, the reference URI's
2008 * query and fragment components are defined as found (or not found)
2009 * within the URI reference and not inherited from the base URI.
2010 *
2011 * NOTE that in modern browsers, the parsing differs from the above
2012 * in the following aspect: the query component is allowed to be
2013 * defined while still treating this as a reference to the current
2014 * document.
2015 */
2016 res = xmlCreateURI();
2017 if (res == NULL)
2018 goto done;
2019 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2020 ((ref->authority == NULL) && (ref->server == NULL))) {
2021 if (bas->scheme != NULL)
2022 res->scheme = xmlMemStrdup(bas->scheme);
2023 if (bas->authority != NULL)
2024 res->authority = xmlMemStrdup(bas->authority);
2025 else if (bas->server != NULL) {
2026 res->server = xmlMemStrdup(bas->server);
2027 if (bas->user != NULL)
2028 res->user = xmlMemStrdup(bas->user);
2029 res->port = bas->port;
2030 }
2031 if (bas->path != NULL)
2032 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002033 if (ref->query_raw != NULL)
2034 res->query_raw = xmlMemStrdup (ref->query_raw);
2035 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00002036 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002037 else if (bas->query_raw != NULL)
2038 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00002039 else if (bas->query != NULL)
2040 res->query = xmlMemStrdup(bas->query);
2041 if (ref->fragment != NULL)
2042 res->fragment = xmlMemStrdup(ref->fragment);
2043 goto step_7;
2044 }
Owen Taylor3473f882001-02-23 17:55:21 +00002045
2046 /*
2047 * 3) If the scheme component is defined, indicating that the reference
2048 * starts with a scheme name, then the reference is interpreted as an
2049 * absolute URI and we are done. Otherwise, the reference URI's
2050 * scheme is inherited from the base URI's scheme component.
2051 */
2052 if (ref->scheme != NULL) {
2053 val = xmlSaveUri(ref);
2054 goto done;
2055 }
2056 if (bas->scheme != NULL)
2057 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00002058
Daniel Veillarda1413b82007-04-26 08:33:28 +00002059 if (ref->query_raw != NULL)
2060 res->query_raw = xmlMemStrdup(ref->query_raw);
2061 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002062 res->query = xmlMemStrdup(ref->query);
2063 if (ref->fragment != NULL)
2064 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002065
2066 /*
2067 * 4) If the authority component is defined, then the reference is a
2068 * network-path and we skip to step 7. Otherwise, the reference
2069 * URI's authority is inherited from the base URI's authority
2070 * component, which will also be undefined if the URI scheme does not
2071 * use an authority component.
2072 */
2073 if ((ref->authority != NULL) || (ref->server != NULL)) {
2074 if (ref->authority != NULL)
2075 res->authority = xmlMemStrdup(ref->authority);
2076 else {
2077 res->server = xmlMemStrdup(ref->server);
2078 if (ref->user != NULL)
2079 res->user = xmlMemStrdup(ref->user);
2080 res->port = ref->port;
2081 }
2082 if (ref->path != NULL)
2083 res->path = xmlMemStrdup(ref->path);
2084 goto step_7;
2085 }
2086 if (bas->authority != NULL)
2087 res->authority = xmlMemStrdup(bas->authority);
2088 else if (bas->server != NULL) {
2089 res->server = xmlMemStrdup(bas->server);
2090 if (bas->user != NULL)
2091 res->user = xmlMemStrdup(bas->user);
2092 res->port = bas->port;
2093 }
2094
2095 /*
2096 * 5) If the path component begins with a slash character ("/"), then
2097 * the reference is an absolute-path and we skip to step 7.
2098 */
2099 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2100 res->path = xmlMemStrdup(ref->path);
2101 goto step_7;
2102 }
2103
2104
2105 /*
2106 * 6) If this step is reached, then we are resolving a relative-path
2107 * reference. The relative path needs to be merged with the base
2108 * URI's path. Although there are many ways to do this, we will
2109 * describe a simple method using a separate string buffer.
2110 *
2111 * Allocate a buffer large enough for the result string.
2112 */
2113 len = 2; /* extra / and 0 */
2114 if (ref->path != NULL)
2115 len += strlen(ref->path);
2116 if (bas->path != NULL)
2117 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002118 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if (res->path == NULL) {
2120 xmlGenericError(xmlGenericErrorContext,
2121 "xmlBuildURI: out of memory\n");
2122 goto done;
2123 }
2124 res->path[0] = 0;
2125
2126 /*
2127 * a) All but the last segment of the base URI's path component is
2128 * copied to the buffer. In other words, any characters after the
2129 * last (right-most) slash character, if any, are excluded.
2130 */
2131 cur = 0;
2132 out = 0;
2133 if (bas->path != NULL) {
2134 while (bas->path[cur] != 0) {
2135 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2136 cur++;
2137 if (bas->path[cur] == 0)
2138 break;
2139
2140 cur++;
2141 while (out < cur) {
2142 res->path[out] = bas->path[out];
2143 out++;
2144 }
2145 }
2146 }
2147 res->path[out] = 0;
2148
2149 /*
2150 * b) The reference's path component is appended to the buffer
2151 * string.
2152 */
2153 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002154 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002155 /*
2156 * Ensure the path includes a '/'
2157 */
2158 if ((out == 0) && (bas->server != NULL))
2159 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002160 while (ref->path[indx] != 0) {
2161 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002162 }
2163 }
2164 res->path[out] = 0;
2165
2166 /*
2167 * Steps c) to h) are really path normalization steps
2168 */
2169 xmlNormalizeURIPath(res->path);
2170
2171step_7:
2172
2173 /*
2174 * 7) The resulting URI components, including any inherited from the
2175 * base URI, are recombined to give the absolute form of the URI
2176 * reference.
2177 */
2178 val = xmlSaveUri(res);
2179
2180done:
2181 if (ref != NULL)
2182 xmlFreeURI(ref);
2183 if (bas != NULL)
2184 xmlFreeURI(bas);
2185 if (res != NULL)
2186 xmlFreeURI(res);
2187 return(val);
2188}
2189
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002190/**
William M. Brackf7789b12004-06-07 08:57:27 +00002191 * xmlBuildRelativeURI:
2192 * @URI: the URI reference under consideration
2193 * @base: the base value
2194 *
2195 * Expresses the URI of the reference in terms relative to the
2196 * base. Some examples of this operation include:
2197 * base = "http://site1.com/docs/book1.html"
2198 * URI input URI returned
2199 * docs/pic1.gif pic1.gif
2200 * docs/img/pic1.gif img/pic1.gif
2201 * img/pic1.gif ../img/pic1.gif
2202 * http://site1.com/docs/pic1.gif pic1.gif
2203 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2204 *
2205 * base = "docs/book1.html"
2206 * URI input URI returned
2207 * docs/pic1.gif pic1.gif
2208 * docs/img/pic1.gif img/pic1.gif
2209 * img/pic1.gif ../img/pic1.gif
2210 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2211 *
2212 *
2213 * Note: if the URI reference is really wierd or complicated, it may be
2214 * worthwhile to first convert it into a "nice" one by calling
2215 * xmlBuildURI (using 'base') before calling this routine,
2216 * since this routine (for reasonable efficiency) assumes URI has
2217 * already been through some validation.
2218 *
2219 * Returns a new URI string (to be freed by the caller) or NULL in case
2220 * error.
2221 */
2222xmlChar *
2223xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2224{
2225 xmlChar *val = NULL;
2226 int ret;
2227 int ix;
2228 int pos = 0;
2229 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002230 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002231 xmlURIPtr ref = NULL;
2232 xmlURIPtr bas = NULL;
2233 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002234 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002235
2236 if ((URI == NULL) || (*URI == 0))
2237 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002238
2239 /*
2240 * First parse URI into a standard form
2241 */
2242 ref = xmlCreateURI ();
2243 if (ref == NULL)
2244 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002245 /* If URI not already in "relative" form */
2246 if (URI[0] != '.') {
2247 ret = xmlParseURIReference (ref, (const char *) URI);
2248 if (ret != 0)
2249 goto done; /* Error in URI, return NULL */
2250 } else
2251 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002252
2253 /*
2254 * Next parse base into the same standard form
2255 */
2256 if ((base == NULL) || (*base == 0)) {
2257 val = xmlStrdup (URI);
2258 goto done;
2259 }
2260 bas = xmlCreateURI ();
2261 if (bas == NULL)
2262 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002263 if (base[0] != '.') {
2264 ret = xmlParseURIReference (bas, (const char *) base);
2265 if (ret != 0)
2266 goto done; /* Error in base, return NULL */
2267 } else
2268 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002269
2270 /*
2271 * If the scheme / server on the URI differs from the base,
2272 * just return the URI
2273 */
2274 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002275 ((bas->scheme == NULL) ||
2276 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2277 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002278 val = xmlStrdup (URI);
2279 goto done;
2280 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002281 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2282 val = xmlStrdup(BAD_CAST "");
2283 goto done;
2284 }
2285 if (bas->path == NULL) {
2286 val = xmlStrdup((xmlChar *)ref->path);
2287 goto done;
2288 }
2289 if (ref->path == NULL) {
2290 ref->path = (char *) "/";
2291 remove_path = 1;
2292 }
William M. Brackf7789b12004-06-07 08:57:27 +00002293
2294 /*
2295 * At this point (at last!) we can compare the two paths
2296 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002297 * First we take care of the special case where either of the
2298 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002299 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002300 if (bas->path == NULL) {
2301 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002302 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002303 if (*uptr == '/')
2304 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002305 /* exception characters from xmlSaveUri */
2306 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002307 }
2308 goto done;
2309 }
William M. Brackf7789b12004-06-07 08:57:27 +00002310 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002311 if (ref->path == NULL) {
2312 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002313 if (bptr[ix] == '/')
2314 nbslash++;
2315 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002316 uptr = NULL;
2317 len = 1; /* this is for a string terminator only */
2318 } else {
2319 /*
2320 * Next we compare the two strings and find where they first differ
2321 */
2322 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2323 pos += 2;
2324 if ((*bptr == '.') && (bptr[1] == '/'))
2325 bptr += 2;
2326 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2327 bptr++;
2328 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2329 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002330
William M. Brack820d5ed2005-09-14 05:24:27 +00002331 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002332 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002333 goto done; /* (I can't imagine why anyone would do this) */
2334 }
2335
2336 /*
2337 * In URI, "back up" to the last '/' encountered. This will be the
2338 * beginning of the "unique" suffix of URI
2339 */
2340 ix = pos;
2341 if ((ref->path[ix] == '/') && (ix > 0))
2342 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002343 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2344 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002345 for (; ix > 0; ix--) {
2346 if (ref->path[ix] == '/')
2347 break;
2348 }
2349 if (ix == 0) {
2350 uptr = (xmlChar *)ref->path;
2351 } else {
2352 ix++;
2353 uptr = (xmlChar *)&ref->path[ix];
2354 }
2355
2356 /*
2357 * In base, count the number of '/' from the differing point
2358 */
2359 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2360 for (; bptr[ix] != 0; ix++) {
2361 if (bptr[ix] == '/')
2362 nbslash++;
2363 }
2364 }
2365 len = xmlStrlen (uptr) + 1;
2366 }
2367
William M. Brackf7789b12004-06-07 08:57:27 +00002368 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002369 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002370 /* exception characters from xmlSaveUri */
2371 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002372 goto done;
2373 }
William M. Brackf7789b12004-06-07 08:57:27 +00002374
2375 /*
2376 * Allocate just enough space for the returned string -
2377 * length of the remainder of the URI, plus enough space
2378 * for the "../" groups, plus one for the terminator
2379 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002380 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002381 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002382 xmlGenericError(xmlGenericErrorContext,
2383 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002384 goto done;
2385 }
2386 vptr = val;
2387 /*
2388 * Put in as many "../" as needed
2389 */
2390 for (; nbslash>0; nbslash--) {
2391 *vptr++ = '.';
2392 *vptr++ = '.';
2393 *vptr++ = '/';
2394 }
2395 /*
2396 * Finish up with the end of the URI
2397 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002398 if (uptr != NULL) {
2399 if ((vptr > val) && (len > 0) &&
2400 (uptr[0] == '/') && (vptr[-1] == '/')) {
2401 memcpy (vptr, uptr + 1, len - 1);
2402 vptr[len - 2] = 0;
2403 } else {
2404 memcpy (vptr, uptr, len);
2405 vptr[len - 1] = 0;
2406 }
2407 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002408 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002409 }
William M. Brackf7789b12004-06-07 08:57:27 +00002410
William M. Brack50420192007-07-20 01:09:08 +00002411 /* escape the freshly-built path */
2412 vptr = val;
2413 /* exception characters from xmlSaveUri */
2414 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2415 xmlFree(vptr);
2416
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002417done:
William M. Brackf7789b12004-06-07 08:57:27 +00002418 /*
2419 * Free the working variables
2420 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002421 if (remove_path != 0)
2422 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002423 if (ref != NULL)
2424 xmlFreeURI (ref);
2425 if (bas != NULL)
2426 xmlFreeURI (bas);
2427
2428 return val;
2429}
2430
2431/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002432 * xmlCanonicPath:
2433 * @path: the resource locator in a filesystem notation
2434 *
2435 * Constructs a canonic path from the specified path.
2436 *
2437 * Returns a new canonic path, or a duplicate of the path parameter if the
2438 * construction fails. The caller is responsible for freeing the memory occupied
2439 * by the returned string. If there is insufficient memory available, or the
2440 * argument is NULL, the function returns NULL.
2441 */
2442#define IS_WINDOWS_PATH(p) \
2443 ((p != NULL) && \
2444 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2445 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2446 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002447xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002448xmlCanonicPath(const xmlChar *path)
2449{
William M. Brack22242272007-01-27 07:59:37 +00002450/*
2451 * For Windows implementations, additional work needs to be done to
2452 * replace backslashes in pathnames with "forward slashes"
2453 */
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002454#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002455 int len = 0;
2456 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002457 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002458#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002459 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002460 xmlChar *ret;
2461 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002462
2463 if (path == NULL)
2464 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002465
2466 /* sanitize filename starting with // so it can be used as URI */
2467 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2468 path++;
2469
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002470 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002471 xmlFreeURI(uri);
2472 return xmlStrdup(path);
2473 }
2474
William M. Brack22242272007-01-27 07:59:37 +00002475 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002476 absuri = xmlStrstr(path, BAD_CAST "://");
2477 if (absuri != NULL) {
2478 int l, j;
2479 unsigned char c;
2480 xmlChar *escURI;
2481
2482 /*
2483 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002484 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002485 * part matches a protocol.
2486 */
2487 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002488 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002489 if ((l <= 0) || (l > 20))
2490 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002491 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002492 for (j = 0;j < l;j++) {
2493 c = path[j];
2494 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2495 goto path_processing;
2496 }
2497
William M. Brack22242272007-01-27 07:59:37 +00002498 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002499 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2500 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002501 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002502 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002503 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002504 if (uri != NULL) {
2505 xmlFreeURI(uri);
2506 return escURI;
2507 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002508 }
2509 }
2510
2511path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002512/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002513#if defined(_WIN32) && !defined(__CYGWIN__)
2514 /*
William M. Brack22242272007-01-27 07:59:37 +00002515 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002516 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002517 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002518 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002519 return(NULL);
2520 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002521
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002522 len = xmlStrlen(path);
2523 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002524 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002525 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002526 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002527 uri->path = xmlMallocAtomic(len + 2);
2528 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002529 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002530 return(NULL);
2531 }
William M. Brack22242272007-01-27 07:59:37 +00002532 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002533 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002534 p = uri->path + 1;
2535 strncpy(p, path, len + 1);
2536 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002537 uri->path = xmlStrdup(path);
2538 if (uri->path == NULL) {
2539 xmlFreeURI(uri);
2540 return(NULL);
2541 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002542 p = uri->path;
2543 }
William M. Brack22242272007-01-27 07:59:37 +00002544 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002545 while (*p != '\0') {
2546 if (*p == '\\')
2547 *p = '/';
2548 p++;
2549 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002550
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002551 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002552 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002553 } else {
2554 ret = xmlSaveUri(uri);
2555 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002556
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002557 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002558#else
2559 ret = xmlStrdup((const xmlChar *) path);
2560#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002561 return(ret);
2562}
Owen Taylor3473f882001-02-23 17:55:21 +00002563
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002564/**
2565 * xmlPathToURI:
2566 * @path: the resource locator in a filesystem notation
2567 *
2568 * Constructs an URI expressing the existing path
2569 *
2570 * Returns a new URI, or a duplicate of the path parameter if the
2571 * construction fails. The caller is responsible for freeing the memory
2572 * occupied by the returned string. If there is insufficient memory available,
2573 * or the argument is NULL, the function returns NULL.
2574 */
2575xmlChar *
2576xmlPathToURI(const xmlChar *path)
2577{
2578 xmlURIPtr uri;
2579 xmlURI temp;
2580 xmlChar *ret, *cal;
2581
2582 if (path == NULL)
2583 return(NULL);
2584
2585 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2586 xmlFreeURI(uri);
2587 return xmlStrdup(path);
2588 }
2589 cal = xmlCanonicPath(path);
2590 if (cal == NULL)
2591 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002592#if defined(_WIN32) && !defined(__CYGWIN__)
2593 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2594 If 'cal' is a valid URI allready then we are done here, as continuing would make
2595 it invalid. */
2596 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2597 xmlFreeURI(uri);
2598 return cal;
2599 }
2600 /* 'cal' can contain a relative path with backslashes. If that is processed
2601 by xmlSaveURI, they will be escaped and the external entity loader machinery
2602 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2603 ret = cal;
2604 while (*ret != '\0') {
2605 if (*ret == '\\')
2606 *ret = '/';
2607 ret++;
2608 }
2609#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002610 memset(&temp, 0, sizeof(temp));
2611 temp.path = (char *) cal;
2612 ret = xmlSaveUri(&temp);
2613 xmlFree(cal);
2614 return(ret);
2615}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002616#define bottom_uri
2617#include "elfgcchack.h"