blob: 7e2f525669fd194cc57e95f4c5a137b8df9ebebb [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
Daniel Veillardd7af5552008-08-04 15:29:44 +00004 * Reference: RFCs 3986, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
 * TODO: this module behaves really badly in out-of-memory situations
9 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000010 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000011 */
12
Daniel Veillard34ce8be2002-03-18 19:37:11 +000013#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
22
Daniel Veillardd7af5552008-08-04 15:29:44 +000023static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000024
25/*
Daniel Veillardd7af5552008-08-04 15:29:44 +000026 * Old rule from 2396 used in legacy handling code
Owen Taylor3473f882001-02-23 17:55:21 +000027 * alpha = lowalpha | upalpha
28 */
29#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
30
31
32/*
33 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
34 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
35 * "u" | "v" | "w" | "x" | "y" | "z"
36 */
37
38#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
39
40/*
41 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
42 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
43 * "U" | "V" | "W" | "X" | "Y" | "Z"
44 */
45#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
46
Daniel Veillardbe3eb202004-07-09 12:05:25 +000047#ifdef IS_DIGIT
48#undef IS_DIGIT
49#endif
Owen Taylor3473f882001-02-23 17:55:21 +000050/*
51 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
52 */
Owen Taylor3473f882001-02-23 17:55:21 +000053#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
54
55/*
56 * alphanum = alpha | digit
57 */
58
59#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
60
61/*
Owen Taylor3473f882001-02-23 17:55:21 +000062 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
63 */
64
Daniel Veillardd7af5552008-08-04 15:29:44 +000065#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
66 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
Owen Taylor3473f882001-02-23 17:55:21 +000067 ((x) == '(') || ((x) == ')'))
68
Owen Taylor3473f882001-02-23 17:55:21 +000069/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
Owen Taylor3473f882001-02-23 17:55:21 +000071 */
72
Daniel Veillardd7af5552008-08-04 15:29:44 +000073#define IS_UNWISE(p) \
74 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
75 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
76 ((*(p) == ']')) || ((*(p) == '`')))
77/*
78 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
79 * "[" | "]"
80 */
81
82#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
83 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
84 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
85 ((x) == ']'))
Owen Taylor3473f882001-02-23 17:55:21 +000086
87/*
88 * unreserved = alphanum | mark
89 */
90
91#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
92
/*
 * Advance the pointer variable p to the next character: a '%' is
 * assumed to start a three character escape sequence ("%XX") and is
 * skipped as a whole, anything else is skipped as a single byte.
 *
 * p must be a modifiable lvalue and is evaluated more than once, so it
 * must not have side effects. The caller is responsible for making sure
 * that a '%' really is followed by two more characters.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
98
99/*
100 * Productions from the spec.
101 *
102 * authority = server | reg_name
103 * reg_name = 1*( unreserved | escaped | "$" | "," |
104 * ";" | ":" | "@" | "&" | "=" | "+" )
105 *
106 * path = [ abs_path | opaque_part ]
107 */
108
Daniel Veillard336a8e12005-08-07 10:46:19 +0000109#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
110
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
Daniel Veillardd7af5552008-08-04 15:29:44 +0000113 * RFC 3986 parser *
114 * *
115 ************************************************************************/
116
117#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
118#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
119 ((*(p) >= 'A') && (*(p) <= 'Z')))
120#define ISA_HEXDIG(p) \
121 (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
122 ((*(p) >= 'A') && (*(p) <= 'F')))
123
124/*
125 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
126 * / "*" / "+" / "," / ";" / "="
127 */
128#define ISA_SUB_DELIM(p) \
129 (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
130 ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
131 ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
Daniel Veillard2ee91eb2010-06-04 09:14:16 +0800132 ((*(p) == '=')) || ((*(p) == '\'')))
Daniel Veillardd7af5552008-08-04 15:29:44 +0000133
134/*
135 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
136 */
137#define ISA_GEN_DELIM(p) \
138 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
139 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
140 ((*(p) == '@')))
141
142/*
143 * reserved = gen-delims / sub-delims
144 */
145#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
146
147/*
148 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
149 */
150#define ISA_UNRESERVED(p) \
151 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
152 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
153
154/*
155 * pct-encoded = "%" HEXDIG HEXDIG
156 */
157#define ISA_PCT_ENCODED(p) \
158 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
159
160/*
161 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
162 */
163#define ISA_PCHAR(p) \
164 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
165 ((*(p) == ':')) || ((*(p) == '@')))
166
167/**
168 * xmlParse3986Scheme:
169 * @uri: pointer to an URI structure
170 * @str: pointer to the string to analyze
171 *
172 * Parse an URI scheme
173 *
174 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
175 *
176 * Returns 0 or the error code
177 */
178static int
179xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
180 const char *cur;
181
182 if (str == NULL)
183 return(-1);
184
185 cur = *str;
186 if (!ISA_ALPHA(cur))
187 return(2);
188 cur++;
189 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
190 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
191 if (uri != NULL) {
192 if (uri->scheme != NULL) xmlFree(uri->scheme);
193 uri->scheme = STRNDUP(*str, cur - *str);
194 }
195 *str = cur;
196 return(0);
197}
198
199/**
200 * xmlParse3986Fragment:
201 * @uri: pointer to an URI structure
202 * @str: pointer to the string to analyze
203 *
204 * Parse the query part of an URI
205 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000206 * fragment = *( pchar / "/" / "?" )
207 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
208 * in the fragment identifier but this is used very broadly for
209 * xpointer scheme selection, so we are allowing it here to not break
210 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000211 *
212 * Returns 0 or the error code
213 */
214static int
215xmlParse3986Fragment(xmlURIPtr uri, const char **str)
216{
217 const char *cur;
218
219 if (str == NULL)
220 return (-1);
221
222 cur = *str;
223
224 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000225 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000226 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
227 NEXT(cur);
228 if (uri != NULL) {
229 if (uri->fragment != NULL)
230 xmlFree(uri->fragment);
231 if (uri->cleanup & 2)
232 uri->fragment = STRNDUP(*str, cur - *str);
233 else
234 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
235 }
236 *str = cur;
237 return (0);
238}
239
240/**
241 * xmlParse3986Query:
242 * @uri: pointer to an URI structure
243 * @str: pointer to the string to analyze
244 *
245 * Parse the query part of an URI
246 *
247 * query = *uric
248 *
249 * Returns 0 or the error code
250 */
251static int
252xmlParse3986Query(xmlURIPtr uri, const char **str)
253{
254 const char *cur;
255
256 if (str == NULL)
257 return (-1);
258
259 cur = *str;
260
261 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
262 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
263 NEXT(cur);
264 if (uri != NULL) {
265 if (uri->query != NULL)
266 xmlFree(uri->query);
267 if (uri->cleanup & 2)
268 uri->query = STRNDUP(*str, cur - *str);
269 else
270 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
271
272 /* Save the raw bytes of the query as well.
273 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
274 */
275 if (uri->query_raw != NULL)
276 xmlFree (uri->query_raw);
277 uri->query_raw = STRNDUP (*str, cur - *str);
278 }
279 *str = cur;
280 return (0);
281}
282
283/**
284 * xmlParse3986Port:
285 * @uri: pointer to an URI structure
286 * @str: the string to analyze
287 *
288 * Parse a port part and fills in the appropriate fields
289 * of the @uri structure
290 *
291 * port = *DIGIT
292 *
293 * Returns 0 or the error code
294 */
295static int
296xmlParse3986Port(xmlURIPtr uri, const char **str)
297{
298 const char *cur = *str;
299
300 if (ISA_DIGIT(cur)) {
301 if (uri != NULL)
302 uri->port = 0;
303 while (ISA_DIGIT(cur)) {
304 if (uri != NULL)
305 uri->port = uri->port * 10 + (*cur - '0');
306 cur++;
307 }
308 *str = cur;
309 return(0);
310 }
311 return(1);
312}
313
314/**
315 * xmlParse3986Userinfo:
316 * @uri: pointer to an URI structure
317 * @str: the string to analyze
318 *
319 * Parse an user informations part and fills in the appropriate fields
320 * of the @uri structure
321 *
322 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
323 *
324 * Returns 0 or the error code
325 */
326static int
327xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
328{
329 const char *cur;
330
331 cur = *str;
332 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
333 ISA_SUB_DELIM(cur) || (*cur == ':'))
334 NEXT(cur);
335 if (*cur == '@') {
336 if (uri != NULL) {
337 if (uri->user != NULL) xmlFree(uri->user);
338 if (uri->cleanup & 2)
339 uri->user = STRNDUP(*str, cur - *str);
340 else
341 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
342 }
343 *str = cur;
344 return(0);
345 }
346 return(1);
347}
348
/**
 * xmlParse3986DecOctet:
 * @str:  address of the parsing cursor, moved past the octet on success
 *
 *   dec-octet = DIGIT                 ; 0-9
 *             / %x31-39 DIGIT         ; 10-99
 *             / "1" 2DIGIT            ; 100-199
 *             / "2" %x30-34 DIGIT     ; 200-249
 *             / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are looked at; a two digit
 * value with a leading zero and three digit values above 255 are
 * rejected.
 *
 * Returns 0 if found and skipped, 1 otherwise (*@str is then unchanged)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only read once cur[0] is known to be a digit, and cur[2]
     * only once cur[1] is known to be one, so the terminating 0 of the
     * string is never stepped over.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the last digit is the one which must be bounded */
        cur += 3;
    } else {
        return(1);
    }

    *str = cur;
    return(0);
}
386/**
387 * xmlParse3986Host:
388 * @uri: pointer to an URI structure
389 * @str: the string to analyze
390 *
391 * Parse an host part and fills in the appropriate fields
392 * of the @uri structure
393 *
394 * host = IP-literal / IPv4address / reg-name
395 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
396 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
397 * reg-name = *( unreserved / pct-encoded / sub-delims )
398 *
399 * Returns 0 or the error code
400 */
401static int
402xmlParse3986Host(xmlURIPtr uri, const char **str)
403{
404 const char *cur = *str;
405 const char *host;
406
407 host = cur;
408 /*
409 * IPv6 and future adressing scheme are enclosed between brackets
410 */
411 if (*cur == '[') {
412 cur++;
413 while ((*cur != ']') && (*cur != 0))
414 cur++;
415 if (*cur != ']')
416 return(1);
417 cur++;
418 goto found;
419 }
420 /*
421 * try to parse an IPv4
422 */
423 if (ISA_DIGIT(cur)) {
424 if (xmlParse3986DecOctet(&cur) != 0)
425 goto not_ipv4;
426 if (*cur != '.')
427 goto not_ipv4;
428 cur++;
429 if (xmlParse3986DecOctet(&cur) != 0)
430 goto not_ipv4;
431 if (*cur != '.')
432 goto not_ipv4;
433 if (xmlParse3986DecOctet(&cur) != 0)
434 goto not_ipv4;
435 if (*cur != '.')
436 goto not_ipv4;
437 if (xmlParse3986DecOctet(&cur) != 0)
438 goto not_ipv4;
439 goto found;
440not_ipv4:
441 cur = *str;
442 }
443 /*
444 * then this should be a hostname which can be empty
445 */
446 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
447 NEXT(cur);
448found:
449 if (uri != NULL) {
450 if (uri->authority != NULL) xmlFree(uri->authority);
451 uri->authority = NULL;
452 if (uri->server != NULL) xmlFree(uri->server);
453 if (cur != host) {
454 if (uri->cleanup & 2)
455 uri->server = STRNDUP(host, cur - host);
456 else
457 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
458 } else
459 uri->server = NULL;
460 }
461 *str = cur;
462 return(0);
463}
464
465/**
466 * xmlParse3986Authority:
467 * @uri: pointer to an URI structure
468 * @str: the string to analyze
469 *
470 * Parse an authority part and fills in the appropriate fields
471 * of the @uri structure
472 *
473 * authority = [ userinfo "@" ] host [ ":" port ]
474 *
475 * Returns 0 or the error code
476 */
477static int
478xmlParse3986Authority(xmlURIPtr uri, const char **str)
479{
480 const char *cur;
481 int ret;
482
483 cur = *str;
484 /*
485 * try to parse an userinfo and check for the trailing @
486 */
487 ret = xmlParse3986Userinfo(uri, &cur);
488 if ((ret != 0) || (*cur != '@'))
489 cur = *str;
490 else
491 cur++;
492 ret = xmlParse3986Host(uri, &cur);
493 if (ret != 0) return(ret);
494 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000495 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000496 ret = xmlParse3986Port(uri, &cur);
497 if (ret != 0) return(ret);
498 }
499 *str = cur;
500 return(0);
501}
502
/**
 * xmlParse3986Segment:
 * @str:  address of the parsing cursor, moved past the segment on success
 * @forbid:  an optional forbidden character, 0 if there is none
 * @empty:  allow an empty segment
 *
 * Skip a path segment
 *
 *   segment       = *pchar
 *   segment-nz    = 1*pchar
 *   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The scan stops at the first character which is not a pchar or which is
 * equal to @forbid. If nothing was consumed the segment is empty, which
 * is an error unless @empty is set; this also covers a segment starting
 * with the @forbid character.
 *
 * Returns 0 in case of success, 1 if the segment is empty while @empty
 *         is 0 (*@str is then unchanged)
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);

    if ((cur == *str) && (!empty))
        return(1);

    *str = cur;
    return (0);
}
535
536/**
537 * xmlParse3986PathAbEmpty:
538 * @uri: pointer to an URI structure
539 * @str: the string to analyze
540 *
541 * Parse an path absolute or empty and fills in the appropriate fields
542 * of the @uri structure
543 *
544 * path-abempty = *( "/" segment )
545 *
546 * Returns 0 or the error code
547 */
548static int
549xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
550{
551 const char *cur;
552 int ret;
553
554 cur = *str;
555
556 while (*cur == '/') {
557 cur++;
558 ret = xmlParse3986Segment(&cur, 0, 1);
559 if (ret != 0) return(ret);
560 }
561 if (uri != NULL) {
562 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200563 if (*str != cur) {
564 if (uri->cleanup & 2)
565 uri->path = STRNDUP(*str, cur - *str);
566 else
567 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
568 } else {
569 uri->path = NULL;
570 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000571 }
572 *str = cur;
573 return (0);
574}
575
576/**
577 * xmlParse3986PathAbsolute:
578 * @uri: pointer to an URI structure
579 * @str: the string to analyze
580 *
581 * Parse an path absolute and fills in the appropriate fields
582 * of the @uri structure
583 *
584 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
585 *
586 * Returns 0 or the error code
587 */
588static int
589xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
590{
591 const char *cur;
592 int ret;
593
594 cur = *str;
595
596 if (*cur != '/')
597 return(1);
598 cur++;
599 ret = xmlParse3986Segment(&cur, 0, 0);
600 if (ret == 0) {
601 while (*cur == '/') {
602 cur++;
603 ret = xmlParse3986Segment(&cur, 0, 1);
604 if (ret != 0) return(ret);
605 }
606 }
607 if (uri != NULL) {
608 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200609 if (cur != *str) {
610 if (uri->cleanup & 2)
611 uri->path = STRNDUP(*str, cur - *str);
612 else
613 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
614 } else {
615 uri->path = NULL;
616 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000617 }
618 *str = cur;
619 return (0);
620}
621
622/**
623 * xmlParse3986PathRootless:
624 * @uri: pointer to an URI structure
625 * @str: the string to analyze
626 *
627 * Parse an path without root and fills in the appropriate fields
628 * of the @uri structure
629 *
630 * path-rootless = segment-nz *( "/" segment )
631 *
632 * Returns 0 or the error code
633 */
634static int
635xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
636{
637 const char *cur;
638 int ret;
639
640 cur = *str;
641
642 ret = xmlParse3986Segment(&cur, 0, 0);
643 if (ret != 0) return(ret);
644 while (*cur == '/') {
645 cur++;
646 ret = xmlParse3986Segment(&cur, 0, 1);
647 if (ret != 0) return(ret);
648 }
649 if (uri != NULL) {
650 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200651 if (cur != *str) {
652 if (uri->cleanup & 2)
653 uri->path = STRNDUP(*str, cur - *str);
654 else
655 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
656 } else {
657 uri->path = NULL;
658 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000659 }
660 *str = cur;
661 return (0);
662}
663
664/**
665 * xmlParse3986PathNoScheme:
666 * @uri: pointer to an URI structure
667 * @str: the string to analyze
668 *
669 * Parse an path which is not a scheme and fills in the appropriate fields
670 * of the @uri structure
671 *
672 * path-noscheme = segment-nz-nc *( "/" segment )
673 *
674 * Returns 0 or the error code
675 */
676static int
677xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
678{
679 const char *cur;
680 int ret;
681
682 cur = *str;
683
684 ret = xmlParse3986Segment(&cur, ':', 0);
685 if (ret != 0) return(ret);
686 while (*cur == '/') {
687 cur++;
688 ret = xmlParse3986Segment(&cur, 0, 1);
689 if (ret != 0) return(ret);
690 }
691 if (uri != NULL) {
692 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200693 if (cur != *str) {
694 if (uri->cleanup & 2)
695 uri->path = STRNDUP(*str, cur - *str);
696 else
697 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
698 } else {
699 uri->path = NULL;
700 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000701 }
702 *str = cur;
703 return (0);
704}
705
706/**
707 * xmlParse3986HierPart:
708 * @uri: pointer to an URI structure
709 * @str: the string to analyze
710 *
711 * Parse an hierarchical part and fills in the appropriate fields
712 * of the @uri structure
713 *
714 * hier-part = "//" authority path-abempty
715 * / path-absolute
716 * / path-rootless
717 * / path-empty
718 *
719 * Returns 0 or the error code
720 */
721static int
722xmlParse3986HierPart(xmlURIPtr uri, const char **str)
723{
724 const char *cur;
725 int ret;
726
727 cur = *str;
728
729 if ((*cur == '/') && (*(cur + 1) == '/')) {
730 cur += 2;
731 ret = xmlParse3986Authority(uri, &cur);
732 if (ret != 0) return(ret);
733 ret = xmlParse3986PathAbEmpty(uri, &cur);
734 if (ret != 0) return(ret);
735 *str = cur;
736 return(0);
737 } else if (*cur == '/') {
738 ret = xmlParse3986PathAbsolute(uri, &cur);
739 if (ret != 0) return(ret);
740 } else if (ISA_PCHAR(cur)) {
741 ret = xmlParse3986PathRootless(uri, &cur);
742 if (ret != 0) return(ret);
743 } else {
744 /* path-empty is effectively empty */
745 if (uri != NULL) {
746 if (uri->path != NULL) xmlFree(uri->path);
747 uri->path = NULL;
748 }
749 }
750 *str = cur;
751 return (0);
752}
753
754/**
755 * xmlParse3986RelativeRef:
756 * @uri: pointer to an URI structure
757 * @str: the string to analyze
758 *
759 * Parse an URI string and fills in the appropriate fields
760 * of the @uri structure
761 *
762 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
763 * relative-part = "//" authority path-abempty
764 * / path-absolute
765 * / path-noscheme
766 * / path-empty
767 *
768 * Returns 0 or the error code
769 */
770static int
771xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
772 int ret;
773
774 if ((*str == '/') && (*(str + 1) == '/')) {
775 str += 2;
776 ret = xmlParse3986Authority(uri, &str);
777 if (ret != 0) return(ret);
778 ret = xmlParse3986PathAbEmpty(uri, &str);
779 if (ret != 0) return(ret);
780 } else if (*str == '/') {
781 ret = xmlParse3986PathAbsolute(uri, &str);
782 if (ret != 0) return(ret);
783 } else if (ISA_PCHAR(str)) {
784 ret = xmlParse3986PathNoScheme(uri, &str);
785 if (ret != 0) return(ret);
786 } else {
787 /* path-empty is effectively empty */
788 if (uri != NULL) {
789 if (uri->path != NULL) xmlFree(uri->path);
790 uri->path = NULL;
791 }
792 }
793
794 if (*str == '?') {
795 str++;
796 ret = xmlParse3986Query(uri, &str);
797 if (ret != 0) return(ret);
798 }
799 if (*str == '#') {
800 str++;
801 ret = xmlParse3986Fragment(uri, &str);
802 if (ret != 0) return(ret);
803 }
804 if (*str != 0) {
805 xmlCleanURI(uri);
806 return(1);
807 }
808 return(0);
809}
810
811
812/**
813 * xmlParse3986URI:
814 * @uri: pointer to an URI structure
815 * @str: the string to analyze
816 *
817 * Parse an URI string and fills in the appropriate fields
818 * of the @uri structure
819 *
820 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
821 *
822 * Returns 0 or the error code
823 */
824static int
825xmlParse3986URI(xmlURIPtr uri, const char *str) {
826 int ret;
827
828 ret = xmlParse3986Scheme(uri, &str);
829 if (ret != 0) return(ret);
830 if (*str != ':') {
831 return(1);
832 }
833 str++;
834 ret = xmlParse3986HierPart(uri, &str);
835 if (ret != 0) return(ret);
836 if (*str == '?') {
837 str++;
838 ret = xmlParse3986Query(uri, &str);
839 if (ret != 0) return(ret);
840 }
841 if (*str == '#') {
842 str++;
843 ret = xmlParse3986Fragment(uri, &str);
844 if (ret != 0) return(ret);
845 }
846 if (*str != 0) {
847 xmlCleanURI(uri);
848 return(1);
849 }
850 return(0);
851}
852
853/**
854 * xmlParse3986URIReference:
855 * @uri: pointer to an URI structure
856 * @str: the string to analyze
857 *
858 * Parse an URI reference string and fills in the appropriate fields
859 * of the @uri structure
860 *
861 * URI-reference = URI / relative-ref
862 *
863 * Returns 0 or the error code
864 */
865static int
866xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
867 int ret;
868
869 if (str == NULL)
870 return(-1);
871 xmlCleanURI(uri);
872
873 /*
874 * Try first to parse absolute refs, then fallback to relative if
875 * it fails.
876 */
877 ret = xmlParse3986URI(uri, str);
878 if (ret != 0) {
879 xmlCleanURI(uri);
880 ret = xmlParse3986RelativeRef(uri, str);
881 if (ret != 0) {
882 xmlCleanURI(uri);
883 return(ret);
884 }
885 }
886 return(0);
887}
888
889/**
890 * xmlParseURI:
891 * @str: the URI string to analyze
892 *
893 * Parse an URI based on RFC 3986
894 *
895 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
896 *
897 * Returns a newly built xmlURIPtr or NULL in case of error
898 */
899xmlURIPtr
900xmlParseURI(const char *str) {
901 xmlURIPtr uri;
902 int ret;
903
904 if (str == NULL)
905 return(NULL);
906 uri = xmlCreateURI();
907 if (uri != NULL) {
908 ret = xmlParse3986URIReference(uri, str);
909 if (ret) {
910 xmlFreeURI(uri);
911 return(NULL);
912 }
913 }
914 return(uri);
915}
916
917/**
918 * xmlParseURIReference:
919 * @uri: pointer to an URI structure
920 * @str: the string to analyze
921 *
922 * Parse an URI reference string based on RFC 3986 and fills in the
923 * appropriate fields of the @uri structure
924 *
925 * URI-reference = URI / relative-ref
926 *
927 * Returns 0 or the error code
928 */
929int
930xmlParseURIReference(xmlURIPtr uri, const char *str) {
931 return(xmlParse3986URIReference(uri, str));
932}
933
934/**
935 * xmlParseURIRaw:
936 * @str: the URI string to analyze
937 * @raw: if 1 unescaping of URI pieces are disabled
938 *
939 * Parse an URI but allows to keep intact the original fragments.
940 *
941 * URI-reference = URI / relative-ref
942 *
943 * Returns a newly built xmlURIPtr or NULL in case of error
944 */
945xmlURIPtr
946xmlParseURIRaw(const char *str, int raw) {
947 xmlURIPtr uri;
948 int ret;
949
950 if (str == NULL)
951 return(NULL);
952 uri = xmlCreateURI();
953 if (uri != NULL) {
954 if (raw) {
955 uri->cleanup |= 2;
956 }
957 ret = xmlParseURIReference(uri, str);
958 if (ret) {
959 xmlFreeURI(uri);
960 return(NULL);
961 }
962 }
963 return(uri);
964}
965
966/************************************************************************
967 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000968 * Generic URI structure functions *
969 * *
970 ************************************************************************/
971
972/**
973 * xmlCreateURI:
974 *
975 * Simply creates an empty xmlURI
976 *
977 * Returns the new structure or NULL in case of error
978 */
979xmlURIPtr
980xmlCreateURI(void) {
981 xmlURIPtr ret;
982
983 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
984 if (ret == NULL) {
985 xmlGenericError(xmlGenericErrorContext,
986 "xmlCreateURI: out of memory\n");
987 return(NULL);
988 }
989 memset(ret, 0, sizeof(xmlURI));
990 return(ret);
991}
992
993/**
994 * xmlSaveUri:
995 * @uri: pointer to an xmlURI
996 *
997 * Save the URI as an escaped string
998 *
999 * Returns a new string (to be deallocated by caller)
1000 */
1001xmlChar *
1002xmlSaveUri(xmlURIPtr uri) {
1003 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001004 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001005 const char *p;
1006 int len;
1007 int max;
1008
1009 if (uri == NULL) return(NULL);
1010
1011
1012 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001013 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001014 if (ret == NULL) {
1015 xmlGenericError(xmlGenericErrorContext,
1016 "xmlSaveUri: out of memory\n");
1017 return(NULL);
1018 }
1019 len = 0;
1020
1021 if (uri->scheme != NULL) {
1022 p = uri->scheme;
1023 while (*p != 0) {
1024 if (len >= max) {
1025 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001026 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1027 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001028 xmlGenericError(xmlGenericErrorContext,
1029 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001030 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001031 return(NULL);
1032 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001033 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001034 }
1035 ret[len++] = *p++;
1036 }
1037 if (len >= max) {
1038 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001039 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1040 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001041 xmlGenericError(xmlGenericErrorContext,
1042 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001043 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001046 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001047 }
1048 ret[len++] = ':';
1049 }
1050 if (uri->opaque != NULL) {
1051 p = uri->opaque;
1052 while (*p != 0) {
1053 if (len + 3 >= max) {
1054 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001055 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1056 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001057 xmlGenericError(xmlGenericErrorContext,
1058 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001059 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001060 return(NULL);
1061 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001062 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001063 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001064 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001065 ret[len++] = *p++;
1066 else {
1067 int val = *(unsigned char *)p++;
1068 int hi = val / 0x10, lo = val % 0x10;
1069 ret[len++] = '%';
1070 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1071 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1072 }
1073 }
Owen Taylor3473f882001-02-23 17:55:21 +00001074 } else {
1075 if (uri->server != NULL) {
1076 if (len + 3 >= max) {
1077 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001078 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1079 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001080 xmlGenericError(xmlGenericErrorContext,
1081 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001082 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001083 return(NULL);
1084 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001085 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001086 }
1087 ret[len++] = '/';
1088 ret[len++] = '/';
1089 if (uri->user != NULL) {
1090 p = uri->user;
1091 while (*p != 0) {
1092 if (len + 3 >= max) {
1093 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001094 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001095 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001096 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001097 xmlGenericError(xmlGenericErrorContext,
1098 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001099 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001100 return(NULL);
1101 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001102 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001103 }
1104 if ((IS_UNRESERVED(*(p))) ||
1105 ((*(p) == ';')) || ((*(p) == ':')) ||
1106 ((*(p) == '&')) || ((*(p) == '=')) ||
1107 ((*(p) == '+')) || ((*(p) == '$')) ||
1108 ((*(p) == ',')))
1109 ret[len++] = *p++;
1110 else {
1111 int val = *(unsigned char *)p++;
1112 int hi = val / 0x10, lo = val % 0x10;
1113 ret[len++] = '%';
1114 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1115 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1116 }
1117 }
1118 if (len + 3 >= max) {
1119 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001120 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001121 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001122 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001123 xmlGenericError(xmlGenericErrorContext,
1124 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001125 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001126 return(NULL);
1127 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001128 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001129 }
1130 ret[len++] = '@';
1131 }
1132 p = uri->server;
1133 while (*p != 0) {
1134 if (len >= max) {
1135 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001136 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001137 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001138 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001139 xmlGenericError(xmlGenericErrorContext,
1140 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001141 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001142 return(NULL);
1143 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001144 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001145 }
1146 ret[len++] = *p++;
1147 }
1148 if (uri->port > 0) {
1149 if (len + 10 >= max) {
1150 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001151 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001152 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001153 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001154 xmlGenericError(xmlGenericErrorContext,
1155 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001156 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001157 return(NULL);
1158 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001159 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001160 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +00001161 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001162 }
1163 } else if (uri->authority != NULL) {
1164 if (len + 3 >= max) {
1165 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001166 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001167 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001168 if (temp == NULL) {
1169 xmlGenericError(xmlGenericErrorContext,
1170 "xmlSaveUri: out of memory\n");
1171 xmlFree(ret);
1172 return(NULL);
1173 }
1174 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001175 }
1176 ret[len++] = '/';
1177 ret[len++] = '/';
1178 p = uri->authority;
1179 while (*p != 0) {
1180 if (len + 3 >= max) {
1181 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001182 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001183 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001184 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 xmlGenericError(xmlGenericErrorContext,
1186 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001187 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001188 return(NULL);
1189 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001190 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001191 }
1192 if ((IS_UNRESERVED(*(p))) ||
1193 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1194 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1195 ((*(p) == '=')) || ((*(p) == '+')))
1196 ret[len++] = *p++;
1197 else {
1198 int val = *(unsigned char *)p++;
1199 int hi = val / 0x10, lo = val % 0x10;
1200 ret[len++] = '%';
1201 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1202 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1203 }
1204 }
1205 } else if (uri->scheme != NULL) {
1206 if (len + 3 >= max) {
1207 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001208 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001209 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001210 if (temp == NULL) {
1211 xmlGenericError(xmlGenericErrorContext,
1212 "xmlSaveUri: out of memory\n");
1213 xmlFree(ret);
1214 return(NULL);
1215 }
1216 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001217 }
1218 ret[len++] = '/';
1219 ret[len++] = '/';
1220 }
1221 if (uri->path != NULL) {
1222 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001223 /*
1224 * the colon in file:///d: should not be escaped or
1225 * Windows accesses fail later.
1226 */
1227 if ((uri->scheme != NULL) &&
1228 (p[0] == '/') &&
1229 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1230 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1231 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001232 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001233 if (len + 3 >= max) {
1234 max *= 2;
1235 ret = (xmlChar *) xmlRealloc(ret,
1236 (max + 1) * sizeof(xmlChar));
1237 if (ret == NULL) {
1238 xmlGenericError(xmlGenericErrorContext,
1239 "xmlSaveUri: out of memory\n");
1240 return(NULL);
1241 }
1242 }
1243 ret[len++] = *p++;
1244 ret[len++] = *p++;
1245 ret[len++] = *p++;
1246 }
Owen Taylor3473f882001-02-23 17:55:21 +00001247 while (*p != 0) {
1248 if (len + 3 >= max) {
1249 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001250 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001251 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001252 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001253 xmlGenericError(xmlGenericErrorContext,
1254 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001255 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001256 return(NULL);
1257 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001258 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1261 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1262 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1263 ((*(p) == ',')))
1264 ret[len++] = *p++;
1265 else {
1266 int val = *(unsigned char *)p++;
1267 int hi = val / 0x10, lo = val % 0x10;
1268 ret[len++] = '%';
1269 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1270 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1271 }
1272 }
1273 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001274 if (uri->query_raw != NULL) {
1275 if (len + 1 >= max) {
1276 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001277 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillarda1413b82007-04-26 08:33:28 +00001278 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001279 if (temp == NULL) {
1280 xmlGenericError(xmlGenericErrorContext,
1281 "xmlSaveUri: out of memory\n");
1282 xmlFree(ret);
1283 return(NULL);
1284 }
1285 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001286 }
1287 ret[len++] = '?';
1288 p = uri->query_raw;
1289 while (*p != 0) {
1290 if (len + 1 >= max) {
1291 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001292 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillarda1413b82007-04-26 08:33:28 +00001293 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001294 if (temp == NULL) {
Daniel Veillarda1413b82007-04-26 08:33:28 +00001295 xmlGenericError(xmlGenericErrorContext,
1296 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001297 xmlFree(ret);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001298 return(NULL);
1299 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001300 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001301 }
1302 ret[len++] = *p++;
1303 }
1304 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001305 if (len + 3 >= max) {
1306 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001307 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001308 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001309 if (temp == NULL) {
1310 xmlGenericError(xmlGenericErrorContext,
1311 "xmlSaveUri: out of memory\n");
1312 xmlFree(ret);
1313 return(NULL);
1314 }
1315 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001316 }
1317 ret[len++] = '?';
1318 p = uri->query;
1319 while (*p != 0) {
1320 if (len + 3 >= max) {
1321 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001322 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001323 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001324 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 xmlGenericError(xmlGenericErrorContext,
1326 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001327 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001328 return(NULL);
1329 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001330 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001331 }
1332 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1333 ret[len++] = *p++;
1334 else {
1335 int val = *(unsigned char *)p++;
1336 int hi = val / 0x10, lo = val % 0x10;
1337 ret[len++] = '%';
1338 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1339 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1340 }
1341 }
1342 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001343 }
1344 if (uri->fragment != NULL) {
1345 if (len + 3 >= max) {
1346 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001347 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001348 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001349 if (temp == NULL) {
1350 xmlGenericError(xmlGenericErrorContext,
1351 "xmlSaveUri: out of memory\n");
1352 xmlFree(ret);
1353 return(NULL);
Daniel Veillardd7af5552008-08-04 15:29:44 +00001354 }
1355 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001356 }
1357 ret[len++] = '#';
1358 p = uri->fragment;
1359 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001360 if (len + 3 >= max) {
1361 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001362 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001363 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001364 if (temp == NULL) {
1365 xmlGenericError(xmlGenericErrorContext,
1366 "xmlSaveUri: out of memory\n");
1367 xmlFree(ret);
1368 return(NULL);
1369 }
1370 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001371 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001372 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1373 ret[len++] = *p++;
1374 else {
1375 int val = *(unsigned char *)p++;
1376 int hi = val / 0x10, lo = val % 0x10;
1377 ret[len++] = '%';
1378 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1379 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001380 }
1381 }
Owen Taylor3473f882001-02-23 17:55:21 +00001382 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001383 if (len >= max) {
1384 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001385 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1386 if (temp == NULL) {
1387 xmlGenericError(xmlGenericErrorContext,
1388 "xmlSaveUri: out of memory\n");
1389 xmlFree(ret);
1390 return(NULL);
1391 }
1392 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001393 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02001394 ret[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001395 return(ret);
1396}
1397
1398/**
1399 * xmlPrintURI:
1400 * @stream: a FILE* for the output
1401 * @uri: pointer to an xmlURI
1402 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001403 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001404 */
1405void
1406xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1407 xmlChar *out;
1408
1409 out = xmlSaveUri(uri);
1410 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001411 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001412 xmlFree(out);
1413 }
1414}
1415
1416/**
1417 * xmlCleanURI:
1418 * @uri: pointer to an xmlURI
1419 *
1420 * Make sure the xmlURI struct is free of content
1421 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001422static void
Owen Taylor3473f882001-02-23 17:55:21 +00001423xmlCleanURI(xmlURIPtr uri) {
1424 if (uri == NULL) return;
1425
1426 if (uri->scheme != NULL) xmlFree(uri->scheme);
1427 uri->scheme = NULL;
1428 if (uri->server != NULL) xmlFree(uri->server);
1429 uri->server = NULL;
1430 if (uri->user != NULL) xmlFree(uri->user);
1431 uri->user = NULL;
1432 if (uri->path != NULL) xmlFree(uri->path);
1433 uri->path = NULL;
1434 if (uri->fragment != NULL) xmlFree(uri->fragment);
1435 uri->fragment = NULL;
1436 if (uri->opaque != NULL) xmlFree(uri->opaque);
1437 uri->opaque = NULL;
1438 if (uri->authority != NULL) xmlFree(uri->authority);
1439 uri->authority = NULL;
1440 if (uri->query != NULL) xmlFree(uri->query);
1441 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001442 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1443 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001444}
1445
1446/**
1447 * xmlFreeURI:
1448 * @uri: pointer to an xmlURI
1449 *
1450 * Free up the xmlURI struct
1451 */
1452void
1453xmlFreeURI(xmlURIPtr uri) {
1454 if (uri == NULL) return;
1455
1456 if (uri->scheme != NULL) xmlFree(uri->scheme);
1457 if (uri->server != NULL) xmlFree(uri->server);
1458 if (uri->user != NULL) xmlFree(uri->user);
1459 if (uri->path != NULL) xmlFree(uri->path);
1460 if (uri->fragment != NULL) xmlFree(uri->fragment);
1461 if (uri->opaque != NULL) xmlFree(uri->opaque);
1462 if (uri->authority != NULL) xmlFree(uri->authority);
1463 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001464 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001465 xmlFree(uri);
1466}
1467
1468/************************************************************************
1469 * *
1470 * Helper functions *
1471 * *
1472 ************************************************************************/
1473
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" after the leading ones
 * are collapsed to a single "/" along the way.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so copy by hand, not strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /*
         * Step back over the separator(s) in front of "cur".  If that
         * reaches the start of the buffer, only slashes (or nothing)
         * precede "cur": there is no previous segment, keep going from
         * here.  Testing the separators rather than the position of the
         * previous character matters for a one-character first segment
         * such as the "a" of "a/b/../..", which sits at path[0].
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp" is now one past the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* The buffer is already terminated here: every removal above either
     * copied the terminator or wrote one at "cur".
     */

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001662
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of the hexadecimal digits 0-9, a-f or A-F,
 * 0 otherwise.
 */
static int is_hex(char c) {
    switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            return(1);
        default:
            return(0);
    }
}
1670
Owen Taylor3473f882001-02-23 17:55:21 +00001671/**
1672 * xmlURIUnescapeString:
1673 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +00001674 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001675 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001676 *
Daniel Veillarda44294f2007-04-24 08:57:54 +00001677 * Unescaping routine, but does not check that the string is an URI. The
1678 * output is a direct unsigned char translation of %XX values (no encoding)
Daniel Veillard79187652007-04-24 10:19:52 +00001679 * Note that the length of the result can only be smaller or same size as
1680 * the input string.
Owen Taylor3473f882001-02-23 17:55:21 +00001681 *
Daniel Veillard79187652007-04-24 10:19:52 +00001682 * Returns a copy of the string, but unescaped, will return NULL only in case
1683 * of error
Owen Taylor3473f882001-02-23 17:55:21 +00001684 */
1685char *
1686xmlURIUnescapeString(const char *str, int len, char *target) {
1687 char *ret, *out;
1688 const char *in;
1689
1690 if (str == NULL)
1691 return(NULL);
1692 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +00001693 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001694
1695 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001696 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00001697 if (ret == NULL) {
1698 xmlGenericError(xmlGenericErrorContext,
1699 "xmlURIUnescapeString: out of memory\n");
1700 return(NULL);
1701 }
1702 } else
1703 ret = target;
1704 in = str;
1705 out = ret;
1706 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +00001707 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001708 in++;
1709 if ((*in >= '0') && (*in <= '9'))
1710 *out = (*in - '0');
1711 else if ((*in >= 'a') && (*in <= 'f'))
1712 *out = (*in - 'a') + 10;
1713 else if ((*in >= 'A') && (*in <= 'F'))
1714 *out = (*in - 'A') + 10;
1715 in++;
1716 if ((*in >= '0') && (*in <= '9'))
1717 *out = *out * 16 + (*in - '0');
1718 else if ((*in >= 'a') && (*in <= 'f'))
1719 *out = *out * 16 + (*in - 'a') + 10;
1720 else if ((*in >= 'A') && (*in <= 'F'))
1721 *out = *out * 16 + (*in - 'A') + 10;
1722 in++;
1723 len -= 3;
1724 out++;
1725 } else {
1726 *out++ = *in++;
1727 len--;
1728 }
1729 }
1730 *out = 0;
1731 return(ret);
1732}
1733
1734/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001735 * xmlURIEscapeStr:
1736 * @str: string to escape
1737 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001738 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001739 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1740 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001741 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001742 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001743 */
1744xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001745xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1746 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001747 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +00001749
Owen Taylor3473f882001-02-23 17:55:21 +00001750 unsigned int len, out;
1751
1752 if (str == NULL)
1753 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001754 if (str[0] == 0)
1755 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001756 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001757 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001758
1759 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001760 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001761 if (ret == NULL) {
1762 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001763 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001764 return(NULL);
1765 }
1766 in = (const xmlChar *) str;
1767 out = 0;
1768 while(*in != 0) {
1769 if (len - out <= 3) {
1770 len += 20;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001771 temp = (xmlChar *) xmlRealloc(ret, len);
1772 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001773 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001774 "xmlURIEscapeStr: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001775 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001776 return(NULL);
1777 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001778 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001779 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001780
1781 ch = *in;
1782
Daniel Veillardeb475a32002-04-14 22:00:22 +00001783 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001784 unsigned char val;
1785 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001786 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001787 if (val <= 9)
1788 ret[out++] = '0' + val;
1789 else
1790 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001791 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001792 if (val <= 9)
1793 ret[out++] = '0' + val;
1794 else
1795 ret[out++] = 'A' + val - 0xA;
1796 in++;
1797 } else {
1798 ret[out++] = *in++;
1799 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 }
1802 ret[out] = 0;
1803 return(ret);
1804}
1805
Daniel Veillard8514c672001-05-23 10:29:12 +00001806/**
1807 * xmlURIEscape:
1808 * @str: the string of the URI to escape
1809 *
1810 * Escaping routine, does not do validity checks !
1811 * It will try to escape the chars needing this, but this is heuristic
1812 * based it's impossible to be sure.
1813 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001814 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001815 *
1816 * 25 May 2001
1817 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1818 * according to RFC2396.
1819 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001820 */
1821xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001822xmlURIEscape(const xmlChar * str)
1823{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001824 xmlChar *ret, *segment = NULL;
1825 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001826 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001827
Daniel Veillard6278fb52001-05-25 07:38:41 +00001828#define NULLCHK(p) if(!p) { \
1829 xmlGenericError(xmlGenericErrorContext, \
1830 "xmlURIEscape: out of memory\n"); \
Daniel Veillarded86dc22008-04-24 11:58:41 +00001831 xmlFreeURI(uri); \
1832 return NULL; } \
Daniel Veillard6278fb52001-05-25 07:38:41 +00001833
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001834 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001835 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001836
1837 uri = xmlCreateURI();
1838 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001839 /*
1840 * Allow escaping errors in the unescaped form
1841 */
1842 uri->cleanup = 1;
1843 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001844 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001845 xmlFreeURI(uri);
1846 return (NULL);
1847 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001848 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001849
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001850 if (!uri)
1851 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001852
1853 ret = NULL;
1854
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001855 if (uri->scheme) {
1856 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1857 NULLCHK(segment)
1858 ret = xmlStrcat(ret, segment);
1859 ret = xmlStrcat(ret, BAD_CAST ":");
1860 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001861 }
1862
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001863 if (uri->authority) {
1864 segment =
1865 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1866 NULLCHK(segment)
1867 ret = xmlStrcat(ret, BAD_CAST "//");
1868 ret = xmlStrcat(ret, segment);
1869 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001870 }
1871
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001872 if (uri->user) {
1873 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1874 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001875 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001876 ret = xmlStrcat(ret, segment);
1877 ret = xmlStrcat(ret, BAD_CAST "@");
1878 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001879 }
1880
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001881 if (uri->server) {
1882 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1883 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001884 if (uri->user == NULL)
Daniel Veillardd7af5552008-08-04 15:29:44 +00001885 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001886 ret = xmlStrcat(ret, segment);
1887 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001888 }
1889
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001890 if (uri->port) {
1891 xmlChar port[10];
1892
Daniel Veillard43d3f612001-11-10 11:57:23 +00001893 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001894 ret = xmlStrcat(ret, BAD_CAST ":");
1895 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001896 }
1897
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001898 if (uri->path) {
1899 segment =
1900 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1901 NULLCHK(segment)
1902 ret = xmlStrcat(ret, segment);
1903 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001904 }
1905
Daniel Veillarda1413b82007-04-26 08:33:28 +00001906 if (uri->query_raw) {
1907 ret = xmlStrcat(ret, BAD_CAST "?");
1908 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1909 }
1910 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001911 segment =
1912 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1913 NULLCHK(segment)
1914 ret = xmlStrcat(ret, BAD_CAST "?");
1915 ret = xmlStrcat(ret, segment);
1916 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001917 }
1918
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001919 if (uri->opaque) {
1920 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1921 NULLCHK(segment)
1922 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001923 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001924 }
1925
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001926 if (uri->fragment) {
1927 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1928 NULLCHK(segment)
1929 ret = xmlStrcat(ret, BAD_CAST "#");
1930 ret = xmlStrcat(ret, segment);
1931 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001932 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001933
1934 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001935#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001936
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001937 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001938}
1939
Owen Taylor3473f882001-02-23 17:55:21 +00001940/************************************************************************
1941 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001942 * Public functions *
1943 * *
1944 ************************************************************************/
1945
1946/**
1947 * xmlBuildURI:
1948 * @URI: the URI instance found in the document
1949 * @base: the base value
1950 *
1951 * Computes he final URI of the reference done by checking that
1952 * the given URI is valid, and building the final URI using the
1953 * base URI. This is processed according to section 5.2 of the
1954 * RFC 2396
1955 *
1956 * 5.2. Resolving Relative References to Absolute Form
1957 *
1958 * Returns a new URI string (to be freed by the caller) or NULL in case
1959 * of error.
1960 */
1961xmlChar *
1962xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1963 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001964 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 xmlURIPtr ref = NULL;
1966 xmlURIPtr bas = NULL;
1967 xmlURIPtr res = NULL;
1968
1969 /*
1970 * 1) The URI reference is parsed into the potential four components and
1971 * fragment identifier, as described in Section 4.3.
1972 *
1973 * NOTE that a completely empty URI is treated by modern browsers
1974 * as a reference to "." rather than as a synonym for the current
1975 * URI. Should we do that here?
1976 */
1977 if (URI == NULL)
1978 ret = -1;
1979 else {
1980 if (*URI) {
1981 ref = xmlCreateURI();
1982 if (ref == NULL)
1983 goto done;
1984 ret = xmlParseURIReference(ref, (const char *) URI);
1985 }
1986 else
1987 ret = 0;
1988 }
1989 if (ret != 0)
1990 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001991 if ((ref != NULL) && (ref->scheme != NULL)) {
1992 /*
1993 * The URI is absolute don't modify.
1994 */
1995 val = xmlStrdup(URI);
1996 goto done;
1997 }
Owen Taylor3473f882001-02-23 17:55:21 +00001998 if (base == NULL)
1999 ret = -1;
2000 else {
2001 bas = xmlCreateURI();
2002 if (bas == NULL)
2003 goto done;
2004 ret = xmlParseURIReference(bas, (const char *) base);
2005 }
2006 if (ret != 0) {
2007 if (ref)
2008 val = xmlSaveUri(ref);
2009 goto done;
2010 }
2011 if (ref == NULL) {
2012 /*
2013 * the base fragment must be ignored
2014 */
2015 if (bas->fragment != NULL) {
2016 xmlFree(bas->fragment);
2017 bas->fragment = NULL;
2018 }
2019 val = xmlSaveUri(bas);
2020 goto done;
2021 }
2022
2023 /*
2024 * 2) If the path component is empty and the scheme, authority, and
2025 * query components are undefined, then it is a reference to the
2026 * current document and we are done. Otherwise, the reference URI's
2027 * query and fragment components are defined as found (or not found)
2028 * within the URI reference and not inherited from the base URI.
2029 *
2030 * NOTE that in modern browsers, the parsing differs from the above
2031 * in the following aspect: the query component is allowed to be
2032 * defined while still treating this as a reference to the current
2033 * document.
2034 */
2035 res = xmlCreateURI();
2036 if (res == NULL)
2037 goto done;
2038 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2039 ((ref->authority == NULL) && (ref->server == NULL))) {
2040 if (bas->scheme != NULL)
2041 res->scheme = xmlMemStrdup(bas->scheme);
2042 if (bas->authority != NULL)
2043 res->authority = xmlMemStrdup(bas->authority);
2044 else if (bas->server != NULL) {
2045 res->server = xmlMemStrdup(bas->server);
2046 if (bas->user != NULL)
2047 res->user = xmlMemStrdup(bas->user);
2048 res->port = bas->port;
2049 }
2050 if (bas->path != NULL)
2051 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002052 if (ref->query_raw != NULL)
2053 res->query_raw = xmlMemStrdup (ref->query_raw);
2054 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00002055 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002056 else if (bas->query_raw != NULL)
2057 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00002058 else if (bas->query != NULL)
2059 res->query = xmlMemStrdup(bas->query);
2060 if (ref->fragment != NULL)
2061 res->fragment = xmlMemStrdup(ref->fragment);
2062 goto step_7;
2063 }
Owen Taylor3473f882001-02-23 17:55:21 +00002064
2065 /*
2066 * 3) If the scheme component is defined, indicating that the reference
2067 * starts with a scheme name, then the reference is interpreted as an
2068 * absolute URI and we are done. Otherwise, the reference URI's
2069 * scheme is inherited from the base URI's scheme component.
2070 */
2071 if (ref->scheme != NULL) {
2072 val = xmlSaveUri(ref);
2073 goto done;
2074 }
2075 if (bas->scheme != NULL)
2076 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00002077
Daniel Veillarda1413b82007-04-26 08:33:28 +00002078 if (ref->query_raw != NULL)
2079 res->query_raw = xmlMemStrdup(ref->query_raw);
2080 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002081 res->query = xmlMemStrdup(ref->query);
2082 if (ref->fragment != NULL)
2083 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002084
2085 /*
2086 * 4) If the authority component is defined, then the reference is a
2087 * network-path and we skip to step 7. Otherwise, the reference
2088 * URI's authority is inherited from the base URI's authority
2089 * component, which will also be undefined if the URI scheme does not
2090 * use an authority component.
2091 */
2092 if ((ref->authority != NULL) || (ref->server != NULL)) {
2093 if (ref->authority != NULL)
2094 res->authority = xmlMemStrdup(ref->authority);
2095 else {
2096 res->server = xmlMemStrdup(ref->server);
2097 if (ref->user != NULL)
2098 res->user = xmlMemStrdup(ref->user);
2099 res->port = ref->port;
2100 }
2101 if (ref->path != NULL)
2102 res->path = xmlMemStrdup(ref->path);
2103 goto step_7;
2104 }
2105 if (bas->authority != NULL)
2106 res->authority = xmlMemStrdup(bas->authority);
2107 else if (bas->server != NULL) {
2108 res->server = xmlMemStrdup(bas->server);
2109 if (bas->user != NULL)
2110 res->user = xmlMemStrdup(bas->user);
2111 res->port = bas->port;
2112 }
2113
2114 /*
2115 * 5) If the path component begins with a slash character ("/"), then
2116 * the reference is an absolute-path and we skip to step 7.
2117 */
2118 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2119 res->path = xmlMemStrdup(ref->path);
2120 goto step_7;
2121 }
2122
2123
2124 /*
2125 * 6) If this step is reached, then we are resolving a relative-path
2126 * reference. The relative path needs to be merged with the base
2127 * URI's path. Although there are many ways to do this, we will
2128 * describe a simple method using a separate string buffer.
2129 *
2130 * Allocate a buffer large enough for the result string.
2131 */
2132 len = 2; /* extra / and 0 */
2133 if (ref->path != NULL)
2134 len += strlen(ref->path);
2135 if (bas->path != NULL)
2136 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002137 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002138 if (res->path == NULL) {
2139 xmlGenericError(xmlGenericErrorContext,
2140 "xmlBuildURI: out of memory\n");
2141 goto done;
2142 }
2143 res->path[0] = 0;
2144
2145 /*
2146 * a) All but the last segment of the base URI's path component is
2147 * copied to the buffer. In other words, any characters after the
2148 * last (right-most) slash character, if any, are excluded.
2149 */
2150 cur = 0;
2151 out = 0;
2152 if (bas->path != NULL) {
2153 while (bas->path[cur] != 0) {
2154 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2155 cur++;
2156 if (bas->path[cur] == 0)
2157 break;
2158
2159 cur++;
2160 while (out < cur) {
2161 res->path[out] = bas->path[out];
2162 out++;
2163 }
2164 }
2165 }
2166 res->path[out] = 0;
2167
2168 /*
2169 * b) The reference's path component is appended to the buffer
2170 * string.
2171 */
2172 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002173 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002174 /*
2175 * Ensure the path includes a '/'
2176 */
2177 if ((out == 0) && (bas->server != NULL))
2178 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002179 while (ref->path[indx] != 0) {
2180 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002181 }
2182 }
2183 res->path[out] = 0;
2184
2185 /*
2186 * Steps c) to h) are really path normalization steps
2187 */
2188 xmlNormalizeURIPath(res->path);
2189
2190step_7:
2191
2192 /*
2193 * 7) The resulting URI components, including any inherited from the
2194 * base URI, are recombined to give the absolute form of the URI
2195 * reference.
2196 */
2197 val = xmlSaveUri(res);
2198
2199done:
2200 if (ref != NULL)
2201 xmlFreeURI(ref);
2202 if (bas != NULL)
2203 xmlFreeURI(bas);
2204 if (res != NULL)
2205 xmlFreeURI(res);
2206 return(val);
2207}
2208
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002209/**
William M. Brackf7789b12004-06-07 08:57:27 +00002210 * xmlBuildRelativeURI:
2211 * @URI: the URI reference under consideration
2212 * @base: the base value
2213 *
2214 * Expresses the URI of the reference in terms relative to the
2215 * base. Some examples of this operation include:
2216 * base = "http://site1.com/docs/book1.html"
2217 * URI input URI returned
2218 * docs/pic1.gif pic1.gif
2219 * docs/img/pic1.gif img/pic1.gif
2220 * img/pic1.gif ../img/pic1.gif
2221 * http://site1.com/docs/pic1.gif pic1.gif
2222 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2223 *
2224 * base = "docs/book1.html"
2225 * URI input URI returned
2226 * docs/pic1.gif pic1.gif
2227 * docs/img/pic1.gif img/pic1.gif
2228 * img/pic1.gif ../img/pic1.gif
2229 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2230 *
2231 *
2232 * Note: if the URI reference is really wierd or complicated, it may be
2233 * worthwhile to first convert it into a "nice" one by calling
2234 * xmlBuildURI (using 'base') before calling this routine,
2235 * since this routine (for reasonable efficiency) assumes URI has
2236 * already been through some validation.
2237 *
2238 * Returns a new URI string (to be freed by the caller) or NULL in case
2239 * error.
2240 */
2241xmlChar *
2242xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2243{
2244 xmlChar *val = NULL;
2245 int ret;
2246 int ix;
2247 int pos = 0;
2248 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002249 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002250 xmlURIPtr ref = NULL;
2251 xmlURIPtr bas = NULL;
2252 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002253 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002254
2255 if ((URI == NULL) || (*URI == 0))
2256 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002257
2258 /*
2259 * First parse URI into a standard form
2260 */
2261 ref = xmlCreateURI ();
2262 if (ref == NULL)
2263 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002264 /* If URI not already in "relative" form */
2265 if (URI[0] != '.') {
2266 ret = xmlParseURIReference (ref, (const char *) URI);
2267 if (ret != 0)
2268 goto done; /* Error in URI, return NULL */
2269 } else
2270 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002271
2272 /*
2273 * Next parse base into the same standard form
2274 */
2275 if ((base == NULL) || (*base == 0)) {
2276 val = xmlStrdup (URI);
2277 goto done;
2278 }
2279 bas = xmlCreateURI ();
2280 if (bas == NULL)
2281 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002282 if (base[0] != '.') {
2283 ret = xmlParseURIReference (bas, (const char *) base);
2284 if (ret != 0)
2285 goto done; /* Error in base, return NULL */
2286 } else
2287 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002288
2289 /*
2290 * If the scheme / server on the URI differs from the base,
2291 * just return the URI
2292 */
2293 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002294 ((bas->scheme == NULL) ||
2295 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2296 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002297 val = xmlStrdup (URI);
2298 goto done;
2299 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002300 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2301 val = xmlStrdup(BAD_CAST "");
2302 goto done;
2303 }
2304 if (bas->path == NULL) {
2305 val = xmlStrdup((xmlChar *)ref->path);
2306 goto done;
2307 }
2308 if (ref->path == NULL) {
2309 ref->path = (char *) "/";
2310 remove_path = 1;
2311 }
William M. Brackf7789b12004-06-07 08:57:27 +00002312
2313 /*
2314 * At this point (at last!) we can compare the two paths
2315 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002316 * First we take care of the special case where either of the
2317 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002318 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002319 if (bas->path == NULL) {
2320 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002321 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002322 if (*uptr == '/')
2323 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002324 /* exception characters from xmlSaveUri */
2325 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002326 }
2327 goto done;
2328 }
William M. Brackf7789b12004-06-07 08:57:27 +00002329 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002330 if (ref->path == NULL) {
2331 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002332 if (bptr[ix] == '/')
2333 nbslash++;
2334 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002335 uptr = NULL;
2336 len = 1; /* this is for a string terminator only */
2337 } else {
2338 /*
2339 * Next we compare the two strings and find where they first differ
2340 */
2341 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2342 pos += 2;
2343 if ((*bptr == '.') && (bptr[1] == '/'))
2344 bptr += 2;
2345 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2346 bptr++;
2347 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2348 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002349
William M. Brack820d5ed2005-09-14 05:24:27 +00002350 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002351 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002352 goto done; /* (I can't imagine why anyone would do this) */
2353 }
2354
2355 /*
2356 * In URI, "back up" to the last '/' encountered. This will be the
2357 * beginning of the "unique" suffix of URI
2358 */
2359 ix = pos;
2360 if ((ref->path[ix] == '/') && (ix > 0))
2361 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002362 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2363 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002364 for (; ix > 0; ix--) {
2365 if (ref->path[ix] == '/')
2366 break;
2367 }
2368 if (ix == 0) {
2369 uptr = (xmlChar *)ref->path;
2370 } else {
2371 ix++;
2372 uptr = (xmlChar *)&ref->path[ix];
2373 }
2374
2375 /*
2376 * In base, count the number of '/' from the differing point
2377 */
2378 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2379 for (; bptr[ix] != 0; ix++) {
2380 if (bptr[ix] == '/')
2381 nbslash++;
2382 }
2383 }
2384 len = xmlStrlen (uptr) + 1;
2385 }
2386
William M. Brackf7789b12004-06-07 08:57:27 +00002387 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002388 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002389 /* exception characters from xmlSaveUri */
2390 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002391 goto done;
2392 }
William M. Brackf7789b12004-06-07 08:57:27 +00002393
2394 /*
2395 * Allocate just enough space for the returned string -
2396 * length of the remainder of the URI, plus enough space
2397 * for the "../" groups, plus one for the terminator
2398 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002399 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002400 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002401 xmlGenericError(xmlGenericErrorContext,
2402 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002403 goto done;
2404 }
2405 vptr = val;
2406 /*
2407 * Put in as many "../" as needed
2408 */
2409 for (; nbslash>0; nbslash--) {
2410 *vptr++ = '.';
2411 *vptr++ = '.';
2412 *vptr++ = '/';
2413 }
2414 /*
2415 * Finish up with the end of the URI
2416 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002417 if (uptr != NULL) {
2418 if ((vptr > val) && (len > 0) &&
2419 (uptr[0] == '/') && (vptr[-1] == '/')) {
2420 memcpy (vptr, uptr + 1, len - 1);
2421 vptr[len - 2] = 0;
2422 } else {
2423 memcpy (vptr, uptr, len);
2424 vptr[len - 1] = 0;
2425 }
2426 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002427 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002428 }
William M. Brackf7789b12004-06-07 08:57:27 +00002429
William M. Brack50420192007-07-20 01:09:08 +00002430 /* escape the freshly-built path */
2431 vptr = val;
2432 /* exception characters from xmlSaveUri */
2433 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2434 xmlFree(vptr);
2435
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002436done:
William M. Brackf7789b12004-06-07 08:57:27 +00002437 /*
2438 * Free the working variables
2439 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002440 if (remove_path != 0)
2441 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002442 if (ref != NULL)
2443 xmlFreeURI (ref);
2444 if (bas != NULL)
2445 xmlFreeURI (bas);
2446
2447 return val;
2448}
2449
2450/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002451 * xmlCanonicPath:
2452 * @path: the resource locator in a filesystem notation
2453 *
2454 * Constructs a canonic path from the specified path.
2455 *
2456 * Returns a new canonic path, or a duplicate of the path parameter if the
2457 * construction fails. The caller is responsible for freeing the memory occupied
2458 * by the returned string. If there is insufficient memory available, or the
2459 * argument is NULL, the function returns NULL.
2460 */
2461#define IS_WINDOWS_PATH(p) \
2462 ((p != NULL) && \
2463 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2464 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2465 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002466xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002467xmlCanonicPath(const xmlChar *path)
2468{
William M. Brack22242272007-01-27 07:59:37 +00002469/*
2470 * For Windows implementations, additional work needs to be done to
2471 * replace backslashes in pathnames with "forward slashes"
2472 */
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002473#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002474 int len = 0;
2475 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002476 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002477#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002478 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002479 xmlChar *ret;
2480 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002481
2482 if (path == NULL)
2483 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002484
2485 /* sanitize filename starting with // so it can be used as URI */
2486 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2487 path++;
2488
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002489 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002490 xmlFreeURI(uri);
2491 return xmlStrdup(path);
2492 }
2493
William M. Brack22242272007-01-27 07:59:37 +00002494 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002495 absuri = xmlStrstr(path, BAD_CAST "://");
2496 if (absuri != NULL) {
2497 int l, j;
2498 unsigned char c;
2499 xmlChar *escURI;
2500
2501 /*
2502 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002503 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002504 * part matches a protocol.
2505 */
2506 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002507 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002508 if ((l <= 0) || (l > 20))
2509 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002510 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002511 for (j = 0;j < l;j++) {
2512 c = path[j];
2513 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2514 goto path_processing;
2515 }
2516
William M. Brack22242272007-01-27 07:59:37 +00002517 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002518 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2519 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002520 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002521 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002522 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002523 if (uri != NULL) {
2524 xmlFreeURI(uri);
2525 return escURI;
2526 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002527 }
2528 }
2529
2530path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002531/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002532#if defined(_WIN32) && !defined(__CYGWIN__)
2533 /*
William M. Brack22242272007-01-27 07:59:37 +00002534 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002535 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002536 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002537 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002538 return(NULL);
2539 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002540
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002541 len = xmlStrlen(path);
2542 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002543 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002544 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002545 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002546 uri->path = xmlMallocAtomic(len + 2);
2547 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002548 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002549 return(NULL);
2550 }
William M. Brack22242272007-01-27 07:59:37 +00002551 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002552 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002553 p = uri->path + 1;
2554 strncpy(p, path, len + 1);
2555 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002556 uri->path = xmlStrdup(path);
2557 if (uri->path == NULL) {
2558 xmlFreeURI(uri);
2559 return(NULL);
2560 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002561 p = uri->path;
2562 }
William M. Brack22242272007-01-27 07:59:37 +00002563 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002564 while (*p != '\0') {
2565 if (*p == '\\')
2566 *p = '/';
2567 p++;
2568 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002569
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002570 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002571 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002572 } else {
2573 ret = xmlSaveUri(uri);
2574 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002575
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002576 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002577#else
2578 ret = xmlStrdup((const xmlChar *) path);
2579#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002580 return(ret);
2581}
Owen Taylor3473f882001-02-23 17:55:21 +00002582
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002583/**
2584 * xmlPathToURI:
2585 * @path: the resource locator in a filesystem notation
2586 *
2587 * Constructs an URI expressing the existing path
2588 *
2589 * Returns a new URI, or a duplicate of the path parameter if the
2590 * construction fails. The caller is responsible for freeing the memory
2591 * occupied by the returned string. If there is insufficient memory available,
2592 * or the argument is NULL, the function returns NULL.
2593 */
2594xmlChar *
2595xmlPathToURI(const xmlChar *path)
2596{
2597 xmlURIPtr uri;
2598 xmlURI temp;
2599 xmlChar *ret, *cal;
2600
2601 if (path == NULL)
2602 return(NULL);
2603
2604 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2605 xmlFreeURI(uri);
2606 return xmlStrdup(path);
2607 }
2608 cal = xmlCanonicPath(path);
2609 if (cal == NULL)
2610 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002611#if defined(_WIN32) && !defined(__CYGWIN__)
2612 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2613 If 'cal' is a valid URI allready then we are done here, as continuing would make
2614 it invalid. */
2615 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2616 xmlFreeURI(uri);
2617 return cal;
2618 }
2619 /* 'cal' can contain a relative path with backslashes. If that is processed
2620 by xmlSaveURI, they will be escaped and the external entity loader machinery
2621 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2622 ret = cal;
2623 while (*ret != '\0') {
2624 if (*ret == '\\')
2625 *ret = '/';
2626 ret++;
2627 }
2628#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002629 memset(&temp, 0, sizeof(temp));
2630 temp.path = (char *) cal;
2631 ret = xmlSaveUri(&temp);
2632 xmlFree(cal);
2633 return(ret);
2634}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002635#define bottom_uri
2636#include "elfgcchack.h"