blob: bd2e96d78643b87045bf1dd071b051755a6e1999 [file] [log] [blame]
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 3986, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
Daniel Veillardd7af5552008-08-04 15:29:44 +000021static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000022
/*
 * Character classes of the older RFC 2396 grammar, kept for the legacy
 * handling code.  Each macro takes a character value, except IS_UNWISE
 * which takes a pointer to the character.  Arguments may be evaluated
 * more than once, so they must be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any IS_DIGIT definition already in scope is replaced by this one.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike its siblings this macro expects a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
90
/*
 * Advance the pointer lvalue @p to the next character: a '%' is taken as
 * the start of a three byte escape sequence and skipped as a whole, any
 * other character counts for a single byte.  The escape itself is not
 * validated here, so the macro must only be applied to a character the
 * caller has already accepted.  @p is evaluated more than once.
 */

#define NEXT(p) (((*(p)) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and hand the
 * copy back as a plain char pointer.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/************************************************************************
110 * *
Daniel Veillardd7af5552008-08-04 15:29:44 +0000111 * RFC 3986 parser *
112 * *
113 ************************************************************************/
114
/*
 * Character classes of the RFC 3986 grammar.  Every macro takes a pointer
 * to the character to classify and may evaluate it more than once, so the
 * argument must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two bytes after the '%' are only looked at once the '%' itself has
 * matched, and the second one only if the first is a hex digit, so the
 * test never reads past a terminating 0.  The argument is parenthesized
 * before the offset is added so that any pointer expression can be used.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
164
165/**
166 * xmlParse3986Scheme:
167 * @uri: pointer to an URI structure
168 * @str: pointer to the string to analyze
169 *
170 * Parse an URI scheme
171 *
172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173 *
174 * Returns 0 or the error code
175 */
176static int
177xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178 const char *cur;
179
180 if (str == NULL)
181 return(-1);
182
183 cur = *str;
184 if (!ISA_ALPHA(cur))
185 return(2);
186 cur++;
187 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189 if (uri != NULL) {
190 if (uri->scheme != NULL) xmlFree(uri->scheme);
191 uri->scheme = STRNDUP(*str, cur - *str);
192 }
193 *str = cur;
194 return(0);
195}
196
197/**
198 * xmlParse3986Fragment:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
201 *
202 * Parse the query part of an URI
203 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000204 * fragment = *( pchar / "/" / "?" )
205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206 * in the fragment identifier but this is used very broadly for
207 * xpointer scheme selection, so we are allowing it here to not break
208 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000209 *
210 * Returns 0 or the error code
211 */
212static int
213xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214{
215 const char *cur;
216
217 if (str == NULL)
218 return (-1);
219
220 cur = *str;
221
222 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000223 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000224 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225 NEXT(cur);
226 if (uri != NULL) {
227 if (uri->fragment != NULL)
228 xmlFree(uri->fragment);
229 if (uri->cleanup & 2)
230 uri->fragment = STRNDUP(*str, cur - *str);
231 else
232 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233 }
234 *str = cur;
235 return (0);
236}
237
238/**
239 * xmlParse3986Query:
240 * @uri: pointer to an URI structure
241 * @str: pointer to the string to analyze
242 *
243 * Parse the query part of an URI
244 *
245 * query = *uric
246 *
247 * Returns 0 or the error code
248 */
249static int
250xmlParse3986Query(xmlURIPtr uri, const char **str)
251{
252 const char *cur;
253
254 if (str == NULL)
255 return (-1);
256
257 cur = *str;
258
259 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261 NEXT(cur);
262 if (uri != NULL) {
263 if (uri->query != NULL)
264 xmlFree(uri->query);
265 if (uri->cleanup & 2)
266 uri->query = STRNDUP(*str, cur - *str);
267 else
268 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269
270 /* Save the raw bytes of the query as well.
271 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272 */
273 if (uri->query_raw != NULL)
274 xmlFree (uri->query_raw);
275 uri->query_raw = STRNDUP (*str, cur - *str);
276 }
277 *str = cur;
278 return (0);
279}
280
281/**
282 * xmlParse3986Port:
283 * @uri: pointer to an URI structure
284 * @str: the string to analyze
285 *
286 * Parse a port part and fills in the appropriate fields
287 * of the @uri structure
288 *
289 * port = *DIGIT
290 *
291 * Returns 0 or the error code
292 */
293static int
294xmlParse3986Port(xmlURIPtr uri, const char **str)
295{
296 const char *cur = *str;
297
298 if (ISA_DIGIT(cur)) {
299 if (uri != NULL)
300 uri->port = 0;
301 while (ISA_DIGIT(cur)) {
302 if (uri != NULL)
303 uri->port = uri->port * 10 + (*cur - '0');
304 cur++;
305 }
306 *str = cur;
307 return(0);
308 }
309 return(1);
310}
311
312/**
313 * xmlParse3986Userinfo:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
317 * Parse an user informations part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326{
327 const char *cur;
328
329 cur = *str;
330 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331 ISA_SUB_DELIM(cur) || (*cur == ':'))
332 NEXT(cur);
333 if (*cur == '@') {
334 if (uri != NULL) {
335 if (uri->user != NULL) xmlFree(uri->user);
336 if (uri->cleanup & 2)
337 uri->user = STRNDUP(*str, cur - *str);
338 else
339 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340 }
341 *str = cur;
342 return(0);
343 }
344 return(1);
345}
346
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, moved past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  At most three digits are looked at: they must not
 * carry a leading zero and must not denote a value above 255.  A fourth
 * digit, if any, is left in place for the caller to deal with.
 *
 * Returns 0 if found and skipped, 1 otherwise (@str is then unchanged)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int value = 0;
    int ndigits = 0;

    /* gather up to three decimal digits, the scan stops at any other byte */
    while ((ndigits < 3) &&
           (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    /* "0" is the only form allowed to start with a zero */
    if ((ndigits > 1) && (cur[0] == '0'))
        return(1);
    if (value > 255)
        return(1);

    *str = cur + ndigits;
    return(0);
}
384/**
385 * xmlParse3986Host:
386 * @uri: pointer to an URI structure
387 * @str: the string to analyze
388 *
389 * Parse an host part and fills in the appropriate fields
390 * of the @uri structure
391 *
392 * host = IP-literal / IPv4address / reg-name
393 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
394 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
395 * reg-name = *( unreserved / pct-encoded / sub-delims )
396 *
397 * Returns 0 or the error code
398 */
399static int
400xmlParse3986Host(xmlURIPtr uri, const char **str)
401{
402 const char *cur = *str;
403 const char *host;
404
405 host = cur;
406 /*
407 * IPv6 and future adressing scheme are enclosed between brackets
408 */
409 if (*cur == '[') {
410 cur++;
411 while ((*cur != ']') && (*cur != 0))
412 cur++;
413 if (*cur != ']')
414 return(1);
415 cur++;
416 goto found;
417 }
418 /*
419 * try to parse an IPv4
420 */
421 if (ISA_DIGIT(cur)) {
422 if (xmlParse3986DecOctet(&cur) != 0)
423 goto not_ipv4;
424 if (*cur != '.')
425 goto not_ipv4;
426 cur++;
427 if (xmlParse3986DecOctet(&cur) != 0)
428 goto not_ipv4;
429 if (*cur != '.')
430 goto not_ipv4;
431 if (xmlParse3986DecOctet(&cur) != 0)
432 goto not_ipv4;
433 if (*cur != '.')
434 goto not_ipv4;
435 if (xmlParse3986DecOctet(&cur) != 0)
436 goto not_ipv4;
437 goto found;
438not_ipv4:
439 cur = *str;
440 }
441 /*
442 * then this should be a hostname which can be empty
443 */
444 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445 NEXT(cur);
446found:
447 if (uri != NULL) {
448 if (uri->authority != NULL) xmlFree(uri->authority);
449 uri->authority = NULL;
450 if (uri->server != NULL) xmlFree(uri->server);
451 if (cur != host) {
452 if (uri->cleanup & 2)
453 uri->server = STRNDUP(host, cur - host);
454 else
455 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456 } else
457 uri->server = NULL;
458 }
459 *str = cur;
460 return(0);
461}
462
463/**
464 * xmlParse3986Authority:
465 * @uri: pointer to an URI structure
466 * @str: the string to analyze
467 *
468 * Parse an authority part and fills in the appropriate fields
469 * of the @uri structure
470 *
471 * authority = [ userinfo "@" ] host [ ":" port ]
472 *
473 * Returns 0 or the error code
474 */
475static int
476xmlParse3986Authority(xmlURIPtr uri, const char **str)
477{
478 const char *cur;
479 int ret;
480
481 cur = *str;
482 /*
483 * try to parse an userinfo and check for the trailing @
484 */
485 ret = xmlParse3986Userinfo(uri, &cur);
486 if ((ret != 0) || (*cur != '@'))
487 cur = *str;
488 else
489 cur++;
490 ret = xmlParse3986Host(uri, &cur);
491 if (ret != 0) return(ret);
492 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000493 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000494 ret = xmlParse3986Port(uri, &cur);
495 if (ret != 0) return(ret);
496 }
497 *str = cur;
498 return(0);
499}
500
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze, moved past the segment on success
 * @forbid: an optional forbidden character, 0 if there is none
 * @empty: allow an empty segment
 *
 * Skip a path segment.  Nothing is stored, only @str is updated.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The segment stops at the first character which is not a pchar or
 * which is @forbid.  A segment starting with @forbid is an empty one
 * and is therefore refused unless @empty is set.
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
533
534/**
535 * xmlParse3986PathAbEmpty:
536 * @uri: pointer to an URI structure
537 * @str: the string to analyze
538 *
539 * Parse an path absolute or empty and fills in the appropriate fields
540 * of the @uri structure
541 *
542 * path-abempty = *( "/" segment )
543 *
544 * Returns 0 or the error code
545 */
546static int
547xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548{
549 const char *cur;
550 int ret;
551
552 cur = *str;
553
554 while (*cur == '/') {
555 cur++;
556 ret = xmlParse3986Segment(&cur, 0, 1);
557 if (ret != 0) return(ret);
558 }
559 if (uri != NULL) {
560 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200561 if (*str != cur) {
562 if (uri->cleanup & 2)
563 uri->path = STRNDUP(*str, cur - *str);
564 else
565 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
566 } else {
567 uri->path = NULL;
568 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000569 }
570 *str = cur;
571 return (0);
572}
573
574/**
575 * xmlParse3986PathAbsolute:
576 * @uri: pointer to an URI structure
577 * @str: the string to analyze
578 *
579 * Parse an path absolute and fills in the appropriate fields
580 * of the @uri structure
581 *
582 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
583 *
584 * Returns 0 or the error code
585 */
586static int
587xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
588{
589 const char *cur;
590 int ret;
591
592 cur = *str;
593
594 if (*cur != '/')
595 return(1);
596 cur++;
597 ret = xmlParse3986Segment(&cur, 0, 0);
598 if (ret == 0) {
599 while (*cur == '/') {
600 cur++;
601 ret = xmlParse3986Segment(&cur, 0, 1);
602 if (ret != 0) return(ret);
603 }
604 }
605 if (uri != NULL) {
606 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200607 if (cur != *str) {
608 if (uri->cleanup & 2)
609 uri->path = STRNDUP(*str, cur - *str);
610 else
611 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
612 } else {
613 uri->path = NULL;
614 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000615 }
616 *str = cur;
617 return (0);
618}
619
620/**
621 * xmlParse3986PathRootless:
622 * @uri: pointer to an URI structure
623 * @str: the string to analyze
624 *
625 * Parse an path without root and fills in the appropriate fields
626 * of the @uri structure
627 *
628 * path-rootless = segment-nz *( "/" segment )
629 *
630 * Returns 0 or the error code
631 */
632static int
633xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
634{
635 const char *cur;
636 int ret;
637
638 cur = *str;
639
640 ret = xmlParse3986Segment(&cur, 0, 0);
641 if (ret != 0) return(ret);
642 while (*cur == '/') {
643 cur++;
644 ret = xmlParse3986Segment(&cur, 0, 1);
645 if (ret != 0) return(ret);
646 }
647 if (uri != NULL) {
648 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200649 if (cur != *str) {
650 if (uri->cleanup & 2)
651 uri->path = STRNDUP(*str, cur - *str);
652 else
653 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
654 } else {
655 uri->path = NULL;
656 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000657 }
658 *str = cur;
659 return (0);
660}
661
662/**
663 * xmlParse3986PathNoScheme:
664 * @uri: pointer to an URI structure
665 * @str: the string to analyze
666 *
667 * Parse an path which is not a scheme and fills in the appropriate fields
668 * of the @uri structure
669 *
670 * path-noscheme = segment-nz-nc *( "/" segment )
671 *
672 * Returns 0 or the error code
673 */
674static int
675xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
676{
677 const char *cur;
678 int ret;
679
680 cur = *str;
681
682 ret = xmlParse3986Segment(&cur, ':', 0);
683 if (ret != 0) return(ret);
684 while (*cur == '/') {
685 cur++;
686 ret = xmlParse3986Segment(&cur, 0, 1);
687 if (ret != 0) return(ret);
688 }
689 if (uri != NULL) {
690 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200691 if (cur != *str) {
692 if (uri->cleanup & 2)
693 uri->path = STRNDUP(*str, cur - *str);
694 else
695 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
696 } else {
697 uri->path = NULL;
698 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000699 }
700 *str = cur;
701 return (0);
702}
703
704/**
705 * xmlParse3986HierPart:
706 * @uri: pointer to an URI structure
707 * @str: the string to analyze
708 *
709 * Parse an hierarchical part and fills in the appropriate fields
710 * of the @uri structure
711 *
712 * hier-part = "//" authority path-abempty
713 * / path-absolute
714 * / path-rootless
715 * / path-empty
716 *
717 * Returns 0 or the error code
718 */
719static int
720xmlParse3986HierPart(xmlURIPtr uri, const char **str)
721{
722 const char *cur;
723 int ret;
724
725 cur = *str;
726
727 if ((*cur == '/') && (*(cur + 1) == '/')) {
728 cur += 2;
729 ret = xmlParse3986Authority(uri, &cur);
730 if (ret != 0) return(ret);
731 ret = xmlParse3986PathAbEmpty(uri, &cur);
732 if (ret != 0) return(ret);
733 *str = cur;
734 return(0);
735 } else if (*cur == '/') {
736 ret = xmlParse3986PathAbsolute(uri, &cur);
737 if (ret != 0) return(ret);
738 } else if (ISA_PCHAR(cur)) {
739 ret = xmlParse3986PathRootless(uri, &cur);
740 if (ret != 0) return(ret);
741 } else {
742 /* path-empty is effectively empty */
743 if (uri != NULL) {
744 if (uri->path != NULL) xmlFree(uri->path);
745 uri->path = NULL;
746 }
747 }
748 *str = cur;
749 return (0);
750}
751
752/**
753 * xmlParse3986RelativeRef:
754 * @uri: pointer to an URI structure
755 * @str: the string to analyze
756 *
757 * Parse an URI string and fills in the appropriate fields
758 * of the @uri structure
759 *
760 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
761 * relative-part = "//" authority path-abempty
762 * / path-absolute
763 * / path-noscheme
764 * / path-empty
765 *
766 * Returns 0 or the error code
767 */
768static int
769xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
770 int ret;
771
772 if ((*str == '/') && (*(str + 1) == '/')) {
773 str += 2;
774 ret = xmlParse3986Authority(uri, &str);
775 if (ret != 0) return(ret);
776 ret = xmlParse3986PathAbEmpty(uri, &str);
777 if (ret != 0) return(ret);
778 } else if (*str == '/') {
779 ret = xmlParse3986PathAbsolute(uri, &str);
780 if (ret != 0) return(ret);
781 } else if (ISA_PCHAR(str)) {
782 ret = xmlParse3986PathNoScheme(uri, &str);
783 if (ret != 0) return(ret);
784 } else {
785 /* path-empty is effectively empty */
786 if (uri != NULL) {
787 if (uri->path != NULL) xmlFree(uri->path);
788 uri->path = NULL;
789 }
790 }
791
792 if (*str == '?') {
793 str++;
794 ret = xmlParse3986Query(uri, &str);
795 if (ret != 0) return(ret);
796 }
797 if (*str == '#') {
798 str++;
799 ret = xmlParse3986Fragment(uri, &str);
800 if (ret != 0) return(ret);
801 }
802 if (*str != 0) {
803 xmlCleanURI(uri);
804 return(1);
805 }
806 return(0);
807}
808
809
810/**
811 * xmlParse3986URI:
812 * @uri: pointer to an URI structure
813 * @str: the string to analyze
814 *
815 * Parse an URI string and fills in the appropriate fields
816 * of the @uri structure
817 *
818 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
819 *
820 * Returns 0 or the error code
821 */
822static int
823xmlParse3986URI(xmlURIPtr uri, const char *str) {
824 int ret;
825
826 ret = xmlParse3986Scheme(uri, &str);
827 if (ret != 0) return(ret);
828 if (*str != ':') {
829 return(1);
830 }
831 str++;
832 ret = xmlParse3986HierPart(uri, &str);
833 if (ret != 0) return(ret);
834 if (*str == '?') {
835 str++;
836 ret = xmlParse3986Query(uri, &str);
837 if (ret != 0) return(ret);
838 }
839 if (*str == '#') {
840 str++;
841 ret = xmlParse3986Fragment(uri, &str);
842 if (ret != 0) return(ret);
843 }
844 if (*str != 0) {
845 xmlCleanURI(uri);
846 return(1);
847 }
848 return(0);
849}
850
851/**
852 * xmlParse3986URIReference:
853 * @uri: pointer to an URI structure
854 * @str: the string to analyze
855 *
856 * Parse an URI reference string and fills in the appropriate fields
857 * of the @uri structure
858 *
859 * URI-reference = URI / relative-ref
860 *
861 * Returns 0 or the error code
862 */
863static int
864xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
865 int ret;
866
867 if (str == NULL)
868 return(-1);
869 xmlCleanURI(uri);
870
871 /*
872 * Try first to parse absolute refs, then fallback to relative if
873 * it fails.
874 */
875 ret = xmlParse3986URI(uri, str);
876 if (ret != 0) {
877 xmlCleanURI(uri);
878 ret = xmlParse3986RelativeRef(uri, str);
879 if (ret != 0) {
880 xmlCleanURI(uri);
881 return(ret);
882 }
883 }
884 return(0);
885}
886
887/**
888 * xmlParseURI:
889 * @str: the URI string to analyze
890 *
891 * Parse an URI based on RFC 3986
892 *
893 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
894 *
895 * Returns a newly built xmlURIPtr or NULL in case of error
896 */
897xmlURIPtr
898xmlParseURI(const char *str) {
899 xmlURIPtr uri;
900 int ret;
901
902 if (str == NULL)
903 return(NULL);
904 uri = xmlCreateURI();
905 if (uri != NULL) {
906 ret = xmlParse3986URIReference(uri, str);
907 if (ret) {
908 xmlFreeURI(uri);
909 return(NULL);
910 }
911 }
912 return(uri);
913}
914
915/**
916 * xmlParseURIReference:
917 * @uri: pointer to an URI structure
918 * @str: the string to analyze
919 *
920 * Parse an URI reference string based on RFC 3986 and fills in the
921 * appropriate fields of the @uri structure
922 *
923 * URI-reference = URI / relative-ref
924 *
925 * Returns 0 or the error code
926 */
927int
928xmlParseURIReference(xmlURIPtr uri, const char *str) {
929 return(xmlParse3986URIReference(uri, str));
930}
931
932/**
933 * xmlParseURIRaw:
934 * @str: the URI string to analyze
935 * @raw: if 1 unescaping of URI pieces are disabled
936 *
937 * Parse an URI but allows to keep intact the original fragments.
938 *
939 * URI-reference = URI / relative-ref
940 *
941 * Returns a newly built xmlURIPtr or NULL in case of error
942 */
943xmlURIPtr
944xmlParseURIRaw(const char *str, int raw) {
945 xmlURIPtr uri;
946 int ret;
947
948 if (str == NULL)
949 return(NULL);
950 uri = xmlCreateURI();
951 if (uri != NULL) {
952 if (raw) {
953 uri->cleanup |= 2;
954 }
955 ret = xmlParseURIReference(uri, str);
956 if (ret) {
957 xmlFreeURI(uri);
958 return(NULL);
959 }
960 }
961 return(uri);
962}
963
964/************************************************************************
965 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000966 * Generic URI structure functions *
967 * *
968 ************************************************************************/
969
970/**
971 * xmlCreateURI:
972 *
973 * Simply creates an empty xmlURI
974 *
975 * Returns the new structure or NULL in case of error
976 */
977xmlURIPtr
978xmlCreateURI(void) {
979 xmlURIPtr ret;
980
981 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
982 if (ret == NULL) {
983 xmlGenericError(xmlGenericErrorContext,
984 "xmlCreateURI: out of memory\n");
985 return(NULL);
986 }
987 memset(ret, 0, sizeof(xmlURI));
988 return(ret);
989}
990
991/**
992 * xmlSaveUri:
993 * @uri: pointer to an xmlURI
994 *
995 * Save the URI as an escaped string
996 *
997 * Returns a new string (to be deallocated by caller)
998 */
999xmlChar *
1000xmlSaveUri(xmlURIPtr uri) {
1001 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001002 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001003 const char *p;
1004 int len;
1005 int max;
1006
1007 if (uri == NULL) return(NULL);
1008
1009
1010 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001011 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001012 if (ret == NULL) {
1013 xmlGenericError(xmlGenericErrorContext,
1014 "xmlSaveUri: out of memory\n");
1015 return(NULL);
1016 }
1017 len = 0;
1018
1019 if (uri->scheme != NULL) {
1020 p = uri->scheme;
1021 while (*p != 0) {
1022 if (len >= max) {
1023 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001024 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1025 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001026 xmlGenericError(xmlGenericErrorContext,
1027 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001028 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001029 return(NULL);
1030 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001031 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001032 }
1033 ret[len++] = *p++;
1034 }
1035 if (len >= max) {
1036 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001037 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001039 xmlGenericError(xmlGenericErrorContext,
1040 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001041 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001042 return(NULL);
1043 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001044 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001045 }
1046 ret[len++] = ':';
1047 }
1048 if (uri->opaque != NULL) {
1049 p = uri->opaque;
1050 while (*p != 0) {
1051 if (len + 3 >= max) {
1052 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001053 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1054 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001055 xmlGenericError(xmlGenericErrorContext,
1056 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001057 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001058 return(NULL);
1059 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001060 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001061 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001062 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001063 ret[len++] = *p++;
1064 else {
1065 int val = *(unsigned char *)p++;
1066 int hi = val / 0x10, lo = val % 0x10;
1067 ret[len++] = '%';
1068 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1069 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1070 }
1071 }
Owen Taylor3473f882001-02-23 17:55:21 +00001072 } else {
1073 if (uri->server != NULL) {
1074 if (len + 3 >= max) {
1075 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001076 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1077 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001078 xmlGenericError(xmlGenericErrorContext,
1079 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001080 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001081 return(NULL);
1082 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001083 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001084 }
1085 ret[len++] = '/';
1086 ret[len++] = '/';
1087 if (uri->user != NULL) {
1088 p = uri->user;
1089 while (*p != 0) {
1090 if (len + 3 >= max) {
1091 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001092 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001093 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001094 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001095 xmlGenericError(xmlGenericErrorContext,
1096 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001097 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001098 return(NULL);
1099 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001100 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001101 }
1102 if ((IS_UNRESERVED(*(p))) ||
1103 ((*(p) == ';')) || ((*(p) == ':')) ||
1104 ((*(p) == '&')) || ((*(p) == '=')) ||
1105 ((*(p) == '+')) || ((*(p) == '$')) ||
1106 ((*(p) == ',')))
1107 ret[len++] = *p++;
1108 else {
1109 int val = *(unsigned char *)p++;
1110 int hi = val / 0x10, lo = val % 0x10;
1111 ret[len++] = '%';
1112 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1113 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1114 }
1115 }
1116 if (len + 3 >= max) {
1117 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001118 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001119 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001120 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001121 xmlGenericError(xmlGenericErrorContext,
1122 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001123 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001124 return(NULL);
1125 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001126 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001127 }
1128 ret[len++] = '@';
1129 }
1130 p = uri->server;
1131 while (*p != 0) {
1132 if (len >= max) {
1133 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001134 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001135 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001136 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001137 xmlGenericError(xmlGenericErrorContext,
1138 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001139 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001140 return(NULL);
1141 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001142 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001143 }
1144 ret[len++] = *p++;
1145 }
1146 if (uri->port > 0) {
1147 if (len + 10 >= max) {
1148 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001149 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001150 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001151 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001152 xmlGenericError(xmlGenericErrorContext,
1153 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001154 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001155 return(NULL);
1156 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001157 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001158 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +00001159 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001160 }
1161 } else if (uri->authority != NULL) {
1162 if (len + 3 >= max) {
1163 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001164 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001165 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001166 if (temp == NULL) {
1167 xmlGenericError(xmlGenericErrorContext,
1168 "xmlSaveUri: out of memory\n");
1169 xmlFree(ret);
1170 return(NULL);
1171 }
1172 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001173 }
1174 ret[len++] = '/';
1175 ret[len++] = '/';
1176 p = uri->authority;
1177 while (*p != 0) {
1178 if (len + 3 >= max) {
1179 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001180 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001181 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001182 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001183 xmlGenericError(xmlGenericErrorContext,
1184 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001185 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001186 return(NULL);
1187 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001188 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001189 }
1190 if ((IS_UNRESERVED(*(p))) ||
1191 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1192 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1193 ((*(p) == '=')) || ((*(p) == '+')))
1194 ret[len++] = *p++;
1195 else {
1196 int val = *(unsigned char *)p++;
1197 int hi = val / 0x10, lo = val % 0x10;
1198 ret[len++] = '%';
1199 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1200 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1201 }
1202 }
1203 } else if (uri->scheme != NULL) {
1204 if (len + 3 >= max) {
1205 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001206 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001207 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001208 if (temp == NULL) {
1209 xmlGenericError(xmlGenericErrorContext,
1210 "xmlSaveUri: out of memory\n");
1211 xmlFree(ret);
1212 return(NULL);
1213 }
1214 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001215 }
1216 ret[len++] = '/';
1217 ret[len++] = '/';
1218 }
1219 if (uri->path != NULL) {
1220 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001221 /*
1222 * the colon in file:///d: should not be escaped or
1223 * Windows accesses fail later.
1224 */
1225 if ((uri->scheme != NULL) &&
1226 (p[0] == '/') &&
1227 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1228 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1229 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001230 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001231 if (len + 3 >= max) {
1232 max *= 2;
1233 ret = (xmlChar *) xmlRealloc(ret,
1234 (max + 1) * sizeof(xmlChar));
1235 if (ret == NULL) {
1236 xmlGenericError(xmlGenericErrorContext,
1237 "xmlSaveUri: out of memory\n");
1238 return(NULL);
1239 }
1240 }
1241 ret[len++] = *p++;
1242 ret[len++] = *p++;
1243 ret[len++] = *p++;
1244 }
Owen Taylor3473f882001-02-23 17:55:21 +00001245 while (*p != 0) {
1246 if (len + 3 >= max) {
1247 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001248 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001249 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001250 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001251 xmlGenericError(xmlGenericErrorContext,
1252 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001253 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001254 return(NULL);
1255 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001256 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001257 }
1258 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1259 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1260 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1261 ((*(p) == ',')))
1262 ret[len++] = *p++;
1263 else {
1264 int val = *(unsigned char *)p++;
1265 int hi = val / 0x10, lo = val % 0x10;
1266 ret[len++] = '%';
1267 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1268 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1269 }
1270 }
1271 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001272 if (uri->query_raw != NULL) {
1273 if (len + 1 >= max) {
1274 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001275 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillarda1413b82007-04-26 08:33:28 +00001276 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001277 if (temp == NULL) {
1278 xmlGenericError(xmlGenericErrorContext,
1279 "xmlSaveUri: out of memory\n");
1280 xmlFree(ret);
1281 return(NULL);
1282 }
1283 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001284 }
1285 ret[len++] = '?';
1286 p = uri->query_raw;
1287 while (*p != 0) {
1288 if (len + 1 >= max) {
1289 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001290 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillarda1413b82007-04-26 08:33:28 +00001291 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001292 if (temp == NULL) {
Daniel Veillarda1413b82007-04-26 08:33:28 +00001293 xmlGenericError(xmlGenericErrorContext,
1294 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001295 xmlFree(ret);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001296 return(NULL);
1297 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001298 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001299 }
1300 ret[len++] = *p++;
1301 }
1302 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001303 if (len + 3 >= max) {
1304 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001305 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001306 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001307 if (temp == NULL) {
1308 xmlGenericError(xmlGenericErrorContext,
1309 "xmlSaveUri: out of memory\n");
1310 xmlFree(ret);
1311 return(NULL);
1312 }
1313 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001314 }
1315 ret[len++] = '?';
1316 p = uri->query;
1317 while (*p != 0) {
1318 if (len + 3 >= max) {
1319 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001320 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001321 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001322 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001323 xmlGenericError(xmlGenericErrorContext,
1324 "xmlSaveUri: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001325 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001326 return(NULL);
1327 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001328 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001329 }
1330 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1331 ret[len++] = *p++;
1332 else {
1333 int val = *(unsigned char *)p++;
1334 int hi = val / 0x10, lo = val % 0x10;
1335 ret[len++] = '%';
1336 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1337 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1338 }
1339 }
1340 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001341 }
1342 if (uri->fragment != NULL) {
1343 if (len + 3 >= max) {
1344 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001345 temp = (xmlChar *) xmlRealloc(ret,
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001346 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001347 if (temp == NULL) {
1348 xmlGenericError(xmlGenericErrorContext,
1349 "xmlSaveUri: out of memory\n");
1350 xmlFree(ret);
1351 return(NULL);
Daniel Veillardd7af5552008-08-04 15:29:44 +00001352 }
1353 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001354 }
1355 ret[len++] = '#';
1356 p = uri->fragment;
1357 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001358 if (len + 3 >= max) {
1359 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001360 temp = (xmlChar *) xmlRealloc(ret,
Owen Taylor3473f882001-02-23 17:55:21 +00001361 (max + 1) * sizeof(xmlChar));
Daniel Veillarded86dc22008-04-24 11:58:41 +00001362 if (temp == NULL) {
1363 xmlGenericError(xmlGenericErrorContext,
1364 "xmlSaveUri: out of memory\n");
1365 xmlFree(ret);
1366 return(NULL);
1367 }
1368 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001369 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001370 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1371 ret[len++] = *p++;
1372 else {
1373 int val = *(unsigned char *)p++;
1374 int hi = val / 0x10, lo = val % 0x10;
1375 ret[len++] = '%';
1376 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1377 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001378 }
1379 }
Owen Taylor3473f882001-02-23 17:55:21 +00001380 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001381 if (len >= max) {
1382 max *= 2;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001383 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1384 if (temp == NULL) {
1385 xmlGenericError(xmlGenericErrorContext,
1386 "xmlSaveUri: out of memory\n");
1387 xmlFree(ret);
1388 return(NULL);
1389 }
1390 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001391 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02001392 ret[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001393 return(ret);
1394}
1395
1396/**
1397 * xmlPrintURI:
1398 * @stream: a FILE* for the output
1399 * @uri: pointer to an xmlURI
1400 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001401 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001402 */
1403void
1404xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1405 xmlChar *out;
1406
1407 out = xmlSaveUri(uri);
1408 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001409 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001410 xmlFree(out);
1411 }
1412}
1413
1414/**
1415 * xmlCleanURI:
1416 * @uri: pointer to an xmlURI
1417 *
1418 * Make sure the xmlURI struct is free of content
1419 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001420static void
Owen Taylor3473f882001-02-23 17:55:21 +00001421xmlCleanURI(xmlURIPtr uri) {
1422 if (uri == NULL) return;
1423
1424 if (uri->scheme != NULL) xmlFree(uri->scheme);
1425 uri->scheme = NULL;
1426 if (uri->server != NULL) xmlFree(uri->server);
1427 uri->server = NULL;
1428 if (uri->user != NULL) xmlFree(uri->user);
1429 uri->user = NULL;
1430 if (uri->path != NULL) xmlFree(uri->path);
1431 uri->path = NULL;
1432 if (uri->fragment != NULL) xmlFree(uri->fragment);
1433 uri->fragment = NULL;
1434 if (uri->opaque != NULL) xmlFree(uri->opaque);
1435 uri->opaque = NULL;
1436 if (uri->authority != NULL) xmlFree(uri->authority);
1437 uri->authority = NULL;
1438 if (uri->query != NULL) xmlFree(uri->query);
1439 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001440 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1441 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001442}
1443
1444/**
1445 * xmlFreeURI:
1446 * @uri: pointer to an xmlURI
1447 *
1448 * Free up the xmlURI struct
1449 */
1450void
1451xmlFreeURI(xmlURIPtr uri) {
1452 if (uri == NULL) return;
1453
1454 if (uri->scheme != NULL) xmlFree(uri->scheme);
1455 if (uri->server != NULL) xmlFree(uri->server);
1456 if (uri->user != NULL) xmlFree(uri->user);
1457 if (uri->path != NULL) xmlFree(uri->path);
1458 if (uri->fragment != NULL) xmlFree(uri->fragment);
1459 if (uri->opaque != NULL) xmlFree(uri->opaque);
1460 if (uri->authority != NULL) xmlFree(uri->authority);
1461 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001462 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001463 xmlFree(uri);
1464}
1465
1466/************************************************************************
1467 * *
1468 * Helper functions *
1469 * *
1470 ************************************************************************/
1471
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the string can only get shorter.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the original text, "out" writes the compacted text.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so copy by hand (no strcpy). */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the slashes preceding "cur".  If that takes us
         * all the way to the start of the buffer there is no previous
         * segment, so keep going from here.  Testing segp[-1] (rather
         * than decrementing first) keeps a one-character segment located
         * at path[0], as in "a/b/../..", from being mistaken for
         * "nothing before cur".
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp - 1" is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still marks the end reached by the first pass; the string has
     * only shrunk since then, so this write stays inside the buffer.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001660
/*
 * is_hex:
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), 0 otherwise
 */
static int is_hex(char c) {
    int is_digit = (c >= '0') && (c <= '9');
    int is_lower = (c >= 'a') && (c <= 'f');
    int is_upper = (c >= 'A') && (c <= 'F');

    return(is_digit || is_lower || is_upper);
}
1668
Owen Taylor3473f882001-02-23 17:55:21 +00001669/**
1670 * xmlURIUnescapeString:
1671 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +00001672 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001673 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001674 *
Daniel Veillarda44294f2007-04-24 08:57:54 +00001675 * Unescaping routine, but does not check that the string is an URI. The
1676 * output is a direct unsigned char translation of %XX values (no encoding)
Daniel Veillard79187652007-04-24 10:19:52 +00001677 * Note that the length of the result can only be smaller or same size as
1678 * the input string.
Owen Taylor3473f882001-02-23 17:55:21 +00001679 *
Daniel Veillard79187652007-04-24 10:19:52 +00001680 * Returns a copy of the string, but unescaped, will return NULL only in case
1681 * of error
Owen Taylor3473f882001-02-23 17:55:21 +00001682 */
1683char *
1684xmlURIUnescapeString(const char *str, int len, char *target) {
1685 char *ret, *out;
1686 const char *in;
1687
1688 if (str == NULL)
1689 return(NULL);
1690 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +00001691 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001692
1693 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001694 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00001695 if (ret == NULL) {
1696 xmlGenericError(xmlGenericErrorContext,
1697 "xmlURIUnescapeString: out of memory\n");
1698 return(NULL);
1699 }
1700 } else
1701 ret = target;
1702 in = str;
1703 out = ret;
1704 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +00001705 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001706 in++;
1707 if ((*in >= '0') && (*in <= '9'))
1708 *out = (*in - '0');
1709 else if ((*in >= 'a') && (*in <= 'f'))
1710 *out = (*in - 'a') + 10;
1711 else if ((*in >= 'A') && (*in <= 'F'))
1712 *out = (*in - 'A') + 10;
1713 in++;
1714 if ((*in >= '0') && (*in <= '9'))
1715 *out = *out * 16 + (*in - '0');
1716 else if ((*in >= 'a') && (*in <= 'f'))
1717 *out = *out * 16 + (*in - 'a') + 10;
1718 else if ((*in >= 'A') && (*in <= 'F'))
1719 *out = *out * 16 + (*in - 'A') + 10;
1720 in++;
1721 len -= 3;
1722 out++;
1723 } else {
1724 *out++ = *in++;
1725 len--;
1726 }
1727 }
1728 *out = 0;
1729 return(ret);
1730}
1731
1732/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001733 * xmlURIEscapeStr:
1734 * @str: string to escape
1735 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001736 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001737 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1738 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001739 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001740 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001741 */
1742xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001743xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1744 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001745 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001746 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +00001747
Owen Taylor3473f882001-02-23 17:55:21 +00001748 unsigned int len, out;
1749
1750 if (str == NULL)
1751 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001752 if (str[0] == 0)
1753 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001754 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001755 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001756
1757 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001758 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001759 if (ret == NULL) {
1760 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001761 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001762 return(NULL);
1763 }
1764 in = (const xmlChar *) str;
1765 out = 0;
1766 while(*in != 0) {
1767 if (len - out <= 3) {
1768 len += 20;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001769 temp = (xmlChar *) xmlRealloc(ret, len);
1770 if (temp == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001771 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001772 "xmlURIEscapeStr: out of memory\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001773 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001774 return(NULL);
1775 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001776 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001777 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001778
1779 ch = *in;
1780
Daniel Veillardeb475a32002-04-14 22:00:22 +00001781 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001782 unsigned char val;
1783 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001784 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001785 if (val <= 9)
1786 ret[out++] = '0' + val;
1787 else
1788 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001789 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001790 if (val <= 9)
1791 ret[out++] = '0' + val;
1792 else
1793 ret[out++] = 'A' + val - 0xA;
1794 in++;
1795 } else {
1796 ret[out++] = *in++;
1797 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001798
Owen Taylor3473f882001-02-23 17:55:21 +00001799 }
1800 ret[out] = 0;
1801 return(ret);
1802}
1803
Daniel Veillard8514c672001-05-23 10:29:12 +00001804/**
1805 * xmlURIEscape:
1806 * @str: the string of the URI to escape
1807 *
1808 * Escaping routine, does not do validity checks !
1809 * It will try to escape the chars needing this, but this is heuristic
1810 * based it's impossible to be sure.
1811 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001812 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001813 *
1814 * 25 May 2001
1815 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1816 * according to RFC2396.
1817 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001818 */
1819xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001820xmlURIEscape(const xmlChar * str)
1821{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001822 xmlChar *ret, *segment = NULL;
1823 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001824 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001825
Daniel Veillard6278fb52001-05-25 07:38:41 +00001826#define NULLCHK(p) if(!p) { \
1827 xmlGenericError(xmlGenericErrorContext, \
1828 "xmlURIEscape: out of memory\n"); \
Daniel Veillarded86dc22008-04-24 11:58:41 +00001829 xmlFreeURI(uri); \
1830 return NULL; } \
Daniel Veillard6278fb52001-05-25 07:38:41 +00001831
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001832 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001833 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001834
1835 uri = xmlCreateURI();
1836 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001837 /*
1838 * Allow escaping errors in the unescaped form
1839 */
1840 uri->cleanup = 1;
1841 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001842 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001843 xmlFreeURI(uri);
1844 return (NULL);
1845 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001846 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001847
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001848 if (!uri)
1849 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001850
1851 ret = NULL;
1852
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001853 if (uri->scheme) {
1854 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1855 NULLCHK(segment)
1856 ret = xmlStrcat(ret, segment);
1857 ret = xmlStrcat(ret, BAD_CAST ":");
1858 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001859 }
1860
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001861 if (uri->authority) {
1862 segment =
1863 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1864 NULLCHK(segment)
1865 ret = xmlStrcat(ret, BAD_CAST "//");
1866 ret = xmlStrcat(ret, segment);
1867 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001868 }
1869
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001870 if (uri->user) {
1871 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1872 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001873 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001874 ret = xmlStrcat(ret, segment);
1875 ret = xmlStrcat(ret, BAD_CAST "@");
1876 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001877 }
1878
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001879 if (uri->server) {
1880 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1881 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001882 if (uri->user == NULL)
Daniel Veillardd7af5552008-08-04 15:29:44 +00001883 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001884 ret = xmlStrcat(ret, segment);
1885 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001886 }
1887
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001888 if (uri->port) {
1889 xmlChar port[10];
1890
Daniel Veillard43d3f612001-11-10 11:57:23 +00001891 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001892 ret = xmlStrcat(ret, BAD_CAST ":");
1893 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001894 }
1895
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001896 if (uri->path) {
1897 segment =
1898 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1899 NULLCHK(segment)
1900 ret = xmlStrcat(ret, segment);
1901 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001902 }
1903
Daniel Veillarda1413b82007-04-26 08:33:28 +00001904 if (uri->query_raw) {
1905 ret = xmlStrcat(ret, BAD_CAST "?");
1906 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1907 }
1908 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001909 segment =
1910 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1911 NULLCHK(segment)
1912 ret = xmlStrcat(ret, BAD_CAST "?");
1913 ret = xmlStrcat(ret, segment);
1914 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001915 }
1916
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001917 if (uri->opaque) {
1918 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1919 NULLCHK(segment)
1920 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001921 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001922 }
1923
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001924 if (uri->fragment) {
1925 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1926 NULLCHK(segment)
1927 ret = xmlStrcat(ret, BAD_CAST "#");
1928 ret = xmlStrcat(ret, segment);
1929 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001930 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001931
1932 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001933#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001934
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001935 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001936}
1937
Owen Taylor3473f882001-02-23 17:55:21 +00001938/************************************************************************
1939 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001940 * Public functions *
1941 * *
1942 ************************************************************************/
1943
1944/**
1945 * xmlBuildURI:
1946 * @URI: the URI instance found in the document
1947 * @base: the base value
1948 *
1949 * Computes he final URI of the reference done by checking that
1950 * the given URI is valid, and building the final URI using the
1951 * base URI. This is processed according to section 5.2 of the
1952 * RFC 2396
1953 *
1954 * 5.2. Resolving Relative References to Absolute Form
1955 *
1956 * Returns a new URI string (to be freed by the caller) or NULL in case
1957 * of error.
1958 */
1959xmlChar *
1960xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1961 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001962 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001963 xmlURIPtr ref = NULL;
1964 xmlURIPtr bas = NULL;
1965 xmlURIPtr res = NULL;
1966
1967 /*
1968 * 1) The URI reference is parsed into the potential four components and
1969 * fragment identifier, as described in Section 4.3.
1970 *
1971 * NOTE that a completely empty URI is treated by modern browsers
1972 * as a reference to "." rather than as a synonym for the current
1973 * URI. Should we do that here?
1974 */
1975 if (URI == NULL)
1976 ret = -1;
1977 else {
1978 if (*URI) {
1979 ref = xmlCreateURI();
1980 if (ref == NULL)
1981 goto done;
1982 ret = xmlParseURIReference(ref, (const char *) URI);
1983 }
1984 else
1985 ret = 0;
1986 }
1987 if (ret != 0)
1988 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001989 if ((ref != NULL) && (ref->scheme != NULL)) {
1990 /*
1991 * The URI is absolute don't modify.
1992 */
1993 val = xmlStrdup(URI);
1994 goto done;
1995 }
Owen Taylor3473f882001-02-23 17:55:21 +00001996 if (base == NULL)
1997 ret = -1;
1998 else {
1999 bas = xmlCreateURI();
2000 if (bas == NULL)
2001 goto done;
2002 ret = xmlParseURIReference(bas, (const char *) base);
2003 }
2004 if (ret != 0) {
2005 if (ref)
2006 val = xmlSaveUri(ref);
2007 goto done;
2008 }
2009 if (ref == NULL) {
2010 /*
2011 * the base fragment must be ignored
2012 */
2013 if (bas->fragment != NULL) {
2014 xmlFree(bas->fragment);
2015 bas->fragment = NULL;
2016 }
2017 val = xmlSaveUri(bas);
2018 goto done;
2019 }
2020
2021 /*
2022 * 2) If the path component is empty and the scheme, authority, and
2023 * query components are undefined, then it is a reference to the
2024 * current document and we are done. Otherwise, the reference URI's
2025 * query and fragment components are defined as found (or not found)
2026 * within the URI reference and not inherited from the base URI.
2027 *
2028 * NOTE that in modern browsers, the parsing differs from the above
2029 * in the following aspect: the query component is allowed to be
2030 * defined while still treating this as a reference to the current
2031 * document.
2032 */
2033 res = xmlCreateURI();
2034 if (res == NULL)
2035 goto done;
2036 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2037 ((ref->authority == NULL) && (ref->server == NULL))) {
2038 if (bas->scheme != NULL)
2039 res->scheme = xmlMemStrdup(bas->scheme);
2040 if (bas->authority != NULL)
2041 res->authority = xmlMemStrdup(bas->authority);
2042 else if (bas->server != NULL) {
2043 res->server = xmlMemStrdup(bas->server);
2044 if (bas->user != NULL)
2045 res->user = xmlMemStrdup(bas->user);
2046 res->port = bas->port;
2047 }
2048 if (bas->path != NULL)
2049 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002050 if (ref->query_raw != NULL)
2051 res->query_raw = xmlMemStrdup (ref->query_raw);
2052 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00002053 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002054 else if (bas->query_raw != NULL)
2055 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00002056 else if (bas->query != NULL)
2057 res->query = xmlMemStrdup(bas->query);
2058 if (ref->fragment != NULL)
2059 res->fragment = xmlMemStrdup(ref->fragment);
2060 goto step_7;
2061 }
Owen Taylor3473f882001-02-23 17:55:21 +00002062
2063 /*
2064 * 3) If the scheme component is defined, indicating that the reference
2065 * starts with a scheme name, then the reference is interpreted as an
2066 * absolute URI and we are done. Otherwise, the reference URI's
2067 * scheme is inherited from the base URI's scheme component.
2068 */
2069 if (ref->scheme != NULL) {
2070 val = xmlSaveUri(ref);
2071 goto done;
2072 }
2073 if (bas->scheme != NULL)
2074 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00002075
Daniel Veillarda1413b82007-04-26 08:33:28 +00002076 if (ref->query_raw != NULL)
2077 res->query_raw = xmlMemStrdup(ref->query_raw);
2078 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002079 res->query = xmlMemStrdup(ref->query);
2080 if (ref->fragment != NULL)
2081 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002082
2083 /*
2084 * 4) If the authority component is defined, then the reference is a
2085 * network-path and we skip to step 7. Otherwise, the reference
2086 * URI's authority is inherited from the base URI's authority
2087 * component, which will also be undefined if the URI scheme does not
2088 * use an authority component.
2089 */
2090 if ((ref->authority != NULL) || (ref->server != NULL)) {
2091 if (ref->authority != NULL)
2092 res->authority = xmlMemStrdup(ref->authority);
2093 else {
2094 res->server = xmlMemStrdup(ref->server);
2095 if (ref->user != NULL)
2096 res->user = xmlMemStrdup(ref->user);
2097 res->port = ref->port;
2098 }
2099 if (ref->path != NULL)
2100 res->path = xmlMemStrdup(ref->path);
2101 goto step_7;
2102 }
2103 if (bas->authority != NULL)
2104 res->authority = xmlMemStrdup(bas->authority);
2105 else if (bas->server != NULL) {
2106 res->server = xmlMemStrdup(bas->server);
2107 if (bas->user != NULL)
2108 res->user = xmlMemStrdup(bas->user);
2109 res->port = bas->port;
2110 }
2111
2112 /*
2113 * 5) If the path component begins with a slash character ("/"), then
2114 * the reference is an absolute-path and we skip to step 7.
2115 */
2116 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2117 res->path = xmlMemStrdup(ref->path);
2118 goto step_7;
2119 }
2120
2121
2122 /*
2123 * 6) If this step is reached, then we are resolving a relative-path
2124 * reference. The relative path needs to be merged with the base
2125 * URI's path. Although there are many ways to do this, we will
2126 * describe a simple method using a separate string buffer.
2127 *
2128 * Allocate a buffer large enough for the result string.
2129 */
2130 len = 2; /* extra / and 0 */
2131 if (ref->path != NULL)
2132 len += strlen(ref->path);
2133 if (bas->path != NULL)
2134 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002135 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002136 if (res->path == NULL) {
2137 xmlGenericError(xmlGenericErrorContext,
2138 "xmlBuildURI: out of memory\n");
2139 goto done;
2140 }
2141 res->path[0] = 0;
2142
2143 /*
2144 * a) All but the last segment of the base URI's path component is
2145 * copied to the buffer. In other words, any characters after the
2146 * last (right-most) slash character, if any, are excluded.
2147 */
2148 cur = 0;
2149 out = 0;
2150 if (bas->path != NULL) {
2151 while (bas->path[cur] != 0) {
2152 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2153 cur++;
2154 if (bas->path[cur] == 0)
2155 break;
2156
2157 cur++;
2158 while (out < cur) {
2159 res->path[out] = bas->path[out];
2160 out++;
2161 }
2162 }
2163 }
2164 res->path[out] = 0;
2165
2166 /*
2167 * b) The reference's path component is appended to the buffer
2168 * string.
2169 */
2170 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002171 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002172 /*
2173 * Ensure the path includes a '/'
2174 */
2175 if ((out == 0) && (bas->server != NULL))
2176 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002177 while (ref->path[indx] != 0) {
2178 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002179 }
2180 }
2181 res->path[out] = 0;
2182
2183 /*
2184 * Steps c) to h) are really path normalization steps
2185 */
2186 xmlNormalizeURIPath(res->path);
2187
2188step_7:
2189
2190 /*
2191 * 7) The resulting URI components, including any inherited from the
2192 * base URI, are recombined to give the absolute form of the URI
2193 * reference.
2194 */
2195 val = xmlSaveUri(res);
2196
2197done:
2198 if (ref != NULL)
2199 xmlFreeURI(ref);
2200 if (bas != NULL)
2201 xmlFreeURI(bas);
2202 if (res != NULL)
2203 xmlFreeURI(res);
2204 return(val);
2205}
2206
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002207/**
William M. Brackf7789b12004-06-07 08:57:27 +00002208 * xmlBuildRelativeURI:
2209 * @URI: the URI reference under consideration
2210 * @base: the base value
2211 *
2212 * Expresses the URI of the reference in terms relative to the
2213 * base. Some examples of this operation include:
2214 * base = "http://site1.com/docs/book1.html"
2215 * URI input URI returned
2216 * docs/pic1.gif pic1.gif
2217 * docs/img/pic1.gif img/pic1.gif
2218 * img/pic1.gif ../img/pic1.gif
2219 * http://site1.com/docs/pic1.gif pic1.gif
2220 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2221 *
2222 * base = "docs/book1.html"
2223 * URI input URI returned
2224 * docs/pic1.gif pic1.gif
2225 * docs/img/pic1.gif img/pic1.gif
2226 * img/pic1.gif ../img/pic1.gif
2227 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2228 *
2229 *
2230 * Note: if the URI reference is really wierd or complicated, it may be
2231 * worthwhile to first convert it into a "nice" one by calling
2232 * xmlBuildURI (using 'base') before calling this routine,
2233 * since this routine (for reasonable efficiency) assumes URI has
2234 * already been through some validation.
2235 *
2236 * Returns a new URI string (to be freed by the caller) or NULL in case
2237 * error.
2238 */
2239xmlChar *
2240xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2241{
2242 xmlChar *val = NULL;
2243 int ret;
2244 int ix;
2245 int pos = 0;
2246 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002247 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002248 xmlURIPtr ref = NULL;
2249 xmlURIPtr bas = NULL;
2250 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002251 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002252
2253 if ((URI == NULL) || (*URI == 0))
2254 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002255
2256 /*
2257 * First parse URI into a standard form
2258 */
2259 ref = xmlCreateURI ();
2260 if (ref == NULL)
2261 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002262 /* If URI not already in "relative" form */
2263 if (URI[0] != '.') {
2264 ret = xmlParseURIReference (ref, (const char *) URI);
2265 if (ret != 0)
2266 goto done; /* Error in URI, return NULL */
2267 } else
2268 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002269
2270 /*
2271 * Next parse base into the same standard form
2272 */
2273 if ((base == NULL) || (*base == 0)) {
2274 val = xmlStrdup (URI);
2275 goto done;
2276 }
2277 bas = xmlCreateURI ();
2278 if (bas == NULL)
2279 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002280 if (base[0] != '.') {
2281 ret = xmlParseURIReference (bas, (const char *) base);
2282 if (ret != 0)
2283 goto done; /* Error in base, return NULL */
2284 } else
2285 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002286
2287 /*
2288 * If the scheme / server on the URI differs from the base,
2289 * just return the URI
2290 */
2291 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002292 ((bas->scheme == NULL) ||
2293 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2294 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002295 val = xmlStrdup (URI);
2296 goto done;
2297 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002298 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2299 val = xmlStrdup(BAD_CAST "");
2300 goto done;
2301 }
2302 if (bas->path == NULL) {
2303 val = xmlStrdup((xmlChar *)ref->path);
2304 goto done;
2305 }
2306 if (ref->path == NULL) {
2307 ref->path = (char *) "/";
2308 remove_path = 1;
2309 }
William M. Brackf7789b12004-06-07 08:57:27 +00002310
2311 /*
2312 * At this point (at last!) we can compare the two paths
2313 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002314 * First we take care of the special case where either of the
2315 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002316 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002317 if (bas->path == NULL) {
2318 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002319 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002320 if (*uptr == '/')
2321 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002322 /* exception characters from xmlSaveUri */
2323 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002324 }
2325 goto done;
2326 }
William M. Brackf7789b12004-06-07 08:57:27 +00002327 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002328 if (ref->path == NULL) {
2329 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002330 if (bptr[ix] == '/')
2331 nbslash++;
2332 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002333 uptr = NULL;
2334 len = 1; /* this is for a string terminator only */
2335 } else {
2336 /*
2337 * Next we compare the two strings and find where they first differ
2338 */
2339 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2340 pos += 2;
2341 if ((*bptr == '.') && (bptr[1] == '/'))
2342 bptr += 2;
2343 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2344 bptr++;
2345 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2346 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002347
William M. Brack820d5ed2005-09-14 05:24:27 +00002348 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002349 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002350 goto done; /* (I can't imagine why anyone would do this) */
2351 }
2352
2353 /*
2354 * In URI, "back up" to the last '/' encountered. This will be the
2355 * beginning of the "unique" suffix of URI
2356 */
2357 ix = pos;
2358 if ((ref->path[ix] == '/') && (ix > 0))
2359 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002360 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2361 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002362 for (; ix > 0; ix--) {
2363 if (ref->path[ix] == '/')
2364 break;
2365 }
2366 if (ix == 0) {
2367 uptr = (xmlChar *)ref->path;
2368 } else {
2369 ix++;
2370 uptr = (xmlChar *)&ref->path[ix];
2371 }
2372
2373 /*
2374 * In base, count the number of '/' from the differing point
2375 */
2376 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2377 for (; bptr[ix] != 0; ix++) {
2378 if (bptr[ix] == '/')
2379 nbslash++;
2380 }
2381 }
2382 len = xmlStrlen (uptr) + 1;
2383 }
2384
William M. Brackf7789b12004-06-07 08:57:27 +00002385 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002386 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002387 /* exception characters from xmlSaveUri */
2388 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002389 goto done;
2390 }
William M. Brackf7789b12004-06-07 08:57:27 +00002391
2392 /*
2393 * Allocate just enough space for the returned string -
2394 * length of the remainder of the URI, plus enough space
2395 * for the "../" groups, plus one for the terminator
2396 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002397 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002398 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002399 xmlGenericError(xmlGenericErrorContext,
2400 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002401 goto done;
2402 }
2403 vptr = val;
2404 /*
2405 * Put in as many "../" as needed
2406 */
2407 for (; nbslash>0; nbslash--) {
2408 *vptr++ = '.';
2409 *vptr++ = '.';
2410 *vptr++ = '/';
2411 }
2412 /*
2413 * Finish up with the end of the URI
2414 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002415 if (uptr != NULL) {
2416 if ((vptr > val) && (len > 0) &&
2417 (uptr[0] == '/') && (vptr[-1] == '/')) {
2418 memcpy (vptr, uptr + 1, len - 1);
2419 vptr[len - 2] = 0;
2420 } else {
2421 memcpy (vptr, uptr, len);
2422 vptr[len - 1] = 0;
2423 }
2424 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002425 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002426 }
William M. Brackf7789b12004-06-07 08:57:27 +00002427
William M. Brack50420192007-07-20 01:09:08 +00002428 /* escape the freshly-built path */
2429 vptr = val;
2430 /* exception characters from xmlSaveUri */
2431 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2432 xmlFree(vptr);
2433
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002434done:
William M. Brackf7789b12004-06-07 08:57:27 +00002435 /*
2436 * Free the working variables
2437 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002438 if (remove_path != 0)
2439 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002440 if (ref != NULL)
2441 xmlFreeURI (ref);
2442 if (bas != NULL)
2443 xmlFreeURI (bas);
2444
2445 return val;
2446}
2447
2448/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002449 * xmlCanonicPath:
2450 * @path: the resource locator in a filesystem notation
2451 *
2452 * Constructs a canonic path from the specified path.
2453 *
2454 * Returns a new canonic path, or a duplicate of the path parameter if the
2455 * construction fails. The caller is responsible for freeing the memory occupied
2456 * by the returned string. If there is insufficient memory available, or the
2457 * argument is NULL, the function returns NULL.
2458 */
2459#define IS_WINDOWS_PATH(p) \
2460 ((p != NULL) && \
2461 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2462 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2463 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002464xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002465xmlCanonicPath(const xmlChar *path)
2466{
William M. Brack22242272007-01-27 07:59:37 +00002467/*
2468 * For Windows implementations, additional work needs to be done to
2469 * replace backslashes in pathnames with "forward slashes"
2470 */
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002471#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002472 int len = 0;
2473 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002474 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002475#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002476 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002477 xmlChar *ret;
2478 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002479
2480 if (path == NULL)
2481 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002482
2483 /* sanitize filename starting with // so it can be used as URI */
2484 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2485 path++;
2486
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002487 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002488 xmlFreeURI(uri);
2489 return xmlStrdup(path);
2490 }
2491
William M. Brack22242272007-01-27 07:59:37 +00002492 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002493 absuri = xmlStrstr(path, BAD_CAST "://");
2494 if (absuri != NULL) {
2495 int l, j;
2496 unsigned char c;
2497 xmlChar *escURI;
2498
2499 /*
2500 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002501 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002502 * part matches a protocol.
2503 */
2504 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002505 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002506 if ((l <= 0) || (l > 20))
2507 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002508 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002509 for (j = 0;j < l;j++) {
2510 c = path[j];
2511 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2512 goto path_processing;
2513 }
2514
William M. Brack22242272007-01-27 07:59:37 +00002515 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002516 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2517 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002518 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002519 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002520 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002521 if (uri != NULL) {
2522 xmlFreeURI(uri);
2523 return escURI;
2524 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002525 }
2526 }
2527
2528path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002529/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002530#if defined(_WIN32) && !defined(__CYGWIN__)
2531 /*
William M. Brack22242272007-01-27 07:59:37 +00002532 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002533 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002534 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002535 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002536 return(NULL);
2537 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002538
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002539 len = xmlStrlen(path);
2540 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002541 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002542 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002543 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002544 uri->path = xmlMallocAtomic(len + 2);
2545 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002546 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002547 return(NULL);
2548 }
William M. Brack22242272007-01-27 07:59:37 +00002549 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002550 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002551 p = uri->path + 1;
2552 strncpy(p, path, len + 1);
2553 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002554 uri->path = xmlStrdup(path);
2555 if (uri->path == NULL) {
2556 xmlFreeURI(uri);
2557 return(NULL);
2558 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002559 p = uri->path;
2560 }
William M. Brack22242272007-01-27 07:59:37 +00002561 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002562 while (*p != '\0') {
2563 if (*p == '\\')
2564 *p = '/';
2565 p++;
2566 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002567
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002568 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002569 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002570 } else {
2571 ret = xmlSaveUri(uri);
2572 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002573
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002574 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002575#else
2576 ret = xmlStrdup((const xmlChar *) path);
2577#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002578 return(ret);
2579}
Owen Taylor3473f882001-02-23 17:55:21 +00002580
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002581/**
2582 * xmlPathToURI:
2583 * @path: the resource locator in a filesystem notation
2584 *
2585 * Constructs an URI expressing the existing path
2586 *
2587 * Returns a new URI, or a duplicate of the path parameter if the
2588 * construction fails. The caller is responsible for freeing the memory
2589 * occupied by the returned string. If there is insufficient memory available,
2590 * or the argument is NULL, the function returns NULL.
2591 */
2592xmlChar *
2593xmlPathToURI(const xmlChar *path)
2594{
2595 xmlURIPtr uri;
2596 xmlURI temp;
2597 xmlChar *ret, *cal;
2598
2599 if (path == NULL)
2600 return(NULL);
2601
2602 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2603 xmlFreeURI(uri);
2604 return xmlStrdup(path);
2605 }
2606 cal = xmlCanonicPath(path);
2607 if (cal == NULL)
2608 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002609#if defined(_WIN32) && !defined(__CYGWIN__)
2610 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2611 If 'cal' is a valid URI allready then we are done here, as continuing would make
2612 it invalid. */
2613 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2614 xmlFreeURI(uri);
2615 return cal;
2616 }
2617 /* 'cal' can contain a relative path with backslashes. If that is processed
2618 by xmlSaveURI, they will be escaped and the external entity loader machinery
2619 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2620 ret = cal;
2621 while (*ret != '\0') {
2622 if (*ret == '\\')
2623 *ret = '/';
2624 ret++;
2625 }
2626#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002627 memset(&temp, 0, sizeof(temp));
2628 temp.path = (char *) cal;
2629 ret = xmlSaveUri(&temp);
2630 xmlFree(cal);
2631 return(ret);
2632}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002633#define bottom_uri
2634#include "elfgcchack.h"