blob: ff47abbecb1d0fc0f4161d3596e2840a6970ffaa [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
Daniel Veillard57560382012-07-24 11:44:23 +08002 * uri.c: set of generic URI related routines
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
Daniel Veillardd7af5552008-08-04 15:29:44 +00004 * Reference: RFCs 3986, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
#include <limits.h>
#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value when it is used as an operand of a larger expression.
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35static void
36xmlURIErrMemory(const char *extra)
37{
38 if (extra)
39 __xmlRaiseError(NULL, NULL, NULL,
40 NULL, NULL, XML_FROM_URI,
41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42 extra, NULL, NULL, 0, 0,
43 "Memory allocation failed : %s\n", extra);
44 else
45 __xmlRaiseError(NULL, NULL, NULL,
46 NULL, NULL, XML_FROM_URI,
47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48 NULL, NULL, NULL, 0, 0,
49 "Memory allocation failed\n");
50}
51
Daniel Veillardd7af5552008-08-04 15:29:44 +000052static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000053
/*
 * Character classes of the old RFC 2396 grammar, used by the legacy
 * handling code.
 *
 * All macros below take a character value, except IS_UNWISE which
 * takes a pointer to the character. An argument may be evaluated
 * several times, so it must be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) \
    (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) \
    (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) \
    (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any IS_DIGIT definition already in scope is dropped first.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) \
    (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) \
    (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || \
     ((x) == '!') || ((x) == '~') || ((x) == '*') || \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy classes this one takes a pointer to the
 * character, and it matches the square brackets as well.
 */
#define IS_UNWISE(p) \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || \
     ((x) == ':') || ((x) == '@') || ((x) == '&') || \
     ((x) == '=') || ((x) == '+') || ((x) == '$') || \
     ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) \
    (IS_ALPHANUM(x) || IS_MARK(x))
121
/*
 * Skip to next pointer char, handle escaped sequences.
 *
 * @p must be a modifiable pointer lvalue: it is advanced by three
 * characters when it points at '%' and by one character otherwise.
 * The macro itself does not check that two characters follow the '%';
 * the parsers in this file only let a '%' through after
 * ISA_PCT_ENCODED matched. The argument is parenthesized so that
 * expressions such as NEXT(*pp) update the intended object.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s with xmlStrndup and return the
 * copy as a char pointer. The whole expansion is parenthesized so the
 * cast cannot bind to a neighbouring operator.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
Owen Taylor3473f882001-02-23 17:55:21 +0000140/************************************************************************
141 * *
Daniel Veillardd7af5552008-08-04 15:29:44 +0000142 * RFC 3986 parser *
143 * *
144 ************************************************************************/
145
/*
 * Character classes of the RFC 3986 grammar. Every macro takes a
 * pointer to the character to test; the argument may be evaluated
 * several times and therefore must not have side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p) \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p) \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p) \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p) \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two hex digits are only read after the '%' matched, and the
 * second one only after the first one matched, so a NUL-terminated
 * string is never read past its terminator. The argument is
 * parenthesized before the offsets are added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p) \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p) \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
      ((*(p) == ':')) || ((*(p) == '@')))
195
196/**
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
200 *
201 * Parse an URI scheme
202 *
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204 *
205 * Returns 0 or the error code
206 */
207static int
208xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209 const char *cur;
210
211 if (str == NULL)
212 return(-1);
213
214 cur = *str;
215 if (!ISA_ALPHA(cur))
216 return(2);
217 cur++;
218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220 if (uri != NULL) {
221 if (uri->scheme != NULL) xmlFree(uri->scheme);
222 uri->scheme = STRNDUP(*str, cur - *str);
223 }
224 *str = cur;
225 return(0);
226}
227
228/**
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
232 *
233 * Parse the query part of an URI
234 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000240 *
241 * Returns 0 or the error code
242 */
243static int
244xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245{
246 const char *cur;
247
248 if (str == NULL)
249 return (-1);
250
251 cur = *str;
252
253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000254 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256 NEXT(cur);
257 if (uri != NULL) {
258 if (uri->fragment != NULL)
259 xmlFree(uri->fragment);
260 if (uri->cleanup & 2)
261 uri->fragment = STRNDUP(*str, cur - *str);
262 else
263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264 }
265 *str = cur;
266 return (0);
267}
268
269/**
270 * xmlParse3986Query:
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
273 *
274 * Parse the query part of an URI
275 *
276 * query = *uric
277 *
278 * Returns 0 or the error code
279 */
280static int
281xmlParse3986Query(xmlURIPtr uri, const char **str)
282{
283 const char *cur;
284
285 if (str == NULL)
286 return (-1);
287
288 cur = *str;
289
290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292 NEXT(cur);
293 if (uri != NULL) {
294 if (uri->query != NULL)
295 xmlFree(uri->query);
296 if (uri->cleanup & 2)
297 uri->query = STRNDUP(*str, cur - *str);
298 else
299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 */
304 if (uri->query_raw != NULL)
305 xmlFree (uri->query_raw);
306 uri->query_raw = STRNDUP (*str, cur - *str);
307 }
308 *str = cur;
309 return (0);
310}
311
312/**
313 * xmlParse3986Port:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
317 * Parse a port part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * port = *DIGIT
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Port(xmlURIPtr uri, const char **str)
326{
327 const char *cur = *str;
328
329 if (ISA_DIGIT(cur)) {
330 if (uri != NULL)
331 uri->port = 0;
332 while (ISA_DIGIT(cur)) {
333 if (uri != NULL)
334 uri->port = uri->port * 10 + (*cur - '0');
335 cur++;
336 }
337 *str = cur;
338 return(0);
339 }
340 return(1);
341}
342
343/**
344 * xmlParse3986Userinfo:
345 * @uri: pointer to an URI structure
346 * @str: the string to analyze
347 *
348 * Parse an user informations part and fills in the appropriate fields
349 * of the @uri structure
350 *
351 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
352 *
353 * Returns 0 or the error code
354 */
355static int
356xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
357{
358 const char *cur;
359
360 cur = *str;
361 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
362 ISA_SUB_DELIM(cur) || (*cur == ':'))
363 NEXT(cur);
364 if (*cur == '@') {
365 if (uri != NULL) {
366 if (uri->user != NULL) xmlFree(uri->user);
367 if (uri->cleanup & 2)
368 uri->user = STRNDUP(*str, cur - *str);
369 else
370 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
371 }
372 *str = cur;
373 return(0);
374 }
375 return(1);
376}
377
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. One or two digit values consume all their digits;
 * when three or more digits are present exactly three are examined and
 * they must form a value in the 100-255 range. A character is only
 * read when the previous one was a digit, so the terminating 0 of the
 * string is never passed.
 *
 * Returns 0 if found and skipped (@str is then moved past the octet),
 * 1 otherwise (@str is left untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int digits;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* 0-9 */
        digits = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        digits = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        digits = 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        digits = 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the bound applies to the third digit */
        digits = 3;
    } else {
        return(1);
    }

    *str = cur + digits;
    return(0);
}
415/**
416 * xmlParse3986Host:
417 * @uri: pointer to an URI structure
418 * @str: the string to analyze
419 *
420 * Parse an host part and fills in the appropriate fields
421 * of the @uri structure
422 *
423 * host = IP-literal / IPv4address / reg-name
424 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
425 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
426 * reg-name = *( unreserved / pct-encoded / sub-delims )
427 *
428 * Returns 0 or the error code
429 */
430static int
431xmlParse3986Host(xmlURIPtr uri, const char **str)
432{
433 const char *cur = *str;
434 const char *host;
435
436 host = cur;
437 /*
438 * IPv6 and future adressing scheme are enclosed between brackets
439 */
440 if (*cur == '[') {
441 cur++;
442 while ((*cur != ']') && (*cur != 0))
443 cur++;
444 if (*cur != ']')
445 return(1);
446 cur++;
447 goto found;
448 }
449 /*
450 * try to parse an IPv4
451 */
452 if (ISA_DIGIT(cur)) {
453 if (xmlParse3986DecOctet(&cur) != 0)
454 goto not_ipv4;
455 if (*cur != '.')
456 goto not_ipv4;
457 cur++;
458 if (xmlParse3986DecOctet(&cur) != 0)
459 goto not_ipv4;
460 if (*cur != '.')
461 goto not_ipv4;
462 if (xmlParse3986DecOctet(&cur) != 0)
463 goto not_ipv4;
464 if (*cur != '.')
465 goto not_ipv4;
466 if (xmlParse3986DecOctet(&cur) != 0)
467 goto not_ipv4;
468 goto found;
469not_ipv4:
470 cur = *str;
471 }
472 /*
473 * then this should be a hostname which can be empty
474 */
475 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
476 NEXT(cur);
477found:
478 if (uri != NULL) {
479 if (uri->authority != NULL) xmlFree(uri->authority);
480 uri->authority = NULL;
481 if (uri->server != NULL) xmlFree(uri->server);
482 if (cur != host) {
483 if (uri->cleanup & 2)
484 uri->server = STRNDUP(host, cur - host);
485 else
486 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
487 } else
488 uri->server = NULL;
489 }
490 *str = cur;
491 return(0);
492}
493
494/**
495 * xmlParse3986Authority:
496 * @uri: pointer to an URI structure
497 * @str: the string to analyze
498 *
499 * Parse an authority part and fills in the appropriate fields
500 * of the @uri structure
501 *
502 * authority = [ userinfo "@" ] host [ ":" port ]
503 *
504 * Returns 0 or the error code
505 */
506static int
507xmlParse3986Authority(xmlURIPtr uri, const char **str)
508{
509 const char *cur;
510 int ret;
511
512 cur = *str;
513 /*
514 * try to parse an userinfo and check for the trailing @
515 */
516 ret = xmlParse3986Userinfo(uri, &cur);
517 if ((ret != 0) || (*cur != '@'))
518 cur = *str;
519 else
520 cur++;
521 ret = xmlParse3986Host(uri, &cur);
522 if (ret != 0) return(ret);
523 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000524 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000525 ret = xmlParse3986Port(uri, &cur);
526 if (ret != 0) return(ret);
527 }
528 *str = cur;
529 return(0);
530}
531
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 if there is none
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping before @forbid if it is met
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success (@str is moved to the end of the segment),
 * 1 if the first character is not a pchar and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    /* nothing to consume: only fine if empty segments are allowed */
    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(pos))
            break;
        if (*pos == forbid)
            break;
        NEXT(pos);
    }
    *str = pos;
    return (0);
}
564
565/**
566 * xmlParse3986PathAbEmpty:
567 * @uri: pointer to an URI structure
568 * @str: the string to analyze
569 *
570 * Parse an path absolute or empty and fills in the appropriate fields
571 * of the @uri structure
572 *
573 * path-abempty = *( "/" segment )
574 *
575 * Returns 0 or the error code
576 */
577static int
578xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
579{
580 const char *cur;
581 int ret;
582
583 cur = *str;
584
585 while (*cur == '/') {
586 cur++;
587 ret = xmlParse3986Segment(&cur, 0, 1);
588 if (ret != 0) return(ret);
589 }
590 if (uri != NULL) {
591 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200592 if (*str != cur) {
593 if (uri->cleanup & 2)
594 uri->path = STRNDUP(*str, cur - *str);
595 else
596 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
597 } else {
598 uri->path = NULL;
599 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000600 }
601 *str = cur;
602 return (0);
603}
604
605/**
606 * xmlParse3986PathAbsolute:
607 * @uri: pointer to an URI structure
608 * @str: the string to analyze
609 *
610 * Parse an path absolute and fills in the appropriate fields
611 * of the @uri structure
612 *
613 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
614 *
615 * Returns 0 or the error code
616 */
617static int
618xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
619{
620 const char *cur;
621 int ret;
622
623 cur = *str;
624
625 if (*cur != '/')
626 return(1);
627 cur++;
628 ret = xmlParse3986Segment(&cur, 0, 0);
629 if (ret == 0) {
630 while (*cur == '/') {
631 cur++;
632 ret = xmlParse3986Segment(&cur, 0, 1);
633 if (ret != 0) return(ret);
634 }
635 }
636 if (uri != NULL) {
637 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200638 if (cur != *str) {
639 if (uri->cleanup & 2)
640 uri->path = STRNDUP(*str, cur - *str);
641 else
642 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
643 } else {
644 uri->path = NULL;
645 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000646 }
647 *str = cur;
648 return (0);
649}
650
651/**
652 * xmlParse3986PathRootless:
653 * @uri: pointer to an URI structure
654 * @str: the string to analyze
655 *
656 * Parse an path without root and fills in the appropriate fields
657 * of the @uri structure
658 *
659 * path-rootless = segment-nz *( "/" segment )
660 *
661 * Returns 0 or the error code
662 */
663static int
664xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
665{
666 const char *cur;
667 int ret;
668
669 cur = *str;
670
671 ret = xmlParse3986Segment(&cur, 0, 0);
672 if (ret != 0) return(ret);
673 while (*cur == '/') {
674 cur++;
675 ret = xmlParse3986Segment(&cur, 0, 1);
676 if (ret != 0) return(ret);
677 }
678 if (uri != NULL) {
679 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200680 if (cur != *str) {
681 if (uri->cleanup & 2)
682 uri->path = STRNDUP(*str, cur - *str);
683 else
684 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
685 } else {
686 uri->path = NULL;
687 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000688 }
689 *str = cur;
690 return (0);
691}
692
693/**
694 * xmlParse3986PathNoScheme:
695 * @uri: pointer to an URI structure
696 * @str: the string to analyze
697 *
698 * Parse an path which is not a scheme and fills in the appropriate fields
699 * of the @uri structure
700 *
701 * path-noscheme = segment-nz-nc *( "/" segment )
702 *
703 * Returns 0 or the error code
704 */
705static int
706xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
707{
708 const char *cur;
709 int ret;
710
711 cur = *str;
712
713 ret = xmlParse3986Segment(&cur, ':', 0);
714 if (ret != 0) return(ret);
715 while (*cur == '/') {
716 cur++;
717 ret = xmlParse3986Segment(&cur, 0, 1);
718 if (ret != 0) return(ret);
719 }
720 if (uri != NULL) {
721 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200722 if (cur != *str) {
723 if (uri->cleanup & 2)
724 uri->path = STRNDUP(*str, cur - *str);
725 else
726 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
727 } else {
728 uri->path = NULL;
729 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000730 }
731 *str = cur;
732 return (0);
733}
734
735/**
736 * xmlParse3986HierPart:
737 * @uri: pointer to an URI structure
738 * @str: the string to analyze
739 *
740 * Parse an hierarchical part and fills in the appropriate fields
741 * of the @uri structure
742 *
743 * hier-part = "//" authority path-abempty
744 * / path-absolute
745 * / path-rootless
746 * / path-empty
747 *
748 * Returns 0 or the error code
749 */
750static int
751xmlParse3986HierPart(xmlURIPtr uri, const char **str)
752{
753 const char *cur;
754 int ret;
755
756 cur = *str;
757
758 if ((*cur == '/') && (*(cur + 1) == '/')) {
759 cur += 2;
760 ret = xmlParse3986Authority(uri, &cur);
761 if (ret != 0) return(ret);
Daniel Veillardbeb72812014-10-03 19:22:39 +0800762 if (uri->server == NULL)
763 uri->port = -1;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000764 ret = xmlParse3986PathAbEmpty(uri, &cur);
765 if (ret != 0) return(ret);
766 *str = cur;
767 return(0);
768 } else if (*cur == '/') {
769 ret = xmlParse3986PathAbsolute(uri, &cur);
770 if (ret != 0) return(ret);
771 } else if (ISA_PCHAR(cur)) {
772 ret = xmlParse3986PathRootless(uri, &cur);
773 if (ret != 0) return(ret);
774 } else {
775 /* path-empty is effectively empty */
776 if (uri != NULL) {
777 if (uri->path != NULL) xmlFree(uri->path);
778 uri->path = NULL;
779 }
780 }
781 *str = cur;
782 return (0);
783}
784
785/**
786 * xmlParse3986RelativeRef:
787 * @uri: pointer to an URI structure
788 * @str: the string to analyze
789 *
790 * Parse an URI string and fills in the appropriate fields
791 * of the @uri structure
792 *
793 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
794 * relative-part = "//" authority path-abempty
795 * / path-absolute
796 * / path-noscheme
797 * / path-empty
798 *
799 * Returns 0 or the error code
800 */
801static int
802xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
803 int ret;
804
805 if ((*str == '/') && (*(str + 1) == '/')) {
806 str += 2;
807 ret = xmlParse3986Authority(uri, &str);
808 if (ret != 0) return(ret);
809 ret = xmlParse3986PathAbEmpty(uri, &str);
810 if (ret != 0) return(ret);
811 } else if (*str == '/') {
812 ret = xmlParse3986PathAbsolute(uri, &str);
813 if (ret != 0) return(ret);
814 } else if (ISA_PCHAR(str)) {
815 ret = xmlParse3986PathNoScheme(uri, &str);
816 if (ret != 0) return(ret);
817 } else {
818 /* path-empty is effectively empty */
819 if (uri != NULL) {
820 if (uri->path != NULL) xmlFree(uri->path);
821 uri->path = NULL;
822 }
823 }
824
825 if (*str == '?') {
826 str++;
827 ret = xmlParse3986Query(uri, &str);
828 if (ret != 0) return(ret);
829 }
830 if (*str == '#') {
831 str++;
832 ret = xmlParse3986Fragment(uri, &str);
833 if (ret != 0) return(ret);
834 }
835 if (*str != 0) {
836 xmlCleanURI(uri);
837 return(1);
838 }
839 return(0);
840}
841
842
843/**
844 * xmlParse3986URI:
845 * @uri: pointer to an URI structure
846 * @str: the string to analyze
847 *
848 * Parse an URI string and fills in the appropriate fields
849 * of the @uri structure
850 *
851 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
852 *
853 * Returns 0 or the error code
854 */
855static int
856xmlParse3986URI(xmlURIPtr uri, const char *str) {
857 int ret;
858
859 ret = xmlParse3986Scheme(uri, &str);
860 if (ret != 0) return(ret);
861 if (*str != ':') {
862 return(1);
863 }
864 str++;
865 ret = xmlParse3986HierPart(uri, &str);
866 if (ret != 0) return(ret);
867 if (*str == '?') {
868 str++;
869 ret = xmlParse3986Query(uri, &str);
870 if (ret != 0) return(ret);
871 }
872 if (*str == '#') {
873 str++;
874 ret = xmlParse3986Fragment(uri, &str);
875 if (ret != 0) return(ret);
876 }
877 if (*str != 0) {
878 xmlCleanURI(uri);
879 return(1);
880 }
881 return(0);
882}
883
884/**
885 * xmlParse3986URIReference:
886 * @uri: pointer to an URI structure
887 * @str: the string to analyze
888 *
889 * Parse an URI reference string and fills in the appropriate fields
890 * of the @uri structure
891 *
892 * URI-reference = URI / relative-ref
893 *
894 * Returns 0 or the error code
895 */
896static int
897xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
898 int ret;
899
900 if (str == NULL)
901 return(-1);
902 xmlCleanURI(uri);
903
904 /*
905 * Try first to parse absolute refs, then fallback to relative if
906 * it fails.
907 */
908 ret = xmlParse3986URI(uri, str);
909 if (ret != 0) {
910 xmlCleanURI(uri);
911 ret = xmlParse3986RelativeRef(uri, str);
912 if (ret != 0) {
913 xmlCleanURI(uri);
914 return(ret);
915 }
916 }
917 return(0);
918}
919
920/**
921 * xmlParseURI:
922 * @str: the URI string to analyze
923 *
924 * Parse an URI based on RFC 3986
925 *
926 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
927 *
928 * Returns a newly built xmlURIPtr or NULL in case of error
929 */
930xmlURIPtr
931xmlParseURI(const char *str) {
932 xmlURIPtr uri;
933 int ret;
934
935 if (str == NULL)
936 return(NULL);
937 uri = xmlCreateURI();
938 if (uri != NULL) {
939 ret = xmlParse3986URIReference(uri, str);
940 if (ret) {
941 xmlFreeURI(uri);
942 return(NULL);
943 }
944 }
945 return(uri);
946}
947
948/**
949 * xmlParseURIReference:
950 * @uri: pointer to an URI structure
951 * @str: the string to analyze
952 *
953 * Parse an URI reference string based on RFC 3986 and fills in the
954 * appropriate fields of the @uri structure
955 *
956 * URI-reference = URI / relative-ref
957 *
958 * Returns 0 or the error code
959 */
960int
961xmlParseURIReference(xmlURIPtr uri, const char *str) {
962 return(xmlParse3986URIReference(uri, str));
963}
964
965/**
966 * xmlParseURIRaw:
967 * @str: the URI string to analyze
968 * @raw: if 1 unescaping of URI pieces are disabled
969 *
970 * Parse an URI but allows to keep intact the original fragments.
971 *
972 * URI-reference = URI / relative-ref
973 *
974 * Returns a newly built xmlURIPtr or NULL in case of error
975 */
976xmlURIPtr
977xmlParseURIRaw(const char *str, int raw) {
978 xmlURIPtr uri;
979 int ret;
980
981 if (str == NULL)
982 return(NULL);
983 uri = xmlCreateURI();
984 if (uri != NULL) {
985 if (raw) {
986 uri->cleanup |= 2;
987 }
988 ret = xmlParseURIReference(uri, str);
989 if (ret) {
990 xmlFreeURI(uri);
991 return(NULL);
992 }
993 }
994 return(uri);
995}
996
997/************************************************************************
998 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000999 * Generic URI structure functions *
1000 * *
1001 ************************************************************************/
1002
1003/**
1004 * xmlCreateURI:
1005 *
1006 * Simply creates an empty xmlURI
1007 *
1008 * Returns the new structure or NULL in case of error
1009 */
1010xmlURIPtr
1011xmlCreateURI(void) {
1012 xmlURIPtr ret;
1013
1014 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1015 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001016 xmlURIErrMemory("creating URI structure\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001017 return(NULL);
1018 }
1019 memset(ret, 0, sizeof(xmlURI));
1020 return(ret);
1021}
1022
1023/**
Daniel Veillard57560382012-07-24 11:44:23 +08001024 * xmlSaveUriRealloc:
1025 *
1026 * Function to handle properly a reallocation when saving an URI
1027 * Also imposes some limit on the length of an URI string output
1028 */
1029static xmlChar *
1030xmlSaveUriRealloc(xmlChar *ret, int *max) {
1031 xmlChar *temp;
1032 int tmp;
1033
1034 if (*max > MAX_URI_LENGTH) {
1035 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1036 return(NULL);
1037 }
1038 tmp = *max * 2;
1039 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1040 if (temp == NULL) {
1041 xmlURIErrMemory("saving URI\n");
1042 return(NULL);
1043 }
1044 *max = tmp;
1045 return(temp);
1046}
1047
1048/**
Owen Taylor3473f882001-02-23 17:55:21 +00001049 * xmlSaveUri:
1050 * @uri: pointer to an xmlURI
1051 *
1052 * Save the URI as an escaped string
1053 *
1054 * Returns a new string (to be deallocated by caller)
1055 */
1056xmlChar *
1057xmlSaveUri(xmlURIPtr uri) {
1058 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001059 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001060 const char *p;
1061 int len;
1062 int max;
1063
1064 if (uri == NULL) return(NULL);
1065
1066
1067 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001068 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001069 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001070 xmlURIErrMemory("saving URI\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001071 return(NULL);
1072 }
1073 len = 0;
1074
1075 if (uri->scheme != NULL) {
1076 p = uri->scheme;
1077 while (*p != 0) {
1078 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001079 temp = xmlSaveUriRealloc(ret, &max);
1080 if (temp == NULL) goto mem_error;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001081 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
1083 ret[len++] = *p++;
1084 }
1085 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001086 temp = xmlSaveUriRealloc(ret, &max);
1087 if (temp == NULL) goto mem_error;
1088 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001089 }
1090 ret[len++] = ':';
1091 }
1092 if (uri->opaque != NULL) {
1093 p = uri->opaque;
1094 while (*p != 0) {
1095 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001096 temp = xmlSaveUriRealloc(ret, &max);
1097 if (temp == NULL) goto mem_error;
1098 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001099 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001100 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001101 ret[len++] = *p++;
1102 else {
1103 int val = *(unsigned char *)p++;
1104 int hi = val / 0x10, lo = val % 0x10;
1105 ret[len++] = '%';
1106 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1107 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1108 }
1109 }
Owen Taylor3473f882001-02-23 17:55:21 +00001110 } else {
Daniel Veillardbeb72812014-10-03 19:22:39 +08001111 if ((uri->server != NULL) || (uri->port == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001112 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001113 temp = xmlSaveUriRealloc(ret, &max);
1114 if (temp == NULL) goto mem_error;
1115 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001116 }
1117 ret[len++] = '/';
1118 ret[len++] = '/';
1119 if (uri->user != NULL) {
1120 p = uri->user;
1121 while (*p != 0) {
1122 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001123 temp = xmlSaveUriRealloc(ret, &max);
1124 if (temp == NULL) goto mem_error;
1125 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001126 }
1127 if ((IS_UNRESERVED(*(p))) ||
1128 ((*(p) == ';')) || ((*(p) == ':')) ||
1129 ((*(p) == '&')) || ((*(p) == '=')) ||
1130 ((*(p) == '+')) || ((*(p) == '$')) ||
1131 ((*(p) == ',')))
1132 ret[len++] = *p++;
1133 else {
1134 int val = *(unsigned char *)p++;
1135 int hi = val / 0x10, lo = val % 0x10;
1136 ret[len++] = '%';
1137 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1138 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1139 }
1140 }
1141 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001142 temp = xmlSaveUriRealloc(ret, &max);
1143 if (temp == NULL) goto mem_error;
1144 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001145 }
1146 ret[len++] = '@';
1147 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001148 if (uri->server != NULL) {
1149 p = uri->server;
1150 while (*p != 0) {
1151 if (len >= max) {
1152 temp = xmlSaveUriRealloc(ret, &max);
1153 if (temp == NULL) goto mem_error;
1154 ret = temp;
1155 }
1156 ret[len++] = *p++;
Owen Taylor3473f882001-02-23 17:55:21 +00001157 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001158 if (uri->port > 0) {
1159 if (len + 10 >= max) {
1160 temp = xmlSaveUriRealloc(ret, &max);
1161 if (temp == NULL) goto mem_error;
1162 ret = temp;
1163 }
1164 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001165 }
Owen Taylor3473f882001-02-23 17:55:21 +00001166 }
1167 } else if (uri->authority != NULL) {
1168 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001169 temp = xmlSaveUriRealloc(ret, &max);
1170 if (temp == NULL) goto mem_error;
1171 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001172 }
1173 ret[len++] = '/';
1174 ret[len++] = '/';
1175 p = uri->authority;
1176 while (*p != 0) {
1177 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001178 temp = xmlSaveUriRealloc(ret, &max);
1179 if (temp == NULL) goto mem_error;
1180 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001181 }
1182 if ((IS_UNRESERVED(*(p))) ||
1183 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1184 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1185 ((*(p) == '=')) || ((*(p) == '+')))
1186 ret[len++] = *p++;
1187 else {
1188 int val = *(unsigned char *)p++;
1189 int hi = val / 0x10, lo = val % 0x10;
1190 ret[len++] = '%';
1191 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1192 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1193 }
1194 }
1195 } else if (uri->scheme != NULL) {
1196 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001197 temp = xmlSaveUriRealloc(ret, &max);
1198 if (temp == NULL) goto mem_error;
1199 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001200 }
Owen Taylor3473f882001-02-23 17:55:21 +00001201 }
1202 if (uri->path != NULL) {
1203 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001204 /*
1205 * the colon in file:///d: should not be escaped or
1206 * Windows accesses fail later.
1207 */
1208 if ((uri->scheme != NULL) &&
1209 (p[0] == '/') &&
1210 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1211 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1212 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001213 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001214 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001215 temp = xmlSaveUriRealloc(ret, &max);
1216 if (temp == NULL) goto mem_error;
1217 ret = temp;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001218 }
1219 ret[len++] = *p++;
1220 ret[len++] = *p++;
1221 ret[len++] = *p++;
1222 }
Owen Taylor3473f882001-02-23 17:55:21 +00001223 while (*p != 0) {
1224 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001225 temp = xmlSaveUriRealloc(ret, &max);
1226 if (temp == NULL) goto mem_error;
1227 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001228 }
1229 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1230 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1231 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1232 ((*(p) == ',')))
1233 ret[len++] = *p++;
1234 else {
1235 int val = *(unsigned char *)p++;
1236 int hi = val / 0x10, lo = val % 0x10;
1237 ret[len++] = '%';
1238 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1239 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1240 }
1241 }
1242 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001243 if (uri->query_raw != NULL) {
1244 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001245 temp = xmlSaveUriRealloc(ret, &max);
1246 if (temp == NULL) goto mem_error;
1247 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001248 }
1249 ret[len++] = '?';
1250 p = uri->query_raw;
1251 while (*p != 0) {
1252 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001253 temp = xmlSaveUriRealloc(ret, &max);
1254 if (temp == NULL) goto mem_error;
1255 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001256 }
1257 ret[len++] = *p++;
1258 }
1259 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001260 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001261 temp = xmlSaveUriRealloc(ret, &max);
1262 if (temp == NULL) goto mem_error;
1263 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001264 }
1265 ret[len++] = '?';
1266 p = uri->query;
1267 while (*p != 0) {
1268 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001269 temp = xmlSaveUriRealloc(ret, &max);
1270 if (temp == NULL) goto mem_error;
1271 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001272 }
Daniel Veillard57560382012-07-24 11:44:23 +08001273 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Owen Taylor3473f882001-02-23 17:55:21 +00001274 ret[len++] = *p++;
1275 else {
1276 int val = *(unsigned char *)p++;
1277 int hi = val / 0x10, lo = val % 0x10;
1278 ret[len++] = '%';
1279 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1280 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1281 }
1282 }
1283 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001284 }
1285 if (uri->fragment != NULL) {
1286 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001287 temp = xmlSaveUriRealloc(ret, &max);
1288 if (temp == NULL) goto mem_error;
1289 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001290 }
1291 ret[len++] = '#';
1292 p = uri->fragment;
1293 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001294 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001295 temp = xmlSaveUriRealloc(ret, &max);
1296 if (temp == NULL) goto mem_error;
1297 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001298 }
Daniel Veillard57560382012-07-24 11:44:23 +08001299 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001300 ret[len++] = *p++;
1301 else {
1302 int val = *(unsigned char *)p++;
1303 int hi = val / 0x10, lo = val % 0x10;
1304 ret[len++] = '%';
1305 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1306 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001307 }
1308 }
Owen Taylor3473f882001-02-23 17:55:21 +00001309 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001310 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001311 temp = xmlSaveUriRealloc(ret, &max);
1312 if (temp == NULL) goto mem_error;
1313 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001314 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02001315 ret[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001316 return(ret);
Daniel Veillard57560382012-07-24 11:44:23 +08001317
1318mem_error:
1319 xmlFree(ret);
1320 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001321}
1322
1323/**
1324 * xmlPrintURI:
1325 * @stream: a FILE* for the output
1326 * @uri: pointer to an xmlURI
1327 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001328 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001329 */
1330void
1331xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1332 xmlChar *out;
1333
1334 out = xmlSaveUri(uri);
1335 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001336 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001337 xmlFree(out);
1338 }
1339}
1340
1341/**
1342 * xmlCleanURI:
1343 * @uri: pointer to an xmlURI
1344 *
1345 * Make sure the xmlURI struct is free of content
1346 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001347static void
Owen Taylor3473f882001-02-23 17:55:21 +00001348xmlCleanURI(xmlURIPtr uri) {
1349 if (uri == NULL) return;
1350
1351 if (uri->scheme != NULL) xmlFree(uri->scheme);
1352 uri->scheme = NULL;
1353 if (uri->server != NULL) xmlFree(uri->server);
1354 uri->server = NULL;
1355 if (uri->user != NULL) xmlFree(uri->user);
1356 uri->user = NULL;
1357 if (uri->path != NULL) xmlFree(uri->path);
1358 uri->path = NULL;
1359 if (uri->fragment != NULL) xmlFree(uri->fragment);
1360 uri->fragment = NULL;
1361 if (uri->opaque != NULL) xmlFree(uri->opaque);
1362 uri->opaque = NULL;
1363 if (uri->authority != NULL) xmlFree(uri->authority);
1364 uri->authority = NULL;
1365 if (uri->query != NULL) xmlFree(uri->query);
1366 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001367 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1368 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001369}
1370
1371/**
1372 * xmlFreeURI:
1373 * @uri: pointer to an xmlURI
1374 *
1375 * Free up the xmlURI struct
1376 */
1377void
1378xmlFreeURI(xmlURIPtr uri) {
1379 if (uri == NULL) return;
1380
1381 if (uri->scheme != NULL) xmlFree(uri->scheme);
1382 if (uri->server != NULL) xmlFree(uri->server);
1383 if (uri->user != NULL) xmlFree(uri->user);
1384 if (uri->path != NULL) xmlFree(uri->path);
1385 if (uri->fragment != NULL) xmlFree(uri->fragment);
1386 if (uri->opaque != NULL) xmlFree(uri->opaque);
1387 if (uri->authority != NULL) xmlFree(uri->authority);
1388 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001389 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001390 xmlFree(uri);
1391}
1392
1393/************************************************************************
1394 * *
1395 * Helper functions *
1396 * *
1397 ************************************************************************/
1398
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of consecutive '/' inside the
 * path are collapsed to a single '/' as part of the first pass; leading
 * '/' characters are kept untouched.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* Copy the single remaining '/' separator. */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so copy by hand (not strcpy). */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /*
         * Walk back over the '/' separator(s) preceding "cur" to find the
         * last character of a previous segment.  There is no previous
         * segment when "cur" is already at the start of the buffer, or
         * when only '/' characters precede it.  Testing the character the
         * scan stopped on (rather than comparing against "path") matters
         * when the first segment of a relative path is a single character:
         * the scan then stops exactly at "path" on a real segment.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((segp == cur) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only done for paths that start
     * with '/').
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001587
/* Returns 1 when c is one of 0-9, a-f or A-F, and 0 otherwise. */
static int is_hex(char c) {
    int is_digit = (c >= '0') && (c <= '9');
    int is_lower = (c >= 'a') && (c <= 'f');
    int is_upper = (c >= 'A') && (c <= 'F');

    return((is_digit || is_lower || is_upper) ? 1 : 0);
}
1595
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits within the first
 * @len bytes is copied through unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string, so a caller-provided @target must have room for
 * @len + 1 bytes (the output is always NUL terminated).
 *
 * Returns @target when it is not NULL, otherwise a newly allocated copy of
 * the string, unescaped; returns NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while(len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            /*
             * Accumulate the byte value in an int: the result can be up to
             * 0xFF, which does not fit a plain char when char is signed.
             */
            int value = 0;
            int i;

            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            /* Store through unsigned char so 0x80..0xFF is well defined. */
            *(unsigned char *) out = (unsigned char) value;
            out++;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1657
1658/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001659 * xmlURIEscapeStr:
1660 * @str: string to escape
1661 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001662 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001663 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1664 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001665 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001666 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001667 */
1668xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001669xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1670 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001671 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001672 const xmlChar *in;
Daniel Veillard57560382012-07-24 11:44:23 +08001673 int len, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001674
1675 if (str == NULL)
1676 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001677 if (str[0] == 0)
1678 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001679 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001680 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001681
1682 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001683 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001684 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001685 xmlURIErrMemory("escaping URI value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001686 return(NULL);
1687 }
1688 in = (const xmlChar *) str;
1689 out = 0;
1690 while(*in != 0) {
1691 if (len - out <= 3) {
Daniel Veillard57560382012-07-24 11:44:23 +08001692 temp = xmlSaveUriRealloc(ret, &len);
Daniel Veillarded86dc22008-04-24 11:58:41 +00001693 if (temp == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001694 xmlURIErrMemory("escaping URI value\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001695 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001696 return(NULL);
1697 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001698 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001699 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001700
1701 ch = *in;
1702
Daniel Veillardeb475a32002-04-14 22:00:22 +00001703 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001704 unsigned char val;
1705 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001706 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001707 if (val <= 9)
1708 ret[out++] = '0' + val;
1709 else
1710 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001711 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001712 if (val <= 9)
1713 ret[out++] = '0' + val;
1714 else
1715 ret[out++] = 'A' + val - 0xA;
1716 in++;
1717 } else {
1718 ret[out++] = *in++;
1719 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 }
1722 ret[out] = 0;
1723 return(ret);
1724}
1725
Daniel Veillard8514c672001-05-23 10:29:12 +00001726/**
1727 * xmlURIEscape:
1728 * @str: the string of the URI to escape
1729 *
1730 * Escaping routine, does not do validity checks !
1731 * It will try to escape the chars needing this, but this is heuristic
1732 * based it's impossible to be sure.
1733 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001734 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001735 *
1736 * 25 May 2001
1737 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1738 * according to RFC2396.
1739 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001740 */
1741xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001742xmlURIEscape(const xmlChar * str)
1743{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001744 xmlChar *ret, *segment = NULL;
1745 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001746 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001747
Daniel Veillard6278fb52001-05-25 07:38:41 +00001748#define NULLCHK(p) if(!p) { \
Daniel Veillard57560382012-07-24 11:44:23 +08001749 xmlURIErrMemory("escaping URI value\n"); \
1750 xmlFreeURI(uri); \
1751 return NULL; } \
Daniel Veillard6278fb52001-05-25 07:38:41 +00001752
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001753 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001754 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001755
1756 uri = xmlCreateURI();
1757 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001758 /*
1759 * Allow escaping errors in the unescaped form
1760 */
1761 uri->cleanup = 1;
1762 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001763 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001764 xmlFreeURI(uri);
1765 return (NULL);
1766 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001767 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001768
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001769 if (!uri)
1770 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001771
1772 ret = NULL;
1773
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001774 if (uri->scheme) {
1775 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1776 NULLCHK(segment)
1777 ret = xmlStrcat(ret, segment);
1778 ret = xmlStrcat(ret, BAD_CAST ":");
1779 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001780 }
1781
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001782 if (uri->authority) {
1783 segment =
1784 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1785 NULLCHK(segment)
1786 ret = xmlStrcat(ret, BAD_CAST "//");
1787 ret = xmlStrcat(ret, segment);
1788 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001789 }
1790
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001791 if (uri->user) {
1792 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1793 NULLCHK(segment)
Daniel Veillard57560382012-07-24 11:44:23 +08001794 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001795 ret = xmlStrcat(ret, segment);
1796 ret = xmlStrcat(ret, BAD_CAST "@");
1797 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001798 }
1799
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001800 if (uri->server) {
1801 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1802 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001803 if (uri->user == NULL)
Daniel Veillardd7af5552008-08-04 15:29:44 +00001804 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001805 ret = xmlStrcat(ret, segment);
1806 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001807 }
1808
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001809 if (uri->port) {
1810 xmlChar port[10];
1811
Daniel Veillard43d3f612001-11-10 11:57:23 +00001812 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001813 ret = xmlStrcat(ret, BAD_CAST ":");
1814 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001815 }
1816
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001817 if (uri->path) {
1818 segment =
1819 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1820 NULLCHK(segment)
1821 ret = xmlStrcat(ret, segment);
1822 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001823 }
1824
Daniel Veillarda1413b82007-04-26 08:33:28 +00001825 if (uri->query_raw) {
1826 ret = xmlStrcat(ret, BAD_CAST "?");
1827 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1828 }
1829 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001830 segment =
1831 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1832 NULLCHK(segment)
1833 ret = xmlStrcat(ret, BAD_CAST "?");
1834 ret = xmlStrcat(ret, segment);
1835 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001836 }
1837
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001838 if (uri->opaque) {
1839 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1840 NULLCHK(segment)
1841 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001842 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001843 }
1844
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001845 if (uri->fragment) {
1846 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1847 NULLCHK(segment)
1848 ret = xmlStrcat(ret, BAD_CAST "#");
1849 ret = xmlStrcat(ret, segment);
1850 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001851 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001852
1853 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001854#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001855
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001856 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001857}
1858
Owen Taylor3473f882001-02-23 17:55:21 +00001859/************************************************************************
1860 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001861 * Public functions *
1862 * *
1863 ************************************************************************/
1864
1865/**
1866 * xmlBuildURI:
1867 * @URI: the URI instance found in the document
1868 * @base: the base value
1869 *
1870 * Computes the final URI of the reference done by checking that
1871 * the given URI is valid, and building the final URI using the
Daniel Veillard57560382012-07-24 11:44:23 +08001872 * base URI. This is processed according to section 5.2 of the
Owen Taylor3473f882001-02-23 17:55:21 +00001873 * RFC 2396
1874 *
1875 * 5.2. Resolving Relative References to Absolute Form
1876 *
1877 * Returns a new URI string (to be freed by the caller) or NULL in case
1878 * of error.
1879 */
1880xmlChar *
1881xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1882 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001883 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001884 xmlURIPtr ref = NULL;
1885 xmlURIPtr bas = NULL;
1886 xmlURIPtr res = NULL;
1887
1888 /*
1889 * 1) The URI reference is parsed into the potential four components and
1890 * fragment identifier, as described in Section 4.3.
1891 *
1892 * NOTE that a completely empty URI is treated by modern browsers
1893 * as a reference to "." rather than as a synonym for the current
1894 * URI. Should we do that here?
1895 */
Daniel Veillard57560382012-07-24 11:44:23 +08001896 if (URI == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001897 ret = -1;
1898 else {
1899 if (*URI) {
1900 ref = xmlCreateURI();
1901 if (ref == NULL)
1902 goto done;
1903 ret = xmlParseURIReference(ref, (const char *) URI);
1904 }
1905 else
1906 ret = 0;
1907 }
1908 if (ret != 0)
1909 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001910 if ((ref != NULL) && (ref->scheme != NULL)) {
1911 /*
1912 * The URI is absolute don't modify.
1913 */
1914 val = xmlStrdup(URI);
1915 goto done;
1916 }
Owen Taylor3473f882001-02-23 17:55:21 +00001917 if (base == NULL)
1918 ret = -1;
1919 else {
1920 bas = xmlCreateURI();
1921 if (bas == NULL)
1922 goto done;
1923 ret = xmlParseURIReference(bas, (const char *) base);
1924 }
1925 if (ret != 0) {
1926 if (ref)
1927 val = xmlSaveUri(ref);
1928 goto done;
1929 }
1930 if (ref == NULL) {
1931 /*
1932 * the base fragment must be ignored
1933 */
1934 if (bas->fragment != NULL) {
1935 xmlFree(bas->fragment);
1936 bas->fragment = NULL;
1937 }
1938 val = xmlSaveUri(bas);
1939 goto done;
1940 }
1941
1942 /*
1943 * 2) If the path component is empty and the scheme, authority, and
1944 * query components are undefined, then it is a reference to the
1945 * current document and we are done. Otherwise, the reference URI's
1946 * query and fragment components are defined as found (or not found)
1947 * within the URI reference and not inherited from the base URI.
1948 *
1949 * NOTE that in modern browsers, the parsing differs from the above
1950 * in the following aspect: the query component is allowed to be
1951 * defined while still treating this as a reference to the current
1952 * document.
1953 */
1954 res = xmlCreateURI();
1955 if (res == NULL)
1956 goto done;
1957 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1958 ((ref->authority == NULL) && (ref->server == NULL))) {
1959 if (bas->scheme != NULL)
1960 res->scheme = xmlMemStrdup(bas->scheme);
1961 if (bas->authority != NULL)
1962 res->authority = xmlMemStrdup(bas->authority);
1963 else if (bas->server != NULL) {
1964 res->server = xmlMemStrdup(bas->server);
1965 if (bas->user != NULL)
1966 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08001967 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00001968 }
1969 if (bas->path != NULL)
1970 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001971 if (ref->query_raw != NULL)
1972 res->query_raw = xmlMemStrdup (ref->query_raw);
1973 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001974 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001975 else if (bas->query_raw != NULL)
1976 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 else if (bas->query != NULL)
1978 res->query = xmlMemStrdup(bas->query);
1979 if (ref->fragment != NULL)
1980 res->fragment = xmlMemStrdup(ref->fragment);
1981 goto step_7;
1982 }
Owen Taylor3473f882001-02-23 17:55:21 +00001983
1984 /*
1985 * 3) If the scheme component is defined, indicating that the reference
1986 * starts with a scheme name, then the reference is interpreted as an
1987 * absolute URI and we are done. Otherwise, the reference URI's
1988 * scheme is inherited from the base URI's scheme component.
1989 */
1990 if (ref->scheme != NULL) {
1991 val = xmlSaveUri(ref);
1992 goto done;
1993 }
1994 if (bas->scheme != NULL)
1995 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard57560382012-07-24 11:44:23 +08001996
Daniel Veillarda1413b82007-04-26 08:33:28 +00001997 if (ref->query_raw != NULL)
1998 res->query_raw = xmlMemStrdup(ref->query_raw);
1999 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002000 res->query = xmlMemStrdup(ref->query);
2001 if (ref->fragment != NULL)
2002 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002003
2004 /*
2005 * 4) If the authority component is defined, then the reference is a
2006 * network-path and we skip to step 7. Otherwise, the reference
2007 * URI's authority is inherited from the base URI's authority
2008 * component, which will also be undefined if the URI scheme does not
2009 * use an authority component.
2010 */
2011 if ((ref->authority != NULL) || (ref->server != NULL)) {
2012 if (ref->authority != NULL)
2013 res->authority = xmlMemStrdup(ref->authority);
2014 else {
2015 res->server = xmlMemStrdup(ref->server);
2016 if (ref->user != NULL)
2017 res->user = xmlMemStrdup(ref->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002018 res->port = ref->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002019 }
2020 if (ref->path != NULL)
2021 res->path = xmlMemStrdup(ref->path);
2022 goto step_7;
2023 }
2024 if (bas->authority != NULL)
2025 res->authority = xmlMemStrdup(bas->authority);
2026 else if (bas->server != NULL) {
2027 res->server = xmlMemStrdup(bas->server);
2028 if (bas->user != NULL)
2029 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002030 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 }
2032
2033 /*
2034 * 5) If the path component begins with a slash character ("/"), then
2035 * the reference is an absolute-path and we skip to step 7.
2036 */
2037 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2038 res->path = xmlMemStrdup(ref->path);
2039 goto step_7;
2040 }
2041
2042
2043 /*
2044 * 6) If this step is reached, then we are resolving a relative-path
2045 * reference. The relative path needs to be merged with the base
2046 * URI's path. Although there are many ways to do this, we will
2047 * describe a simple method using a separate string buffer.
2048 *
2049 * Allocate a buffer large enough for the result string.
2050 */
2051 len = 2; /* extra / and 0 */
2052 if (ref->path != NULL)
2053 len += strlen(ref->path);
2054 if (bas->path != NULL)
2055 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002056 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002057 if (res->path == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002058 xmlURIErrMemory("resolving URI against base\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002059 goto done;
2060 }
2061 res->path[0] = 0;
2062
2063 /*
2064 * a) All but the last segment of the base URI's path component is
2065 * copied to the buffer. In other words, any characters after the
2066 * last (right-most) slash character, if any, are excluded.
2067 */
2068 cur = 0;
2069 out = 0;
2070 if (bas->path != NULL) {
2071 while (bas->path[cur] != 0) {
2072 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2073 cur++;
2074 if (bas->path[cur] == 0)
2075 break;
2076
2077 cur++;
2078 while (out < cur) {
2079 res->path[out] = bas->path[out];
2080 out++;
2081 }
2082 }
2083 }
2084 res->path[out] = 0;
2085
2086 /*
2087 * b) The reference's path component is appended to the buffer
2088 * string.
2089 */
2090 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002091 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002092 /*
2093 * Ensure the path includes a '/'
2094 */
2095 if ((out == 0) && (bas->server != NULL))
2096 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002097 while (ref->path[indx] != 0) {
2098 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002099 }
2100 }
2101 res->path[out] = 0;
2102
2103 /*
2104 * Steps c) to h) are really path normalization steps
2105 */
2106 xmlNormalizeURIPath(res->path);
2107
2108step_7:
2109
2110 /*
2111 * 7) The resulting URI components, including any inherited from the
2112 * base URI, are recombined to give the absolute form of the URI
2113 * reference.
2114 */
2115 val = xmlSaveUri(res);
2116
2117done:
2118 if (ref != NULL)
2119 xmlFreeURI(ref);
2120 if (bas != NULL)
2121 xmlFreeURI(bas);
2122 if (res != NULL)
2123 xmlFreeURI(res);
2124 return(val);
2125}
2126
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002127/**
William M. Brackf7789b12004-06-07 08:57:27 +00002128 * xmlBuildRelativeURI:
2129 * @URI: the URI reference under consideration
2130 * @base: the base value
2131 *
2132 * Expresses the URI of the reference in terms relative to the
2133 * base. Some examples of this operation include:
2134 * base = "http://site1.com/docs/book1.html"
2135 * URI input URI returned
2136 * docs/pic1.gif pic1.gif
2137 * docs/img/pic1.gif img/pic1.gif
2138 * img/pic1.gif ../img/pic1.gif
2139 * http://site1.com/docs/pic1.gif pic1.gif
2140 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2141 *
2142 * base = "docs/book1.html"
2143 * URI input URI returned
2144 * docs/pic1.gif pic1.gif
2145 * docs/img/pic1.gif img/pic1.gif
2146 * img/pic1.gif ../img/pic1.gif
2147 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2148 *
2149 *
2150 * Note: if the URI reference is really wierd or complicated, it may be
2151 * worthwhile to first convert it into a "nice" one by calling
2152 * xmlBuildURI (using 'base') before calling this routine,
2153 * since this routine (for reasonable efficiency) assumes URI has
2154 * already been through some validation.
2155 *
2156 * Returns a new URI string (to be freed by the caller) or NULL in case
2157 * error.
2158 */
2159xmlChar *
2160xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2161{
2162 xmlChar *val = NULL;
2163 int ret;
2164 int ix;
2165 int pos = 0;
2166 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002167 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002168 xmlURIPtr ref = NULL;
2169 xmlURIPtr bas = NULL;
2170 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002171 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002172
2173 if ((URI == NULL) || (*URI == 0))
2174 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002175
2176 /*
2177 * First parse URI into a standard form
2178 */
2179 ref = xmlCreateURI ();
2180 if (ref == NULL)
2181 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002182 /* If URI not already in "relative" form */
2183 if (URI[0] != '.') {
2184 ret = xmlParseURIReference (ref, (const char *) URI);
2185 if (ret != 0)
2186 goto done; /* Error in URI, return NULL */
2187 } else
2188 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002189
2190 /*
2191 * Next parse base into the same standard form
2192 */
2193 if ((base == NULL) || (*base == 0)) {
2194 val = xmlStrdup (URI);
2195 goto done;
2196 }
2197 bas = xmlCreateURI ();
2198 if (bas == NULL)
2199 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002200 if (base[0] != '.') {
2201 ret = xmlParseURIReference (bas, (const char *) base);
2202 if (ret != 0)
2203 goto done; /* Error in base, return NULL */
2204 } else
2205 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002206
2207 /*
2208 * If the scheme / server on the URI differs from the base,
2209 * just return the URI
2210 */
2211 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002212 ((bas->scheme == NULL) ||
2213 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2214 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002215 val = xmlStrdup (URI);
2216 goto done;
2217 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002218 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2219 val = xmlStrdup(BAD_CAST "");
2220 goto done;
2221 }
2222 if (bas->path == NULL) {
2223 val = xmlStrdup((xmlChar *)ref->path);
2224 goto done;
2225 }
2226 if (ref->path == NULL) {
2227 ref->path = (char *) "/";
2228 remove_path = 1;
2229 }
William M. Brackf7789b12004-06-07 08:57:27 +00002230
2231 /*
2232 * At this point (at last!) we can compare the two paths
2233 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002234 * First we take care of the special case where either of the
2235 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002236 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002237 if (bas->path == NULL) {
2238 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002239 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002240 if (*uptr == '/')
2241 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002242 /* exception characters from xmlSaveUri */
2243 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002244 }
2245 goto done;
2246 }
William M. Brackf7789b12004-06-07 08:57:27 +00002247 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002248 if (ref->path == NULL) {
2249 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002250 if (bptr[ix] == '/')
2251 nbslash++;
2252 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002253 uptr = NULL;
2254 len = 1; /* this is for a string terminator only */
2255 } else {
2256 /*
2257 * Next we compare the two strings and find where they first differ
2258 */
2259 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2260 pos += 2;
2261 if ((*bptr == '.') && (bptr[1] == '/'))
2262 bptr += 2;
2263 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2264 bptr++;
2265 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2266 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002267
William M. Brack820d5ed2005-09-14 05:24:27 +00002268 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002269 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002270 goto done; /* (I can't imagine why anyone would do this) */
2271 }
2272
2273 /*
2274 * In URI, "back up" to the last '/' encountered. This will be the
2275 * beginning of the "unique" suffix of URI
2276 */
2277 ix = pos;
2278 if ((ref->path[ix] == '/') && (ix > 0))
2279 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002280 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2281 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002282 for (; ix > 0; ix--) {
2283 if (ref->path[ix] == '/')
2284 break;
2285 }
2286 if (ix == 0) {
2287 uptr = (xmlChar *)ref->path;
2288 } else {
2289 ix++;
2290 uptr = (xmlChar *)&ref->path[ix];
2291 }
2292
2293 /*
2294 * In base, count the number of '/' from the differing point
2295 */
2296 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2297 for (; bptr[ix] != 0; ix++) {
2298 if (bptr[ix] == '/')
2299 nbslash++;
2300 }
2301 }
2302 len = xmlStrlen (uptr) + 1;
2303 }
Daniel Veillard57560382012-07-24 11:44:23 +08002304
William M. Brackf7789b12004-06-07 08:57:27 +00002305 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002306 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002307 /* exception characters from xmlSaveUri */
2308 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002309 goto done;
2310 }
William M. Brackf7789b12004-06-07 08:57:27 +00002311
2312 /*
2313 * Allocate just enough space for the returned string -
2314 * length of the remainder of the URI, plus enough space
2315 * for the "../" groups, plus one for the terminator
2316 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002317 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002318 if (val == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002319 xmlURIErrMemory("building relative URI\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002320 goto done;
2321 }
2322 vptr = val;
2323 /*
2324 * Put in as many "../" as needed
2325 */
2326 for (; nbslash>0; nbslash--) {
2327 *vptr++ = '.';
2328 *vptr++ = '.';
2329 *vptr++ = '/';
2330 }
2331 /*
2332 * Finish up with the end of the URI
2333 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002334 if (uptr != NULL) {
2335 if ((vptr > val) && (len > 0) &&
2336 (uptr[0] == '/') && (vptr[-1] == '/')) {
2337 memcpy (vptr, uptr + 1, len - 1);
2338 vptr[len - 2] = 0;
2339 } else {
2340 memcpy (vptr, uptr, len);
2341 vptr[len - 1] = 0;
2342 }
2343 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002344 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002345 }
William M. Brackf7789b12004-06-07 08:57:27 +00002346
William M. Brack50420192007-07-20 01:09:08 +00002347 /* escape the freshly-built path */
2348 vptr = val;
2349 /* exception characters from xmlSaveUri */
2350 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2351 xmlFree(vptr);
2352
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002353done:
William M. Brackf7789b12004-06-07 08:57:27 +00002354 /*
2355 * Free the working variables
2356 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002357 if (remove_path != 0)
2358 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002359 if (ref != NULL)
2360 xmlFreeURI (ref);
2361 if (bas != NULL)
2362 xmlFreeURI (bas);
2363
2364 return val;
2365}
2366
2367/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002368 * xmlCanonicPath:
2369 * @path: the resource locator in a filesystem notation
2370 *
Daniel Veillard57560382012-07-24 11:44:23 +08002371 * Constructs a canonic path from the specified path.
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002372 *
Daniel Veillard57560382012-07-24 11:44:23 +08002373 * Returns a new canonic path, or a duplicate of the path parameter if the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002374 * construction fails. The caller is responsible for freeing the memory occupied
Daniel Veillard57560382012-07-24 11:44:23 +08002375 * by the returned string. If there is insufficient memory available, or the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002376 * argument is NULL, the function returns NULL.
2377 */
Daniel Veillard57560382012-07-24 11:44:23 +08002378#define IS_WINDOWS_PATH(p) \
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002379 ((p != NULL) && \
2380 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2381 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2382 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002383xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002384xmlCanonicPath(const xmlChar *path)
2385{
William M. Brack22242272007-01-27 07:59:37 +00002386/*
2387 * For Windows implementations, additional work needs to be done to
2388 * replace backslashes in pathnames with "forward slashes"
2389 */
Daniel Veillard57560382012-07-24 11:44:23 +08002390#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002391 int len = 0;
2392 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002393 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002394#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002395 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002396 xmlChar *ret;
2397 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002398
2399 if (path == NULL)
2400 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002401
Michael Stahl55b899a2012-09-07 12:14:00 +08002402#if defined(_WIN32)
2403 /*
2404 * We must not change the backslashes to slashes if the the path
2405 * starts with \\?\
2406 * Those paths can be up to 32k characters long.
2407 * Was added specifically for OpenOffice, those paths can't be converted
2408 * to URIs anyway.
2409 */
2410 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2411 (path[3] == '\\') )
2412 return xmlStrdup((const xmlChar *) path);
2413#endif
2414
2415 /* sanitize filename starting with // so it can be used as URI */
Daniel Veillard69f8a132008-02-05 08:37:56 +00002416 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2417 path++;
2418
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002419 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002420 xmlFreeURI(uri);
2421 return xmlStrdup(path);
2422 }
2423
William M. Brack22242272007-01-27 07:59:37 +00002424 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002425 absuri = xmlStrstr(path, BAD_CAST "://");
2426 if (absuri != NULL) {
2427 int l, j;
2428 unsigned char c;
2429 xmlChar *escURI;
2430
2431 /*
2432 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002433 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002434 * part matches a protocol.
2435 */
2436 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002437 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002438 if ((l <= 0) || (l > 20))
2439 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002440 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002441 for (j = 0;j < l;j++) {
2442 c = path[j];
2443 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2444 goto path_processing;
2445 }
2446
William M. Brack22242272007-01-27 07:59:37 +00002447 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002448 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2449 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002450 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002451 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002452 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002453 if (uri != NULL) {
2454 xmlFreeURI(uri);
2455 return escURI;
2456 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002457 }
2458 }
2459
2460path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002461/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard57560382012-07-24 11:44:23 +08002462#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard336a8e12005-08-07 10:46:19 +00002463 /*
William M. Brack22242272007-01-27 07:59:37 +00002464 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002465 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002466 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002467 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002468 return(NULL);
2469 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002470
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002471 len = xmlStrlen(path);
2472 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002473 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002474 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002475 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002476 uri->path = xmlMallocAtomic(len + 2);
2477 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002478 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002479 return(NULL);
2480 }
William M. Brack22242272007-01-27 07:59:37 +00002481 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002482 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002483 p = uri->path + 1;
2484 strncpy(p, path, len + 1);
2485 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002486 uri->path = xmlStrdup(path);
2487 if (uri->path == NULL) {
2488 xmlFreeURI(uri);
2489 return(NULL);
2490 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002491 p = uri->path;
2492 }
William M. Brack22242272007-01-27 07:59:37 +00002493 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002494 while (*p != '\0') {
2495 if (*p == '\\')
2496 *p = '/';
2497 p++;
2498 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002499
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002500 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002501 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002502 } else {
2503 ret = xmlSaveUri(uri);
2504 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002505
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002506 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002507#else
2508 ret = xmlStrdup((const xmlChar *) path);
2509#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002510 return(ret);
2511}
Owen Taylor3473f882001-02-23 17:55:21 +00002512
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002513/**
2514 * xmlPathToURI:
2515 * @path: the resource locator in a filesystem notation
2516 *
2517 * Constructs an URI expressing the existing path
2518 *
Daniel Veillard57560382012-07-24 11:44:23 +08002519 * Returns a new URI, or a duplicate of the path parameter if the
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002520 * construction fails. The caller is responsible for freeing the memory
2521 * occupied by the returned string. If there is insufficient memory available,
2522 * or the argument is NULL, the function returns NULL.
2523 */
2524xmlChar *
2525xmlPathToURI(const xmlChar *path)
2526{
2527 xmlURIPtr uri;
2528 xmlURI temp;
2529 xmlChar *ret, *cal;
2530
2531 if (path == NULL)
2532 return(NULL);
2533
2534 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2535 xmlFreeURI(uri);
2536 return xmlStrdup(path);
2537 }
2538 cal = xmlCanonicPath(path);
2539 if (cal == NULL)
2540 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002541#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard57560382012-07-24 11:44:23 +08002542 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002543 If 'cal' is a valid URI allready then we are done here, as continuing would make
2544 it invalid. */
2545 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2546 xmlFreeURI(uri);
2547 return cal;
2548 }
2549 /* 'cal' can contain a relative path with backslashes. If that is processed
2550 by xmlSaveURI, they will be escaped and the external entity loader machinery
2551 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2552 ret = cal;
2553 while (*ret != '\0') {
2554 if (*ret == '\\')
2555 *ret = '/';
2556 ret++;
2557 }
2558#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002559 memset(&temp, 0, sizeof(temp));
2560 temp.path = (char *) cal;
2561 ret = xmlSaveUri(&temp);
2562 xmlFree(cal);
2563 return(ret);
2564}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002565#define bottom_uri
2566#include "elfgcchack.h"