blob: d4dcd2fea23985cf705c83b901c40ecefb071af3 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
Daniel Veillard57560382012-07-24 11:44:23 +08002 * uri.c: set of generic URI related routines
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
Daniel Veillardd7af5552008-08-04 15:29:44 +00004 * Reference: RFCs 3986, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above imposes no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The replacement list is parenthesized so that the macro behaves as a
 * single operand in any expression (e.g. "x / MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35static void
36xmlURIErrMemory(const char *extra)
37{
38 if (extra)
39 __xmlRaiseError(NULL, NULL, NULL,
40 NULL, NULL, XML_FROM_URI,
41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42 extra, NULL, NULL, 0, 0,
43 "Memory allocation failed : %s\n", extra);
44 else
45 __xmlRaiseError(NULL, NULL, NULL,
46 NULL, NULL, XML_FROM_URI,
47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48 NULL, NULL, NULL, 0, 0,
49 "Memory allocation failed\n");
50}
51
Daniel Veillardd7af5552008-08-04 15:29:44 +000052static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000053
/*
 * Character classes from the old RFC 2396 grammar, used by the legacy
 * handling code.
 *
 * All IS_xxx(x) macros take a character value, except IS_UNWISE(p)
 * which takes a pointer to the character to test. Arguments may be
 * evaluated more than once, so they must be free of side effects.
 */

/* lowalpha = "a" ... "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" ... "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" ... "9"; any earlier IS_DIGIT definition is replaced */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 * This implementation additionally treats "[" and "]" as unwise.
 * Takes a pointer, unlike the other macros of this group.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
/*
 * Advance the pointer variable p to the next character: a '%' is taken
 * as the start of a three character escape sequence and skipped as a
 * whole, anything else is skipped as a single character.
 *
 * p must be a modifiable lvalue; it is evaluated more than once. The
 * argument is parenthesized so that expressions such as NEXT(*pp)
 * update the intended pointer.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
127
128/*
129 * Productions from the spec.
130 *
131 * authority = server | reg_name
132 * reg_name = 1*( unreserved | escaped | "$" | "," |
133 * ";" | ":" | "@" | "&" | "=" | "+" )
134 *
135 * path = [ abs_path | opaque_part ]
136 */
137
/*
 * Duplicate the first n bytes of s through xmlStrndup() and hand the
 * result back as a char *. The whole expansion is parenthesized so the
 * cast cannot be re-associated by surrounding operators.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
Owen Taylor3473f882001-02-23 17:55:21 +0000140/************************************************************************
141 * *
Daniel Veillardd7af5552008-08-04 15:29:44 +0000142 * RFC 3986 parser *
143 * *
144 ************************************************************************/
145
/*
 * Character classes of the RFC 3986 grammar. Every ISA_xxx(p) macro
 * takes a pointer to the character to test; the pointer expression may
 * be evaluated several times and must be free of side effects.
 */

/* DIGIT = "0" ... "9" */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))

/* ALPHA = "a" ... "z" / "A" ... "Z" */
#define ISA_ALPHA(p)                                                    \
    (((*(p) >= 'a') && (*(p) <= 'z')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'Z')))

/* HEXDIG = DIGIT / "a" ... "f" / "A" ... "F" */
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) ||                                                    \
     ((*(p) >= 'a') && (*(p) <= 'f')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
 *            / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||           \
     ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||           \
     ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == '=')) || ((*(p) == '\'')))

/* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
#define ISA_GEN_DELIM(p)                                                \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||           \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||           \
     ((*(p) == '@')))

/* reserved = gen-delims / sub-delims */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */
#define ISA_UNRESERVED(p)                                               \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) ||                                \
     ((*(p) == '-')) || ((*(p) == '.')) ||                              \
     ((*(p) == '_')) || ((*(p) == '~')))

/*
 * pct-encoded = "%" HEXDIG HEXDIG
 * The two following characters are only read once the preceding one
 * matched, so a NUL terminator stops the lookahead.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))

/* pchar = unreserved / pct-encoded / sub-delims / ":" / "@" */
#define ISA_PCHAR(p)                                                    \
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||     \
     ((*(p) == ':')) || ((*(p) == '@')))
195
196/**
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
200 *
201 * Parse an URI scheme
202 *
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204 *
205 * Returns 0 or the error code
206 */
207static int
208xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209 const char *cur;
210
211 if (str == NULL)
212 return(-1);
213
214 cur = *str;
215 if (!ISA_ALPHA(cur))
216 return(2);
217 cur++;
218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220 if (uri != NULL) {
221 if (uri->scheme != NULL) xmlFree(uri->scheme);
222 uri->scheme = STRNDUP(*str, cur - *str);
223 }
224 *str = cur;
225 return(0);
226}
227
228/**
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
232 *
233 * Parse the query part of an URI
234 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000240 *
241 * Returns 0 or the error code
242 */
243static int
244xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245{
246 const char *cur;
247
248 if (str == NULL)
249 return (-1);
250
251 cur = *str;
252
253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000254 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256 NEXT(cur);
257 if (uri != NULL) {
258 if (uri->fragment != NULL)
259 xmlFree(uri->fragment);
260 if (uri->cleanup & 2)
261 uri->fragment = STRNDUP(*str, cur - *str);
262 else
263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264 }
265 *str = cur;
266 return (0);
267}
268
269/**
270 * xmlParse3986Query:
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
273 *
274 * Parse the query part of an URI
275 *
276 * query = *uric
277 *
278 * Returns 0 or the error code
279 */
280static int
281xmlParse3986Query(xmlURIPtr uri, const char **str)
282{
283 const char *cur;
284
285 if (str == NULL)
286 return (-1);
287
288 cur = *str;
289
290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292 NEXT(cur);
293 if (uri != NULL) {
294 if (uri->query != NULL)
295 xmlFree(uri->query);
296 if (uri->cleanup & 2)
297 uri->query = STRNDUP(*str, cur - *str);
298 else
299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 */
304 if (uri->query_raw != NULL)
305 xmlFree (uri->query_raw);
306 uri->query_raw = STRNDUP (*str, cur - *str);
307 }
308 *str = cur;
309 return (0);
310}
311
312/**
313 * xmlParse3986Port:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
317 * Parse a port part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * port = *DIGIT
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Port(xmlURIPtr uri, const char **str)
326{
327 const char *cur = *str;
328
329 if (ISA_DIGIT(cur)) {
330 if (uri != NULL)
331 uri->port = 0;
332 while (ISA_DIGIT(cur)) {
333 if (uri != NULL)
334 uri->port = uri->port * 10 + (*cur - '0');
335 cur++;
336 }
337 *str = cur;
338 return(0);
339 }
340 return(1);
341}
342
343/**
344 * xmlParse3986Userinfo:
345 * @uri: pointer to an URI structure
346 * @str: the string to analyze
347 *
348 * Parse an user informations part and fills in the appropriate fields
349 * of the @uri structure
350 *
351 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
352 *
353 * Returns 0 or the error code
354 */
355static int
356xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
357{
358 const char *cur;
359
360 cur = *str;
361 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
362 ISA_SUB_DELIM(cur) || (*cur == ':'))
363 NEXT(cur);
364 if (*cur == '@') {
365 if (uri != NULL) {
366 if (uri->user != NULL) xmlFree(uri->user);
367 if (uri->cleanup & 2)
368 uri->user = STRNDUP(*str, cur - *str);
369 else
370 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
371 }
372 *str = cur;
373 return(0);
374 }
375 return(1);
376}
377
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are examined; a multi-digit
 * value may not start with '0' and may not exceed 255. Values 256 to
 * 259 are rejected like every other out-of-range value.
 *
 * Returns 0 if found and skipped (*@str is moved past it), 1 otherwise
 * (*@str is left untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int value = 0;
    int len = 0;

    /* collect up to three leading digits and their numeric value */
    while ((len < 3) && (cur[len] >= '0') && (cur[len] <= '9')) {
        value = value * 10 + (cur[len] - '0');
        len++;
    }

    if (len == 0)
        return(1);
    /* the grammar has no production with a leading zero */
    if ((len > 1) && (cur[0] == '0'))
        return(1);
    if (value > 255)
        return(1);

    *str = cur + len;
    return(0);
}
415/**
416 * xmlParse3986Host:
417 * @uri: pointer to an URI structure
418 * @str: the string to analyze
419 *
420 * Parse an host part and fills in the appropriate fields
421 * of the @uri structure
422 *
423 * host = IP-literal / IPv4address / reg-name
424 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
425 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
426 * reg-name = *( unreserved / pct-encoded / sub-delims )
427 *
428 * Returns 0 or the error code
429 */
430static int
431xmlParse3986Host(xmlURIPtr uri, const char **str)
432{
433 const char *cur = *str;
434 const char *host;
435
436 host = cur;
437 /*
438 * IPv6 and future adressing scheme are enclosed between brackets
439 */
440 if (*cur == '[') {
441 cur++;
442 while ((*cur != ']') && (*cur != 0))
443 cur++;
444 if (*cur != ']')
445 return(1);
446 cur++;
447 goto found;
448 }
449 /*
450 * try to parse an IPv4
451 */
452 if (ISA_DIGIT(cur)) {
453 if (xmlParse3986DecOctet(&cur) != 0)
454 goto not_ipv4;
455 if (*cur != '.')
456 goto not_ipv4;
457 cur++;
458 if (xmlParse3986DecOctet(&cur) != 0)
459 goto not_ipv4;
460 if (*cur != '.')
461 goto not_ipv4;
462 if (xmlParse3986DecOctet(&cur) != 0)
463 goto not_ipv4;
464 if (*cur != '.')
465 goto not_ipv4;
466 if (xmlParse3986DecOctet(&cur) != 0)
467 goto not_ipv4;
468 goto found;
469not_ipv4:
470 cur = *str;
471 }
472 /*
473 * then this should be a hostname which can be empty
474 */
475 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
476 NEXT(cur);
477found:
478 if (uri != NULL) {
479 if (uri->authority != NULL) xmlFree(uri->authority);
480 uri->authority = NULL;
481 if (uri->server != NULL) xmlFree(uri->server);
482 if (cur != host) {
483 if (uri->cleanup & 2)
484 uri->server = STRNDUP(host, cur - host);
485 else
486 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
487 } else
488 uri->server = NULL;
489 }
490 *str = cur;
491 return(0);
492}
493
494/**
495 * xmlParse3986Authority:
496 * @uri: pointer to an URI structure
497 * @str: the string to analyze
498 *
499 * Parse an authority part and fills in the appropriate fields
500 * of the @uri structure
501 *
502 * authority = [ userinfo "@" ] host [ ":" port ]
503 *
504 * Returns 0 or the error code
505 */
506static int
507xmlParse3986Authority(xmlURIPtr uri, const char **str)
508{
509 const char *cur;
510 int ret;
511
512 cur = *str;
513 /*
514 * try to parse an userinfo and check for the trailing @
515 */
516 ret = xmlParse3986Userinfo(uri, &cur);
517 if ((ret != 0) || (*cur != '@'))
518 cur = *str;
519 else
520 cur++;
521 ret = xmlParse3986Host(uri, &cur);
522 if (ret != 0) return(ret);
523 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000524 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000525 ret = xmlParse3986Port(uri, &cur);
526 if (ret != 0) return(ret);
527 }
528 *str = cur;
529 return(0);
530}
531
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment; nothing is stored, only *@str is moved.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The segment stops at the first character which is not a pchar or
 * which equals @forbid. If nothing was consumed and @empty is 0 this is
 * an error, including when the very first character is @forbid.
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 * (*@str is left untouched in that case)
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);

    /* a non-empty segment was requested but no character qualified */
    if ((cur == *str) && (!empty))
        return(1);

    *str = cur;
    return (0);
}
564
565/**
566 * xmlParse3986PathAbEmpty:
567 * @uri: pointer to an URI structure
568 * @str: the string to analyze
569 *
570 * Parse an path absolute or empty and fills in the appropriate fields
571 * of the @uri structure
572 *
573 * path-abempty = *( "/" segment )
574 *
575 * Returns 0 or the error code
576 */
577static int
578xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
579{
580 const char *cur;
581 int ret;
582
583 cur = *str;
584
585 while (*cur == '/') {
586 cur++;
587 ret = xmlParse3986Segment(&cur, 0, 1);
588 if (ret != 0) return(ret);
589 }
590 if (uri != NULL) {
591 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200592 if (*str != cur) {
593 if (uri->cleanup & 2)
594 uri->path = STRNDUP(*str, cur - *str);
595 else
596 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
597 } else {
598 uri->path = NULL;
599 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000600 }
601 *str = cur;
602 return (0);
603}
604
605/**
606 * xmlParse3986PathAbsolute:
607 * @uri: pointer to an URI structure
608 * @str: the string to analyze
609 *
610 * Parse an path absolute and fills in the appropriate fields
611 * of the @uri structure
612 *
613 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
614 *
615 * Returns 0 or the error code
616 */
617static int
618xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
619{
620 const char *cur;
621 int ret;
622
623 cur = *str;
624
625 if (*cur != '/')
626 return(1);
627 cur++;
628 ret = xmlParse3986Segment(&cur, 0, 0);
629 if (ret == 0) {
630 while (*cur == '/') {
631 cur++;
632 ret = xmlParse3986Segment(&cur, 0, 1);
633 if (ret != 0) return(ret);
634 }
635 }
636 if (uri != NULL) {
637 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200638 if (cur != *str) {
639 if (uri->cleanup & 2)
640 uri->path = STRNDUP(*str, cur - *str);
641 else
642 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
643 } else {
644 uri->path = NULL;
645 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000646 }
647 *str = cur;
648 return (0);
649}
650
651/**
652 * xmlParse3986PathRootless:
653 * @uri: pointer to an URI structure
654 * @str: the string to analyze
655 *
656 * Parse an path without root and fills in the appropriate fields
657 * of the @uri structure
658 *
659 * path-rootless = segment-nz *( "/" segment )
660 *
661 * Returns 0 or the error code
662 */
663static int
664xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
665{
666 const char *cur;
667 int ret;
668
669 cur = *str;
670
671 ret = xmlParse3986Segment(&cur, 0, 0);
672 if (ret != 0) return(ret);
673 while (*cur == '/') {
674 cur++;
675 ret = xmlParse3986Segment(&cur, 0, 1);
676 if (ret != 0) return(ret);
677 }
678 if (uri != NULL) {
679 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200680 if (cur != *str) {
681 if (uri->cleanup & 2)
682 uri->path = STRNDUP(*str, cur - *str);
683 else
684 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
685 } else {
686 uri->path = NULL;
687 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000688 }
689 *str = cur;
690 return (0);
691}
692
693/**
694 * xmlParse3986PathNoScheme:
695 * @uri: pointer to an URI structure
696 * @str: the string to analyze
697 *
698 * Parse an path which is not a scheme and fills in the appropriate fields
699 * of the @uri structure
700 *
701 * path-noscheme = segment-nz-nc *( "/" segment )
702 *
703 * Returns 0 or the error code
704 */
705static int
706xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
707{
708 const char *cur;
709 int ret;
710
711 cur = *str;
712
713 ret = xmlParse3986Segment(&cur, ':', 0);
714 if (ret != 0) return(ret);
715 while (*cur == '/') {
716 cur++;
717 ret = xmlParse3986Segment(&cur, 0, 1);
718 if (ret != 0) return(ret);
719 }
720 if (uri != NULL) {
721 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200722 if (cur != *str) {
723 if (uri->cleanup & 2)
724 uri->path = STRNDUP(*str, cur - *str);
725 else
726 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
727 } else {
728 uri->path = NULL;
729 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000730 }
731 *str = cur;
732 return (0);
733}
734
735/**
736 * xmlParse3986HierPart:
737 * @uri: pointer to an URI structure
738 * @str: the string to analyze
739 *
740 * Parse an hierarchical part and fills in the appropriate fields
741 * of the @uri structure
742 *
743 * hier-part = "//" authority path-abempty
744 * / path-absolute
745 * / path-rootless
746 * / path-empty
747 *
748 * Returns 0 or the error code
749 */
750static int
751xmlParse3986HierPart(xmlURIPtr uri, const char **str)
752{
753 const char *cur;
754 int ret;
755
756 cur = *str;
757
758 if ((*cur == '/') && (*(cur + 1) == '/')) {
759 cur += 2;
760 ret = xmlParse3986Authority(uri, &cur);
761 if (ret != 0) return(ret);
762 ret = xmlParse3986PathAbEmpty(uri, &cur);
763 if (ret != 0) return(ret);
764 *str = cur;
765 return(0);
766 } else if (*cur == '/') {
767 ret = xmlParse3986PathAbsolute(uri, &cur);
768 if (ret != 0) return(ret);
769 } else if (ISA_PCHAR(cur)) {
770 ret = xmlParse3986PathRootless(uri, &cur);
771 if (ret != 0) return(ret);
772 } else {
773 /* path-empty is effectively empty */
774 if (uri != NULL) {
775 if (uri->path != NULL) xmlFree(uri->path);
776 uri->path = NULL;
777 }
778 }
779 *str = cur;
780 return (0);
781}
782
783/**
784 * xmlParse3986RelativeRef:
785 * @uri: pointer to an URI structure
786 * @str: the string to analyze
787 *
788 * Parse an URI string and fills in the appropriate fields
789 * of the @uri structure
790 *
791 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
792 * relative-part = "//" authority path-abempty
793 * / path-absolute
794 * / path-noscheme
795 * / path-empty
796 *
797 * Returns 0 or the error code
798 */
799static int
800xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
801 int ret;
802
803 if ((*str == '/') && (*(str + 1) == '/')) {
804 str += 2;
805 ret = xmlParse3986Authority(uri, &str);
806 if (ret != 0) return(ret);
807 ret = xmlParse3986PathAbEmpty(uri, &str);
808 if (ret != 0) return(ret);
809 } else if (*str == '/') {
810 ret = xmlParse3986PathAbsolute(uri, &str);
811 if (ret != 0) return(ret);
812 } else if (ISA_PCHAR(str)) {
813 ret = xmlParse3986PathNoScheme(uri, &str);
814 if (ret != 0) return(ret);
815 } else {
816 /* path-empty is effectively empty */
817 if (uri != NULL) {
818 if (uri->path != NULL) xmlFree(uri->path);
819 uri->path = NULL;
820 }
821 }
822
823 if (*str == '?') {
824 str++;
825 ret = xmlParse3986Query(uri, &str);
826 if (ret != 0) return(ret);
827 }
828 if (*str == '#') {
829 str++;
830 ret = xmlParse3986Fragment(uri, &str);
831 if (ret != 0) return(ret);
832 }
833 if (*str != 0) {
834 xmlCleanURI(uri);
835 return(1);
836 }
837 return(0);
838}
839
840
841/**
842 * xmlParse3986URI:
843 * @uri: pointer to an URI structure
844 * @str: the string to analyze
845 *
846 * Parse an URI string and fills in the appropriate fields
847 * of the @uri structure
848 *
849 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
850 *
851 * Returns 0 or the error code
852 */
853static int
854xmlParse3986URI(xmlURIPtr uri, const char *str) {
855 int ret;
856
857 ret = xmlParse3986Scheme(uri, &str);
858 if (ret != 0) return(ret);
859 if (*str != ':') {
860 return(1);
861 }
862 str++;
863 ret = xmlParse3986HierPart(uri, &str);
864 if (ret != 0) return(ret);
865 if (*str == '?') {
866 str++;
867 ret = xmlParse3986Query(uri, &str);
868 if (ret != 0) return(ret);
869 }
870 if (*str == '#') {
871 str++;
872 ret = xmlParse3986Fragment(uri, &str);
873 if (ret != 0) return(ret);
874 }
875 if (*str != 0) {
876 xmlCleanURI(uri);
877 return(1);
878 }
879 return(0);
880}
881
882/**
883 * xmlParse3986URIReference:
884 * @uri: pointer to an URI structure
885 * @str: the string to analyze
886 *
887 * Parse an URI reference string and fills in the appropriate fields
888 * of the @uri structure
889 *
890 * URI-reference = URI / relative-ref
891 *
892 * Returns 0 or the error code
893 */
894static int
895xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
896 int ret;
897
898 if (str == NULL)
899 return(-1);
900 xmlCleanURI(uri);
901
902 /*
903 * Try first to parse absolute refs, then fallback to relative if
904 * it fails.
905 */
906 ret = xmlParse3986URI(uri, str);
907 if (ret != 0) {
908 xmlCleanURI(uri);
909 ret = xmlParse3986RelativeRef(uri, str);
910 if (ret != 0) {
911 xmlCleanURI(uri);
912 return(ret);
913 }
914 }
915 return(0);
916}
917
918/**
919 * xmlParseURI:
920 * @str: the URI string to analyze
921 *
922 * Parse an URI based on RFC 3986
923 *
924 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
925 *
926 * Returns a newly built xmlURIPtr or NULL in case of error
927 */
928xmlURIPtr
929xmlParseURI(const char *str) {
930 xmlURIPtr uri;
931 int ret;
932
933 if (str == NULL)
934 return(NULL);
935 uri = xmlCreateURI();
936 if (uri != NULL) {
937 ret = xmlParse3986URIReference(uri, str);
938 if (ret) {
939 xmlFreeURI(uri);
940 return(NULL);
941 }
942 }
943 return(uri);
944}
945
946/**
947 * xmlParseURIReference:
948 * @uri: pointer to an URI structure
949 * @str: the string to analyze
950 *
951 * Parse an URI reference string based on RFC 3986 and fills in the
952 * appropriate fields of the @uri structure
953 *
954 * URI-reference = URI / relative-ref
955 *
956 * Returns 0 or the error code
957 */
958int
959xmlParseURIReference(xmlURIPtr uri, const char *str) {
960 return(xmlParse3986URIReference(uri, str));
961}
962
963/**
964 * xmlParseURIRaw:
965 * @str: the URI string to analyze
966 * @raw: if 1 unescaping of URI pieces are disabled
967 *
968 * Parse an URI but allows to keep intact the original fragments.
969 *
970 * URI-reference = URI / relative-ref
971 *
972 * Returns a newly built xmlURIPtr or NULL in case of error
973 */
974xmlURIPtr
975xmlParseURIRaw(const char *str, int raw) {
976 xmlURIPtr uri;
977 int ret;
978
979 if (str == NULL)
980 return(NULL);
981 uri = xmlCreateURI();
982 if (uri != NULL) {
983 if (raw) {
984 uri->cleanup |= 2;
985 }
986 ret = xmlParseURIReference(uri, str);
987 if (ret) {
988 xmlFreeURI(uri);
989 return(NULL);
990 }
991 }
992 return(uri);
993}
994
995/************************************************************************
996 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000997 * Generic URI structure functions *
998 * *
999 ************************************************************************/
1000
1001/**
1002 * xmlCreateURI:
1003 *
1004 * Simply creates an empty xmlURI
1005 *
1006 * Returns the new structure or NULL in case of error
1007 */
1008xmlURIPtr
1009xmlCreateURI(void) {
1010 xmlURIPtr ret;
1011
1012 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1013 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001014 xmlURIErrMemory("creating URI structure\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001015 return(NULL);
1016 }
1017 memset(ret, 0, sizeof(xmlURI));
1018 return(ret);
1019}
1020
1021/**
Daniel Veillard57560382012-07-24 11:44:23 +08001022 * xmlSaveUriRealloc:
1023 *
1024 * Function to handle properly a reallocation when saving an URI
1025 * Also imposes some limit on the length of an URI string output
1026 */
1027static xmlChar *
1028xmlSaveUriRealloc(xmlChar *ret, int *max) {
1029 xmlChar *temp;
1030 int tmp;
1031
1032 if (*max > MAX_URI_LENGTH) {
1033 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1034 return(NULL);
1035 }
1036 tmp = *max * 2;
1037 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1038 if (temp == NULL) {
1039 xmlURIErrMemory("saving URI\n");
1040 return(NULL);
1041 }
1042 *max = tmp;
1043 return(temp);
1044}
1045
1046/**
Owen Taylor3473f882001-02-23 17:55:21 +00001047 * xmlSaveUri:
1048 * @uri: pointer to an xmlURI
1049 *
1050 * Save the URI as an escaped string
1051 *
1052 * Returns a new string (to be deallocated by caller)
1053 */
1054xmlChar *
1055xmlSaveUri(xmlURIPtr uri) {
1056 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001057 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001058 const char *p;
1059 int len;
1060 int max;
1061
1062 if (uri == NULL) return(NULL);
1063
1064
1065 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001066 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001067 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001068 xmlURIErrMemory("saving URI\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001069 return(NULL);
1070 }
1071 len = 0;
1072
1073 if (uri->scheme != NULL) {
1074 p = uri->scheme;
1075 while (*p != 0) {
1076 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001077 temp = xmlSaveUriRealloc(ret, &max);
1078 if (temp == NULL) goto mem_error;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001079 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001080 }
1081 ret[len++] = *p++;
1082 }
1083 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001084 temp = xmlSaveUriRealloc(ret, &max);
1085 if (temp == NULL) goto mem_error;
1086 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001087 }
1088 ret[len++] = ':';
1089 }
1090 if (uri->opaque != NULL) {
1091 p = uri->opaque;
1092 while (*p != 0) {
1093 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001094 temp = xmlSaveUriRealloc(ret, &max);
1095 if (temp == NULL) goto mem_error;
1096 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001097 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001098 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001099 ret[len++] = *p++;
1100 else {
1101 int val = *(unsigned char *)p++;
1102 int hi = val / 0x10, lo = val % 0x10;
1103 ret[len++] = '%';
1104 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1105 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1106 }
1107 }
Owen Taylor3473f882001-02-23 17:55:21 +00001108 } else {
1109 if (uri->server != NULL) {
1110 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001111 temp = xmlSaveUriRealloc(ret, &max);
1112 if (temp == NULL) goto mem_error;
1113 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001114 }
1115 ret[len++] = '/';
1116 ret[len++] = '/';
1117 if (uri->user != NULL) {
1118 p = uri->user;
1119 while (*p != 0) {
1120 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001121 temp = xmlSaveUriRealloc(ret, &max);
1122 if (temp == NULL) goto mem_error;
1123 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001124 }
1125 if ((IS_UNRESERVED(*(p))) ||
1126 ((*(p) == ';')) || ((*(p) == ':')) ||
1127 ((*(p) == '&')) || ((*(p) == '=')) ||
1128 ((*(p) == '+')) || ((*(p) == '$')) ||
1129 ((*(p) == ',')))
1130 ret[len++] = *p++;
1131 else {
1132 int val = *(unsigned char *)p++;
1133 int hi = val / 0x10, lo = val % 0x10;
1134 ret[len++] = '%';
1135 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1136 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1137 }
1138 }
1139 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001140 temp = xmlSaveUriRealloc(ret, &max);
1141 if (temp == NULL) goto mem_error;
1142 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001143 }
1144 ret[len++] = '@';
1145 }
1146 p = uri->server;
1147 while (*p != 0) {
1148 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001149 temp = xmlSaveUriRealloc(ret, &max);
1150 if (temp == NULL) goto mem_error;
1151 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001152 }
1153 ret[len++] = *p++;
1154 }
1155 if (uri->port > 0) {
1156 if (len + 10 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001157 temp = xmlSaveUriRealloc(ret, &max);
1158 if (temp == NULL) goto mem_error;
1159 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001160 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +00001161 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001162 }
1163 } else if (uri->authority != NULL) {
1164 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001165 temp = xmlSaveUriRealloc(ret, &max);
1166 if (temp == NULL) goto mem_error;
1167 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001168 }
1169 ret[len++] = '/';
1170 ret[len++] = '/';
1171 p = uri->authority;
1172 while (*p != 0) {
1173 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001174 temp = xmlSaveUriRealloc(ret, &max);
1175 if (temp == NULL) goto mem_error;
1176 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001177 }
1178 if ((IS_UNRESERVED(*(p))) ||
1179 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1180 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1181 ((*(p) == '=')) || ((*(p) == '+')))
1182 ret[len++] = *p++;
1183 else {
1184 int val = *(unsigned char *)p++;
1185 int hi = val / 0x10, lo = val % 0x10;
1186 ret[len++] = '%';
1187 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1188 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1189 }
1190 }
1191 } else if (uri->scheme != NULL) {
1192 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001193 temp = xmlSaveUriRealloc(ret, &max);
1194 if (temp == NULL) goto mem_error;
1195 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001196 }
Owen Taylor3473f882001-02-23 17:55:21 +00001197 }
1198 if (uri->path != NULL) {
1199 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001200 /*
1201 * the colon in file:///d: should not be escaped or
1202 * Windows accesses fail later.
1203 */
1204 if ((uri->scheme != NULL) &&
1205 (p[0] == '/') &&
1206 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1207 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1208 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001209 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001210 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001211 temp = xmlSaveUriRealloc(ret, &max);
1212 if (temp == NULL) goto mem_error;
1213 ret = temp;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001214 }
1215 ret[len++] = *p++;
1216 ret[len++] = *p++;
1217 ret[len++] = *p++;
1218 }
Owen Taylor3473f882001-02-23 17:55:21 +00001219 while (*p != 0) {
1220 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001221 temp = xmlSaveUriRealloc(ret, &max);
1222 if (temp == NULL) goto mem_error;
1223 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001224 }
1225 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1226 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1227 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1228 ((*(p) == ',')))
1229 ret[len++] = *p++;
1230 else {
1231 int val = *(unsigned char *)p++;
1232 int hi = val / 0x10, lo = val % 0x10;
1233 ret[len++] = '%';
1234 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1235 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1236 }
1237 }
1238 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001239 if (uri->query_raw != NULL) {
1240 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001241 temp = xmlSaveUriRealloc(ret, &max);
1242 if (temp == NULL) goto mem_error;
1243 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001244 }
1245 ret[len++] = '?';
1246 p = uri->query_raw;
1247 while (*p != 0) {
1248 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001249 temp = xmlSaveUriRealloc(ret, &max);
1250 if (temp == NULL) goto mem_error;
1251 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001252 }
1253 ret[len++] = *p++;
1254 }
1255 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001256 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001257 temp = xmlSaveUriRealloc(ret, &max);
1258 if (temp == NULL) goto mem_error;
1259 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001260 }
1261 ret[len++] = '?';
1262 p = uri->query;
1263 while (*p != 0) {
1264 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001265 temp = xmlSaveUriRealloc(ret, &max);
1266 if (temp == NULL) goto mem_error;
1267 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001268 }
Daniel Veillard57560382012-07-24 11:44:23 +08001269 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Owen Taylor3473f882001-02-23 17:55:21 +00001270 ret[len++] = *p++;
1271 else {
1272 int val = *(unsigned char *)p++;
1273 int hi = val / 0x10, lo = val % 0x10;
1274 ret[len++] = '%';
1275 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1276 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1277 }
1278 }
1279 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001280 }
1281 if (uri->fragment != NULL) {
1282 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001283 temp = xmlSaveUriRealloc(ret, &max);
1284 if (temp == NULL) goto mem_error;
1285 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001286 }
1287 ret[len++] = '#';
1288 p = uri->fragment;
1289 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001290 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001291 temp = xmlSaveUriRealloc(ret, &max);
1292 if (temp == NULL) goto mem_error;
1293 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001294 }
Daniel Veillard57560382012-07-24 11:44:23 +08001295 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001296 ret[len++] = *p++;
1297 else {
1298 int val = *(unsigned char *)p++;
1299 int hi = val / 0x10, lo = val % 0x10;
1300 ret[len++] = '%';
1301 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1302 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001303 }
1304 }
Owen Taylor3473f882001-02-23 17:55:21 +00001305 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001306 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001307 temp = xmlSaveUriRealloc(ret, &max);
1308 if (temp == NULL) goto mem_error;
1309 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001310 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02001311 ret[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001312 return(ret);
Daniel Veillard57560382012-07-24 11:44:23 +08001313
1314mem_error:
1315 xmlFree(ret);
1316 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001317}
1318
1319/**
1320 * xmlPrintURI:
1321 * @stream: a FILE* for the output
1322 * @uri: pointer to an xmlURI
1323 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001324 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001325 */
1326void
1327xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1328 xmlChar *out;
1329
1330 out = xmlSaveUri(uri);
1331 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001332 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001333 xmlFree(out);
1334 }
1335}
1336
1337/**
1338 * xmlCleanURI:
1339 * @uri: pointer to an xmlURI
1340 *
1341 * Make sure the xmlURI struct is free of content
1342 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001343static void
Owen Taylor3473f882001-02-23 17:55:21 +00001344xmlCleanURI(xmlURIPtr uri) {
1345 if (uri == NULL) return;
1346
1347 if (uri->scheme != NULL) xmlFree(uri->scheme);
1348 uri->scheme = NULL;
1349 if (uri->server != NULL) xmlFree(uri->server);
1350 uri->server = NULL;
1351 if (uri->user != NULL) xmlFree(uri->user);
1352 uri->user = NULL;
1353 if (uri->path != NULL) xmlFree(uri->path);
1354 uri->path = NULL;
1355 if (uri->fragment != NULL) xmlFree(uri->fragment);
1356 uri->fragment = NULL;
1357 if (uri->opaque != NULL) xmlFree(uri->opaque);
1358 uri->opaque = NULL;
1359 if (uri->authority != NULL) xmlFree(uri->authority);
1360 uri->authority = NULL;
1361 if (uri->query != NULL) xmlFree(uri->query);
1362 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001363 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1364 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001365}
1366
1367/**
1368 * xmlFreeURI:
1369 * @uri: pointer to an xmlURI
1370 *
1371 * Free up the xmlURI struct
1372 */
1373void
1374xmlFreeURI(xmlURIPtr uri) {
1375 if (uri == NULL) return;
1376
1377 if (uri->scheme != NULL) xmlFree(uri->scheme);
1378 if (uri->server != NULL) xmlFree(uri->server);
1379 if (uri->user != NULL) xmlFree(uri->user);
1380 if (uri->path != NULL) xmlFree(uri->path);
1381 if (uri->fragment != NULL) xmlFree(uri->fragment);
1382 if (uri->opaque != NULL) xmlFree(uri->opaque);
1383 if (uri->authority != NULL) xmlFree(uri->authority);
1384 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001385 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001386 xmlFree(uri);
1387}
1388
1389/************************************************************************
1390 * *
1391 * Helper functions *
1392 * *
1393 ************************************************************************/
1394
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand rather
         * than with strcpy(). The copy includes the terminating 0.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the separator(s) preceding "cur".  If only
         * slashes (or nothing) precede it, there is no previous segment
         * and we keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp[-1]" is the last character of the previous segment; find
         * that segment's start.  We need to back up to the previous segment
         * and start over with that to handle things like "foo/bar/../..".
         * If we don't do this, then on the first pass we'll remove the
         * "bar/..", but be pointing at the second ".." so we won't realize
         * we can also remove the "foo/..".  This must also work when the
         * previous segment is a single character located at "path" itself
         * (e.g. "a/b/../..").
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only done for paths starting
     * with "/").
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001583
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of '0'-'9', 'a'-'f' or 'A'-'F', 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1591
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine; it does not check that the string is a URI. Every
 * "%XX" sequence where both X are hexadecimal digits is replaced by the
 * single byte of that value (no encoding conversion is done); any other
 * byte, including a '%' not followed by two hex digits, is copied as is.
 * The result is therefore never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result, otherwise the result is written to @target.
 * The output is always 0 terminated.
 *
 * Returns the unescaped string (@target when it was provided), or NULL
 * if @str is NULL, the length is invalid or the allocation failed
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    /* a negative value here means the length did not fit in an int */
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    out = ret;
    for (in = str; len > 0; out++) {
        if ((len > 2) && (in[0] == '%') && (is_hex(in[1])) &&
            (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* fold the two hex digits following the '%' into one byte */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else
                    value += (digit - 'A') + 10;
            }
            *out = (char) value;
            in += 3;
            len -= 3;
        } else {
            *out = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1653
1654/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001655 * xmlURIEscapeStr:
1656 * @str: string to escape
1657 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001658 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001659 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1660 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001661 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001662 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001663 */
1664xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001665xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1666 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001667 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001668 const xmlChar *in;
Daniel Veillard57560382012-07-24 11:44:23 +08001669 int len, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001670
1671 if (str == NULL)
1672 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001673 if (str[0] == 0)
1674 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001675 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001676 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001677
1678 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001679 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001680 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001681 xmlURIErrMemory("escaping URI value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001682 return(NULL);
1683 }
1684 in = (const xmlChar *) str;
1685 out = 0;
1686 while(*in != 0) {
1687 if (len - out <= 3) {
Daniel Veillard57560382012-07-24 11:44:23 +08001688 temp = xmlSaveUriRealloc(ret, &len);
Daniel Veillarded86dc22008-04-24 11:58:41 +00001689 if (temp == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001690 xmlURIErrMemory("escaping URI value\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001691 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001692 return(NULL);
1693 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001694 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001695 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001696
1697 ch = *in;
1698
Daniel Veillardeb475a32002-04-14 22:00:22 +00001699 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001700 unsigned char val;
1701 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001702 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001703 if (val <= 9)
1704 ret[out++] = '0' + val;
1705 else
1706 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001707 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001708 if (val <= 9)
1709 ret[out++] = '0' + val;
1710 else
1711 ret[out++] = 'A' + val - 0xA;
1712 in++;
1713 } else {
1714 ret[out++] = *in++;
1715 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001716
Owen Taylor3473f882001-02-23 17:55:21 +00001717 }
1718 ret[out] = 0;
1719 return(ret);
1720}
1721
Daniel Veillard8514c672001-05-23 10:29:12 +00001722/**
1723 * xmlURIEscape:
1724 * @str: the string of the URI to escape
1725 *
1726 * Escaping routine, does not do validity checks !
1727 * It will try to escape the chars needing this, but this is heuristic
1728 * based it's impossible to be sure.
1729 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001730 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001731 *
1732 * 25 May 2001
1733 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1734 * according to RFC2396.
1735 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001736 */
1737xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001738xmlURIEscape(const xmlChar * str)
1739{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001740 xmlChar *ret, *segment = NULL;
1741 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001742 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001743
Daniel Veillard6278fb52001-05-25 07:38:41 +00001744#define NULLCHK(p) if(!p) { \
Daniel Veillard57560382012-07-24 11:44:23 +08001745 xmlURIErrMemory("escaping URI value\n"); \
1746 xmlFreeURI(uri); \
1747 return NULL; } \
Daniel Veillard6278fb52001-05-25 07:38:41 +00001748
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001749 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001750 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001751
1752 uri = xmlCreateURI();
1753 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001754 /*
1755 * Allow escaping errors in the unescaped form
1756 */
1757 uri->cleanup = 1;
1758 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001759 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001760 xmlFreeURI(uri);
1761 return (NULL);
1762 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001763 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001764
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001765 if (!uri)
1766 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001767
1768 ret = NULL;
1769
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001770 if (uri->scheme) {
1771 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1772 NULLCHK(segment)
1773 ret = xmlStrcat(ret, segment);
1774 ret = xmlStrcat(ret, BAD_CAST ":");
1775 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001776 }
1777
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001778 if (uri->authority) {
1779 segment =
1780 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1781 NULLCHK(segment)
1782 ret = xmlStrcat(ret, BAD_CAST "//");
1783 ret = xmlStrcat(ret, segment);
1784 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001785 }
1786
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001787 if (uri->user) {
1788 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1789 NULLCHK(segment)
Daniel Veillard57560382012-07-24 11:44:23 +08001790 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001791 ret = xmlStrcat(ret, segment);
1792 ret = xmlStrcat(ret, BAD_CAST "@");
1793 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001794 }
1795
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001796 if (uri->server) {
1797 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1798 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001799 if (uri->user == NULL)
Daniel Veillardd7af5552008-08-04 15:29:44 +00001800 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001801 ret = xmlStrcat(ret, segment);
1802 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001803 }
1804
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001805 if (uri->port) {
1806 xmlChar port[10];
1807
Daniel Veillard43d3f612001-11-10 11:57:23 +00001808 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001809 ret = xmlStrcat(ret, BAD_CAST ":");
1810 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001811 }
1812
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001813 if (uri->path) {
1814 segment =
1815 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1816 NULLCHK(segment)
1817 ret = xmlStrcat(ret, segment);
1818 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001819 }
1820
Daniel Veillarda1413b82007-04-26 08:33:28 +00001821 if (uri->query_raw) {
1822 ret = xmlStrcat(ret, BAD_CAST "?");
1823 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1824 }
1825 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001826 segment =
1827 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1828 NULLCHK(segment)
1829 ret = xmlStrcat(ret, BAD_CAST "?");
1830 ret = xmlStrcat(ret, segment);
1831 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001832 }
1833
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001834 if (uri->opaque) {
1835 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1836 NULLCHK(segment)
1837 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001838 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001839 }
1840
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001841 if (uri->fragment) {
1842 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1843 NULLCHK(segment)
1844 ret = xmlStrcat(ret, BAD_CAST "#");
1845 ret = xmlStrcat(ret, segment);
1846 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001847 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001848
1849 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001850#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001851
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001852 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001853}
1854
Owen Taylor3473f882001-02-23 17:55:21 +00001855/************************************************************************
1856 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001857 * Public functions *
1858 * *
1859 ************************************************************************/
1860
1861/**
1862 * xmlBuildURI:
1863 * @URI: the URI instance found in the document
1864 * @base: the base value
1865 *
1866 * Computes he final URI of the reference done by checking that
1867 * the given URI is valid, and building the final URI using the
Daniel Veillard57560382012-07-24 11:44:23 +08001868 * base URI. This is processed according to section 5.2 of the
Owen Taylor3473f882001-02-23 17:55:21 +00001869 * RFC 2396
1870 *
1871 * 5.2. Resolving Relative References to Absolute Form
1872 *
1873 * Returns a new URI string (to be freed by the caller) or NULL in case
1874 * of error.
1875 */
1876xmlChar *
1877xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1878 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001879 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 xmlURIPtr ref = NULL;
1881 xmlURIPtr bas = NULL;
1882 xmlURIPtr res = NULL;
1883
1884 /*
1885 * 1) The URI reference is parsed into the potential four components and
1886 * fragment identifier, as described in Section 4.3.
1887 *
1888 * NOTE that a completely empty URI is treated by modern browsers
1889 * as a reference to "." rather than as a synonym for the current
1890 * URI. Should we do that here?
1891 */
Daniel Veillard57560382012-07-24 11:44:23 +08001892 if (URI == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001893 ret = -1;
1894 else {
1895 if (*URI) {
1896 ref = xmlCreateURI();
1897 if (ref == NULL)
1898 goto done;
1899 ret = xmlParseURIReference(ref, (const char *) URI);
1900 }
1901 else
1902 ret = 0;
1903 }
1904 if (ret != 0)
1905 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001906 if ((ref != NULL) && (ref->scheme != NULL)) {
1907 /*
1908 * The URI is absolute don't modify.
1909 */
1910 val = xmlStrdup(URI);
1911 goto done;
1912 }
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (base == NULL)
1914 ret = -1;
1915 else {
1916 bas = xmlCreateURI();
1917 if (bas == NULL)
1918 goto done;
1919 ret = xmlParseURIReference(bas, (const char *) base);
1920 }
1921 if (ret != 0) {
1922 if (ref)
1923 val = xmlSaveUri(ref);
1924 goto done;
1925 }
1926 if (ref == NULL) {
1927 /*
1928 * the base fragment must be ignored
1929 */
1930 if (bas->fragment != NULL) {
1931 xmlFree(bas->fragment);
1932 bas->fragment = NULL;
1933 }
1934 val = xmlSaveUri(bas);
1935 goto done;
1936 }
1937
1938 /*
1939 * 2) If the path component is empty and the scheme, authority, and
1940 * query components are undefined, then it is a reference to the
1941 * current document and we are done. Otherwise, the reference URI's
1942 * query and fragment components are defined as found (or not found)
1943 * within the URI reference and not inherited from the base URI.
1944 *
1945 * NOTE that in modern browsers, the parsing differs from the above
1946 * in the following aspect: the query component is allowed to be
1947 * defined while still treating this as a reference to the current
1948 * document.
1949 */
1950 res = xmlCreateURI();
1951 if (res == NULL)
1952 goto done;
1953 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1954 ((ref->authority == NULL) && (ref->server == NULL))) {
1955 if (bas->scheme != NULL)
1956 res->scheme = xmlMemStrdup(bas->scheme);
1957 if (bas->authority != NULL)
1958 res->authority = xmlMemStrdup(bas->authority);
1959 else if (bas->server != NULL) {
1960 res->server = xmlMemStrdup(bas->server);
1961 if (bas->user != NULL)
1962 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08001963 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00001964 }
1965 if (bas->path != NULL)
1966 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001967 if (ref->query_raw != NULL)
1968 res->query_raw = xmlMemStrdup (ref->query_raw);
1969 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001970 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001971 else if (bas->query_raw != NULL)
1972 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001973 else if (bas->query != NULL)
1974 res->query = xmlMemStrdup(bas->query);
1975 if (ref->fragment != NULL)
1976 res->fragment = xmlMemStrdup(ref->fragment);
1977 goto step_7;
1978 }
Owen Taylor3473f882001-02-23 17:55:21 +00001979
1980 /*
1981 * 3) If the scheme component is defined, indicating that the reference
1982 * starts with a scheme name, then the reference is interpreted as an
1983 * absolute URI and we are done. Otherwise, the reference URI's
1984 * scheme is inherited from the base URI's scheme component.
1985 */
1986 if (ref->scheme != NULL) {
1987 val = xmlSaveUri(ref);
1988 goto done;
1989 }
1990 if (bas->scheme != NULL)
1991 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard57560382012-07-24 11:44:23 +08001992
Daniel Veillarda1413b82007-04-26 08:33:28 +00001993 if (ref->query_raw != NULL)
1994 res->query_raw = xmlMemStrdup(ref->query_raw);
1995 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00001996 res->query = xmlMemStrdup(ref->query);
1997 if (ref->fragment != NULL)
1998 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001999
2000 /*
2001 * 4) If the authority component is defined, then the reference is a
2002 * network-path and we skip to step 7. Otherwise, the reference
2003 * URI's authority is inherited from the base URI's authority
2004 * component, which will also be undefined if the URI scheme does not
2005 * use an authority component.
2006 */
2007 if ((ref->authority != NULL) || (ref->server != NULL)) {
2008 if (ref->authority != NULL)
2009 res->authority = xmlMemStrdup(ref->authority);
2010 else {
2011 res->server = xmlMemStrdup(ref->server);
2012 if (ref->user != NULL)
2013 res->user = xmlMemStrdup(ref->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002014 res->port = ref->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002015 }
2016 if (ref->path != NULL)
2017 res->path = xmlMemStrdup(ref->path);
2018 goto step_7;
2019 }
2020 if (bas->authority != NULL)
2021 res->authority = xmlMemStrdup(bas->authority);
2022 else if (bas->server != NULL) {
2023 res->server = xmlMemStrdup(bas->server);
2024 if (bas->user != NULL)
2025 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002026 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002027 }
2028
2029 /*
2030 * 5) If the path component begins with a slash character ("/"), then
2031 * the reference is an absolute-path and we skip to step 7.
2032 */
2033 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2034 res->path = xmlMemStrdup(ref->path);
2035 goto step_7;
2036 }
2037
2038
2039 /*
2040 * 6) If this step is reached, then we are resolving a relative-path
2041 * reference. The relative path needs to be merged with the base
2042 * URI's path. Although there are many ways to do this, we will
2043 * describe a simple method using a separate string buffer.
2044 *
2045 * Allocate a buffer large enough for the result string.
2046 */
2047 len = 2; /* extra / and 0 */
2048 if (ref->path != NULL)
2049 len += strlen(ref->path);
2050 if (bas->path != NULL)
2051 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002052 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002053 if (res->path == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002054 xmlURIErrMemory("resolving URI against base\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002055 goto done;
2056 }
2057 res->path[0] = 0;
2058
2059 /*
2060 * a) All but the last segment of the base URI's path component is
2061 * copied to the buffer. In other words, any characters after the
2062 * last (right-most) slash character, if any, are excluded.
2063 */
2064 cur = 0;
2065 out = 0;
2066 if (bas->path != NULL) {
2067 while (bas->path[cur] != 0) {
2068 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2069 cur++;
2070 if (bas->path[cur] == 0)
2071 break;
2072
2073 cur++;
2074 while (out < cur) {
2075 res->path[out] = bas->path[out];
2076 out++;
2077 }
2078 }
2079 }
2080 res->path[out] = 0;
2081
2082 /*
2083 * b) The reference's path component is appended to the buffer
2084 * string.
2085 */
2086 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002087 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002088 /*
2089 * Ensure the path includes a '/'
2090 */
2091 if ((out == 0) && (bas->server != NULL))
2092 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002093 while (ref->path[indx] != 0) {
2094 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002095 }
2096 }
2097 res->path[out] = 0;
2098
2099 /*
2100 * Steps c) to h) are really path normalization steps
2101 */
2102 xmlNormalizeURIPath(res->path);
2103
2104step_7:
2105
2106 /*
2107 * 7) The resulting URI components, including any inherited from the
2108 * base URI, are recombined to give the absolute form of the URI
2109 * reference.
2110 */
2111 val = xmlSaveUri(res);
2112
2113done:
2114 if (ref != NULL)
2115 xmlFreeURI(ref);
2116 if (bas != NULL)
2117 xmlFreeURI(bas);
2118 if (res != NULL)
2119 xmlFreeURI(res);
2120 return(val);
2121}
2122
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002123/**
William M. Brackf7789b12004-06-07 08:57:27 +00002124 * xmlBuildRelativeURI:
2125 * @URI: the URI reference under consideration
2126 * @base: the base value
2127 *
2128 * Expresses the URI of the reference in terms relative to the
2129 * base. Some examples of this operation include:
2130 * base = "http://site1.com/docs/book1.html"
2131 * URI input URI returned
2132 * docs/pic1.gif pic1.gif
2133 * docs/img/pic1.gif img/pic1.gif
2134 * img/pic1.gif ../img/pic1.gif
2135 * http://site1.com/docs/pic1.gif pic1.gif
2136 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2137 *
2138 * base = "docs/book1.html"
2139 * URI input URI returned
2140 * docs/pic1.gif pic1.gif
2141 * docs/img/pic1.gif img/pic1.gif
2142 * img/pic1.gif ../img/pic1.gif
2143 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2144 *
2145 *
2146 * Note: if the URI reference is really wierd or complicated, it may be
2147 * worthwhile to first convert it into a "nice" one by calling
2148 * xmlBuildURI (using 'base') before calling this routine,
2149 * since this routine (for reasonable efficiency) assumes URI has
2150 * already been through some validation.
2151 *
2152 * Returns a new URI string (to be freed by the caller) or NULL in case
2153 * error.
2154 */
2155xmlChar *
2156xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2157{
2158 xmlChar *val = NULL;
2159 int ret;
2160 int ix;
2161 int pos = 0;
2162 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002163 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002164 xmlURIPtr ref = NULL;
2165 xmlURIPtr bas = NULL;
2166 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002167 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002168
2169 if ((URI == NULL) || (*URI == 0))
2170 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002171
2172 /*
2173 * First parse URI into a standard form
2174 */
2175 ref = xmlCreateURI ();
2176 if (ref == NULL)
2177 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002178 /* If URI not already in "relative" form */
2179 if (URI[0] != '.') {
2180 ret = xmlParseURIReference (ref, (const char *) URI);
2181 if (ret != 0)
2182 goto done; /* Error in URI, return NULL */
2183 } else
2184 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002185
2186 /*
2187 * Next parse base into the same standard form
2188 */
2189 if ((base == NULL) || (*base == 0)) {
2190 val = xmlStrdup (URI);
2191 goto done;
2192 }
2193 bas = xmlCreateURI ();
2194 if (bas == NULL)
2195 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002196 if (base[0] != '.') {
2197 ret = xmlParseURIReference (bas, (const char *) base);
2198 if (ret != 0)
2199 goto done; /* Error in base, return NULL */
2200 } else
2201 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002202
2203 /*
2204 * If the scheme / server on the URI differs from the base,
2205 * just return the URI
2206 */
2207 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002208 ((bas->scheme == NULL) ||
2209 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2210 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002211 val = xmlStrdup (URI);
2212 goto done;
2213 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002214 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2215 val = xmlStrdup(BAD_CAST "");
2216 goto done;
2217 }
2218 if (bas->path == NULL) {
2219 val = xmlStrdup((xmlChar *)ref->path);
2220 goto done;
2221 }
2222 if (ref->path == NULL) {
2223 ref->path = (char *) "/";
2224 remove_path = 1;
2225 }
William M. Brackf7789b12004-06-07 08:57:27 +00002226
2227 /*
2228 * At this point (at last!) we can compare the two paths
2229 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002230 * First we take care of the special case where either of the
2231 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002232 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002233 if (bas->path == NULL) {
2234 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002235 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002236 if (*uptr == '/')
2237 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002238 /* exception characters from xmlSaveUri */
2239 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002240 }
2241 goto done;
2242 }
William M. Brackf7789b12004-06-07 08:57:27 +00002243 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002244 if (ref->path == NULL) {
2245 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002246 if (bptr[ix] == '/')
2247 nbslash++;
2248 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002249 uptr = NULL;
2250 len = 1; /* this is for a string terminator only */
2251 } else {
2252 /*
2253 * Next we compare the two strings and find where they first differ
2254 */
2255 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2256 pos += 2;
2257 if ((*bptr == '.') && (bptr[1] == '/'))
2258 bptr += 2;
2259 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2260 bptr++;
2261 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2262 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002263
William M. Brack820d5ed2005-09-14 05:24:27 +00002264 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002265 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002266 goto done; /* (I can't imagine why anyone would do this) */
2267 }
2268
2269 /*
2270 * In URI, "back up" to the last '/' encountered. This will be the
2271 * beginning of the "unique" suffix of URI
2272 */
2273 ix = pos;
2274 if ((ref->path[ix] == '/') && (ix > 0))
2275 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002276 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2277 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002278 for (; ix > 0; ix--) {
2279 if (ref->path[ix] == '/')
2280 break;
2281 }
2282 if (ix == 0) {
2283 uptr = (xmlChar *)ref->path;
2284 } else {
2285 ix++;
2286 uptr = (xmlChar *)&ref->path[ix];
2287 }
2288
2289 /*
2290 * In base, count the number of '/' from the differing point
2291 */
2292 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2293 for (; bptr[ix] != 0; ix++) {
2294 if (bptr[ix] == '/')
2295 nbslash++;
2296 }
2297 }
2298 len = xmlStrlen (uptr) + 1;
2299 }
Daniel Veillard57560382012-07-24 11:44:23 +08002300
William M. Brackf7789b12004-06-07 08:57:27 +00002301 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002302 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002303 /* exception characters from xmlSaveUri */
2304 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002305 goto done;
2306 }
William M. Brackf7789b12004-06-07 08:57:27 +00002307
2308 /*
2309 * Allocate just enough space for the returned string -
2310 * length of the remainder of the URI, plus enough space
2311 * for the "../" groups, plus one for the terminator
2312 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002313 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002314 if (val == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002315 xmlURIErrMemory("building relative URI\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002316 goto done;
2317 }
2318 vptr = val;
2319 /*
2320 * Put in as many "../" as needed
2321 */
2322 for (; nbslash>0; nbslash--) {
2323 *vptr++ = '.';
2324 *vptr++ = '.';
2325 *vptr++ = '/';
2326 }
2327 /*
2328 * Finish up with the end of the URI
2329 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002330 if (uptr != NULL) {
2331 if ((vptr > val) && (len > 0) &&
2332 (uptr[0] == '/') && (vptr[-1] == '/')) {
2333 memcpy (vptr, uptr + 1, len - 1);
2334 vptr[len - 2] = 0;
2335 } else {
2336 memcpy (vptr, uptr, len);
2337 vptr[len - 1] = 0;
2338 }
2339 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002340 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002341 }
William M. Brackf7789b12004-06-07 08:57:27 +00002342
William M. Brack50420192007-07-20 01:09:08 +00002343 /* escape the freshly-built path */
2344 vptr = val;
2345 /* exception characters from xmlSaveUri */
2346 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2347 xmlFree(vptr);
2348
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002349done:
William M. Brackf7789b12004-06-07 08:57:27 +00002350 /*
2351 * Free the working variables
2352 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002353 if (remove_path != 0)
2354 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002355 if (ref != NULL)
2356 xmlFreeURI (ref);
2357 if (bas != NULL)
2358 xmlFreeURI (bas);
2359
2360 return val;
2361}
2362
2363/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002364 * xmlCanonicPath:
2365 * @path: the resource locator in a filesystem notation
2366 *
Daniel Veillard57560382012-07-24 11:44:23 +08002367 * Constructs a canonic path from the specified path.
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002368 *
Daniel Veillard57560382012-07-24 11:44:23 +08002369 * Returns a new canonic path, or a duplicate of the path parameter if the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002370 * construction fails. The caller is responsible for freeing the memory occupied
Daniel Veillard57560382012-07-24 11:44:23 +08002371 * by the returned string. If there is insufficient memory available, or the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002372 * argument is NULL, the function returns NULL.
2373 */
Daniel Veillard57560382012-07-24 11:44:23 +08002374#define IS_WINDOWS_PATH(p) \
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002375 ((p != NULL) && \
2376 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2377 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2378 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002379xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002380xmlCanonicPath(const xmlChar *path)
2381{
William M. Brack22242272007-01-27 07:59:37 +00002382/*
2383 * For Windows implementations, additional work needs to be done to
2384 * replace backslashes in pathnames with "forward slashes"
2385 */
Daniel Veillard57560382012-07-24 11:44:23 +08002386#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002387 int len = 0;
2388 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002389 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002390#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002391 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002392 xmlChar *ret;
2393 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002394
2395 if (path == NULL)
2396 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002397
Michael Stahl55b899a2012-09-07 12:14:00 +08002398#if defined(_WIN32)
2399 /*
2400 * We must not change the backslashes to slashes if the the path
2401 * starts with \\?\
2402 * Those paths can be up to 32k characters long.
2403 * Was added specifically for OpenOffice, those paths can't be converted
2404 * to URIs anyway.
2405 */
2406 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2407 (path[3] == '\\') )
2408 return xmlStrdup((const xmlChar *) path);
2409#endif
2410
2411 /* sanitize filename starting with // so it can be used as URI */
Daniel Veillard69f8a132008-02-05 08:37:56 +00002412 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2413 path++;
2414
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002415 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002416 xmlFreeURI(uri);
2417 return xmlStrdup(path);
2418 }
2419
William M. Brack22242272007-01-27 07:59:37 +00002420 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002421 absuri = xmlStrstr(path, BAD_CAST "://");
2422 if (absuri != NULL) {
2423 int l, j;
2424 unsigned char c;
2425 xmlChar *escURI;
2426
2427 /*
2428 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002429 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002430 * part matches a protocol.
2431 */
2432 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002433 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002434 if ((l <= 0) || (l > 20))
2435 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002436 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002437 for (j = 0;j < l;j++) {
2438 c = path[j];
2439 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2440 goto path_processing;
2441 }
2442
William M. Brack22242272007-01-27 07:59:37 +00002443 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002444 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2445 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002446 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002447 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002448 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002449 if (uri != NULL) {
2450 xmlFreeURI(uri);
2451 return escURI;
2452 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002453 }
2454 }
2455
2456path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002457/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard57560382012-07-24 11:44:23 +08002458#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard336a8e12005-08-07 10:46:19 +00002459 /*
William M. Brack22242272007-01-27 07:59:37 +00002460 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002461 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002462 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002463 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002464 return(NULL);
2465 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002466
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002467 len = xmlStrlen(path);
2468 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002469 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002470 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002471 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002472 uri->path = xmlMallocAtomic(len + 2);
2473 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002474 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002475 return(NULL);
2476 }
William M. Brack22242272007-01-27 07:59:37 +00002477 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002478 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002479 p = uri->path + 1;
2480 strncpy(p, path, len + 1);
2481 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002482 uri->path = xmlStrdup(path);
2483 if (uri->path == NULL) {
2484 xmlFreeURI(uri);
2485 return(NULL);
2486 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002487 p = uri->path;
2488 }
William M. Brack22242272007-01-27 07:59:37 +00002489 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002490 while (*p != '\0') {
2491 if (*p == '\\')
2492 *p = '/';
2493 p++;
2494 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002495
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002496 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002497 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002498 } else {
2499 ret = xmlSaveUri(uri);
2500 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002501
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002502 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002503#else
2504 ret = xmlStrdup((const xmlChar *) path);
2505#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002506 return(ret);
2507}
Owen Taylor3473f882001-02-23 17:55:21 +00002508
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002509/**
2510 * xmlPathToURI:
2511 * @path: the resource locator in a filesystem notation
2512 *
2513 * Constructs an URI expressing the existing path
2514 *
Daniel Veillard57560382012-07-24 11:44:23 +08002515 * Returns a new URI, or a duplicate of the path parameter if the
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002516 * construction fails. The caller is responsible for freeing the memory
2517 * occupied by the returned string. If there is insufficient memory available,
2518 * or the argument is NULL, the function returns NULL.
2519 */
2520xmlChar *
2521xmlPathToURI(const xmlChar *path)
2522{
2523 xmlURIPtr uri;
2524 xmlURI temp;
2525 xmlChar *ret, *cal;
2526
2527 if (path == NULL)
2528 return(NULL);
2529
2530 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2531 xmlFreeURI(uri);
2532 return xmlStrdup(path);
2533 }
2534 cal = xmlCanonicPath(path);
2535 if (cal == NULL)
2536 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002537#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard57560382012-07-24 11:44:23 +08002538 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002539 If 'cal' is a valid URI allready then we are done here, as continuing would make
2540 it invalid. */
2541 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2542 xmlFreeURI(uri);
2543 return cal;
2544 }
2545 /* 'cal' can contain a relative path with backslashes. If that is processed
2546 by xmlSaveURI, they will be escaped and the external entity loader machinery
2547 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2548 ret = cal;
2549 while (*ret != '\0') {
2550 if (*ret == '\\')
2551 *ret = '/';
2552 ret++;
2553 }
2554#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002555 memset(&temp, 0, sizeof(temp));
2556 temp.path = (char *) cal;
2557 ret = xmlSaveUri(&temp);
2558 xmlFree(cal);
2559 return(ret);
2560}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002561#define bottom_uri
2562#include "elfgcchack.h"