blob: b11a8a181f723c06ca1d74467a1d478ca79988e9 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
Daniel Veillard57560382012-07-24 11:44:23 +08002 * uri.c: set of generic URI related routines
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
Daniel Veillardd7af5552008-08-04 15:29:44 +00004 * Reference: RFCs 3986, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
Brian C. Youngf2716032017-04-05 09:31:12 -070015#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000016
17#include <libxml/xmlmemory.h>
18#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000019#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000020#include <libxml/xmlerror.h>
21
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operation.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value when used inside a larger expression.
 */
#define MAX_URI_LENGTH (1024 * 1024)
35
36static void
37xmlURIErrMemory(const char *extra)
38{
39 if (extra)
40 __xmlRaiseError(NULL, NULL, NULL,
41 NULL, NULL, XML_FROM_URI,
42 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43 extra, NULL, NULL, 0, 0,
44 "Memory allocation failed : %s\n", extra);
45 else
46 __xmlRaiseError(NULL, NULL, NULL,
47 NULL, NULL, XML_FROM_URI,
48 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49 NULL, NULL, NULL, 0, 0,
50 "Memory allocation failed\n");
51}
52
Daniel Veillardd7af5552008-08-04 15:29:44 +000053static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000054
/*
 * Character classes from the old RFC 2396 grammar, still used by the
 * legacy handling code. The IS_xxx(x) macros take a character value,
 * while IS_UNWISE(p) and NEXT(p) take a pointer into the string.
 * Every macro argument is parenthesized in the expansion.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* Drop any IS_DIGIT inherited from an earlier header before redefining it */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * This macro additionally accepts "[" and "]".
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances
 * the pointer by 3 (the '%' and the two characters after it), anything
 * else advances it by 1. The caller must have checked that a '%' is
 * really followed by two characters.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
128
129/*
130 * Productions from the spec.
131 *
132 * authority = server | reg_name
133 * reg_name = 1*( unreserved | escaped | "$" | "," |
134 * ";" | ":" | "@" | "&" | "=" | "+" )
135 *
136 * path = [ abs_path | opaque_part ]
137 */
138
/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and hand the
 * result back as a plain char pointer. The whole expansion is wrapped in
 * parentheses so the cast applies before any operator used on the result.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140
Owen Taylor3473f882001-02-23 17:55:21 +0000141/************************************************************************
142 * *
Daniel Veillardd7af5552008-08-04 15:29:44 +0000143 * RFC 3986 parser *
144 * *
145 ************************************************************************/
146
/*
 * Character class tests for the RFC 3986 grammar. Every ISA_xxx(p) macro
 * takes a pointer and examines the character(s) it points to.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read once the '%' matched, and
 * the second one only when the first is a hex digit. The argument is
 * parenthesized before the offset is added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
196
197/**
198 * xmlParse3986Scheme:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
201 *
202 * Parse an URI scheme
203 *
204 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205 *
206 * Returns 0 or the error code
207 */
208static int
209xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210 const char *cur;
211
212 if (str == NULL)
213 return(-1);
214
215 cur = *str;
216 if (!ISA_ALPHA(cur))
217 return(2);
218 cur++;
219 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221 if (uri != NULL) {
222 if (uri->scheme != NULL) xmlFree(uri->scheme);
223 uri->scheme = STRNDUP(*str, cur - *str);
224 }
225 *str = cur;
226 return(0);
227}
228
229/**
230 * xmlParse3986Fragment:
231 * @uri: pointer to an URI structure
232 * @str: pointer to the string to analyze
233 *
234 * Parse the query part of an URI
235 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000236 * fragment = *( pchar / "/" / "?" )
237 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238 * in the fragment identifier but this is used very broadly for
239 * xpointer scheme selection, so we are allowing it here to not break
240 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000241 *
242 * Returns 0 or the error code
243 */
244static int
245xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246{
247 const char *cur;
248
249 if (str == NULL)
250 return (-1);
251
252 cur = *str;
253
254 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000255 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000256 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257 NEXT(cur);
258 if (uri != NULL) {
259 if (uri->fragment != NULL)
260 xmlFree(uri->fragment);
261 if (uri->cleanup & 2)
262 uri->fragment = STRNDUP(*str, cur - *str);
263 else
264 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265 }
266 *str = cur;
267 return (0);
268}
269
270/**
271 * xmlParse3986Query:
272 * @uri: pointer to an URI structure
273 * @str: pointer to the string to analyze
274 *
275 * Parse the query part of an URI
276 *
277 * query = *uric
278 *
279 * Returns 0 or the error code
280 */
281static int
282xmlParse3986Query(xmlURIPtr uri, const char **str)
283{
284 const char *cur;
285
286 if (str == NULL)
287 return (-1);
288
289 cur = *str;
290
291 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293 NEXT(cur);
294 if (uri != NULL) {
295 if (uri->query != NULL)
296 xmlFree(uri->query);
297 if (uri->cleanup & 2)
298 uri->query = STRNDUP(*str, cur - *str);
299 else
300 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301
302 /* Save the raw bytes of the query as well.
303 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 */
305 if (uri->query_raw != NULL)
306 xmlFree (uri->query_raw);
307 uri->query_raw = STRNDUP (*str, cur - *str);
308 }
309 *str = cur;
310 return (0);
311}
312
313/**
314 * xmlParse3986Port:
315 * @uri: pointer to an URI structure
316 * @str: the string to analyze
317 *
Xin Li28c53d32017-03-07 00:33:02 +0000318 * Parse a port part and fills in the appropriate fields
Daniel Veillardd7af5552008-08-04 15:29:44 +0000319 * of the @uri structure
320 *
321 * port = *DIGIT
322 *
323 * Returns 0 or the error code
324 */
325static int
326xmlParse3986Port(xmlURIPtr uri, const char **str)
327{
328 const char *cur = *str;
Xin Li28c53d32017-03-07 00:33:02 +0000329 unsigned port = 0; /* unsigned for defined overflow behavior */
Daniel Veillardd7af5552008-08-04 15:29:44 +0000330
331 if (ISA_DIGIT(cur)) {
Daniel Veillardd7af5552008-08-04 15:29:44 +0000332 while (ISA_DIGIT(cur)) {
Xin Li28c53d32017-03-07 00:33:02 +0000333 port = port * 10 + (*cur - '0');
334
Daniel Veillardd7af5552008-08-04 15:29:44 +0000335 cur++;
336 }
Xin Li28c53d32017-03-07 00:33:02 +0000337 if (uri != NULL)
Brian C. Youngf2716032017-04-05 09:31:12 -0700338 uri->port = port & USHRT_MAX; /* port value modulo INT_MAX+1 */
Daniel Veillardd7af5552008-08-04 15:29:44 +0000339 *str = cur;
340 return(0);
341 }
342 return(1);
343}
344
345/**
346 * xmlParse3986Userinfo:
347 * @uri: pointer to an URI structure
348 * @str: the string to analyze
349 *
350 * Parse an user informations part and fills in the appropriate fields
351 * of the @uri structure
352 *
353 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
354 *
355 * Returns 0 or the error code
356 */
357static int
358xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
359{
360 const char *cur;
361
362 cur = *str;
363 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
364 ISA_SUB_DELIM(cur) || (*cur == ':'))
365 NEXT(cur);
366 if (*cur == '@') {
367 if (uri != NULL) {
368 if (uri->user != NULL) xmlFree(uri->user);
369 if (uri->cleanup & 2)
370 uri->user = STRNDUP(*str, cur - *str);
371 else
372 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
373 }
374 *str = cur;
375 return(0);
376 }
377 return(1);
378}
379
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are examined; whatever follows
 * them is left for the caller to check. Characters are read lazily, a
 * position is only looked at once the previous one held a digit, so the
 * terminating 0 of the string is never passed.
 *
 * Returns 0 if found and skipped (*@str is advanced), 1 otherwise
 * (*@str is untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* DIGIT: 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* %x31-39 DIGIT: 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* "1" 2DIGIT: 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* "2" %x30-34 DIGIT: 200-249 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* "25" %x30-35: 250-255, the LAST digit is the one bounded by 5 */
        cur += 3;
    } else {
        return(1);
    }
    *str = cur;
    return(0);
}
417/**
418 * xmlParse3986Host:
419 * @uri: pointer to an URI structure
420 * @str: the string to analyze
421 *
422 * Parse an host part and fills in the appropriate fields
423 * of the @uri structure
424 *
425 * host = IP-literal / IPv4address / reg-name
426 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
427 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
428 * reg-name = *( unreserved / pct-encoded / sub-delims )
429 *
430 * Returns 0 or the error code
431 */
432static int
433xmlParse3986Host(xmlURIPtr uri, const char **str)
434{
435 const char *cur = *str;
436 const char *host;
437
438 host = cur;
439 /*
440 * IPv6 and future adressing scheme are enclosed between brackets
441 */
442 if (*cur == '[') {
443 cur++;
444 while ((*cur != ']') && (*cur != 0))
445 cur++;
446 if (*cur != ']')
447 return(1);
448 cur++;
449 goto found;
450 }
451 /*
452 * try to parse an IPv4
453 */
454 if (ISA_DIGIT(cur)) {
455 if (xmlParse3986DecOctet(&cur) != 0)
456 goto not_ipv4;
457 if (*cur != '.')
458 goto not_ipv4;
459 cur++;
460 if (xmlParse3986DecOctet(&cur) != 0)
461 goto not_ipv4;
462 if (*cur != '.')
463 goto not_ipv4;
464 if (xmlParse3986DecOctet(&cur) != 0)
465 goto not_ipv4;
466 if (*cur != '.')
467 goto not_ipv4;
468 if (xmlParse3986DecOctet(&cur) != 0)
469 goto not_ipv4;
470 goto found;
471not_ipv4:
472 cur = *str;
473 }
474 /*
475 * then this should be a hostname which can be empty
476 */
477 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
478 NEXT(cur);
479found:
480 if (uri != NULL) {
481 if (uri->authority != NULL) xmlFree(uri->authority);
482 uri->authority = NULL;
483 if (uri->server != NULL) xmlFree(uri->server);
484 if (cur != host) {
485 if (uri->cleanup & 2)
486 uri->server = STRNDUP(host, cur - host);
487 else
488 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
489 } else
490 uri->server = NULL;
491 }
492 *str = cur;
493 return(0);
494}
495
496/**
497 * xmlParse3986Authority:
498 * @uri: pointer to an URI structure
499 * @str: the string to analyze
500 *
501 * Parse an authority part and fills in the appropriate fields
502 * of the @uri structure
503 *
504 * authority = [ userinfo "@" ] host [ ":" port ]
505 *
506 * Returns 0 or the error code
507 */
508static int
509xmlParse3986Authority(xmlURIPtr uri, const char **str)
510{
511 const char *cur;
512 int ret;
513
514 cur = *str;
515 /*
516 * try to parse an userinfo and check for the trailing @
517 */
518 ret = xmlParse3986Userinfo(uri, &cur);
519 if ((ret != 0) || (*cur != '@'))
520 cur = *str;
521 else
522 cur++;
523 ret = xmlParse3986Host(uri, &cur);
524 if (ret != 0) return(ret);
525 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000526 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000527 ret = xmlParse3986Port(uri, &cur);
528 if (ret != 0) return(ret);
529 }
530 *str = cur;
531 return(0);
532}
533
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip over a path segment, stopping at the first character which is
 * not a pchar or which equals @forbid
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 *         @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    for (;;) {
        if (*cur == forbid)
            break;
        if (!ISA_PCHAR(cur))
            break;
        NEXT(cur);
    }
    *str = cur;
    return (0);
}
566
567/**
568 * xmlParse3986PathAbEmpty:
569 * @uri: pointer to an URI structure
570 * @str: the string to analyze
571 *
572 * Parse an path absolute or empty and fills in the appropriate fields
573 * of the @uri structure
574 *
575 * path-abempty = *( "/" segment )
576 *
577 * Returns 0 or the error code
578 */
579static int
580xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
581{
582 const char *cur;
583 int ret;
584
585 cur = *str;
586
587 while (*cur == '/') {
588 cur++;
589 ret = xmlParse3986Segment(&cur, 0, 1);
590 if (ret != 0) return(ret);
591 }
592 if (uri != NULL) {
593 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200594 if (*str != cur) {
595 if (uri->cleanup & 2)
596 uri->path = STRNDUP(*str, cur - *str);
597 else
598 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
599 } else {
600 uri->path = NULL;
601 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000602 }
603 *str = cur;
604 return (0);
605}
606
607/**
608 * xmlParse3986PathAbsolute:
609 * @uri: pointer to an URI structure
610 * @str: the string to analyze
611 *
612 * Parse an path absolute and fills in the appropriate fields
613 * of the @uri structure
614 *
615 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
616 *
617 * Returns 0 or the error code
618 */
619static int
620xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
621{
622 const char *cur;
623 int ret;
624
625 cur = *str;
626
627 if (*cur != '/')
628 return(1);
629 cur++;
630 ret = xmlParse3986Segment(&cur, 0, 0);
631 if (ret == 0) {
632 while (*cur == '/') {
633 cur++;
634 ret = xmlParse3986Segment(&cur, 0, 1);
635 if (ret != 0) return(ret);
636 }
637 }
638 if (uri != NULL) {
639 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200640 if (cur != *str) {
641 if (uri->cleanup & 2)
642 uri->path = STRNDUP(*str, cur - *str);
643 else
644 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645 } else {
646 uri->path = NULL;
647 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000648 }
649 *str = cur;
650 return (0);
651}
652
653/**
654 * xmlParse3986PathRootless:
655 * @uri: pointer to an URI structure
656 * @str: the string to analyze
657 *
658 * Parse an path without root and fills in the appropriate fields
659 * of the @uri structure
660 *
661 * path-rootless = segment-nz *( "/" segment )
662 *
663 * Returns 0 or the error code
664 */
665static int
666xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
667{
668 const char *cur;
669 int ret;
670
671 cur = *str;
672
673 ret = xmlParse3986Segment(&cur, 0, 0);
674 if (ret != 0) return(ret);
675 while (*cur == '/') {
676 cur++;
677 ret = xmlParse3986Segment(&cur, 0, 1);
678 if (ret != 0) return(ret);
679 }
680 if (uri != NULL) {
681 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200682 if (cur != *str) {
683 if (uri->cleanup & 2)
684 uri->path = STRNDUP(*str, cur - *str);
685 else
686 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
687 } else {
688 uri->path = NULL;
689 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000690 }
691 *str = cur;
692 return (0);
693}
694
695/**
696 * xmlParse3986PathNoScheme:
697 * @uri: pointer to an URI structure
698 * @str: the string to analyze
699 *
700 * Parse an path which is not a scheme and fills in the appropriate fields
701 * of the @uri structure
702 *
703 * path-noscheme = segment-nz-nc *( "/" segment )
704 *
705 * Returns 0 or the error code
706 */
707static int
708xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
709{
710 const char *cur;
711 int ret;
712
713 cur = *str;
714
715 ret = xmlParse3986Segment(&cur, ':', 0);
716 if (ret != 0) return(ret);
717 while (*cur == '/') {
718 cur++;
719 ret = xmlParse3986Segment(&cur, 0, 1);
720 if (ret != 0) return(ret);
721 }
722 if (uri != NULL) {
723 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200724 if (cur != *str) {
725 if (uri->cleanup & 2)
726 uri->path = STRNDUP(*str, cur - *str);
727 else
728 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
729 } else {
730 uri->path = NULL;
731 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000732 }
733 *str = cur;
734 return (0);
735}
736
737/**
738 * xmlParse3986HierPart:
739 * @uri: pointer to an URI structure
740 * @str: the string to analyze
741 *
742 * Parse an hierarchical part and fills in the appropriate fields
743 * of the @uri structure
744 *
745 * hier-part = "//" authority path-abempty
746 * / path-absolute
747 * / path-rootless
748 * / path-empty
749 *
750 * Returns 0 or the error code
751 */
752static int
753xmlParse3986HierPart(xmlURIPtr uri, const char **str)
754{
755 const char *cur;
756 int ret;
757
758 cur = *str;
759
760 if ((*cur == '/') && (*(cur + 1) == '/')) {
761 cur += 2;
762 ret = xmlParse3986Authority(uri, &cur);
763 if (ret != 0) return(ret);
Daniel Veillardbeb72812014-10-03 19:22:39 +0800764 if (uri->server == NULL)
765 uri->port = -1;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000766 ret = xmlParse3986PathAbEmpty(uri, &cur);
767 if (ret != 0) return(ret);
768 *str = cur;
769 return(0);
770 } else if (*cur == '/') {
771 ret = xmlParse3986PathAbsolute(uri, &cur);
772 if (ret != 0) return(ret);
773 } else if (ISA_PCHAR(cur)) {
774 ret = xmlParse3986PathRootless(uri, &cur);
775 if (ret != 0) return(ret);
776 } else {
777 /* path-empty is effectively empty */
778 if (uri != NULL) {
779 if (uri->path != NULL) xmlFree(uri->path);
780 uri->path = NULL;
781 }
782 }
783 *str = cur;
784 return (0);
785}
786
787/**
788 * xmlParse3986RelativeRef:
789 * @uri: pointer to an URI structure
790 * @str: the string to analyze
791 *
792 * Parse an URI string and fills in the appropriate fields
793 * of the @uri structure
794 *
795 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
796 * relative-part = "//" authority path-abempty
797 * / path-absolute
798 * / path-noscheme
799 * / path-empty
800 *
801 * Returns 0 or the error code
802 */
803static int
804xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
805 int ret;
806
807 if ((*str == '/') && (*(str + 1) == '/')) {
808 str += 2;
809 ret = xmlParse3986Authority(uri, &str);
810 if (ret != 0) return(ret);
811 ret = xmlParse3986PathAbEmpty(uri, &str);
812 if (ret != 0) return(ret);
813 } else if (*str == '/') {
814 ret = xmlParse3986PathAbsolute(uri, &str);
815 if (ret != 0) return(ret);
816 } else if (ISA_PCHAR(str)) {
817 ret = xmlParse3986PathNoScheme(uri, &str);
818 if (ret != 0) return(ret);
819 } else {
820 /* path-empty is effectively empty */
821 if (uri != NULL) {
822 if (uri->path != NULL) xmlFree(uri->path);
823 uri->path = NULL;
824 }
825 }
826
827 if (*str == '?') {
828 str++;
829 ret = xmlParse3986Query(uri, &str);
830 if (ret != 0) return(ret);
831 }
832 if (*str == '#') {
833 str++;
834 ret = xmlParse3986Fragment(uri, &str);
835 if (ret != 0) return(ret);
836 }
837 if (*str != 0) {
838 xmlCleanURI(uri);
839 return(1);
840 }
841 return(0);
842}
843
844
845/**
846 * xmlParse3986URI:
847 * @uri: pointer to an URI structure
848 * @str: the string to analyze
849 *
850 * Parse an URI string and fills in the appropriate fields
851 * of the @uri structure
852 *
853 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
854 *
855 * Returns 0 or the error code
856 */
857static int
858xmlParse3986URI(xmlURIPtr uri, const char *str) {
859 int ret;
860
861 ret = xmlParse3986Scheme(uri, &str);
862 if (ret != 0) return(ret);
863 if (*str != ':') {
864 return(1);
865 }
866 str++;
867 ret = xmlParse3986HierPart(uri, &str);
868 if (ret != 0) return(ret);
869 if (*str == '?') {
870 str++;
871 ret = xmlParse3986Query(uri, &str);
872 if (ret != 0) return(ret);
873 }
874 if (*str == '#') {
875 str++;
876 ret = xmlParse3986Fragment(uri, &str);
877 if (ret != 0) return(ret);
878 }
879 if (*str != 0) {
880 xmlCleanURI(uri);
881 return(1);
882 }
883 return(0);
884}
885
886/**
887 * xmlParse3986URIReference:
888 * @uri: pointer to an URI structure
889 * @str: the string to analyze
890 *
891 * Parse an URI reference string and fills in the appropriate fields
892 * of the @uri structure
893 *
894 * URI-reference = URI / relative-ref
895 *
896 * Returns 0 or the error code
897 */
898static int
899xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
900 int ret;
901
902 if (str == NULL)
903 return(-1);
904 xmlCleanURI(uri);
905
906 /*
907 * Try first to parse absolute refs, then fallback to relative if
908 * it fails.
909 */
910 ret = xmlParse3986URI(uri, str);
911 if (ret != 0) {
912 xmlCleanURI(uri);
913 ret = xmlParse3986RelativeRef(uri, str);
914 if (ret != 0) {
915 xmlCleanURI(uri);
916 return(ret);
917 }
918 }
919 return(0);
920}
921
922/**
923 * xmlParseURI:
924 * @str: the URI string to analyze
925 *
926 * Parse an URI based on RFC 3986
927 *
928 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
929 *
930 * Returns a newly built xmlURIPtr or NULL in case of error
931 */
932xmlURIPtr
933xmlParseURI(const char *str) {
934 xmlURIPtr uri;
935 int ret;
936
937 if (str == NULL)
938 return(NULL);
939 uri = xmlCreateURI();
940 if (uri != NULL) {
941 ret = xmlParse3986URIReference(uri, str);
942 if (ret) {
943 xmlFreeURI(uri);
944 return(NULL);
945 }
946 }
947 return(uri);
948}
949
950/**
951 * xmlParseURIReference:
952 * @uri: pointer to an URI structure
953 * @str: the string to analyze
954 *
955 * Parse an URI reference string based on RFC 3986 and fills in the
956 * appropriate fields of the @uri structure
957 *
958 * URI-reference = URI / relative-ref
959 *
960 * Returns 0 or the error code
961 */
962int
963xmlParseURIReference(xmlURIPtr uri, const char *str) {
964 return(xmlParse3986URIReference(uri, str));
965}
966
967/**
968 * xmlParseURIRaw:
969 * @str: the URI string to analyze
970 * @raw: if 1 unescaping of URI pieces are disabled
971 *
972 * Parse an URI but allows to keep intact the original fragments.
973 *
974 * URI-reference = URI / relative-ref
975 *
976 * Returns a newly built xmlURIPtr or NULL in case of error
977 */
978xmlURIPtr
979xmlParseURIRaw(const char *str, int raw) {
980 xmlURIPtr uri;
981 int ret;
982
983 if (str == NULL)
984 return(NULL);
985 uri = xmlCreateURI();
986 if (uri != NULL) {
987 if (raw) {
988 uri->cleanup |= 2;
989 }
990 ret = xmlParseURIReference(uri, str);
991 if (ret) {
992 xmlFreeURI(uri);
993 return(NULL);
994 }
995 }
996 return(uri);
997}
998
999/************************************************************************
1000 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001001 * Generic URI structure functions *
1002 * *
1003 ************************************************************************/
1004
1005/**
1006 * xmlCreateURI:
1007 *
1008 * Simply creates an empty xmlURI
1009 *
1010 * Returns the new structure or NULL in case of error
1011 */
1012xmlURIPtr
1013xmlCreateURI(void) {
1014 xmlURIPtr ret;
1015
1016 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1017 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001018 xmlURIErrMemory("creating URI structure\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001019 return(NULL);
1020 }
1021 memset(ret, 0, sizeof(xmlURI));
1022 return(ret);
1023}
1024
1025/**
Daniel Veillard57560382012-07-24 11:44:23 +08001026 * xmlSaveUriRealloc:
1027 *
1028 * Function to handle properly a reallocation when saving an URI
1029 * Also imposes some limit on the length of an URI string output
1030 */
1031static xmlChar *
1032xmlSaveUriRealloc(xmlChar *ret, int *max) {
1033 xmlChar *temp;
1034 int tmp;
1035
1036 if (*max > MAX_URI_LENGTH) {
1037 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1038 return(NULL);
1039 }
1040 tmp = *max * 2;
1041 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1042 if (temp == NULL) {
1043 xmlURIErrMemory("saving URI\n");
1044 return(NULL);
1045 }
1046 *max = tmp;
1047 return(temp);
1048}
1049
1050/**
Owen Taylor3473f882001-02-23 17:55:21 +00001051 * xmlSaveUri:
1052 * @uri: pointer to an xmlURI
1053 *
1054 * Save the URI as an escaped string
1055 *
1056 * Returns a new string (to be deallocated by caller)
1057 */
1058xmlChar *
1059xmlSaveUri(xmlURIPtr uri) {
1060 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001061 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001062 const char *p;
1063 int len;
1064 int max;
1065
1066 if (uri == NULL) return(NULL);
1067
1068
1069 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001070 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001071 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001072 xmlURIErrMemory("saving URI\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001073 return(NULL);
1074 }
1075 len = 0;
1076
1077 if (uri->scheme != NULL) {
1078 p = uri->scheme;
1079 while (*p != 0) {
1080 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001081 temp = xmlSaveUriRealloc(ret, &max);
1082 if (temp == NULL) goto mem_error;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001083 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001084 }
1085 ret[len++] = *p++;
1086 }
1087 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001088 temp = xmlSaveUriRealloc(ret, &max);
1089 if (temp == NULL) goto mem_error;
1090 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001091 }
1092 ret[len++] = ':';
1093 }
1094 if (uri->opaque != NULL) {
1095 p = uri->opaque;
1096 while (*p != 0) {
1097 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001098 temp = xmlSaveUriRealloc(ret, &max);
1099 if (temp == NULL) goto mem_error;
1100 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001101 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001102 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001103 ret[len++] = *p++;
1104 else {
1105 int val = *(unsigned char *)p++;
1106 int hi = val / 0x10, lo = val % 0x10;
1107 ret[len++] = '%';
1108 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1109 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1110 }
1111 }
Owen Taylor3473f882001-02-23 17:55:21 +00001112 } else {
Daniel Veillardbeb72812014-10-03 19:22:39 +08001113 if ((uri->server != NULL) || (uri->port == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001114 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001115 temp = xmlSaveUriRealloc(ret, &max);
1116 if (temp == NULL) goto mem_error;
1117 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001118 }
1119 ret[len++] = '/';
1120 ret[len++] = '/';
1121 if (uri->user != NULL) {
1122 p = uri->user;
1123 while (*p != 0) {
1124 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001125 temp = xmlSaveUriRealloc(ret, &max);
1126 if (temp == NULL) goto mem_error;
1127 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001128 }
1129 if ((IS_UNRESERVED(*(p))) ||
1130 ((*(p) == ';')) || ((*(p) == ':')) ||
1131 ((*(p) == '&')) || ((*(p) == '=')) ||
1132 ((*(p) == '+')) || ((*(p) == '$')) ||
1133 ((*(p) == ',')))
1134 ret[len++] = *p++;
1135 else {
1136 int val = *(unsigned char *)p++;
1137 int hi = val / 0x10, lo = val % 0x10;
1138 ret[len++] = '%';
1139 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1140 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1141 }
1142 }
1143 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001144 temp = xmlSaveUriRealloc(ret, &max);
1145 if (temp == NULL) goto mem_error;
1146 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001147 }
1148 ret[len++] = '@';
1149 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001150 if (uri->server != NULL) {
1151 p = uri->server;
1152 while (*p != 0) {
1153 if (len >= max) {
1154 temp = xmlSaveUriRealloc(ret, &max);
1155 if (temp == NULL) goto mem_error;
1156 ret = temp;
1157 }
1158 ret[len++] = *p++;
Owen Taylor3473f882001-02-23 17:55:21 +00001159 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001160 if (uri->port > 0) {
1161 if (len + 10 >= max) {
1162 temp = xmlSaveUriRealloc(ret, &max);
1163 if (temp == NULL) goto mem_error;
1164 ret = temp;
1165 }
1166 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001167 }
Owen Taylor3473f882001-02-23 17:55:21 +00001168 }
1169 } else if (uri->authority != NULL) {
1170 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001171 temp = xmlSaveUriRealloc(ret, &max);
1172 if (temp == NULL) goto mem_error;
1173 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001174 }
1175 ret[len++] = '/';
1176 ret[len++] = '/';
1177 p = uri->authority;
1178 while (*p != 0) {
1179 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001180 temp = xmlSaveUriRealloc(ret, &max);
1181 if (temp == NULL) goto mem_error;
1182 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001183 }
1184 if ((IS_UNRESERVED(*(p))) ||
1185 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1186 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1187 ((*(p) == '=')) || ((*(p) == '+')))
1188 ret[len++] = *p++;
1189 else {
1190 int val = *(unsigned char *)p++;
1191 int hi = val / 0x10, lo = val % 0x10;
1192 ret[len++] = '%';
1193 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1194 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1195 }
1196 }
1197 } else if (uri->scheme != NULL) {
1198 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001199 temp = xmlSaveUriRealloc(ret, &max);
1200 if (temp == NULL) goto mem_error;
1201 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001202 }
Owen Taylor3473f882001-02-23 17:55:21 +00001203 }
1204 if (uri->path != NULL) {
1205 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001206 /*
1207 * the colon in file:///d: should not be escaped or
1208 * Windows accesses fail later.
1209 */
1210 if ((uri->scheme != NULL) &&
1211 (p[0] == '/') &&
1212 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1213 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1214 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001215 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001216 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001217 temp = xmlSaveUriRealloc(ret, &max);
1218 if (temp == NULL) goto mem_error;
1219 ret = temp;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001220 }
1221 ret[len++] = *p++;
1222 ret[len++] = *p++;
1223 ret[len++] = *p++;
1224 }
Owen Taylor3473f882001-02-23 17:55:21 +00001225 while (*p != 0) {
1226 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001227 temp = xmlSaveUriRealloc(ret, &max);
1228 if (temp == NULL) goto mem_error;
1229 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001230 }
1231 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1232 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1233 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1234 ((*(p) == ',')))
1235 ret[len++] = *p++;
1236 else {
1237 int val = *(unsigned char *)p++;
1238 int hi = val / 0x10, lo = val % 0x10;
1239 ret[len++] = '%';
1240 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1241 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1242 }
1243 }
1244 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001245 if (uri->query_raw != NULL) {
1246 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001247 temp = xmlSaveUriRealloc(ret, &max);
1248 if (temp == NULL) goto mem_error;
1249 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001250 }
1251 ret[len++] = '?';
1252 p = uri->query_raw;
1253 while (*p != 0) {
1254 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001255 temp = xmlSaveUriRealloc(ret, &max);
1256 if (temp == NULL) goto mem_error;
1257 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001258 }
1259 ret[len++] = *p++;
1260 }
1261 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001262 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001263 temp = xmlSaveUriRealloc(ret, &max);
1264 if (temp == NULL) goto mem_error;
1265 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001266 }
1267 ret[len++] = '?';
1268 p = uri->query;
1269 while (*p != 0) {
1270 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001271 temp = xmlSaveUriRealloc(ret, &max);
1272 if (temp == NULL) goto mem_error;
1273 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001274 }
Daniel Veillard57560382012-07-24 11:44:23 +08001275 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Owen Taylor3473f882001-02-23 17:55:21 +00001276 ret[len++] = *p++;
1277 else {
1278 int val = *(unsigned char *)p++;
1279 int hi = val / 0x10, lo = val % 0x10;
1280 ret[len++] = '%';
1281 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1282 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1283 }
1284 }
1285 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001286 }
1287 if (uri->fragment != NULL) {
1288 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001289 temp = xmlSaveUriRealloc(ret, &max);
1290 if (temp == NULL) goto mem_error;
1291 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001292 }
1293 ret[len++] = '#';
1294 p = uri->fragment;
1295 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001296 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001297 temp = xmlSaveUriRealloc(ret, &max);
1298 if (temp == NULL) goto mem_error;
1299 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001300 }
Daniel Veillard57560382012-07-24 11:44:23 +08001301 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001302 ret[len++] = *p++;
1303 else {
1304 int val = *(unsigned char *)p++;
1305 int hi = val / 0x10, lo = val % 0x10;
1306 ret[len++] = '%';
1307 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1308 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001309 }
1310 }
Owen Taylor3473f882001-02-23 17:55:21 +00001311 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001312 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001313 temp = xmlSaveUriRealloc(ret, &max);
1314 if (temp == NULL) goto mem_error;
1315 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001316 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02001317 ret[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001318 return(ret);
Daniel Veillard57560382012-07-24 11:44:23 +08001319
1320mem_error:
1321 xmlFree(ret);
1322 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001323}
1324
1325/**
1326 * xmlPrintURI:
1327 * @stream: a FILE* for the output
1328 * @uri: pointer to an xmlURI
1329 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001330 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001331 */
1332void
1333xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1334 xmlChar *out;
1335
1336 out = xmlSaveUri(uri);
1337 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001338 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001339 xmlFree(out);
1340 }
1341}
1342
1343/**
1344 * xmlCleanURI:
1345 * @uri: pointer to an xmlURI
1346 *
1347 * Make sure the xmlURI struct is free of content
1348 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001349static void
Owen Taylor3473f882001-02-23 17:55:21 +00001350xmlCleanURI(xmlURIPtr uri) {
1351 if (uri == NULL) return;
1352
1353 if (uri->scheme != NULL) xmlFree(uri->scheme);
1354 uri->scheme = NULL;
1355 if (uri->server != NULL) xmlFree(uri->server);
1356 uri->server = NULL;
1357 if (uri->user != NULL) xmlFree(uri->user);
1358 uri->user = NULL;
1359 if (uri->path != NULL) xmlFree(uri->path);
1360 uri->path = NULL;
1361 if (uri->fragment != NULL) xmlFree(uri->fragment);
1362 uri->fragment = NULL;
1363 if (uri->opaque != NULL) xmlFree(uri->opaque);
1364 uri->opaque = NULL;
1365 if (uri->authority != NULL) xmlFree(uri->authority);
1366 uri->authority = NULL;
1367 if (uri->query != NULL) xmlFree(uri->query);
1368 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001369 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1370 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001371}
1372
1373/**
1374 * xmlFreeURI:
1375 * @uri: pointer to an xmlURI
1376 *
1377 * Free up the xmlURI struct
1378 */
1379void
1380xmlFreeURI(xmlURIPtr uri) {
1381 if (uri == NULL) return;
1382
1383 if (uri->scheme != NULL) xmlFree(uri->scheme);
1384 if (uri->server != NULL) xmlFree(uri->server);
1385 if (uri->user != NULL) xmlFree(uri->user);
1386 if (uri->path != NULL) xmlFree(uri->path);
1387 if (uri->fragment != NULL) xmlFree(uri->fragment);
1388 if (uri->opaque != NULL) xmlFree(uri->opaque);
1389 if (uri->authority != NULL) xmlFree(uri->authority);
1390 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001391 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001392 xmlFree(uri);
1393}
1394
/************************************************************************
 *									*
 *			Helper functions				*
 *									*
 ************************************************************************/
1400
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * characters are only ever removed, so the result is never longer than
 * the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * At the top of this loop "cur" always sits on the first character
     * of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    for (;;) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy() must not be used;
         * copy byte by byte (terminator included) towards lower addresses.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the slash(es) that precede "cur". */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        /* Reaching the start of the buffer on a '/' means that only
         * leading slashes precede "cur": there is no previous segment, so
         * keep going from here.  If path[0] is not a '/', a one-character
         * first segment starts at "path" and must be re-examined like any
         * other previous segment.
         */
        if ((segp == path) && (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only done for paths that start
     * with a '/').
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001588
/**
 * is_hex:
 * @c:  the character to test
 *
 * Checks whether @c is one of the characters 0-9, a-f or A-F.
 *
 * Returns 1 if @c is a hexadecimal digit, 0 otherwise
 */
static int is_hex(char c) {
    return (((c >= '0') && (c <= '9')) ||
            ((c >= 'a') && (c <= 'f')) ||
            ((c >= 'A') && (c <= 'F')));
}
1596
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits within the @len
 * bytes considered is copied unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string.
 *
 * When @target is NULL a new buffer of @len + 1 bytes is allocated for
 * the result; otherwise the result is written to @target, which must
 * have room for @len + 1 bytes, and @target is returned.
 *
 * Returns a copy of the string, but unescaped, will return NULL only in case
 * of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /* the length is tracked in an int: refuse what does not fit */
        if (full > (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        /* size computed as size_t so that len == INT_MAX cannot overflow */
        ret = (char *) xmlMallocAtomic((size_t) len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            /* Accumulate in an int: values above 0x7F do not fit in a
             * signed char, so the narrowing is made explicit below.
             */
            int c = 0;
            int i;

            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                c *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    c += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    c += (digit - 'a') + 10;
                else /* is_hex() above leaves only 'A'..'F' */
                    c += (digit - 'A') + 10;
            }
            in += 3;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1658
1659/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001660 * xmlURIEscapeStr:
1661 * @str: string to escape
1662 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001663 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001664 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001666 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001667 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001668 */
1669xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001670xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001672 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001673 const xmlChar *in;
Daniel Veillard57560382012-07-24 11:44:23 +08001674 int len, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001675
1676 if (str == NULL)
1677 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001678 if (str[0] == 0)
1679 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001680 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001681 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001682
1683 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001684 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001685 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001686 xmlURIErrMemory("escaping URI value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001687 return(NULL);
1688 }
1689 in = (const xmlChar *) str;
1690 out = 0;
1691 while(*in != 0) {
1692 if (len - out <= 3) {
Daniel Veillard57560382012-07-24 11:44:23 +08001693 temp = xmlSaveUriRealloc(ret, &len);
Daniel Veillarded86dc22008-04-24 11:58:41 +00001694 if (temp == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001695 xmlURIErrMemory("escaping URI value\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001696 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001697 return(NULL);
1698 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001699 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001700 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001701
1702 ch = *in;
1703
Daniel Veillardeb475a32002-04-14 22:00:22 +00001704 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001705 unsigned char val;
1706 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001707 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001708 if (val <= 9)
1709 ret[out++] = '0' + val;
1710 else
1711 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001712 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001713 if (val <= 9)
1714 ret[out++] = '0' + val;
1715 else
1716 ret[out++] = 'A' + val - 0xA;
1717 in++;
1718 } else {
1719 ret[out++] = *in++;
1720 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001721
Owen Taylor3473f882001-02-23 17:55:21 +00001722 }
1723 ret[out] = 0;
1724 return(ret);
1725}
1726
Daniel Veillard8514c672001-05-23 10:29:12 +00001727/**
1728 * xmlURIEscape:
1729 * @str: the string of the URI to escape
1730 *
1731 * Escaping routine, does not do validity checks !
1732 * It will try to escape the chars needing this, but this is heuristic
1733 * based it's impossible to be sure.
1734 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001735 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001736 *
1737 * 25 May 2001
1738 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739 * according to RFC2396.
1740 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001741 */
1742xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001743xmlURIEscape(const xmlChar * str)
1744{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001745 xmlChar *ret, *segment = NULL;
1746 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001747 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001748
Daniel Veillard6278fb52001-05-25 07:38:41 +00001749#define NULLCHK(p) if(!p) { \
Daniel Veillard57560382012-07-24 11:44:23 +08001750 xmlURIErrMemory("escaping URI value\n"); \
1751 xmlFreeURI(uri); \
1752 return NULL; } \
Daniel Veillard6278fb52001-05-25 07:38:41 +00001753
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001754 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001755 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001756
1757 uri = xmlCreateURI();
1758 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001759 /*
1760 * Allow escaping errors in the unescaped form
1761 */
1762 uri->cleanup = 1;
1763 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001764 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001765 xmlFreeURI(uri);
1766 return (NULL);
1767 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001768 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001769
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001770 if (!uri)
1771 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001772
1773 ret = NULL;
1774
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001775 if (uri->scheme) {
1776 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777 NULLCHK(segment)
1778 ret = xmlStrcat(ret, segment);
1779 ret = xmlStrcat(ret, BAD_CAST ":");
1780 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001781 }
1782
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001783 if (uri->authority) {
1784 segment =
1785 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786 NULLCHK(segment)
1787 ret = xmlStrcat(ret, BAD_CAST "//");
1788 ret = xmlStrcat(ret, segment);
1789 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001790 }
1791
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001792 if (uri->user) {
1793 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794 NULLCHK(segment)
Daniel Veillard57560382012-07-24 11:44:23 +08001795 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001796 ret = xmlStrcat(ret, segment);
1797 ret = xmlStrcat(ret, BAD_CAST "@");
1798 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001799 }
1800
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001801 if (uri->server) {
1802 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001804 if (uri->user == NULL)
Daniel Veillardd7af5552008-08-04 15:29:44 +00001805 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001806 ret = xmlStrcat(ret, segment);
1807 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001808 }
1809
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001810 if (uri->port) {
1811 xmlChar port[10];
1812
Daniel Veillard43d3f612001-11-10 11:57:23 +00001813 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001814 ret = xmlStrcat(ret, BAD_CAST ":");
1815 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001816 }
1817
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001818 if (uri->path) {
1819 segment =
1820 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821 NULLCHK(segment)
1822 ret = xmlStrcat(ret, segment);
1823 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001824 }
1825
Daniel Veillarda1413b82007-04-26 08:33:28 +00001826 if (uri->query_raw) {
1827 ret = xmlStrcat(ret, BAD_CAST "?");
1828 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829 }
1830 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001831 segment =
1832 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833 NULLCHK(segment)
1834 ret = xmlStrcat(ret, BAD_CAST "?");
1835 ret = xmlStrcat(ret, segment);
1836 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001837 }
1838
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001839 if (uri->opaque) {
1840 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841 NULLCHK(segment)
1842 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001843 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001844 }
1845
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001846 if (uri->fragment) {
1847 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848 NULLCHK(segment)
1849 ret = xmlStrcat(ret, BAD_CAST "#");
1850 ret = xmlStrcat(ret, segment);
1851 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001852 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001853
1854 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001855#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001856
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001857 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001858}
1859
/************************************************************************
 *									*
 *			Public functions				*
 *									*
 ************************************************************************/
1865
/**
 * xmlBuildURI:
 * @URI:  the URI instance found in the document
 * @base:  the base value
 *
 * Computes the final URI of the reference done by checking that
 * the given URI is valid, and building the final URI using the
 * base URI. This is processed according to section 5.2 of the
 * RFC 2396
 *
 * 5.2. Resolving Relative References to Absolute Form
 *
 * Returns a new URI string (to be freed by the caller) or NULL in case
 * of error.
 */
1881xmlChar *
1882xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001884 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001885 xmlURIPtr ref = NULL;
1886 xmlURIPtr bas = NULL;
1887 xmlURIPtr res = NULL;
1888
1889 /*
1890 * 1) The URI reference is parsed into the potential four components and
1891 * fragment identifier, as described in Section 4.3.
1892 *
1893 * NOTE that a completely empty URI is treated by modern browsers
1894 * as a reference to "." rather than as a synonym for the current
1895 * URI. Should we do that here?
1896 */
Daniel Veillard57560382012-07-24 11:44:23 +08001897 if (URI == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001898 ret = -1;
1899 else {
1900 if (*URI) {
1901 ref = xmlCreateURI();
1902 if (ref == NULL)
1903 goto done;
1904 ret = xmlParseURIReference(ref, (const char *) URI);
1905 }
1906 else
1907 ret = 0;
1908 }
1909 if (ret != 0)
1910 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001911 if ((ref != NULL) && (ref->scheme != NULL)) {
1912 /*
1913 * The URI is absolute don't modify.
1914 */
1915 val = xmlStrdup(URI);
1916 goto done;
1917 }
Owen Taylor3473f882001-02-23 17:55:21 +00001918 if (base == NULL)
1919 ret = -1;
1920 else {
1921 bas = xmlCreateURI();
1922 if (bas == NULL)
1923 goto done;
1924 ret = xmlParseURIReference(bas, (const char *) base);
1925 }
1926 if (ret != 0) {
1927 if (ref)
1928 val = xmlSaveUri(ref);
1929 goto done;
1930 }
1931 if (ref == NULL) {
1932 /*
1933 * the base fragment must be ignored
1934 */
1935 if (bas->fragment != NULL) {
1936 xmlFree(bas->fragment);
1937 bas->fragment = NULL;
1938 }
1939 val = xmlSaveUri(bas);
1940 goto done;
1941 }
1942
1943 /*
1944 * 2) If the path component is empty and the scheme, authority, and
1945 * query components are undefined, then it is a reference to the
1946 * current document and we are done. Otherwise, the reference URI's
1947 * query and fragment components are defined as found (or not found)
1948 * within the URI reference and not inherited from the base URI.
1949 *
1950 * NOTE that in modern browsers, the parsing differs from the above
1951 * in the following aspect: the query component is allowed to be
1952 * defined while still treating this as a reference to the current
1953 * document.
1954 */
1955 res = xmlCreateURI();
1956 if (res == NULL)
1957 goto done;
1958 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959 ((ref->authority == NULL) && (ref->server == NULL))) {
1960 if (bas->scheme != NULL)
1961 res->scheme = xmlMemStrdup(bas->scheme);
1962 if (bas->authority != NULL)
1963 res->authority = xmlMemStrdup(bas->authority);
1964 else if (bas->server != NULL) {
1965 res->server = xmlMemStrdup(bas->server);
1966 if (bas->user != NULL)
1967 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08001968 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00001969 }
1970 if (bas->path != NULL)
1971 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001972 if (ref->query_raw != NULL)
1973 res->query_raw = xmlMemStrdup (ref->query_raw);
1974 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001975 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001976 else if (bas->query_raw != NULL)
1977 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001978 else if (bas->query != NULL)
1979 res->query = xmlMemStrdup(bas->query);
1980 if (ref->fragment != NULL)
1981 res->fragment = xmlMemStrdup(ref->fragment);
1982 goto step_7;
1983 }
Owen Taylor3473f882001-02-23 17:55:21 +00001984
1985 /*
1986 * 3) If the scheme component is defined, indicating that the reference
1987 * starts with a scheme name, then the reference is interpreted as an
1988 * absolute URI and we are done. Otherwise, the reference URI's
1989 * scheme is inherited from the base URI's scheme component.
1990 */
1991 if (ref->scheme != NULL) {
1992 val = xmlSaveUri(ref);
1993 goto done;
1994 }
1995 if (bas->scheme != NULL)
1996 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard57560382012-07-24 11:44:23 +08001997
Daniel Veillarda1413b82007-04-26 08:33:28 +00001998 if (ref->query_raw != NULL)
1999 res->query_raw = xmlMemStrdup(ref->query_raw);
2000 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002001 res->query = xmlMemStrdup(ref->query);
2002 if (ref->fragment != NULL)
2003 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002004
2005 /*
2006 * 4) If the authority component is defined, then the reference is a
2007 * network-path and we skip to step 7. Otherwise, the reference
2008 * URI's authority is inherited from the base URI's authority
2009 * component, which will also be undefined if the URI scheme does not
2010 * use an authority component.
2011 */
2012 if ((ref->authority != NULL) || (ref->server != NULL)) {
2013 if (ref->authority != NULL)
2014 res->authority = xmlMemStrdup(ref->authority);
2015 else {
2016 res->server = xmlMemStrdup(ref->server);
2017 if (ref->user != NULL)
2018 res->user = xmlMemStrdup(ref->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002019 res->port = ref->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002020 }
2021 if (ref->path != NULL)
2022 res->path = xmlMemStrdup(ref->path);
2023 goto step_7;
2024 }
2025 if (bas->authority != NULL)
2026 res->authority = xmlMemStrdup(bas->authority);
2027 else if (bas->server != NULL) {
2028 res->server = xmlMemStrdup(bas->server);
2029 if (bas->user != NULL)
2030 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002031 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002032 }
2033
2034 /*
2035 * 5) If the path component begins with a slash character ("/"), then
2036 * the reference is an absolute-path and we skip to step 7.
2037 */
2038 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2039 res->path = xmlMemStrdup(ref->path);
2040 goto step_7;
2041 }
2042
2043
2044 /*
2045 * 6) If this step is reached, then we are resolving a relative-path
2046 * reference. The relative path needs to be merged with the base
2047 * URI's path. Although there are many ways to do this, we will
2048 * describe a simple method using a separate string buffer.
2049 *
2050 * Allocate a buffer large enough for the result string.
2051 */
2052 len = 2; /* extra / and 0 */
2053 if (ref->path != NULL)
2054 len += strlen(ref->path);
2055 if (bas->path != NULL)
2056 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002057 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002058 if (res->path == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002059 xmlURIErrMemory("resolving URI against base\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002060 goto done;
2061 }
2062 res->path[0] = 0;
2063
2064 /*
2065 * a) All but the last segment of the base URI's path component is
2066 * copied to the buffer. In other words, any characters after the
2067 * last (right-most) slash character, if any, are excluded.
2068 */
2069 cur = 0;
2070 out = 0;
2071 if (bas->path != NULL) {
2072 while (bas->path[cur] != 0) {
2073 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2074 cur++;
2075 if (bas->path[cur] == 0)
2076 break;
2077
2078 cur++;
2079 while (out < cur) {
2080 res->path[out] = bas->path[out];
2081 out++;
2082 }
2083 }
2084 }
2085 res->path[out] = 0;
2086
2087 /*
2088 * b) The reference's path component is appended to the buffer
2089 * string.
2090 */
2091 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002092 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002093 /*
2094 * Ensure the path includes a '/'
2095 */
2096 if ((out == 0) && (bas->server != NULL))
2097 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002098 while (ref->path[indx] != 0) {
2099 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
2101 }
2102 res->path[out] = 0;
2103
2104 /*
2105 * Steps c) to h) are really path normalization steps
2106 */
2107 xmlNormalizeURIPath(res->path);
2108
2109step_7:
2110
2111 /*
2112 * 7) The resulting URI components, including any inherited from the
2113 * base URI, are recombined to give the absolute form of the URI
2114 * reference.
2115 */
2116 val = xmlSaveUri(res);
2117
2118done:
2119 if (ref != NULL)
2120 xmlFreeURI(ref);
2121 if (bas != NULL)
2122 xmlFreeURI(bas);
2123 if (res != NULL)
2124 xmlFreeURI(res);
2125 return(val);
2126}
2127
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002128/**
William M. Brackf7789b12004-06-07 08:57:27 +00002129 * xmlBuildRelativeURI:
2130 * @URI: the URI reference under consideration
2131 * @base: the base value
2132 *
2133 * Expresses the URI of the reference in terms relative to the
2134 * base. Some examples of this operation include:
2135 * base = "http://site1.com/docs/book1.html"
2136 * URI input URI returned
2137 * docs/pic1.gif pic1.gif
2138 * docs/img/pic1.gif img/pic1.gif
2139 * img/pic1.gif ../img/pic1.gif
2140 * http://site1.com/docs/pic1.gif pic1.gif
2141 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2142 *
2143 * base = "docs/book1.html"
2144 * URI input URI returned
2145 * docs/pic1.gif pic1.gif
2146 * docs/img/pic1.gif img/pic1.gif
2147 * img/pic1.gif ../img/pic1.gif
2148 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2149 *
2150 *
2151 * Note: if the URI reference is really wierd or complicated, it may be
2152 * worthwhile to first convert it into a "nice" one by calling
2153 * xmlBuildURI (using 'base') before calling this routine,
2154 * since this routine (for reasonable efficiency) assumes URI has
2155 * already been through some validation.
2156 *
2157 * Returns a new URI string (to be freed by the caller) or NULL in case
2158 * error.
2159 */
2160xmlChar *
2161xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2162{
2163 xmlChar *val = NULL;
2164 int ret;
2165 int ix;
2166 int pos = 0;
2167 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002168 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002169 xmlURIPtr ref = NULL;
2170 xmlURIPtr bas = NULL;
2171 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002172 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002173
2174 if ((URI == NULL) || (*URI == 0))
2175 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002176
2177 /*
2178 * First parse URI into a standard form
2179 */
2180 ref = xmlCreateURI ();
2181 if (ref == NULL)
2182 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002183 /* If URI not already in "relative" form */
2184 if (URI[0] != '.') {
2185 ret = xmlParseURIReference (ref, (const char *) URI);
2186 if (ret != 0)
2187 goto done; /* Error in URI, return NULL */
2188 } else
2189 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002190
2191 /*
2192 * Next parse base into the same standard form
2193 */
2194 if ((base == NULL) || (*base == 0)) {
2195 val = xmlStrdup (URI);
2196 goto done;
2197 }
2198 bas = xmlCreateURI ();
2199 if (bas == NULL)
2200 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002201 if (base[0] != '.') {
2202 ret = xmlParseURIReference (bas, (const char *) base);
2203 if (ret != 0)
2204 goto done; /* Error in base, return NULL */
2205 } else
2206 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002207
2208 /*
2209 * If the scheme / server on the URI differs from the base,
2210 * just return the URI
2211 */
2212 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002213 ((bas->scheme == NULL) ||
2214 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2215 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002216 val = xmlStrdup (URI);
2217 goto done;
2218 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002219 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2220 val = xmlStrdup(BAD_CAST "");
2221 goto done;
2222 }
2223 if (bas->path == NULL) {
2224 val = xmlStrdup((xmlChar *)ref->path);
2225 goto done;
2226 }
2227 if (ref->path == NULL) {
2228 ref->path = (char *) "/";
2229 remove_path = 1;
2230 }
William M. Brackf7789b12004-06-07 08:57:27 +00002231
2232 /*
2233 * At this point (at last!) we can compare the two paths
2234 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002235 * First we take care of the special case where either of the
2236 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002237 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002238 if (bas->path == NULL) {
2239 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002240 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002241 if (*uptr == '/')
2242 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002243 /* exception characters from xmlSaveUri */
2244 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002245 }
2246 goto done;
2247 }
William M. Brackf7789b12004-06-07 08:57:27 +00002248 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002249 if (ref->path == NULL) {
2250 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002251 if (bptr[ix] == '/')
2252 nbslash++;
2253 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002254 uptr = NULL;
2255 len = 1; /* this is for a string terminator only */
2256 } else {
2257 /*
2258 * Next we compare the two strings and find where they first differ
2259 */
2260 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2261 pos += 2;
2262 if ((*bptr == '.') && (bptr[1] == '/'))
2263 bptr += 2;
2264 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2265 bptr++;
2266 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2267 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002268
William M. Brack820d5ed2005-09-14 05:24:27 +00002269 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002270 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002271 goto done; /* (I can't imagine why anyone would do this) */
2272 }
2273
2274 /*
2275 * In URI, "back up" to the last '/' encountered. This will be the
2276 * beginning of the "unique" suffix of URI
2277 */
2278 ix = pos;
2279 if ((ref->path[ix] == '/') && (ix > 0))
2280 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002281 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2282 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002283 for (; ix > 0; ix--) {
2284 if (ref->path[ix] == '/')
2285 break;
2286 }
2287 if (ix == 0) {
2288 uptr = (xmlChar *)ref->path;
2289 } else {
2290 ix++;
2291 uptr = (xmlChar *)&ref->path[ix];
2292 }
2293
2294 /*
2295 * In base, count the number of '/' from the differing point
2296 */
2297 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2298 for (; bptr[ix] != 0; ix++) {
2299 if (bptr[ix] == '/')
2300 nbslash++;
2301 }
2302 }
2303 len = xmlStrlen (uptr) + 1;
2304 }
Daniel Veillard57560382012-07-24 11:44:23 +08002305
William M. Brackf7789b12004-06-07 08:57:27 +00002306 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002307 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002308 /* exception characters from xmlSaveUri */
2309 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002310 goto done;
2311 }
William M. Brackf7789b12004-06-07 08:57:27 +00002312
2313 /*
2314 * Allocate just enough space for the returned string -
2315 * length of the remainder of the URI, plus enough space
2316 * for the "../" groups, plus one for the terminator
2317 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002318 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002319 if (val == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002320 xmlURIErrMemory("building relative URI\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002321 goto done;
2322 }
2323 vptr = val;
2324 /*
2325 * Put in as many "../" as needed
2326 */
2327 for (; nbslash>0; nbslash--) {
2328 *vptr++ = '.';
2329 *vptr++ = '.';
2330 *vptr++ = '/';
2331 }
2332 /*
2333 * Finish up with the end of the URI
2334 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002335 if (uptr != NULL) {
2336 if ((vptr > val) && (len > 0) &&
2337 (uptr[0] == '/') && (vptr[-1] == '/')) {
2338 memcpy (vptr, uptr + 1, len - 1);
2339 vptr[len - 2] = 0;
2340 } else {
2341 memcpy (vptr, uptr, len);
2342 vptr[len - 1] = 0;
2343 }
2344 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002345 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002346 }
William M. Brackf7789b12004-06-07 08:57:27 +00002347
William M. Brack50420192007-07-20 01:09:08 +00002348 /* escape the freshly-built path */
2349 vptr = val;
2350 /* exception characters from xmlSaveUri */
2351 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2352 xmlFree(vptr);
2353
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002354done:
William M. Brackf7789b12004-06-07 08:57:27 +00002355 /*
2356 * Free the working variables
2357 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002358 if (remove_path != 0)
2359 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002360 if (ref != NULL)
2361 xmlFreeURI (ref);
2362 if (bas != NULL)
2363 xmlFreeURI (bas);
2364
2365 return val;
2366}
2367
2368/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002369 * xmlCanonicPath:
2370 * @path: the resource locator in a filesystem notation
2371 *
Daniel Veillard57560382012-07-24 11:44:23 +08002372 * Constructs a canonic path from the specified path.
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002373 *
Daniel Veillard57560382012-07-24 11:44:23 +08002374 * Returns a new canonic path, or a duplicate of the path parameter if the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002375 * construction fails. The caller is responsible for freeing the memory occupied
Daniel Veillard57560382012-07-24 11:44:23 +08002376 * by the returned string. If there is insufficient memory available, or the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002377 * argument is NULL, the function returns NULL.
2378 */
Daniel Veillard57560382012-07-24 11:44:23 +08002379#define IS_WINDOWS_PATH(p) \
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002380 ((p != NULL) && \
2381 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2382 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2383 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002384xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002385xmlCanonicPath(const xmlChar *path)
2386{
William M. Brack22242272007-01-27 07:59:37 +00002387/*
2388 * For Windows implementations, additional work needs to be done to
2389 * replace backslashes in pathnames with "forward slashes"
2390 */
Daniel Veillard57560382012-07-24 11:44:23 +08002391#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002392 int len = 0;
2393 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002394 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002395#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002396 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002397 xmlChar *ret;
2398 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002399
2400 if (path == NULL)
2401 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002402
Michael Stahl55b899a2012-09-07 12:14:00 +08002403#if defined(_WIN32)
2404 /*
2405 * We must not change the backslashes to slashes if the the path
2406 * starts with \\?\
2407 * Those paths can be up to 32k characters long.
2408 * Was added specifically for OpenOffice, those paths can't be converted
2409 * to URIs anyway.
2410 */
2411 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2412 (path[3] == '\\') )
2413 return xmlStrdup((const xmlChar *) path);
2414#endif
2415
2416 /* sanitize filename starting with // so it can be used as URI */
Daniel Veillard69f8a132008-02-05 08:37:56 +00002417 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2418 path++;
2419
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002420 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002421 xmlFreeURI(uri);
2422 return xmlStrdup(path);
2423 }
2424
William M. Brack22242272007-01-27 07:59:37 +00002425 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002426 absuri = xmlStrstr(path, BAD_CAST "://");
2427 if (absuri != NULL) {
2428 int l, j;
2429 unsigned char c;
2430 xmlChar *escURI;
2431
2432 /*
2433 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002434 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002435 * part matches a protocol.
2436 */
2437 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002438 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002439 if ((l <= 0) || (l > 20))
2440 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002441 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002442 for (j = 0;j < l;j++) {
2443 c = path[j];
2444 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2445 goto path_processing;
2446 }
2447
William M. Brack22242272007-01-27 07:59:37 +00002448 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002449 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2450 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002451 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002452 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002453 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002454 if (uri != NULL) {
2455 xmlFreeURI(uri);
2456 return escURI;
2457 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002458 }
2459 }
2460
2461path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002462/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard57560382012-07-24 11:44:23 +08002463#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard336a8e12005-08-07 10:46:19 +00002464 /*
William M. Brack22242272007-01-27 07:59:37 +00002465 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002466 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002467 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002468 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002469 return(NULL);
2470 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002471
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002472 len = xmlStrlen(path);
2473 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002474 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002475 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002476 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002477 uri->path = xmlMallocAtomic(len + 2);
2478 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002479 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002480 return(NULL);
2481 }
William M. Brack22242272007-01-27 07:59:37 +00002482 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002483 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002484 p = uri->path + 1;
2485 strncpy(p, path, len + 1);
2486 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002487 uri->path = xmlStrdup(path);
2488 if (uri->path == NULL) {
2489 xmlFreeURI(uri);
2490 return(NULL);
2491 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002492 p = uri->path;
2493 }
William M. Brack22242272007-01-27 07:59:37 +00002494 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002495 while (*p != '\0') {
2496 if (*p == '\\')
2497 *p = '/';
2498 p++;
2499 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002500
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002501 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002502 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002503 } else {
2504 ret = xmlSaveUri(uri);
2505 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002506
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002507 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002508#else
2509 ret = xmlStrdup((const xmlChar *) path);
2510#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002511 return(ret);
2512}
Owen Taylor3473f882001-02-23 17:55:21 +00002513
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002514/**
2515 * xmlPathToURI:
2516 * @path: the resource locator in a filesystem notation
2517 *
2518 * Constructs an URI expressing the existing path
2519 *
Daniel Veillard57560382012-07-24 11:44:23 +08002520 * Returns a new URI, or a duplicate of the path parameter if the
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002521 * construction fails. The caller is responsible for freeing the memory
2522 * occupied by the returned string. If there is insufficient memory available,
2523 * or the argument is NULL, the function returns NULL.
2524 */
2525xmlChar *
2526xmlPathToURI(const xmlChar *path)
2527{
2528 xmlURIPtr uri;
2529 xmlURI temp;
2530 xmlChar *ret, *cal;
2531
2532 if (path == NULL)
2533 return(NULL);
2534
2535 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2536 xmlFreeURI(uri);
2537 return xmlStrdup(path);
2538 }
2539 cal = xmlCanonicPath(path);
2540 if (cal == NULL)
2541 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002542#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard57560382012-07-24 11:44:23 +08002543 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002544 If 'cal' is a valid URI allready then we are done here, as continuing would make
2545 it invalid. */
2546 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2547 xmlFreeURI(uri);
2548 return cal;
2549 }
2550 /* 'cal' can contain a relative path with backslashes. If that is processed
2551 by xmlSaveURI, they will be escaped and the external entity loader machinery
2552 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2553 ret = cal;
2554 while (*ret != '\0') {
2555 if (*ret == '\\')
2556 *ret = '/';
2557 ret++;
2558 }
2559#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002560 memset(&temp, 0, sizeof(temp));
2561 temp.path = (char *) cal;
2562 ret = xmlSaveUri(&temp);
2563 xmlFree(cal);
2564 return(ret);
2565}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002566#define bottom_uri
2567#include "elfgcchack.h"