blob: 3b627e82086e04ee471139939a130f09ce9cf255 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
Daniel Veillard57560382012-07-24 11:44:23 +08002 * uri.c: set of generic URI related routines
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
Daniel Veillardd7af5552008-08-04 15:29:44 +00004 * Reference: RFCs 3986, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand whatever operators surround it at the point of use.
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35static void
36xmlURIErrMemory(const char *extra)
37{
38 if (extra)
39 __xmlRaiseError(NULL, NULL, NULL,
40 NULL, NULL, XML_FROM_URI,
41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42 extra, NULL, NULL, 0, 0,
43 "Memory allocation failed : %s\n", extra);
44 else
45 __xmlRaiseError(NULL, NULL, NULL,
46 NULL, NULL, XML_FROM_URI,
47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48 NULL, NULL, NULL, 0, 0,
49 "Memory allocation failed\n");
50}
51
Daniel Veillardd7af5552008-08-04 15:29:44 +000052static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000053
/*
 * Character classes from the old RFC 2396 grammar, used in legacy
 * handling code.  Every macro takes a character value, except IS_UNWISE
 * which takes a pointer to the character to test.  Arguments may be
 * evaluated more than once.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9"; any earlier definition of IS_DIGIT is replaced */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||   \
     ((x) == '(') || ((x) == ')'))

/* unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||                    \
     ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||    \
     ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||    \
     ((x) == ']'))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
/*
 * Advance the pointer lvalue @p to the next character: by three bytes
 * when it points at a '%' (the start of an escape sequence), by one byte
 * otherwise.  The two bytes following the '%' are not checked here.
 *
 * @p is parenthesized in the expansion so that an argument such as *pp
 * advances the pointer itself.  It is still evaluated more than once, so
 * it must not carry side effects.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
127
128/*
129 * Productions from the spec.
130 *
131 * authority = server | reg_name
132 * reg_name = 1*( unreserved | escaped | "$" | "," |
133 * ";" | ":" | "@" | "&" | "=" | "+" )
134 *
135 * path = [ abs_path | opaque_part ]
136 */
137
/*
 * Duplicate the first @n bytes of @s with xmlStrndup() and hand the result
 * back as a plain char pointer.  The whole expansion is parenthesized so
 * the cast cannot bind to anything else at the point of use.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
Owen Taylor3473f882001-02-23 17:55:21 +0000140/************************************************************************
141 * *
Daniel Veillardd7af5552008-08-04 15:29:44 +0000142 * RFC 3986 parser *
143 * *
144 ************************************************************************/
145
/*
 * Character classes of the RFC 3986 grammar.  Every macro takes a pointer
 * to the character to test; the argument may be evaluated several times,
 * so it must not carry side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||             \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||           \
     ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||           \
     ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||           \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||           \
     ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||             \
     ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The && chain stops at the first mismatch, so a byte is only read once
 * the byte before it has matched.  @p is parenthesized before the offsets
 * are added so that any pointer expression can be passed.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||     \
     ((*(p) == ':')) || ((*(p) == '@')))
195
196/**
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
200 *
201 * Parse an URI scheme
202 *
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204 *
205 * Returns 0 or the error code
206 */
207static int
208xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209 const char *cur;
210
211 if (str == NULL)
212 return(-1);
213
214 cur = *str;
215 if (!ISA_ALPHA(cur))
216 return(2);
217 cur++;
218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220 if (uri != NULL) {
221 if (uri->scheme != NULL) xmlFree(uri->scheme);
222 uri->scheme = STRNDUP(*str, cur - *str);
223 }
224 *str = cur;
225 return(0);
226}
227
228/**
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
232 *
233 * Parse the query part of an URI
234 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000240 *
241 * Returns 0 or the error code
242 */
243static int
244xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245{
246 const char *cur;
247
248 if (str == NULL)
249 return (-1);
250
251 cur = *str;
252
253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000254 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256 NEXT(cur);
257 if (uri != NULL) {
258 if (uri->fragment != NULL)
259 xmlFree(uri->fragment);
260 if (uri->cleanup & 2)
261 uri->fragment = STRNDUP(*str, cur - *str);
262 else
263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264 }
265 *str = cur;
266 return (0);
267}
268
269/**
270 * xmlParse3986Query:
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
273 *
274 * Parse the query part of an URI
275 *
276 * query = *uric
277 *
278 * Returns 0 or the error code
279 */
280static int
281xmlParse3986Query(xmlURIPtr uri, const char **str)
282{
283 const char *cur;
284
285 if (str == NULL)
286 return (-1);
287
288 cur = *str;
289
290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292 NEXT(cur);
293 if (uri != NULL) {
294 if (uri->query != NULL)
295 xmlFree(uri->query);
296 if (uri->cleanup & 2)
297 uri->query = STRNDUP(*str, cur - *str);
298 else
299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 */
304 if (uri->query_raw != NULL)
305 xmlFree (uri->query_raw);
306 uri->query_raw = STRNDUP (*str, cur - *str);
307 }
308 *str = cur;
309 return (0);
310}
311
312/**
313 * xmlParse3986Port:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
Michael Paddon846cf012016-05-21 17:16:05 +0800317 * Parse a port part and fills in the appropriate fields
Daniel Veillardd7af5552008-08-04 15:29:44 +0000318 * of the @uri structure
319 *
320 * port = *DIGIT
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Port(xmlURIPtr uri, const char **str)
326{
327 const char *cur = *str;
Michael Paddon846cf012016-05-21 17:16:05 +0800328 unsigned port = 0; /* unsigned for defined overflow behavior */
Daniel Veillardd7af5552008-08-04 15:29:44 +0000329
330 if (ISA_DIGIT(cur)) {
Daniel Veillardd7af5552008-08-04 15:29:44 +0000331 while (ISA_DIGIT(cur)) {
Michael Paddon846cf012016-05-21 17:16:05 +0800332 port = port * 10 + (*cur - '0');
333
Daniel Veillardd7af5552008-08-04 15:29:44 +0000334 cur++;
335 }
Michael Paddon846cf012016-05-21 17:16:05 +0800336 if (uri != NULL)
337 uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
Daniel Veillardd7af5552008-08-04 15:29:44 +0000338 *str = cur;
339 return(0);
340 }
341 return(1);
342}
343
344/**
345 * xmlParse3986Userinfo:
346 * @uri: pointer to an URI structure
347 * @str: the string to analyze
348 *
349 * Parse an user informations part and fills in the appropriate fields
350 * of the @uri structure
351 *
352 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
353 *
354 * Returns 0 or the error code
355 */
356static int
357xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358{
359 const char *cur;
360
361 cur = *str;
362 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
363 ISA_SUB_DELIM(cur) || (*cur == ':'))
364 NEXT(cur);
365 if (*cur == '@') {
366 if (uri != NULL) {
367 if (uri->user != NULL) xmlFree(uri->user);
368 if (uri->cleanup & 2)
369 uri->user = STRNDUP(*str, cur - *str);
370 else
371 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
372 }
373 *str = cur;
374 return(0);
375 }
376 return(1);
377}
378
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  At most three digits are considered, so for
 * "1234" the octet "123" is skipped and *@str is left on the '4'.
 * A multi-digit number with a leading zero or a value above 255 is
 * not a dec-octet.  On failure *@str is not modified.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /*
     * Gather up to three leading digits.  The scan stops at the first
     * non-digit, so it never reads past the end of the string.
     */
    while ((ndigits < 3) &&
           (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    if (ndigits > 1) {
        /* "01", "007" ... are not allowed by the grammar */
        if (cur[0] == '0')
            return(1);
        /* 256 and above do not fit in an octet */
        if (value > 255)
            return(1);
    }

    *str = cur + ndigits;
    return(0);
}
416/**
417 * xmlParse3986Host:
418 * @uri: pointer to an URI structure
419 * @str: the string to analyze
420 *
421 * Parse an host part and fills in the appropriate fields
422 * of the @uri structure
423 *
424 * host = IP-literal / IPv4address / reg-name
425 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
426 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
427 * reg-name = *( unreserved / pct-encoded / sub-delims )
428 *
429 * Returns 0 or the error code
430 */
431static int
432xmlParse3986Host(xmlURIPtr uri, const char **str)
433{
434 const char *cur = *str;
435 const char *host;
436
437 host = cur;
438 /*
439 * IPv6 and future adressing scheme are enclosed between brackets
440 */
441 if (*cur == '[') {
442 cur++;
443 while ((*cur != ']') && (*cur != 0))
444 cur++;
445 if (*cur != ']')
446 return(1);
447 cur++;
448 goto found;
449 }
450 /*
451 * try to parse an IPv4
452 */
453 if (ISA_DIGIT(cur)) {
454 if (xmlParse3986DecOctet(&cur) != 0)
455 goto not_ipv4;
456 if (*cur != '.')
457 goto not_ipv4;
458 cur++;
459 if (xmlParse3986DecOctet(&cur) != 0)
460 goto not_ipv4;
461 if (*cur != '.')
462 goto not_ipv4;
463 if (xmlParse3986DecOctet(&cur) != 0)
464 goto not_ipv4;
465 if (*cur != '.')
466 goto not_ipv4;
467 if (xmlParse3986DecOctet(&cur) != 0)
468 goto not_ipv4;
469 goto found;
470not_ipv4:
471 cur = *str;
472 }
473 /*
474 * then this should be a hostname which can be empty
475 */
476 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
477 NEXT(cur);
478found:
479 if (uri != NULL) {
480 if (uri->authority != NULL) xmlFree(uri->authority);
481 uri->authority = NULL;
482 if (uri->server != NULL) xmlFree(uri->server);
483 if (cur != host) {
484 if (uri->cleanup & 2)
485 uri->server = STRNDUP(host, cur - host);
486 else
487 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488 } else
489 uri->server = NULL;
490 }
491 *str = cur;
492 return(0);
493}
494
495/**
496 * xmlParse3986Authority:
497 * @uri: pointer to an URI structure
498 * @str: the string to analyze
499 *
500 * Parse an authority part and fills in the appropriate fields
501 * of the @uri structure
502 *
503 * authority = [ userinfo "@" ] host [ ":" port ]
504 *
505 * Returns 0 or the error code
506 */
507static int
508xmlParse3986Authority(xmlURIPtr uri, const char **str)
509{
510 const char *cur;
511 int ret;
512
513 cur = *str;
514 /*
515 * try to parse an userinfo and check for the trailing @
516 */
517 ret = xmlParse3986Userinfo(uri, &cur);
518 if ((ret != 0) || (*cur != '@'))
519 cur = *str;
520 else
521 cur++;
522 ret = xmlParse3986Host(uri, &cur);
523 if (ret != 0) return(ret);
524 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000525 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000526 ret = xmlParse3986Port(uri, &cur);
527 if (ret != 0) return(ret);
528 }
529 *str = cur;
530 return(0);
531}
532
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment and move *@str past it
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The scan stops at the first character which is not a pchar or which
 * is equal to @forbid.
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 *         @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(cur))
            break;
        if (*cur == forbid)
            break;
        NEXT(cur);
    }

    *str = cur;
    return (0);
}
565
566/**
567 * xmlParse3986PathAbEmpty:
568 * @uri: pointer to an URI structure
569 * @str: the string to analyze
570 *
571 * Parse an path absolute or empty and fills in the appropriate fields
572 * of the @uri structure
573 *
574 * path-abempty = *( "/" segment )
575 *
576 * Returns 0 or the error code
577 */
578static int
579xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580{
581 const char *cur;
582 int ret;
583
584 cur = *str;
585
586 while (*cur == '/') {
587 cur++;
588 ret = xmlParse3986Segment(&cur, 0, 1);
589 if (ret != 0) return(ret);
590 }
591 if (uri != NULL) {
592 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200593 if (*str != cur) {
594 if (uri->cleanup & 2)
595 uri->path = STRNDUP(*str, cur - *str);
596 else
597 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
598 } else {
599 uri->path = NULL;
600 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000601 }
602 *str = cur;
603 return (0);
604}
605
606/**
607 * xmlParse3986PathAbsolute:
608 * @uri: pointer to an URI structure
609 * @str: the string to analyze
610 *
611 * Parse an path absolute and fills in the appropriate fields
612 * of the @uri structure
613 *
614 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615 *
616 * Returns 0 or the error code
617 */
618static int
619xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620{
621 const char *cur;
622 int ret;
623
624 cur = *str;
625
626 if (*cur != '/')
627 return(1);
628 cur++;
629 ret = xmlParse3986Segment(&cur, 0, 0);
630 if (ret == 0) {
631 while (*cur == '/') {
632 cur++;
633 ret = xmlParse3986Segment(&cur, 0, 1);
634 if (ret != 0) return(ret);
635 }
636 }
637 if (uri != NULL) {
638 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200639 if (cur != *str) {
640 if (uri->cleanup & 2)
641 uri->path = STRNDUP(*str, cur - *str);
642 else
643 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
644 } else {
645 uri->path = NULL;
646 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000647 }
648 *str = cur;
649 return (0);
650}
651
652/**
653 * xmlParse3986PathRootless:
654 * @uri: pointer to an URI structure
655 * @str: the string to analyze
656 *
657 * Parse an path without root and fills in the appropriate fields
658 * of the @uri structure
659 *
660 * path-rootless = segment-nz *( "/" segment )
661 *
662 * Returns 0 or the error code
663 */
664static int
665xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666{
667 const char *cur;
668 int ret;
669
670 cur = *str;
671
672 ret = xmlParse3986Segment(&cur, 0, 0);
673 if (ret != 0) return(ret);
674 while (*cur == '/') {
675 cur++;
676 ret = xmlParse3986Segment(&cur, 0, 1);
677 if (ret != 0) return(ret);
678 }
679 if (uri != NULL) {
680 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200681 if (cur != *str) {
682 if (uri->cleanup & 2)
683 uri->path = STRNDUP(*str, cur - *str);
684 else
685 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
686 } else {
687 uri->path = NULL;
688 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000689 }
690 *str = cur;
691 return (0);
692}
693
694/**
695 * xmlParse3986PathNoScheme:
696 * @uri: pointer to an URI structure
697 * @str: the string to analyze
698 *
699 * Parse an path which is not a scheme and fills in the appropriate fields
700 * of the @uri structure
701 *
702 * path-noscheme = segment-nz-nc *( "/" segment )
703 *
704 * Returns 0 or the error code
705 */
706static int
707xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708{
709 const char *cur;
710 int ret;
711
712 cur = *str;
713
714 ret = xmlParse3986Segment(&cur, ':', 0);
715 if (ret != 0) return(ret);
716 while (*cur == '/') {
717 cur++;
718 ret = xmlParse3986Segment(&cur, 0, 1);
719 if (ret != 0) return(ret);
720 }
721 if (uri != NULL) {
722 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200723 if (cur != *str) {
724 if (uri->cleanup & 2)
725 uri->path = STRNDUP(*str, cur - *str);
726 else
727 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
728 } else {
729 uri->path = NULL;
730 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000731 }
732 *str = cur;
733 return (0);
734}
735
736/**
737 * xmlParse3986HierPart:
738 * @uri: pointer to an URI structure
739 * @str: the string to analyze
740 *
741 * Parse an hierarchical part and fills in the appropriate fields
742 * of the @uri structure
743 *
744 * hier-part = "//" authority path-abempty
745 * / path-absolute
746 * / path-rootless
747 * / path-empty
748 *
749 * Returns 0 or the error code
750 */
751static int
752xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753{
754 const char *cur;
755 int ret;
756
757 cur = *str;
758
759 if ((*cur == '/') && (*(cur + 1) == '/')) {
760 cur += 2;
761 ret = xmlParse3986Authority(uri, &cur);
762 if (ret != 0) return(ret);
Daniel Veillardbeb72812014-10-03 19:22:39 +0800763 if (uri->server == NULL)
764 uri->port = -1;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000765 ret = xmlParse3986PathAbEmpty(uri, &cur);
766 if (ret != 0) return(ret);
767 *str = cur;
768 return(0);
769 } else if (*cur == '/') {
770 ret = xmlParse3986PathAbsolute(uri, &cur);
771 if (ret != 0) return(ret);
772 } else if (ISA_PCHAR(cur)) {
773 ret = xmlParse3986PathRootless(uri, &cur);
774 if (ret != 0) return(ret);
775 } else {
776 /* path-empty is effectively empty */
777 if (uri != NULL) {
778 if (uri->path != NULL) xmlFree(uri->path);
779 uri->path = NULL;
780 }
781 }
782 *str = cur;
783 return (0);
784}
785
786/**
787 * xmlParse3986RelativeRef:
788 * @uri: pointer to an URI structure
789 * @str: the string to analyze
790 *
791 * Parse an URI string and fills in the appropriate fields
792 * of the @uri structure
793 *
794 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
795 * relative-part = "//" authority path-abempty
796 * / path-absolute
797 * / path-noscheme
798 * / path-empty
799 *
800 * Returns 0 or the error code
801 */
802static int
803xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804 int ret;
805
806 if ((*str == '/') && (*(str + 1) == '/')) {
807 str += 2;
808 ret = xmlParse3986Authority(uri, &str);
809 if (ret != 0) return(ret);
810 ret = xmlParse3986PathAbEmpty(uri, &str);
811 if (ret != 0) return(ret);
812 } else if (*str == '/') {
813 ret = xmlParse3986PathAbsolute(uri, &str);
814 if (ret != 0) return(ret);
815 } else if (ISA_PCHAR(str)) {
816 ret = xmlParse3986PathNoScheme(uri, &str);
817 if (ret != 0) return(ret);
818 } else {
819 /* path-empty is effectively empty */
820 if (uri != NULL) {
821 if (uri->path != NULL) xmlFree(uri->path);
822 uri->path = NULL;
823 }
824 }
825
826 if (*str == '?') {
827 str++;
828 ret = xmlParse3986Query(uri, &str);
829 if (ret != 0) return(ret);
830 }
831 if (*str == '#') {
832 str++;
833 ret = xmlParse3986Fragment(uri, &str);
834 if (ret != 0) return(ret);
835 }
836 if (*str != 0) {
837 xmlCleanURI(uri);
838 return(1);
839 }
840 return(0);
841}
842
843
844/**
845 * xmlParse3986URI:
846 * @uri: pointer to an URI structure
847 * @str: the string to analyze
848 *
849 * Parse an URI string and fills in the appropriate fields
850 * of the @uri structure
851 *
852 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853 *
854 * Returns 0 or the error code
855 */
856static int
857xmlParse3986URI(xmlURIPtr uri, const char *str) {
858 int ret;
859
860 ret = xmlParse3986Scheme(uri, &str);
861 if (ret != 0) return(ret);
862 if (*str != ':') {
863 return(1);
864 }
865 str++;
866 ret = xmlParse3986HierPart(uri, &str);
867 if (ret != 0) return(ret);
868 if (*str == '?') {
869 str++;
870 ret = xmlParse3986Query(uri, &str);
871 if (ret != 0) return(ret);
872 }
873 if (*str == '#') {
874 str++;
875 ret = xmlParse3986Fragment(uri, &str);
876 if (ret != 0) return(ret);
877 }
878 if (*str != 0) {
879 xmlCleanURI(uri);
880 return(1);
881 }
882 return(0);
883}
884
885/**
886 * xmlParse3986URIReference:
887 * @uri: pointer to an URI structure
888 * @str: the string to analyze
889 *
890 * Parse an URI reference string and fills in the appropriate fields
891 * of the @uri structure
892 *
893 * URI-reference = URI / relative-ref
894 *
895 * Returns 0 or the error code
896 */
897static int
898xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899 int ret;
900
901 if (str == NULL)
902 return(-1);
903 xmlCleanURI(uri);
904
905 /*
906 * Try first to parse absolute refs, then fallback to relative if
907 * it fails.
908 */
909 ret = xmlParse3986URI(uri, str);
910 if (ret != 0) {
911 xmlCleanURI(uri);
912 ret = xmlParse3986RelativeRef(uri, str);
913 if (ret != 0) {
914 xmlCleanURI(uri);
915 return(ret);
916 }
917 }
918 return(0);
919}
920
921/**
922 * xmlParseURI:
923 * @str: the URI string to analyze
924 *
925 * Parse an URI based on RFC 3986
926 *
927 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
928 *
929 * Returns a newly built xmlURIPtr or NULL in case of error
930 */
931xmlURIPtr
932xmlParseURI(const char *str) {
933 xmlURIPtr uri;
934 int ret;
935
936 if (str == NULL)
937 return(NULL);
938 uri = xmlCreateURI();
939 if (uri != NULL) {
940 ret = xmlParse3986URIReference(uri, str);
941 if (ret) {
942 xmlFreeURI(uri);
943 return(NULL);
944 }
945 }
946 return(uri);
947}
948
949/**
950 * xmlParseURIReference:
951 * @uri: pointer to an URI structure
952 * @str: the string to analyze
953 *
954 * Parse an URI reference string based on RFC 3986 and fills in the
955 * appropriate fields of the @uri structure
956 *
957 * URI-reference = URI / relative-ref
958 *
959 * Returns 0 or the error code
960 */
961int
962xmlParseURIReference(xmlURIPtr uri, const char *str) {
963 return(xmlParse3986URIReference(uri, str));
964}
965
966/**
967 * xmlParseURIRaw:
968 * @str: the URI string to analyze
969 * @raw: if 1 unescaping of URI pieces are disabled
970 *
971 * Parse an URI but allows to keep intact the original fragments.
972 *
973 * URI-reference = URI / relative-ref
974 *
975 * Returns a newly built xmlURIPtr or NULL in case of error
976 */
977xmlURIPtr
978xmlParseURIRaw(const char *str, int raw) {
979 xmlURIPtr uri;
980 int ret;
981
982 if (str == NULL)
983 return(NULL);
984 uri = xmlCreateURI();
985 if (uri != NULL) {
986 if (raw) {
987 uri->cleanup |= 2;
988 }
989 ret = xmlParseURIReference(uri, str);
990 if (ret) {
991 xmlFreeURI(uri);
992 return(NULL);
993 }
994 }
995 return(uri);
996}
997
998/************************************************************************
999 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001000 * Generic URI structure functions *
1001 * *
1002 ************************************************************************/
1003
1004/**
1005 * xmlCreateURI:
1006 *
1007 * Simply creates an empty xmlURI
1008 *
1009 * Returns the new structure or NULL in case of error
1010 */
1011xmlURIPtr
1012xmlCreateURI(void) {
1013 xmlURIPtr ret;
1014
1015 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001017 xmlURIErrMemory("creating URI structure\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001018 return(NULL);
1019 }
1020 memset(ret, 0, sizeof(xmlURI));
1021 return(ret);
1022}
1023
1024/**
Daniel Veillard57560382012-07-24 11:44:23 +08001025 * xmlSaveUriRealloc:
1026 *
1027 * Function to handle properly a reallocation when saving an URI
1028 * Also imposes some limit on the length of an URI string output
1029 */
1030static xmlChar *
1031xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032 xmlChar *temp;
1033 int tmp;
1034
1035 if (*max > MAX_URI_LENGTH) {
1036 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037 return(NULL);
1038 }
1039 tmp = *max * 2;
1040 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041 if (temp == NULL) {
1042 xmlURIErrMemory("saving URI\n");
1043 return(NULL);
1044 }
1045 *max = tmp;
1046 return(temp);
1047}
1048
1049/**
Owen Taylor3473f882001-02-23 17:55:21 +00001050 * xmlSaveUri:
1051 * @uri: pointer to an xmlURI
1052 *
1053 * Save the URI as an escaped string
1054 *
1055 * Returns a new string (to be deallocated by caller)
1056 */
1057xmlChar *
1058xmlSaveUri(xmlURIPtr uri) {
1059 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001060 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001061 const char *p;
1062 int len;
1063 int max;
1064
1065 if (uri == NULL) return(NULL);
1066
1067
1068 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001069 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001070 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001071 xmlURIErrMemory("saving URI\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001072 return(NULL);
1073 }
1074 len = 0;
1075
1076 if (uri->scheme != NULL) {
1077 p = uri->scheme;
1078 while (*p != 0) {
1079 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001080 temp = xmlSaveUriRealloc(ret, &max);
1081 if (temp == NULL) goto mem_error;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001082 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001083 }
1084 ret[len++] = *p++;
1085 }
1086 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001087 temp = xmlSaveUriRealloc(ret, &max);
1088 if (temp == NULL) goto mem_error;
1089 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001090 }
1091 ret[len++] = ':';
1092 }
1093 if (uri->opaque != NULL) {
1094 p = uri->opaque;
1095 while (*p != 0) {
1096 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001097 temp = xmlSaveUriRealloc(ret, &max);
1098 if (temp == NULL) goto mem_error;
1099 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001100 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001101 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001102 ret[len++] = *p++;
1103 else {
1104 int val = *(unsigned char *)p++;
1105 int hi = val / 0x10, lo = val % 0x10;
1106 ret[len++] = '%';
1107 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109 }
1110 }
Owen Taylor3473f882001-02-23 17:55:21 +00001111 } else {
Daniel Veillardbeb72812014-10-03 19:22:39 +08001112 if ((uri->server != NULL) || (uri->port == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001113 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001114 temp = xmlSaveUriRealloc(ret, &max);
1115 if (temp == NULL) goto mem_error;
1116 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001117 }
1118 ret[len++] = '/';
1119 ret[len++] = '/';
1120 if (uri->user != NULL) {
1121 p = uri->user;
1122 while (*p != 0) {
1123 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001124 temp = xmlSaveUriRealloc(ret, &max);
1125 if (temp == NULL) goto mem_error;
1126 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001127 }
1128 if ((IS_UNRESERVED(*(p))) ||
1129 ((*(p) == ';')) || ((*(p) == ':')) ||
1130 ((*(p) == '&')) || ((*(p) == '=')) ||
1131 ((*(p) == '+')) || ((*(p) == '$')) ||
1132 ((*(p) == ',')))
1133 ret[len++] = *p++;
1134 else {
1135 int val = *(unsigned char *)p++;
1136 int hi = val / 0x10, lo = val % 0x10;
1137 ret[len++] = '%';
1138 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140 }
1141 }
1142 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001143 temp = xmlSaveUriRealloc(ret, &max);
1144 if (temp == NULL) goto mem_error;
1145 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001146 }
1147 ret[len++] = '@';
1148 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001149 if (uri->server != NULL) {
1150 p = uri->server;
1151 while (*p != 0) {
1152 if (len >= max) {
1153 temp = xmlSaveUriRealloc(ret, &max);
1154 if (temp == NULL) goto mem_error;
1155 ret = temp;
1156 }
1157 ret[len++] = *p++;
Owen Taylor3473f882001-02-23 17:55:21 +00001158 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001159 if (uri->port > 0) {
1160 if (len + 10 >= max) {
1161 temp = xmlSaveUriRealloc(ret, &max);
1162 if (temp == NULL) goto mem_error;
1163 ret = temp;
1164 }
1165 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001166 }
Owen Taylor3473f882001-02-23 17:55:21 +00001167 }
1168 } else if (uri->authority != NULL) {
1169 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001170 temp = xmlSaveUriRealloc(ret, &max);
1171 if (temp == NULL) goto mem_error;
1172 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001173 }
1174 ret[len++] = '/';
1175 ret[len++] = '/';
1176 p = uri->authority;
1177 while (*p != 0) {
1178 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001179 temp = xmlSaveUriRealloc(ret, &max);
1180 if (temp == NULL) goto mem_error;
1181 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001182 }
1183 if ((IS_UNRESERVED(*(p))) ||
1184 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186 ((*(p) == '=')) || ((*(p) == '+')))
1187 ret[len++] = *p++;
1188 else {
1189 int val = *(unsigned char *)p++;
1190 int hi = val / 0x10, lo = val % 0x10;
1191 ret[len++] = '%';
1192 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194 }
1195 }
1196 } else if (uri->scheme != NULL) {
1197 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001198 temp = xmlSaveUriRealloc(ret, &max);
1199 if (temp == NULL) goto mem_error;
1200 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001201 }
Owen Taylor3473f882001-02-23 17:55:21 +00001202 }
1203 if (uri->path != NULL) {
1204 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001205 /*
1206 * the colon in file:///d: should not be escaped or
1207 * Windows accesses fail later.
1208 */
1209 if ((uri->scheme != NULL) &&
1210 (p[0] == '/') &&
1211 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001214 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001215 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001216 temp = xmlSaveUriRealloc(ret, &max);
1217 if (temp == NULL) goto mem_error;
1218 ret = temp;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001219 }
1220 ret[len++] = *p++;
1221 ret[len++] = *p++;
1222 ret[len++] = *p++;
1223 }
Owen Taylor3473f882001-02-23 17:55:21 +00001224 while (*p != 0) {
1225 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001226 temp = xmlSaveUriRealloc(ret, &max);
1227 if (temp == NULL) goto mem_error;
1228 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001229 }
1230 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233 ((*(p) == ',')))
1234 ret[len++] = *p++;
1235 else {
1236 int val = *(unsigned char *)p++;
1237 int hi = val / 0x10, lo = val % 0x10;
1238 ret[len++] = '%';
1239 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241 }
1242 }
1243 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001244 if (uri->query_raw != NULL) {
1245 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001246 temp = xmlSaveUriRealloc(ret, &max);
1247 if (temp == NULL) goto mem_error;
1248 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001249 }
1250 ret[len++] = '?';
1251 p = uri->query_raw;
1252 while (*p != 0) {
1253 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001254 temp = xmlSaveUriRealloc(ret, &max);
1255 if (temp == NULL) goto mem_error;
1256 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001257 }
1258 ret[len++] = *p++;
1259 }
1260 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001261 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001262 temp = xmlSaveUriRealloc(ret, &max);
1263 if (temp == NULL) goto mem_error;
1264 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001265 }
1266 ret[len++] = '?';
1267 p = uri->query;
1268 while (*p != 0) {
1269 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001270 temp = xmlSaveUriRealloc(ret, &max);
1271 if (temp == NULL) goto mem_error;
1272 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001273 }
Daniel Veillard57560382012-07-24 11:44:23 +08001274 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Owen Taylor3473f882001-02-23 17:55:21 +00001275 ret[len++] = *p++;
1276 else {
1277 int val = *(unsigned char *)p++;
1278 int hi = val / 0x10, lo = val % 0x10;
1279 ret[len++] = '%';
1280 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282 }
1283 }
1284 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001285 }
1286 if (uri->fragment != NULL) {
1287 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001288 temp = xmlSaveUriRealloc(ret, &max);
1289 if (temp == NULL) goto mem_error;
1290 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001291 }
1292 ret[len++] = '#';
1293 p = uri->fragment;
1294 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001295 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001296 temp = xmlSaveUriRealloc(ret, &max);
1297 if (temp == NULL) goto mem_error;
1298 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001299 }
Daniel Veillard57560382012-07-24 11:44:23 +08001300 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001301 ret[len++] = *p++;
1302 else {
1303 int val = *(unsigned char *)p++;
1304 int hi = val / 0x10, lo = val % 0x10;
1305 ret[len++] = '%';
1306 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001308 }
1309 }
Owen Taylor3473f882001-02-23 17:55:21 +00001310 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001311 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001312 temp = xmlSaveUriRealloc(ret, &max);
1313 if (temp == NULL) goto mem_error;
1314 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001315 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02001316 ret[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001317 return(ret);
Daniel Veillard57560382012-07-24 11:44:23 +08001318
1319mem_error:
1320 xmlFree(ret);
1321 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001322}
1323
1324/**
1325 * xmlPrintURI:
1326 * @stream: a FILE* for the output
1327 * @uri: pointer to an xmlURI
1328 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001329 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001330 */
1331void
1332xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333 xmlChar *out;
1334
1335 out = xmlSaveUri(uri);
1336 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001337 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001338 xmlFree(out);
1339 }
1340}
1341
1342/**
1343 * xmlCleanURI:
1344 * @uri: pointer to an xmlURI
1345 *
1346 * Make sure the xmlURI struct is free of content
1347 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001348static void
Owen Taylor3473f882001-02-23 17:55:21 +00001349xmlCleanURI(xmlURIPtr uri) {
1350 if (uri == NULL) return;
1351
1352 if (uri->scheme != NULL) xmlFree(uri->scheme);
1353 uri->scheme = NULL;
1354 if (uri->server != NULL) xmlFree(uri->server);
1355 uri->server = NULL;
1356 if (uri->user != NULL) xmlFree(uri->user);
1357 uri->user = NULL;
1358 if (uri->path != NULL) xmlFree(uri->path);
1359 uri->path = NULL;
1360 if (uri->fragment != NULL) xmlFree(uri->fragment);
1361 uri->fragment = NULL;
1362 if (uri->opaque != NULL) xmlFree(uri->opaque);
1363 uri->opaque = NULL;
1364 if (uri->authority != NULL) xmlFree(uri->authority);
1365 uri->authority = NULL;
1366 if (uri->query != NULL) xmlFree(uri->query);
1367 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001368 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001370}
1371
1372/**
1373 * xmlFreeURI:
1374 * @uri: pointer to an xmlURI
1375 *
1376 * Free up the xmlURI struct
1377 */
1378void
1379xmlFreeURI(xmlURIPtr uri) {
1380 if (uri == NULL) return;
1381
1382 if (uri->scheme != NULL) xmlFree(uri->scheme);
1383 if (uri->server != NULL) xmlFree(uri->server);
1384 if (uri->user != NULL) xmlFree(uri->user);
1385 if (uri->path != NULL) xmlFree(uri->path);
1386 if (uri->fragment != NULL) xmlFree(uri->fragment);
1387 if (uri->opaque != NULL) xmlFree(uri->opaque);
1388 if (uri->authority != NULL) xmlFree(uri->authority);
1389 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001390 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001391 xmlFree(uri);
1392}
1393
1394/************************************************************************
1395 * *
1396 * Helper functions *
1397 * *
1398 ************************************************************************/
1399
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' between segments are
 * collapsed to a single '/' as well.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so copy by hand, not strcpy */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /*
         * Look backwards for the last character of the previous segment,
         * skipping the '/' separator(s) that precede "cur".
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /*
         * There is no previous segment when "cur" is the very start of
         * the buffer, or when only '/' characters precede it; then keep
         * going from here.  Testing "segp == path" alone is not enough:
         * a one-character first segment (as in "a/b/../..") also ends
         * at "path" and must still be re-examined.
         */
        if ((cur == path) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001588
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, and 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1596
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" sequence made of two hexadecimal digits is replaced by the single
 * byte it encodes (no encoding conversion is done); every other byte,
 * including a '%' not followed by two hexadecimal digits, is copied as is.
 * The result is therefore never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result, otherwise the result is written to @target.
 *
 * Returns the zero terminated unescaped string (@target when it was
 * provided), or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    for (in = str, out = ret; len > 0; ) {
        int value;

        /* anything which is not a complete %XX escape is copied verbatim */
        if ((len <= 2) || (*in != '%') ||
            (!is_hex(in[1])) || (!is_hex(in[2]))) {
            *out++ = *in++;
            len--;
            continue;
        }

        /* high nibble */
        in++;
        value = 0;
        if ((*in >= '0') && (*in <= '9'))
            value = (*in - '0');
        else if ((*in >= 'a') && (*in <= 'f'))
            value = (*in - 'a') + 10;
        else if ((*in >= 'A') && (*in <= 'F'))
            value = (*in - 'A') + 10;

        /* low nibble */
        in++;
        if ((*in >= '0') && (*in <= '9'))
            value = value * 16 + (*in - '0');
        else if ((*in >= 'a') && (*in <= 'f'))
            value = value * 16 + (*in - 'a') + 10;
        else if ((*in >= 'A') && (*in <= 'F'))
            value = value * 16 + (*in - 'A') + 10;
        in++;

        *out++ = value;
        len -= 3;
    }
    *out = 0;
    return(ret);
}
1658
1659/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001660 * xmlURIEscapeStr:
1661 * @str: string to escape
1662 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001663 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001664 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001666 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001667 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001668 */
1669xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001670xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001672 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001673 const xmlChar *in;
Daniel Veillard57560382012-07-24 11:44:23 +08001674 int len, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001675
1676 if (str == NULL)
1677 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001678 if (str[0] == 0)
1679 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001680 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001681 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001682
1683 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001684 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001685 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001686 xmlURIErrMemory("escaping URI value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001687 return(NULL);
1688 }
1689 in = (const xmlChar *) str;
1690 out = 0;
1691 while(*in != 0) {
1692 if (len - out <= 3) {
Daniel Veillard57560382012-07-24 11:44:23 +08001693 temp = xmlSaveUriRealloc(ret, &len);
Daniel Veillarded86dc22008-04-24 11:58:41 +00001694 if (temp == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001695 xmlURIErrMemory("escaping URI value\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001696 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001697 return(NULL);
1698 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001699 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001700 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001701
1702 ch = *in;
1703
Daniel Veillardeb475a32002-04-14 22:00:22 +00001704 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001705 unsigned char val;
1706 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001707 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001708 if (val <= 9)
1709 ret[out++] = '0' + val;
1710 else
1711 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001712 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001713 if (val <= 9)
1714 ret[out++] = '0' + val;
1715 else
1716 ret[out++] = 'A' + val - 0xA;
1717 in++;
1718 } else {
1719 ret[out++] = *in++;
1720 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001721
Owen Taylor3473f882001-02-23 17:55:21 +00001722 }
1723 ret[out] = 0;
1724 return(ret);
1725}
1726
Daniel Veillard8514c672001-05-23 10:29:12 +00001727/**
1728 * xmlURIEscape:
1729 * @str: the string of the URI to escape
1730 *
1731 * Escaping routine, does not do validity checks !
1732 * It will try to escape the chars needing this, but this is heuristic
1733 * based it's impossible to be sure.
1734 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001735 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001736 *
1737 * 25 May 2001
1738 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739 * according to RFC2396.
1740 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001741 */
1742xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001743xmlURIEscape(const xmlChar * str)
1744{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001745 xmlChar *ret, *segment = NULL;
1746 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001747 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001748
Daniel Veillard6278fb52001-05-25 07:38:41 +00001749#define NULLCHK(p) if(!p) { \
Daniel Veillard57560382012-07-24 11:44:23 +08001750 xmlURIErrMemory("escaping URI value\n"); \
1751 xmlFreeURI(uri); \
1752 return NULL; } \
Daniel Veillard6278fb52001-05-25 07:38:41 +00001753
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001754 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001755 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001756
1757 uri = xmlCreateURI();
1758 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001759 /*
1760 * Allow escaping errors in the unescaped form
1761 */
1762 uri->cleanup = 1;
1763 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001764 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001765 xmlFreeURI(uri);
1766 return (NULL);
1767 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001768 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001769
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001770 if (!uri)
1771 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001772
1773 ret = NULL;
1774
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001775 if (uri->scheme) {
1776 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777 NULLCHK(segment)
1778 ret = xmlStrcat(ret, segment);
1779 ret = xmlStrcat(ret, BAD_CAST ":");
1780 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001781 }
1782
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001783 if (uri->authority) {
1784 segment =
1785 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786 NULLCHK(segment)
1787 ret = xmlStrcat(ret, BAD_CAST "//");
1788 ret = xmlStrcat(ret, segment);
1789 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001790 }
1791
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001792 if (uri->user) {
1793 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794 NULLCHK(segment)
Daniel Veillard57560382012-07-24 11:44:23 +08001795 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001796 ret = xmlStrcat(ret, segment);
1797 ret = xmlStrcat(ret, BAD_CAST "@");
1798 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001799 }
1800
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001801 if (uri->server) {
1802 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001804 if (uri->user == NULL)
Daniel Veillardd7af5552008-08-04 15:29:44 +00001805 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001806 ret = xmlStrcat(ret, segment);
1807 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001808 }
1809
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001810 if (uri->port) {
1811 xmlChar port[10];
1812
Daniel Veillard43d3f612001-11-10 11:57:23 +00001813 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001814 ret = xmlStrcat(ret, BAD_CAST ":");
1815 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001816 }
1817
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001818 if (uri->path) {
1819 segment =
1820 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821 NULLCHK(segment)
1822 ret = xmlStrcat(ret, segment);
1823 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001824 }
1825
Daniel Veillarda1413b82007-04-26 08:33:28 +00001826 if (uri->query_raw) {
1827 ret = xmlStrcat(ret, BAD_CAST "?");
1828 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829 }
1830 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001831 segment =
1832 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833 NULLCHK(segment)
1834 ret = xmlStrcat(ret, BAD_CAST "?");
1835 ret = xmlStrcat(ret, segment);
1836 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001837 }
1838
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001839 if (uri->opaque) {
1840 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841 NULLCHK(segment)
1842 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001843 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001844 }
1845
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001846 if (uri->fragment) {
1847 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848 NULLCHK(segment)
1849 ret = xmlStrcat(ret, BAD_CAST "#");
1850 ret = xmlStrcat(ret, segment);
1851 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001852 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001853
1854 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001855#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001856
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001857 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001858}
1859
Owen Taylor3473f882001-02-23 17:55:21 +00001860/************************************************************************
1861 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001862 * Public functions *
1863 * *
1864 ************************************************************************/
1865
1866/**
1867 * xmlBuildURI:
1868 * @URI: the URI instance found in the document
1869 * @base: the base value
1870 *
1871 * Computes he final URI of the reference done by checking that
1872 * the given URI is valid, and building the final URI using the
Daniel Veillard57560382012-07-24 11:44:23 +08001873 * base URI. This is processed according to section 5.2 of the
Owen Taylor3473f882001-02-23 17:55:21 +00001874 * RFC 2396
1875 *
1876 * 5.2. Resolving Relative References to Absolute Form
1877 *
1878 * Returns a new URI string (to be freed by the caller) or NULL in case
1879 * of error.
1880 */
1881xmlChar *
1882xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001884 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001885 xmlURIPtr ref = NULL;
1886 xmlURIPtr bas = NULL;
1887 xmlURIPtr res = NULL;
1888
1889 /*
1890 * 1) The URI reference is parsed into the potential four components and
1891 * fragment identifier, as described in Section 4.3.
1892 *
1893 * NOTE that a completely empty URI is treated by modern browsers
1894 * as a reference to "." rather than as a synonym for the current
1895 * URI. Should we do that here?
1896 */
Daniel Veillard57560382012-07-24 11:44:23 +08001897 if (URI == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001898 ret = -1;
1899 else {
1900 if (*URI) {
1901 ref = xmlCreateURI();
1902 if (ref == NULL)
1903 goto done;
1904 ret = xmlParseURIReference(ref, (const char *) URI);
1905 }
1906 else
1907 ret = 0;
1908 }
1909 if (ret != 0)
1910 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001911 if ((ref != NULL) && (ref->scheme != NULL)) {
1912 /*
1913 * The URI is absolute don't modify.
1914 */
1915 val = xmlStrdup(URI);
1916 goto done;
1917 }
Owen Taylor3473f882001-02-23 17:55:21 +00001918 if (base == NULL)
1919 ret = -1;
1920 else {
1921 bas = xmlCreateURI();
1922 if (bas == NULL)
1923 goto done;
1924 ret = xmlParseURIReference(bas, (const char *) base);
1925 }
1926 if (ret != 0) {
1927 if (ref)
1928 val = xmlSaveUri(ref);
1929 goto done;
1930 }
1931 if (ref == NULL) {
1932 /*
1933 * the base fragment must be ignored
1934 */
1935 if (bas->fragment != NULL) {
1936 xmlFree(bas->fragment);
1937 bas->fragment = NULL;
1938 }
1939 val = xmlSaveUri(bas);
1940 goto done;
1941 }
1942
1943 /*
1944 * 2) If the path component is empty and the scheme, authority, and
1945 * query components are undefined, then it is a reference to the
1946 * current document and we are done. Otherwise, the reference URI's
1947 * query and fragment components are defined as found (or not found)
1948 * within the URI reference and not inherited from the base URI.
1949 *
1950 * NOTE that in modern browsers, the parsing differs from the above
1951 * in the following aspect: the query component is allowed to be
1952 * defined while still treating this as a reference to the current
1953 * document.
1954 */
1955 res = xmlCreateURI();
1956 if (res == NULL)
1957 goto done;
1958 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959 ((ref->authority == NULL) && (ref->server == NULL))) {
1960 if (bas->scheme != NULL)
1961 res->scheme = xmlMemStrdup(bas->scheme);
1962 if (bas->authority != NULL)
1963 res->authority = xmlMemStrdup(bas->authority);
1964 else if (bas->server != NULL) {
1965 res->server = xmlMemStrdup(bas->server);
1966 if (bas->user != NULL)
1967 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08001968 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00001969 }
1970 if (bas->path != NULL)
1971 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001972 if (ref->query_raw != NULL)
1973 res->query_raw = xmlMemStrdup (ref->query_raw);
1974 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001975 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001976 else if (bas->query_raw != NULL)
1977 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001978 else if (bas->query != NULL)
1979 res->query = xmlMemStrdup(bas->query);
1980 if (ref->fragment != NULL)
1981 res->fragment = xmlMemStrdup(ref->fragment);
1982 goto step_7;
1983 }
Owen Taylor3473f882001-02-23 17:55:21 +00001984
1985 /*
1986 * 3) If the scheme component is defined, indicating that the reference
1987 * starts with a scheme name, then the reference is interpreted as an
1988 * absolute URI and we are done. Otherwise, the reference URI's
1989 * scheme is inherited from the base URI's scheme component.
1990 */
1991 if (ref->scheme != NULL) {
1992 val = xmlSaveUri(ref);
1993 goto done;
1994 }
1995 if (bas->scheme != NULL)
1996 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard57560382012-07-24 11:44:23 +08001997
Daniel Veillarda1413b82007-04-26 08:33:28 +00001998 if (ref->query_raw != NULL)
1999 res->query_raw = xmlMemStrdup(ref->query_raw);
2000 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002001 res->query = xmlMemStrdup(ref->query);
2002 if (ref->fragment != NULL)
2003 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002004
2005 /*
2006 * 4) If the authority component is defined, then the reference is a
2007 * network-path and we skip to step 7. Otherwise, the reference
2008 * URI's authority is inherited from the base URI's authority
2009 * component, which will also be undefined if the URI scheme does not
2010 * use an authority component.
2011 */
2012 if ((ref->authority != NULL) || (ref->server != NULL)) {
2013 if (ref->authority != NULL)
2014 res->authority = xmlMemStrdup(ref->authority);
2015 else {
2016 res->server = xmlMemStrdup(ref->server);
2017 if (ref->user != NULL)
2018 res->user = xmlMemStrdup(ref->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002019 res->port = ref->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002020 }
2021 if (ref->path != NULL)
2022 res->path = xmlMemStrdup(ref->path);
2023 goto step_7;
2024 }
2025 if (bas->authority != NULL)
2026 res->authority = xmlMemStrdup(bas->authority);
2027 else if (bas->server != NULL) {
2028 res->server = xmlMemStrdup(bas->server);
2029 if (bas->user != NULL)
2030 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002031 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002032 }
2033
2034 /*
2035 * 5) If the path component begins with a slash character ("/"), then
2036 * the reference is an absolute-path and we skip to step 7.
2037 */
2038 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2039 res->path = xmlMemStrdup(ref->path);
2040 goto step_7;
2041 }
2042
2043
2044 /*
2045 * 6) If this step is reached, then we are resolving a relative-path
2046 * reference. The relative path needs to be merged with the base
2047 * URI's path. Although there are many ways to do this, we will
2048 * describe a simple method using a separate string buffer.
2049 *
2050 * Allocate a buffer large enough for the result string.
2051 */
2052 len = 2; /* extra / and 0 */
2053 if (ref->path != NULL)
2054 len += strlen(ref->path);
2055 if (bas->path != NULL)
2056 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002057 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002058 if (res->path == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002059 xmlURIErrMemory("resolving URI against base\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002060 goto done;
2061 }
2062 res->path[0] = 0;
2063
2064 /*
2065 * a) All but the last segment of the base URI's path component is
2066 * copied to the buffer. In other words, any characters after the
2067 * last (right-most) slash character, if any, are excluded.
2068 */
2069 cur = 0;
2070 out = 0;
2071 if (bas->path != NULL) {
2072 while (bas->path[cur] != 0) {
2073 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2074 cur++;
2075 if (bas->path[cur] == 0)
2076 break;
2077
2078 cur++;
2079 while (out < cur) {
2080 res->path[out] = bas->path[out];
2081 out++;
2082 }
2083 }
2084 }
2085 res->path[out] = 0;
2086
2087 /*
2088 * b) The reference's path component is appended to the buffer
2089 * string.
2090 */
2091 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002092 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002093 /*
2094 * Ensure the path includes a '/'
2095 */
2096 if ((out == 0) && (bas->server != NULL))
2097 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002098 while (ref->path[indx] != 0) {
2099 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
2101 }
2102 res->path[out] = 0;
2103
2104 /*
2105 * Steps c) to h) are really path normalization steps
2106 */
2107 xmlNormalizeURIPath(res->path);
2108
2109step_7:
2110
2111 /*
2112 * 7) The resulting URI components, including any inherited from the
2113 * base URI, are recombined to give the absolute form of the URI
2114 * reference.
2115 */
2116 val = xmlSaveUri(res);
2117
2118done:
2119 if (ref != NULL)
2120 xmlFreeURI(ref);
2121 if (bas != NULL)
2122 xmlFreeURI(bas);
2123 if (res != NULL)
2124 xmlFreeURI(res);
2125 return(val);
2126}
2127
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002128/**
William M. Brackf7789b12004-06-07 08:57:27 +00002129 * xmlBuildRelativeURI:
2130 * @URI: the URI reference under consideration
2131 * @base: the base value
2132 *
2133 * Expresses the URI of the reference in terms relative to the
2134 * base. Some examples of this operation include:
2135 * base = "http://site1.com/docs/book1.html"
2136 * URI input URI returned
2137 * docs/pic1.gif pic1.gif
2138 * docs/img/pic1.gif img/pic1.gif
2139 * img/pic1.gif ../img/pic1.gif
2140 * http://site1.com/docs/pic1.gif pic1.gif
2141 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2142 *
2143 * base = "docs/book1.html"
2144 * URI input URI returned
2145 * docs/pic1.gif pic1.gif
2146 * docs/img/pic1.gif img/pic1.gif
2147 * img/pic1.gif ../img/pic1.gif
2148 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2149 *
2150 *
2151 * Note: if the URI reference is really wierd or complicated, it may be
2152 * worthwhile to first convert it into a "nice" one by calling
2153 * xmlBuildURI (using 'base') before calling this routine,
2154 * since this routine (for reasonable efficiency) assumes URI has
2155 * already been through some validation.
2156 *
2157 * Returns a new URI string (to be freed by the caller) or NULL in case
2158 * error.
2159 */
2160xmlChar *
2161xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2162{
2163 xmlChar *val = NULL;
2164 int ret;
2165 int ix;
William M. Brackf7789b12004-06-07 08:57:27 +00002166 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002167 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002168 xmlURIPtr ref = NULL;
2169 xmlURIPtr bas = NULL;
2170 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002171 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002172
2173 if ((URI == NULL) || (*URI == 0))
2174 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002175
2176 /*
2177 * First parse URI into a standard form
2178 */
2179 ref = xmlCreateURI ();
2180 if (ref == NULL)
2181 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002182 /* If URI not already in "relative" form */
2183 if (URI[0] != '.') {
2184 ret = xmlParseURIReference (ref, (const char *) URI);
2185 if (ret != 0)
2186 goto done; /* Error in URI, return NULL */
2187 } else
2188 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002189
2190 /*
2191 * Next parse base into the same standard form
2192 */
2193 if ((base == NULL) || (*base == 0)) {
2194 val = xmlStrdup (URI);
2195 goto done;
2196 }
2197 bas = xmlCreateURI ();
2198 if (bas == NULL)
2199 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002200 if (base[0] != '.') {
2201 ret = xmlParseURIReference (bas, (const char *) base);
2202 if (ret != 0)
2203 goto done; /* Error in base, return NULL */
2204 } else
2205 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002206
2207 /*
2208 * If the scheme / server on the URI differs from the base,
2209 * just return the URI
2210 */
2211 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002212 ((bas->scheme == NULL) ||
2213 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2214 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002215 val = xmlStrdup (URI);
2216 goto done;
2217 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002218 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2219 val = xmlStrdup(BAD_CAST "");
2220 goto done;
2221 }
2222 if (bas->path == NULL) {
2223 val = xmlStrdup((xmlChar *)ref->path);
2224 goto done;
2225 }
2226 if (ref->path == NULL) {
2227 ref->path = (char *) "/";
2228 remove_path = 1;
2229 }
William M. Brackf7789b12004-06-07 08:57:27 +00002230
2231 /*
2232 * At this point (at last!) we can compare the two paths
2233 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002234 * First we take care of the special case where either of the
2235 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002236 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002237 if (bas->path == NULL) {
2238 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002239 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002240 if (*uptr == '/')
2241 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002242 /* exception characters from xmlSaveUri */
2243 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002244 }
2245 goto done;
2246 }
William M. Brackf7789b12004-06-07 08:57:27 +00002247 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002248 if (ref->path == NULL) {
2249 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002250 if (bptr[ix] == '/')
2251 nbslash++;
2252 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002253 uptr = NULL;
2254 len = 1; /* this is for a string terminator only */
2255 } else {
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002256 xmlChar *rptr = (xmlChar *) ref->path;
2257 int pos = 0;
2258
2259 /*
2260 * Next we compare the two strings and find where they first differ
2261 */
2262 if ((*rptr == '.') && (rptr[1] == '/'))
2263 rptr += 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002264 if ((*bptr == '.') && (bptr[1] == '/'))
2265 bptr += 2;
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002266 else if ((*bptr == '/') && (*rptr != '/'))
William M. Brack820d5ed2005-09-14 05:24:27 +00002267 bptr++;
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002268 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
William M. Brack820d5ed2005-09-14 05:24:27 +00002269 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002270
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002271 if (bptr[pos] == rptr[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002272 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002273 goto done; /* (I can't imagine why anyone would do this) */
2274 }
2275
2276 /*
2277 * In URI, "back up" to the last '/' encountered. This will be the
2278 * beginning of the "unique" suffix of URI
2279 */
2280 ix = pos;
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002281 if ((rptr[ix] == '/') && (ix > 0))
William M. Brack820d5ed2005-09-14 05:24:27 +00002282 ix--;
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002283 else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/'))
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002284 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002285 for (; ix > 0; ix--) {
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002286 if (rptr[ix] == '/')
William M. Brack820d5ed2005-09-14 05:24:27 +00002287 break;
2288 }
2289 if (ix == 0) {
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002290 uptr = (xmlChar *)rptr;
William M. Brack820d5ed2005-09-14 05:24:27 +00002291 } else {
2292 ix++;
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002293 uptr = (xmlChar *)&rptr[ix];
William M. Brack820d5ed2005-09-14 05:24:27 +00002294 }
2295
2296 /*
2297 * In base, count the number of '/' from the differing point
2298 */
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002299 if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */
William M. Brack820d5ed2005-09-14 05:24:27 +00002300 for (; bptr[ix] != 0; ix++) {
2301 if (bptr[ix] == '/')
2302 nbslash++;
2303 }
2304 }
2305 len = xmlStrlen (uptr) + 1;
2306 }
Daniel Veillard57560382012-07-24 11:44:23 +08002307
William M. Brackf7789b12004-06-07 08:57:27 +00002308 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002309 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002310 /* exception characters from xmlSaveUri */
2311 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002312 goto done;
2313 }
William M. Brackf7789b12004-06-07 08:57:27 +00002314
2315 /*
2316 * Allocate just enough space for the returned string -
2317 * length of the remainder of the URI, plus enough space
2318 * for the "../" groups, plus one for the terminator
2319 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002320 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002321 if (val == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002322 xmlURIErrMemory("building relative URI\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002323 goto done;
2324 }
2325 vptr = val;
2326 /*
2327 * Put in as many "../" as needed
2328 */
2329 for (; nbslash>0; nbslash--) {
2330 *vptr++ = '.';
2331 *vptr++ = '.';
2332 *vptr++ = '/';
2333 }
2334 /*
2335 * Finish up with the end of the URI
2336 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002337 if (uptr != NULL) {
2338 if ((vptr > val) && (len > 0) &&
2339 (uptr[0] == '/') && (vptr[-1] == '/')) {
2340 memcpy (vptr, uptr + 1, len - 1);
2341 vptr[len - 2] = 0;
2342 } else {
2343 memcpy (vptr, uptr, len);
2344 vptr[len - 1] = 0;
2345 }
2346 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002347 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002348 }
William M. Brackf7789b12004-06-07 08:57:27 +00002349
William M. Brack50420192007-07-20 01:09:08 +00002350 /* escape the freshly-built path */
2351 vptr = val;
2352 /* exception characters from xmlSaveUri */
2353 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2354 xmlFree(vptr);
2355
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002356done:
William M. Brackf7789b12004-06-07 08:57:27 +00002357 /*
2358 * Free the working variables
2359 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002360 if (remove_path != 0)
2361 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002362 if (ref != NULL)
2363 xmlFreeURI (ref);
2364 if (bas != NULL)
2365 xmlFreeURI (bas);
2366
2367 return val;
2368}
2369
2370/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002371 * xmlCanonicPath:
2372 * @path: the resource locator in a filesystem notation
2373 *
Daniel Veillard57560382012-07-24 11:44:23 +08002374 * Constructs a canonic path from the specified path.
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002375 *
Daniel Veillard57560382012-07-24 11:44:23 +08002376 * Returns a new canonic path, or a duplicate of the path parameter if the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002377 * construction fails. The caller is responsible for freeing the memory occupied
Daniel Veillard57560382012-07-24 11:44:23 +08002378 * by the returned string. If there is insufficient memory available, or the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002379 * argument is NULL, the function returns NULL.
2380 */
Daniel Veillard57560382012-07-24 11:44:23 +08002381#define IS_WINDOWS_PATH(p) \
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002382 ((p != NULL) && \
2383 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2384 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2385 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002386xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002387xmlCanonicPath(const xmlChar *path)
2388{
William M. Brack22242272007-01-27 07:59:37 +00002389/*
2390 * For Windows implementations, additional work needs to be done to
2391 * replace backslashes in pathnames with "forward slashes"
2392 */
Daniel Veillard57560382012-07-24 11:44:23 +08002393#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002394 int len = 0;
2395 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002396 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002397#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002398 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002399 xmlChar *ret;
2400 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002401
2402 if (path == NULL)
2403 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002404
Michael Stahl55b899a2012-09-07 12:14:00 +08002405#if defined(_WIN32)
2406 /*
2407 * We must not change the backslashes to slashes if the the path
2408 * starts with \\?\
2409 * Those paths can be up to 32k characters long.
2410 * Was added specifically for OpenOffice, those paths can't be converted
2411 * to URIs anyway.
2412 */
2413 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2414 (path[3] == '\\') )
2415 return xmlStrdup((const xmlChar *) path);
2416#endif
2417
2418 /* sanitize filename starting with // so it can be used as URI */
Daniel Veillard69f8a132008-02-05 08:37:56 +00002419 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2420 path++;
2421
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002422 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002423 xmlFreeURI(uri);
2424 return xmlStrdup(path);
2425 }
2426
William M. Brack22242272007-01-27 07:59:37 +00002427 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002428 absuri = xmlStrstr(path, BAD_CAST "://");
2429 if (absuri != NULL) {
2430 int l, j;
2431 unsigned char c;
2432 xmlChar *escURI;
2433
2434 /*
2435 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002436 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002437 * part matches a protocol.
2438 */
2439 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002440 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002441 if ((l <= 0) || (l > 20))
2442 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002443 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002444 for (j = 0;j < l;j++) {
2445 c = path[j];
2446 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2447 goto path_processing;
2448 }
2449
William M. Brack22242272007-01-27 07:59:37 +00002450 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002451 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2452 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002453 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002454 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002455 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002456 if (uri != NULL) {
2457 xmlFreeURI(uri);
2458 return escURI;
2459 }
Nick Wellnhoferd6b36452017-05-27 14:44:36 +02002460 xmlFree(escURI);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002461 }
2462 }
2463
2464path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002465/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard57560382012-07-24 11:44:23 +08002466#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard336a8e12005-08-07 10:46:19 +00002467 /*
William M. Brack22242272007-01-27 07:59:37 +00002468 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002469 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002470 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002471 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002472 return(NULL);
2473 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002474
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002475 len = xmlStrlen(path);
2476 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002477 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002478 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002479 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002480 uri->path = xmlMallocAtomic(len + 2);
2481 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002482 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002483 return(NULL);
2484 }
William M. Brack22242272007-01-27 07:59:37 +00002485 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002486 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002487 p = uri->path + 1;
2488 strncpy(p, path, len + 1);
2489 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002490 uri->path = xmlStrdup(path);
2491 if (uri->path == NULL) {
2492 xmlFreeURI(uri);
2493 return(NULL);
2494 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002495 p = uri->path;
2496 }
William M. Brack22242272007-01-27 07:59:37 +00002497 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002498 while (*p != '\0') {
2499 if (*p == '\\')
2500 *p = '/';
2501 p++;
2502 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002503
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002504 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002505 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002506 } else {
2507 ret = xmlSaveUri(uri);
2508 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002509
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002510 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002511#else
2512 ret = xmlStrdup((const xmlChar *) path);
2513#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002514 return(ret);
2515}
Owen Taylor3473f882001-02-23 17:55:21 +00002516
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002517/**
2518 * xmlPathToURI:
2519 * @path: the resource locator in a filesystem notation
2520 *
2521 * Constructs an URI expressing the existing path
2522 *
Daniel Veillard57560382012-07-24 11:44:23 +08002523 * Returns a new URI, or a duplicate of the path parameter if the
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002524 * construction fails. The caller is responsible for freeing the memory
2525 * occupied by the returned string. If there is insufficient memory available,
2526 * or the argument is NULL, the function returns NULL.
2527 */
2528xmlChar *
2529xmlPathToURI(const xmlChar *path)
2530{
2531 xmlURIPtr uri;
2532 xmlURI temp;
2533 xmlChar *ret, *cal;
2534
2535 if (path == NULL)
2536 return(NULL);
2537
2538 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2539 xmlFreeURI(uri);
2540 return xmlStrdup(path);
2541 }
2542 cal = xmlCanonicPath(path);
2543 if (cal == NULL)
2544 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002545#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard57560382012-07-24 11:44:23 +08002546 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002547 If 'cal' is a valid URI allready then we are done here, as continuing would make
2548 it invalid. */
2549 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2550 xmlFreeURI(uri);
2551 return cal;
2552 }
2553 /* 'cal' can contain a relative path with backslashes. If that is processed
2554 by xmlSaveURI, they will be escaped and the external entity loader machinery
2555 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2556 ret = cal;
2557 while (*ret != '\0') {
2558 if (*ret == '\\')
2559 *ret = '/';
2560 ret++;
2561 }
2562#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002563 memset(&temp, 0, sizeof(temp));
2564 temp.path = (char *) cal;
2565 ret = xmlSaveUri(&temp);
2566 xmlFree(cal);
2567 return(ret);
2568}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002569#define bottom_uri
2570#include "elfgcchack.h"