blob: 81ec7f43216dc0d7d123efa4ef9351ebabe1a84d [file] [log] [blame]
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 3986, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Haibo Huangd23e46c2020-10-28 22:26:09 -070014#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000015#include <string.h>
16
17#include <libxml/xmlmemory.h>
18#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000019#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000020#include <libxml/xmlerror.h>
21
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside larger expressions (for instance x / MAX_URI_LENGTH).
 */
#define MAX_URI_LENGTH (1024 * 1024)
35
36static void
37xmlURIErrMemory(const char *extra)
38{
39 if (extra)
40 __xmlRaiseError(NULL, NULL, NULL,
41 NULL, NULL, XML_FROM_URI,
42 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43 extra, NULL, NULL, 0, 0,
44 "Memory allocation failed : %s\n", extra);
45 else
46 __xmlRaiseError(NULL, NULL, NULL,
47 NULL, NULL, XML_FROM_URI,
48 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49 NULL, NULL, NULL, 0, 0,
50 "Memory allocation failed\n");
51}
52
Daniel Veillardd7af5552008-08-04 15:29:44 +000053static void xmlCleanURI(xmlURIPtr uri);
Owen Taylor3473f882001-02-23 17:55:21 +000054
/*
 * Old rules from RFC 2396 used in legacy handling code.
 * The IS_xxx(x) macros below take a character value, except IS_UNWISE
 * and NEXT which take a pointer. All of them may evaluate their argument
 * more than once, so never pass an expression with side effects.
 *
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* Make sure a previously defined IS_DIGIT does not clash with ours */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Takes a pointer to the character to test, not the character itself.
 */
#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||	\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||	\
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances the
 * pointer by 3 (the '%' and its two following characters), anything else
 * by 1. The macro does not check that two characters follow the '%';
 * callers are expected to have validated the escape beforehand.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
128
129/*
130 * Productions from the spec.
131 *
132 * authority = server | reg_name
133 * reg_name = 1*( unreserved | escaped | "$" | "," |
134 * ";" | ":" | "@" | "&" | "=" | "+" )
135 *
136 * path = [ abs_path | opaque_part ]
137 */
138
/*
 * Copy the first n bytes of s with xmlStrndup and hand the result back as
 * a char *. The whole expansion is parenthesized so it can be used safely
 * as an operand of a larger expression.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140
Owen Taylor3473f882001-02-23 17:55:21 +0000141/************************************************************************
142 * *
Daniel Veillardd7af5552008-08-04 15:29:44 +0000143 * RFC 3986 parser *
144 * *
145 ************************************************************************/
146
/*
 * Character class tests for the RFC 3986 grammar. Every ISA_xxx(p) macro
 * takes a POINTER to the character to test and may evaluate it several
 * times, so the argument must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||	\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||	\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||	\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||	\
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||	\
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The tests are chained with && so the characters after the '%' are only
 * read once the previous one matched; a NUL terminator stops the chain.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
196
197/**
198 * xmlParse3986Scheme:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
201 *
202 * Parse an URI scheme
203 *
204 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205 *
206 * Returns 0 or the error code
207 */
208static int
209xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210 const char *cur;
211
212 if (str == NULL)
213 return(-1);
214
215 cur = *str;
216 if (!ISA_ALPHA(cur))
217 return(2);
218 cur++;
219 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221 if (uri != NULL) {
222 if (uri->scheme != NULL) xmlFree(uri->scheme);
223 uri->scheme = STRNDUP(*str, cur - *str);
224 }
225 *str = cur;
226 return(0);
227}
228
229/**
230 * xmlParse3986Fragment:
231 * @uri: pointer to an URI structure
232 * @str: pointer to the string to analyze
233 *
234 * Parse the query part of an URI
235 *
Daniel Veillard84c45df2008-08-06 10:26:06 +0000236 * fragment = *( pchar / "/" / "?" )
237 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238 * in the fragment identifier but this is used very broadly for
239 * xpointer scheme selection, so we are allowing it here to not break
240 * for example all the DocBook processing chains.
Daniel Veillardd7af5552008-08-04 15:29:44 +0000241 *
242 * Returns 0 or the error code
243 */
244static int
245xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246{
247 const char *cur;
248
249 if (str == NULL)
250 return (-1);
251
252 cur = *str;
253
254 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
Daniel Veillard84c45df2008-08-06 10:26:06 +0000255 (*cur == '[') || (*cur == ']') ||
Daniel Veillardd7af5552008-08-04 15:29:44 +0000256 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257 NEXT(cur);
258 if (uri != NULL) {
259 if (uri->fragment != NULL)
260 xmlFree(uri->fragment);
261 if (uri->cleanup & 2)
262 uri->fragment = STRNDUP(*str, cur - *str);
263 else
264 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265 }
266 *str = cur;
267 return (0);
268}
269
270/**
271 * xmlParse3986Query:
272 * @uri: pointer to an URI structure
273 * @str: pointer to the string to analyze
274 *
275 * Parse the query part of an URI
276 *
277 * query = *uric
278 *
279 * Returns 0 or the error code
280 */
281static int
282xmlParse3986Query(xmlURIPtr uri, const char **str)
283{
284 const char *cur;
285
286 if (str == NULL)
287 return (-1);
288
289 cur = *str;
290
291 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293 NEXT(cur);
294 if (uri != NULL) {
295 if (uri->query != NULL)
296 xmlFree(uri->query);
297 if (uri->cleanup & 2)
298 uri->query = STRNDUP(*str, cur - *str);
299 else
300 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301
302 /* Save the raw bytes of the query as well.
303 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 */
305 if (uri->query_raw != NULL)
306 xmlFree (uri->query_raw);
307 uri->query_raw = STRNDUP (*str, cur - *str);
308 }
309 *str = cur;
310 return (0);
311}
312
313/**
314 * xmlParse3986Port:
315 * @uri: pointer to an URI structure
316 * @str: the string to analyze
317 *
Michael Paddon846cf012016-05-21 17:16:05 +0800318 * Parse a port part and fills in the appropriate fields
Daniel Veillardd7af5552008-08-04 15:29:44 +0000319 * of the @uri structure
320 *
321 * port = *DIGIT
322 *
323 * Returns 0 or the error code
324 */
325static int
326xmlParse3986Port(xmlURIPtr uri, const char **str)
327{
328 const char *cur = *str;
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700329 int port = 0;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000330
331 if (ISA_DIGIT(cur)) {
Daniel Veillardd7af5552008-08-04 15:29:44 +0000332 while (ISA_DIGIT(cur)) {
Haibo Huangd23e46c2020-10-28 22:26:09 -0700333 int digit = *cur - '0';
334
335 if (port > INT_MAX / 10)
336 return(1);
337 port *= 10;
338 if (port > INT_MAX - digit)
339 return(1);
340 port += digit;
Michael Paddon846cf012016-05-21 17:16:05 +0800341
Daniel Veillardd7af5552008-08-04 15:29:44 +0000342 cur++;
343 }
Michael Paddon846cf012016-05-21 17:16:05 +0800344 if (uri != NULL)
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700345 uri->port = port;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000346 *str = cur;
347 return(0);
348 }
349 return(1);
350}
351
352/**
353 * xmlParse3986Userinfo:
354 * @uri: pointer to an URI structure
355 * @str: the string to analyze
356 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700357 * Parse an user information part and fills in the appropriate fields
Daniel Veillardd7af5552008-08-04 15:29:44 +0000358 * of the @uri structure
359 *
360 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
361 *
362 * Returns 0 or the error code
363 */
364static int
365xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
366{
367 const char *cur;
368
369 cur = *str;
370 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
371 ISA_SUB_DELIM(cur) || (*cur == ':'))
372 NEXT(cur);
373 if (*cur == '@') {
374 if (uri != NULL) {
375 if (uri->user != NULL) xmlFree(uri->user);
376 if (uri->cleanup & 2)
377 uri->user = STRNDUP(*str, cur - *str);
378 else
379 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
380 }
381 *str = cur;
382 return(0);
383 }
384 return(1);
385}
386
/**
 * xmlParse3986DecOctet:
 * @str:  in/out pointer to the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. A two digit value may not start with '0', and a
 * three digit value must be in the range 100-255. When three digits are
 * available only those three are consumed; any following digit is left
 * for the caller to reject.
 *
 * Returns 0 if found and skipped (*@str is advanced), 1 otherwise
 * (*@str is left untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99, no leading zero allowed */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the bound applies to the THIRD digit */
        cur += 3;
    } else {
        return(1);
    }

    *str = cur;
    return(0);
}
424/**
425 * xmlParse3986Host:
426 * @uri: pointer to an URI structure
427 * @str: the string to analyze
428 *
429 * Parse an host part and fills in the appropriate fields
430 * of the @uri structure
431 *
432 * host = IP-literal / IPv4address / reg-name
433 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
434 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
435 * reg-name = *( unreserved / pct-encoded / sub-delims )
436 *
437 * Returns 0 or the error code
438 */
439static int
440xmlParse3986Host(xmlURIPtr uri, const char **str)
441{
442 const char *cur = *str;
443 const char *host;
444
445 host = cur;
446 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700447 * IPv6 and future addressing scheme are enclosed between brackets
Daniel Veillardd7af5552008-08-04 15:29:44 +0000448 */
449 if (*cur == '[') {
450 cur++;
451 while ((*cur != ']') && (*cur != 0))
452 cur++;
453 if (*cur != ']')
454 return(1);
455 cur++;
456 goto found;
457 }
458 /*
459 * try to parse an IPv4
460 */
461 if (ISA_DIGIT(cur)) {
462 if (xmlParse3986DecOctet(&cur) != 0)
463 goto not_ipv4;
464 if (*cur != '.')
465 goto not_ipv4;
466 cur++;
467 if (xmlParse3986DecOctet(&cur) != 0)
468 goto not_ipv4;
469 if (*cur != '.')
470 goto not_ipv4;
471 if (xmlParse3986DecOctet(&cur) != 0)
472 goto not_ipv4;
473 if (*cur != '.')
474 goto not_ipv4;
475 if (xmlParse3986DecOctet(&cur) != 0)
476 goto not_ipv4;
477 goto found;
478not_ipv4:
479 cur = *str;
480 }
481 /*
482 * then this should be a hostname which can be empty
483 */
484 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
485 NEXT(cur);
486found:
487 if (uri != NULL) {
488 if (uri->authority != NULL) xmlFree(uri->authority);
489 uri->authority = NULL;
490 if (uri->server != NULL) xmlFree(uri->server);
491 if (cur != host) {
492 if (uri->cleanup & 2)
493 uri->server = STRNDUP(host, cur - host);
494 else
495 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
496 } else
497 uri->server = NULL;
498 }
499 *str = cur;
500 return(0);
501}
502
503/**
504 * xmlParse3986Authority:
505 * @uri: pointer to an URI structure
506 * @str: the string to analyze
507 *
508 * Parse an authority part and fills in the appropriate fields
509 * of the @uri structure
510 *
511 * authority = [ userinfo "@" ] host [ ":" port ]
512 *
513 * Returns 0 or the error code
514 */
515static int
516xmlParse3986Authority(xmlURIPtr uri, const char **str)
517{
518 const char *cur;
519 int ret;
520
521 cur = *str;
522 /*
523 * try to parse an userinfo and check for the trailing @
524 */
525 ret = xmlParse3986Userinfo(uri, &cur);
526 if ((ret != 0) || (*cur != '@'))
527 cur = *str;
528 else
529 cur++;
530 ret = xmlParse3986Host(uri, &cur);
531 if (ret != 0) return(ret);
532 if (*cur == ':') {
Daniel Veillardf582d142008-08-27 17:23:41 +0000533 cur++;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000534 ret = xmlParse3986Port(uri, &cur);
535 if (ret != 0) return(ret);
536 }
537 *str = cur;
538 return(0);
539}
540
/**
 * xmlParse3986Segment:
 * @str:  in/out pointer to the string to analyze
 * @forbid: an optional forbidden character, 0 if none
 * @empty: allow an empty segment
 *
 * Skip a path segment. Nothing is stored, only *@str is advanced.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * A segment which starts with the @forbid character is treated exactly
 * like a segment starting with a non pchar: it is empty, which is only
 * acceptable when @empty is set.
 *
 * Returns 0 on success, 1 if the segment is empty and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    /* the first character was validated above */
    NEXT(cur);
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
573
574/**
575 * xmlParse3986PathAbEmpty:
576 * @uri: pointer to an URI structure
577 * @str: the string to analyze
578 *
579 * Parse an path absolute or empty and fills in the appropriate fields
580 * of the @uri structure
581 *
582 * path-abempty = *( "/" segment )
583 *
584 * Returns 0 or the error code
585 */
586static int
587xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
588{
589 const char *cur;
590 int ret;
591
592 cur = *str;
593
594 while (*cur == '/') {
595 cur++;
596 ret = xmlParse3986Segment(&cur, 0, 1);
597 if (ret != 0) return(ret);
598 }
599 if (uri != NULL) {
600 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200601 if (*str != cur) {
602 if (uri->cleanup & 2)
603 uri->path = STRNDUP(*str, cur - *str);
604 else
605 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
606 } else {
607 uri->path = NULL;
608 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000609 }
610 *str = cur;
611 return (0);
612}
613
614/**
615 * xmlParse3986PathAbsolute:
616 * @uri: pointer to an URI structure
617 * @str: the string to analyze
618 *
619 * Parse an path absolute and fills in the appropriate fields
620 * of the @uri structure
621 *
622 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
623 *
624 * Returns 0 or the error code
625 */
626static int
627xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
628{
629 const char *cur;
630 int ret;
631
632 cur = *str;
633
634 if (*cur != '/')
635 return(1);
636 cur++;
637 ret = xmlParse3986Segment(&cur, 0, 0);
638 if (ret == 0) {
639 while (*cur == '/') {
640 cur++;
641 ret = xmlParse3986Segment(&cur, 0, 1);
642 if (ret != 0) return(ret);
643 }
644 }
645 if (uri != NULL) {
646 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200647 if (cur != *str) {
648 if (uri->cleanup & 2)
649 uri->path = STRNDUP(*str, cur - *str);
650 else
651 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
652 } else {
653 uri->path = NULL;
654 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000655 }
656 *str = cur;
657 return (0);
658}
659
660/**
661 * xmlParse3986PathRootless:
662 * @uri: pointer to an URI structure
663 * @str: the string to analyze
664 *
665 * Parse an path without root and fills in the appropriate fields
666 * of the @uri structure
667 *
668 * path-rootless = segment-nz *( "/" segment )
669 *
670 * Returns 0 or the error code
671 */
672static int
673xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
674{
675 const char *cur;
676 int ret;
677
678 cur = *str;
679
680 ret = xmlParse3986Segment(&cur, 0, 0);
681 if (ret != 0) return(ret);
682 while (*cur == '/') {
683 cur++;
684 ret = xmlParse3986Segment(&cur, 0, 1);
685 if (ret != 0) return(ret);
686 }
687 if (uri != NULL) {
688 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200689 if (cur != *str) {
690 if (uri->cleanup & 2)
691 uri->path = STRNDUP(*str, cur - *str);
692 else
693 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
694 } else {
695 uri->path = NULL;
696 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000697 }
698 *str = cur;
699 return (0);
700}
701
702/**
703 * xmlParse3986PathNoScheme:
704 * @uri: pointer to an URI structure
705 * @str: the string to analyze
706 *
707 * Parse an path which is not a scheme and fills in the appropriate fields
708 * of the @uri structure
709 *
710 * path-noscheme = segment-nz-nc *( "/" segment )
711 *
712 * Returns 0 or the error code
713 */
714static int
715xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
716{
717 const char *cur;
718 int ret;
719
720 cur = *str;
721
722 ret = xmlParse3986Segment(&cur, ':', 0);
723 if (ret != 0) return(ret);
724 while (*cur == '/') {
725 cur++;
726 ret = xmlParse3986Segment(&cur, 0, 1);
727 if (ret != 0) return(ret);
728 }
729 if (uri != NULL) {
730 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard1358fef2009-10-02 17:29:48 +0200731 if (cur != *str) {
732 if (uri->cleanup & 2)
733 uri->path = STRNDUP(*str, cur - *str);
734 else
735 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
736 } else {
737 uri->path = NULL;
738 }
Daniel Veillardd7af5552008-08-04 15:29:44 +0000739 }
740 *str = cur;
741 return (0);
742}
743
744/**
745 * xmlParse3986HierPart:
746 * @uri: pointer to an URI structure
747 * @str: the string to analyze
748 *
749 * Parse an hierarchical part and fills in the appropriate fields
750 * of the @uri structure
751 *
752 * hier-part = "//" authority path-abempty
753 * / path-absolute
754 * / path-rootless
755 * / path-empty
756 *
757 * Returns 0 or the error code
758 */
759static int
760xmlParse3986HierPart(xmlURIPtr uri, const char **str)
761{
762 const char *cur;
763 int ret;
764
765 cur = *str;
766
767 if ((*cur == '/') && (*(cur + 1) == '/')) {
768 cur += 2;
769 ret = xmlParse3986Authority(uri, &cur);
770 if (ret != 0) return(ret);
Daniel Veillardbeb72812014-10-03 19:22:39 +0800771 if (uri->server == NULL)
772 uri->port = -1;
Daniel Veillardd7af5552008-08-04 15:29:44 +0000773 ret = xmlParse3986PathAbEmpty(uri, &cur);
774 if (ret != 0) return(ret);
775 *str = cur;
776 return(0);
777 } else if (*cur == '/') {
778 ret = xmlParse3986PathAbsolute(uri, &cur);
779 if (ret != 0) return(ret);
780 } else if (ISA_PCHAR(cur)) {
781 ret = xmlParse3986PathRootless(uri, &cur);
782 if (ret != 0) return(ret);
783 } else {
784 /* path-empty is effectively empty */
785 if (uri != NULL) {
786 if (uri->path != NULL) xmlFree(uri->path);
787 uri->path = NULL;
788 }
789 }
790 *str = cur;
791 return (0);
792}
793
794/**
795 * xmlParse3986RelativeRef:
796 * @uri: pointer to an URI structure
797 * @str: the string to analyze
798 *
799 * Parse an URI string and fills in the appropriate fields
800 * of the @uri structure
801 *
802 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
803 * relative-part = "//" authority path-abempty
804 * / path-absolute
805 * / path-noscheme
806 * / path-empty
807 *
808 * Returns 0 or the error code
809 */
810static int
811xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
812 int ret;
813
814 if ((*str == '/') && (*(str + 1) == '/')) {
815 str += 2;
816 ret = xmlParse3986Authority(uri, &str);
817 if (ret != 0) return(ret);
818 ret = xmlParse3986PathAbEmpty(uri, &str);
819 if (ret != 0) return(ret);
820 } else if (*str == '/') {
821 ret = xmlParse3986PathAbsolute(uri, &str);
822 if (ret != 0) return(ret);
823 } else if (ISA_PCHAR(str)) {
824 ret = xmlParse3986PathNoScheme(uri, &str);
825 if (ret != 0) return(ret);
826 } else {
827 /* path-empty is effectively empty */
828 if (uri != NULL) {
829 if (uri->path != NULL) xmlFree(uri->path);
830 uri->path = NULL;
831 }
832 }
833
834 if (*str == '?') {
835 str++;
836 ret = xmlParse3986Query(uri, &str);
837 if (ret != 0) return(ret);
838 }
839 if (*str == '#') {
840 str++;
841 ret = xmlParse3986Fragment(uri, &str);
842 if (ret != 0) return(ret);
843 }
844 if (*str != 0) {
845 xmlCleanURI(uri);
846 return(1);
847 }
848 return(0);
849}
850
851
852/**
853 * xmlParse3986URI:
854 * @uri: pointer to an URI structure
855 * @str: the string to analyze
856 *
857 * Parse an URI string and fills in the appropriate fields
858 * of the @uri structure
859 *
860 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
861 *
862 * Returns 0 or the error code
863 */
864static int
865xmlParse3986URI(xmlURIPtr uri, const char *str) {
866 int ret;
867
868 ret = xmlParse3986Scheme(uri, &str);
869 if (ret != 0) return(ret);
870 if (*str != ':') {
871 return(1);
872 }
873 str++;
874 ret = xmlParse3986HierPart(uri, &str);
875 if (ret != 0) return(ret);
876 if (*str == '?') {
877 str++;
878 ret = xmlParse3986Query(uri, &str);
879 if (ret != 0) return(ret);
880 }
881 if (*str == '#') {
882 str++;
883 ret = xmlParse3986Fragment(uri, &str);
884 if (ret != 0) return(ret);
885 }
886 if (*str != 0) {
887 xmlCleanURI(uri);
888 return(1);
889 }
890 return(0);
891}
892
893/**
894 * xmlParse3986URIReference:
895 * @uri: pointer to an URI structure
896 * @str: the string to analyze
897 *
898 * Parse an URI reference string and fills in the appropriate fields
899 * of the @uri structure
900 *
901 * URI-reference = URI / relative-ref
902 *
903 * Returns 0 or the error code
904 */
905static int
906xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
907 int ret;
908
909 if (str == NULL)
910 return(-1);
911 xmlCleanURI(uri);
912
913 /*
914 * Try first to parse absolute refs, then fallback to relative if
915 * it fails.
916 */
917 ret = xmlParse3986URI(uri, str);
918 if (ret != 0) {
919 xmlCleanURI(uri);
920 ret = xmlParse3986RelativeRef(uri, str);
921 if (ret != 0) {
922 xmlCleanURI(uri);
923 return(ret);
924 }
925 }
926 return(0);
927}
928
929/**
930 * xmlParseURI:
931 * @str: the URI string to analyze
932 *
933 * Parse an URI based on RFC 3986
934 *
935 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
936 *
937 * Returns a newly built xmlURIPtr or NULL in case of error
938 */
939xmlURIPtr
940xmlParseURI(const char *str) {
941 xmlURIPtr uri;
942 int ret;
943
944 if (str == NULL)
945 return(NULL);
946 uri = xmlCreateURI();
947 if (uri != NULL) {
948 ret = xmlParse3986URIReference(uri, str);
949 if (ret) {
950 xmlFreeURI(uri);
951 return(NULL);
952 }
953 }
954 return(uri);
955}
956
957/**
958 * xmlParseURIReference:
959 * @uri: pointer to an URI structure
960 * @str: the string to analyze
961 *
962 * Parse an URI reference string based on RFC 3986 and fills in the
963 * appropriate fields of the @uri structure
964 *
965 * URI-reference = URI / relative-ref
966 *
967 * Returns 0 or the error code
968 */
969int
970xmlParseURIReference(xmlURIPtr uri, const char *str) {
971 return(xmlParse3986URIReference(uri, str));
972}
973
974/**
975 * xmlParseURIRaw:
976 * @str: the URI string to analyze
977 * @raw: if 1 unescaping of URI pieces are disabled
978 *
979 * Parse an URI but allows to keep intact the original fragments.
980 *
981 * URI-reference = URI / relative-ref
982 *
983 * Returns a newly built xmlURIPtr or NULL in case of error
984 */
985xmlURIPtr
986xmlParseURIRaw(const char *str, int raw) {
987 xmlURIPtr uri;
988 int ret;
989
990 if (str == NULL)
991 return(NULL);
992 uri = xmlCreateURI();
993 if (uri != NULL) {
994 if (raw) {
995 uri->cleanup |= 2;
996 }
997 ret = xmlParseURIReference(uri, str);
998 if (ret) {
999 xmlFreeURI(uri);
1000 return(NULL);
1001 }
1002 }
1003 return(uri);
1004}
1005
1006/************************************************************************
1007 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001008 * Generic URI structure functions *
1009 * *
1010 ************************************************************************/
1011
1012/**
1013 * xmlCreateURI:
1014 *
1015 * Simply creates an empty xmlURI
1016 *
1017 * Returns the new structure or NULL in case of error
1018 */
1019xmlURIPtr
1020xmlCreateURI(void) {
1021 xmlURIPtr ret;
1022
1023 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1024 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001025 xmlURIErrMemory("creating URI structure\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001026 return(NULL);
1027 }
1028 memset(ret, 0, sizeof(xmlURI));
1029 return(ret);
1030}
1031
1032/**
Daniel Veillard57560382012-07-24 11:44:23 +08001033 * xmlSaveUriRealloc:
1034 *
1035 * Function to handle properly a reallocation when saving an URI
1036 * Also imposes some limit on the length of an URI string output
1037 */
1038static xmlChar *
1039xmlSaveUriRealloc(xmlChar *ret, int *max) {
1040 xmlChar *temp;
1041 int tmp;
1042
1043 if (*max > MAX_URI_LENGTH) {
1044 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1045 return(NULL);
1046 }
1047 tmp = *max * 2;
1048 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1049 if (temp == NULL) {
1050 xmlURIErrMemory("saving URI\n");
1051 return(NULL);
1052 }
1053 *max = tmp;
1054 return(temp);
1055}
1056
1057/**
Owen Taylor3473f882001-02-23 17:55:21 +00001058 * xmlSaveUri:
1059 * @uri: pointer to an xmlURI
1060 *
1061 * Save the URI as an escaped string
1062 *
1063 * Returns a new string (to be deallocated by caller)
1064 */
1065xmlChar *
1066xmlSaveUri(xmlURIPtr uri) {
1067 xmlChar *ret = NULL;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001068 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001069 const char *p;
1070 int len;
1071 int max;
1072
1073 if (uri == NULL) return(NULL);
1074
1075
1076 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001077 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001079 xmlURIErrMemory("saving URI\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001080 return(NULL);
1081 }
1082 len = 0;
1083
1084 if (uri->scheme != NULL) {
1085 p = uri->scheme;
1086 while (*p != 0) {
1087 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001088 temp = xmlSaveUriRealloc(ret, &max);
1089 if (temp == NULL) goto mem_error;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001090 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001091 }
1092 ret[len++] = *p++;
1093 }
1094 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001095 temp = xmlSaveUriRealloc(ret, &max);
1096 if (temp == NULL) goto mem_error;
1097 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001098 }
1099 ret[len++] = ':';
1100 }
1101 if (uri->opaque != NULL) {
1102 p = uri->opaque;
1103 while (*p != 0) {
1104 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001105 temp = xmlSaveUriRealloc(ret, &max);
1106 if (temp == NULL) goto mem_error;
1107 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001108 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001109 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +00001110 ret[len++] = *p++;
1111 else {
1112 int val = *(unsigned char *)p++;
1113 int hi = val / 0x10, lo = val % 0x10;
1114 ret[len++] = '%';
1115 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1116 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1117 }
1118 }
Owen Taylor3473f882001-02-23 17:55:21 +00001119 } else {
Daniel Veillardbeb72812014-10-03 19:22:39 +08001120 if ((uri->server != NULL) || (uri->port == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001121 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001122 temp = xmlSaveUriRealloc(ret, &max);
1123 if (temp == NULL) goto mem_error;
1124 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001125 }
1126 ret[len++] = '/';
1127 ret[len++] = '/';
1128 if (uri->user != NULL) {
1129 p = uri->user;
1130 while (*p != 0) {
1131 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001132 temp = xmlSaveUriRealloc(ret, &max);
1133 if (temp == NULL) goto mem_error;
1134 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001135 }
1136 if ((IS_UNRESERVED(*(p))) ||
1137 ((*(p) == ';')) || ((*(p) == ':')) ||
1138 ((*(p) == '&')) || ((*(p) == '=')) ||
1139 ((*(p) == '+')) || ((*(p) == '$')) ||
1140 ((*(p) == ',')))
1141 ret[len++] = *p++;
1142 else {
1143 int val = *(unsigned char *)p++;
1144 int hi = val / 0x10, lo = val % 0x10;
1145 ret[len++] = '%';
1146 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1147 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1148 }
1149 }
1150 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001151 temp = xmlSaveUriRealloc(ret, &max);
1152 if (temp == NULL) goto mem_error;
1153 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001154 }
1155 ret[len++] = '@';
1156 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001157 if (uri->server != NULL) {
1158 p = uri->server;
1159 while (*p != 0) {
1160 if (len >= max) {
1161 temp = xmlSaveUriRealloc(ret, &max);
1162 if (temp == NULL) goto mem_error;
1163 ret = temp;
1164 }
1165 ret[len++] = *p++;
Owen Taylor3473f882001-02-23 17:55:21 +00001166 }
Daniel Veillardbeb72812014-10-03 19:22:39 +08001167 if (uri->port > 0) {
1168 if (len + 10 >= max) {
1169 temp = xmlSaveUriRealloc(ret, &max);
1170 if (temp == NULL) goto mem_error;
1171 ret = temp;
1172 }
1173 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +00001174 }
Owen Taylor3473f882001-02-23 17:55:21 +00001175 }
1176 } else if (uri->authority != NULL) {
1177 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001178 temp = xmlSaveUriRealloc(ret, &max);
1179 if (temp == NULL) goto mem_error;
1180 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001181 }
1182 ret[len++] = '/';
1183 ret[len++] = '/';
1184 p = uri->authority;
1185 while (*p != 0) {
1186 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001187 temp = xmlSaveUriRealloc(ret, &max);
1188 if (temp == NULL) goto mem_error;
1189 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001190 }
1191 if ((IS_UNRESERVED(*(p))) ||
1192 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1193 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1194 ((*(p) == '=')) || ((*(p) == '+')))
1195 ret[len++] = *p++;
1196 else {
1197 int val = *(unsigned char *)p++;
1198 int hi = val / 0x10, lo = val % 0x10;
1199 ret[len++] = '%';
1200 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202 }
1203 }
1204 } else if (uri->scheme != NULL) {
1205 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001206 temp = xmlSaveUriRealloc(ret, &max);
1207 if (temp == NULL) goto mem_error;
1208 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001209 }
Owen Taylor3473f882001-02-23 17:55:21 +00001210 }
1211 if (uri->path != NULL) {
1212 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001213 /*
1214 * the colon in file:///d: should not be escaped or
1215 * Windows accesses fail later.
1216 */
1217 if ((uri->scheme != NULL) &&
1218 (p[0] == '/') &&
1219 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1220 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1221 (p[2] == ':') &&
Daniel Veillardd7af5552008-08-04 15:29:44 +00001222 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
Daniel Veillarde54c3172008-03-25 13:22:41 +00001223 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001224 temp = xmlSaveUriRealloc(ret, &max);
1225 if (temp == NULL) goto mem_error;
1226 ret = temp;
Daniel Veillarde54c3172008-03-25 13:22:41 +00001227 }
1228 ret[len++] = *p++;
1229 ret[len++] = *p++;
1230 ret[len++] = *p++;
1231 }
Owen Taylor3473f882001-02-23 17:55:21 +00001232 while (*p != 0) {
1233 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001234 temp = xmlSaveUriRealloc(ret, &max);
1235 if (temp == NULL) goto mem_error;
1236 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001237 }
1238 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1239 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1240 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1241 ((*(p) == ',')))
1242 ret[len++] = *p++;
1243 else {
1244 int val = *(unsigned char *)p++;
1245 int hi = val / 0x10, lo = val % 0x10;
1246 ret[len++] = '%';
1247 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1248 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1249 }
1250 }
1251 }
Daniel Veillarda1413b82007-04-26 08:33:28 +00001252 if (uri->query_raw != NULL) {
1253 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001254 temp = xmlSaveUriRealloc(ret, &max);
1255 if (temp == NULL) goto mem_error;
1256 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001257 }
1258 ret[len++] = '?';
1259 p = uri->query_raw;
1260 while (*p != 0) {
1261 if (len + 1 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001262 temp = xmlSaveUriRealloc(ret, &max);
1263 if (temp == NULL) goto mem_error;
1264 ret = temp;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001265 }
1266 ret[len++] = *p++;
1267 }
1268 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001269 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001270 temp = xmlSaveUriRealloc(ret, &max);
1271 if (temp == NULL) goto mem_error;
1272 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001273 }
1274 ret[len++] = '?';
1275 p = uri->query;
1276 while (*p != 0) {
1277 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001278 temp = xmlSaveUriRealloc(ret, &max);
1279 if (temp == NULL) goto mem_error;
1280 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001281 }
Daniel Veillard57560382012-07-24 11:44:23 +08001282 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Owen Taylor3473f882001-02-23 17:55:21 +00001283 ret[len++] = *p++;
1284 else {
1285 int val = *(unsigned char *)p++;
1286 int hi = val / 0x10, lo = val % 0x10;
1287 ret[len++] = '%';
1288 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1289 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1290 }
1291 }
1292 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001293 }
1294 if (uri->fragment != NULL) {
1295 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001296 temp = xmlSaveUriRealloc(ret, &max);
1297 if (temp == NULL) goto mem_error;
1298 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001299 }
1300 ret[len++] = '#';
1301 p = uri->fragment;
1302 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001303 if (len + 3 >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001304 temp = xmlSaveUriRealloc(ret, &max);
1305 if (temp == NULL) goto mem_error;
1306 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001307 }
Daniel Veillard57560382012-07-24 11:44:23 +08001308 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001309 ret[len++] = *p++;
1310 else {
1311 int val = *(unsigned char *)p++;
1312 int hi = val / 0x10, lo = val % 0x10;
1313 ret[len++] = '%';
1314 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1315 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +00001316 }
1317 }
Owen Taylor3473f882001-02-23 17:55:21 +00001318 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001319 if (len >= max) {
Daniel Veillard57560382012-07-24 11:44:23 +08001320 temp = xmlSaveUriRealloc(ret, &max);
1321 if (temp == NULL) goto mem_error;
1322 ret = temp;
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001323 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02001324 ret[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001325 return(ret);
Daniel Veillard57560382012-07-24 11:44:23 +08001326
1327mem_error:
1328 xmlFree(ret);
1329 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001330}
1331
1332/**
1333 * xmlPrintURI:
1334 * @stream: a FILE* for the output
1335 * @uri: pointer to an xmlURI
1336 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001337 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +00001338 */
1339void
1340xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1341 xmlChar *out;
1342
1343 out = xmlSaveUri(uri);
1344 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +00001345 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +00001346 xmlFree(out);
1347 }
1348}
1349
1350/**
1351 * xmlCleanURI:
1352 * @uri: pointer to an xmlURI
1353 *
1354 * Make sure the xmlURI struct is free of content
1355 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001356static void
Owen Taylor3473f882001-02-23 17:55:21 +00001357xmlCleanURI(xmlURIPtr uri) {
1358 if (uri == NULL) return;
1359
1360 if (uri->scheme != NULL) xmlFree(uri->scheme);
1361 uri->scheme = NULL;
1362 if (uri->server != NULL) xmlFree(uri->server);
1363 uri->server = NULL;
1364 if (uri->user != NULL) xmlFree(uri->user);
1365 uri->user = NULL;
1366 if (uri->path != NULL) xmlFree(uri->path);
1367 uri->path = NULL;
1368 if (uri->fragment != NULL) xmlFree(uri->fragment);
1369 uri->fragment = NULL;
1370 if (uri->opaque != NULL) xmlFree(uri->opaque);
1371 uri->opaque = NULL;
1372 if (uri->authority != NULL) xmlFree(uri->authority);
1373 uri->authority = NULL;
1374 if (uri->query != NULL) xmlFree(uri->query);
1375 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +00001376 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1377 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001378}
1379
1380/**
1381 * xmlFreeURI:
1382 * @uri: pointer to an xmlURI
1383 *
1384 * Free up the xmlURI struct
1385 */
1386void
1387xmlFreeURI(xmlURIPtr uri) {
1388 if (uri == NULL) return;
1389
1390 if (uri->scheme != NULL) xmlFree(uri->scheme);
1391 if (uri->server != NULL) xmlFree(uri->server);
1392 if (uri->user != NULL) xmlFree(uri->user);
1393 if (uri->path != NULL) xmlFree(uri->path);
1394 if (uri->fragment != NULL) xmlFree(uri->fragment);
1395 if (uri->opaque != NULL) xmlFree(uri->opaque);
1396 if (uri->authority != NULL) xmlFree(uri->authority);
1397 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001398 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001399 xmlFree(uri);
1400}
1401
1402/************************************************************************
1403 * *
1404 * Helper functions *
1405 * *
1406 ************************************************************************/
1407
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" always points at the first character of a segment here.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy byte by byte
         * (terminator included) instead of using strcpy.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the slash(es) in front of the removed text.  If
         * that reaches the start of the buffer there is no previous
         * segment, so keep going from here.  Looking at segp[-1] rather
         * than at segp[0] after the decrement matters: it keeps a
         * one-character first segment (as in "a/b/../../c") from being
         * mistaken for "nothing but slashes before cur".
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* segp[-1] is the last character of the previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +00001596
/*
 * is_hex:
 * Returns 1 when @c is an ASCII hexadecimal digit (0-9, a-f or A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return((c >= 'A') && (c <= 'F'));
}
1604
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Replaces every "%XX" sequence (two hexadecimal digits) found in the
 * first @len bytes of @str by the byte it encodes; all other bytes are
 * copied unchanged. No check is made that the string is a URI and no
 * character encoding conversion is applied to the decoded bytes.
 * The result is never longer than the input, so a caller-supplied
 * @target needs room for @len bytes plus the terminating 0.
 *
 * Returns @target when one was supplied, otherwise a newly allocated
 * string holding the unescaped copy; NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;
    int remaining;

    if (str == NULL)
        return(NULL);

    remaining = len;
    if (remaining <= 0)
        remaining = strlen(str);
    if (remaining < 0)
        return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(remaining + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (remaining > 0) {
        int value, i;

        /* Anything that is not a complete "%XX" triplet is copied as is. */
        if ((remaining <= 2) || (src[0] != '%') ||
            (!is_hex(src[1])) || (!is_hex(src[2]))) {
            *dst++ = *src++;
            remaining--;
            continue;
        }

        /* Both digits were validated by is_hex() above. */
        value = 0;
        for (i = 1; i <= 2; i++) {
            char digit = src[i];

            value *= 16;
            if ((digit >= '0') && (digit <= '9'))
                value += digit - '0';
            else if ((digit >= 'a') && (digit <= 'f'))
                value += (digit - 'a') + 10;
            else
                value += (digit - 'A') + 10;
        }
        src += 3;
        remaining -= 3;
        *dst++ = (char) value;
    }
    *dst = 0;
    return(result);
}
1667
1668/**
Daniel Veillard8514c672001-05-23 10:29:12 +00001669 * xmlURIEscapeStr:
1670 * @str: string to escape
1671 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +00001672 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001673 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1674 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +00001675 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001676 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +00001677 */
1678xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +00001679xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1680 xmlChar *ret, ch;
Daniel Veillarded86dc22008-04-24 11:58:41 +00001681 xmlChar *temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001682 const xmlChar *in;
Daniel Veillard57560382012-07-24 11:44:23 +08001683 int len, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001684
1685 if (str == NULL)
1686 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +00001687 if (str[0] == 0)
1688 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +00001689 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +00001690 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001691
1692 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001693 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001694 if (ret == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001695 xmlURIErrMemory("escaping URI value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001696 return(NULL);
1697 }
1698 in = (const xmlChar *) str;
1699 out = 0;
1700 while(*in != 0) {
1701 if (len - out <= 3) {
Daniel Veillard57560382012-07-24 11:44:23 +08001702 temp = xmlSaveUriRealloc(ret, &len);
Daniel Veillarded86dc22008-04-24 11:58:41 +00001703 if (temp == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08001704 xmlURIErrMemory("escaping URI value\n");
Daniel Veillarded86dc22008-04-24 11:58:41 +00001705 xmlFree(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001706 return(NULL);
1707 }
Daniel Veillarded86dc22008-04-24 11:58:41 +00001708 ret = temp;
Owen Taylor3473f882001-02-23 17:55:21 +00001709 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001710
1711 ch = *in;
1712
Daniel Veillardeb475a32002-04-14 22:00:22 +00001713 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001714 unsigned char val;
1715 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001716 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001717 if (val <= 9)
1718 ret[out++] = '0' + val;
1719 else
1720 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001721 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001722 if (val <= 9)
1723 ret[out++] = '0' + val;
1724 else
1725 ret[out++] = 'A' + val - 0xA;
1726 in++;
1727 } else {
1728 ret[out++] = *in++;
1729 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001730
Owen Taylor3473f882001-02-23 17:55:21 +00001731 }
1732 ret[out] = 0;
1733 return(ret);
1734}
1735
Daniel Veillard8514c672001-05-23 10:29:12 +00001736/**
1737 * xmlURIEscape:
1738 * @str: the string of the URI to escape
1739 *
1740 * Escaping routine, does not do validity checks !
1741 * It will try to escape the chars needing this, but this is heuristic
1742 * based it's impossible to be sure.
1743 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001744 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001745 *
1746 * 25 May 2001
1747 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1748 * according to RFC2396.
1749 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001750 */
1751xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001752xmlURIEscape(const xmlChar * str)
1753{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001754 xmlChar *ret, *segment = NULL;
1755 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001756 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001757
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001758 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001759 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001760
1761 uri = xmlCreateURI();
1762 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001763 /*
1764 * Allow escaping errors in the unescaped form
1765 */
1766 uri->cleanup = 1;
1767 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001768 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001769 xmlFreeURI(uri);
1770 return (NULL);
1771 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001772 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001773
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001774 if (!uri)
1775 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001776
1777 ret = NULL;
1778
Elliott Hughesdaae4d52020-11-09 14:19:21 -08001779#define NULLCHK(p) if(!p) { \
1780 xmlURIErrMemory("escaping URI value\n"); \
1781 xmlFreeURI(uri); \
1782 xmlFree(ret); \
1783 return NULL; } \
1784
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001785 if (uri->scheme) {
1786 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1787 NULLCHK(segment)
1788 ret = xmlStrcat(ret, segment);
1789 ret = xmlStrcat(ret, BAD_CAST ":");
1790 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001791 }
1792
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001793 if (uri->authority) {
1794 segment =
1795 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1796 NULLCHK(segment)
1797 ret = xmlStrcat(ret, BAD_CAST "//");
1798 ret = xmlStrcat(ret, segment);
1799 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001800 }
1801
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001802 if (uri->user) {
1803 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1804 NULLCHK(segment)
Elliott Hughesdaae4d52020-11-09 14:19:21 -08001805 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001806 ret = xmlStrcat(ret, segment);
1807 ret = xmlStrcat(ret, BAD_CAST "@");
1808 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001809 }
1810
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001811 if (uri->server) {
1812 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1813 NULLCHK(segment)
Elliott Hughesdaae4d52020-11-09 14:19:21 -08001814 if (uri->user == NULL)
1815 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001816 ret = xmlStrcat(ret, segment);
1817 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001818 }
1819
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001820 if (uri->port) {
1821 xmlChar port[10];
1822
Daniel Veillard43d3f612001-11-10 11:57:23 +00001823 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001824 ret = xmlStrcat(ret, BAD_CAST ":");
1825 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001826 }
1827
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001828 if (uri->path) {
1829 segment =
1830 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1831 NULLCHK(segment)
1832 ret = xmlStrcat(ret, segment);
1833 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001834 }
1835
Daniel Veillarda1413b82007-04-26 08:33:28 +00001836 if (uri->query_raw) {
1837 ret = xmlStrcat(ret, BAD_CAST "?");
1838 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1839 }
1840 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001841 segment =
1842 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1843 NULLCHK(segment)
1844 ret = xmlStrcat(ret, BAD_CAST "?");
1845 ret = xmlStrcat(ret, segment);
1846 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001847 }
1848
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001849 if (uri->opaque) {
1850 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1851 NULLCHK(segment)
1852 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001853 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001854 }
1855
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001856 if (uri->fragment) {
1857 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1858 NULLCHK(segment)
1859 ret = xmlStrcat(ret, BAD_CAST "#");
1860 ret = xmlStrcat(ret, segment);
1861 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001862 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001863
1864 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001865#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001866
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001867 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001868}
1869
Owen Taylor3473f882001-02-23 17:55:21 +00001870/************************************************************************
1871 * *
Owen Taylor3473f882001-02-23 17:55:21 +00001872 * Public functions *
1873 * *
1874 ************************************************************************/
1875
1876/**
1877 * xmlBuildURI:
1878 * @URI: the URI instance found in the document
1879 * @base: the base value
1880 *
1881 * Computes the final URI of the reference done by checking that
1882 * the given URI is valid, and building the final URI using the
Daniel Veillard57560382012-07-24 11:44:23 +08001883 * base URI. This is processed according to section 5.2 of the
Owen Taylor3473f882001-02-23 17:55:21 +00001884 * RFC 2396
1885 *
1886 * 5.2. Resolving Relative References to Absolute Form
1887 *
1888 * Returns a new URI string (to be freed by the caller) or NULL in case
1889 * of error.
1890 */
1891xmlChar *
1892xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1893 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001894 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001895 xmlURIPtr ref = NULL;
1896 xmlURIPtr bas = NULL;
1897 xmlURIPtr res = NULL;
1898
1899 /*
1900 * 1) The URI reference is parsed into the potential four components and
1901 * fragment identifier, as described in Section 4.3.
1902 *
1903 * NOTE that a completely empty URI is treated by modern browsers
1904 * as a reference to "." rather than as a synonym for the current
1905 * URI. Should we do that here?
1906 */
Daniel Veillard57560382012-07-24 11:44:23 +08001907 if (URI == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001908 ret = -1;
1909 else {
1910 if (*URI) {
1911 ref = xmlCreateURI();
1912 if (ref == NULL)
1913 goto done;
1914 ret = xmlParseURIReference(ref, (const char *) URI);
1915 }
1916 else
1917 ret = 0;
1918 }
1919 if (ret != 0)
1920 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001921 if ((ref != NULL) && (ref->scheme != NULL)) {
1922 /*
1923 * The URI is absolute don't modify.
1924 */
1925 val = xmlStrdup(URI);
1926 goto done;
1927 }
Owen Taylor3473f882001-02-23 17:55:21 +00001928 if (base == NULL)
1929 ret = -1;
1930 else {
1931 bas = xmlCreateURI();
1932 if (bas == NULL)
1933 goto done;
1934 ret = xmlParseURIReference(bas, (const char *) base);
1935 }
1936 if (ret != 0) {
1937 if (ref)
1938 val = xmlSaveUri(ref);
1939 goto done;
1940 }
1941 if (ref == NULL) {
1942 /*
1943 * the base fragment must be ignored
1944 */
1945 if (bas->fragment != NULL) {
1946 xmlFree(bas->fragment);
1947 bas->fragment = NULL;
1948 }
1949 val = xmlSaveUri(bas);
1950 goto done;
1951 }
1952
1953 /*
1954 * 2) If the path component is empty and the scheme, authority, and
1955 * query components are undefined, then it is a reference to the
1956 * current document and we are done. Otherwise, the reference URI's
1957 * query and fragment components are defined as found (or not found)
1958 * within the URI reference and not inherited from the base URI.
1959 *
1960 * NOTE that in modern browsers, the parsing differs from the above
1961 * in the following aspect: the query component is allowed to be
1962 * defined while still treating this as a reference to the current
1963 * document.
1964 */
1965 res = xmlCreateURI();
1966 if (res == NULL)
1967 goto done;
1968 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1969 ((ref->authority == NULL) && (ref->server == NULL))) {
1970 if (bas->scheme != NULL)
1971 res->scheme = xmlMemStrdup(bas->scheme);
1972 if (bas->authority != NULL)
1973 res->authority = xmlMemStrdup(bas->authority);
Daniel Veillard3daee3f2017-08-28 21:12:14 +02001974 else if ((bas->server != NULL) || (bas->port == -1)) {
1975 if (bas->server != NULL)
1976 res->server = xmlMemStrdup(bas->server);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 if (bas->user != NULL)
1978 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08001979 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00001980 }
1981 if (bas->path != NULL)
1982 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001983 if (ref->query_raw != NULL)
1984 res->query_raw = xmlMemStrdup (ref->query_raw);
1985 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001986 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001987 else if (bas->query_raw != NULL)
1988 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001989 else if (bas->query != NULL)
1990 res->query = xmlMemStrdup(bas->query);
1991 if (ref->fragment != NULL)
1992 res->fragment = xmlMemStrdup(ref->fragment);
1993 goto step_7;
1994 }
Owen Taylor3473f882001-02-23 17:55:21 +00001995
1996 /*
1997 * 3) If the scheme component is defined, indicating that the reference
1998 * starts with a scheme name, then the reference is interpreted as an
1999 * absolute URI and we are done. Otherwise, the reference URI's
2000 * scheme is inherited from the base URI's scheme component.
2001 */
2002 if (ref->scheme != NULL) {
2003 val = xmlSaveUri(ref);
2004 goto done;
2005 }
2006 if (bas->scheme != NULL)
2007 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard57560382012-07-24 11:44:23 +08002008
Daniel Veillarda1413b82007-04-26 08:33:28 +00002009 if (ref->query_raw != NULL)
2010 res->query_raw = xmlMemStrdup(ref->query_raw);
2011 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002012 res->query = xmlMemStrdup(ref->query);
2013 if (ref->fragment != NULL)
2014 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002015
2016 /*
2017 * 4) If the authority component is defined, then the reference is a
2018 * network-path and we skip to step 7. Otherwise, the reference
2019 * URI's authority is inherited from the base URI's authority
2020 * component, which will also be undefined if the URI scheme does not
2021 * use an authority component.
2022 */
2023 if ((ref->authority != NULL) || (ref->server != NULL)) {
2024 if (ref->authority != NULL)
2025 res->authority = xmlMemStrdup(ref->authority);
2026 else {
2027 res->server = xmlMemStrdup(ref->server);
2028 if (ref->user != NULL)
2029 res->user = xmlMemStrdup(ref->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002030 res->port = ref->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 }
2032 if (ref->path != NULL)
2033 res->path = xmlMemStrdup(ref->path);
2034 goto step_7;
2035 }
2036 if (bas->authority != NULL)
2037 res->authority = xmlMemStrdup(bas->authority);
Daniel Veillard3daee3f2017-08-28 21:12:14 +02002038 else if ((bas->server != NULL) || (bas->port == -1)) {
2039 if (bas->server != NULL)
2040 res->server = xmlMemStrdup(bas->server);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (bas->user != NULL)
2042 res->user = xmlMemStrdup(bas->user);
Daniel Veillard57560382012-07-24 11:44:23 +08002043 res->port = bas->port;
Owen Taylor3473f882001-02-23 17:55:21 +00002044 }
2045
2046 /*
2047 * 5) If the path component begins with a slash character ("/"), then
2048 * the reference is an absolute-path and we skip to step 7.
2049 */
2050 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2051 res->path = xmlMemStrdup(ref->path);
2052 goto step_7;
2053 }
2054
2055
2056 /*
2057 * 6) If this step is reached, then we are resolving a relative-path
2058 * reference. The relative path needs to be merged with the base
2059 * URI's path. Although there are many ways to do this, we will
2060 * describe a simple method using a separate string buffer.
2061 *
2062 * Allocate a buffer large enough for the result string.
2063 */
2064 len = 2; /* extra / and 0 */
2065 if (ref->path != NULL)
2066 len += strlen(ref->path);
2067 if (bas->path != NULL)
2068 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002069 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002070 if (res->path == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002071 xmlURIErrMemory("resolving URI against base\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002072 goto done;
2073 }
2074 res->path[0] = 0;
2075
2076 /*
2077 * a) All but the last segment of the base URI's path component is
2078 * copied to the buffer. In other words, any characters after the
2079 * last (right-most) slash character, if any, are excluded.
2080 */
2081 cur = 0;
2082 out = 0;
2083 if (bas->path != NULL) {
2084 while (bas->path[cur] != 0) {
2085 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2086 cur++;
2087 if (bas->path[cur] == 0)
2088 break;
2089
2090 cur++;
2091 while (out < cur) {
2092 res->path[out] = bas->path[out];
2093 out++;
2094 }
2095 }
2096 }
2097 res->path[out] = 0;
2098
2099 /*
2100 * b) The reference's path component is appended to the buffer
2101 * string.
2102 */
2103 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002104 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002105 /*
2106 * Ensure the path includes a '/'
2107 */
2108 if ((out == 0) && (bas->server != NULL))
2109 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002110 while (ref->path[indx] != 0) {
2111 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002112 }
2113 }
2114 res->path[out] = 0;
2115
2116 /*
2117 * Steps c) to h) are really path normalization steps
2118 */
2119 xmlNormalizeURIPath(res->path);
2120
2121step_7:
2122
2123 /*
2124 * 7) The resulting URI components, including any inherited from the
2125 * base URI, are recombined to give the absolute form of the URI
2126 * reference.
2127 */
2128 val = xmlSaveUri(res);
2129
2130done:
2131 if (ref != NULL)
2132 xmlFreeURI(ref);
2133 if (bas != NULL)
2134 xmlFreeURI(bas);
2135 if (res != NULL)
2136 xmlFreeURI(res);
2137 return(val);
2138}
2139
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002140/**
William M. Brackf7789b12004-06-07 08:57:27 +00002141 * xmlBuildRelativeURI:
2142 * @URI: the URI reference under consideration
2143 * @base: the base value
2144 *
2145 * Expresses the URI of the reference in terms relative to the
2146 * base. Some examples of this operation include:
2147 * base = "http://site1.com/docs/book1.html"
2148 * URI input URI returned
2149 * docs/pic1.gif pic1.gif
2150 * docs/img/pic1.gif img/pic1.gif
2151 * img/pic1.gif ../img/pic1.gif
2152 * http://site1.com/docs/pic1.gif pic1.gif
2153 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2154 *
2155 * base = "docs/book1.html"
2156 * URI input URI returned
2157 * docs/pic1.gif pic1.gif
2158 * docs/img/pic1.gif img/pic1.gif
2159 * img/pic1.gif ../img/pic1.gif
2160 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2161 *
2162 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002163 * Note: if the URI reference is really weird or complicated, it may be
William M. Brackf7789b12004-06-07 08:57:27 +00002164 * worthwhile to first convert it into a "nice" one by calling
2165 * xmlBuildURI (using 'base') before calling this routine,
2166 * since this routine (for reasonable efficiency) assumes URI has
2167 * already been through some validation.
2168 *
2169 * Returns a new URI string (to be freed by the caller) or NULL in case
2170 * error.
2171 */
2172xmlChar *
2173xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2174{
2175 xmlChar *val = NULL;
2176 int ret;
2177 int ix;
William M. Brackf7789b12004-06-07 08:57:27 +00002178 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002179 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002180 xmlURIPtr ref = NULL;
2181 xmlURIPtr bas = NULL;
2182 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002183 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002184
2185 if ((URI == NULL) || (*URI == 0))
2186 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002187
2188 /*
2189 * First parse URI into a standard form
2190 */
2191 ref = xmlCreateURI ();
2192 if (ref == NULL)
2193 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002194 /* If URI not already in "relative" form */
2195 if (URI[0] != '.') {
2196 ret = xmlParseURIReference (ref, (const char *) URI);
2197 if (ret != 0)
2198 goto done; /* Error in URI, return NULL */
2199 } else
2200 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002201
2202 /*
2203 * Next parse base into the same standard form
2204 */
2205 if ((base == NULL) || (*base == 0)) {
2206 val = xmlStrdup (URI);
2207 goto done;
2208 }
2209 bas = xmlCreateURI ();
2210 if (bas == NULL)
2211 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002212 if (base[0] != '.') {
2213 ret = xmlParseURIReference (bas, (const char *) base);
2214 if (ret != 0)
2215 goto done; /* Error in base, return NULL */
2216 } else
2217 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002218
2219 /*
2220 * If the scheme / server on the URI differs from the base,
2221 * just return the URI
2222 */
2223 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002224 ((bas->scheme == NULL) ||
2225 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2226 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002227 val = xmlStrdup (URI);
2228 goto done;
2229 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002230 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2231 val = xmlStrdup(BAD_CAST "");
2232 goto done;
2233 }
2234 if (bas->path == NULL) {
2235 val = xmlStrdup((xmlChar *)ref->path);
2236 goto done;
2237 }
2238 if (ref->path == NULL) {
2239 ref->path = (char *) "/";
2240 remove_path = 1;
2241 }
William M. Brackf7789b12004-06-07 08:57:27 +00002242
2243 /*
2244 * At this point (at last!) we can compare the two paths
2245 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002246 * First we take care of the special case where either of the
2247 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002248 */
2249 bptr = (xmlChar *)bas->path;
Elliott Hughes7fbecab2019-01-10 16:42:03 -08002250 {
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002251 xmlChar *rptr = (xmlChar *) ref->path;
2252 int pos = 0;
2253
2254 /*
2255 * Next we compare the two strings and find where they first differ
2256 */
2257 if ((*rptr == '.') && (rptr[1] == '/'))
2258 rptr += 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002259 if ((*bptr == '.') && (bptr[1] == '/'))
2260 bptr += 2;
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002261 else if ((*bptr == '/') && (*rptr != '/'))
William M. Brack820d5ed2005-09-14 05:24:27 +00002262 bptr++;
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002263 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
William M. Brack820d5ed2005-09-14 05:24:27 +00002264 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002265
Nick Wellnhofer91e54962017-06-08 18:25:30 +02002266 if (bptr[pos] == rptr[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002267 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002268 goto done; /* (I can't imagine why anyone would do this) */
2269 }
2270
2271 /*
2272 * In URI, "back up" to the last '/' encountered. This will be the
2273 * beginning of the "unique" suffix of URI
2274 */
2275 ix = pos;
William M. Brack820d5ed2005-09-14 05:24:27 +00002276 for (; ix > 0; ix--) {
Elliott Hughes7fbecab2019-01-10 16:42:03 -08002277 if (rptr[ix - 1] == '/')
William M. Brack820d5ed2005-09-14 05:24:27 +00002278 break;
2279 }
Elliott Hughes7fbecab2019-01-10 16:42:03 -08002280 uptr = (xmlChar *)&rptr[ix];
William M. Brack820d5ed2005-09-14 05:24:27 +00002281
2282 /*
2283 * In base, count the number of '/' from the differing point
2284 */
Elliott Hughes7fbecab2019-01-10 16:42:03 -08002285 for (; bptr[ix] != 0; ix++) {
2286 if (bptr[ix] == '/')
2287 nbslash++;
William M. Brack820d5ed2005-09-14 05:24:27 +00002288 }
Elliott Hughes7fbecab2019-01-10 16:42:03 -08002289
2290 /*
2291 * e.g: URI="foo/" base="foo/bar" -> "./"
2292 */
2293 if (nbslash == 0 && !uptr[0]) {
2294 val = xmlStrdup(BAD_CAST "./");
2295 goto done;
2296 }
2297
William M. Brack820d5ed2005-09-14 05:24:27 +00002298 len = xmlStrlen (uptr) + 1;
2299 }
Daniel Veillard57560382012-07-24 11:44:23 +08002300
William M. Brackf7789b12004-06-07 08:57:27 +00002301 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002302 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002303 /* exception characters from xmlSaveUri */
2304 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002305 goto done;
2306 }
William M. Brackf7789b12004-06-07 08:57:27 +00002307
2308 /*
2309 * Allocate just enough space for the returned string -
2310 * length of the remainder of the URI, plus enough space
2311 * for the "../" groups, plus one for the terminator
2312 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002313 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002314 if (val == NULL) {
Daniel Veillard57560382012-07-24 11:44:23 +08002315 xmlURIErrMemory("building relative URI\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002316 goto done;
2317 }
2318 vptr = val;
2319 /*
2320 * Put in as many "../" as needed
2321 */
2322 for (; nbslash>0; nbslash--) {
2323 *vptr++ = '.';
2324 *vptr++ = '.';
2325 *vptr++ = '/';
2326 }
2327 /*
2328 * Finish up with the end of the URI
2329 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002330 if (uptr != NULL) {
2331 if ((vptr > val) && (len > 0) &&
2332 (uptr[0] == '/') && (vptr[-1] == '/')) {
2333 memcpy (vptr, uptr + 1, len - 1);
2334 vptr[len - 2] = 0;
2335 } else {
2336 memcpy (vptr, uptr, len);
2337 vptr[len - 1] = 0;
2338 }
2339 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002340 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002341 }
William M. Brackf7789b12004-06-07 08:57:27 +00002342
William M. Brack50420192007-07-20 01:09:08 +00002343 /* escape the freshly-built path */
2344 vptr = val;
2345 /* exception characters from xmlSaveUri */
2346 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2347 xmlFree(vptr);
2348
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002349done:
William M. Brackf7789b12004-06-07 08:57:27 +00002350 /*
2351 * Free the working variables
2352 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002353 if (remove_path != 0)
2354 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002355 if (ref != NULL)
2356 xmlFreeURI (ref);
2357 if (bas != NULL)
2358 xmlFreeURI (bas);
2359
2360 return val;
2361}
2362
2363/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002364 * xmlCanonicPath:
2365 * @path: the resource locator in a filesystem notation
2366 *
Daniel Veillard57560382012-07-24 11:44:23 +08002367 * Constructs a canonic path from the specified path.
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002368 *
Daniel Veillard57560382012-07-24 11:44:23 +08002369 * Returns a new canonic path, or a duplicate of the path parameter if the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002370 * construction fails. The caller is responsible for freeing the memory occupied
Daniel Veillard57560382012-07-24 11:44:23 +08002371 * by the returned string. If there is insufficient memory available, or the
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002372 * argument is NULL, the function returns NULL.
2373 */
Daniel Veillard57560382012-07-24 11:44:23 +08002374#define IS_WINDOWS_PATH(p) \
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002375 ((p != NULL) && \
2376 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2377 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2378 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002379xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002380xmlCanonicPath(const xmlChar *path)
2381{
William M. Brack22242272007-01-27 07:59:37 +00002382/*
2383 * For Windows implementations, additional work needs to be done to
2384 * replace backslashes in pathnames with "forward slashes"
2385 */
Daniel Veillard57560382012-07-24 11:44:23 +08002386#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002387 int len = 0;
Nick Wellnhofer41c0a132017-10-09 13:32:20 +02002388 char *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002389#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002390 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002391 xmlChar *ret;
2392 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002393
2394 if (path == NULL)
2395 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002396
Michael Stahl55b899a2012-09-07 12:14:00 +08002397#if defined(_WIN32)
2398 /*
2399 * We must not change the backslashes to slashes if the the path
2400 * starts with \\?\
2401 * Those paths can be up to 32k characters long.
2402 * Was added specifically for OpenOffice, those paths can't be converted
2403 * to URIs anyway.
2404 */
2405 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2406 (path[3] == '\\') )
2407 return xmlStrdup((const xmlChar *) path);
2408#endif
2409
2410 /* sanitize filename starting with // so it can be used as URI */
Daniel Veillard69f8a132008-02-05 08:37:56 +00002411 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2412 path++;
2413
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002414 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002415 xmlFreeURI(uri);
2416 return xmlStrdup(path);
2417 }
2418
William M. Brack22242272007-01-27 07:59:37 +00002419 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002420 absuri = xmlStrstr(path, BAD_CAST "://");
2421 if (absuri != NULL) {
2422 int l, j;
2423 unsigned char c;
2424 xmlChar *escURI;
2425
2426 /*
2427 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002428 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002429 * part matches a protocol.
2430 */
2431 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002432 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002433 if ((l <= 0) || (l > 20))
2434 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002435 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002436 for (j = 0;j < l;j++) {
2437 c = path[j];
2438 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2439 goto path_processing;
2440 }
2441
William M. Brack22242272007-01-27 07:59:37 +00002442 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002443 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2444 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002445 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002446 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002447 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002448 if (uri != NULL) {
2449 xmlFreeURI(uri);
2450 return escURI;
2451 }
Nick Wellnhoferd6b36452017-05-27 14:44:36 +02002452 xmlFree(escURI);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002453 }
2454 }
2455
2456path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002457/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard57560382012-07-24 11:44:23 +08002458#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard336a8e12005-08-07 10:46:19 +00002459 /*
William M. Brack22242272007-01-27 07:59:37 +00002460 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002461 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002462 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002463 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002464 return(NULL);
2465 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002466
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002467 len = xmlStrlen(path);
2468 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002469 /* make the scheme 'file' */
Nick Wellnhofer41c0a132017-10-09 13:32:20 +02002470 uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002471 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002472 uri->path = xmlMallocAtomic(len + 2);
2473 if (uri->path == NULL) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002474 xmlFreeURI(uri); /* Guard against 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002475 return(NULL);
2476 }
William M. Brack22242272007-01-27 07:59:37 +00002477 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002478 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002479 p = uri->path + 1;
Nick Wellnhofer41c0a132017-10-09 13:32:20 +02002480 strncpy(p, (char *) path, len + 1);
Igor Zlatkovicce076162003-02-23 13:39:39 +00002481 } else {
Nick Wellnhofer41c0a132017-10-09 13:32:20 +02002482 uri->path = (char *) xmlStrdup(path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002483 if (uri->path == NULL) {
2484 xmlFreeURI(uri);
2485 return(NULL);
2486 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002487 p = uri->path;
2488 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002489 /* Now change all occurrences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002490 while (*p != '\0') {
2491 if (*p == '\\')
2492 *p = '/';
2493 p++;
2494 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002495
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002496 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002497 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002498 } else {
2499 ret = xmlSaveUri(uri);
2500 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002501
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002502 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002503#else
2504 ret = xmlStrdup((const xmlChar *) path);
2505#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002506 return(ret);
2507}
Owen Taylor3473f882001-02-23 17:55:21 +00002508
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002509/**
2510 * xmlPathToURI:
2511 * @path: the resource locator in a filesystem notation
2512 *
2513 * Constructs an URI expressing the existing path
2514 *
Daniel Veillard57560382012-07-24 11:44:23 +08002515 * Returns a new URI, or a duplicate of the path parameter if the
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002516 * construction fails. The caller is responsible for freeing the memory
2517 * occupied by the returned string. If there is insufficient memory available,
2518 * or the argument is NULL, the function returns NULL.
2519 */
2520xmlChar *
2521xmlPathToURI(const xmlChar *path)
2522{
2523 xmlURIPtr uri;
2524 xmlURI temp;
2525 xmlChar *ret, *cal;
2526
2527 if (path == NULL)
2528 return(NULL);
2529
2530 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2531 xmlFreeURI(uri);
2532 return xmlStrdup(path);
2533 }
2534 cal = xmlCanonicPath(path);
2535 if (cal == NULL)
2536 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002537#if defined(_WIN32) && !defined(__CYGWIN__)
Daniel Veillard57560382012-07-24 11:44:23 +08002538 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002539 If 'cal' is a valid URI already then we are done here, as continuing would make
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002540 it invalid. */
2541 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2542 xmlFreeURI(uri);
2543 return cal;
2544 }
2545 /* 'cal' can contain a relative path with backslashes. If that is processed
2546 by xmlSaveURI, they will be escaped and the external entity loader machinery
2547 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2548 ret = cal;
2549 while (*ret != '\0') {
2550 if (*ret == '\\')
2551 *ret = '/';
2552 ret++;
2553 }
2554#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002555 memset(&temp, 0, sizeof(temp));
2556 temp.path = (char *) cal;
2557 ret = xmlSaveUri(&temp);
2558 xmlFree(cal);
2559 return(ret);
2560}