blob: d7f32fcfa4a96723bec05700bc3446dc75a4e28f [file] [log] [blame]
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001/**
Selim Gurun94442ad2013-12-30 18:23:42 -08002 * uri.c: set of generic URI related routines
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08003 *
Patrick Scott60a4c352009-07-09 09:30:54 -04004 * Reference: RFCs 3986, 2732 and 2373
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08005 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
12#include "libxml.h"
13
14#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
18#include <libxml/globals.h>
19#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value whatever operators surround it at the point of use.
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35static void
36xmlURIErrMemory(const char *extra)
37{
38 if (extra)
39 __xmlRaiseError(NULL, NULL, NULL,
40 NULL, NULL, XML_FROM_URI,
41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42 extra, NULL, NULL, 0, 0,
43 "Memory allocation failed : %s\n", extra);
44 else
45 __xmlRaiseError(NULL, NULL, NULL,
46 NULL, NULL, XML_FROM_URI,
47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48 NULL, NULL, NULL, 0, 0,
49 "Memory allocation failed\n");
50}
51
Patrick Scott60a4c352009-07-09 09:30:54 -040052static void xmlCleanURI(xmlURIPtr uri);
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -080053
/*
 * Character classes from the old RFC 2396 grammar, still used by the
 * legacy handling code. Every macro taking (x) expects a character
 * value; IS_UNWISE(p) expects a pointer to the character instead.
 */

/*
 * lowalpha = "a" | "b" | "c" | ... | "z"
 */
#define IS_LOWALPHA(x)  (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | ... | "Z"
 */
#define IS_UPALPHA(x)   (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x)     (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any IS_DIGIT definition inherited from an earlier header is dropped
 * first (undefining a name that is not a macro is harmless).
 */
#undef IS_DIGIT
#define IS_DIGIT(x)     (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x)  (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||   \
     ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other macros in this group, the argument is a pointer.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||                    \
     ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||    \
     ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||    \
     ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
/*
 * Skip to next pointer char, handle escaped sequences: a '%' is taken
 * as the start of a three character escape, anything else is a single
 * character. The caller must already have validated the escape.
 *
 * The argument is parenthesized so that expressions such as NEXT(*pp)
 * advance the pointed-to cursor rather than pp itself.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n bytes of s through xmlStrndup() and hand the
 * result back as a char *. The whole expansion is parenthesized so the
 * cast cannot be split from the call by surrounding operators.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
140/************************************************************************
141 * *
Patrick Scott60a4c352009-07-09 09:30:54 -0400142 * RFC 3986 parser *
143 * *
144 ************************************************************************/
145
/*
 * RFC 3986 character classes. Every macro below takes a POINTER to the
 * character to test, not the character itself.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read once the previous one
 * matched, so a string ending right after '%' is never overrun.
 * (p) is parenthesized before the offset is added so that any pointer
 * expression can be passed safely.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
195
196/**
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
200 *
201 * Parse an URI scheme
202 *
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204 *
205 * Returns 0 or the error code
206 */
207static int
208xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209 const char *cur;
210
211 if (str == NULL)
212 return(-1);
213
214 cur = *str;
215 if (!ISA_ALPHA(cur))
216 return(2);
217 cur++;
218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220 if (uri != NULL) {
221 if (uri->scheme != NULL) xmlFree(uri->scheme);
222 uri->scheme = STRNDUP(*str, cur - *str);
223 }
224 *str = cur;
225 return(0);
226}
227
228/**
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
232 *
233 * Parse the query part of an URI
234 *
235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
240 *
241 * Returns 0 or the error code
242 */
243static int
244xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245{
246 const char *cur;
247
248 if (str == NULL)
249 return (-1);
250
251 cur = *str;
252
253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254 (*cur == '[') || (*cur == ']') ||
255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256 NEXT(cur);
257 if (uri != NULL) {
258 if (uri->fragment != NULL)
259 xmlFree(uri->fragment);
260 if (uri->cleanup & 2)
261 uri->fragment = STRNDUP(*str, cur - *str);
262 else
263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264 }
265 *str = cur;
266 return (0);
267}
268
269/**
270 * xmlParse3986Query:
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
273 *
274 * Parse the query part of an URI
275 *
276 * query = *uric
277 *
278 * Returns 0 or the error code
279 */
280static int
281xmlParse3986Query(xmlURIPtr uri, const char **str)
282{
283 const char *cur;
284
285 if (str == NULL)
286 return (-1);
287
288 cur = *str;
289
290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292 NEXT(cur);
293 if (uri != NULL) {
294 if (uri->query != NULL)
295 xmlFree(uri->query);
296 if (uri->cleanup & 2)
297 uri->query = STRNDUP(*str, cur - *str);
298 else
299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 */
304 if (uri->query_raw != NULL)
305 xmlFree (uri->query_raw);
306 uri->query_raw = STRNDUP (*str, cur - *str);
307 }
308 *str = cur;
309 return (0);
310}
311
312/**
313 * xmlParse3986Port:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
Xin Lie742c3a2017-03-02 10:59:49 -0800317 * Parse a port part and fills in the appropriate fields
Patrick Scott60a4c352009-07-09 09:30:54 -0400318 * of the @uri structure
319 *
320 * port = *DIGIT
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Port(xmlURIPtr uri, const char **str)
326{
327 const char *cur = *str;
Xin Lie742c3a2017-03-02 10:59:49 -0800328 unsigned port = 0; /* unsigned for defined overflow behavior */
Patrick Scott60a4c352009-07-09 09:30:54 -0400329
330 if (ISA_DIGIT(cur)) {
Patrick Scott60a4c352009-07-09 09:30:54 -0400331 while (ISA_DIGIT(cur)) {
Xin Lie742c3a2017-03-02 10:59:49 -0800332 port = port * 10 + (*cur - '0');
333
Patrick Scott60a4c352009-07-09 09:30:54 -0400334 cur++;
335 }
Xin Lie742c3a2017-03-02 10:59:49 -0800336 if (uri != NULL)
337 uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
Patrick Scott60a4c352009-07-09 09:30:54 -0400338 *str = cur;
339 return(0);
340 }
341 return(1);
342}
343
344/**
345 * xmlParse3986Userinfo:
346 * @uri: pointer to an URI structure
347 * @str: the string to analyze
348 *
349 * Parse an user informations part and fills in the appropriate fields
350 * of the @uri structure
351 *
352 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
353 *
354 * Returns 0 or the error code
355 */
356static int
357xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358{
359 const char *cur;
360
361 cur = *str;
362 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
363 ISA_SUB_DELIM(cur) || (*cur == ':'))
364 NEXT(cur);
365 if (*cur == '@') {
366 if (uri != NULL) {
367 if (uri->user != NULL) xmlFree(uri->user);
368 if (uri->cleanup & 2)
369 uri->user = STRNDUP(*str, cur - *str);
370 else
371 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
372 }
373 *str = cur;
374 return(0);
375 }
376 return(1);
377}
378
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. Each look-ahead character is only read once the
 * previous one is known to be a digit, so the terminating 0 of the
 * string is never passed. On failure *@str is left untouched.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur+1))
        cur++;                          /* 0-9 */
    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
        cur += 2;                       /* 10-99 */
    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
        cur += 3;                       /* 100-199 */
    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
        cur += 3;                       /* 200-249 */
    else if ((*cur == '2') && (*(cur + 1) == '5') &&
             (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
        cur += 3;                       /* 250-255: bound the THIRD digit */
    else
        return(1);
    *str = cur;
    return(0);
}
416/**
417 * xmlParse3986Host:
418 * @uri: pointer to an URI structure
419 * @str: the string to analyze
420 *
421 * Parse an host part and fills in the appropriate fields
422 * of the @uri structure
423 *
424 * host = IP-literal / IPv4address / reg-name
425 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
426 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
427 * reg-name = *( unreserved / pct-encoded / sub-delims )
428 *
429 * Returns 0 or the error code
430 */
431static int
432xmlParse3986Host(xmlURIPtr uri, const char **str)
433{
434 const char *cur = *str;
435 const char *host;
436
437 host = cur;
438 /*
439 * IPv6 and future adressing scheme are enclosed between brackets
440 */
441 if (*cur == '[') {
442 cur++;
443 while ((*cur != ']') && (*cur != 0))
444 cur++;
445 if (*cur != ']')
446 return(1);
447 cur++;
448 goto found;
449 }
450 /*
451 * try to parse an IPv4
452 */
453 if (ISA_DIGIT(cur)) {
454 if (xmlParse3986DecOctet(&cur) != 0)
455 goto not_ipv4;
456 if (*cur != '.')
457 goto not_ipv4;
458 cur++;
459 if (xmlParse3986DecOctet(&cur) != 0)
460 goto not_ipv4;
461 if (*cur != '.')
462 goto not_ipv4;
463 if (xmlParse3986DecOctet(&cur) != 0)
464 goto not_ipv4;
465 if (*cur != '.')
466 goto not_ipv4;
467 if (xmlParse3986DecOctet(&cur) != 0)
468 goto not_ipv4;
469 goto found;
470not_ipv4:
471 cur = *str;
472 }
473 /*
474 * then this should be a hostname which can be empty
475 */
476 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
477 NEXT(cur);
478found:
479 if (uri != NULL) {
480 if (uri->authority != NULL) xmlFree(uri->authority);
481 uri->authority = NULL;
482 if (uri->server != NULL) xmlFree(uri->server);
483 if (cur != host) {
484 if (uri->cleanup & 2)
485 uri->server = STRNDUP(host, cur - host);
486 else
487 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488 } else
489 uri->server = NULL;
490 }
491 *str = cur;
492 return(0);
493}
494
495/**
496 * xmlParse3986Authority:
497 * @uri: pointer to an URI structure
498 * @str: the string to analyze
499 *
500 * Parse an authority part and fills in the appropriate fields
501 * of the @uri structure
502 *
503 * authority = [ userinfo "@" ] host [ ":" port ]
504 *
505 * Returns 0 or the error code
506 */
507static int
508xmlParse3986Authority(xmlURIPtr uri, const char **str)
509{
510 const char *cur;
511 int ret;
512
513 cur = *str;
514 /*
515 * try to parse an userinfo and check for the trailing @
516 */
517 ret = xmlParse3986Userinfo(uri, &cur);
518 if ((ret != 0) || (*cur != '@'))
519 cur = *str;
520 else
521 cur++;
522 ret = xmlParse3986Host(uri, &cur);
523 if (ret != 0) return(ret);
524 if (*cur == ':') {
525 cur++;
526 ret = xmlParse3986Port(uri, &cur);
527 if (ret != 0) return(ret);
528 }
529 *str = cur;
530 return(0);
531}
532
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid:  an optional forbidden character, 0 for none
 * @empty:  allow an empty segment
 *
 * Skip a path segment; only *@str is updated, nothing is stored.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * equals @forbid.
 *
 * Returns 0 on success, 1 if the input does not start with a pchar and
 *         @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(cur))
            break;
        if (*cur == forbid)
            break;
        NEXT(cur);
    }
    *str = cur;
    return (0);
}
565
566/**
567 * xmlParse3986PathAbEmpty:
568 * @uri: pointer to an URI structure
569 * @str: the string to analyze
570 *
571 * Parse an path absolute or empty and fills in the appropriate fields
572 * of the @uri structure
573 *
574 * path-abempty = *( "/" segment )
575 *
576 * Returns 0 or the error code
577 */
578static int
579xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580{
581 const char *cur;
582 int ret;
583
584 cur = *str;
585
586 while (*cur == '/') {
587 cur++;
588 ret = xmlParse3986Segment(&cur, 0, 1);
589 if (ret != 0) return(ret);
590 }
591 if (uri != NULL) {
592 if (uri->path != NULL) xmlFree(uri->path);
Selim Gurundf143a52012-03-05 14:35:53 -0800593 if (*str != cur) {
594 if (uri->cleanup & 2)
595 uri->path = STRNDUP(*str, cur - *str);
596 else
597 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
598 } else {
599 uri->path = NULL;
600 }
Patrick Scott60a4c352009-07-09 09:30:54 -0400601 }
602 *str = cur;
603 return (0);
604}
605
606/**
607 * xmlParse3986PathAbsolute:
608 * @uri: pointer to an URI structure
609 * @str: the string to analyze
610 *
611 * Parse an path absolute and fills in the appropriate fields
612 * of the @uri structure
613 *
614 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615 *
616 * Returns 0 or the error code
617 */
618static int
619xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620{
621 const char *cur;
622 int ret;
623
624 cur = *str;
625
626 if (*cur != '/')
627 return(1);
628 cur++;
629 ret = xmlParse3986Segment(&cur, 0, 0);
630 if (ret == 0) {
631 while (*cur == '/') {
632 cur++;
633 ret = xmlParse3986Segment(&cur, 0, 1);
634 if (ret != 0) return(ret);
635 }
636 }
637 if (uri != NULL) {
638 if (uri->path != NULL) xmlFree(uri->path);
Selim Gurundf143a52012-03-05 14:35:53 -0800639 if (cur != *str) {
640 if (uri->cleanup & 2)
641 uri->path = STRNDUP(*str, cur - *str);
642 else
643 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
644 } else {
645 uri->path = NULL;
646 }
Patrick Scott60a4c352009-07-09 09:30:54 -0400647 }
648 *str = cur;
649 return (0);
650}
651
652/**
653 * xmlParse3986PathRootless:
654 * @uri: pointer to an URI structure
655 * @str: the string to analyze
656 *
657 * Parse an path without root and fills in the appropriate fields
658 * of the @uri structure
659 *
660 * path-rootless = segment-nz *( "/" segment )
661 *
662 * Returns 0 or the error code
663 */
664static int
665xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666{
667 const char *cur;
668 int ret;
669
670 cur = *str;
671
672 ret = xmlParse3986Segment(&cur, 0, 0);
673 if (ret != 0) return(ret);
674 while (*cur == '/') {
675 cur++;
676 ret = xmlParse3986Segment(&cur, 0, 1);
677 if (ret != 0) return(ret);
678 }
679 if (uri != NULL) {
680 if (uri->path != NULL) xmlFree(uri->path);
Selim Gurundf143a52012-03-05 14:35:53 -0800681 if (cur != *str) {
682 if (uri->cleanup & 2)
683 uri->path = STRNDUP(*str, cur - *str);
684 else
685 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
686 } else {
687 uri->path = NULL;
688 }
Patrick Scott60a4c352009-07-09 09:30:54 -0400689 }
690 *str = cur;
691 return (0);
692}
693
694/**
695 * xmlParse3986PathNoScheme:
696 * @uri: pointer to an URI structure
697 * @str: the string to analyze
698 *
699 * Parse an path which is not a scheme and fills in the appropriate fields
700 * of the @uri structure
701 *
702 * path-noscheme = segment-nz-nc *( "/" segment )
703 *
704 * Returns 0 or the error code
705 */
706static int
707xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708{
709 const char *cur;
710 int ret;
711
712 cur = *str;
713
714 ret = xmlParse3986Segment(&cur, ':', 0);
715 if (ret != 0) return(ret);
716 while (*cur == '/') {
717 cur++;
718 ret = xmlParse3986Segment(&cur, 0, 1);
719 if (ret != 0) return(ret);
720 }
721 if (uri != NULL) {
722 if (uri->path != NULL) xmlFree(uri->path);
Selim Gurundf143a52012-03-05 14:35:53 -0800723 if (cur != *str) {
724 if (uri->cleanup & 2)
725 uri->path = STRNDUP(*str, cur - *str);
726 else
727 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
728 } else {
729 uri->path = NULL;
730 }
Patrick Scott60a4c352009-07-09 09:30:54 -0400731 }
732 *str = cur;
733 return (0);
734}
735
736/**
737 * xmlParse3986HierPart:
738 * @uri: pointer to an URI structure
739 * @str: the string to analyze
740 *
741 * Parse an hierarchical part and fills in the appropriate fields
742 * of the @uri structure
743 *
744 * hier-part = "//" authority path-abempty
745 * / path-absolute
746 * / path-rootless
747 * / path-empty
748 *
749 * Returns 0 or the error code
750 */
751static int
752xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753{
754 const char *cur;
755 int ret;
756
757 cur = *str;
758
759 if ((*cur == '/') && (*(cur + 1) == '/')) {
760 cur += 2;
761 ret = xmlParse3986Authority(uri, &cur);
762 if (ret != 0) return(ret);
Xin Lie742c3a2017-03-02 10:59:49 -0800763 if (uri->server == NULL)
764 uri->port = -1;
Patrick Scott60a4c352009-07-09 09:30:54 -0400765 ret = xmlParse3986PathAbEmpty(uri, &cur);
766 if (ret != 0) return(ret);
767 *str = cur;
768 return(0);
769 } else if (*cur == '/') {
770 ret = xmlParse3986PathAbsolute(uri, &cur);
771 if (ret != 0) return(ret);
772 } else if (ISA_PCHAR(cur)) {
773 ret = xmlParse3986PathRootless(uri, &cur);
774 if (ret != 0) return(ret);
775 } else {
776 /* path-empty is effectively empty */
777 if (uri != NULL) {
778 if (uri->path != NULL) xmlFree(uri->path);
779 uri->path = NULL;
780 }
781 }
782 *str = cur;
783 return (0);
784}
785
786/**
787 * xmlParse3986RelativeRef:
788 * @uri: pointer to an URI structure
789 * @str: the string to analyze
790 *
791 * Parse an URI string and fills in the appropriate fields
792 * of the @uri structure
793 *
794 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
795 * relative-part = "//" authority path-abempty
796 * / path-absolute
797 * / path-noscheme
798 * / path-empty
799 *
800 * Returns 0 or the error code
801 */
802static int
803xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804 int ret;
805
806 if ((*str == '/') && (*(str + 1) == '/')) {
807 str += 2;
808 ret = xmlParse3986Authority(uri, &str);
809 if (ret != 0) return(ret);
810 ret = xmlParse3986PathAbEmpty(uri, &str);
811 if (ret != 0) return(ret);
812 } else if (*str == '/') {
813 ret = xmlParse3986PathAbsolute(uri, &str);
814 if (ret != 0) return(ret);
815 } else if (ISA_PCHAR(str)) {
816 ret = xmlParse3986PathNoScheme(uri, &str);
817 if (ret != 0) return(ret);
818 } else {
819 /* path-empty is effectively empty */
820 if (uri != NULL) {
821 if (uri->path != NULL) xmlFree(uri->path);
822 uri->path = NULL;
823 }
824 }
825
826 if (*str == '?') {
827 str++;
828 ret = xmlParse3986Query(uri, &str);
829 if (ret != 0) return(ret);
830 }
831 if (*str == '#') {
832 str++;
833 ret = xmlParse3986Fragment(uri, &str);
834 if (ret != 0) return(ret);
835 }
836 if (*str != 0) {
837 xmlCleanURI(uri);
838 return(1);
839 }
840 return(0);
841}
842
843
844/**
845 * xmlParse3986URI:
846 * @uri: pointer to an URI structure
847 * @str: the string to analyze
848 *
849 * Parse an URI string and fills in the appropriate fields
850 * of the @uri structure
851 *
852 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853 *
854 * Returns 0 or the error code
855 */
856static int
857xmlParse3986URI(xmlURIPtr uri, const char *str) {
858 int ret;
859
860 ret = xmlParse3986Scheme(uri, &str);
861 if (ret != 0) return(ret);
862 if (*str != ':') {
863 return(1);
864 }
865 str++;
866 ret = xmlParse3986HierPart(uri, &str);
867 if (ret != 0) return(ret);
868 if (*str == '?') {
869 str++;
870 ret = xmlParse3986Query(uri, &str);
871 if (ret != 0) return(ret);
872 }
873 if (*str == '#') {
874 str++;
875 ret = xmlParse3986Fragment(uri, &str);
876 if (ret != 0) return(ret);
877 }
878 if (*str != 0) {
879 xmlCleanURI(uri);
880 return(1);
881 }
882 return(0);
883}
884
885/**
886 * xmlParse3986URIReference:
887 * @uri: pointer to an URI structure
888 * @str: the string to analyze
889 *
890 * Parse an URI reference string and fills in the appropriate fields
891 * of the @uri structure
892 *
893 * URI-reference = URI / relative-ref
894 *
895 * Returns 0 or the error code
896 */
897static int
898xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899 int ret;
900
901 if (str == NULL)
902 return(-1);
903 xmlCleanURI(uri);
904
905 /*
906 * Try first to parse absolute refs, then fallback to relative if
907 * it fails.
908 */
909 ret = xmlParse3986URI(uri, str);
910 if (ret != 0) {
911 xmlCleanURI(uri);
912 ret = xmlParse3986RelativeRef(uri, str);
913 if (ret != 0) {
914 xmlCleanURI(uri);
915 return(ret);
916 }
917 }
918 return(0);
919}
920
921/**
922 * xmlParseURI:
923 * @str: the URI string to analyze
924 *
925 * Parse an URI based on RFC 3986
926 *
927 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
928 *
929 * Returns a newly built xmlURIPtr or NULL in case of error
930 */
931xmlURIPtr
932xmlParseURI(const char *str) {
933 xmlURIPtr uri;
934 int ret;
935
936 if (str == NULL)
937 return(NULL);
938 uri = xmlCreateURI();
939 if (uri != NULL) {
940 ret = xmlParse3986URIReference(uri, str);
941 if (ret) {
942 xmlFreeURI(uri);
943 return(NULL);
944 }
945 }
946 return(uri);
947}
948
949/**
950 * xmlParseURIReference:
951 * @uri: pointer to an URI structure
952 * @str: the string to analyze
953 *
954 * Parse an URI reference string based on RFC 3986 and fills in the
955 * appropriate fields of the @uri structure
956 *
957 * URI-reference = URI / relative-ref
958 *
959 * Returns 0 or the error code
960 */
961int
962xmlParseURIReference(xmlURIPtr uri, const char *str) {
963 return(xmlParse3986URIReference(uri, str));
964}
965
966/**
967 * xmlParseURIRaw:
968 * @str: the URI string to analyze
969 * @raw: if 1 unescaping of URI pieces are disabled
970 *
971 * Parse an URI but allows to keep intact the original fragments.
972 *
973 * URI-reference = URI / relative-ref
974 *
975 * Returns a newly built xmlURIPtr or NULL in case of error
976 */
977xmlURIPtr
978xmlParseURIRaw(const char *str, int raw) {
979 xmlURIPtr uri;
980 int ret;
981
982 if (str == NULL)
983 return(NULL);
984 uri = xmlCreateURI();
985 if (uri != NULL) {
986 if (raw) {
987 uri->cleanup |= 2;
988 }
989 ret = xmlParseURIReference(uri, str);
990 if (ret) {
991 xmlFreeURI(uri);
992 return(NULL);
993 }
994 }
995 return(uri);
996}
997
998/************************************************************************
999 * *
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001000 * Generic URI structure functions *
1001 * *
1002 ************************************************************************/
1003
1004/**
1005 * xmlCreateURI:
1006 *
1007 * Simply creates an empty xmlURI
1008 *
1009 * Returns the new structure or NULL in case of error
1010 */
1011xmlURIPtr
1012xmlCreateURI(void) {
1013 xmlURIPtr ret;
1014
1015 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016 if (ret == NULL) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001017 xmlURIErrMemory("creating URI structure\n");
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001018 return(NULL);
1019 }
1020 memset(ret, 0, sizeof(xmlURI));
1021 return(ret);
1022}
1023
1024/**
Selim Gurun94442ad2013-12-30 18:23:42 -08001025 * xmlSaveUriRealloc:
1026 *
1027 * Function to handle properly a reallocation when saving an URI
1028 * Also imposes some limit on the length of an URI string output
1029 */
1030static xmlChar *
1031xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032 xmlChar *temp;
1033 int tmp;
1034
1035 if (*max > MAX_URI_LENGTH) {
1036 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037 return(NULL);
1038 }
1039 tmp = *max * 2;
1040 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041 if (temp == NULL) {
1042 xmlURIErrMemory("saving URI\n");
1043 return(NULL);
1044 }
1045 *max = tmp;
1046 return(temp);
1047}
1048
1049/**
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001050 * xmlSaveUri:
1051 * @uri: pointer to an xmlURI
1052 *
1053 * Save the URI as an escaped string
1054 *
1055 * Returns a new string (to be deallocated by caller)
1056 */
1057xmlChar *
1058xmlSaveUri(xmlURIPtr uri) {
1059 xmlChar *ret = NULL;
Patrick Scott60a4c352009-07-09 09:30:54 -04001060 xmlChar *temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001061 const char *p;
1062 int len;
1063 int max;
1064
1065 if (uri == NULL) return(NULL);
1066
1067
1068 max = 80;
1069 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1070 if (ret == NULL) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001071 xmlURIErrMemory("saving URI\n");
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001072 return(NULL);
1073 }
1074 len = 0;
1075
1076 if (uri->scheme != NULL) {
1077 p = uri->scheme;
1078 while (*p != 0) {
1079 if (len >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001080 temp = xmlSaveUriRealloc(ret, &max);
1081 if (temp == NULL) goto mem_error;
Patrick Scott60a4c352009-07-09 09:30:54 -04001082 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001083 }
1084 ret[len++] = *p++;
1085 }
1086 if (len >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001087 temp = xmlSaveUriRealloc(ret, &max);
1088 if (temp == NULL) goto mem_error;
1089 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001090 }
1091 ret[len++] = ':';
1092 }
1093 if (uri->opaque != NULL) {
1094 p = uri->opaque;
1095 while (*p != 0) {
1096 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001097 temp = xmlSaveUriRealloc(ret, &max);
1098 if (temp == NULL) goto mem_error;
1099 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001100 }
1101 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1102 ret[len++] = *p++;
1103 else {
1104 int val = *(unsigned char *)p++;
1105 int hi = val / 0x10, lo = val % 0x10;
1106 ret[len++] = '%';
1107 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109 }
1110 }
1111 } else {
Xin Lie742c3a2017-03-02 10:59:49 -08001112 if ((uri->server != NULL) || (uri->port == -1)) {
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001113 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001114 temp = xmlSaveUriRealloc(ret, &max);
1115 if (temp == NULL) goto mem_error;
1116 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001117 }
1118 ret[len++] = '/';
1119 ret[len++] = '/';
1120 if (uri->user != NULL) {
1121 p = uri->user;
1122 while (*p != 0) {
1123 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001124 temp = xmlSaveUriRealloc(ret, &max);
1125 if (temp == NULL) goto mem_error;
1126 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001127 }
1128 if ((IS_UNRESERVED(*(p))) ||
1129 ((*(p) == ';')) || ((*(p) == ':')) ||
1130 ((*(p) == '&')) || ((*(p) == '=')) ||
1131 ((*(p) == '+')) || ((*(p) == '$')) ||
1132 ((*(p) == ',')))
1133 ret[len++] = *p++;
1134 else {
1135 int val = *(unsigned char *)p++;
1136 int hi = val / 0x10, lo = val % 0x10;
1137 ret[len++] = '%';
1138 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140 }
1141 }
1142 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001143 temp = xmlSaveUriRealloc(ret, &max);
1144 if (temp == NULL) goto mem_error;
1145 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001146 }
1147 ret[len++] = '@';
1148 }
Xin Lie742c3a2017-03-02 10:59:49 -08001149 if (uri->server != NULL) {
1150 p = uri->server;
1151 while (*p != 0) {
1152 if (len >= max) {
1153 temp = xmlSaveUriRealloc(ret, &max);
1154 if (temp == NULL) goto mem_error;
1155 ret = temp;
1156 }
1157 ret[len++] = *p++;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001158 }
Xin Lie742c3a2017-03-02 10:59:49 -08001159 if (uri->port > 0) {
1160 if (len + 10 >= max) {
1161 temp = xmlSaveUriRealloc(ret, &max);
1162 if (temp == NULL) goto mem_error;
1163 ret = temp;
1164 }
1165 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001166 }
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001167 }
1168 } else if (uri->authority != NULL) {
1169 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001170 temp = xmlSaveUriRealloc(ret, &max);
1171 if (temp == NULL) goto mem_error;
1172 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001173 }
1174 ret[len++] = '/';
1175 ret[len++] = '/';
1176 p = uri->authority;
1177 while (*p != 0) {
1178 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001179 temp = xmlSaveUriRealloc(ret, &max);
1180 if (temp == NULL) goto mem_error;
1181 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001182 }
1183 if ((IS_UNRESERVED(*(p))) ||
1184 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186 ((*(p) == '=')) || ((*(p) == '+')))
1187 ret[len++] = *p++;
1188 else {
1189 int val = *(unsigned char *)p++;
1190 int hi = val / 0x10, lo = val % 0x10;
1191 ret[len++] = '%';
1192 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194 }
1195 }
1196 } else if (uri->scheme != NULL) {
1197 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001198 temp = xmlSaveUriRealloc(ret, &max);
1199 if (temp == NULL) goto mem_error;
1200 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001201 }
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001202 }
1203 if (uri->path != NULL) {
1204 p = uri->path;
Patrick Scott60a4c352009-07-09 09:30:54 -04001205 /*
1206 * the colon in file:///d: should not be escaped or
1207 * Windows accesses fail later.
1208 */
1209 if ((uri->scheme != NULL) &&
1210 (p[0] == '/') &&
1211 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213 (p[2] == ':') &&
1214 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001215 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001216 temp = xmlSaveUriRealloc(ret, &max);
1217 if (temp == NULL) goto mem_error;
1218 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001219 }
Patrick Scott60a4c352009-07-09 09:30:54 -04001220 ret[len++] = *p++;
1221 ret[len++] = *p++;
1222 ret[len++] = *p++;
1223 }
1224 while (*p != 0) {
1225 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001226 temp = xmlSaveUriRealloc(ret, &max);
1227 if (temp == NULL) goto mem_error;
1228 ret = temp;
Patrick Scott60a4c352009-07-09 09:30:54 -04001229 }
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001230 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233 ((*(p) == ',')))
1234 ret[len++] = *p++;
1235 else {
1236 int val = *(unsigned char *)p++;
1237 int hi = val / 0x10, lo = val % 0x10;
1238 ret[len++] = '%';
1239 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241 }
1242 }
1243 }
1244 if (uri->query_raw != NULL) {
1245 if (len + 1 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001246 temp = xmlSaveUriRealloc(ret, &max);
1247 if (temp == NULL) goto mem_error;
1248 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001249 }
1250 ret[len++] = '?';
1251 p = uri->query_raw;
1252 while (*p != 0) {
1253 if (len + 1 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001254 temp = xmlSaveUriRealloc(ret, &max);
1255 if (temp == NULL) goto mem_error;
1256 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001257 }
1258 ret[len++] = *p++;
1259 }
1260 } else if (uri->query != NULL) {
1261 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001262 temp = xmlSaveUriRealloc(ret, &max);
1263 if (temp == NULL) goto mem_error;
1264 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001265 }
1266 ret[len++] = '?';
1267 p = uri->query;
1268 while (*p != 0) {
1269 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001270 temp = xmlSaveUriRealloc(ret, &max);
1271 if (temp == NULL) goto mem_error;
1272 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001273 }
Selim Gurun94442ad2013-12-30 18:23:42 -08001274 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001275 ret[len++] = *p++;
1276 else {
1277 int val = *(unsigned char *)p++;
1278 int hi = val / 0x10, lo = val % 0x10;
1279 ret[len++] = '%';
1280 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282 }
1283 }
1284 }
1285 }
1286 if (uri->fragment != NULL) {
1287 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001288 temp = xmlSaveUriRealloc(ret, &max);
1289 if (temp == NULL) goto mem_error;
1290 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001291 }
1292 ret[len++] = '#';
1293 p = uri->fragment;
1294 while (*p != 0) {
1295 if (len + 3 >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001296 temp = xmlSaveUriRealloc(ret, &max);
1297 if (temp == NULL) goto mem_error;
1298 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001299 }
Selim Gurun94442ad2013-12-30 18:23:42 -08001300 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001301 ret[len++] = *p++;
1302 else {
1303 int val = *(unsigned char *)p++;
1304 int hi = val / 0x10, lo = val % 0x10;
1305 ret[len++] = '%';
1306 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308 }
1309 }
1310 }
1311 if (len >= max) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001312 temp = xmlSaveUriRealloc(ret, &max);
1313 if (temp == NULL) goto mem_error;
1314 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001315 }
Selim Gurundf143a52012-03-05 14:35:53 -08001316 ret[len] = 0;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001317 return(ret);
Selim Gurun94442ad2013-12-30 18:23:42 -08001318
1319mem_error:
1320 xmlFree(ret);
1321 return(NULL);
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001322}
1323
1324/**
1325 * xmlPrintURI:
1326 * @stream: a FILE* for the output
1327 * @uri: pointer to an xmlURI
1328 *
1329 * Prints the URI in the stream @stream.
1330 */
1331void
1332xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333 xmlChar *out;
1334
1335 out = xmlSaveUri(uri);
1336 if (out != NULL) {
1337 fprintf(stream, "%s", (char *) out);
1338 xmlFree(out);
1339 }
1340}
1341
1342/**
1343 * xmlCleanURI:
1344 * @uri: pointer to an xmlURI
1345 *
1346 * Make sure the xmlURI struct is free of content
1347 */
1348static void
1349xmlCleanURI(xmlURIPtr uri) {
1350 if (uri == NULL) return;
1351
1352 if (uri->scheme != NULL) xmlFree(uri->scheme);
1353 uri->scheme = NULL;
1354 if (uri->server != NULL) xmlFree(uri->server);
1355 uri->server = NULL;
1356 if (uri->user != NULL) xmlFree(uri->user);
1357 uri->user = NULL;
1358 if (uri->path != NULL) xmlFree(uri->path);
1359 uri->path = NULL;
1360 if (uri->fragment != NULL) xmlFree(uri->fragment);
1361 uri->fragment = NULL;
1362 if (uri->opaque != NULL) xmlFree(uri->opaque);
1363 uri->opaque = NULL;
1364 if (uri->authority != NULL) xmlFree(uri->authority);
1365 uri->authority = NULL;
1366 if (uri->query != NULL) xmlFree(uri->query);
1367 uri->query = NULL;
1368 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369 uri->query_raw = NULL;
1370}
1371
1372/**
1373 * xmlFreeURI:
1374 * @uri: pointer to an xmlURI
1375 *
1376 * Free up the xmlURI struct
1377 */
1378void
1379xmlFreeURI(xmlURIPtr uri) {
1380 if (uri == NULL) return;
1381
1382 if (uri->scheme != NULL) xmlFree(uri->scheme);
1383 if (uri->server != NULL) xmlFree(uri->server);
1384 if (uri->user != NULL) xmlFree(uri->user);
1385 if (uri->path != NULL) xmlFree(uri->path);
1386 if (uri->fragment != NULL) xmlFree(uri->fragment);
1387 if (uri->opaque != NULL) xmlFree(uri->opaque);
1388 if (uri->authority != NULL) xmlFree(uri->authority);
1389 if (uri->query != NULL) xmlFree(uri->query);
1390 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391 xmlFree(uri);
1392}
1393
1394/************************************************************************
1395 * *
1396 * Helper functions *
1397 * *
1398 ************************************************************************/
1399
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input. Leading '/' characters are
 * kept as they are; runs of '/' after a segment are collapsed to one.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is the read position, "out" the write position; at the top
     * of the loop "cur" is always at the start of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining separator */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy byte by byte
         * (including the terminating 0) instead of using strcpy.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Look for a previous segment by stepping back over the '/'
         * separators in front of "cur".  There is none when "cur" is at
         * the very start of the buffer, or when only '/' characters
         * precede it; in that case keep going from here.  A previous
         * segment that starts at "path" itself (e.g. the "x" in
         * "x/a/../..") must still be revisited, so testing for
         * "segp == path" alone is not sufficient.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((segp == cur) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only for paths starting
     * with '/').
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            /* shift the remainder down over the discarded "/.." prefix */
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1587
/*
 * is_hex:
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            return(1);
        default:
            break;
    }
    return(0);
}
1595
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, it does not check that the string is a URI. Each
 * "%XX" sequence whose two following characters are hexadecimal digits
 * and which lies entirely within the remaining @len bytes is replaced by
 * the single byte it encodes (no character encoding is applied); every
 * other byte, including a '%' not followed by two hex digits, is copied
 * unchanged. The output is terminated with a 0 byte and is never longer
 * than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result; otherwise the result is written to @target, which therefore
 * needs room for up to @len + 1 bytes.
 *
 * Returns the unescaped string (@target, or the newly allocated buffer),
 * or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;

    if (str == NULL)
        return(NULL);

    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        if ((len > 2) && (*src == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* fold the two hex digits following the '%' into one byte */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            *dst++ = (char) value;
            src += 3;
            len -= 3;
        } else {
            /* plain byte, or a '%' that does not start a valid escape */
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
1657
1658/**
1659 * xmlURIEscapeStr:
1660 * @str: string to escape
1661 * @list: exception list string of chars not to escape
1662 *
1663 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1664 * and the characters in the exception list.
1665 *
1666 * Returns a new escaped string or NULL in case of error.
1667 */
1668xmlChar *
1669xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1670 xmlChar *ret, ch;
Patrick Scott60a4c352009-07-09 09:30:54 -04001671 xmlChar *temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001672 const xmlChar *in;
Selim Gurun94442ad2013-12-30 18:23:42 -08001673 int len, out;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001674
1675 if (str == NULL)
1676 return(NULL);
1677 if (str[0] == 0)
1678 return(xmlStrdup(str));
1679 len = xmlStrlen(str);
1680 if (!(len > 0)) return(NULL);
1681
1682 len += 20;
1683 ret = (xmlChar *) xmlMallocAtomic(len);
1684 if (ret == NULL) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001685 xmlURIErrMemory("escaping URI value\n");
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001686 return(NULL);
1687 }
1688 in = (const xmlChar *) str;
1689 out = 0;
1690 while(*in != 0) {
1691 if (len - out <= 3) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001692 temp = xmlSaveUriRealloc(ret, &len);
Patrick Scott60a4c352009-07-09 09:30:54 -04001693 if (temp == NULL) {
Selim Gurun94442ad2013-12-30 18:23:42 -08001694 xmlURIErrMemory("escaping URI value\n");
Patrick Scott60a4c352009-07-09 09:30:54 -04001695 xmlFree(ret);
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001696 return(NULL);
1697 }
Patrick Scott60a4c352009-07-09 09:30:54 -04001698 ret = temp;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001699 }
1700
1701 ch = *in;
1702
1703 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1704 unsigned char val;
1705 ret[out++] = '%';
1706 val = ch >> 4;
1707 if (val <= 9)
1708 ret[out++] = '0' + val;
1709 else
1710 ret[out++] = 'A' + val - 0xA;
1711 val = ch & 0xF;
1712 if (val <= 9)
1713 ret[out++] = '0' + val;
1714 else
1715 ret[out++] = 'A' + val - 0xA;
1716 in++;
1717 } else {
1718 ret[out++] = *in++;
1719 }
1720
1721 }
1722 ret[out] = 0;
1723 return(ret);
1724}
1725
1726/**
1727 * xmlURIEscape:
1728 * @str: the string of the URI to escape
1729 *
1730 * Escaping routine, does not do validity checks !
1731 * It will try to escape the chars needing this, but this is heuristic
1732 * based it's impossible to be sure.
1733 *
1734 * Returns an copy of the string, but escaped
1735 *
1736 * 25 May 2001
1737 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1738 * according to RFC2396.
1739 * - Carl Douglas
1740 */
1741xmlChar *
1742xmlURIEscape(const xmlChar * str)
1743{
1744 xmlChar *ret, *segment = NULL;
1745 xmlURIPtr uri;
1746 int ret2;
1747
1748#define NULLCHK(p) if(!p) { \
Selim Gurun94442ad2013-12-30 18:23:42 -08001749 xmlURIErrMemory("escaping URI value\n"); \
1750 xmlFreeURI(uri); \
1751 return NULL; } \
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001752
1753 if (str == NULL)
1754 return (NULL);
1755
1756 uri = xmlCreateURI();
1757 if (uri != NULL) {
1758 /*
1759 * Allow escaping errors in the unescaped form
1760 */
1761 uri->cleanup = 1;
1762 ret2 = xmlParseURIReference(uri, (const char *)str);
1763 if (ret2) {
1764 xmlFreeURI(uri);
1765 return (NULL);
1766 }
1767 }
1768
1769 if (!uri)
1770 return NULL;
1771
1772 ret = NULL;
1773
1774 if (uri->scheme) {
1775 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1776 NULLCHK(segment)
1777 ret = xmlStrcat(ret, segment);
1778 ret = xmlStrcat(ret, BAD_CAST ":");
1779 xmlFree(segment);
1780 }
1781
1782 if (uri->authority) {
1783 segment =
1784 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1785 NULLCHK(segment)
1786 ret = xmlStrcat(ret, BAD_CAST "//");
1787 ret = xmlStrcat(ret, segment);
1788 xmlFree(segment);
1789 }
1790
1791 if (uri->user) {
1792 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1793 NULLCHK(segment)
Selim Gurun94442ad2013-12-30 18:23:42 -08001794 ret = xmlStrcat(ret,BAD_CAST "//");
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001795 ret = xmlStrcat(ret, segment);
1796 ret = xmlStrcat(ret, BAD_CAST "@");
1797 xmlFree(segment);
1798 }
1799
1800 if (uri->server) {
1801 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1802 NULLCHK(segment)
1803 if (uri->user == NULL)
Patrick Scott60a4c352009-07-09 09:30:54 -04001804 ret = xmlStrcat(ret, BAD_CAST "//");
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001805 ret = xmlStrcat(ret, segment);
1806 xmlFree(segment);
1807 }
1808
1809 if (uri->port) {
1810 xmlChar port[10];
1811
1812 snprintf((char *) port, 10, "%d", uri->port);
1813 ret = xmlStrcat(ret, BAD_CAST ":");
1814 ret = xmlStrcat(ret, port);
1815 }
1816
1817 if (uri->path) {
1818 segment =
1819 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1820 NULLCHK(segment)
1821 ret = xmlStrcat(ret, segment);
1822 xmlFree(segment);
1823 }
1824
1825 if (uri->query_raw) {
1826 ret = xmlStrcat(ret, BAD_CAST "?");
1827 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1828 }
1829 else if (uri->query) {
1830 segment =
1831 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1832 NULLCHK(segment)
1833 ret = xmlStrcat(ret, BAD_CAST "?");
1834 ret = xmlStrcat(ret, segment);
1835 xmlFree(segment);
1836 }
1837
1838 if (uri->opaque) {
1839 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1840 NULLCHK(segment)
1841 ret = xmlStrcat(ret, segment);
1842 xmlFree(segment);
1843 }
1844
1845 if (uri->fragment) {
1846 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1847 NULLCHK(segment)
1848 ret = xmlStrcat(ret, BAD_CAST "#");
1849 ret = xmlStrcat(ret, segment);
1850 xmlFree(segment);
1851 }
1852
1853 xmlFreeURI(uri);
1854#undef NULLCHK
1855
1856 return (ret);
1857}
1858
1859/************************************************************************
1860 * *
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001861 * Public functions *
1862 * *
1863 ************************************************************************/
1864
1865/**
1866 * xmlBuildURI:
1867 * @URI: the URI instance found in the document
1868 * @base: the base value
1869 *
1870 * Computes he final URI of the reference done by checking that
1871 * the given URI is valid, and building the final URI using the
Selim Gurun94442ad2013-12-30 18:23:42 -08001872 * base URI. This is processed according to section 5.2 of the
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001873 * RFC 2396
1874 *
1875 * 5.2. Resolving Relative References to Absolute Form
1876 *
1877 * Returns a new URI string (to be freed by the caller) or NULL in case
1878 * of error.
1879 */
1880xmlChar *
1881xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1882 xmlChar *val = NULL;
1883 int ret, len, indx, cur, out;
1884 xmlURIPtr ref = NULL;
1885 xmlURIPtr bas = NULL;
1886 xmlURIPtr res = NULL;
1887
1888 /*
1889 * 1) The URI reference is parsed into the potential four components and
1890 * fragment identifier, as described in Section 4.3.
1891 *
1892 * NOTE that a completely empty URI is treated by modern browsers
1893 * as a reference to "." rather than as a synonym for the current
1894 * URI. Should we do that here?
1895 */
Selim Gurun94442ad2013-12-30 18:23:42 -08001896 if (URI == NULL)
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001897 ret = -1;
1898 else {
1899 if (*URI) {
1900 ref = xmlCreateURI();
1901 if (ref == NULL)
1902 goto done;
1903 ret = xmlParseURIReference(ref, (const char *) URI);
1904 }
1905 else
1906 ret = 0;
1907 }
1908 if (ret != 0)
1909 goto done;
1910 if ((ref != NULL) && (ref->scheme != NULL)) {
1911 /*
1912 * The URI is absolute don't modify.
1913 */
1914 val = xmlStrdup(URI);
1915 goto done;
1916 }
1917 if (base == NULL)
1918 ret = -1;
1919 else {
1920 bas = xmlCreateURI();
1921 if (bas == NULL)
1922 goto done;
1923 ret = xmlParseURIReference(bas, (const char *) base);
1924 }
1925 if (ret != 0) {
1926 if (ref)
1927 val = xmlSaveUri(ref);
1928 goto done;
1929 }
1930 if (ref == NULL) {
1931 /*
1932 * the base fragment must be ignored
1933 */
1934 if (bas->fragment != NULL) {
1935 xmlFree(bas->fragment);
1936 bas->fragment = NULL;
1937 }
1938 val = xmlSaveUri(bas);
1939 goto done;
1940 }
1941
1942 /*
1943 * 2) If the path component is empty and the scheme, authority, and
1944 * query components are undefined, then it is a reference to the
1945 * current document and we are done. Otherwise, the reference URI's
1946 * query and fragment components are defined as found (or not found)
1947 * within the URI reference and not inherited from the base URI.
1948 *
1949 * NOTE that in modern browsers, the parsing differs from the above
1950 * in the following aspect: the query component is allowed to be
1951 * defined while still treating this as a reference to the current
1952 * document.
1953 */
1954 res = xmlCreateURI();
1955 if (res == NULL)
1956 goto done;
1957 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1958 ((ref->authority == NULL) && (ref->server == NULL))) {
1959 if (bas->scheme != NULL)
1960 res->scheme = xmlMemStrdup(bas->scheme);
1961 if (bas->authority != NULL)
1962 res->authority = xmlMemStrdup(bas->authority);
1963 else if (bas->server != NULL) {
1964 res->server = xmlMemStrdup(bas->server);
1965 if (bas->user != NULL)
1966 res->user = xmlMemStrdup(bas->user);
Selim Gurun94442ad2013-12-30 18:23:42 -08001967 res->port = bas->port;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001968 }
1969 if (bas->path != NULL)
1970 res->path = xmlMemStrdup(bas->path);
1971 if (ref->query_raw != NULL)
1972 res->query_raw = xmlMemStrdup (ref->query_raw);
1973 else if (ref->query != NULL)
1974 res->query = xmlMemStrdup(ref->query);
1975 else if (bas->query_raw != NULL)
1976 res->query_raw = xmlMemStrdup(bas->query_raw);
1977 else if (bas->query != NULL)
1978 res->query = xmlMemStrdup(bas->query);
1979 if (ref->fragment != NULL)
1980 res->fragment = xmlMemStrdup(ref->fragment);
1981 goto step_7;
1982 }
1983
1984 /*
1985 * 3) If the scheme component is defined, indicating that the reference
1986 * starts with a scheme name, then the reference is interpreted as an
1987 * absolute URI and we are done. Otherwise, the reference URI's
1988 * scheme is inherited from the base URI's scheme component.
1989 */
1990 if (ref->scheme != NULL) {
1991 val = xmlSaveUri(ref);
1992 goto done;
1993 }
1994 if (bas->scheme != NULL)
1995 res->scheme = xmlMemStrdup(bas->scheme);
Selim Gurun94442ad2013-12-30 18:23:42 -08001996
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08001997 if (ref->query_raw != NULL)
1998 res->query_raw = xmlMemStrdup(ref->query_raw);
1999 else if (ref->query != NULL)
2000 res->query = xmlMemStrdup(ref->query);
2001 if (ref->fragment != NULL)
2002 res->fragment = xmlMemStrdup(ref->fragment);
2003
2004 /*
2005 * 4) If the authority component is defined, then the reference is a
2006 * network-path and we skip to step 7. Otherwise, the reference
2007 * URI's authority is inherited from the base URI's authority
2008 * component, which will also be undefined if the URI scheme does not
2009 * use an authority component.
2010 */
2011 if ((ref->authority != NULL) || (ref->server != NULL)) {
2012 if (ref->authority != NULL)
2013 res->authority = xmlMemStrdup(ref->authority);
2014 else {
2015 res->server = xmlMemStrdup(ref->server);
2016 if (ref->user != NULL)
2017 res->user = xmlMemStrdup(ref->user);
Selim Gurun94442ad2013-12-30 18:23:42 -08002018 res->port = ref->port;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002019 }
2020 if (ref->path != NULL)
2021 res->path = xmlMemStrdup(ref->path);
2022 goto step_7;
2023 }
2024 if (bas->authority != NULL)
2025 res->authority = xmlMemStrdup(bas->authority);
2026 else if (bas->server != NULL) {
2027 res->server = xmlMemStrdup(bas->server);
2028 if (bas->user != NULL)
2029 res->user = xmlMemStrdup(bas->user);
Selim Gurun94442ad2013-12-30 18:23:42 -08002030 res->port = bas->port;
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002031 }
2032
2033 /*
2034 * 5) If the path component begins with a slash character ("/"), then
2035 * the reference is an absolute-path and we skip to step 7.
2036 */
2037 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2038 res->path = xmlMemStrdup(ref->path);
2039 goto step_7;
2040 }
2041
2042
2043 /*
2044 * 6) If this step is reached, then we are resolving a relative-path
2045 * reference. The relative path needs to be merged with the base
2046 * URI's path. Although there are many ways to do this, we will
2047 * describe a simple method using a separate string buffer.
2048 *
2049 * Allocate a buffer large enough for the result string.
2050 */
2051 len = 2; /* extra / and 0 */
2052 if (ref->path != NULL)
2053 len += strlen(ref->path);
2054 if (bas->path != NULL)
2055 len += strlen(bas->path);
2056 res->path = (char *) xmlMallocAtomic(len);
2057 if (res->path == NULL) {
Selim Gurun94442ad2013-12-30 18:23:42 -08002058 xmlURIErrMemory("resolving URI against base\n");
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002059 goto done;
2060 }
2061 res->path[0] = 0;
2062
2063 /*
2064 * a) All but the last segment of the base URI's path component is
2065 * copied to the buffer. In other words, any characters after the
2066 * last (right-most) slash character, if any, are excluded.
2067 */
2068 cur = 0;
2069 out = 0;
2070 if (bas->path != NULL) {
2071 while (bas->path[cur] != 0) {
2072 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2073 cur++;
2074 if (bas->path[cur] == 0)
2075 break;
2076
2077 cur++;
2078 while (out < cur) {
2079 res->path[out] = bas->path[out];
2080 out++;
2081 }
2082 }
2083 }
2084 res->path[out] = 0;
2085
2086 /*
2087 * b) The reference's path component is appended to the buffer
2088 * string.
2089 */
2090 if (ref->path != NULL && ref->path[0] != 0) {
2091 indx = 0;
2092 /*
2093 * Ensure the path includes a '/'
2094 */
2095 if ((out == 0) && (bas->server != NULL))
2096 res->path[out++] = '/';
2097 while (ref->path[indx] != 0) {
2098 res->path[out++] = ref->path[indx++];
2099 }
2100 }
2101 res->path[out] = 0;
2102
2103 /*
2104 * Steps c) to h) are really path normalization steps
2105 */
2106 xmlNormalizeURIPath(res->path);
2107
2108step_7:
2109
2110 /*
2111 * 7) The resulting URI components, including any inherited from the
2112 * base URI, are recombined to give the absolute form of the URI
2113 * reference.
2114 */
2115 val = xmlSaveUri(res);
2116
2117done:
2118 if (ref != NULL)
2119 xmlFreeURI(ref);
2120 if (bas != NULL)
2121 xmlFreeURI(bas);
2122 if (res != NULL)
2123 xmlFreeURI(res);
2124 return(val);
2125}
2126
2127/**
2128 * xmlBuildRelativeURI:
2129 * @URI: the URI reference under consideration
2130 * @base: the base value
2131 *
2132 * Expresses the URI of the reference in terms relative to the
2133 * base. Some examples of this operation include:
2134 * base = "http://site1.com/docs/book1.html"
2135 * URI input URI returned
2136 * docs/pic1.gif pic1.gif
2137 * docs/img/pic1.gif img/pic1.gif
2138 * img/pic1.gif ../img/pic1.gif
2139 * http://site1.com/docs/pic1.gif pic1.gif
2140 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2141 *
2142 * base = "docs/book1.html"
2143 * URI input URI returned
2144 * docs/pic1.gif pic1.gif
2145 * docs/img/pic1.gif img/pic1.gif
2146 * img/pic1.gif ../img/pic1.gif
2147 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2148 *
2149 *
2150 * Note: if the URI reference is really wierd or complicated, it may be
2151 * worthwhile to first convert it into a "nice" one by calling
2152 * xmlBuildURI (using 'base') before calling this routine,
2153 * since this routine (for reasonable efficiency) assumes URI has
2154 * already been through some validation.
2155 *
2156 * Returns a new URI string (to be freed by the caller) or NULL in case
2157 * error.
2158 */
2159xmlChar *
2160xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2161{
2162 xmlChar *val = NULL;
2163 int ret;
2164 int ix;
2165 int pos = 0;
2166 int nbslash = 0;
2167 int len;
2168 xmlURIPtr ref = NULL;
2169 xmlURIPtr bas = NULL;
2170 xmlChar *bptr, *uptr, *vptr;
2171 int remove_path = 0;
2172
2173 if ((URI == NULL) || (*URI == 0))
2174 return NULL;
2175
2176 /*
2177 * First parse URI into a standard form
2178 */
2179 ref = xmlCreateURI ();
2180 if (ref == NULL)
2181 return NULL;
2182 /* If URI not already in "relative" form */
2183 if (URI[0] != '.') {
2184 ret = xmlParseURIReference (ref, (const char *) URI);
2185 if (ret != 0)
2186 goto done; /* Error in URI, return NULL */
2187 } else
2188 ref->path = (char *)xmlStrdup(URI);
2189
2190 /*
2191 * Next parse base into the same standard form
2192 */
2193 if ((base == NULL) || (*base == 0)) {
2194 val = xmlStrdup (URI);
2195 goto done;
2196 }
2197 bas = xmlCreateURI ();
2198 if (bas == NULL)
2199 goto done;
2200 if (base[0] != '.') {
2201 ret = xmlParseURIReference (bas, (const char *) base);
2202 if (ret != 0)
2203 goto done; /* Error in base, return NULL */
2204 } else
2205 bas->path = (char *)xmlStrdup(base);
2206
2207 /*
2208 * If the scheme / server on the URI differs from the base,
2209 * just return the URI
2210 */
2211 if ((ref->scheme != NULL) &&
2212 ((bas->scheme == NULL) ||
2213 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2214 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2215 val = xmlStrdup (URI);
2216 goto done;
2217 }
2218 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2219 val = xmlStrdup(BAD_CAST "");
2220 goto done;
2221 }
2222 if (bas->path == NULL) {
2223 val = xmlStrdup((xmlChar *)ref->path);
2224 goto done;
2225 }
2226 if (ref->path == NULL) {
2227 ref->path = (char *) "/";
2228 remove_path = 1;
2229 }
2230
2231 /*
2232 * At this point (at last!) we can compare the two paths
2233 *
2234 * First we take care of the special case where either of the
2235 * two path components may be missing (bug 316224)
2236 */
2237 if (bas->path == NULL) {
2238 if (ref->path != NULL) {
2239 uptr = (xmlChar *) ref->path;
2240 if (*uptr == '/')
2241 uptr++;
2242 /* exception characters from xmlSaveUri */
2243 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2244 }
2245 goto done;
2246 }
2247 bptr = (xmlChar *)bas->path;
2248 if (ref->path == NULL) {
2249 for (ix = 0; bptr[ix] != 0; ix++) {
2250 if (bptr[ix] == '/')
2251 nbslash++;
2252 }
2253 uptr = NULL;
2254 len = 1; /* this is for a string terminator only */
2255 } else {
2256 /*
2257 * Next we compare the two strings and find where they first differ
2258 */
2259 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2260 pos += 2;
2261 if ((*bptr == '.') && (bptr[1] == '/'))
2262 bptr += 2;
2263 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2264 bptr++;
2265 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2266 pos++;
2267
2268 if (bptr[pos] == ref->path[pos]) {
2269 val = xmlStrdup(BAD_CAST "");
2270 goto done; /* (I can't imagine why anyone would do this) */
2271 }
2272
2273 /*
2274 * In URI, "back up" to the last '/' encountered. This will be the
2275 * beginning of the "unique" suffix of URI
2276 */
2277 ix = pos;
2278 if ((ref->path[ix] == '/') && (ix > 0))
2279 ix--;
2280 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2281 ix -= 2;
2282 for (; ix > 0; ix--) {
2283 if (ref->path[ix] == '/')
2284 break;
2285 }
2286 if (ix == 0) {
2287 uptr = (xmlChar *)ref->path;
2288 } else {
2289 ix++;
2290 uptr = (xmlChar *)&ref->path[ix];
2291 }
2292
2293 /*
2294 * In base, count the number of '/' from the differing point
2295 */
2296 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2297 for (; bptr[ix] != 0; ix++) {
2298 if (bptr[ix] == '/')
2299 nbslash++;
2300 }
2301 }
2302 len = xmlStrlen (uptr) + 1;
2303 }
Selim Gurun94442ad2013-12-30 18:23:42 -08002304
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002305 if (nbslash == 0) {
2306 if (uptr != NULL)
2307 /* exception characters from xmlSaveUri */
2308 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2309 goto done;
2310 }
2311
2312 /*
2313 * Allocate just enough space for the returned string -
2314 * length of the remainder of the URI, plus enough space
2315 * for the "../" groups, plus one for the terminator
2316 */
2317 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2318 if (val == NULL) {
Selim Gurun94442ad2013-12-30 18:23:42 -08002319 xmlURIErrMemory("building relative URI\n");
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002320 goto done;
2321 }
2322 vptr = val;
2323 /*
2324 * Put in as many "../" as needed
2325 */
2326 for (; nbslash>0; nbslash--) {
2327 *vptr++ = '.';
2328 *vptr++ = '.';
2329 *vptr++ = '/';
2330 }
2331 /*
2332 * Finish up with the end of the URI
2333 */
2334 if (uptr != NULL) {
2335 if ((vptr > val) && (len > 0) &&
2336 (uptr[0] == '/') && (vptr[-1] == '/')) {
2337 memcpy (vptr, uptr + 1, len - 1);
2338 vptr[len - 2] = 0;
2339 } else {
2340 memcpy (vptr, uptr, len);
2341 vptr[len - 1] = 0;
2342 }
2343 } else {
2344 vptr[len - 1] = 0;
2345 }
2346
2347 /* escape the freshly-built path */
2348 vptr = val;
2349 /* exception characters from xmlSaveUri */
2350 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2351 xmlFree(vptr);
2352
2353done:
2354 /*
2355 * Free the working variables
2356 */
2357 if (remove_path != 0)
2358 ref->path = NULL;
2359 if (ref != NULL)
2360 xmlFreeURI (ref);
2361 if (bas != NULL)
2362 xmlFreeURI (bas);
2363
2364 return val;
2365}
2366
2367/**
2368 * xmlCanonicPath:
2369 * @path: the resource locator in a filesystem notation
2370 *
Selim Gurun94442ad2013-12-30 18:23:42 -08002371 * Constructs a canonic path from the specified path.
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002372 *
Selim Gurun94442ad2013-12-30 18:23:42 -08002373 * Returns a new canonic path, or a duplicate of the path parameter if the
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002374 * construction fails. The caller is responsible for freeing the memory occupied
Selim Gurun94442ad2013-12-30 18:23:42 -08002375 * by the returned string. If there is insufficient memory available, or the
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002376 * argument is NULL, the function returns NULL.
2377 */
Selim Gurun94442ad2013-12-30 18:23:42 -08002378#define IS_WINDOWS_PATH(p) \
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002379 ((p != NULL) && \
2380 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2381 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2382 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2383xmlChar *
2384xmlCanonicPath(const xmlChar *path)
2385{
2386/*
2387 * For Windows implementations, additional work needs to be done to
2388 * replace backslashes in pathnames with "forward slashes"
2389 */
Selim Gurun94442ad2013-12-30 18:23:42 -08002390#if defined(_WIN32) && !defined(__CYGWIN__)
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002391 int len = 0;
2392 int i = 0;
2393 xmlChar *p = NULL;
2394#endif
2395 xmlURIPtr uri;
2396 xmlChar *ret;
2397 const xmlChar *absuri;
2398
2399 if (path == NULL)
2400 return(NULL);
Patrick Scott60a4c352009-07-09 09:30:54 -04002401
Selim Gurun94442ad2013-12-30 18:23:42 -08002402#if defined(_WIN32)
2403 /*
2404 * We must not change the backslashes to slashes if the the path
2405 * starts with \\?\
2406 * Those paths can be up to 32k characters long.
2407 * Was added specifically for OpenOffice, those paths can't be converted
2408 * to URIs anyway.
2409 */
2410 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2411 (path[3] == '\\') )
2412 return xmlStrdup((const xmlChar *) path);
2413#endif
2414
2415 /* sanitize filename starting with // so it can be used as URI */
Patrick Scott60a4c352009-07-09 09:30:54 -04002416 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2417 path++;
2418
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002419 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2420 xmlFreeURI(uri);
2421 return xmlStrdup(path);
2422 }
2423
2424 /* Check if this is an "absolute uri" */
2425 absuri = xmlStrstr(path, BAD_CAST "://");
2426 if (absuri != NULL) {
2427 int l, j;
2428 unsigned char c;
2429 xmlChar *escURI;
2430
2431 /*
2432 * this looks like an URI where some parts have not been
2433 * escaped leading to a parsing problem. Check that the first
2434 * part matches a protocol.
2435 */
2436 l = absuri - path;
2437 /* Bypass if first part (part before the '://') is > 20 chars */
2438 if ((l <= 0) || (l > 20))
2439 goto path_processing;
2440 /* Bypass if any non-alpha characters are present in first part */
2441 for (j = 0;j < l;j++) {
2442 c = path[j];
2443 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2444 goto path_processing;
2445 }
2446
2447 /* Escape all except the characters specified in the supplied path */
2448 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2449 if (escURI != NULL) {
2450 /* Try parsing the escaped path */
2451 uri = xmlParseURI((const char *) escURI);
2452 /* If successful, return the escaped string */
2453 if (uri != NULL) {
2454 xmlFreeURI(uri);
2455 return escURI;
2456 }
2457 }
2458 }
2459
2460path_processing:
2461/* For Windows implementations, replace backslashes with 'forward slashes' */
Selim Gurun94442ad2013-12-30 18:23:42 -08002462#if defined(_WIN32) && !defined(__CYGWIN__)
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002463 /*
2464 * Create a URI structure
2465 */
2466 uri = xmlCreateURI();
2467 if (uri == NULL) { /* Guard against 'out of memory' */
2468 return(NULL);
2469 }
2470
2471 len = xmlStrlen(path);
2472 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2473 /* make the scheme 'file' */
2474 uri->scheme = xmlStrdup(BAD_CAST "file");
2475 /* allocate space for leading '/' + path + string terminator */
2476 uri->path = xmlMallocAtomic(len + 2);
2477 if (uri->path == NULL) {
2478 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2479 return(NULL);
2480 }
2481 /* Put in leading '/' plus path */
2482 uri->path[0] = '/';
2483 p = uri->path + 1;
2484 strncpy(p, path, len + 1);
2485 } else {
2486 uri->path = xmlStrdup(path);
2487 if (uri->path == NULL) {
2488 xmlFreeURI(uri);
2489 return(NULL);
2490 }
2491 p = uri->path;
2492 }
2493 /* Now change all occurences of '\' to '/' */
2494 while (*p != '\0') {
2495 if (*p == '\\')
2496 *p = '/';
2497 p++;
2498 }
2499
2500 if (uri->scheme == NULL) {
2501 ret = xmlStrdup((const xmlChar *) uri->path);
2502 } else {
2503 ret = xmlSaveUri(uri);
2504 }
2505
2506 xmlFreeURI(uri);
2507#else
2508 ret = xmlStrdup((const xmlChar *) path);
2509#endif
2510 return(ret);
2511}
2512
2513/**
2514 * xmlPathToURI:
2515 * @path: the resource locator in a filesystem notation
2516 *
2517 * Constructs an URI expressing the existing path
2518 *
Selim Gurun94442ad2013-12-30 18:23:42 -08002519 * Returns a new URI, or a duplicate of the path parameter if the
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002520 * construction fails. The caller is responsible for freeing the memory
2521 * occupied by the returned string. If there is insufficient memory available,
2522 * or the argument is NULL, the function returns NULL.
2523 */
2524xmlChar *
2525xmlPathToURI(const xmlChar *path)
2526{
2527 xmlURIPtr uri;
2528 xmlURI temp;
2529 xmlChar *ret, *cal;
2530
2531 if (path == NULL)
2532 return(NULL);
2533
2534 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2535 xmlFreeURI(uri);
2536 return xmlStrdup(path);
2537 }
2538 cal = xmlCanonicPath(path);
2539 if (cal == NULL)
2540 return(NULL);
2541#if defined(_WIN32) && !defined(__CYGWIN__)
Selim Gurun94442ad2013-12-30 18:23:42 -08002542 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
The Android Open Source Projectab4e2e92009-03-03 19:30:06 -08002543 If 'cal' is a valid URI allready then we are done here, as continuing would make
2544 it invalid. */
2545 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2546 xmlFreeURI(uri);
2547 return cal;
2548 }
2549 /* 'cal' can contain a relative path with backslashes. If that is processed
2550 by xmlSaveURI, they will be escaped and the external entity loader machinery
2551 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2552 ret = cal;
2553 while (*ret != '\0') {
2554 if (*ret == '\\')
2555 *ret = '/';
2556 ret++;
2557 }
2558#endif
2559 memset(&temp, 0, sizeof(temp));
2560 temp.path = (char *) cal;
2561 ret = xmlSaveUri(&temp);
2562 xmlFree(cal);
2563 return(ret);
2564}
2565#define bottom_uri
2566#include "elfgcchack.h"