/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFC 2396
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
/************************************************************************
 *                                                                      *
 *          Macros to differentiate various character type              *
 *                 directly extracted from RFC 2396                     *
 *                                                                      *
 ************************************************************************/
27
/*
 * Character classification macros transcribed from the RFC 2396 grammar.
 *
 * Macros taking "x" classify a single character value; macros taking "p"
 * take a pointer, because the "escaped" production needs to look at the
 * two characters following a '%'. Every macro evaluates its argument more
 * than once, so the argument must be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_UPALPHA(x) || IS_LOWALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_DIGIT(x) || IS_ALPHA(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) (IS_DIGIT(x) ||					\
    (('A' <= (x)) && ((x) <= 'F')) ||					\
    (('a' <= (x)) && ((x) <= 'f')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') ||			\
    ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
    ((x) == ':') || ((x) == '@') || ((x) == '&') ||			\
    ((x) == '=') || ((x) == '+') || ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_MARK(x) || IS_ALPHANUM(x))

/*
 * escaped = "%" hex hex
 *
 * The tests short-circuit left to right, so (p)[2] is only read when
 * (p)[1] is a hex digit and (p)[1] only when *(p) is '%'.
 */
#define IS_ESCAPED(p) ((*(p) == '%') && IS_HEX((p)[1]) && IS_HEX((p)[2]))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
    (*(p) == ':') || (*(p) == '@') || (*(p) == '&') ||			\
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || (*(p) == ','))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 *
 * i.e. pchar extended with ";" and "?".
 */
#define IS_URIC_NO_SLASH(p) (IS_PCHAR(p) ||				\
    (*(p) == ';') || (*(p) == '?'))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
    (*(p) == ';') || (*(p) == '@') || (*(p) == '&') ||			\
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || (*(p) == ','))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */
#define IS_SCHEME(x) (IS_ALPHANUM(x) ||					\
    ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * i.e. pchar extended with ";".
 */
#define IS_REG_NAME(p) (IS_PCHAR(p) || (*(p) == ';'))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) ||		\
    (*(p) == ';') || (*(p) == ':') || (*(p) == '&') ||			\
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || (*(p) == ','))

/*
 * uric = reserved | unreserved | escaped
 *
 * i.e. uric_no_slash extended with "/".
 */
#define IS_URIC(p) (IS_URIC_NO_SLASH(p) || (*(p) == '/'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 */
#define IS_UNWISE(p)							\
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||			\
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||		\
     (*(p) == ']') || (*(p) == '`'))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000167
/*
 * Skip to next pointer char, handle escaped sequences:
 * p advances by 3 when it points at a '%', by 1 otherwise.
 *
 * The macro only looks at *p; it does not check that two more characters
 * follow the '%', so callers are expected to have validated the escape
 * beforehand. The argument is fully parenthesized so that any lvalue
 * expression (e.g. *pp) can be passed, but it is evaluated more than once.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
173
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */
183
/************************************************************************
 *                                                                      *
 *                  Generic URI structure functions                     *
 *                                                                      *
 ************************************************************************/
189
190/**
191 * xmlCreateURI:
192 *
193 * Simply creates an empty xmlURI
194 *
195 * Returns the new structure or NULL in case of error
196 */
197xmlURIPtr
198xmlCreateURI(void) {
199 xmlURIPtr ret;
200
201 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
202 if (ret == NULL) {
203 xmlGenericError(xmlGenericErrorContext,
204 "xmlCreateURI: out of memory\n");
205 return(NULL);
206 }
207 memset(ret, 0, sizeof(xmlURI));
208 return(ret);
209}
210
211/**
212 * xmlSaveUri:
213 * @uri: pointer to an xmlURI
214 *
215 * Save the URI as an escaped string
216 *
217 * Returns a new string (to be deallocated by caller)
218 */
219xmlChar *
220xmlSaveUri(xmlURIPtr uri) {
221 xmlChar *ret = NULL;
222 const char *p;
223 int len;
224 int max;
225
226 if (uri == NULL) return(NULL);
227
228
229 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000230 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000231 if (ret == NULL) {
232 xmlGenericError(xmlGenericErrorContext,
233 "xmlSaveUri: out of memory\n");
234 return(NULL);
235 }
236 len = 0;
237
238 if (uri->scheme != NULL) {
239 p = uri->scheme;
240 while (*p != 0) {
241 if (len >= max) {
242 max *= 2;
243 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
244 if (ret == NULL) {
245 xmlGenericError(xmlGenericErrorContext,
246 "xmlSaveUri: out of memory\n");
247 return(NULL);
248 }
249 }
250 ret[len++] = *p++;
251 }
252 if (len >= max) {
253 max *= 2;
254 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
255 if (ret == NULL) {
256 xmlGenericError(xmlGenericErrorContext,
257 "xmlSaveUri: out of memory\n");
258 return(NULL);
259 }
260 }
261 ret[len++] = ':';
262 }
263 if (uri->opaque != NULL) {
264 p = uri->opaque;
265 while (*p != 0) {
266 if (len + 3 >= max) {
267 max *= 2;
268 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
269 if (ret == NULL) {
270 xmlGenericError(xmlGenericErrorContext,
271 "xmlSaveUri: out of memory\n");
272 return(NULL);
273 }
274 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000275 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000276 ret[len++] = *p++;
277 else {
278 int val = *(unsigned char *)p++;
279 int hi = val / 0x10, lo = val % 0x10;
280 ret[len++] = '%';
281 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
282 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
283 }
284 }
Owen Taylor3473f882001-02-23 17:55:21 +0000285 } else {
286 if (uri->server != NULL) {
287 if (len + 3 >= max) {
288 max *= 2;
289 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
290 if (ret == NULL) {
291 xmlGenericError(xmlGenericErrorContext,
292 "xmlSaveUri: out of memory\n");
293 return(NULL);
294 }
295 }
296 ret[len++] = '/';
297 ret[len++] = '/';
298 if (uri->user != NULL) {
299 p = uri->user;
300 while (*p != 0) {
301 if (len + 3 >= max) {
302 max *= 2;
303 ret = (xmlChar *) xmlRealloc(ret,
304 (max + 1) * sizeof(xmlChar));
305 if (ret == NULL) {
306 xmlGenericError(xmlGenericErrorContext,
307 "xmlSaveUri: out of memory\n");
308 return(NULL);
309 }
310 }
311 if ((IS_UNRESERVED(*(p))) ||
312 ((*(p) == ';')) || ((*(p) == ':')) ||
313 ((*(p) == '&')) || ((*(p) == '=')) ||
314 ((*(p) == '+')) || ((*(p) == '$')) ||
315 ((*(p) == ',')))
316 ret[len++] = *p++;
317 else {
318 int val = *(unsigned char *)p++;
319 int hi = val / 0x10, lo = val % 0x10;
320 ret[len++] = '%';
321 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
322 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
323 }
324 }
325 if (len + 3 >= max) {
326 max *= 2;
327 ret = (xmlChar *) xmlRealloc(ret,
328 (max + 1) * sizeof(xmlChar));
329 if (ret == NULL) {
330 xmlGenericError(xmlGenericErrorContext,
331 "xmlSaveUri: out of memory\n");
332 return(NULL);
333 }
334 }
335 ret[len++] = '@';
336 }
337 p = uri->server;
338 while (*p != 0) {
339 if (len >= max) {
340 max *= 2;
341 ret = (xmlChar *) xmlRealloc(ret,
342 (max + 1) * sizeof(xmlChar));
343 if (ret == NULL) {
344 xmlGenericError(xmlGenericErrorContext,
345 "xmlSaveUri: out of memory\n");
346 return(NULL);
347 }
348 }
349 ret[len++] = *p++;
350 }
351 if (uri->port > 0) {
352 if (len + 10 >= max) {
353 max *= 2;
354 ret = (xmlChar *) xmlRealloc(ret,
355 (max + 1) * sizeof(xmlChar));
356 if (ret == NULL) {
357 xmlGenericError(xmlGenericErrorContext,
358 "xmlSaveUri: out of memory\n");
359 return(NULL);
360 }
361 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000362 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000363 }
364 } else if (uri->authority != NULL) {
365 if (len + 3 >= max) {
366 max *= 2;
367 ret = (xmlChar *) xmlRealloc(ret,
368 (max + 1) * sizeof(xmlChar));
369 if (ret == NULL) {
370 xmlGenericError(xmlGenericErrorContext,
371 "xmlSaveUri: out of memory\n");
372 return(NULL);
373 }
374 }
375 ret[len++] = '/';
376 ret[len++] = '/';
377 p = uri->authority;
378 while (*p != 0) {
379 if (len + 3 >= max) {
380 max *= 2;
381 ret = (xmlChar *) xmlRealloc(ret,
382 (max + 1) * sizeof(xmlChar));
383 if (ret == NULL) {
384 xmlGenericError(xmlGenericErrorContext,
385 "xmlSaveUri: out of memory\n");
386 return(NULL);
387 }
388 }
389 if ((IS_UNRESERVED(*(p))) ||
390 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
391 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
392 ((*(p) == '=')) || ((*(p) == '+')))
393 ret[len++] = *p++;
394 else {
395 int val = *(unsigned char *)p++;
396 int hi = val / 0x10, lo = val % 0x10;
397 ret[len++] = '%';
398 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
399 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
400 }
401 }
402 } else if (uri->scheme != NULL) {
403 if (len + 3 >= max) {
404 max *= 2;
405 ret = (xmlChar *) xmlRealloc(ret,
406 (max + 1) * sizeof(xmlChar));
407 if (ret == NULL) {
408 xmlGenericError(xmlGenericErrorContext,
409 "xmlSaveUri: out of memory\n");
410 return(NULL);
411 }
412 }
413 ret[len++] = '/';
414 ret[len++] = '/';
415 }
416 if (uri->path != NULL) {
417 p = uri->path;
418 while (*p != 0) {
419 if (len + 3 >= max) {
420 max *= 2;
421 ret = (xmlChar *) xmlRealloc(ret,
422 (max + 1) * sizeof(xmlChar));
423 if (ret == NULL) {
424 xmlGenericError(xmlGenericErrorContext,
425 "xmlSaveUri: out of memory\n");
426 return(NULL);
427 }
428 }
429 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
430 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
431 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
432 ((*(p) == ',')))
433 ret[len++] = *p++;
434 else {
435 int val = *(unsigned char *)p++;
436 int hi = val / 0x10, lo = val % 0x10;
437 ret[len++] = '%';
438 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
439 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
440 }
441 }
442 }
443 if (uri->query != NULL) {
444 if (len + 3 >= max) {
445 max *= 2;
446 ret = (xmlChar *) xmlRealloc(ret,
447 (max + 1) * sizeof(xmlChar));
448 if (ret == NULL) {
449 xmlGenericError(xmlGenericErrorContext,
450 "xmlSaveUri: out of memory\n");
451 return(NULL);
452 }
453 }
454 ret[len++] = '?';
455 p = uri->query;
456 while (*p != 0) {
457 if (len + 3 >= max) {
458 max *= 2;
459 ret = (xmlChar *) xmlRealloc(ret,
460 (max + 1) * sizeof(xmlChar));
461 if (ret == NULL) {
462 xmlGenericError(xmlGenericErrorContext,
463 "xmlSaveUri: out of memory\n");
464 return(NULL);
465 }
466 }
467 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
468 ret[len++] = *p++;
469 else {
470 int val = *(unsigned char *)p++;
471 int hi = val / 0x10, lo = val % 0x10;
472 ret[len++] = '%';
473 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
474 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
475 }
476 }
477 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000478 }
479 if (uri->fragment != NULL) {
480 if (len + 3 >= max) {
481 max *= 2;
482 ret = (xmlChar *) xmlRealloc(ret,
483 (max + 1) * sizeof(xmlChar));
484 if (ret == NULL) {
485 xmlGenericError(xmlGenericErrorContext,
486 "xmlSaveUri: out of memory\n");
487 return(NULL);
488 }
489 }
490 ret[len++] = '#';
491 p = uri->fragment;
492 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000493 if (len + 3 >= max) {
494 max *= 2;
495 ret = (xmlChar *) xmlRealloc(ret,
496 (max + 1) * sizeof(xmlChar));
497 if (ret == NULL) {
498 xmlGenericError(xmlGenericErrorContext,
499 "xmlSaveUri: out of memory\n");
500 return(NULL);
501 }
502 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000503 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
504 ret[len++] = *p++;
505 else {
506 int val = *(unsigned char *)p++;
507 int hi = val / 0x10, lo = val % 0x10;
508 ret[len++] = '%';
509 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
510 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000511 }
512 }
Owen Taylor3473f882001-02-23 17:55:21 +0000513 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000514 if (len >= max) {
515 max *= 2;
516 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
517 if (ret == NULL) {
518 xmlGenericError(xmlGenericErrorContext,
519 "xmlSaveUri: out of memory\n");
520 return(NULL);
521 }
522 }
523 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000524 return(ret);
525}
526
527/**
528 * xmlPrintURI:
529 * @stream: a FILE* for the output
530 * @uri: pointer to an xmlURI
531 *
532 * Prints the URI in the stream @steam.
533 */
534void
535xmlPrintURI(FILE *stream, xmlURIPtr uri) {
536 xmlChar *out;
537
538 out = xmlSaveUri(uri);
539 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000540 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000541 xmlFree(out);
542 }
543}
544
545/**
546 * xmlCleanURI:
547 * @uri: pointer to an xmlURI
548 *
549 * Make sure the xmlURI struct is free of content
550 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000551static void
Owen Taylor3473f882001-02-23 17:55:21 +0000552xmlCleanURI(xmlURIPtr uri) {
553 if (uri == NULL) return;
554
555 if (uri->scheme != NULL) xmlFree(uri->scheme);
556 uri->scheme = NULL;
557 if (uri->server != NULL) xmlFree(uri->server);
558 uri->server = NULL;
559 if (uri->user != NULL) xmlFree(uri->user);
560 uri->user = NULL;
561 if (uri->path != NULL) xmlFree(uri->path);
562 uri->path = NULL;
563 if (uri->fragment != NULL) xmlFree(uri->fragment);
564 uri->fragment = NULL;
565 if (uri->opaque != NULL) xmlFree(uri->opaque);
566 uri->opaque = NULL;
567 if (uri->authority != NULL) xmlFree(uri->authority);
568 uri->authority = NULL;
569 if (uri->query != NULL) xmlFree(uri->query);
570 uri->query = NULL;
571}
572
573/**
574 * xmlFreeURI:
575 * @uri: pointer to an xmlURI
576 *
577 * Free up the xmlURI struct
578 */
579void
580xmlFreeURI(xmlURIPtr uri) {
581 if (uri == NULL) return;
582
583 if (uri->scheme != NULL) xmlFree(uri->scheme);
584 if (uri->server != NULL) xmlFree(uri->server);
585 if (uri->user != NULL) xmlFree(uri->user);
586 if (uri->path != NULL) xmlFree(uri->path);
587 if (uri->fragment != NULL) xmlFree(uri->fragment);
588 if (uri->opaque != NULL) xmlFree(uri->opaque);
589 if (uri->authority != NULL) xmlFree(uri->authority);
590 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000591 xmlFree(uri);
592}
593
/************************************************************************
 *                                                                      *
 *                          Helper functions                            *
 *                                                                      *
 ************************************************************************/
599
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' inside the path are
 * collapsed to a single '/' as well; leading '/' characters are kept.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is the read position and always sits at the start of a
     * segment when the loop condition is tested; "out" is the write
     * position and never gets ahead of "cur".
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy byte by byte
         * (left to right) instead of using strcpy.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0);

        /* If there are no previous segments, then keep going from here.
         * That is the case when "cur" is the very start of the buffer or
         * when only '/' characters precede it.  A one character segment
         * at the start of a relative path also ends the scan below with
         * segp == path, hence the extra test on segp[0].
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((cur == path) || ((segp == path) && (segp[0] == '/')))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000787
/*
 * is_hex:
 * Returns 1 if c is one of "0"-"9", "a"-"f" or "A"-"F", 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
795
Owen Taylor3473f882001-02-23 17:55:21 +0000796/**
797 * xmlURIUnescapeString:
798 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000799 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000800 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000801 *
802 * Unescaping routine, does not do validity checks !
803 * Output is direct unsigned char translation of %XX values (no encoding)
804 *
805 * Returns an copy of the string, but unescaped
806 */
807char *
808xmlURIUnescapeString(const char *str, int len, char *target) {
809 char *ret, *out;
810 const char *in;
811
812 if (str == NULL)
813 return(NULL);
814 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000815 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000816
817 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000818 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000819 if (ret == NULL) {
820 xmlGenericError(xmlGenericErrorContext,
821 "xmlURIUnescapeString: out of memory\n");
822 return(NULL);
823 }
824 } else
825 ret = target;
826 in = str;
827 out = ret;
828 while(len > 0) {
Daniel Veillard966a31e2004-05-09 02:58:44 +0000829 if ((*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000830 in++;
831 if ((*in >= '0') && (*in <= '9'))
832 *out = (*in - '0');
833 else if ((*in >= 'a') && (*in <= 'f'))
834 *out = (*in - 'a') + 10;
835 else if ((*in >= 'A') && (*in <= 'F'))
836 *out = (*in - 'A') + 10;
837 in++;
838 if ((*in >= '0') && (*in <= '9'))
839 *out = *out * 16 + (*in - '0');
840 else if ((*in >= 'a') && (*in <= 'f'))
841 *out = *out * 16 + (*in - 'a') + 10;
842 else if ((*in >= 'A') && (*in <= 'F'))
843 *out = *out * 16 + (*in - 'A') + 10;
844 in++;
845 len -= 3;
846 out++;
847 } else {
848 *out++ = *in++;
849 len--;
850 }
851 }
852 *out = 0;
853 return(ret);
854}
855
856/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000857 * xmlURIEscapeStr:
858 * @str: string to escape
859 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000860 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000861 * This routine escapes a string to hex, ignoring reserved characters (a-z)
862 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000863 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000864 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000865 */
866xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000867xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
868 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000869 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000870
Owen Taylor3473f882001-02-23 17:55:21 +0000871 unsigned int len, out;
872
873 if (str == NULL)
874 return(NULL);
875 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000876 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000877
878 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000879 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000880 if (ret == NULL) {
881 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000882 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000883 return(NULL);
884 }
885 in = (const xmlChar *) str;
886 out = 0;
887 while(*in != 0) {
888 if (len - out <= 3) {
889 len += 20;
890 ret = (xmlChar *) xmlRealloc(ret, len);
891 if (ret == NULL) {
892 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000893 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000894 return(NULL);
895 }
896 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000897
898 ch = *in;
899
Daniel Veillardeb475a32002-04-14 22:00:22 +0000900 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000901 unsigned char val;
902 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000903 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000904 if (val <= 9)
905 ret[out++] = '0' + val;
906 else
907 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000908 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000909 if (val <= 9)
910 ret[out++] = '0' + val;
911 else
912 ret[out++] = 'A' + val - 0xA;
913 in++;
914 } else {
915 ret[out++] = *in++;
916 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000917
Owen Taylor3473f882001-02-23 17:55:21 +0000918 }
919 ret[out] = 0;
920 return(ret);
921}
922
Daniel Veillard8514c672001-05-23 10:29:12 +0000923/**
924 * xmlURIEscape:
925 * @str: the string of the URI to escape
926 *
927 * Escaping routine, does not do validity checks !
928 * It will try to escape the chars needing this, but this is heuristic
929 * based it's impossible to be sure.
930 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000931 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000932 *
933 * 25 May 2001
934 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
935 * according to RFC2396.
936 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000937 */
938xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000939xmlURIEscape(const xmlChar * str)
940{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000941 xmlChar *ret, *segment = NULL;
942 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000943 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000944
Daniel Veillard6278fb52001-05-25 07:38:41 +0000945#define NULLCHK(p) if(!p) { \
946 xmlGenericError(xmlGenericErrorContext, \
947 "xmlURIEscape: out of memory\n"); \
948 return NULL; }
949
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000950 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000951 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000952
953 uri = xmlCreateURI();
954 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000955 /*
956 * Allow escaping errors in the unescaped form
957 */
958 uri->cleanup = 1;
959 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000960 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000961 xmlFreeURI(uri);
962 return (NULL);
963 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000964 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000965
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000966 if (!uri)
967 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000968
969 ret = NULL;
970
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000971 if (uri->scheme) {
972 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
973 NULLCHK(segment)
974 ret = xmlStrcat(ret, segment);
975 ret = xmlStrcat(ret, BAD_CAST ":");
976 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000977 }
978
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000979 if (uri->authority) {
980 segment =
981 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
982 NULLCHK(segment)
983 ret = xmlStrcat(ret, BAD_CAST "//");
984 ret = xmlStrcat(ret, segment);
985 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000986 }
987
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000988 if (uri->user) {
989 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
990 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +0000991 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000992 ret = xmlStrcat(ret, segment);
993 ret = xmlStrcat(ret, BAD_CAST "@");
994 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000995 }
996
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000997 if (uri->server) {
998 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
999 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001000 if (uri->user == NULL)
1001 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001002 ret = xmlStrcat(ret, segment);
1003 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001004 }
1005
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001006 if (uri->port) {
1007 xmlChar port[10];
1008
Daniel Veillard43d3f612001-11-10 11:57:23 +00001009 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001010 ret = xmlStrcat(ret, BAD_CAST ":");
1011 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001012 }
1013
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001014 if (uri->path) {
1015 segment =
1016 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1017 NULLCHK(segment)
1018 ret = xmlStrcat(ret, segment);
1019 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001020 }
1021
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001022 if (uri->query) {
1023 segment =
1024 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1025 NULLCHK(segment)
1026 ret = xmlStrcat(ret, BAD_CAST "?");
1027 ret = xmlStrcat(ret, segment);
1028 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001029 }
1030
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001031 if (uri->opaque) {
1032 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1033 NULLCHK(segment)
1034 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001035 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001036 }
1037
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001038 if (uri->fragment) {
1039 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1040 NULLCHK(segment)
1041 ret = xmlStrcat(ret, BAD_CAST "#");
1042 ret = xmlStrcat(ret, segment);
1043 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001044 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001045
1046 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001047#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001048
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001049 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001050}
1051
Owen Taylor3473f882001-02-23 17:55:21 +00001052/************************************************************************
1053 * *
1054 * Escaped URI parsing *
1055 * *
1056 ************************************************************************/
1057
1058/**
1059 * xmlParseURIFragment:
1060 * @uri: pointer to an URI structure
1061 * @str: pointer to the string to analyze
1062 *
1063 * Parse an URI fragment string and fills in the appropriate fields
1064 * of the @uri structure.
1065 *
1066 * fragment = *uric
1067 *
1068 * Returns 0 or the error code
1069 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001070static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001071xmlParseURIFragment(xmlURIPtr uri, const char **str)
1072{
Owen Taylor3473f882001-02-23 17:55:21 +00001073 const char *cur = *str;
1074
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001075 if (str == NULL)
1076 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001077
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001078 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001079 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001080 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001081 if (uri->fragment != NULL)
1082 xmlFree(uri->fragment);
1083 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001084 }
1085 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001086 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001087}
1088
1089/**
1090 * xmlParseURIQuery:
1091 * @uri: pointer to an URI structure
1092 * @str: pointer to the string to analyze
1093 *
1094 * Parse the query part of an URI
1095 *
1096 * query = *uric
1097 *
1098 * Returns 0 or the error code
1099 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001100static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001101xmlParseURIQuery(xmlURIPtr uri, const char **str)
1102{
Owen Taylor3473f882001-02-23 17:55:21 +00001103 const char *cur = *str;
1104
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001105 if (str == NULL)
1106 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001107
Daniel Veillard9231ff92003-03-23 22:00:51 +00001108 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001109 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001110 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001111 if (uri->query != NULL)
1112 xmlFree(uri->query);
1113 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001114 }
1115 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001116 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001117}
1118
1119/**
1120 * xmlParseURIScheme:
1121 * @uri: pointer to an URI structure
1122 * @str: pointer to the string to analyze
1123 *
1124 * Parse an URI scheme
1125 *
1126 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1127 *
1128 * Returns 0 or the error code
1129 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001130static int
Owen Taylor3473f882001-02-23 17:55:21 +00001131xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1132 const char *cur;
1133
1134 if (str == NULL)
1135 return(-1);
1136
1137 cur = *str;
1138 if (!IS_ALPHA(*cur))
1139 return(2);
1140 cur++;
1141 while (IS_SCHEME(*cur)) cur++;
1142 if (uri != NULL) {
1143 if (uri->scheme != NULL) xmlFree(uri->scheme);
1144 /* !!! strndup */
1145 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1146 }
1147 *str = cur;
1148 return(0);
1149}
1150
1151/**
1152 * xmlParseURIOpaquePart:
1153 * @uri: pointer to an URI structure
1154 * @str: pointer to the string to analyze
1155 *
1156 * Parse an URI opaque part
1157 *
1158 * opaque_part = uric_no_slash *uric
1159 *
1160 * Returns 0 or the error code
1161 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001162static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001163xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1164{
Owen Taylor3473f882001-02-23 17:55:21 +00001165 const char *cur;
1166
1167 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001168 return (-1);
1169
Owen Taylor3473f882001-02-23 17:55:21 +00001170 cur = *str;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001171 if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001172 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001173 }
1174 NEXT(cur);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001175 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001176 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001177 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001178 if (uri->opaque != NULL)
1179 xmlFree(uri->opaque);
1180 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001181 }
1182 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001183 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001184}
1185
1186/**
1187 * xmlParseURIServer:
1188 * @uri: pointer to an URI structure
1189 * @str: pointer to the string to analyze
1190 *
1191 * Parse a server subpart of an URI, it's a finer grain analysis
1192 * of the authority part.
1193 *
1194 * server = [ [ userinfo "@" ] hostport ]
1195 * userinfo = *( unreserved | escaped |
1196 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1197 * hostport = host [ ":" port ]
1198 * host = hostname | IPv4address
1199 * hostname = *( domainlabel "." ) toplabel [ "." ]
1200 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1201 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1202 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1203 * port = *digit
1204 *
1205 * Returns 0 or the error code
1206 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001207static int
Owen Taylor3473f882001-02-23 17:55:21 +00001208xmlParseURIServer(xmlURIPtr uri, const char **str) {
1209 const char *cur;
1210 const char *host, *tmp;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001211 const int IPmax = 4;
1212 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001213
1214 if (str == NULL)
1215 return(-1);
1216
1217 cur = *str;
1218
1219 /*
1220 * is there an userinfo ?
1221 */
1222 while (IS_USERINFO(cur)) NEXT(cur);
1223 if (*cur == '@') {
1224 if (uri != NULL) {
1225 if (uri->user != NULL) xmlFree(uri->user);
1226 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1227 }
1228 cur++;
1229 } else {
1230 if (uri != NULL) {
1231 if (uri->user != NULL) xmlFree(uri->user);
1232 uri->user = NULL;
1233 }
1234 cur = *str;
1235 }
1236 /*
1237 * This can be empty in the case where there is no server
1238 */
1239 host = cur;
1240 if (*cur == '/') {
1241 if (uri != NULL) {
1242 if (uri->authority != NULL) xmlFree(uri->authority);
1243 uri->authority = NULL;
1244 if (uri->server != NULL) xmlFree(uri->server);
1245 uri->server = NULL;
1246 uri->port = 0;
1247 }
1248 return(0);
1249 }
1250 /*
1251 * host part of hostport can derive either an IPV4 address
1252 * or an unresolved name. Check the IP first, it easier to detect
1253 * errors if wrong one
1254 */
Daniel Veillard9231ff92003-03-23 22:00:51 +00001255 for (oct = 0; oct < IPmax; ++oct) {
1256 if (*cur == '.')
1257 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001258 while(IS_DIGIT(*cur)) cur++;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001259 if (oct == (IPmax-1))
1260 continue;
1261 if (*cur != '.')
1262 break;
1263 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001264 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001265 if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) {
1266 /* maybe host_name */
1267 if (!IS_ALPHANUM(*cur))
1268 return(4); /* e.g. http://xml.$oft */
1269 do {
1270 do ++cur; while (IS_ALPHANUM(*cur));
1271 if (*cur == '-') {
1272 --cur;
1273 if (*cur == '.')
1274 return(5); /* e.g. http://xml.-soft */
1275 ++cur;
1276 continue;
1277 }
1278 if (*cur == '.') {
1279 --cur;
1280 if (*cur == '-')
1281 return(6); /* e.g. http://xml-.soft */
1282 if (*cur == '.')
1283 return(7); /* e.g. http://xml..soft */
1284 ++cur;
1285 continue;
1286 }
1287 break;
1288 } while (1);
1289 tmp = cur;
1290 if (tmp[-1] == '.')
1291 --tmp; /* e.g. http://xml.$Oft/ */
1292 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1293 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1294 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001295 }
Owen Taylor3473f882001-02-23 17:55:21 +00001296 if (uri != NULL) {
1297 if (uri->authority != NULL) xmlFree(uri->authority);
1298 uri->authority = NULL;
1299 if (uri->server != NULL) xmlFree(uri->server);
1300 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1301 }
Owen Taylor3473f882001-02-23 17:55:21 +00001302 /*
1303 * finish by checking for a port presence.
1304 */
1305 if (*cur == ':') {
1306 cur++;
1307 if (IS_DIGIT(*cur)) {
1308 if (uri != NULL)
1309 uri->port = 0;
1310 while (IS_DIGIT(*cur)) {
1311 if (uri != NULL)
1312 uri->port = uri->port * 10 + (*cur - '0');
1313 cur++;
1314 }
1315 }
1316 }
1317 *str = cur;
1318 return(0);
1319}
1320
1321/**
1322 * xmlParseURIRelSegment:
1323 * @uri: pointer to an URI structure
1324 * @str: pointer to the string to analyze
1325 *
1326 * Parse an URI relative segment
1327 *
1328 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1329 * "+" | "$" | "," )
1330 *
1331 * Returns 0 or the error code
1332 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001333static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001334xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1335{
Owen Taylor3473f882001-02-23 17:55:21 +00001336 const char *cur;
1337
1338 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001339 return (-1);
1340
Owen Taylor3473f882001-02-23 17:55:21 +00001341 cur = *str;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001342 if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001343 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001344 }
1345 NEXT(cur);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001346 while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001347 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001348 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001349 if (uri->path != NULL)
1350 xmlFree(uri->path);
1351 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001352 }
1353 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001354 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001355}
1356
1357/**
1358 * xmlParseURIPathSegments:
1359 * @uri: pointer to an URI structure
1360 * @str: pointer to the string to analyze
1361 * @slash: should we add a leading slash
1362 *
1363 * Parse an URI set of path segments
1364 *
1365 * path_segments = segment *( "/" segment )
1366 * segment = *pchar *( ";" param )
1367 * param = *pchar
1368 *
1369 * Returns 0 or the error code
1370 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001371static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001372xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1373{
Owen Taylor3473f882001-02-23 17:55:21 +00001374 const char *cur;
1375
1376 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001377 return (-1);
1378
Owen Taylor3473f882001-02-23 17:55:21 +00001379 cur = *str;
1380
1381 do {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001382 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001383 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001384 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001385 cur++;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001386 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001387 NEXT(cur);
1388 }
1389 if (*cur != '/')
1390 break;
1391 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001392 } while (1);
1393 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001394 int len, len2 = 0;
1395 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001397 /*
1398 * Concat the set of path segments to the current path
1399 */
1400 len = cur - *str;
1401 if (slash)
1402 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001403
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001404 if (uri->path != NULL) {
1405 len2 = strlen(uri->path);
1406 len += len2;
1407 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001408 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001409 if (path == NULL) {
1410 xmlGenericError(xmlGenericErrorContext,
1411 "xmlParseURIPathSegments: out of memory\n");
1412 *str = cur;
1413 return (-1);
1414 }
1415 if (uri->path != NULL)
1416 memcpy(path, uri->path, len2);
1417 if (slash) {
1418 path[len2] = '/';
1419 len2++;
1420 }
1421 path[len2] = 0;
1422 if (cur - *str > 0)
1423 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1424 if (uri->path != NULL)
1425 xmlFree(uri->path);
1426 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001427 }
1428 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001429 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001430}
1431
1432/**
1433 * xmlParseURIAuthority:
1434 * @uri: pointer to an URI structure
1435 * @str: pointer to the string to analyze
1436 *
1437 * Parse the authority part of an URI.
1438 *
1439 * authority = server | reg_name
1440 * server = [ [ userinfo "@" ] hostport ]
1441 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1442 * "@" | "&" | "=" | "+" )
1443 *
1444 * Note : this is completely ambiguous since reg_name is allowed to
1445 * use the full set of chars in use by server:
1446 *
1447 * 3.2.1. Registry-based Naming Authority
1448 *
1449 * The structure of a registry-based naming authority is specific
1450 * to the URI scheme, but constrained to the allowed characters
1451 * for an authority component.
1452 *
1453 * Returns 0 or the error code
1454 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001455static int
Owen Taylor3473f882001-02-23 17:55:21 +00001456xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1457 const char *cur;
1458 int ret;
1459
1460 if (str == NULL)
1461 return(-1);
1462
1463 cur = *str;
1464
1465 /*
1466 * try first to parse it as a server string.
1467 */
1468 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001469 if ((ret == 0) && (*str != NULL) &&
1470 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001471 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001472 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001473
1474 /*
1475 * failed, fallback to reg_name
1476 */
1477 if (!IS_REG_NAME(cur)) {
1478 return(5);
1479 }
1480 NEXT(cur);
1481 while (IS_REG_NAME(cur)) NEXT(cur);
1482 if (uri != NULL) {
1483 if (uri->server != NULL) xmlFree(uri->server);
1484 uri->server = NULL;
1485 if (uri->user != NULL) xmlFree(uri->user);
1486 uri->user = NULL;
1487 if (uri->authority != NULL) xmlFree(uri->authority);
1488 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1489 }
1490 *str = cur;
1491 return(0);
1492}
1493
1494/**
1495 * xmlParseURIHierPart:
1496 * @uri: pointer to an URI structure
1497 * @str: pointer to the string to analyze
1498 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001499 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001500 *
1501 * hier_part = ( net_path | abs_path ) [ "?" query ]
1502 * abs_path = "/" path_segments
1503 * net_path = "//" authority [ abs_path ]
1504 *
1505 * Returns 0 or the error code
1506 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507static int
Owen Taylor3473f882001-02-23 17:55:21 +00001508xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1509 int ret;
1510 const char *cur;
1511
1512 if (str == NULL)
1513 return(-1);
1514
1515 cur = *str;
1516
1517 if ((cur[0] == '/') && (cur[1] == '/')) {
1518 cur += 2;
1519 ret = xmlParseURIAuthority(uri, &cur);
1520 if (ret != 0)
1521 return(ret);
1522 if (cur[0] == '/') {
1523 cur++;
1524 ret = xmlParseURIPathSegments(uri, &cur, 1);
1525 }
1526 } else if (cur[0] == '/') {
1527 cur++;
1528 ret = xmlParseURIPathSegments(uri, &cur, 1);
1529 } else {
1530 return(4);
1531 }
1532 if (ret != 0)
1533 return(ret);
1534 if (*cur == '?') {
1535 cur++;
1536 ret = xmlParseURIQuery(uri, &cur);
1537 if (ret != 0)
1538 return(ret);
1539 }
1540 *str = cur;
1541 return(0);
1542}
1543
1544/**
1545 * xmlParseAbsoluteURI:
1546 * @uri: pointer to an URI structure
1547 * @str: pointer to the string to analyze
1548 *
1549 * Parse an URI reference string and fills in the appropriate fields
1550 * of the @uri structure
1551 *
1552 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1553 *
1554 * Returns 0 or the error code
1555 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001556static int
Owen Taylor3473f882001-02-23 17:55:21 +00001557xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1558 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001559 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001560
1561 if (str == NULL)
1562 return(-1);
1563
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001564 cur = *str;
1565
Owen Taylor3473f882001-02-23 17:55:21 +00001566 ret = xmlParseURIScheme(uri, str);
1567 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001568 if (**str != ':') {
1569 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001570 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001571 }
Owen Taylor3473f882001-02-23 17:55:21 +00001572 (*str)++;
1573 if (**str == '/')
1574 return(xmlParseURIHierPart(uri, str));
1575 return(xmlParseURIOpaquePart(uri, str));
1576}
1577
1578/**
1579 * xmlParseRelativeURI:
1580 * @uri: pointer to an URI structure
1581 * @str: pointer to the string to analyze
1582 *
1583 * Parse an relative URI string and fills in the appropriate fields
1584 * of the @uri structure
1585 *
1586 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1587 * abs_path = "/" path_segments
1588 * net_path = "//" authority [ abs_path ]
1589 * rel_path = rel_segment [ abs_path ]
1590 *
1591 * Returns 0 or the error code
1592 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001593static int
Owen Taylor3473f882001-02-23 17:55:21 +00001594xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1595 int ret = 0;
1596 const char *cur;
1597
1598 if (str == NULL)
1599 return(-1);
1600
1601 cur = *str;
1602 if ((cur[0] == '/') && (cur[1] == '/')) {
1603 cur += 2;
1604 ret = xmlParseURIAuthority(uri, &cur);
1605 if (ret != 0)
1606 return(ret);
1607 if (cur[0] == '/') {
1608 cur++;
1609 ret = xmlParseURIPathSegments(uri, &cur, 1);
1610 }
1611 } else if (cur[0] == '/') {
1612 cur++;
1613 ret = xmlParseURIPathSegments(uri, &cur, 1);
1614 } else if (cur[0] != '#' && cur[0] != '?') {
1615 ret = xmlParseURIRelSegment(uri, &cur);
1616 if (ret != 0)
1617 return(ret);
1618 if (cur[0] == '/') {
1619 cur++;
1620 ret = xmlParseURIPathSegments(uri, &cur, 1);
1621 }
1622 }
1623 if (ret != 0)
1624 return(ret);
1625 if (*cur == '?') {
1626 cur++;
1627 ret = xmlParseURIQuery(uri, &cur);
1628 if (ret != 0)
1629 return(ret);
1630 }
1631 *str = cur;
1632 return(ret);
1633}
1634
1635/**
1636 * xmlParseURIReference:
1637 * @uri: pointer to an URI structure
1638 * @str: the string to analyze
1639 *
1640 * Parse an URI reference string and fills in the appropriate fields
1641 * of the @uri structure
1642 *
1643 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1644 *
1645 * Returns 0 or the error code
1646 */
1647int
1648xmlParseURIReference(xmlURIPtr uri, const char *str) {
1649 int ret;
1650 const char *tmp = str;
1651
1652 if (str == NULL)
1653 return(-1);
1654 xmlCleanURI(uri);
1655
1656 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001657 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001658 * it fails.
1659 */
1660 ret = xmlParseAbsoluteURI(uri, &str);
1661 if (ret != 0) {
1662 xmlCleanURI(uri);
1663 str = tmp;
1664 ret = xmlParseRelativeURI(uri, &str);
1665 }
1666 if (ret != 0) {
1667 xmlCleanURI(uri);
1668 return(ret);
1669 }
1670
1671 if (*str == '#') {
1672 str++;
1673 ret = xmlParseURIFragment(uri, &str);
1674 if (ret != 0) return(ret);
1675 }
1676 if (*str != 0) {
1677 xmlCleanURI(uri);
1678 return(1);
1679 }
1680 return(0);
1681}
1682
1683/**
1684 * xmlParseURI:
1685 * @str: the URI string to analyze
1686 *
1687 * Parse an URI
1688 *
1689 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1690 *
1691 * Returns a newly build xmlURIPtr or NULL in case of error
1692 */
1693xmlURIPtr
1694xmlParseURI(const char *str) {
1695 xmlURIPtr uri;
1696 int ret;
1697
1698 if (str == NULL)
1699 return(NULL);
1700 uri = xmlCreateURI();
1701 if (uri != NULL) {
1702 ret = xmlParseURIReference(uri, str);
1703 if (ret) {
1704 xmlFreeURI(uri);
1705 return(NULL);
1706 }
1707 }
1708 return(uri);
1709}
1710
1711/************************************************************************
1712 * *
1713 * Public functions *
1714 * *
1715 ************************************************************************/
1716
1717/**
1718 * xmlBuildURI:
1719 * @URI: the URI instance found in the document
1720 * @base: the base value
1721 *
1722 * Computes he final URI of the reference done by checking that
1723 * the given URI is valid, and building the final URI using the
1724 * base URI. This is processed according to section 5.2 of the
1725 * RFC 2396
1726 *
1727 * 5.2. Resolving Relative References to Absolute Form
1728 *
1729 * Returns a new URI string (to be freed by the caller) or NULL in case
1730 * of error.
1731 */
1732xmlChar *
1733xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1734 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001735 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001736 xmlURIPtr ref = NULL;
1737 xmlURIPtr bas = NULL;
1738 xmlURIPtr res = NULL;
1739
1740 /*
1741 * 1) The URI reference is parsed into the potential four components and
1742 * fragment identifier, as described in Section 4.3.
1743 *
1744 * NOTE that a completely empty URI is treated by modern browsers
1745 * as a reference to "." rather than as a synonym for the current
1746 * URI. Should we do that here?
1747 */
1748 if (URI == NULL)
1749 ret = -1;
1750 else {
1751 if (*URI) {
1752 ref = xmlCreateURI();
1753 if (ref == NULL)
1754 goto done;
1755 ret = xmlParseURIReference(ref, (const char *) URI);
1756 }
1757 else
1758 ret = 0;
1759 }
1760 if (ret != 0)
1761 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001762 if ((ref != NULL) && (ref->scheme != NULL)) {
1763 /*
1764 * The URI is absolute don't modify.
1765 */
1766 val = xmlStrdup(URI);
1767 goto done;
1768 }
Owen Taylor3473f882001-02-23 17:55:21 +00001769 if (base == NULL)
1770 ret = -1;
1771 else {
1772 bas = xmlCreateURI();
1773 if (bas == NULL)
1774 goto done;
1775 ret = xmlParseURIReference(bas, (const char *) base);
1776 }
1777 if (ret != 0) {
1778 if (ref)
1779 val = xmlSaveUri(ref);
1780 goto done;
1781 }
1782 if (ref == NULL) {
1783 /*
1784 * the base fragment must be ignored
1785 */
1786 if (bas->fragment != NULL) {
1787 xmlFree(bas->fragment);
1788 bas->fragment = NULL;
1789 }
1790 val = xmlSaveUri(bas);
1791 goto done;
1792 }
1793
1794 /*
1795 * 2) If the path component is empty and the scheme, authority, and
1796 * query components are undefined, then it is a reference to the
1797 * current document and we are done. Otherwise, the reference URI's
1798 * query and fragment components are defined as found (or not found)
1799 * within the URI reference and not inherited from the base URI.
1800 *
1801 * NOTE that in modern browsers, the parsing differs from the above
1802 * in the following aspect: the query component is allowed to be
1803 * defined while still treating this as a reference to the current
1804 * document.
1805 */
1806 res = xmlCreateURI();
1807 if (res == NULL)
1808 goto done;
1809 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1810 ((ref->authority == NULL) && (ref->server == NULL))) {
1811 if (bas->scheme != NULL)
1812 res->scheme = xmlMemStrdup(bas->scheme);
1813 if (bas->authority != NULL)
1814 res->authority = xmlMemStrdup(bas->authority);
1815 else if (bas->server != NULL) {
1816 res->server = xmlMemStrdup(bas->server);
1817 if (bas->user != NULL)
1818 res->user = xmlMemStrdup(bas->user);
1819 res->port = bas->port;
1820 }
1821 if (bas->path != NULL)
1822 res->path = xmlMemStrdup(bas->path);
1823 if (ref->query != NULL)
1824 res->query = xmlMemStrdup(ref->query);
1825 else if (bas->query != NULL)
1826 res->query = xmlMemStrdup(bas->query);
1827 if (ref->fragment != NULL)
1828 res->fragment = xmlMemStrdup(ref->fragment);
1829 goto step_7;
1830 }
Owen Taylor3473f882001-02-23 17:55:21 +00001831
1832 /*
1833 * 3) If the scheme component is defined, indicating that the reference
1834 * starts with a scheme name, then the reference is interpreted as an
1835 * absolute URI and we are done. Otherwise, the reference URI's
1836 * scheme is inherited from the base URI's scheme component.
1837 */
1838 if (ref->scheme != NULL) {
1839 val = xmlSaveUri(ref);
1840 goto done;
1841 }
1842 if (bas->scheme != NULL)
1843 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001844
1845 if (ref->query != NULL)
1846 res->query = xmlMemStrdup(ref->query);
1847 if (ref->fragment != NULL)
1848 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001849
1850 /*
1851 * 4) If the authority component is defined, then the reference is a
1852 * network-path and we skip to step 7. Otherwise, the reference
1853 * URI's authority is inherited from the base URI's authority
1854 * component, which will also be undefined if the URI scheme does not
1855 * use an authority component.
1856 */
1857 if ((ref->authority != NULL) || (ref->server != NULL)) {
1858 if (ref->authority != NULL)
1859 res->authority = xmlMemStrdup(ref->authority);
1860 else {
1861 res->server = xmlMemStrdup(ref->server);
1862 if (ref->user != NULL)
1863 res->user = xmlMemStrdup(ref->user);
1864 res->port = ref->port;
1865 }
1866 if (ref->path != NULL)
1867 res->path = xmlMemStrdup(ref->path);
1868 goto step_7;
1869 }
1870 if (bas->authority != NULL)
1871 res->authority = xmlMemStrdup(bas->authority);
1872 else if (bas->server != NULL) {
1873 res->server = xmlMemStrdup(bas->server);
1874 if (bas->user != NULL)
1875 res->user = xmlMemStrdup(bas->user);
1876 res->port = bas->port;
1877 }
1878
1879 /*
1880 * 5) If the path component begins with a slash character ("/"), then
1881 * the reference is an absolute-path and we skip to step 7.
1882 */
1883 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1884 res->path = xmlMemStrdup(ref->path);
1885 goto step_7;
1886 }
1887
1888
1889 /*
1890 * 6) If this step is reached, then we are resolving a relative-path
1891 * reference. The relative path needs to be merged with the base
1892 * URI's path. Although there are many ways to do this, we will
1893 * describe a simple method using a separate string buffer.
1894 *
1895 * Allocate a buffer large enough for the result string.
1896 */
1897 len = 2; /* extra / and 0 */
1898 if (ref->path != NULL)
1899 len += strlen(ref->path);
1900 if (bas->path != NULL)
1901 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001902 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 if (res->path == NULL) {
1904 xmlGenericError(xmlGenericErrorContext,
1905 "xmlBuildURI: out of memory\n");
1906 goto done;
1907 }
1908 res->path[0] = 0;
1909
1910 /*
1911 * a) All but the last segment of the base URI's path component is
1912 * copied to the buffer. In other words, any characters after the
1913 * last (right-most) slash character, if any, are excluded.
1914 */
1915 cur = 0;
1916 out = 0;
1917 if (bas->path != NULL) {
1918 while (bas->path[cur] != 0) {
1919 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1920 cur++;
1921 if (bas->path[cur] == 0)
1922 break;
1923
1924 cur++;
1925 while (out < cur) {
1926 res->path[out] = bas->path[out];
1927 out++;
1928 }
1929 }
1930 }
1931 res->path[out] = 0;
1932
1933 /*
1934 * b) The reference's path component is appended to the buffer
1935 * string.
1936 */
1937 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001938 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001939 /*
1940 * Ensure the path includes a '/'
1941 */
1942 if ((out == 0) && (bas->server != NULL))
1943 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001944 while (ref->path[indx] != 0) {
1945 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00001946 }
1947 }
1948 res->path[out] = 0;
1949
1950 /*
1951 * Steps c) to h) are really path normalization steps
1952 */
1953 xmlNormalizeURIPath(res->path);
1954
1955step_7:
1956
1957 /*
1958 * 7) The resulting URI components, including any inherited from the
1959 * base URI, are recombined to give the absolute form of the URI
1960 * reference.
1961 */
1962 val = xmlSaveUri(res);
1963
1964done:
1965 if (ref != NULL)
1966 xmlFreeURI(ref);
1967 if (bas != NULL)
1968 xmlFreeURI(bas);
1969 if (res != NULL)
1970 xmlFreeURI(res);
1971 return(val);
1972}
1973
/**
 * xmlCanonicPath:
 * @path:  the resource locator in a filesystem notation
 *
 * Constructs a canonic path from the specified path.
 *
 * Returns a new canonic path, or a duplicate of the path parameter if it
 * already parses as a URI. The caller is responsible for freeing the memory
 * occupied by the returned string. If there is insufficient memory available,
 * or the argument is NULL, the function returns NULL.
 */
/*
 * IS_WINDOWS_PATH:
 * @p:  a NUL-terminated string, or NULL
 *
 * Evaluates to non-zero when @p is not NULL and starts with an ASCII letter
 * followed by ':' and then '/' or '\\' (e.g. "c:/dir" or "C:\\dir").
 *
 * The tests are chained with &&, so p[1] and p[2] are only read once the
 * preceding characters matched; a shorter string stops at its terminator.
 * The argument is fully parenthesized so that expressions such as
 * (base + 2) expand correctly, but it is evaluated several times: do not
 * pass an expression with side effects.
 */
#define IS_WINDOWS_PATH(p)                                      \
    (((p) != NULL) &&                                           \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||                   \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&                  \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
1990xmlChar*
1991xmlCanonicPath(const xmlChar *path)
1992{
Daniel Veillardc64b8e92003-02-24 11:47:13 +00001993#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00001994 int len = 0;
1995 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00001996 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00001997#endif
1998 xmlChar *ret;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00001999 xmlURIPtr uri;
2000
2001 if (path == NULL)
2002 return(NULL);
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002003 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002004 xmlFreeURI(uri);
2005 return xmlStrdup(path);
2006 }
2007
2008 uri = xmlCreateURI();
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002009 if (uri == NULL) {
2010 return(NULL);
2011 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002012
Igor Zlatkovicce076162003-02-23 13:39:39 +00002013#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002014 len = xmlStrlen(path);
2015 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2016 uri->scheme = xmlStrdup(BAD_CAST "file");
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002017 uri->path = xmlMallocAtomic(len + 2);
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002018 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002019 p = uri->path + 1;
2020 strncpy(p, path, len + 1);
2021 } else {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002022 uri->path = xmlStrdup(path);
Igor Zlatkovicce076162003-02-23 13:39:39 +00002023 p = uri->path;
2024 }
2025 while (*p != '\0') {
2026 if (*p == '\\')
2027 *p = '/';
2028 p++;
2029 }
2030#else
Daniel Veillard42f12e92003-03-07 18:32:59 +00002031 uri->path = (char *) xmlStrdup((const xmlChar *) path);
Igor Zlatkovicce076162003-02-23 13:39:39 +00002032#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002033
2034 ret = xmlSaveUri(uri);
2035 xmlFreeURI(uri);
2036 return(ret);
2037}
Owen Taylor3473f882001-02-23 17:55:21 +00002038