blob: b95e091b09a0d0454ce2f929c023c93414a8f4d4 [file] [log] [blame]
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 2396, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
/************************************************************************
 *                                                                      *
 *          Macros to differentiate various character type              *
 *          directly extracted from RFC 2396                            *
 *                                                                      *
 ************************************************************************/

/*
 * Every macro below may evaluate its argument more than once: never pass
 * an expression with side effects.  Macros taking "x" expect a character
 * value, macros taking "p" expect a pointer to the current character.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||	\
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 */
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	    ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
	    ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
	    ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
	    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
	    ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	  ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	  ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	  ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	              ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
       ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	            (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * RFC 2396 also listed "[" and "]" as unwise; RFC 2732 moved them to the
 * reserved set.  This macro still accepts both brackets, so they match
 * IS_RESERVED as well as IS_UNWISE.
 */
#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||	\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * Skip to next pointer char, handle escaped sequences: advances @p by 3
 * when it points at a '%', by 1 otherwise.  @p must be a modifiable
 * lvalue; it is fully parenthesised so that arguments such as *pp
 * advance the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s through xmlStrndup, as a char *. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
189
Owen Taylor3473f882001-02-23 17:55:21 +0000190/************************************************************************
191 * *
192 * Generic URI structure functions *
193 * *
194 ************************************************************************/
195
196/**
197 * xmlCreateURI:
198 *
199 * Simply creates an empty xmlURI
200 *
201 * Returns the new structure or NULL in case of error
202 */
203xmlURIPtr
204xmlCreateURI(void) {
205 xmlURIPtr ret;
206
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208 if (ret == NULL) {
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
211 return(NULL);
212 }
213 memset(ret, 0, sizeof(xmlURI));
214 return(ret);
215}
216
217/**
218 * xmlSaveUri:
219 * @uri: pointer to an xmlURI
220 *
221 * Save the URI as an escaped string
222 *
223 * Returns a new string (to be deallocated by caller)
224 */
225xmlChar *
226xmlSaveUri(xmlURIPtr uri) {
227 xmlChar *ret = NULL;
228 const char *p;
229 int len;
230 int max;
231
232 if (uri == NULL) return(NULL);
233
234
235 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 len = 0;
243
244 if (uri->scheme != NULL) {
245 p = uri->scheme;
246 while (*p != 0) {
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = *p++;
257 }
258 if (len >= max) {
259 max *= 2;
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
261 if (ret == NULL) {
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
264 return(NULL);
265 }
266 }
267 ret[len++] = ':';
268 }
269 if (uri->opaque != NULL) {
270 p = uri->opaque;
271 while (*p != 0) {
272 if (len + 3 >= max) {
273 max *= 2;
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
275 if (ret == NULL) {
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
278 return(NULL);
279 }
280 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000282 ret[len++] = *p++;
283 else {
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
286 ret[len++] = '%';
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289 }
290 }
Owen Taylor3473f882001-02-23 17:55:21 +0000291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
429 if (ret == NULL) {
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
432 return(NULL);
433 }
434 }
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
438 ((*(p) == ',')))
439 ret[len++] = *p++;
440 else {
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
443 ret[len++] = '%';
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446 }
447 }
448 }
449 if (uri->query != NULL) {
450 if (len + 3 >= max) {
451 max *= 2;
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
454 if (ret == NULL) {
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
457 return(NULL);
458 }
459 }
460 ret[len++] = '?';
461 p = uri->query;
462 while (*p != 0) {
463 if (len + 3 >= max) {
464 max *= 2;
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
467 if (ret == NULL) {
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
470 return(NULL);
471 }
472 }
473 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
474 ret[len++] = *p++;
475 else {
476 int val = *(unsigned char *)p++;
477 int hi = val / 0x10, lo = val % 0x10;
478 ret[len++] = '%';
479 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
480 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
481 }
482 }
483 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000484 }
485 if (uri->fragment != NULL) {
486 if (len + 3 >= max) {
487 max *= 2;
488 ret = (xmlChar *) xmlRealloc(ret,
489 (max + 1) * sizeof(xmlChar));
490 if (ret == NULL) {
491 xmlGenericError(xmlGenericErrorContext,
492 "xmlSaveUri: out of memory\n");
493 return(NULL);
494 }
495 }
496 ret[len++] = '#';
497 p = uri->fragment;
498 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 if (len + 3 >= max) {
500 max *= 2;
501 ret = (xmlChar *) xmlRealloc(ret,
502 (max + 1) * sizeof(xmlChar));
503 if (ret == NULL) {
504 xmlGenericError(xmlGenericErrorContext,
505 "xmlSaveUri: out of memory\n");
506 return(NULL);
507 }
508 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000509 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
510 ret[len++] = *p++;
511 else {
512 int val = *(unsigned char *)p++;
513 int hi = val / 0x10, lo = val % 0x10;
514 ret[len++] = '%';
515 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
516 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000517 }
518 }
Owen Taylor3473f882001-02-23 17:55:21 +0000519 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000520 if (len >= max) {
521 max *= 2;
522 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
523 if (ret == NULL) {
524 xmlGenericError(xmlGenericErrorContext,
525 "xmlSaveUri: out of memory\n");
526 return(NULL);
527 }
528 }
529 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000530 return(ret);
531}
532
533/**
534 * xmlPrintURI:
535 * @stream: a FILE* for the output
536 * @uri: pointer to an xmlURI
537 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000538 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000539 */
540void
541xmlPrintURI(FILE *stream, xmlURIPtr uri) {
542 xmlChar *out;
543
544 out = xmlSaveUri(uri);
545 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000546 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000547 xmlFree(out);
548 }
549}
550
551/**
552 * xmlCleanURI:
553 * @uri: pointer to an xmlURI
554 *
555 * Make sure the xmlURI struct is free of content
556 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000557static void
Owen Taylor3473f882001-02-23 17:55:21 +0000558xmlCleanURI(xmlURIPtr uri) {
559 if (uri == NULL) return;
560
561 if (uri->scheme != NULL) xmlFree(uri->scheme);
562 uri->scheme = NULL;
563 if (uri->server != NULL) xmlFree(uri->server);
564 uri->server = NULL;
565 if (uri->user != NULL) xmlFree(uri->user);
566 uri->user = NULL;
567 if (uri->path != NULL) xmlFree(uri->path);
568 uri->path = NULL;
569 if (uri->fragment != NULL) xmlFree(uri->fragment);
570 uri->fragment = NULL;
571 if (uri->opaque != NULL) xmlFree(uri->opaque);
572 uri->opaque = NULL;
573 if (uri->authority != NULL) xmlFree(uri->authority);
574 uri->authority = NULL;
575 if (uri->query != NULL) xmlFree(uri->query);
576 uri->query = NULL;
577}
578
579/**
580 * xmlFreeURI:
581 * @uri: pointer to an xmlURI
582 *
583 * Free up the xmlURI struct
584 */
585void
586xmlFreeURI(xmlURIPtr uri) {
587 if (uri == NULL) return;
588
589 if (uri->scheme != NULL) xmlFree(uri->scheme);
590 if (uri->server != NULL) xmlFree(uri->server);
591 if (uri->user != NULL) xmlFree(uri->user);
592 if (uri->path != NULL) xmlFree(uri->path);
593 if (uri->fragment != NULL) xmlFree(uri->fragment);
594 if (uri->opaque != NULL) xmlFree(uri->opaque);
595 if (uri->authority != NULL) xmlFree(uri->authority);
596 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000597 xmlFree(uri);
598}
599
/************************************************************************
 *                                                                      *
 *                          Helper functions                            *
 *                                                                      *
 ************************************************************************/

/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of '/' after the first
 * segment are additionally collapsed to a single '/'.
 *
 * Normalization occurs directly on the string, no new allocation is done
 * (the result is never longer than the input).
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
	return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
	++cur;
    if (cur[0] == '\0')
	return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
	/*
	 * c) All occurrences of "./", where "." is a complete path segment,
	 *    are removed from the buffer string.
	 */
	if ((cur[0] == '.') && (cur[1] == '/')) {
	    cur += 2;
	    /* '//' normalization should be done at this point too */
	    while (cur[0] == '/')
		cur++;
	    continue;
	}

	/*
	 * d) If the buffer string ends with "." as a complete path segment,
	 *    that "." is removed.
	 */
	if ((cur[0] == '.') && (cur[1] == '\0'))
	    break;

	/* Otherwise keep the segment.  */
	while (cur[0] != '/') {
	    if (cur[0] == '\0')
		goto done_cd;
	    (out++)[0] = (cur++)[0];
	}
	/* normalize // */
	while ((cur[0] == '/') && (cur[1] == '/'))
	    cur++;

	(out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
	++cur;
    if (cur[0] == '\0')
	return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
	char *segp, *tmp;

	/* At the beginning of each iteration of this loop, "cur" points to
	 * the first character of the segment we want to examine.
	 */

	/* Find the end of the current segment.  */
	segp = cur;
	while ((segp[0] != '/') && (segp[0] != '\0'))
	    ++segp;

	/* If this is the last segment, we're done (we need at least two
	 * segments to meet the criteria for the (e) and (f) cases).
	 */
	if (segp[0] == '\0')
	    break;

	/* If the first segment is "..", or if the next segment _isn't_ "..",
	 * keep this segment and try the next one.
	 */
	++segp;
	if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
	    || ((segp[0] != '.') || (segp[1] != '.')
		|| ((segp[2] != '/') && (segp[2] != '\0')))) {
	    cur = segp;
	    continue;
	}

	/* If we get here, remove this segment and the next one and back up
	 * to the previous segment (if there is one), to implement the
	 * "iteratively" clause.  It's pretty much impossible to back up
	 * while maintaining two pointers into the buffer, so just compact
	 * the whole buffer now.
	 */

	/* If this is the end of the buffer, we're done.  */
	if (segp[2] == '\0') {
	    cur[0] = '\0';
	    break;
	}
	/* Source and destination overlap, so strcpy() must not be used. */
	tmp = cur;
	segp += 3;
	while ((*tmp++ = *segp++) != 0);

	/*
	 * Step back over the '/' separators in front of "cur".  Reaching
	 * the start of the buffer means there is no previous segment, so
	 * keep going from here.  Only the separators are skipped: a
	 * previous segment made of the single character path[0] is
	 * therefore still seen as a segment.
	 */
	segp = cur;
	while ((segp > path) && (segp[-1] == '/'))
	    --segp;
	if (segp == path)
	    continue;

	/* "segp - 1" is the last character of the previous segment; find
	 * its start.  We need to back up to the previous segment and start
	 * over with that to handle things like "foo/bar/../..".  If we
	 * don't do this, then on the first pass we'll remove the "bar/..",
	 * but be pointing at the second ".." so we won't realize we can also
	 * remove the "foo/..".
	 */
	cur = segp - 1;
	while ((cur > path) && (cur[-1] != '/'))
	    --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
	cur = path;
	while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
	       && ((cur[3] == '/') || (cur[3] == '\0')))
	    cur += 3;

	if (cur != path) {
	    out = path;
	    while (cur[0] != '\0')
		(out++)[0] = (cur++)[0];
	    out[0] = 0;
	}
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000793
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise.
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
	return(1);
    return(0);
}
801
Owen Taylor3473f882001-02-23 17:55:21 +0000802/**
803 * xmlURIUnescapeString:
804 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000805 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000806 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000807 *
808 * Unescaping routine, does not do validity checks !
809 * Output is direct unsigned char translation of %XX values (no encoding)
810 *
811 * Returns an copy of the string, but unescaped
812 */
813char *
814xmlURIUnescapeString(const char *str, int len, char *target) {
815 char *ret, *out;
816 const char *in;
817
818 if (str == NULL)
819 return(NULL);
820 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000821 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000822
823 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000824 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000825 if (ret == NULL) {
826 xmlGenericError(xmlGenericErrorContext,
827 "xmlURIUnescapeString: out of memory\n");
828 return(NULL);
829 }
830 } else
831 ret = target;
832 in = str;
833 out = ret;
834 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000835 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000836 in++;
837 if ((*in >= '0') && (*in <= '9'))
838 *out = (*in - '0');
839 else if ((*in >= 'a') && (*in <= 'f'))
840 *out = (*in - 'a') + 10;
841 else if ((*in >= 'A') && (*in <= 'F'))
842 *out = (*in - 'A') + 10;
843 in++;
844 if ((*in >= '0') && (*in <= '9'))
845 *out = *out * 16 + (*in - '0');
846 else if ((*in >= 'a') && (*in <= 'f'))
847 *out = *out * 16 + (*in - 'a') + 10;
848 else if ((*in >= 'A') && (*in <= 'F'))
849 *out = *out * 16 + (*in - 'A') + 10;
850 in++;
851 len -= 3;
852 out++;
853 } else {
854 *out++ = *in++;
855 len--;
856 }
857 }
858 *out = 0;
859 return(ret);
860}
861
862/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000863 * xmlURIEscapeStr:
864 * @str: string to escape
865 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000866 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000867 * This routine escapes a string to hex, ignoring reserved characters (a-z)
868 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000869 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000870 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000871 */
872xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000873xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
874 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000875 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000876
Owen Taylor3473f882001-02-23 17:55:21 +0000877 unsigned int len, out;
878
879 if (str == NULL)
880 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000881 if (str[0] == 0)
882 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000883 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000884 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000885
886 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000887 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000888 if (ret == NULL) {
889 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000890 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000891 return(NULL);
892 }
893 in = (const xmlChar *) str;
894 out = 0;
895 while(*in != 0) {
896 if (len - out <= 3) {
897 len += 20;
898 ret = (xmlChar *) xmlRealloc(ret, len);
899 if (ret == NULL) {
900 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000901 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000902 return(NULL);
903 }
904 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000905
906 ch = *in;
907
Daniel Veillardeb475a32002-04-14 22:00:22 +0000908 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000909 unsigned char val;
910 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000911 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000912 if (val <= 9)
913 ret[out++] = '0' + val;
914 else
915 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000916 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000917 if (val <= 9)
918 ret[out++] = '0' + val;
919 else
920 ret[out++] = 'A' + val - 0xA;
921 in++;
922 } else {
923 ret[out++] = *in++;
924 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000925
Owen Taylor3473f882001-02-23 17:55:21 +0000926 }
927 ret[out] = 0;
928 return(ret);
929}
930
Daniel Veillard8514c672001-05-23 10:29:12 +0000931/**
932 * xmlURIEscape:
933 * @str: the string of the URI to escape
934 *
935 * Escaping routine, does not do validity checks !
936 * It will try to escape the chars needing this, but this is heuristic
937 * based it's impossible to be sure.
938 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000939 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000940 *
941 * 25 May 2001
942 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
943 * according to RFC2396.
944 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000945 */
946xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000947xmlURIEscape(const xmlChar * str)
948{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000949 xmlChar *ret, *segment = NULL;
950 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000951 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000952
Daniel Veillard6278fb52001-05-25 07:38:41 +0000953#define NULLCHK(p) if(!p) { \
954 xmlGenericError(xmlGenericErrorContext, \
955 "xmlURIEscape: out of memory\n"); \
956 return NULL; }
957
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000958 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000959 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000960
961 uri = xmlCreateURI();
962 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000963 /*
964 * Allow escaping errors in the unescaped form
965 */
966 uri->cleanup = 1;
967 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000968 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000969 xmlFreeURI(uri);
970 return (NULL);
971 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000972 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000973
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000974 if (!uri)
975 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000976
977 ret = NULL;
978
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000979 if (uri->scheme) {
980 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
981 NULLCHK(segment)
982 ret = xmlStrcat(ret, segment);
983 ret = xmlStrcat(ret, BAD_CAST ":");
984 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000985 }
986
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000987 if (uri->authority) {
988 segment =
989 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
990 NULLCHK(segment)
991 ret = xmlStrcat(ret, BAD_CAST "//");
992 ret = xmlStrcat(ret, segment);
993 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000994 }
995
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000996 if (uri->user) {
997 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
998 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +0000999 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001000 ret = xmlStrcat(ret, segment);
1001 ret = xmlStrcat(ret, BAD_CAST "@");
1002 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001003 }
1004
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001005 if (uri->server) {
1006 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1007 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001008 if (uri->user == NULL)
1009 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001010 ret = xmlStrcat(ret, segment);
1011 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001012 }
1013
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001014 if (uri->port) {
1015 xmlChar port[10];
1016
Daniel Veillard43d3f612001-11-10 11:57:23 +00001017 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001018 ret = xmlStrcat(ret, BAD_CAST ":");
1019 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001020 }
1021
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001022 if (uri->path) {
1023 segment =
1024 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1025 NULLCHK(segment)
1026 ret = xmlStrcat(ret, segment);
1027 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001028 }
1029
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001030 if (uri->query) {
1031 segment =
1032 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1033 NULLCHK(segment)
1034 ret = xmlStrcat(ret, BAD_CAST "?");
1035 ret = xmlStrcat(ret, segment);
1036 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001037 }
1038
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001039 if (uri->opaque) {
1040 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1041 NULLCHK(segment)
1042 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001043 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001044 }
1045
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001046 if (uri->fragment) {
1047 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1048 NULLCHK(segment)
1049 ret = xmlStrcat(ret, BAD_CAST "#");
1050 ret = xmlStrcat(ret, segment);
1051 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001052 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001053
1054 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001055#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001056
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001057 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001058}
1059
Owen Taylor3473f882001-02-23 17:55:21 +00001060/************************************************************************
1061 * *
1062 * Escaped URI parsing *
1063 * *
1064 ************************************************************************/
1065
1066/**
1067 * xmlParseURIFragment:
1068 * @uri: pointer to an URI structure
1069 * @str: pointer to the string to analyze
1070 *
1071 * Parse an URI fragment string and fills in the appropriate fields
1072 * of the @uri structure.
1073 *
1074 * fragment = *uric
1075 *
1076 * Returns 0 or the error code
1077 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001078static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001079xmlParseURIFragment(xmlURIPtr uri, const char **str)
1080{
Daniel Veillard30e76072006-03-09 14:13:55 +00001081 const char *cur;
1082
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001083 if (str == NULL)
1084 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001085
Daniel Veillard30e76072006-03-09 14:13:55 +00001086 cur = *str;
1087
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001088 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001089 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001090 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001091 if (uri->fragment != NULL)
1092 xmlFree(uri->fragment);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001093 if (uri->cleanup & 2)
1094 uri->fragment = STRNDUP(*str, cur - *str);
1095 else
1096 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001097 }
1098 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001099 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001100}
1101
1102/**
1103 * xmlParseURIQuery:
1104 * @uri: pointer to an URI structure
1105 * @str: pointer to the string to analyze
1106 *
1107 * Parse the query part of an URI
1108 *
1109 * query = *uric
1110 *
1111 * Returns 0 or the error code
1112 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001113static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001114xmlParseURIQuery(xmlURIPtr uri, const char **str)
1115{
Daniel Veillard30e76072006-03-09 14:13:55 +00001116 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001117
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001118 if (str == NULL)
1119 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001120
Daniel Veillard30e76072006-03-09 14:13:55 +00001121 cur = *str;
1122
Daniel Veillard336a8e12005-08-07 10:46:19 +00001123 while ((IS_URIC(cur)) ||
1124 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001125 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001126 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001127 if (uri->query != NULL)
1128 xmlFree(uri->query);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001129 if (uri->cleanup & 2)
1130 uri->query = STRNDUP(*str, cur - *str);
1131 else
1132 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001133 }
1134 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001135 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001136}
1137
1138/**
1139 * xmlParseURIScheme:
1140 * @uri: pointer to an URI structure
1141 * @str: pointer to the string to analyze
1142 *
1143 * Parse an URI scheme
1144 *
1145 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1146 *
1147 * Returns 0 or the error code
1148 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001149static int
Owen Taylor3473f882001-02-23 17:55:21 +00001150xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1151 const char *cur;
1152
1153 if (str == NULL)
1154 return(-1);
1155
1156 cur = *str;
1157 if (!IS_ALPHA(*cur))
1158 return(2);
1159 cur++;
1160 while (IS_SCHEME(*cur)) cur++;
1161 if (uri != NULL) {
1162 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001163 uri->scheme = STRNDUP(*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001164 }
1165 *str = cur;
1166 return(0);
1167}
1168
1169/**
1170 * xmlParseURIOpaquePart:
1171 * @uri: pointer to an URI structure
1172 * @str: pointer to the string to analyze
1173 *
1174 * Parse an URI opaque part
1175 *
1176 * opaque_part = uric_no_slash *uric
1177 *
1178 * Returns 0 or the error code
1179 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001180static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001181xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1182{
Owen Taylor3473f882001-02-23 17:55:21 +00001183 const char *cur;
1184
1185 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001186 return (-1);
1187
Owen Taylor3473f882001-02-23 17:55:21 +00001188 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001189 if (!((IS_URIC_NO_SLASH(cur)) ||
1190 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001191 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001192 }
1193 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001194 while ((IS_URIC(cur)) ||
1195 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001196 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001197 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001198 if (uri->opaque != NULL)
1199 xmlFree(uri->opaque);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001200 if (uri->cleanup & 2)
1201 uri->opaque = STRNDUP(*str, cur - *str);
1202 else
1203 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001204 }
1205 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001206 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001207}
1208
1209/**
1210 * xmlParseURIServer:
1211 * @uri: pointer to an URI structure
1212 * @str: pointer to the string to analyze
1213 *
1214 * Parse a server subpart of an URI, it's a finer grain analysis
1215 * of the authority part.
1216 *
1217 * server = [ [ userinfo "@" ] hostport ]
1218 * userinfo = *( unreserved | escaped |
1219 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1220 * hostport = host [ ":" port ]
William M. Brack015ccb22005-02-13 08:18:52 +00001221 * host = hostname | IPv4address | IPv6reference
Owen Taylor3473f882001-02-23 17:55:21 +00001222 * hostname = *( domainlabel "." ) toplabel [ "." ]
1223 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1224 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
William M. Brack015ccb22005-02-13 08:18:52 +00001225 * IPv6reference = "[" IPv6address "]"
1226 * IPv6address = hexpart [ ":" IPv4address ]
1227 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1228 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1229 * hexseq = hex4 *( ":" hex4)
1230 * hex4 = 1*4hexdig
Owen Taylor3473f882001-02-23 17:55:21 +00001231 * port = *digit
1232 *
1233 * Returns 0 or the error code
1234 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001235static int
Owen Taylor3473f882001-02-23 17:55:21 +00001236xmlParseURIServer(xmlURIPtr uri, const char **str) {
1237 const char *cur;
1238 const char *host, *tmp;
William M. Brack015ccb22005-02-13 08:18:52 +00001239 const int IPV4max = 4;
1240 const int IPV6max = 8;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001241 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001242
1243 if (str == NULL)
1244 return(-1);
1245
1246 cur = *str;
1247
1248 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001249 * is there a userinfo ?
Owen Taylor3473f882001-02-23 17:55:21 +00001250 */
1251 while (IS_USERINFO(cur)) NEXT(cur);
1252 if (*cur == '@') {
1253 if (uri != NULL) {
1254 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001255 if (uri->cleanup & 2)
1256 uri->path = STRNDUP(*str, cur - *str);
1257 else
1258 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260 cur++;
1261 } else {
1262 if (uri != NULL) {
1263 if (uri->user != NULL) xmlFree(uri->user);
1264 uri->user = NULL;
1265 }
1266 cur = *str;
1267 }
1268 /*
1269 * This can be empty in the case where there is no server
1270 */
1271 host = cur;
1272 if (*cur == '/') {
1273 if (uri != NULL) {
1274 if (uri->authority != NULL) xmlFree(uri->authority);
1275 uri->authority = NULL;
1276 if (uri->server != NULL) xmlFree(uri->server);
1277 uri->server = NULL;
1278 uri->port = 0;
1279 }
1280 return(0);
1281 }
1282 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001283 * host part of hostport can denote an IPV4 address, an IPV6 address
1284 * or an unresolved name. Check the IP first, its easier to detect
1285 * errors if wrong one.
1286 * An IPV6 address must start with a '[' and end with a ']'.
Owen Taylor3473f882001-02-23 17:55:21 +00001287 */
William M. Brack015ccb22005-02-13 08:18:52 +00001288 if (*cur == '[') {
1289 int compress=0;
1290 cur++;
1291 for (oct = 0; oct < IPV6max; ++oct) {
1292 if (*cur == ':') {
1293 if (compress)
1294 return(3); /* multiple compression attempted */
1295 if (!oct) { /* initial char is compression */
1296 if (*++cur != ':')
1297 return(3);
1298 }
1299 compress = 1; /* set compression-encountered flag */
1300 cur++; /* skip over the second ':' */
1301 continue;
1302 }
1303 while(IS_HEX(*cur)) cur++;
1304 if (oct == (IPV6max-1))
1305 continue;
1306 if (*cur != ':')
1307 break;
1308 cur++;
1309 }
1310 if ((!compress) && (oct != IPV6max))
1311 return(3);
1312 if (*cur != ']')
1313 return(3);
1314 if (uri != NULL) {
1315 if (uri->server != NULL) xmlFree(uri->server);
1316 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1317 (cur-host)-1);
1318 }
1319 cur++;
1320 } else {
1321 /*
1322 * Not IPV6, maybe IPV4
1323 */
1324 for (oct = 0; oct < IPV4max; ++oct) {
1325 if (*cur == '.')
1326 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1327 while(IS_DIGIT(*cur)) cur++;
1328 if (oct == (IPV4max-1))
1329 continue;
1330 if (*cur != '.')
1331 break;
1332 cur++;
1333 }
Owen Taylor3473f882001-02-23 17:55:21 +00001334 }
William M. Brack015ccb22005-02-13 08:18:52 +00001335 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1336 IS_ALPHA(*cur))) {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001337 /* maybe host_name */
1338 if (!IS_ALPHANUM(*cur))
1339 return(4); /* e.g. http://xml.$oft */
1340 do {
1341 do ++cur; while (IS_ALPHANUM(*cur));
1342 if (*cur == '-') {
1343 --cur;
1344 if (*cur == '.')
1345 return(5); /* e.g. http://xml.-soft */
1346 ++cur;
1347 continue;
1348 }
1349 if (*cur == '.') {
1350 --cur;
1351 if (*cur == '-')
1352 return(6); /* e.g. http://xml-.soft */
1353 if (*cur == '.')
1354 return(7); /* e.g. http://xml..soft */
1355 ++cur;
1356 continue;
1357 }
1358 break;
1359 } while (1);
1360 tmp = cur;
1361 if (tmp[-1] == '.')
1362 --tmp; /* e.g. http://xml.$Oft/ */
1363 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1364 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1365 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001366 }
Owen Taylor3473f882001-02-23 17:55:21 +00001367 if (uri != NULL) {
1368 if (uri->authority != NULL) xmlFree(uri->authority);
1369 uri->authority = NULL;
William M. Brack015ccb22005-02-13 08:18:52 +00001370 if (host[0] != '[') { /* it's not an IPV6 addr */
1371 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001372 if (uri->cleanup & 2)
1373 uri->server = STRNDUP(host, cur - host);
1374 else
1375 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
William M. Brack015ccb22005-02-13 08:18:52 +00001376 }
Owen Taylor3473f882001-02-23 17:55:21 +00001377 }
Owen Taylor3473f882001-02-23 17:55:21 +00001378 /*
1379 * finish by checking for a port presence.
1380 */
1381 if (*cur == ':') {
1382 cur++;
1383 if (IS_DIGIT(*cur)) {
1384 if (uri != NULL)
1385 uri->port = 0;
1386 while (IS_DIGIT(*cur)) {
1387 if (uri != NULL)
1388 uri->port = uri->port * 10 + (*cur - '0');
1389 cur++;
1390 }
1391 }
1392 }
1393 *str = cur;
1394 return(0);
1395}
1396
1397/**
1398 * xmlParseURIRelSegment:
1399 * @uri: pointer to an URI structure
1400 * @str: pointer to the string to analyze
1401 *
1402 * Parse an URI relative segment
1403 *
1404 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1405 * "+" | "$" | "," )
1406 *
1407 * Returns 0 or the error code
1408 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001409static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001410xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1411{
Owen Taylor3473f882001-02-23 17:55:21 +00001412 const char *cur;
1413
1414 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001415 return (-1);
1416
Owen Taylor3473f882001-02-23 17:55:21 +00001417 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001418 if (!((IS_SEGMENT(cur)) ||
1419 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001420 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001421 }
1422 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001423 while ((IS_SEGMENT(cur)) ||
1424 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001425 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001426 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001427 if (uri->path != NULL)
1428 xmlFree(uri->path);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001429 if (uri->cleanup & 2)
1430 uri->path = STRNDUP(*str, cur - *str);
1431 else
1432 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001433 }
1434 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001435 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001436}
1437
1438/**
1439 * xmlParseURIPathSegments:
1440 * @uri: pointer to an URI structure
1441 * @str: pointer to the string to analyze
1442 * @slash: should we add a leading slash
1443 *
1444 * Parse an URI set of path segments
1445 *
1446 * path_segments = segment *( "/" segment )
1447 * segment = *pchar *( ";" param )
1448 * param = *pchar
1449 *
1450 * Returns 0 or the error code
1451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001452static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001453xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1454{
Owen Taylor3473f882001-02-23 17:55:21 +00001455 const char *cur;
1456
1457 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001458 return (-1);
1459
Owen Taylor3473f882001-02-23 17:55:21 +00001460 cur = *str;
1461
1462 do {
Daniel Veillard336a8e12005-08-07 10:46:19 +00001463 while ((IS_PCHAR(cur)) ||
1464 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001465 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001466 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001467 cur++;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001468 while ((IS_PCHAR(cur)) ||
1469 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001470 NEXT(cur);
1471 }
1472 if (*cur != '/')
1473 break;
1474 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001475 } while (1);
1476 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001477 int len, len2 = 0;
1478 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001479
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001480 /*
1481 * Concat the set of path segments to the current path
1482 */
1483 len = cur - *str;
1484 if (slash)
1485 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001486
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001487 if (uri->path != NULL) {
1488 len2 = strlen(uri->path);
1489 len += len2;
1490 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001491 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001492 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001493 xmlGenericError(xmlGenericErrorContext,
1494 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001495 *str = cur;
1496 return (-1);
1497 }
1498 if (uri->path != NULL)
1499 memcpy(path, uri->path, len2);
1500 if (slash) {
1501 path[len2] = '/';
1502 len2++;
1503 }
1504 path[len2] = 0;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001505 if (cur - *str > 0) {
1506 if (uri->cleanup & 2) {
1507 memcpy(&path[len2], *str, cur - *str);
1508 path[len2 + (cur - *str)] = 0;
1509 } else
1510 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1511 }
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001512 if (uri->path != NULL)
1513 xmlFree(uri->path);
1514 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001515 }
1516 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001517 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001518}
1519
1520/**
1521 * xmlParseURIAuthority:
1522 * @uri: pointer to an URI structure
1523 * @str: pointer to the string to analyze
1524 *
1525 * Parse the authority part of an URI.
1526 *
1527 * authority = server | reg_name
1528 * server = [ [ userinfo "@" ] hostport ]
1529 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1530 * "@" | "&" | "=" | "+" )
1531 *
1532 * Note : this is completely ambiguous since reg_name is allowed to
1533 * use the full set of chars in use by server:
1534 *
1535 * 3.2.1. Registry-based Naming Authority
1536 *
1537 * The structure of a registry-based naming authority is specific
1538 * to the URI scheme, but constrained to the allowed characters
1539 * for an authority component.
1540 *
1541 * Returns 0 or the error code
1542 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001543static int
Owen Taylor3473f882001-02-23 17:55:21 +00001544xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1545 const char *cur;
1546 int ret;
1547
1548 if (str == NULL)
1549 return(-1);
1550
1551 cur = *str;
1552
1553 /*
1554 * try first to parse it as a server string.
1555 */
1556 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001557 if ((ret == 0) && (*str != NULL) &&
1558 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001559 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001560 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001561
1562 /*
1563 * failed, fallback to reg_name
1564 */
1565 if (!IS_REG_NAME(cur)) {
1566 return(5);
1567 }
1568 NEXT(cur);
1569 while (IS_REG_NAME(cur)) NEXT(cur);
1570 if (uri != NULL) {
1571 if (uri->server != NULL) xmlFree(uri->server);
1572 uri->server = NULL;
1573 if (uri->user != NULL) xmlFree(uri->user);
1574 uri->user = NULL;
1575 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001576 if (uri->cleanup & 2)
1577 uri->authority = STRNDUP(*str, cur - *str);
1578 else
1579 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001580 }
1581 *str = cur;
1582 return(0);
1583}
1584
1585/**
1586 * xmlParseURIHierPart:
1587 * @uri: pointer to an URI structure
1588 * @str: pointer to the string to analyze
1589 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001590 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001591 *
1592 * hier_part = ( net_path | abs_path ) [ "?" query ]
1593 * abs_path = "/" path_segments
1594 * net_path = "//" authority [ abs_path ]
1595 *
1596 * Returns 0 or the error code
1597 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001598static int
Owen Taylor3473f882001-02-23 17:55:21 +00001599xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1600 int ret;
1601 const char *cur;
1602
1603 if (str == NULL)
1604 return(-1);
1605
1606 cur = *str;
1607
1608 if ((cur[0] == '/') && (cur[1] == '/')) {
1609 cur += 2;
1610 ret = xmlParseURIAuthority(uri, &cur);
1611 if (ret != 0)
1612 return(ret);
1613 if (cur[0] == '/') {
1614 cur++;
1615 ret = xmlParseURIPathSegments(uri, &cur, 1);
1616 }
1617 } else if (cur[0] == '/') {
1618 cur++;
1619 ret = xmlParseURIPathSegments(uri, &cur, 1);
1620 } else {
1621 return(4);
1622 }
1623 if (ret != 0)
1624 return(ret);
1625 if (*cur == '?') {
1626 cur++;
1627 ret = xmlParseURIQuery(uri, &cur);
1628 if (ret != 0)
1629 return(ret);
1630 }
1631 *str = cur;
1632 return(0);
1633}
1634
1635/**
1636 * xmlParseAbsoluteURI:
1637 * @uri: pointer to an URI structure
1638 * @str: pointer to the string to analyze
1639 *
1640 * Parse an URI reference string and fills in the appropriate fields
1641 * of the @uri structure
1642 *
1643 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1644 *
1645 * Returns 0 or the error code
1646 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001647static int
Owen Taylor3473f882001-02-23 17:55:21 +00001648xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1649 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001650 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001651
1652 if (str == NULL)
1653 return(-1);
1654
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001655 cur = *str;
1656
Owen Taylor3473f882001-02-23 17:55:21 +00001657 ret = xmlParseURIScheme(uri, str);
1658 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001659 if (**str != ':') {
1660 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001661 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001662 }
Owen Taylor3473f882001-02-23 17:55:21 +00001663 (*str)++;
1664 if (**str == '/')
1665 return(xmlParseURIHierPart(uri, str));
1666 return(xmlParseURIOpaquePart(uri, str));
1667}
1668
1669/**
1670 * xmlParseRelativeURI:
1671 * @uri: pointer to an URI structure
1672 * @str: pointer to the string to analyze
1673 *
1674 * Parse an relative URI string and fills in the appropriate fields
1675 * of the @uri structure
1676 *
1677 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1678 * abs_path = "/" path_segments
1679 * net_path = "//" authority [ abs_path ]
1680 * rel_path = rel_segment [ abs_path ]
1681 *
1682 * Returns 0 or the error code
1683 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001684static int
Owen Taylor3473f882001-02-23 17:55:21 +00001685xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1686 int ret = 0;
1687 const char *cur;
1688
1689 if (str == NULL)
1690 return(-1);
1691
1692 cur = *str;
1693 if ((cur[0] == '/') && (cur[1] == '/')) {
1694 cur += 2;
1695 ret = xmlParseURIAuthority(uri, &cur);
1696 if (ret != 0)
1697 return(ret);
1698 if (cur[0] == '/') {
1699 cur++;
1700 ret = xmlParseURIPathSegments(uri, &cur, 1);
1701 }
1702 } else if (cur[0] == '/') {
1703 cur++;
1704 ret = xmlParseURIPathSegments(uri, &cur, 1);
1705 } else if (cur[0] != '#' && cur[0] != '?') {
1706 ret = xmlParseURIRelSegment(uri, &cur);
1707 if (ret != 0)
1708 return(ret);
1709 if (cur[0] == '/') {
1710 cur++;
1711 ret = xmlParseURIPathSegments(uri, &cur, 1);
1712 }
1713 }
1714 if (ret != 0)
1715 return(ret);
1716 if (*cur == '?') {
1717 cur++;
1718 ret = xmlParseURIQuery(uri, &cur);
1719 if (ret != 0)
1720 return(ret);
1721 }
1722 *str = cur;
1723 return(ret);
1724}
1725
1726/**
1727 * xmlParseURIReference:
1728 * @uri: pointer to an URI structure
1729 * @str: the string to analyze
1730 *
1731 * Parse an URI reference string and fills in the appropriate fields
1732 * of the @uri structure
1733 *
1734 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1735 *
1736 * Returns 0 or the error code
1737 */
1738int
1739xmlParseURIReference(xmlURIPtr uri, const char *str) {
1740 int ret;
1741 const char *tmp = str;
1742
1743 if (str == NULL)
1744 return(-1);
1745 xmlCleanURI(uri);
1746
1747 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001748 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001749 * it fails.
1750 */
1751 ret = xmlParseAbsoluteURI(uri, &str);
1752 if (ret != 0) {
1753 xmlCleanURI(uri);
1754 str = tmp;
1755 ret = xmlParseRelativeURI(uri, &str);
1756 }
1757 if (ret != 0) {
1758 xmlCleanURI(uri);
1759 return(ret);
1760 }
1761
1762 if (*str == '#') {
1763 str++;
1764 ret = xmlParseURIFragment(uri, &str);
1765 if (ret != 0) return(ret);
1766 }
1767 if (*str != 0) {
1768 xmlCleanURI(uri);
1769 return(1);
1770 }
1771 return(0);
1772}
1773
1774/**
1775 * xmlParseURI:
1776 * @str: the URI string to analyze
1777 *
1778 * Parse an URI
1779 *
1780 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1781 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001782 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001783 */
1784xmlURIPtr
1785xmlParseURI(const char *str) {
1786 xmlURIPtr uri;
1787 int ret;
1788
1789 if (str == NULL)
1790 return(NULL);
1791 uri = xmlCreateURI();
1792 if (uri != NULL) {
1793 ret = xmlParseURIReference(uri, str);
1794 if (ret) {
1795 xmlFreeURI(uri);
1796 return(NULL);
1797 }
1798 }
1799 return(uri);
1800}
1801
Daniel Veillard336a8e12005-08-07 10:46:19 +00001802/**
1803 * xmlParseURIRaw:
1804 * @str: the URI string to analyze
1805 * @raw: if 1 unescaping of URI pieces are disabled
1806 *
1807 * Parse an URI but allows to keep intact the original fragments.
1808 *
1809 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1810 *
1811 * Returns a newly built xmlURIPtr or NULL in case of error
1812 */
1813xmlURIPtr
1814xmlParseURIRaw(const char *str, int raw) {
1815 xmlURIPtr uri;
1816 int ret;
1817
1818 if (str == NULL)
1819 return(NULL);
1820 uri = xmlCreateURI();
1821 if (uri != NULL) {
1822 if (raw) {
1823 uri->cleanup |= 2;
1824 }
1825 ret = xmlParseURIReference(uri, str);
1826 if (ret) {
1827 xmlFreeURI(uri);
1828 return(NULL);
1829 }
1830 }
1831 return(uri);
1832}
1833
Owen Taylor3473f882001-02-23 17:55:21 +00001834/************************************************************************
1835 * *
1836 * Public functions *
1837 * *
1838 ************************************************************************/
1839
1840/**
1841 * xmlBuildURI:
1842 * @URI: the URI instance found in the document
1843 * @base: the base value
1844 *
 * Computes the final URI of the reference done by checking that
1846 * the given URI is valid, and building the final URI using the
1847 * base URI. This is processed according to section 5.2 of the
1848 * RFC 2396
1849 *
1850 * 5.2. Resolving Relative References to Absolute Form
1851 *
1852 * Returns a new URI string (to be freed by the caller) or NULL in case
1853 * of error.
1854 */
1855xmlChar *
1856xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1857 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001858 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001859 xmlURIPtr ref = NULL;
1860 xmlURIPtr bas = NULL;
1861 xmlURIPtr res = NULL;
1862
1863 /*
1864 * 1) The URI reference is parsed into the potential four components and
1865 * fragment identifier, as described in Section 4.3.
1866 *
1867 * NOTE that a completely empty URI is treated by modern browsers
1868 * as a reference to "." rather than as a synonym for the current
1869 * URI. Should we do that here?
1870 */
1871 if (URI == NULL)
1872 ret = -1;
1873 else {
1874 if (*URI) {
1875 ref = xmlCreateURI();
1876 if (ref == NULL)
1877 goto done;
1878 ret = xmlParseURIReference(ref, (const char *) URI);
1879 }
1880 else
1881 ret = 0;
1882 }
1883 if (ret != 0)
1884 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001885 if ((ref != NULL) && (ref->scheme != NULL)) {
1886 /*
1887 * The URI is absolute don't modify.
1888 */
1889 val = xmlStrdup(URI);
1890 goto done;
1891 }
Owen Taylor3473f882001-02-23 17:55:21 +00001892 if (base == NULL)
1893 ret = -1;
1894 else {
1895 bas = xmlCreateURI();
1896 if (bas == NULL)
1897 goto done;
1898 ret = xmlParseURIReference(bas, (const char *) base);
1899 }
1900 if (ret != 0) {
1901 if (ref)
1902 val = xmlSaveUri(ref);
1903 goto done;
1904 }
1905 if (ref == NULL) {
1906 /*
1907 * the base fragment must be ignored
1908 */
1909 if (bas->fragment != NULL) {
1910 xmlFree(bas->fragment);
1911 bas->fragment = NULL;
1912 }
1913 val = xmlSaveUri(bas);
1914 goto done;
1915 }
1916
1917 /*
1918 * 2) If the path component is empty and the scheme, authority, and
1919 * query components are undefined, then it is a reference to the
1920 * current document and we are done. Otherwise, the reference URI's
1921 * query and fragment components are defined as found (or not found)
1922 * within the URI reference and not inherited from the base URI.
1923 *
1924 * NOTE that in modern browsers, the parsing differs from the above
1925 * in the following aspect: the query component is allowed to be
1926 * defined while still treating this as a reference to the current
1927 * document.
1928 */
1929 res = xmlCreateURI();
1930 if (res == NULL)
1931 goto done;
1932 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1933 ((ref->authority == NULL) && (ref->server == NULL))) {
1934 if (bas->scheme != NULL)
1935 res->scheme = xmlMemStrdup(bas->scheme);
1936 if (bas->authority != NULL)
1937 res->authority = xmlMemStrdup(bas->authority);
1938 else if (bas->server != NULL) {
1939 res->server = xmlMemStrdup(bas->server);
1940 if (bas->user != NULL)
1941 res->user = xmlMemStrdup(bas->user);
1942 res->port = bas->port;
1943 }
1944 if (bas->path != NULL)
1945 res->path = xmlMemStrdup(bas->path);
1946 if (ref->query != NULL)
1947 res->query = xmlMemStrdup(ref->query);
1948 else if (bas->query != NULL)
1949 res->query = xmlMemStrdup(bas->query);
1950 if (ref->fragment != NULL)
1951 res->fragment = xmlMemStrdup(ref->fragment);
1952 goto step_7;
1953 }
Owen Taylor3473f882001-02-23 17:55:21 +00001954
1955 /*
1956 * 3) If the scheme component is defined, indicating that the reference
1957 * starts with a scheme name, then the reference is interpreted as an
1958 * absolute URI and we are done. Otherwise, the reference URI's
1959 * scheme is inherited from the base URI's scheme component.
1960 */
1961 if (ref->scheme != NULL) {
1962 val = xmlSaveUri(ref);
1963 goto done;
1964 }
1965 if (bas->scheme != NULL)
1966 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001967
1968 if (ref->query != NULL)
1969 res->query = xmlMemStrdup(ref->query);
1970 if (ref->fragment != NULL)
1971 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001972
1973 /*
1974 * 4) If the authority component is defined, then the reference is a
1975 * network-path and we skip to step 7. Otherwise, the reference
1976 * URI's authority is inherited from the base URI's authority
1977 * component, which will also be undefined if the URI scheme does not
1978 * use an authority component.
1979 */
1980 if ((ref->authority != NULL) || (ref->server != NULL)) {
1981 if (ref->authority != NULL)
1982 res->authority = xmlMemStrdup(ref->authority);
1983 else {
1984 res->server = xmlMemStrdup(ref->server);
1985 if (ref->user != NULL)
1986 res->user = xmlMemStrdup(ref->user);
1987 res->port = ref->port;
1988 }
1989 if (ref->path != NULL)
1990 res->path = xmlMemStrdup(ref->path);
1991 goto step_7;
1992 }
1993 if (bas->authority != NULL)
1994 res->authority = xmlMemStrdup(bas->authority);
1995 else if (bas->server != NULL) {
1996 res->server = xmlMemStrdup(bas->server);
1997 if (bas->user != NULL)
1998 res->user = xmlMemStrdup(bas->user);
1999 res->port = bas->port;
2000 }
2001
2002 /*
2003 * 5) If the path component begins with a slash character ("/"), then
2004 * the reference is an absolute-path and we skip to step 7.
2005 */
2006 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2007 res->path = xmlMemStrdup(ref->path);
2008 goto step_7;
2009 }
2010
2011
2012 /*
2013 * 6) If this step is reached, then we are resolving a relative-path
2014 * reference. The relative path needs to be merged with the base
2015 * URI's path. Although there are many ways to do this, we will
2016 * describe a simple method using a separate string buffer.
2017 *
2018 * Allocate a buffer large enough for the result string.
2019 */
2020 len = 2; /* extra / and 0 */
2021 if (ref->path != NULL)
2022 len += strlen(ref->path);
2023 if (bas->path != NULL)
2024 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002025 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002026 if (res->path == NULL) {
2027 xmlGenericError(xmlGenericErrorContext,
2028 "xmlBuildURI: out of memory\n");
2029 goto done;
2030 }
2031 res->path[0] = 0;
2032
2033 /*
2034 * a) All but the last segment of the base URI's path component is
2035 * copied to the buffer. In other words, any characters after the
2036 * last (right-most) slash character, if any, are excluded.
2037 */
2038 cur = 0;
2039 out = 0;
2040 if (bas->path != NULL) {
2041 while (bas->path[cur] != 0) {
2042 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2043 cur++;
2044 if (bas->path[cur] == 0)
2045 break;
2046
2047 cur++;
2048 while (out < cur) {
2049 res->path[out] = bas->path[out];
2050 out++;
2051 }
2052 }
2053 }
2054 res->path[out] = 0;
2055
2056 /*
2057 * b) The reference's path component is appended to the buffer
2058 * string.
2059 */
2060 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002061 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002062 /*
2063 * Ensure the path includes a '/'
2064 */
2065 if ((out == 0) && (bas->server != NULL))
2066 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002067 while (ref->path[indx] != 0) {
2068 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002069 }
2070 }
2071 res->path[out] = 0;
2072
2073 /*
2074 * Steps c) to h) are really path normalization steps
2075 */
2076 xmlNormalizeURIPath(res->path);
2077
2078step_7:
2079
2080 /*
2081 * 7) The resulting URI components, including any inherited from the
2082 * base URI, are recombined to give the absolute form of the URI
2083 * reference.
2084 */
2085 val = xmlSaveUri(res);
2086
2087done:
2088 if (ref != NULL)
2089 xmlFreeURI(ref);
2090 if (bas != NULL)
2091 xmlFreeURI(bas);
2092 if (res != NULL)
2093 xmlFreeURI(res);
2094 return(val);
2095}
2096
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002097/**
William M. Brackf7789b12004-06-07 08:57:27 +00002098 * xmlBuildRelativeURI:
2099 * @URI: the URI reference under consideration
2100 * @base: the base value
2101 *
2102 * Expresses the URI of the reference in terms relative to the
2103 * base. Some examples of this operation include:
2104 * base = "http://site1.com/docs/book1.html"
2105 * URI input URI returned
2106 * docs/pic1.gif pic1.gif
2107 * docs/img/pic1.gif img/pic1.gif
2108 * img/pic1.gif ../img/pic1.gif
2109 * http://site1.com/docs/pic1.gif pic1.gif
2110 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2111 *
2112 * base = "docs/book1.html"
2113 * URI input URI returned
2114 * docs/pic1.gif pic1.gif
2115 * docs/img/pic1.gif img/pic1.gif
2116 * img/pic1.gif ../img/pic1.gif
2117 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2118 *
2119 *
2120 * Note: if the URI reference is really wierd or complicated, it may be
2121 * worthwhile to first convert it into a "nice" one by calling
2122 * xmlBuildURI (using 'base') before calling this routine,
2123 * since this routine (for reasonable efficiency) assumes URI has
2124 * already been through some validation.
2125 *
2126 * Returns a new URI string (to be freed by the caller) or NULL in case
2127 * error.
2128 */
2129xmlChar *
2130xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2131{
2132 xmlChar *val = NULL;
2133 int ret;
2134 int ix;
2135 int pos = 0;
2136 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002137 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002138 xmlURIPtr ref = NULL;
2139 xmlURIPtr bas = NULL;
2140 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002141 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002142
2143 if ((URI == NULL) || (*URI == 0))
2144 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002145
2146 /*
2147 * First parse URI into a standard form
2148 */
2149 ref = xmlCreateURI ();
2150 if (ref == NULL)
2151 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002152 /* If URI not already in "relative" form */
2153 if (URI[0] != '.') {
2154 ret = xmlParseURIReference (ref, (const char *) URI);
2155 if (ret != 0)
2156 goto done; /* Error in URI, return NULL */
2157 } else
2158 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002159
2160 /*
2161 * Next parse base into the same standard form
2162 */
2163 if ((base == NULL) || (*base == 0)) {
2164 val = xmlStrdup (URI);
2165 goto done;
2166 }
2167 bas = xmlCreateURI ();
2168 if (bas == NULL)
2169 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002170 if (base[0] != '.') {
2171 ret = xmlParseURIReference (bas, (const char *) base);
2172 if (ret != 0)
2173 goto done; /* Error in base, return NULL */
2174 } else
2175 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002176
2177 /*
2178 * If the scheme / server on the URI differs from the base,
2179 * just return the URI
2180 */
2181 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002182 ((bas->scheme == NULL) ||
2183 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2184 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002185 val = xmlStrdup (URI);
2186 goto done;
2187 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002188 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2189 val = xmlStrdup(BAD_CAST "");
2190 goto done;
2191 }
2192 if (bas->path == NULL) {
2193 val = xmlStrdup((xmlChar *)ref->path);
2194 goto done;
2195 }
2196 if (ref->path == NULL) {
2197 ref->path = (char *) "/";
2198 remove_path = 1;
2199 }
William M. Brackf7789b12004-06-07 08:57:27 +00002200
2201 /*
2202 * At this point (at last!) we can compare the two paths
2203 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002204 * First we take care of the special case where either of the
2205 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002206 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002207 if (bas->path == NULL) {
2208 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002209 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002210 if (*uptr == '/')
2211 uptr++;
2212 val = xmlStrdup(uptr);
2213 }
2214 goto done;
2215 }
William M. Brackf7789b12004-06-07 08:57:27 +00002216 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002217 if (ref->path == NULL) {
2218 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002219 if (bptr[ix] == '/')
2220 nbslash++;
2221 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002222 uptr = NULL;
2223 len = 1; /* this is for a string terminator only */
2224 } else {
2225 /*
2226 * Next we compare the two strings and find where they first differ
2227 */
2228 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2229 pos += 2;
2230 if ((*bptr == '.') && (bptr[1] == '/'))
2231 bptr += 2;
2232 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2233 bptr++;
2234 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2235 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002236
William M. Brack820d5ed2005-09-14 05:24:27 +00002237 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002238 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002239 goto done; /* (I can't imagine why anyone would do this) */
2240 }
2241
2242 /*
2243 * In URI, "back up" to the last '/' encountered. This will be the
2244 * beginning of the "unique" suffix of URI
2245 */
2246 ix = pos;
2247 if ((ref->path[ix] == '/') && (ix > 0))
2248 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002249 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2250 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002251 for (; ix > 0; ix--) {
2252 if (ref->path[ix] == '/')
2253 break;
2254 }
2255 if (ix == 0) {
2256 uptr = (xmlChar *)ref->path;
2257 } else {
2258 ix++;
2259 uptr = (xmlChar *)&ref->path[ix];
2260 }
2261
2262 /*
2263 * In base, count the number of '/' from the differing point
2264 */
2265 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2266 for (; bptr[ix] != 0; ix++) {
2267 if (bptr[ix] == '/')
2268 nbslash++;
2269 }
2270 }
2271 len = xmlStrlen (uptr) + 1;
2272 }
2273
William M. Brackf7789b12004-06-07 08:57:27 +00002274 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002275 if (uptr != NULL)
2276 val = xmlStrdup (uptr);
William M. Brackf7789b12004-06-07 08:57:27 +00002277 goto done;
2278 }
William M. Brackf7789b12004-06-07 08:57:27 +00002279
2280 /*
2281 * Allocate just enough space for the returned string -
2282 * length of the remainder of the URI, plus enough space
2283 * for the "../" groups, plus one for the terminator
2284 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002285 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002286 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002287 xmlGenericError(xmlGenericErrorContext,
2288 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002289 goto done;
2290 }
2291 vptr = val;
2292 /*
2293 * Put in as many "../" as needed
2294 */
2295 for (; nbslash>0; nbslash--) {
2296 *vptr++ = '.';
2297 *vptr++ = '.';
2298 *vptr++ = '/';
2299 }
2300 /*
2301 * Finish up with the end of the URI
2302 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002303 if (uptr != NULL) {
2304 if ((vptr > val) && (len > 0) &&
2305 (uptr[0] == '/') && (vptr[-1] == '/')) {
2306 memcpy (vptr, uptr + 1, len - 1);
2307 vptr[len - 2] = 0;
2308 } else {
2309 memcpy (vptr, uptr, len);
2310 vptr[len - 1] = 0;
2311 }
2312 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002313 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002314 }
William M. Brackf7789b12004-06-07 08:57:27 +00002315
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002316done:
William M. Brackf7789b12004-06-07 08:57:27 +00002317 /*
2318 * Free the working variables
2319 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002320 if (remove_path != 0)
2321 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002322 if (ref != NULL)
2323 xmlFreeURI (ref);
2324 if (bas != NULL)
2325 xmlFreeURI (bas);
2326
2327 return val;
2328}
2329
2330/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002331 * xmlCanonicPath:
2332 * @path: the resource locator in a filesystem notation
2333 *
2334 * Constructs a canonic path from the specified path.
2335 *
2336 * Returns a new canonic path, or a duplicate of the path parameter if the
2337 * construction fails. The caller is responsible for freeing the memory occupied
2338 * by the returned string. If there is insufficient memory available, or the
2339 * argument is NULL, the function returns NULL.
2340 */
2341#define IS_WINDOWS_PATH(p) \
2342 ((p != NULL) && \
2343 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2344 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2345 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002346xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002347xmlCanonicPath(const xmlChar *path)
2348{
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002349#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002350 int len = 0;
2351 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002352 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002353#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002354 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002355 xmlChar *ret;
2356 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002357
2358 if (path == NULL)
2359 return(NULL);
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002360 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002361 xmlFreeURI(uri);
2362 return xmlStrdup(path);
2363 }
2364
Daniel Veillard336a8e12005-08-07 10:46:19 +00002365 absuri = xmlStrstr(path, BAD_CAST "://");
2366 if (absuri != NULL) {
2367 int l, j;
2368 unsigned char c;
2369 xmlChar *escURI;
2370
2371 /*
2372 * this looks like an URI where some parts have not been
2373 * escaped leading to a parsing problem check that the first
2374 * part matches a protocol.
2375 */
2376 l = absuri - path;
2377 if ((l <= 0) || (l > 20))
2378 goto path_processing;
2379 for (j = 0;j < l;j++) {
2380 c = path[j];
2381 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2382 goto path_processing;
2383 }
2384
2385 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2386 if (escURI != NULL) {
2387 uri = xmlParseURI((const char *) escURI);
2388 if (uri != NULL) {
2389 xmlFreeURI(uri);
2390 return escURI;
2391 }
2392 xmlFreeURI(uri);
2393 }
2394 }
2395
2396path_processing:
2397#if defined(_WIN32) && !defined(__CYGWIN__)
2398 /*
2399 * This really need to be cleaned up by someone with a Windows box
2400 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002401 uri = xmlCreateURI();
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002402 if (uri == NULL) {
2403 return(NULL);
2404 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002405
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002406 len = xmlStrlen(path);
2407 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2408 uri->scheme = xmlStrdup(BAD_CAST "file");
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002409 uri->path = xmlMallocAtomic(len + 2);
2410 if (uri->path == NULL) {
2411 xmlFreeURI(uri);
2412 return(NULL);
2413 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002414 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002415 p = uri->path + 1;
2416 strncpy(p, path, len + 1);
2417 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002418 uri->path = xmlStrdup(path);
2419 if (uri->path == NULL) {
2420 xmlFreeURI(uri);
2421 return(NULL);
2422 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002423 p = uri->path;
2424 }
2425 while (*p != '\0') {
2426 if (*p == '\\')
2427 *p = '/';
2428 p++;
2429 }
William M. Bracka3215c72004-07-31 16:24:01 +00002430 if (uri->path == NULL) {
2431 xmlFreeURI(uri);
2432 return(NULL);
2433 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002434
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002435 if (uri->scheme == NULL) {
2436 ret = xmlStrdup((const xmlChar *) path);
2437 } else {
2438 ret = xmlSaveUri(uri);
2439 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002440
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002441 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002442#else
2443 ret = xmlStrdup((const xmlChar *) path);
2444#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002445 return(ret);
2446}
Owen Taylor3473f882001-02-23 17:55:21 +00002447
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002448/**
2449 * xmlPathToURI:
2450 * @path: the resource locator in a filesystem notation
2451 *
2452 * Constructs an URI expressing the existing path
2453 *
2454 * Returns a new URI, or a duplicate of the path parameter if the
2455 * construction fails. The caller is responsible for freeing the memory
2456 * occupied by the returned string. If there is insufficient memory available,
2457 * or the argument is NULL, the function returns NULL.
2458 */
2459xmlChar *
2460xmlPathToURI(const xmlChar *path)
2461{
2462 xmlURIPtr uri;
2463 xmlURI temp;
2464 xmlChar *ret, *cal;
2465
2466 if (path == NULL)
2467 return(NULL);
2468
2469 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2470 xmlFreeURI(uri);
2471 return xmlStrdup(path);
2472 }
2473 cal = xmlCanonicPath(path);
2474 if (cal == NULL)
2475 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002476#if defined(_WIN32) && !defined(__CYGWIN__)
2477 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2478 If 'cal' is a valid URI allready then we are done here, as continuing would make
2479 it invalid. */
2480 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2481 xmlFreeURI(uri);
2482 return cal;
2483 }
2484 /* 'cal' can contain a relative path with backslashes. If that is processed
2485 by xmlSaveURI, they will be escaped and the external entity loader machinery
2486 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2487 ret = cal;
2488 while (*ret != '\0') {
2489 if (*ret == '\\')
2490 *ret = '/';
2491 ret++;
2492 }
2493#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002494 memset(&temp, 0, sizeof(temp));
2495 temp.path = (char *) cal;
2496 ret = xmlSaveUri(&temp);
2497 xmlFree(cal);
2498 return(ret);
2499}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002500#define bottom_uri
2501#include "elfgcchack.h"