blob: a00415c5d5ed797b34915ed79ef5cd046d0a4c11 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
William M. Brack015ccb22005-02-13 08:18:52 +00004 * Reference: RFCs 2396, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
21/************************************************************************
22 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000023 * Macros to differentiate various character type *
Owen Taylor3473f882001-02-23 17:55:21 +000024 * directly extracted from RFC 2396 *
25 * *
26 ************************************************************************/
27
/*
 * Character-class predicates transcribed from the RFC 2396 grammar.
 *
 * Macros taking (x) test a character value; macros taking (p) test the
 * character(s) found at pointer p.  Every macro may evaluate its
 * argument more than once, so only pass side-effect-free expressions.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" .. "9"
 * Any IS_DIGIT already defined at this point is discarded first.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A" .. "F" | "a" .. "f" */
#define IS_HEX(x)							\
    ((IS_DIGIT(x)) ||							\
     (((x) >= 'a') && ((x) <= 'f')) ||					\
     (((x) >= 'A') && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)							\
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||			\
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||			\
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)							\
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||	\
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||	\
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 * The '%' test comes first so that p[1] and p[2] are only read when p
 * really starts an escape candidate.
 */
#define IS_ESCAPED(p)							\
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_PCHAR(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
     ((*(p) == ',')))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 * i.e. the pchar set plus ";".
 */
#define IS_REG_NAME(p) ((IS_PCHAR(p)) || ((*(p) == ';')))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 * i.e. the reg_name set plus "?".
 */
#define IS_URIC_NO_SLASH(p) ((IS_REG_NAME(p)) || ((*(p) == '?')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
     ((*(p) == ',')))

/* scheme = alpha *( alpha | digit | "+" | "-" | "." ) */
#define IS_SCHEME(x)							\
    ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||					\
     ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||			\
     ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
     ((*(p) == ',')))

/* uric = reserved | unreserved | escaped */
#define IS_URIC(p)							\
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 * Note: this macro additionally accepts "[" and "]", which the
 * reserved set above also contains.
 */
#define IS_UNWISE(p)							\
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||		\
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||		\
     ((*(p) == ']')) || ((*(p) == '`')))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000171
/*
 * Advance pointer p to the next character: by three bytes when it sits
 * on a '%' (taken to be the start of a "%XX" escape; the two following
 * bytes are not checked here), by one byte otherwise.
 *
 * The argument is parenthesized so that expressions such as *pp are
 * advanced correctly; it is still evaluated more than once.
 */

#define NEXT(p) ((*(p) == '%')? (p) += 3 : (p)++)
177
178/*
179 * Productions from the spec.
180 *
181 * authority = server | reg_name
182 * reg_name = 1*( unreserved | escaped | "$" | "," |
183 * ";" | ":" | "@" | "&" | "=" | "+" )
184 *
185 * path = [ abs_path | opaque_part ]
186 */
187
/*
 * Duplicate at most n bytes of s through xmlStrndup(), casting between
 * char and xmlChar.  The whole expansion is parenthesized so the cast
 * cannot bind to whatever follows the macro invocation.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
189
Owen Taylor3473f882001-02-23 17:55:21 +0000190/************************************************************************
191 * *
192 * Generic URI structure functions *
193 * *
194 ************************************************************************/
195
196/**
197 * xmlCreateURI:
198 *
199 * Simply creates an empty xmlURI
200 *
201 * Returns the new structure or NULL in case of error
202 */
203xmlURIPtr
204xmlCreateURI(void) {
205 xmlURIPtr ret;
206
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208 if (ret == NULL) {
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
211 return(NULL);
212 }
213 memset(ret, 0, sizeof(xmlURI));
214 return(ret);
215}
216
217/**
218 * xmlSaveUri:
219 * @uri: pointer to an xmlURI
220 *
221 * Save the URI as an escaped string
222 *
223 * Returns a new string (to be deallocated by caller)
224 */
225xmlChar *
226xmlSaveUri(xmlURIPtr uri) {
227 xmlChar *ret = NULL;
228 const char *p;
229 int len;
230 int max;
231
232 if (uri == NULL) return(NULL);
233
234
235 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 len = 0;
243
244 if (uri->scheme != NULL) {
245 p = uri->scheme;
246 while (*p != 0) {
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = *p++;
257 }
258 if (len >= max) {
259 max *= 2;
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
261 if (ret == NULL) {
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
264 return(NULL);
265 }
266 }
267 ret[len++] = ':';
268 }
269 if (uri->opaque != NULL) {
270 p = uri->opaque;
271 while (*p != 0) {
272 if (len + 3 >= max) {
273 max *= 2;
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
275 if (ret == NULL) {
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
278 return(NULL);
279 }
280 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000282 ret[len++] = *p++;
283 else {
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
286 ret[len++] = '%';
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289 }
290 }
Owen Taylor3473f882001-02-23 17:55:21 +0000291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
429 if (ret == NULL) {
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
432 return(NULL);
433 }
434 }
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
438 ((*(p) == ',')))
439 ret[len++] = *p++;
440 else {
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
443 ret[len++] = '%';
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446 }
447 }
448 }
449 if (uri->query != NULL) {
450 if (len + 3 >= max) {
451 max *= 2;
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
454 if (ret == NULL) {
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
457 return(NULL);
458 }
459 }
460 ret[len++] = '?';
461 p = uri->query;
462 while (*p != 0) {
463 if (len + 3 >= max) {
464 max *= 2;
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
467 if (ret == NULL) {
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
470 return(NULL);
471 }
472 }
473 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
474 ret[len++] = *p++;
475 else {
476 int val = *(unsigned char *)p++;
477 int hi = val / 0x10, lo = val % 0x10;
478 ret[len++] = '%';
479 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
480 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
481 }
482 }
483 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000484 }
485 if (uri->fragment != NULL) {
486 if (len + 3 >= max) {
487 max *= 2;
488 ret = (xmlChar *) xmlRealloc(ret,
489 (max + 1) * sizeof(xmlChar));
490 if (ret == NULL) {
491 xmlGenericError(xmlGenericErrorContext,
492 "xmlSaveUri: out of memory\n");
493 return(NULL);
494 }
495 }
496 ret[len++] = '#';
497 p = uri->fragment;
498 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 if (len + 3 >= max) {
500 max *= 2;
501 ret = (xmlChar *) xmlRealloc(ret,
502 (max + 1) * sizeof(xmlChar));
503 if (ret == NULL) {
504 xmlGenericError(xmlGenericErrorContext,
505 "xmlSaveUri: out of memory\n");
506 return(NULL);
507 }
508 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000509 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
510 ret[len++] = *p++;
511 else {
512 int val = *(unsigned char *)p++;
513 int hi = val / 0x10, lo = val % 0x10;
514 ret[len++] = '%';
515 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
516 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000517 }
518 }
Owen Taylor3473f882001-02-23 17:55:21 +0000519 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000520 if (len >= max) {
521 max *= 2;
522 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
523 if (ret == NULL) {
524 xmlGenericError(xmlGenericErrorContext,
525 "xmlSaveUri: out of memory\n");
526 return(NULL);
527 }
528 }
529 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000530 return(ret);
531}
532
533/**
534 * xmlPrintURI:
535 * @stream: a FILE* for the output
536 * @uri: pointer to an xmlURI
537 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000538 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000539 */
540void
541xmlPrintURI(FILE *stream, xmlURIPtr uri) {
542 xmlChar *out;
543
544 out = xmlSaveUri(uri);
545 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000546 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000547 xmlFree(out);
548 }
549}
550
551/**
552 * xmlCleanURI:
553 * @uri: pointer to an xmlURI
554 *
555 * Make sure the xmlURI struct is free of content
556 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000557static void
Owen Taylor3473f882001-02-23 17:55:21 +0000558xmlCleanURI(xmlURIPtr uri) {
559 if (uri == NULL) return;
560
561 if (uri->scheme != NULL) xmlFree(uri->scheme);
562 uri->scheme = NULL;
563 if (uri->server != NULL) xmlFree(uri->server);
564 uri->server = NULL;
565 if (uri->user != NULL) xmlFree(uri->user);
566 uri->user = NULL;
567 if (uri->path != NULL) xmlFree(uri->path);
568 uri->path = NULL;
569 if (uri->fragment != NULL) xmlFree(uri->fragment);
570 uri->fragment = NULL;
571 if (uri->opaque != NULL) xmlFree(uri->opaque);
572 uri->opaque = NULL;
573 if (uri->authority != NULL) xmlFree(uri->authority);
574 uri->authority = NULL;
575 if (uri->query != NULL) xmlFree(uri->query);
576 uri->query = NULL;
577}
578
579/**
580 * xmlFreeURI:
581 * @uri: pointer to an xmlURI
582 *
583 * Free up the xmlURI struct
584 */
585void
586xmlFreeURI(xmlURIPtr uri) {
587 if (uri == NULL) return;
588
589 if (uri->scheme != NULL) xmlFree(uri->scheme);
590 if (uri->server != NULL) xmlFree(uri->server);
591 if (uri->user != NULL) xmlFree(uri->user);
592 if (uri->path != NULL) xmlFree(uri->path);
593 if (uri->fragment != NULL) xmlFree(uri->fragment);
594 if (uri->opaque != NULL) xmlFree(uri->opaque);
595 if (uri->authority != NULL) xmlFree(uri->authority);
596 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000597 xmlFree(uri);
598}
599
600/************************************************************************
601 * *
602 * Helper functions *
603 * *
604 ************************************************************************/
605
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the normalization steps 6.c through 6.g of RFC 2396
 * Section 5.2 to a path string, and also collapses runs of "/" found
 * after the first segment.
 *
 * The string is rewritten in place and can only get shorter; nothing
 * is allocated.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
	return(-1);

    /* Leading slashes are left alone: start at the first real segment. */
    rd = path;
    while (*rd == '/')
	rd++;
    if (*rd == '\0')
	return(0);

    /* Everything before this point is kept unchanged. */
    wr = rd;

    /*
     * First pass, steps (c) and (d): "rd" always sits at the start of a
     * segment when the loop body begins, "wr" is where kept bytes go.
     */
    while (*rd != '\0') {
	/* c) drop "./" segments, together with any slashes following */
	if ((rd[0] == '.') && (rd[1] == '/')) {
	    rd += 2;
	    while (*rd == '/')
		rd++;
	    continue;
	}

	/* d) drop a final "." segment */
	if ((rd[0] == '.') && (rd[1] == '\0'))
	    break;

	/* Any other segment is copied through. */
	while ((*rd != '/') && (*rd != '\0'))
	    *wr++ = *rd++;
	if (*rd == '\0')
	    break;

	/* rd is on a '/': squeeze a run of slashes into a single one. */
	while (rd[1] == '/')
	    rd++;
	*wr++ = *rd++;
    }
    *wr = '\0';

    /* Second pass restarts from the first segment. */
    rd = path;
    while (*rd == '/')
	rd++;
    if (*rd == '\0')
	return(0);

    /*
     * Steps (e) and (f): remove "<segment>/../" and a trailing
     * "<segment>/.." where <segment> is not "..".  The RFC asks for the
     * leftmost match to be removed repeatedly, so the buffer is
     * compacted immediately on each match and scanning resumes one
     * segment earlier; a single cursor is therefore sufficient.
     */
    for (;;) {
	char *next, *back;
	int curIsDotDot, nextIsDotDot;

	/* Locate the segment following the one at "rd". */
	next = rd;
	while ((*next != '/') && (*next != '\0'))
	    next++;

	/* A last segment cannot be followed by "..": finished. */
	if (*next == '\0')
	    break;
	next++;

	curIsDotDot = ((rd[0] == '.') && (rd[1] == '.') &&
	               (next == rd + 3));
	nextIsDotDot = ((next[0] == '.') && (next[1] == '.') &&
	                ((next[2] == '/') || (next[2] == '\0')));

	/* Nothing to remove here, move on to the next segment. */
	if (curIsDotDot || !nextIsDotDot) {
	    rd = next;
	    continue;
	}

	/* f) "<segment>/.." ends the buffer: cut it off and stop. */
	if (next[2] == '\0') {
	    *rd = '\0';
	    break;
	}

	/*
	 * e) slide the tail over "<segment>/../".  Source and
	 * destination overlap, hence memmove and not strcpy.
	 */
	memmove(rd, next + 3, strlen(next + 3) + 1);

	/* Step back over the slash(es) preceding the removed part. */
	back = rd;
	while (back > path) {
	    back--;
	    if (*back != '/')
		break;
	}
	/* No earlier segment exists: continue from the same position. */
	if (back == path)
	    continue;

	/*
	 * "back" is on the last byte of the previous segment; rewind to
	 * its first byte so that inputs like "foo/bar/../.." also get
	 * their "foo/.." removed on the next iteration.
	 */
	rd = back;
	while ((rd > path) && (rd[-1] != '/'))
	    rd--;
    }
    /* "wr" still marks the terminator written by the first pass. */
    *wr = '\0';

    /*
     * g) Leading ".." segments left in an absolute path are an error
     *    per the RFC; they are discarded from the final path.
     */
    if (path[0] == '/') {
	rd = path;
	while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.') &&
	       ((rd[3] == '/') || (rd[3] == '\0')))
	    rd += 3;

	if (rd != path)
	    memmove(path, rd, strlen(rd) + 1);
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000793
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
	return(1);
    if ((c >= 'a') && (c <= 'f'))
	return(1);
    if ((c >= 'A') && (c <= 'F'))
	return(1);
    return(0);
}
801
Owen Taylor3473f882001-02-23 17:55:21 +0000802/**
803 * xmlURIUnescapeString:
804 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000805 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000806 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000807 *
808 * Unescaping routine, does not do validity checks !
809 * Output is direct unsigned char translation of %XX values (no encoding)
810 *
811 * Returns an copy of the string, but unescaped
812 */
813char *
814xmlURIUnescapeString(const char *str, int len, char *target) {
815 char *ret, *out;
816 const char *in;
817
818 if (str == NULL)
819 return(NULL);
820 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000821 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000822
823 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000824 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000825 if (ret == NULL) {
826 xmlGenericError(xmlGenericErrorContext,
827 "xmlURIUnescapeString: out of memory\n");
828 return(NULL);
829 }
830 } else
831 ret = target;
832 in = str;
833 out = ret;
834 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000835 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000836 in++;
837 if ((*in >= '0') && (*in <= '9'))
838 *out = (*in - '0');
839 else if ((*in >= 'a') && (*in <= 'f'))
840 *out = (*in - 'a') + 10;
841 else if ((*in >= 'A') && (*in <= 'F'))
842 *out = (*in - 'A') + 10;
843 in++;
844 if ((*in >= '0') && (*in <= '9'))
845 *out = *out * 16 + (*in - '0');
846 else if ((*in >= 'a') && (*in <= 'f'))
847 *out = *out * 16 + (*in - 'a') + 10;
848 else if ((*in >= 'A') && (*in <= 'F'))
849 *out = *out * 16 + (*in - 'A') + 10;
850 in++;
851 len -= 3;
852 out++;
853 } else {
854 *out++ = *in++;
855 len--;
856 }
857 }
858 *out = 0;
859 return(ret);
860}
861
862/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000863 * xmlURIEscapeStr:
864 * @str: string to escape
865 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000866 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000867 * This routine escapes a string to hex, ignoring reserved characters (a-z)
868 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000869 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000870 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000871 */
872xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000873xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
874 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000875 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000876
Owen Taylor3473f882001-02-23 17:55:21 +0000877 unsigned int len, out;
878
879 if (str == NULL)
880 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000881 if (str[0] == 0)
882 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000883 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000884 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000885
886 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000887 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000888 if (ret == NULL) {
889 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000890 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000891 return(NULL);
892 }
893 in = (const xmlChar *) str;
894 out = 0;
895 while(*in != 0) {
896 if (len - out <= 3) {
897 len += 20;
898 ret = (xmlChar *) xmlRealloc(ret, len);
899 if (ret == NULL) {
900 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000901 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000902 return(NULL);
903 }
904 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000905
906 ch = *in;
907
Daniel Veillardeb475a32002-04-14 22:00:22 +0000908 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000909 unsigned char val;
910 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000911 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000912 if (val <= 9)
913 ret[out++] = '0' + val;
914 else
915 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000916 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000917 if (val <= 9)
918 ret[out++] = '0' + val;
919 else
920 ret[out++] = 'A' + val - 0xA;
921 in++;
922 } else {
923 ret[out++] = *in++;
924 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000925
Owen Taylor3473f882001-02-23 17:55:21 +0000926 }
927 ret[out] = 0;
928 return(ret);
929}
930
Daniel Veillard8514c672001-05-23 10:29:12 +0000931/**
932 * xmlURIEscape:
933 * @str: the string of the URI to escape
934 *
935 * Escaping routine, does not do validity checks !
936 * It will try to escape the chars needing this, but this is heuristic
937 * based it's impossible to be sure.
938 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000939 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000940 *
941 * 25 May 2001
942 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
943 * according to RFC2396.
944 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000945 */
946xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000947xmlURIEscape(const xmlChar * str)
948{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000949 xmlChar *ret, *segment = NULL;
950 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000951 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000952
Daniel Veillard6278fb52001-05-25 07:38:41 +0000953#define NULLCHK(p) if(!p) { \
954 xmlGenericError(xmlGenericErrorContext, \
955 "xmlURIEscape: out of memory\n"); \
956 return NULL; }
957
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000958 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000959 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000960
961 uri = xmlCreateURI();
962 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000963 /*
964 * Allow escaping errors in the unescaped form
965 */
966 uri->cleanup = 1;
967 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000968 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000969 xmlFreeURI(uri);
970 return (NULL);
971 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000972 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000973
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000974 if (!uri)
975 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000976
977 ret = NULL;
978
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000979 if (uri->scheme) {
980 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
981 NULLCHK(segment)
982 ret = xmlStrcat(ret, segment);
983 ret = xmlStrcat(ret, BAD_CAST ":");
984 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000985 }
986
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000987 if (uri->authority) {
988 segment =
989 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
990 NULLCHK(segment)
991 ret = xmlStrcat(ret, BAD_CAST "//");
992 ret = xmlStrcat(ret, segment);
993 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000994 }
995
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000996 if (uri->user) {
997 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
998 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +0000999 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001000 ret = xmlStrcat(ret, segment);
1001 ret = xmlStrcat(ret, BAD_CAST "@");
1002 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001003 }
1004
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001005 if (uri->server) {
1006 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1007 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001008 if (uri->user == NULL)
1009 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001010 ret = xmlStrcat(ret, segment);
1011 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001012 }
1013
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001014 if (uri->port) {
1015 xmlChar port[10];
1016
Daniel Veillard43d3f612001-11-10 11:57:23 +00001017 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001018 ret = xmlStrcat(ret, BAD_CAST ":");
1019 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001020 }
1021
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001022 if (uri->path) {
1023 segment =
1024 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1025 NULLCHK(segment)
1026 ret = xmlStrcat(ret, segment);
1027 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001028 }
1029
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001030 if (uri->query) {
1031 segment =
1032 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1033 NULLCHK(segment)
1034 ret = xmlStrcat(ret, BAD_CAST "?");
1035 ret = xmlStrcat(ret, segment);
1036 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001037 }
1038
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001039 if (uri->opaque) {
1040 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1041 NULLCHK(segment)
1042 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001043 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001044 }
1045
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001046 if (uri->fragment) {
1047 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1048 NULLCHK(segment)
1049 ret = xmlStrcat(ret, BAD_CAST "#");
1050 ret = xmlStrcat(ret, segment);
1051 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001052 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001053
1054 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001055#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001056
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001057 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001058}
1059
Owen Taylor3473f882001-02-23 17:55:21 +00001060/************************************************************************
1061 * *
1062 * Escaped URI parsing *
1063 * *
1064 ************************************************************************/
1065
1066/**
1067 * xmlParseURIFragment:
1068 * @uri: pointer to an URI structure
1069 * @str: pointer to the string to analyze
1070 *
1071 * Parse an URI fragment string and fills in the appropriate fields
1072 * of the @uri structure.
1073 *
1074 * fragment = *uric
1075 *
1076 * Returns 0 or the error code
1077 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001078static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001079xmlParseURIFragment(xmlURIPtr uri, const char **str)
1080{
Owen Taylor3473f882001-02-23 17:55:21 +00001081 const char *cur = *str;
1082
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001083 if (str == NULL)
1084 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001085
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001086 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001087 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001088 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001089 if (uri->fragment != NULL)
1090 xmlFree(uri->fragment);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001091 if (uri->cleanup & 2)
1092 uri->fragment = STRNDUP(*str, cur - *str);
1093 else
1094 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001095 }
1096 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001097 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001098}
1099
1100/**
1101 * xmlParseURIQuery:
1102 * @uri: pointer to an URI structure
1103 * @str: pointer to the string to analyze
1104 *
1105 * Parse the query part of an URI
1106 *
1107 * query = *uric
1108 *
1109 * Returns 0 or the error code
1110 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001111static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001112xmlParseURIQuery(xmlURIPtr uri, const char **str)
1113{
Owen Taylor3473f882001-02-23 17:55:21 +00001114 const char *cur = *str;
1115
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001116 if (str == NULL)
1117 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001118
Daniel Veillard336a8e12005-08-07 10:46:19 +00001119 while ((IS_URIC(cur)) ||
1120 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001121 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001122 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001123 if (uri->query != NULL)
1124 xmlFree(uri->query);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001125 if (uri->cleanup & 2)
1126 uri->query = STRNDUP(*str, cur - *str);
1127 else
1128 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001129 }
1130 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001131 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001132}
1133
1134/**
1135 * xmlParseURIScheme:
1136 * @uri: pointer to an URI structure
1137 * @str: pointer to the string to analyze
1138 *
1139 * Parse an URI scheme
1140 *
1141 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1142 *
1143 * Returns 0 or the error code
1144 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001145static int
Owen Taylor3473f882001-02-23 17:55:21 +00001146xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1147 const char *cur;
1148
1149 if (str == NULL)
1150 return(-1);
1151
1152 cur = *str;
1153 if (!IS_ALPHA(*cur))
1154 return(2);
1155 cur++;
1156 while (IS_SCHEME(*cur)) cur++;
1157 if (uri != NULL) {
1158 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001159 uri->scheme = STRNDUP(*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001160 }
1161 *str = cur;
1162 return(0);
1163}
1164
1165/**
1166 * xmlParseURIOpaquePart:
1167 * @uri: pointer to an URI structure
1168 * @str: pointer to the string to analyze
1169 *
1170 * Parse an URI opaque part
1171 *
1172 * opaque_part = uric_no_slash *uric
1173 *
1174 * Returns 0 or the error code
1175 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001176static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001177xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1178{
Owen Taylor3473f882001-02-23 17:55:21 +00001179 const char *cur;
1180
1181 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001182 return (-1);
1183
Owen Taylor3473f882001-02-23 17:55:21 +00001184 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001185 if (!((IS_URIC_NO_SLASH(cur)) ||
1186 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001187 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001188 }
1189 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001190 while ((IS_URIC(cur)) ||
1191 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001192 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001193 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001194 if (uri->opaque != NULL)
1195 xmlFree(uri->opaque);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001196 if (uri->cleanup & 2)
1197 uri->opaque = STRNDUP(*str, cur - *str);
1198 else
1199 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001200 }
1201 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001202 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001203}
1204
1205/**
1206 * xmlParseURIServer:
1207 * @uri: pointer to an URI structure
1208 * @str: pointer to the string to analyze
1209 *
1210 * Parse a server subpart of an URI, it's a finer grain analysis
1211 * of the authority part.
1212 *
1213 * server = [ [ userinfo "@" ] hostport ]
1214 * userinfo = *( unreserved | escaped |
1215 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1216 * hostport = host [ ":" port ]
William M. Brack015ccb22005-02-13 08:18:52 +00001217 * host = hostname | IPv4address | IPv6reference
Owen Taylor3473f882001-02-23 17:55:21 +00001218 * hostname = *( domainlabel "." ) toplabel [ "." ]
1219 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1220 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
William M. Brack015ccb22005-02-13 08:18:52 +00001221 * IPv6reference = "[" IPv6address "]"
1222 * IPv6address = hexpart [ ":" IPv4address ]
1223 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1224 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1225 * hexseq = hex4 *( ":" hex4)
1226 * hex4 = 1*4hexdig
Owen Taylor3473f882001-02-23 17:55:21 +00001227 * port = *digit
1228 *
1229 * Returns 0 or the error code
1230 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001231static int
Owen Taylor3473f882001-02-23 17:55:21 +00001232xmlParseURIServer(xmlURIPtr uri, const char **str) {
1233 const char *cur;
1234 const char *host, *tmp;
William M. Brack015ccb22005-02-13 08:18:52 +00001235 const int IPV4max = 4;
1236 const int IPV6max = 8;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001237 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001238
1239 if (str == NULL)
1240 return(-1);
1241
1242 cur = *str;
1243
1244 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001245 * is there a userinfo ?
Owen Taylor3473f882001-02-23 17:55:21 +00001246 */
1247 while (IS_USERINFO(cur)) NEXT(cur);
1248 if (*cur == '@') {
1249 if (uri != NULL) {
1250 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001251 if (uri->cleanup & 2)
1252 uri->path = STRNDUP(*str, cur - *str);
1253 else
1254 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001255 }
1256 cur++;
1257 } else {
1258 if (uri != NULL) {
1259 if (uri->user != NULL) xmlFree(uri->user);
1260 uri->user = NULL;
1261 }
1262 cur = *str;
1263 }
1264 /*
1265 * This can be empty in the case where there is no server
1266 */
1267 host = cur;
1268 if (*cur == '/') {
1269 if (uri != NULL) {
1270 if (uri->authority != NULL) xmlFree(uri->authority);
1271 uri->authority = NULL;
1272 if (uri->server != NULL) xmlFree(uri->server);
1273 uri->server = NULL;
1274 uri->port = 0;
1275 }
1276 return(0);
1277 }
1278 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001279 * host part of hostport can denote an IPV4 address, an IPV6 address
1280 * or an unresolved name. Check the IP first, its easier to detect
1281 * errors if wrong one.
1282 * An IPV6 address must start with a '[' and end with a ']'.
Owen Taylor3473f882001-02-23 17:55:21 +00001283 */
William M. Brack015ccb22005-02-13 08:18:52 +00001284 if (*cur == '[') {
1285 int compress=0;
1286 cur++;
1287 for (oct = 0; oct < IPV6max; ++oct) {
1288 if (*cur == ':') {
1289 if (compress)
1290 return(3); /* multiple compression attempted */
1291 if (!oct) { /* initial char is compression */
1292 if (*++cur != ':')
1293 return(3);
1294 }
1295 compress = 1; /* set compression-encountered flag */
1296 cur++; /* skip over the second ':' */
1297 continue;
1298 }
1299 while(IS_HEX(*cur)) cur++;
1300 if (oct == (IPV6max-1))
1301 continue;
1302 if (*cur != ':')
1303 break;
1304 cur++;
1305 }
1306 if ((!compress) && (oct != IPV6max))
1307 return(3);
1308 if (*cur != ']')
1309 return(3);
1310 if (uri != NULL) {
1311 if (uri->server != NULL) xmlFree(uri->server);
1312 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1313 (cur-host)-1);
1314 }
1315 cur++;
1316 } else {
1317 /*
1318 * Not IPV6, maybe IPV4
1319 */
1320 for (oct = 0; oct < IPV4max; ++oct) {
1321 if (*cur == '.')
1322 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1323 while(IS_DIGIT(*cur)) cur++;
1324 if (oct == (IPV4max-1))
1325 continue;
1326 if (*cur != '.')
1327 break;
1328 cur++;
1329 }
Owen Taylor3473f882001-02-23 17:55:21 +00001330 }
William M. Brack015ccb22005-02-13 08:18:52 +00001331 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1332 IS_ALPHA(*cur))) {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001333 /* maybe host_name */
1334 if (!IS_ALPHANUM(*cur))
1335 return(4); /* e.g. http://xml.$oft */
1336 do {
1337 do ++cur; while (IS_ALPHANUM(*cur));
1338 if (*cur == '-') {
1339 --cur;
1340 if (*cur == '.')
1341 return(5); /* e.g. http://xml.-soft */
1342 ++cur;
1343 continue;
1344 }
1345 if (*cur == '.') {
1346 --cur;
1347 if (*cur == '-')
1348 return(6); /* e.g. http://xml-.soft */
1349 if (*cur == '.')
1350 return(7); /* e.g. http://xml..soft */
1351 ++cur;
1352 continue;
1353 }
1354 break;
1355 } while (1);
1356 tmp = cur;
1357 if (tmp[-1] == '.')
1358 --tmp; /* e.g. http://xml.$Oft/ */
1359 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1360 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1361 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001362 }
Owen Taylor3473f882001-02-23 17:55:21 +00001363 if (uri != NULL) {
1364 if (uri->authority != NULL) xmlFree(uri->authority);
1365 uri->authority = NULL;
William M. Brack015ccb22005-02-13 08:18:52 +00001366 if (host[0] != '[') { /* it's not an IPV6 addr */
1367 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001368 if (uri->cleanup & 2)
1369 uri->server = STRNDUP(host, cur - host);
1370 else
1371 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
William M. Brack015ccb22005-02-13 08:18:52 +00001372 }
Owen Taylor3473f882001-02-23 17:55:21 +00001373 }
Owen Taylor3473f882001-02-23 17:55:21 +00001374 /*
1375 * finish by checking for a port presence.
1376 */
1377 if (*cur == ':') {
1378 cur++;
1379 if (IS_DIGIT(*cur)) {
1380 if (uri != NULL)
1381 uri->port = 0;
1382 while (IS_DIGIT(*cur)) {
1383 if (uri != NULL)
1384 uri->port = uri->port * 10 + (*cur - '0');
1385 cur++;
1386 }
1387 }
1388 }
1389 *str = cur;
1390 return(0);
1391}
1392
1393/**
1394 * xmlParseURIRelSegment:
1395 * @uri: pointer to an URI structure
1396 * @str: pointer to the string to analyze
1397 *
1398 * Parse an URI relative segment
1399 *
1400 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1401 * "+" | "$" | "," )
1402 *
1403 * Returns 0 or the error code
1404 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001405static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001406xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1407{
Owen Taylor3473f882001-02-23 17:55:21 +00001408 const char *cur;
1409
1410 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001411 return (-1);
1412
Owen Taylor3473f882001-02-23 17:55:21 +00001413 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001414 if (!((IS_SEGMENT(cur)) ||
1415 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001416 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001417 }
1418 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001419 while ((IS_SEGMENT(cur)) ||
1420 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001421 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001422 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001423 if (uri->path != NULL)
1424 xmlFree(uri->path);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001425 if (uri->cleanup & 2)
1426 uri->path = STRNDUP(*str, cur - *str);
1427 else
1428 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001429 }
1430 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001431 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001432}
1433
1434/**
1435 * xmlParseURIPathSegments:
1436 * @uri: pointer to an URI structure
1437 * @str: pointer to the string to analyze
1438 * @slash: should we add a leading slash
1439 *
1440 * Parse an URI set of path segments
1441 *
1442 * path_segments = segment *( "/" segment )
1443 * segment = *pchar *( ";" param )
1444 * param = *pchar
1445 *
1446 * Returns 0 or the error code
1447 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001448static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001449xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1450{
Owen Taylor3473f882001-02-23 17:55:21 +00001451 const char *cur;
1452
1453 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001454 return (-1);
1455
Owen Taylor3473f882001-02-23 17:55:21 +00001456 cur = *str;
1457
1458 do {
Daniel Veillard336a8e12005-08-07 10:46:19 +00001459 while ((IS_PCHAR(cur)) ||
1460 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001461 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001462 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001463 cur++;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001464 while ((IS_PCHAR(cur)) ||
1465 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001466 NEXT(cur);
1467 }
1468 if (*cur != '/')
1469 break;
1470 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001471 } while (1);
1472 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001473 int len, len2 = 0;
1474 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001475
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001476 /*
1477 * Concat the set of path segments to the current path
1478 */
1479 len = cur - *str;
1480 if (slash)
1481 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001482
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001483 if (uri->path != NULL) {
1484 len2 = strlen(uri->path);
1485 len += len2;
1486 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001487 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001488 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001489 xmlGenericError(xmlGenericErrorContext,
1490 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001491 *str = cur;
1492 return (-1);
1493 }
1494 if (uri->path != NULL)
1495 memcpy(path, uri->path, len2);
1496 if (slash) {
1497 path[len2] = '/';
1498 len2++;
1499 }
1500 path[len2] = 0;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001501 if (cur - *str > 0) {
1502 if (uri->cleanup & 2) {
1503 memcpy(&path[len2], *str, cur - *str);
1504 path[len2 + (cur - *str)] = 0;
1505 } else
1506 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1507 }
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001508 if (uri->path != NULL)
1509 xmlFree(uri->path);
1510 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001511 }
1512 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001513 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001514}
1515
1516/**
1517 * xmlParseURIAuthority:
1518 * @uri: pointer to an URI structure
1519 * @str: pointer to the string to analyze
1520 *
1521 * Parse the authority part of an URI.
1522 *
1523 * authority = server | reg_name
1524 * server = [ [ userinfo "@" ] hostport ]
1525 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1526 * "@" | "&" | "=" | "+" )
1527 *
1528 * Note : this is completely ambiguous since reg_name is allowed to
1529 * use the full set of chars in use by server:
1530 *
1531 * 3.2.1. Registry-based Naming Authority
1532 *
1533 * The structure of a registry-based naming authority is specific
1534 * to the URI scheme, but constrained to the allowed characters
1535 * for an authority component.
1536 *
1537 * Returns 0 or the error code
1538 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001539static int
Owen Taylor3473f882001-02-23 17:55:21 +00001540xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1541 const char *cur;
1542 int ret;
1543
1544 if (str == NULL)
1545 return(-1);
1546
1547 cur = *str;
1548
1549 /*
1550 * try first to parse it as a server string.
1551 */
1552 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001553 if ((ret == 0) && (*str != NULL) &&
1554 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001555 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001556 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001557
1558 /*
1559 * failed, fallback to reg_name
1560 */
1561 if (!IS_REG_NAME(cur)) {
1562 return(5);
1563 }
1564 NEXT(cur);
1565 while (IS_REG_NAME(cur)) NEXT(cur);
1566 if (uri != NULL) {
1567 if (uri->server != NULL) xmlFree(uri->server);
1568 uri->server = NULL;
1569 if (uri->user != NULL) xmlFree(uri->user);
1570 uri->user = NULL;
1571 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001572 if (uri->cleanup & 2)
1573 uri->authority = STRNDUP(*str, cur - *str);
1574 else
1575 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001576 }
1577 *str = cur;
1578 return(0);
1579}
1580
1581/**
1582 * xmlParseURIHierPart:
1583 * @uri: pointer to an URI structure
1584 * @str: pointer to the string to analyze
1585 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001586 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001587 *
1588 * hier_part = ( net_path | abs_path ) [ "?" query ]
1589 * abs_path = "/" path_segments
1590 * net_path = "//" authority [ abs_path ]
1591 *
1592 * Returns 0 or the error code
1593 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001594static int
Owen Taylor3473f882001-02-23 17:55:21 +00001595xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1596 int ret;
1597 const char *cur;
1598
1599 if (str == NULL)
1600 return(-1);
1601
1602 cur = *str;
1603
1604 if ((cur[0] == '/') && (cur[1] == '/')) {
1605 cur += 2;
1606 ret = xmlParseURIAuthority(uri, &cur);
1607 if (ret != 0)
1608 return(ret);
1609 if (cur[0] == '/') {
1610 cur++;
1611 ret = xmlParseURIPathSegments(uri, &cur, 1);
1612 }
1613 } else if (cur[0] == '/') {
1614 cur++;
1615 ret = xmlParseURIPathSegments(uri, &cur, 1);
1616 } else {
1617 return(4);
1618 }
1619 if (ret != 0)
1620 return(ret);
1621 if (*cur == '?') {
1622 cur++;
1623 ret = xmlParseURIQuery(uri, &cur);
1624 if (ret != 0)
1625 return(ret);
1626 }
1627 *str = cur;
1628 return(0);
1629}
1630
1631/**
1632 * xmlParseAbsoluteURI:
1633 * @uri: pointer to an URI structure
1634 * @str: pointer to the string to analyze
1635 *
1636 * Parse an URI reference string and fills in the appropriate fields
1637 * of the @uri structure
1638 *
1639 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1640 *
1641 * Returns 0 or the error code
1642 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001643static int
Owen Taylor3473f882001-02-23 17:55:21 +00001644xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1645 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001646 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001647
1648 if (str == NULL)
1649 return(-1);
1650
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001651 cur = *str;
1652
Owen Taylor3473f882001-02-23 17:55:21 +00001653 ret = xmlParseURIScheme(uri, str);
1654 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001655 if (**str != ':') {
1656 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001657 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001658 }
Owen Taylor3473f882001-02-23 17:55:21 +00001659 (*str)++;
1660 if (**str == '/')
1661 return(xmlParseURIHierPart(uri, str));
1662 return(xmlParseURIOpaquePart(uri, str));
1663}
1664
1665/**
1666 * xmlParseRelativeURI:
1667 * @uri: pointer to an URI structure
1668 * @str: pointer to the string to analyze
1669 *
1670 * Parse an relative URI string and fills in the appropriate fields
1671 * of the @uri structure
1672 *
1673 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1674 * abs_path = "/" path_segments
1675 * net_path = "//" authority [ abs_path ]
1676 * rel_path = rel_segment [ abs_path ]
1677 *
1678 * Returns 0 or the error code
1679 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001680static int
Owen Taylor3473f882001-02-23 17:55:21 +00001681xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1682 int ret = 0;
1683 const char *cur;
1684
1685 if (str == NULL)
1686 return(-1);
1687
1688 cur = *str;
1689 if ((cur[0] == '/') && (cur[1] == '/')) {
1690 cur += 2;
1691 ret = xmlParseURIAuthority(uri, &cur);
1692 if (ret != 0)
1693 return(ret);
1694 if (cur[0] == '/') {
1695 cur++;
1696 ret = xmlParseURIPathSegments(uri, &cur, 1);
1697 }
1698 } else if (cur[0] == '/') {
1699 cur++;
1700 ret = xmlParseURIPathSegments(uri, &cur, 1);
1701 } else if (cur[0] != '#' && cur[0] != '?') {
1702 ret = xmlParseURIRelSegment(uri, &cur);
1703 if (ret != 0)
1704 return(ret);
1705 if (cur[0] == '/') {
1706 cur++;
1707 ret = xmlParseURIPathSegments(uri, &cur, 1);
1708 }
1709 }
1710 if (ret != 0)
1711 return(ret);
1712 if (*cur == '?') {
1713 cur++;
1714 ret = xmlParseURIQuery(uri, &cur);
1715 if (ret != 0)
1716 return(ret);
1717 }
1718 *str = cur;
1719 return(ret);
1720}
1721
1722/**
1723 * xmlParseURIReference:
1724 * @uri: pointer to an URI structure
1725 * @str: the string to analyze
1726 *
1727 * Parse an URI reference string and fills in the appropriate fields
1728 * of the @uri structure
1729 *
1730 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1731 *
1732 * Returns 0 or the error code
1733 */
1734int
1735xmlParseURIReference(xmlURIPtr uri, const char *str) {
1736 int ret;
1737 const char *tmp = str;
1738
1739 if (str == NULL)
1740 return(-1);
1741 xmlCleanURI(uri);
1742
1743 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001744 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001745 * it fails.
1746 */
1747 ret = xmlParseAbsoluteURI(uri, &str);
1748 if (ret != 0) {
1749 xmlCleanURI(uri);
1750 str = tmp;
1751 ret = xmlParseRelativeURI(uri, &str);
1752 }
1753 if (ret != 0) {
1754 xmlCleanURI(uri);
1755 return(ret);
1756 }
1757
1758 if (*str == '#') {
1759 str++;
1760 ret = xmlParseURIFragment(uri, &str);
1761 if (ret != 0) return(ret);
1762 }
1763 if (*str != 0) {
1764 xmlCleanURI(uri);
1765 return(1);
1766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseURI:
1772 * @str: the URI string to analyze
1773 *
1774 * Parse an URI
1775 *
1776 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1777 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001778 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001779 */
1780xmlURIPtr
1781xmlParseURI(const char *str) {
1782 xmlURIPtr uri;
1783 int ret;
1784
1785 if (str == NULL)
1786 return(NULL);
1787 uri = xmlCreateURI();
1788 if (uri != NULL) {
1789 ret = xmlParseURIReference(uri, str);
1790 if (ret) {
1791 xmlFreeURI(uri);
1792 return(NULL);
1793 }
1794 }
1795 return(uri);
1796}
1797
Daniel Veillard336a8e12005-08-07 10:46:19 +00001798/**
1799 * xmlParseURIRaw:
1800 * @str: the URI string to analyze
1801 * @raw: if 1 unescaping of URI pieces are disabled
1802 *
1803 * Parse an URI but allows to keep intact the original fragments.
1804 *
1805 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1806 *
1807 * Returns a newly built xmlURIPtr or NULL in case of error
1808 */
1809xmlURIPtr
1810xmlParseURIRaw(const char *str, int raw) {
1811 xmlURIPtr uri;
1812 int ret;
1813
1814 if (str == NULL)
1815 return(NULL);
1816 uri = xmlCreateURI();
1817 if (uri != NULL) {
1818 if (raw) {
1819 uri->cleanup |= 2;
1820 }
1821 ret = xmlParseURIReference(uri, str);
1822 if (ret) {
1823 xmlFreeURI(uri);
1824 return(NULL);
1825 }
1826 }
1827 return(uri);
1828}
1829
Owen Taylor3473f882001-02-23 17:55:21 +00001830/************************************************************************
1831 * *
1832 * Public functions *
1833 * *
1834 ************************************************************************/
1835
1836/**
1837 * xmlBuildURI:
1838 * @URI: the URI instance found in the document
1839 * @base: the base value
1840 *
1841 * Computes the final URI of the reference done by checking that
1842 * the given URI is valid, and building the final URI using the
1843 * base URI. This is processed according to section 5.2 of the
1844 * RFC 2396
1845 *
1846 * 5.2. Resolving Relative References to Absolute Form
1847 *
1848 * Returns a new URI string (to be freed by the caller) or NULL in case
1849 * of error.
1850 */
1851xmlChar *
1852xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1853 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001854 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001855 xmlURIPtr ref = NULL;
1856 xmlURIPtr bas = NULL;
1857 xmlURIPtr res = NULL;
1858
1859 /*
1860 * 1) The URI reference is parsed into the potential four components and
1861 * fragment identifier, as described in Section 4.3.
1862 *
1863 * NOTE that a completely empty URI is treated by modern browsers
1864 * as a reference to "." rather than as a synonym for the current
1865 * URI. Should we do that here?
1866 */
1867 if (URI == NULL)
1868 ret = -1;
1869 else {
1870 if (*URI) {
1871 ref = xmlCreateURI();
1872 if (ref == NULL)
1873 goto done;
1874 ret = xmlParseURIReference(ref, (const char *) URI);
1875 }
1876 else
1877 ret = 0;
1878 }
1879 if (ret != 0)
1880 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001881 if ((ref != NULL) && (ref->scheme != NULL)) {
1882 /*
1883 * The URI is absolute don't modify.
1884 */
1885 val = xmlStrdup(URI);
1886 goto done;
1887 }
Owen Taylor3473f882001-02-23 17:55:21 +00001888 if (base == NULL)
1889 ret = -1;
1890 else {
1891 bas = xmlCreateURI();
1892 if (bas == NULL)
1893 goto done;
1894 ret = xmlParseURIReference(bas, (const char *) base);
1895 }
1896 if (ret != 0) {
1897 if (ref)
1898 val = xmlSaveUri(ref);
1899 goto done;
1900 }
1901 if (ref == NULL) {
1902 /*
1903 * the base fragment must be ignored
1904 */
1905 if (bas->fragment != NULL) {
1906 xmlFree(bas->fragment);
1907 bas->fragment = NULL;
1908 }
1909 val = xmlSaveUri(bas);
1910 goto done;
1911 }
1912
1913 /*
1914 * 2) If the path component is empty and the scheme, authority, and
1915 * query components are undefined, then it is a reference to the
1916 * current document and we are done. Otherwise, the reference URI's
1917 * query and fragment components are defined as found (or not found)
1918 * within the URI reference and not inherited from the base URI.
1919 *
1920 * NOTE that in modern browsers, the parsing differs from the above
1921 * in the following aspect: the query component is allowed to be
1922 * defined while still treating this as a reference to the current
1923 * document.
1924 */
1925 res = xmlCreateURI();
1926 if (res == NULL)
1927 goto done;
1928 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1929 ((ref->authority == NULL) && (ref->server == NULL))) {
1930 if (bas->scheme != NULL)
1931 res->scheme = xmlMemStrdup(bas->scheme);
1932 if (bas->authority != NULL)
1933 res->authority = xmlMemStrdup(bas->authority);
1934 else if (bas->server != NULL) {
1935 res->server = xmlMemStrdup(bas->server);
1936 if (bas->user != NULL)
1937 res->user = xmlMemStrdup(bas->user);
1938 res->port = bas->port;
1939 }
1940 if (bas->path != NULL)
1941 res->path = xmlMemStrdup(bas->path);
1942 if (ref->query != NULL)
1943 res->query = xmlMemStrdup(ref->query);
1944 else if (bas->query != NULL)
1945 res->query = xmlMemStrdup(bas->query);
1946 if (ref->fragment != NULL)
1947 res->fragment = xmlMemStrdup(ref->fragment);
1948 goto step_7;
1949 }
Owen Taylor3473f882001-02-23 17:55:21 +00001950
1951 /*
1952 * 3) If the scheme component is defined, indicating that the reference
1953 * starts with a scheme name, then the reference is interpreted as an
1954 * absolute URI and we are done. Otherwise, the reference URI's
1955 * scheme is inherited from the base URI's scheme component.
1956 */
1957 if (ref->scheme != NULL) {
1958 val = xmlSaveUri(ref);
1959 goto done;
1960 }
1961 if (bas->scheme != NULL)
1962 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001963
1964 if (ref->query != NULL)
1965 res->query = xmlMemStrdup(ref->query);
1966 if (ref->fragment != NULL)
1967 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001968
1969 /*
1970 * 4) If the authority component is defined, then the reference is a
1971 * network-path and we skip to step 7. Otherwise, the reference
1972 * URI's authority is inherited from the base URI's authority
1973 * component, which will also be undefined if the URI scheme does not
1974 * use an authority component.
1975 */
1976 if ((ref->authority != NULL) || (ref->server != NULL)) {
1977 if (ref->authority != NULL)
1978 res->authority = xmlMemStrdup(ref->authority);
1979 else {
1980 res->server = xmlMemStrdup(ref->server);
1981 if (ref->user != NULL)
1982 res->user = xmlMemStrdup(ref->user);
1983 res->port = ref->port;
1984 }
1985 if (ref->path != NULL)
1986 res->path = xmlMemStrdup(ref->path);
1987 goto step_7;
1988 }
1989 if (bas->authority != NULL)
1990 res->authority = xmlMemStrdup(bas->authority);
1991 else if (bas->server != NULL) {
1992 res->server = xmlMemStrdup(bas->server);
1993 if (bas->user != NULL)
1994 res->user = xmlMemStrdup(bas->user);
1995 res->port = bas->port;
1996 }
1997
1998 /*
1999 * 5) If the path component begins with a slash character ("/"), then
2000 * the reference is an absolute-path and we skip to step 7.
2001 */
2002 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2003 res->path = xmlMemStrdup(ref->path);
2004 goto step_7;
2005 }
2006
2007
2008 /*
2009 * 6) If this step is reached, then we are resolving a relative-path
2010 * reference. The relative path needs to be merged with the base
2011 * URI's path. Although there are many ways to do this, we will
2012 * describe a simple method using a separate string buffer.
2013 *
2014 * Allocate a buffer large enough for the result string.
2015 */
2016 len = 2; /* extra / and 0 */
2017 if (ref->path != NULL)
2018 len += strlen(ref->path);
2019 if (bas->path != NULL)
2020 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002021 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002022 if (res->path == NULL) {
2023 xmlGenericError(xmlGenericErrorContext,
2024 "xmlBuildURI: out of memory\n");
2025 goto done;
2026 }
2027 res->path[0] = 0;
2028
2029 /*
2030 * a) All but the last segment of the base URI's path component is
2031 * copied to the buffer. In other words, any characters after the
2032 * last (right-most) slash character, if any, are excluded.
2033 */
2034 cur = 0;
2035 out = 0;
2036 if (bas->path != NULL) {
2037 while (bas->path[cur] != 0) {
2038 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2039 cur++;
2040 if (bas->path[cur] == 0)
2041 break;
2042
2043 cur++;
2044 while (out < cur) {
2045 res->path[out] = bas->path[out];
2046 out++;
2047 }
2048 }
2049 }
2050 res->path[out] = 0;
2051
2052 /*
2053 * b) The reference's path component is appended to the buffer
2054 * string.
2055 */
2056 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002057 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * Ensure the path includes a '/'
2060 */
2061 if ((out == 0) && (bas->server != NULL))
2062 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002063 while (ref->path[indx] != 0) {
2064 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002065 }
2066 }
2067 res->path[out] = 0;
2068
2069 /*
2070 * Steps c) to h) are really path normalization steps
2071 */
2072 xmlNormalizeURIPath(res->path);
2073
2074step_7:
2075
2076 /*
2077 * 7) The resulting URI components, including any inherited from the
2078 * base URI, are recombined to give the absolute form of the URI
2079 * reference.
2080 */
2081 val = xmlSaveUri(res);
2082
2083done:
2084 if (ref != NULL)
2085 xmlFreeURI(ref);
2086 if (bas != NULL)
2087 xmlFreeURI(bas);
2088 if (res != NULL)
2089 xmlFreeURI(res);
2090 return(val);
2091}
2092
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002093/**
William M. Brackf7789b12004-06-07 08:57:27 +00002094 * xmlBuildRelativeURI:
2095 * @URI: the URI reference under consideration
2096 * @base: the base value
2097 *
2098 * Expresses the URI of the reference in terms relative to the
2099 * base. Some examples of this operation include:
2100 * base = "http://site1.com/docs/book1.html"
2101 * URI input URI returned
2102 * docs/pic1.gif pic1.gif
2103 * docs/img/pic1.gif img/pic1.gif
2104 * img/pic1.gif ../img/pic1.gif
2105 * http://site1.com/docs/pic1.gif pic1.gif
2106 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2107 *
2108 * base = "docs/book1.html"
2109 * URI input URI returned
2110 * docs/pic1.gif pic1.gif
2111 * docs/img/pic1.gif img/pic1.gif
2112 * img/pic1.gif ../img/pic1.gif
2113 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2114 *
2115 *
2116 * Note: if the URI reference is really wierd or complicated, it may be
2117 * worthwhile to first convert it into a "nice" one by calling
2118 * xmlBuildURI (using 'base') before calling this routine,
2119 * since this routine (for reasonable efficiency) assumes URI has
2120 * already been through some validation.
2121 *
2122 * Returns a new URI string (to be freed by the caller) or NULL in case
2123 * error.
2124 */
2125xmlChar *
2126xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2127{
2128 xmlChar *val = NULL;
2129 int ret;
2130 int ix;
2131 int pos = 0;
2132 int nbslash = 0;
2133 xmlURIPtr ref = NULL;
2134 xmlURIPtr bas = NULL;
2135 xmlChar *bptr, *uptr, *vptr;
2136
2137 if ((URI == NULL) || (*URI == 0))
2138 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002139
2140 /*
2141 * First parse URI into a standard form
2142 */
2143 ref = xmlCreateURI ();
2144 if (ref == NULL)
2145 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002146 /* If URI not already in "relative" form */
2147 if (URI[0] != '.') {
2148 ret = xmlParseURIReference (ref, (const char *) URI);
2149 if (ret != 0)
2150 goto done; /* Error in URI, return NULL */
2151 } else
2152 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002153
2154 /*
2155 * Next parse base into the same standard form
2156 */
2157 if ((base == NULL) || (*base == 0)) {
2158 val = xmlStrdup (URI);
2159 goto done;
2160 }
2161 bas = xmlCreateURI ();
2162 if (bas == NULL)
2163 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002164 if (base[0] != '.') {
2165 ret = xmlParseURIReference (bas, (const char *) base);
2166 if (ret != 0)
2167 goto done; /* Error in base, return NULL */
2168 } else
2169 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002170
2171 /*
2172 * If the scheme / server on the URI differs from the base,
2173 * just return the URI
2174 */
2175 if ((ref->scheme != NULL) &&
2176 ((bas->scheme == NULL) ||
2177 xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme) ||
2178 xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server))) {
2179 val = xmlStrdup (URI);
2180 goto done;
2181 }
2182
2183 /*
2184 * At this point (at last!) we can compare the two paths
2185 *
2186 * First we compare the two strings and find where they first differ
2187 */
2188 bptr = (xmlChar *)bas->path;
William M. Brackf20fbf72004-06-25 05:49:08 +00002189 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2190 pos += 2;
2191 if ((*bptr == '.') && (bptr[1] == '/'))
2192 bptr += 2;
2193 else if ((*bptr == '/') && (ref->path[pos] != '/'))
William M. Brackf7789b12004-06-07 08:57:27 +00002194 bptr++;
2195 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2196 pos++;
2197
2198 if (bptr[pos] == ref->path[pos]) {
2199 val = NULL; /* if no differences, return NULL */
2200 goto done; /* (I can't imagine why anyone would do this) */
2201 }
2202
2203 /*
2204 * In URI, "back up" to the last '/' encountered. This will be the
2205 * beginning of the "unique" suffix of URI
2206 */
2207 ix = pos;
2208 if ((ref->path[ix] == '/') && (ix > 0))
2209 ix--;
2210 for (; ix > 0; ix--) {
2211 if (ref->path[ix] == '/')
2212 break;
2213 }
William M. Brackf2a657a2004-10-27 16:33:09 +00002214 if (ix == 0) {
William M. Brackf7789b12004-06-07 08:57:27 +00002215 uptr = (xmlChar *)ref->path;
William M. Brackf2a657a2004-10-27 16:33:09 +00002216 } else {
2217 ix++;
2218 uptr = (xmlChar *)&ref->path[ix];
2219 }
William M. Brackf7789b12004-06-07 08:57:27 +00002220
2221 /*
2222 * In base, count the number of '/' from the differing point
2223 */
2224 if (bptr[pos] != ref->path[pos]) { /* check for trivial URI == base */
2225 for (; bptr[ix] != 0; ix++) {
2226 if (bptr[ix] == '/')
2227 nbslash++;
2228 }
2229 }
2230
2231 if (nbslash == 0) {
2232 val = xmlStrdup (uptr);
2233 goto done;
2234 }
William M. Brackf7789b12004-06-07 08:57:27 +00002235
2236 /*
2237 * Allocate just enough space for the returned string -
2238 * length of the remainder of the URI, plus enough space
2239 * for the "../" groups, plus one for the terminator
2240 */
2241 ix = xmlStrlen (uptr) + 1;
2242 val = (xmlChar *) xmlMalloc (ix + 3 * nbslash);
2243 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002244 xmlGenericError(xmlGenericErrorContext,
2245 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002246 goto done;
2247 }
2248 vptr = val;
2249 /*
2250 * Put in as many "../" as needed
2251 */
2252 for (; nbslash>0; nbslash--) {
2253 *vptr++ = '.';
2254 *vptr++ = '.';
2255 *vptr++ = '/';
2256 }
2257 /*
2258 * Finish up with the end of the URI
2259 */
2260 memcpy (vptr, uptr, ix);
2261
2262 done:
2263 /*
2264 * Free the working variables
2265 */
2266 if (ref != NULL)
2267 xmlFreeURI (ref);
2268 if (bas != NULL)
2269 xmlFreeURI (bas);
2270
2271 return val;
2272}
2273
2274/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002275 * xmlCanonicPath:
2276 * @path: the resource locator in a filesystem notation
2277 *
2278 * Constructs a canonic path from the specified path.
2279 *
2280 * Returns a new canonic path, or a duplicate of the path parameter if the
2281 * construction fails. The caller is responsible for freeing the memory occupied
2282 * by the returned string. If there is insufficient memory available, or the
2283 * argument is NULL, the function returns NULL.
2284 */
2285#define IS_WINDOWS_PATH(p) \
2286 ((p != NULL) && \
2287 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2288 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2289 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2290xmlChar*
2291xmlCanonicPath(const xmlChar *path)
2292{
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002293#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002294 int len = 0;
2295 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002296 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002297#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002298 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002299 xmlChar *ret;
2300 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002301
2302 if (path == NULL)
2303 return(NULL);
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002304 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002305 xmlFreeURI(uri);
2306 return xmlStrdup(path);
2307 }
2308
Daniel Veillard336a8e12005-08-07 10:46:19 +00002309 absuri = xmlStrstr(path, BAD_CAST "://");
2310 if (absuri != NULL) {
2311 int l, j;
2312 unsigned char c;
2313 xmlChar *escURI;
2314
2315 /*
2316 * this looks like an URI where some parts have not been
2317 * escaped leading to a parsing problem check that the first
2318 * part matches a protocol.
2319 */
2320 l = absuri - path;
2321 if ((l <= 0) || (l > 20))
2322 goto path_processing;
2323 for (j = 0;j < l;j++) {
2324 c = path[j];
2325 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2326 goto path_processing;
2327 }
2328
2329 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2330 if (escURI != NULL) {
2331 uri = xmlParseURI((const char *) escURI);
2332 if (uri != NULL) {
2333 xmlFreeURI(uri);
2334 return escURI;
2335 }
2336 xmlFreeURI(uri);
2337 }
2338 }
2339
2340path_processing:
2341#if defined(_WIN32) && !defined(__CYGWIN__)
2342 /*
2343 * This really need to be cleaned up by someone with a Windows box
2344 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002345 uri = xmlCreateURI();
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002346 if (uri == NULL) {
2347 return(NULL);
2348 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002349
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002350 len = xmlStrlen(path);
2351 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2352 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack42331a92004-07-29 07:07:16 +00002353 uri->path = xmlMallocAtomic(len + 2); /* FIXME - check alloc! */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002354 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002355 p = uri->path + 1;
2356 strncpy(p, path, len + 1);
2357 } else {
William M. Bracka3215c72004-07-31 16:24:01 +00002358 uri->path = xmlStrdup(path); /* FIXME - check alloc! */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002359 p = uri->path;
2360 }
2361 while (*p != '\0') {
2362 if (*p == '\\')
2363 *p = '/';
2364 p++;
2365 }
William M. Bracka3215c72004-07-31 16:24:01 +00002366 if (uri->path == NULL) {
2367 xmlFreeURI(uri);
2368 return(NULL);
2369 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002370 ret = xmlSaveUri(uri);
2371 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002372#else
2373 ret = xmlStrdup((const xmlChar *) path);
2374#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002375 return(ret);
2376}
Owen Taylor3473f882001-02-23 17:55:21 +00002377
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002378#define bottom_uri
2379#include "elfgcchack.h"