blob: 1e292237c91640d314ff976beec058c40a8d6c34 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
William M. Brack015ccb22005-02-13 08:18:52 +00004 * Reference: RFCs 2396, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
21/************************************************************************
22 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000023 * Macros to differentiate various character type *
Owen Taylor3473f882001-02-23 17:55:21 +000024 * directly extracted from RFC 2396 *
25 * *
26 ************************************************************************/
27
/*
 * Character classes from RFC 2396 (with the RFC 2732 bracket extension).
 *
 * Macros taking "x" test a single character value; macros taking "p"
 * test the character(s) found at pointer p. All of them may evaluate
 * their argument more than once, so never pass an expression with side
 * effects.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | ... | "9"
 *
 * Any IS_DIGIT inherited from an earlier header is replaced by this one.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | ... | "F" | "a" | ... | "f"
 */
#define IS_HEX(x)                                                       \
    ((IS_DIGIT(x)) ||                                                   \
     (('a' <= (x)) && ((x) <= 'f')) ||                                  \
     (('A' <= (x)) && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The second and third characters are only read once the preceding one
 * matched, so a string ending right after '%' is not read past its
 * terminator.
 */
#define IS_ESCAPED(p)                                                   \
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p)                                             \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||           \
     ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||           \
     ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p)                                                     \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)                                                   \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * This only tests one character of the tail; the leading alpha has to
 * be checked separately.
 */
#define IS_SCHEME(x)                                                    \
    ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||                                  \
     ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p)                                                  \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)                                                  \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p)                                                      \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * NOTE: this macro additionally accepts "[" and "]", which RFC 2396
 * listed as unwise before RFC 2732 moved them to the reserved set.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000171
/*
 * Advance p to the next character: three bytes when p points at '%'
 * (a "%XX" escape), one byte otherwise.
 *
 * p must be a modifiable lvalue and is evaluated more than once. The
 * macro does not check that two characters actually follow the '%';
 * callers are expected to have validated the escape beforehand.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
177
178/*
179 * Productions from the spec.
180 *
181 * authority = server | reg_name
182 * reg_name = 1*( unreserved | escaped | "$" | "," |
183 * ";" | ":" | "@" | "&" | "=" | "+" )
184 *
185 * path = [ abs_path | opaque_part ]
186 */
187
/*
 * Duplicate the first n bytes of s through xmlStrndup() and view the
 * result as a plain char string. The expansion is fully parenthesized so
 * it can be used safely inside larger expressions.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
189
Owen Taylor3473f882001-02-23 17:55:21 +0000190/************************************************************************
191 * *
192 * Generic URI structure functions *
193 * *
194 ************************************************************************/
195
196/**
197 * xmlCreateURI:
198 *
199 * Simply creates an empty xmlURI
200 *
201 * Returns the new structure or NULL in case of error
202 */
203xmlURIPtr
204xmlCreateURI(void) {
205 xmlURIPtr ret;
206
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208 if (ret == NULL) {
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
211 return(NULL);
212 }
213 memset(ret, 0, sizeof(xmlURI));
214 return(ret);
215}
216
217/**
218 * xmlSaveUri:
219 * @uri: pointer to an xmlURI
220 *
221 * Save the URI as an escaped string
222 *
223 * Returns a new string (to be deallocated by caller)
224 */
225xmlChar *
226xmlSaveUri(xmlURIPtr uri) {
227 xmlChar *ret = NULL;
228 const char *p;
229 int len;
230 int max;
231
232 if (uri == NULL) return(NULL);
233
234
235 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 len = 0;
243
244 if (uri->scheme != NULL) {
245 p = uri->scheme;
246 while (*p != 0) {
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = *p++;
257 }
258 if (len >= max) {
259 max *= 2;
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
261 if (ret == NULL) {
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
264 return(NULL);
265 }
266 }
267 ret[len++] = ':';
268 }
269 if (uri->opaque != NULL) {
270 p = uri->opaque;
271 while (*p != 0) {
272 if (len + 3 >= max) {
273 max *= 2;
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
275 if (ret == NULL) {
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
278 return(NULL);
279 }
280 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000282 ret[len++] = *p++;
283 else {
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
286 ret[len++] = '%';
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289 }
290 }
Owen Taylor3473f882001-02-23 17:55:21 +0000291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
429 if (ret == NULL) {
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
432 return(NULL);
433 }
434 }
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
438 ((*(p) == ',')))
439 ret[len++] = *p++;
440 else {
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
443 ret[len++] = '%';
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446 }
447 }
448 }
449 if (uri->query != NULL) {
450 if (len + 3 >= max) {
451 max *= 2;
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
454 if (ret == NULL) {
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
457 return(NULL);
458 }
459 }
460 ret[len++] = '?';
461 p = uri->query;
462 while (*p != 0) {
463 if (len + 3 >= max) {
464 max *= 2;
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
467 if (ret == NULL) {
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
470 return(NULL);
471 }
472 }
473 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
474 ret[len++] = *p++;
475 else {
476 int val = *(unsigned char *)p++;
477 int hi = val / 0x10, lo = val % 0x10;
478 ret[len++] = '%';
479 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
480 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
481 }
482 }
483 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000484 }
485 if (uri->fragment != NULL) {
486 if (len + 3 >= max) {
487 max *= 2;
488 ret = (xmlChar *) xmlRealloc(ret,
489 (max + 1) * sizeof(xmlChar));
490 if (ret == NULL) {
491 xmlGenericError(xmlGenericErrorContext,
492 "xmlSaveUri: out of memory\n");
493 return(NULL);
494 }
495 }
496 ret[len++] = '#';
497 p = uri->fragment;
498 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 if (len + 3 >= max) {
500 max *= 2;
501 ret = (xmlChar *) xmlRealloc(ret,
502 (max + 1) * sizeof(xmlChar));
503 if (ret == NULL) {
504 xmlGenericError(xmlGenericErrorContext,
505 "xmlSaveUri: out of memory\n");
506 return(NULL);
507 }
508 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000509 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
510 ret[len++] = *p++;
511 else {
512 int val = *(unsigned char *)p++;
513 int hi = val / 0x10, lo = val % 0x10;
514 ret[len++] = '%';
515 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
516 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000517 }
518 }
Owen Taylor3473f882001-02-23 17:55:21 +0000519 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000520 if (len >= max) {
521 max *= 2;
522 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
523 if (ret == NULL) {
524 xmlGenericError(xmlGenericErrorContext,
525 "xmlSaveUri: out of memory\n");
526 return(NULL);
527 }
528 }
529 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000530 return(ret);
531}
532
533/**
534 * xmlPrintURI:
535 * @stream: a FILE* for the output
536 * @uri: pointer to an xmlURI
537 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000538 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000539 */
540void
541xmlPrintURI(FILE *stream, xmlURIPtr uri) {
542 xmlChar *out;
543
544 out = xmlSaveUri(uri);
545 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000546 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000547 xmlFree(out);
548 }
549}
550
551/**
552 * xmlCleanURI:
553 * @uri: pointer to an xmlURI
554 *
555 * Make sure the xmlURI struct is free of content
556 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000557static void
Owen Taylor3473f882001-02-23 17:55:21 +0000558xmlCleanURI(xmlURIPtr uri) {
559 if (uri == NULL) return;
560
561 if (uri->scheme != NULL) xmlFree(uri->scheme);
562 uri->scheme = NULL;
563 if (uri->server != NULL) xmlFree(uri->server);
564 uri->server = NULL;
565 if (uri->user != NULL) xmlFree(uri->user);
566 uri->user = NULL;
567 if (uri->path != NULL) xmlFree(uri->path);
568 uri->path = NULL;
569 if (uri->fragment != NULL) xmlFree(uri->fragment);
570 uri->fragment = NULL;
571 if (uri->opaque != NULL) xmlFree(uri->opaque);
572 uri->opaque = NULL;
573 if (uri->authority != NULL) xmlFree(uri->authority);
574 uri->authority = NULL;
575 if (uri->query != NULL) xmlFree(uri->query);
576 uri->query = NULL;
577}
578
579/**
580 * xmlFreeURI:
581 * @uri: pointer to an xmlURI
582 *
583 * Free up the xmlURI struct
584 */
585void
586xmlFreeURI(xmlURIPtr uri) {
587 if (uri == NULL) return;
588
589 if (uri->scheme != NULL) xmlFree(uri->scheme);
590 if (uri->server != NULL) xmlFree(uri->server);
591 if (uri->user != NULL) xmlFree(uri->user);
592 if (uri->path != NULL) xmlFree(uri->path);
593 if (uri->fragment != NULL) xmlFree(uri->fragment);
594 if (uri->opaque != NULL) xmlFree(uri->opaque);
595 if (uri->authority != NULL) xmlFree(uri->authority);
596 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000597 xmlFree(uri);
598}
599
600/************************************************************************
601 * *
602 * Helper functions *
603 * *
604 ************************************************************************/
605
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" inside the path are
 * collapsed to a single "/" at the same time; leading "/" characters
 * are kept as they are.
 *
 * Normalization occurs directly on the string, no new allocation is
 * done, and the result is never longer than the input.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars. We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path
         *    segment, are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path
         *    segment, that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The regions overlap, so copy by hand rather than with strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0);

        /* Nothing at all precedes "cur": keep going from here. */
        if (cur == path)
            continue;

        /* Step back over the separator(s) in front of "cur". */
        segp = cur - 1;
        while ((segp > path) && (segp[0] == '/'))
            --segp;

        /* Only slashes precede "cur", so there is no previous segment.
         * Testing the character (instead of segp == path) matters when the
         * first segment is a single character sitting at path[0]: it is a
         * real previous segment and must be revisited.
         */
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000793
/*
 * is_hex:
 * Returns 1 if @c is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise.
 */
static int is_hex(char c) {
    switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            return(1);
        default:
            return(0);
    }
}
801
Owen Taylor3473f882001-02-23 17:55:21 +0000802/**
803 * xmlURIUnescapeString:
804 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000805 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000806 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000807 *
808 * Unescaping routine, does not do validity checks !
809 * Output is direct unsigned char translation of %XX values (no encoding)
810 *
811 * Returns an copy of the string, but unescaped
812 */
813char *
814xmlURIUnescapeString(const char *str, int len, char *target) {
815 char *ret, *out;
816 const char *in;
817
818 if (str == NULL)
819 return(NULL);
820 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000821 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000822
823 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000824 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000825 if (ret == NULL) {
826 xmlGenericError(xmlGenericErrorContext,
827 "xmlURIUnescapeString: out of memory\n");
828 return(NULL);
829 }
830 } else
831 ret = target;
832 in = str;
833 out = ret;
834 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000835 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000836 in++;
837 if ((*in >= '0') && (*in <= '9'))
838 *out = (*in - '0');
839 else if ((*in >= 'a') && (*in <= 'f'))
840 *out = (*in - 'a') + 10;
841 else if ((*in >= 'A') && (*in <= 'F'))
842 *out = (*in - 'A') + 10;
843 in++;
844 if ((*in >= '0') && (*in <= '9'))
845 *out = *out * 16 + (*in - '0');
846 else if ((*in >= 'a') && (*in <= 'f'))
847 *out = *out * 16 + (*in - 'a') + 10;
848 else if ((*in >= 'A') && (*in <= 'F'))
849 *out = *out * 16 + (*in - 'A') + 10;
850 in++;
851 len -= 3;
852 out++;
853 } else {
854 *out++ = *in++;
855 len--;
856 }
857 }
858 *out = 0;
859 return(ret);
860}
861
862/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000863 * xmlURIEscapeStr:
864 * @str: string to escape
865 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000866 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000867 * This routine escapes a string to hex, ignoring reserved characters (a-z)
868 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000869 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000870 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000871 */
872xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000873xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
874 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000875 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000876
Owen Taylor3473f882001-02-23 17:55:21 +0000877 unsigned int len, out;
878
879 if (str == NULL)
880 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000881 if (str[0] == 0)
882 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000883 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000884 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000885
886 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000887 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000888 if (ret == NULL) {
889 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000890 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000891 return(NULL);
892 }
893 in = (const xmlChar *) str;
894 out = 0;
895 while(*in != 0) {
896 if (len - out <= 3) {
897 len += 20;
898 ret = (xmlChar *) xmlRealloc(ret, len);
899 if (ret == NULL) {
900 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000901 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000902 return(NULL);
903 }
904 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000905
906 ch = *in;
907
Daniel Veillardeb475a32002-04-14 22:00:22 +0000908 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000909 unsigned char val;
910 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000911 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000912 if (val <= 9)
913 ret[out++] = '0' + val;
914 else
915 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000916 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000917 if (val <= 9)
918 ret[out++] = '0' + val;
919 else
920 ret[out++] = 'A' + val - 0xA;
921 in++;
922 } else {
923 ret[out++] = *in++;
924 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000925
Owen Taylor3473f882001-02-23 17:55:21 +0000926 }
927 ret[out] = 0;
928 return(ret);
929}
930
Daniel Veillard8514c672001-05-23 10:29:12 +0000931/**
932 * xmlURIEscape:
933 * @str: the string of the URI to escape
934 *
935 * Escaping routine, does not do validity checks !
936 * It will try to escape the chars needing this, but this is heuristic
937 * based it's impossible to be sure.
938 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000939 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000940 *
941 * 25 May 2001
942 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
943 * according to RFC2396.
944 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000945 */
946xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000947xmlURIEscape(const xmlChar * str)
948{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000949 xmlChar *ret, *segment = NULL;
950 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000951 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000952
Daniel Veillard6278fb52001-05-25 07:38:41 +0000953#define NULLCHK(p) if(!p) { \
954 xmlGenericError(xmlGenericErrorContext, \
955 "xmlURIEscape: out of memory\n"); \
956 return NULL; }
957
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000958 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000959 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000960
961 uri = xmlCreateURI();
962 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000963 /*
964 * Allow escaping errors in the unescaped form
965 */
966 uri->cleanup = 1;
967 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000968 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000969 xmlFreeURI(uri);
970 return (NULL);
971 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000972 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000973
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000974 if (!uri)
975 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000976
977 ret = NULL;
978
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000979 if (uri->scheme) {
980 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
981 NULLCHK(segment)
982 ret = xmlStrcat(ret, segment);
983 ret = xmlStrcat(ret, BAD_CAST ":");
984 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000985 }
986
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000987 if (uri->authority) {
988 segment =
989 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
990 NULLCHK(segment)
991 ret = xmlStrcat(ret, BAD_CAST "//");
992 ret = xmlStrcat(ret, segment);
993 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000994 }
995
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000996 if (uri->user) {
997 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
998 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +0000999 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001000 ret = xmlStrcat(ret, segment);
1001 ret = xmlStrcat(ret, BAD_CAST "@");
1002 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001003 }
1004
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001005 if (uri->server) {
1006 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1007 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001008 if (uri->user == NULL)
1009 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001010 ret = xmlStrcat(ret, segment);
1011 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001012 }
1013
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001014 if (uri->port) {
1015 xmlChar port[10];
1016
Daniel Veillard43d3f612001-11-10 11:57:23 +00001017 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001018 ret = xmlStrcat(ret, BAD_CAST ":");
1019 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001020 }
1021
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001022 if (uri->path) {
1023 segment =
1024 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1025 NULLCHK(segment)
1026 ret = xmlStrcat(ret, segment);
1027 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001028 }
1029
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001030 if (uri->query) {
1031 segment =
1032 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1033 NULLCHK(segment)
1034 ret = xmlStrcat(ret, BAD_CAST "?");
1035 ret = xmlStrcat(ret, segment);
1036 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001037 }
1038
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001039 if (uri->opaque) {
1040 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1041 NULLCHK(segment)
1042 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001043 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001044 }
1045
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001046 if (uri->fragment) {
1047 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1048 NULLCHK(segment)
1049 ret = xmlStrcat(ret, BAD_CAST "#");
1050 ret = xmlStrcat(ret, segment);
1051 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001052 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001053
1054 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001055#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001056
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001057 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001058}
1059
Owen Taylor3473f882001-02-23 17:55:21 +00001060/************************************************************************
1061 * *
1062 * Escaped URI parsing *
1063 * *
1064 ************************************************************************/
1065
1066/**
1067 * xmlParseURIFragment:
1068 * @uri: pointer to an URI structure
1069 * @str: pointer to the string to analyze
1070 *
1071 * Parse an URI fragment string and fills in the appropriate fields
1072 * of the @uri structure.
1073 *
1074 * fragment = *uric
1075 *
1076 * Returns 0 or the error code
1077 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001078static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001079xmlParseURIFragment(xmlURIPtr uri, const char **str)
1080{
Daniel Veillard30e76072006-03-09 14:13:55 +00001081 const char *cur;
1082
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001083 if (str == NULL)
1084 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001085
Daniel Veillard30e76072006-03-09 14:13:55 +00001086 cur = *str;
1087
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001088 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001089 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001090 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001091 if (uri->fragment != NULL)
1092 xmlFree(uri->fragment);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001093 if (uri->cleanup & 2)
1094 uri->fragment = STRNDUP(*str, cur - *str);
1095 else
1096 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001097 }
1098 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001099 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001100}
1101
1102/**
1103 * xmlParseURIQuery:
1104 * @uri: pointer to an URI structure
1105 * @str: pointer to the string to analyze
1106 *
1107 * Parse the query part of an URI
1108 *
1109 * query = *uric
1110 *
1111 * Returns 0 or the error code
1112 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001113static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001114xmlParseURIQuery(xmlURIPtr uri, const char **str)
1115{
Daniel Veillard30e76072006-03-09 14:13:55 +00001116 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001117
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001118 if (str == NULL)
1119 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001120
Daniel Veillard30e76072006-03-09 14:13:55 +00001121 cur = *str;
1122
Daniel Veillard336a8e12005-08-07 10:46:19 +00001123 while ((IS_URIC(cur)) ||
1124 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001125 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001126 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001127 if (uri->query != NULL)
1128 xmlFree(uri->query);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001129 if (uri->cleanup & 2)
1130 uri->query = STRNDUP(*str, cur - *str);
1131 else
1132 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001133 }
1134 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001135 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001136}
1137
1138/**
1139 * xmlParseURIScheme:
1140 * @uri: pointer to an URI structure
1141 * @str: pointer to the string to analyze
1142 *
1143 * Parse an URI scheme
1144 *
1145 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1146 *
1147 * Returns 0 or the error code
1148 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001149static int
Owen Taylor3473f882001-02-23 17:55:21 +00001150xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1151 const char *cur;
1152
1153 if (str == NULL)
1154 return(-1);
1155
1156 cur = *str;
1157 if (!IS_ALPHA(*cur))
1158 return(2);
1159 cur++;
1160 while (IS_SCHEME(*cur)) cur++;
1161 if (uri != NULL) {
1162 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001163 uri->scheme = STRNDUP(*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001164 }
1165 *str = cur;
1166 return(0);
1167}
1168
1169/**
1170 * xmlParseURIOpaquePart:
1171 * @uri: pointer to an URI structure
1172 * @str: pointer to the string to analyze
1173 *
1174 * Parse an URI opaque part
1175 *
1176 * opaque_part = uric_no_slash *uric
1177 *
1178 * Returns 0 or the error code
1179 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001180static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001181xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1182{
Owen Taylor3473f882001-02-23 17:55:21 +00001183 const char *cur;
1184
1185 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001186 return (-1);
1187
Owen Taylor3473f882001-02-23 17:55:21 +00001188 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001189 if (!((IS_URIC_NO_SLASH(cur)) ||
1190 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001191 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001192 }
1193 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001194 while ((IS_URIC(cur)) ||
1195 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001196 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001197 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001198 if (uri->opaque != NULL)
1199 xmlFree(uri->opaque);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001200 if (uri->cleanup & 2)
1201 uri->opaque = STRNDUP(*str, cur - *str);
1202 else
1203 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001204 }
1205 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001206 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001207}
1208
1209/**
1210 * xmlParseURIServer:
1211 * @uri: pointer to an URI structure
1212 * @str: pointer to the string to analyze
1213 *
1214 * Parse a server subpart of an URI, it's a finer grain analysis
1215 * of the authority part.
1216 *
1217 * server = [ [ userinfo "@" ] hostport ]
1218 * userinfo = *( unreserved | escaped |
1219 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1220 * hostport = host [ ":" port ]
William M. Brack015ccb22005-02-13 08:18:52 +00001221 * host = hostname | IPv4address | IPv6reference
Owen Taylor3473f882001-02-23 17:55:21 +00001222 * hostname = *( domainlabel "." ) toplabel [ "." ]
1223 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1224 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
William M. Brack015ccb22005-02-13 08:18:52 +00001225 * IPv6reference = "[" IPv6address "]"
1226 * IPv6address = hexpart [ ":" IPv4address ]
1227 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1228 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1229 * hexseq = hex4 *( ":" hex4)
1230 * hex4 = 1*4hexdig
Owen Taylor3473f882001-02-23 17:55:21 +00001231 * port = *digit
1232 *
1233 * Returns 0 or the error code
1234 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001235static int
Owen Taylor3473f882001-02-23 17:55:21 +00001236xmlParseURIServer(xmlURIPtr uri, const char **str) {
1237 const char *cur;
1238 const char *host, *tmp;
William M. Brack015ccb22005-02-13 08:18:52 +00001239 const int IPV4max = 4;
1240 const int IPV6max = 8;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001241 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001242
1243 if (str == NULL)
1244 return(-1);
1245
1246 cur = *str;
1247
1248 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001249 * is there a userinfo ?
Owen Taylor3473f882001-02-23 17:55:21 +00001250 */
1251 while (IS_USERINFO(cur)) NEXT(cur);
1252 if (*cur == '@') {
1253 if (uri != NULL) {
1254 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001255 if (uri->cleanup & 2)
1256 uri->path = STRNDUP(*str, cur - *str);
1257 else
1258 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260 cur++;
1261 } else {
1262 if (uri != NULL) {
1263 if (uri->user != NULL) xmlFree(uri->user);
1264 uri->user = NULL;
1265 }
1266 cur = *str;
1267 }
1268 /*
1269 * This can be empty in the case where there is no server
1270 */
1271 host = cur;
1272 if (*cur == '/') {
1273 if (uri != NULL) {
1274 if (uri->authority != NULL) xmlFree(uri->authority);
1275 uri->authority = NULL;
1276 if (uri->server != NULL) xmlFree(uri->server);
1277 uri->server = NULL;
1278 uri->port = 0;
1279 }
1280 return(0);
1281 }
1282 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001283 * host part of hostport can denote an IPV4 address, an IPV6 address
1284 * or an unresolved name. Check the IP first, its easier to detect
1285 * errors if wrong one.
1286 * An IPV6 address must start with a '[' and end with a ']'.
Owen Taylor3473f882001-02-23 17:55:21 +00001287 */
William M. Brack015ccb22005-02-13 08:18:52 +00001288 if (*cur == '[') {
1289 int compress=0;
1290 cur++;
1291 for (oct = 0; oct < IPV6max; ++oct) {
1292 if (*cur == ':') {
1293 if (compress)
1294 return(3); /* multiple compression attempted */
1295 if (!oct) { /* initial char is compression */
1296 if (*++cur != ':')
1297 return(3);
1298 }
1299 compress = 1; /* set compression-encountered flag */
1300 cur++; /* skip over the second ':' */
1301 continue;
1302 }
1303 while(IS_HEX(*cur)) cur++;
1304 if (oct == (IPV6max-1))
1305 continue;
1306 if (*cur != ':')
1307 break;
1308 cur++;
1309 }
1310 if ((!compress) && (oct != IPV6max))
1311 return(3);
1312 if (*cur != ']')
1313 return(3);
1314 if (uri != NULL) {
1315 if (uri->server != NULL) xmlFree(uri->server);
1316 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1317 (cur-host)-1);
1318 }
1319 cur++;
1320 } else {
1321 /*
1322 * Not IPV6, maybe IPV4
1323 */
1324 for (oct = 0; oct < IPV4max; ++oct) {
1325 if (*cur == '.')
1326 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1327 while(IS_DIGIT(*cur)) cur++;
1328 if (oct == (IPV4max-1))
1329 continue;
1330 if (*cur != '.')
1331 break;
1332 cur++;
1333 }
Owen Taylor3473f882001-02-23 17:55:21 +00001334 }
William M. Brack015ccb22005-02-13 08:18:52 +00001335 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1336 IS_ALPHA(*cur))) {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001337 /* maybe host_name */
1338 if (!IS_ALPHANUM(*cur))
1339 return(4); /* e.g. http://xml.$oft */
1340 do {
1341 do ++cur; while (IS_ALPHANUM(*cur));
1342 if (*cur == '-') {
1343 --cur;
1344 if (*cur == '.')
1345 return(5); /* e.g. http://xml.-soft */
1346 ++cur;
1347 continue;
1348 }
1349 if (*cur == '.') {
1350 --cur;
1351 if (*cur == '-')
1352 return(6); /* e.g. http://xml-.soft */
1353 if (*cur == '.')
1354 return(7); /* e.g. http://xml..soft */
1355 ++cur;
1356 continue;
1357 }
1358 break;
1359 } while (1);
1360 tmp = cur;
1361 if (tmp[-1] == '.')
1362 --tmp; /* e.g. http://xml.$Oft/ */
1363 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1364 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1365 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001366 }
Owen Taylor3473f882001-02-23 17:55:21 +00001367 if (uri != NULL) {
1368 if (uri->authority != NULL) xmlFree(uri->authority);
1369 uri->authority = NULL;
William M. Brack015ccb22005-02-13 08:18:52 +00001370 if (host[0] != '[') { /* it's not an IPV6 addr */
1371 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001372 if (uri->cleanup & 2)
1373 uri->server = STRNDUP(host, cur - host);
1374 else
1375 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
William M. Brack015ccb22005-02-13 08:18:52 +00001376 }
Owen Taylor3473f882001-02-23 17:55:21 +00001377 }
Owen Taylor3473f882001-02-23 17:55:21 +00001378 /*
1379 * finish by checking for a port presence.
1380 */
1381 if (*cur == ':') {
1382 cur++;
1383 if (IS_DIGIT(*cur)) {
1384 if (uri != NULL)
1385 uri->port = 0;
1386 while (IS_DIGIT(*cur)) {
1387 if (uri != NULL)
1388 uri->port = uri->port * 10 + (*cur - '0');
1389 cur++;
1390 }
1391 }
1392 }
1393 *str = cur;
1394 return(0);
1395}
1396
1397/**
1398 * xmlParseURIRelSegment:
1399 * @uri: pointer to an URI structure
1400 * @str: pointer to the string to analyze
1401 *
1402 * Parse an URI relative segment
1403 *
1404 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1405 * "+" | "$" | "," )
1406 *
1407 * Returns 0 or the error code
1408 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001409static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001410xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1411{
Owen Taylor3473f882001-02-23 17:55:21 +00001412 const char *cur;
1413
1414 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001415 return (-1);
1416
Owen Taylor3473f882001-02-23 17:55:21 +00001417 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001418 if (!((IS_SEGMENT(cur)) ||
1419 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001420 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001421 }
1422 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001423 while ((IS_SEGMENT(cur)) ||
1424 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001425 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001426 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001427 if (uri->path != NULL)
1428 xmlFree(uri->path);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001429 if (uri->cleanup & 2)
1430 uri->path = STRNDUP(*str, cur - *str);
1431 else
1432 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001433 }
1434 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001435 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001436}
1437
1438/**
1439 * xmlParseURIPathSegments:
1440 * @uri: pointer to an URI structure
1441 * @str: pointer to the string to analyze
1442 * @slash: should we add a leading slash
1443 *
1444 * Parse an URI set of path segments
1445 *
1446 * path_segments = segment *( "/" segment )
1447 * segment = *pchar *( ";" param )
1448 * param = *pchar
1449 *
1450 * Returns 0 or the error code
1451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001452static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001453xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1454{
Owen Taylor3473f882001-02-23 17:55:21 +00001455 const char *cur;
1456
1457 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001458 return (-1);
1459
Owen Taylor3473f882001-02-23 17:55:21 +00001460 cur = *str;
1461
1462 do {
Daniel Veillard336a8e12005-08-07 10:46:19 +00001463 while ((IS_PCHAR(cur)) ||
1464 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001465 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001466 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001467 cur++;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001468 while ((IS_PCHAR(cur)) ||
1469 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001470 NEXT(cur);
1471 }
1472 if (*cur != '/')
1473 break;
1474 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001475 } while (1);
1476 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001477 int len, len2 = 0;
1478 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001479
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001480 /*
1481 * Concat the set of path segments to the current path
1482 */
1483 len = cur - *str;
1484 if (slash)
1485 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001486
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001487 if (uri->path != NULL) {
1488 len2 = strlen(uri->path);
1489 len += len2;
1490 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001491 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001492 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001493 xmlGenericError(xmlGenericErrorContext,
1494 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001495 *str = cur;
1496 return (-1);
1497 }
1498 if (uri->path != NULL)
1499 memcpy(path, uri->path, len2);
1500 if (slash) {
1501 path[len2] = '/';
1502 len2++;
1503 }
1504 path[len2] = 0;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001505 if (cur - *str > 0) {
1506 if (uri->cleanup & 2) {
1507 memcpy(&path[len2], *str, cur - *str);
1508 path[len2 + (cur - *str)] = 0;
1509 } else
1510 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1511 }
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001512 if (uri->path != NULL)
1513 xmlFree(uri->path);
1514 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001515 }
1516 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001517 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001518}
1519
1520/**
1521 * xmlParseURIAuthority:
1522 * @uri: pointer to an URI structure
1523 * @str: pointer to the string to analyze
1524 *
1525 * Parse the authority part of an URI.
1526 *
1527 * authority = server | reg_name
1528 * server = [ [ userinfo "@" ] hostport ]
1529 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1530 * "@" | "&" | "=" | "+" )
1531 *
1532 * Note : this is completely ambiguous since reg_name is allowed to
1533 * use the full set of chars in use by server:
1534 *
1535 * 3.2.1. Registry-based Naming Authority
1536 *
1537 * The structure of a registry-based naming authority is specific
1538 * to the URI scheme, but constrained to the allowed characters
1539 * for an authority component.
1540 *
1541 * Returns 0 or the error code
1542 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001543static int
Owen Taylor3473f882001-02-23 17:55:21 +00001544xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1545 const char *cur;
1546 int ret;
1547
1548 if (str == NULL)
1549 return(-1);
1550
1551 cur = *str;
1552
1553 /*
1554 * try first to parse it as a server string.
1555 */
1556 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001557 if ((ret == 0) && (*str != NULL) &&
1558 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001559 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001560 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001561
1562 /*
1563 * failed, fallback to reg_name
1564 */
1565 if (!IS_REG_NAME(cur)) {
1566 return(5);
1567 }
1568 NEXT(cur);
1569 while (IS_REG_NAME(cur)) NEXT(cur);
1570 if (uri != NULL) {
1571 if (uri->server != NULL) xmlFree(uri->server);
1572 uri->server = NULL;
1573 if (uri->user != NULL) xmlFree(uri->user);
1574 uri->user = NULL;
1575 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001576 if (uri->cleanup & 2)
1577 uri->authority = STRNDUP(*str, cur - *str);
1578 else
1579 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001580 }
1581 *str = cur;
1582 return(0);
1583}
1584
1585/**
1586 * xmlParseURIHierPart:
1587 * @uri: pointer to an URI structure
1588 * @str: pointer to the string to analyze
1589 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001590 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001591 *
1592 * hier_part = ( net_path | abs_path ) [ "?" query ]
1593 * abs_path = "/" path_segments
1594 * net_path = "//" authority [ abs_path ]
1595 *
1596 * Returns 0 or the error code
1597 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001598static int
Owen Taylor3473f882001-02-23 17:55:21 +00001599xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1600 int ret;
1601 const char *cur;
1602
1603 if (str == NULL)
1604 return(-1);
1605
1606 cur = *str;
1607
1608 if ((cur[0] == '/') && (cur[1] == '/')) {
1609 cur += 2;
1610 ret = xmlParseURIAuthority(uri, &cur);
1611 if (ret != 0)
1612 return(ret);
1613 if (cur[0] == '/') {
1614 cur++;
1615 ret = xmlParseURIPathSegments(uri, &cur, 1);
1616 }
1617 } else if (cur[0] == '/') {
1618 cur++;
1619 ret = xmlParseURIPathSegments(uri, &cur, 1);
1620 } else {
1621 return(4);
1622 }
1623 if (ret != 0)
1624 return(ret);
1625 if (*cur == '?') {
1626 cur++;
1627 ret = xmlParseURIQuery(uri, &cur);
1628 if (ret != 0)
1629 return(ret);
1630 }
1631 *str = cur;
1632 return(0);
1633}
1634
1635/**
1636 * xmlParseAbsoluteURI:
1637 * @uri: pointer to an URI structure
1638 * @str: pointer to the string to analyze
1639 *
1640 * Parse an URI reference string and fills in the appropriate fields
1641 * of the @uri structure
1642 *
1643 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1644 *
1645 * Returns 0 or the error code
1646 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001647static int
Owen Taylor3473f882001-02-23 17:55:21 +00001648xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1649 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001650 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001651
1652 if (str == NULL)
1653 return(-1);
1654
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001655 cur = *str;
1656
Owen Taylor3473f882001-02-23 17:55:21 +00001657 ret = xmlParseURIScheme(uri, str);
1658 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001659 if (**str != ':') {
1660 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001661 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001662 }
Owen Taylor3473f882001-02-23 17:55:21 +00001663 (*str)++;
1664 if (**str == '/')
1665 return(xmlParseURIHierPart(uri, str));
1666 return(xmlParseURIOpaquePart(uri, str));
1667}
1668
1669/**
1670 * xmlParseRelativeURI:
1671 * @uri: pointer to an URI structure
1672 * @str: pointer to the string to analyze
1673 *
1674 * Parse an relative URI string and fills in the appropriate fields
1675 * of the @uri structure
1676 *
1677 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1678 * abs_path = "/" path_segments
1679 * net_path = "//" authority [ abs_path ]
1680 * rel_path = rel_segment [ abs_path ]
1681 *
1682 * Returns 0 or the error code
1683 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001684static int
Owen Taylor3473f882001-02-23 17:55:21 +00001685xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1686 int ret = 0;
1687 const char *cur;
1688
1689 if (str == NULL)
1690 return(-1);
1691
1692 cur = *str;
1693 if ((cur[0] == '/') && (cur[1] == '/')) {
1694 cur += 2;
1695 ret = xmlParseURIAuthority(uri, &cur);
1696 if (ret != 0)
1697 return(ret);
1698 if (cur[0] == '/') {
1699 cur++;
1700 ret = xmlParseURIPathSegments(uri, &cur, 1);
1701 }
1702 } else if (cur[0] == '/') {
1703 cur++;
1704 ret = xmlParseURIPathSegments(uri, &cur, 1);
1705 } else if (cur[0] != '#' && cur[0] != '?') {
1706 ret = xmlParseURIRelSegment(uri, &cur);
1707 if (ret != 0)
1708 return(ret);
1709 if (cur[0] == '/') {
1710 cur++;
1711 ret = xmlParseURIPathSegments(uri, &cur, 1);
1712 }
1713 }
1714 if (ret != 0)
1715 return(ret);
1716 if (*cur == '?') {
1717 cur++;
1718 ret = xmlParseURIQuery(uri, &cur);
1719 if (ret != 0)
1720 return(ret);
1721 }
1722 *str = cur;
1723 return(ret);
1724}
1725
1726/**
1727 * xmlParseURIReference:
1728 * @uri: pointer to an URI structure
1729 * @str: the string to analyze
1730 *
1731 * Parse an URI reference string and fills in the appropriate fields
1732 * of the @uri structure
1733 *
1734 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1735 *
1736 * Returns 0 or the error code
1737 */
1738int
1739xmlParseURIReference(xmlURIPtr uri, const char *str) {
1740 int ret;
1741 const char *tmp = str;
1742
1743 if (str == NULL)
1744 return(-1);
1745 xmlCleanURI(uri);
1746
1747 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001748 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001749 * it fails.
1750 */
1751 ret = xmlParseAbsoluteURI(uri, &str);
1752 if (ret != 0) {
1753 xmlCleanURI(uri);
1754 str = tmp;
1755 ret = xmlParseRelativeURI(uri, &str);
1756 }
1757 if (ret != 0) {
1758 xmlCleanURI(uri);
1759 return(ret);
1760 }
1761
1762 if (*str == '#') {
1763 str++;
1764 ret = xmlParseURIFragment(uri, &str);
1765 if (ret != 0) return(ret);
1766 }
1767 if (*str != 0) {
1768 xmlCleanURI(uri);
1769 return(1);
1770 }
1771 return(0);
1772}
1773
1774/**
1775 * xmlParseURI:
1776 * @str: the URI string to analyze
1777 *
1778 * Parse an URI
1779 *
1780 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1781 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001782 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001783 */
1784xmlURIPtr
1785xmlParseURI(const char *str) {
1786 xmlURIPtr uri;
1787 int ret;
1788
1789 if (str == NULL)
1790 return(NULL);
1791 uri = xmlCreateURI();
1792 if (uri != NULL) {
1793 ret = xmlParseURIReference(uri, str);
1794 if (ret) {
1795 xmlFreeURI(uri);
1796 return(NULL);
1797 }
1798 }
1799 return(uri);
1800}
1801
Daniel Veillard336a8e12005-08-07 10:46:19 +00001802/**
1803 * xmlParseURIRaw:
1804 * @str: the URI string to analyze
1805 * @raw: if 1 unescaping of URI pieces are disabled
1806 *
1807 * Parse an URI but allows to keep intact the original fragments.
1808 *
1809 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1810 *
1811 * Returns a newly built xmlURIPtr or NULL in case of error
1812 */
1813xmlURIPtr
1814xmlParseURIRaw(const char *str, int raw) {
1815 xmlURIPtr uri;
1816 int ret;
1817
1818 if (str == NULL)
1819 return(NULL);
1820 uri = xmlCreateURI();
1821 if (uri != NULL) {
1822 if (raw) {
1823 uri->cleanup |= 2;
1824 }
1825 ret = xmlParseURIReference(uri, str);
1826 if (ret) {
1827 xmlFreeURI(uri);
1828 return(NULL);
1829 }
1830 }
1831 return(uri);
1832}
1833
Owen Taylor3473f882001-02-23 17:55:21 +00001834/************************************************************************
1835 * *
1836 * Public functions *
1837 * *
1838 ************************************************************************/
1839
1840/**
1841 * xmlBuildURI:
1842 * @URI: the URI instance found in the document
1843 * @base: the base value
1844 *
 * Computes the final URI of the reference done by checking that
1846 * the given URI is valid, and building the final URI using the
1847 * base URI. This is processed according to section 5.2 of the
1848 * RFC 2396
1849 *
1850 * 5.2. Resolving Relative References to Absolute Form
1851 *
1852 * Returns a new URI string (to be freed by the caller) or NULL in case
1853 * of error.
1854 */
1855xmlChar *
1856xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1857 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001858 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001859 xmlURIPtr ref = NULL;
1860 xmlURIPtr bas = NULL;
1861 xmlURIPtr res = NULL;
1862
1863 /*
1864 * 1) The URI reference is parsed into the potential four components and
1865 * fragment identifier, as described in Section 4.3.
1866 *
1867 * NOTE that a completely empty URI is treated by modern browsers
1868 * as a reference to "." rather than as a synonym for the current
1869 * URI. Should we do that here?
1870 */
1871 if (URI == NULL)
1872 ret = -1;
1873 else {
1874 if (*URI) {
1875 ref = xmlCreateURI();
1876 if (ref == NULL)
1877 goto done;
1878 ret = xmlParseURIReference(ref, (const char *) URI);
1879 }
1880 else
1881 ret = 0;
1882 }
1883 if (ret != 0)
1884 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001885 if ((ref != NULL) && (ref->scheme != NULL)) {
1886 /*
1887 * The URI is absolute don't modify.
1888 */
1889 val = xmlStrdup(URI);
1890 goto done;
1891 }
Owen Taylor3473f882001-02-23 17:55:21 +00001892 if (base == NULL)
1893 ret = -1;
1894 else {
1895 bas = xmlCreateURI();
1896 if (bas == NULL)
1897 goto done;
1898 ret = xmlParseURIReference(bas, (const char *) base);
1899 }
1900 if (ret != 0) {
1901 if (ref)
1902 val = xmlSaveUri(ref);
1903 goto done;
1904 }
1905 if (ref == NULL) {
1906 /*
1907 * the base fragment must be ignored
1908 */
1909 if (bas->fragment != NULL) {
1910 xmlFree(bas->fragment);
1911 bas->fragment = NULL;
1912 }
1913 val = xmlSaveUri(bas);
1914 goto done;
1915 }
1916
1917 /*
1918 * 2) If the path component is empty and the scheme, authority, and
1919 * query components are undefined, then it is a reference to the
1920 * current document and we are done. Otherwise, the reference URI's
1921 * query and fragment components are defined as found (or not found)
1922 * within the URI reference and not inherited from the base URI.
1923 *
1924 * NOTE that in modern browsers, the parsing differs from the above
1925 * in the following aspect: the query component is allowed to be
1926 * defined while still treating this as a reference to the current
1927 * document.
1928 */
1929 res = xmlCreateURI();
1930 if (res == NULL)
1931 goto done;
1932 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1933 ((ref->authority == NULL) && (ref->server == NULL))) {
1934 if (bas->scheme != NULL)
1935 res->scheme = xmlMemStrdup(bas->scheme);
1936 if (bas->authority != NULL)
1937 res->authority = xmlMemStrdup(bas->authority);
1938 else if (bas->server != NULL) {
1939 res->server = xmlMemStrdup(bas->server);
1940 if (bas->user != NULL)
1941 res->user = xmlMemStrdup(bas->user);
1942 res->port = bas->port;
1943 }
1944 if (bas->path != NULL)
1945 res->path = xmlMemStrdup(bas->path);
1946 if (ref->query != NULL)
1947 res->query = xmlMemStrdup(ref->query);
1948 else if (bas->query != NULL)
1949 res->query = xmlMemStrdup(bas->query);
1950 if (ref->fragment != NULL)
1951 res->fragment = xmlMemStrdup(ref->fragment);
1952 goto step_7;
1953 }
Owen Taylor3473f882001-02-23 17:55:21 +00001954
1955 /*
1956 * 3) If the scheme component is defined, indicating that the reference
1957 * starts with a scheme name, then the reference is interpreted as an
1958 * absolute URI and we are done. Otherwise, the reference URI's
1959 * scheme is inherited from the base URI's scheme component.
1960 */
1961 if (ref->scheme != NULL) {
1962 val = xmlSaveUri(ref);
1963 goto done;
1964 }
1965 if (bas->scheme != NULL)
1966 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001967
1968 if (ref->query != NULL)
1969 res->query = xmlMemStrdup(ref->query);
1970 if (ref->fragment != NULL)
1971 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001972
1973 /*
1974 * 4) If the authority component is defined, then the reference is a
1975 * network-path and we skip to step 7. Otherwise, the reference
1976 * URI's authority is inherited from the base URI's authority
1977 * component, which will also be undefined if the URI scheme does not
1978 * use an authority component.
1979 */
1980 if ((ref->authority != NULL) || (ref->server != NULL)) {
1981 if (ref->authority != NULL)
1982 res->authority = xmlMemStrdup(ref->authority);
1983 else {
1984 res->server = xmlMemStrdup(ref->server);
1985 if (ref->user != NULL)
1986 res->user = xmlMemStrdup(ref->user);
1987 res->port = ref->port;
1988 }
1989 if (ref->path != NULL)
1990 res->path = xmlMemStrdup(ref->path);
1991 goto step_7;
1992 }
1993 if (bas->authority != NULL)
1994 res->authority = xmlMemStrdup(bas->authority);
1995 else if (bas->server != NULL) {
1996 res->server = xmlMemStrdup(bas->server);
1997 if (bas->user != NULL)
1998 res->user = xmlMemStrdup(bas->user);
1999 res->port = bas->port;
2000 }
2001
2002 /*
2003 * 5) If the path component begins with a slash character ("/"), then
2004 * the reference is an absolute-path and we skip to step 7.
2005 */
2006 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2007 res->path = xmlMemStrdup(ref->path);
2008 goto step_7;
2009 }
2010
2011
2012 /*
2013 * 6) If this step is reached, then we are resolving a relative-path
2014 * reference. The relative path needs to be merged with the base
2015 * URI's path. Although there are many ways to do this, we will
2016 * describe a simple method using a separate string buffer.
2017 *
2018 * Allocate a buffer large enough for the result string.
2019 */
2020 len = 2; /* extra / and 0 */
2021 if (ref->path != NULL)
2022 len += strlen(ref->path);
2023 if (bas->path != NULL)
2024 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002025 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002026 if (res->path == NULL) {
2027 xmlGenericError(xmlGenericErrorContext,
2028 "xmlBuildURI: out of memory\n");
2029 goto done;
2030 }
2031 res->path[0] = 0;
2032
2033 /*
2034 * a) All but the last segment of the base URI's path component is
2035 * copied to the buffer. In other words, any characters after the
2036 * last (right-most) slash character, if any, are excluded.
2037 */
2038 cur = 0;
2039 out = 0;
2040 if (bas->path != NULL) {
2041 while (bas->path[cur] != 0) {
2042 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2043 cur++;
2044 if (bas->path[cur] == 0)
2045 break;
2046
2047 cur++;
2048 while (out < cur) {
2049 res->path[out] = bas->path[out];
2050 out++;
2051 }
2052 }
2053 }
2054 res->path[out] = 0;
2055
2056 /*
2057 * b) The reference's path component is appended to the buffer
2058 * string.
2059 */
2060 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002061 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002062 /*
2063 * Ensure the path includes a '/'
2064 */
2065 if ((out == 0) && (bas->server != NULL))
2066 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002067 while (ref->path[indx] != 0) {
2068 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002069 }
2070 }
2071 res->path[out] = 0;
2072
2073 /*
2074 * Steps c) to h) are really path normalization steps
2075 */
2076 xmlNormalizeURIPath(res->path);
2077
2078step_7:
2079
2080 /*
2081 * 7) The resulting URI components, including any inherited from the
2082 * base URI, are recombined to give the absolute form of the URI
2083 * reference.
2084 */
2085 val = xmlSaveUri(res);
2086
2087done:
2088 if (ref != NULL)
2089 xmlFreeURI(ref);
2090 if (bas != NULL)
2091 xmlFreeURI(bas);
2092 if (res != NULL)
2093 xmlFreeURI(res);
2094 return(val);
2095}
2096
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002097/**
William M. Brackf7789b12004-06-07 08:57:27 +00002098 * xmlBuildRelativeURI:
2099 * @URI: the URI reference under consideration
2100 * @base: the base value
2101 *
2102 * Expresses the URI of the reference in terms relative to the
2103 * base. Some examples of this operation include:
2104 * base = "http://site1.com/docs/book1.html"
2105 * URI input URI returned
2106 * docs/pic1.gif pic1.gif
2107 * docs/img/pic1.gif img/pic1.gif
2108 * img/pic1.gif ../img/pic1.gif
2109 * http://site1.com/docs/pic1.gif pic1.gif
2110 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2111 *
2112 * base = "docs/book1.html"
2113 * URI input URI returned
2114 * docs/pic1.gif pic1.gif
2115 * docs/img/pic1.gif img/pic1.gif
2116 * img/pic1.gif ../img/pic1.gif
2117 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2118 *
2119 *
2120 * Note: if the URI reference is really wierd or complicated, it may be
2121 * worthwhile to first convert it into a "nice" one by calling
2122 * xmlBuildURI (using 'base') before calling this routine,
2123 * since this routine (for reasonable efficiency) assumes URI has
2124 * already been through some validation.
2125 *
2126 * Returns a new URI string (to be freed by the caller) or NULL in case
2127 * error.
2128 */
2129xmlChar *
2130xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2131{
2132 xmlChar *val = NULL;
2133 int ret;
2134 int ix;
2135 int pos = 0;
2136 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002137 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002138 xmlURIPtr ref = NULL;
2139 xmlURIPtr bas = NULL;
2140 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002141 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002142
2143 if ((URI == NULL) || (*URI == 0))
2144 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002145
2146 /*
2147 * First parse URI into a standard form
2148 */
2149 ref = xmlCreateURI ();
2150 if (ref == NULL)
2151 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002152 /* If URI not already in "relative" form */
2153 if (URI[0] != '.') {
2154 ret = xmlParseURIReference (ref, (const char *) URI);
2155 if (ret != 0)
2156 goto done; /* Error in URI, return NULL */
2157 } else
2158 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002159
2160 /*
2161 * Next parse base into the same standard form
2162 */
2163 if ((base == NULL) || (*base == 0)) {
2164 val = xmlStrdup (URI);
2165 goto done;
2166 }
2167 bas = xmlCreateURI ();
2168 if (bas == NULL)
2169 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002170 if (base[0] != '.') {
2171 ret = xmlParseURIReference (bas, (const char *) base);
2172 if (ret != 0)
2173 goto done; /* Error in base, return NULL */
2174 } else
2175 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002176
2177 /*
2178 * If the scheme / server on the URI differs from the base,
2179 * just return the URI
2180 */
2181 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002182 ((bas->scheme == NULL) ||
2183 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2184 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002185 val = xmlStrdup (URI);
2186 goto done;
2187 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002188 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2189 val = xmlStrdup(BAD_CAST "");
2190 goto done;
2191 }
2192 if (bas->path == NULL) {
2193 val = xmlStrdup((xmlChar *)ref->path);
2194 goto done;
2195 }
2196 if (ref->path == NULL) {
2197 ref->path = (char *) "/";
2198 remove_path = 1;
2199 }
William M. Brackf7789b12004-06-07 08:57:27 +00002200
2201 /*
2202 * At this point (at last!) we can compare the two paths
2203 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002204 * First we take care of the special case where either of the
2205 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002206 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002207 if (bas->path == NULL) {
2208 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002209 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002210 if (*uptr == '/')
2211 uptr++;
2212 val = xmlStrdup(uptr);
2213 }
2214 goto done;
2215 }
William M. Brackf7789b12004-06-07 08:57:27 +00002216 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002217 if (ref->path == NULL) {
2218 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002219 if (bptr[ix] == '/')
2220 nbslash++;
2221 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002222 uptr = NULL;
2223 len = 1; /* this is for a string terminator only */
2224 } else {
2225 /*
2226 * Next we compare the two strings and find where they first differ
2227 */
2228 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2229 pos += 2;
2230 if ((*bptr == '.') && (bptr[1] == '/'))
2231 bptr += 2;
2232 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2233 bptr++;
2234 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2235 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002236
William M. Brack820d5ed2005-09-14 05:24:27 +00002237 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002238 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002239 goto done; /* (I can't imagine why anyone would do this) */
2240 }
2241
2242 /*
2243 * In URI, "back up" to the last '/' encountered. This will be the
2244 * beginning of the "unique" suffix of URI
2245 */
2246 ix = pos;
2247 if ((ref->path[ix] == '/') && (ix > 0))
2248 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002249 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2250 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002251 for (; ix > 0; ix--) {
2252 if (ref->path[ix] == '/')
2253 break;
2254 }
2255 if (ix == 0) {
2256 uptr = (xmlChar *)ref->path;
2257 } else {
2258 ix++;
2259 uptr = (xmlChar *)&ref->path[ix];
2260 }
2261
2262 /*
2263 * In base, count the number of '/' from the differing point
2264 */
2265 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2266 for (; bptr[ix] != 0; ix++) {
2267 if (bptr[ix] == '/')
2268 nbslash++;
2269 }
2270 }
2271 len = xmlStrlen (uptr) + 1;
2272 }
2273
William M. Brackf7789b12004-06-07 08:57:27 +00002274 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002275 if (uptr != NULL)
2276 val = xmlStrdup (uptr);
William M. Brackf7789b12004-06-07 08:57:27 +00002277 goto done;
2278 }
William M. Brackf7789b12004-06-07 08:57:27 +00002279
2280 /*
2281 * Allocate just enough space for the returned string -
2282 * length of the remainder of the URI, plus enough space
2283 * for the "../" groups, plus one for the terminator
2284 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002285 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002286 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002287 xmlGenericError(xmlGenericErrorContext,
2288 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002289 goto done;
2290 }
2291 vptr = val;
2292 /*
2293 * Put in as many "../" as needed
2294 */
2295 for (; nbslash>0; nbslash--) {
2296 *vptr++ = '.';
2297 *vptr++ = '.';
2298 *vptr++ = '/';
2299 }
2300 /*
2301 * Finish up with the end of the URI
2302 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002303 if (uptr != NULL) {
2304 if ((vptr > val) && (len > 0) &&
2305 (uptr[0] == '/') && (vptr[-1] == '/')) {
2306 memcpy (vptr, uptr + 1, len - 1);
2307 vptr[len - 2] = 0;
2308 } else {
2309 memcpy (vptr, uptr, len);
2310 vptr[len - 1] = 0;
2311 }
2312 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002313 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002314 }
William M. Brackf7789b12004-06-07 08:57:27 +00002315
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002316done:
William M. Brackf7789b12004-06-07 08:57:27 +00002317 /*
2318 * Free the working variables
2319 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002320 if (remove_path != 0)
2321 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002322 if (ref != NULL)
2323 xmlFreeURI (ref);
2324 if (bas != NULL)
2325 xmlFreeURI (bas);
2326
2327 return val;
2328}
2329
2330/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002331 * xmlCanonicPath:
2332 * @path: the resource locator in a filesystem notation
2333 *
2334 * Constructs a canonic path from the specified path.
2335 *
2336 * Returns a new canonic path, or a duplicate of the path parameter if the
2337 * construction fails. The caller is responsible for freeing the memory occupied
2338 * by the returned string. If there is insufficient memory available, or the
2339 * argument is NULL, the function returns NULL.
2340 */
2341#define IS_WINDOWS_PATH(p) \
2342 ((p != NULL) && \
2343 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2344 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2345 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002346xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002347xmlCanonicPath(const xmlChar *path)
2348{
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002349#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002350 int len = 0;
2351 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002352 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002353#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002354 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002355 xmlChar *ret;
2356 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002357
2358 if (path == NULL)
2359 return(NULL);
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002360 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002361 xmlFreeURI(uri);
2362 return xmlStrdup(path);
2363 }
2364
Daniel Veillard336a8e12005-08-07 10:46:19 +00002365 absuri = xmlStrstr(path, BAD_CAST "://");
2366 if (absuri != NULL) {
2367 int l, j;
2368 unsigned char c;
2369 xmlChar *escURI;
2370
2371 /*
2372 * this looks like an URI where some parts have not been
2373 * escaped leading to a parsing problem check that the first
2374 * part matches a protocol.
2375 */
2376 l = absuri - path;
2377 if ((l <= 0) || (l > 20))
2378 goto path_processing;
2379 for (j = 0;j < l;j++) {
2380 c = path[j];
2381 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2382 goto path_processing;
2383 }
2384
2385 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2386 if (escURI != NULL) {
2387 uri = xmlParseURI((const char *) escURI);
2388 if (uri != NULL) {
2389 xmlFreeURI(uri);
2390 return escURI;
2391 }
2392 xmlFreeURI(uri);
2393 }
2394 }
2395
2396path_processing:
2397#if defined(_WIN32) && !defined(__CYGWIN__)
2398 /*
2399 * This really need to be cleaned up by someone with a Windows box
2400 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002401 uri = xmlCreateURI();
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002402 if (uri == NULL) {
2403 return(NULL);
2404 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002405
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002406 len = xmlStrlen(path);
2407 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2408 uri->scheme = xmlStrdup(BAD_CAST "file");
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002409 uri->path = xmlMallocAtomic(len + 2);
2410 if (uri->path == NULL) {
2411 xmlFreeURI(uri);
2412 return(NULL);
2413 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002414 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002415 p = uri->path + 1;
2416 strncpy(p, path, len + 1);
2417 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002418 uri->path = xmlStrdup(path);
2419 if (uri->path == NULL) {
2420 xmlFreeURI(uri);
2421 return(NULL);
2422 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002423 p = uri->path;
2424 }
2425 while (*p != '\0') {
2426 if (*p == '\\')
2427 *p = '/';
2428 p++;
2429 }
William M. Bracka3215c72004-07-31 16:24:01 +00002430 if (uri->path == NULL) {
2431 xmlFreeURI(uri);
2432 return(NULL);
2433 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002434
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002435 if (uri->scheme == NULL) {
2436 ret = xmlStrdup((const xmlChar *) path);
2437 } else {
2438 ret = xmlSaveUri(uri);
2439 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002440
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002441 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002442#else
2443 ret = xmlStrdup((const xmlChar *) path);
2444#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002445 return(ret);
2446}
Owen Taylor3473f882001-02-23 17:55:21 +00002447
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002448/**
2449 * xmlPathToURI:
2450 * @path: the resource locator in a filesystem notation
2451 *
2452 * Constructs an URI expressing the existing path
2453 *
2454 * Returns a new URI, or a duplicate of the path parameter if the
2455 * construction fails. The caller is responsible for freeing the memory
2456 * occupied by the returned string. If there is insufficient memory available,
2457 * or the argument is NULL, the function returns NULL.
2458 */
2459xmlChar *
2460xmlPathToURI(const xmlChar *path)
2461{
2462 xmlURIPtr uri;
2463 xmlURI temp;
2464 xmlChar *ret, *cal;
2465
2466 if (path == NULL)
2467 return(NULL);
2468
2469 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2470 xmlFreeURI(uri);
2471 return xmlStrdup(path);
2472 }
2473 cal = xmlCanonicPath(path);
2474 if (cal == NULL)
2475 return(NULL);
2476 memset(&temp, 0, sizeof(temp));
2477 temp.path = (char *) cal;
2478 ret = xmlSaveUri(&temp);
2479 xmlFree(cal);
2480 return(ret);
2481}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002482#define bottom_uri
2483#include "elfgcchack.h"