blob: 0b91c243b5a430832230c9bc2d0e0d04835827e1 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
William M. Brack015ccb22005-02-13 08:18:52 +00004 * Reference: RFCs 2396, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
21/************************************************************************
22 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000023 * Macros to differentiate various character type *
Owen Taylor3473f882001-02-23 17:55:21 +000024 * directly extracted from RFC 2396 *
25 * *
26 ************************************************************************/
27
/*
 * Character classification macros.
 *
 * Macros taking "x" classify a single character value; macros taking "p"
 * take a pointer into a string because they may have to look at the two
 * characters following a '%'.  All of them evaluate their argument more
 * than once, so never pass an expression with side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* Drop any IS_DIGIT inherited from an earlier header before redefining it. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||	\
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 */
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	    ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||	\
	    ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||	\
	    ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	    ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	    ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	    ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||	\
	    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	    ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	    (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * NOTE: the production above is the RFC 2396 one as amended by RFC 2732,
 * which moved "[" and "]" from "unwise" to "reserved".  The macro below
 * still accepts "[" and "]" as well (the original RFC 2396 set); this is
 * presumably kept for backward compatibility, so they match both
 * IS_RESERVED and IS_UNWISE.
 */
#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||	\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances the
 * pointer by 3 (the "%XX" triplet), anything else by 1.  The argument is
 * parenthesized so that lvalue expressions such as *pp are advanced
 * correctly.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s through xmlStrndup, typed as char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
189
Owen Taylor3473f882001-02-23 17:55:21 +0000190/************************************************************************
191 * *
192 * Generic URI structure functions *
193 * *
194 ************************************************************************/
195
196/**
197 * xmlCreateURI:
198 *
199 * Simply creates an empty xmlURI
200 *
201 * Returns the new structure or NULL in case of error
202 */
203xmlURIPtr
204xmlCreateURI(void) {
205 xmlURIPtr ret;
206
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208 if (ret == NULL) {
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
211 return(NULL);
212 }
213 memset(ret, 0, sizeof(xmlURI));
214 return(ret);
215}
216
217/**
218 * xmlSaveUri:
219 * @uri: pointer to an xmlURI
220 *
221 * Save the URI as an escaped string
222 *
223 * Returns a new string (to be deallocated by caller)
224 */
225xmlChar *
226xmlSaveUri(xmlURIPtr uri) {
227 xmlChar *ret = NULL;
228 const char *p;
229 int len;
230 int max;
231
232 if (uri == NULL) return(NULL);
233
234
235 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 len = 0;
243
244 if (uri->scheme != NULL) {
245 p = uri->scheme;
246 while (*p != 0) {
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = *p++;
257 }
258 if (len >= max) {
259 max *= 2;
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
261 if (ret == NULL) {
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
264 return(NULL);
265 }
266 }
267 ret[len++] = ':';
268 }
269 if (uri->opaque != NULL) {
270 p = uri->opaque;
271 while (*p != 0) {
272 if (len + 3 >= max) {
273 max *= 2;
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
275 if (ret == NULL) {
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
278 return(NULL);
279 }
280 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000282 ret[len++] = *p++;
283 else {
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
286 ret[len++] = '%';
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289 }
290 }
Owen Taylor3473f882001-02-23 17:55:21 +0000291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
429 if (ret == NULL) {
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
432 return(NULL);
433 }
434 }
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
438 ((*(p) == ',')))
439 ret[len++] = *p++;
440 else {
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
443 ret[len++] = '%';
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446 }
447 }
448 }
449 if (uri->query != NULL) {
450 if (len + 3 >= max) {
451 max *= 2;
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
454 if (ret == NULL) {
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
457 return(NULL);
458 }
459 }
460 ret[len++] = '?';
461 p = uri->query;
462 while (*p != 0) {
463 if (len + 3 >= max) {
464 max *= 2;
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
467 if (ret == NULL) {
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
470 return(NULL);
471 }
472 }
473 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
474 ret[len++] = *p++;
475 else {
476 int val = *(unsigned char *)p++;
477 int hi = val / 0x10, lo = val % 0x10;
478 ret[len++] = '%';
479 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
480 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
481 }
482 }
483 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000484 }
485 if (uri->fragment != NULL) {
486 if (len + 3 >= max) {
487 max *= 2;
488 ret = (xmlChar *) xmlRealloc(ret,
489 (max + 1) * sizeof(xmlChar));
490 if (ret == NULL) {
491 xmlGenericError(xmlGenericErrorContext,
492 "xmlSaveUri: out of memory\n");
493 return(NULL);
494 }
495 }
496 ret[len++] = '#';
497 p = uri->fragment;
498 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000499 if (len + 3 >= max) {
500 max *= 2;
501 ret = (xmlChar *) xmlRealloc(ret,
502 (max + 1) * sizeof(xmlChar));
503 if (ret == NULL) {
504 xmlGenericError(xmlGenericErrorContext,
505 "xmlSaveUri: out of memory\n");
506 return(NULL);
507 }
508 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000509 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
510 ret[len++] = *p++;
511 else {
512 int val = *(unsigned char *)p++;
513 int hi = val / 0x10, lo = val % 0x10;
514 ret[len++] = '%';
515 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
516 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000517 }
518 }
Owen Taylor3473f882001-02-23 17:55:21 +0000519 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000520 if (len >= max) {
521 max *= 2;
522 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
523 if (ret == NULL) {
524 xmlGenericError(xmlGenericErrorContext,
525 "xmlSaveUri: out of memory\n");
526 return(NULL);
527 }
528 }
529 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000530 return(ret);
531}
532
533/**
534 * xmlPrintURI:
535 * @stream: a FILE* for the output
536 * @uri: pointer to an xmlURI
537 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000538 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000539 */
540void
541xmlPrintURI(FILE *stream, xmlURIPtr uri) {
542 xmlChar *out;
543
544 out = xmlSaveUri(uri);
545 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000546 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000547 xmlFree(out);
548 }
549}
550
551/**
552 * xmlCleanURI:
553 * @uri: pointer to an xmlURI
554 *
555 * Make sure the xmlURI struct is free of content
556 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000557static void
Owen Taylor3473f882001-02-23 17:55:21 +0000558xmlCleanURI(xmlURIPtr uri) {
559 if (uri == NULL) return;
560
561 if (uri->scheme != NULL) xmlFree(uri->scheme);
562 uri->scheme = NULL;
563 if (uri->server != NULL) xmlFree(uri->server);
564 uri->server = NULL;
565 if (uri->user != NULL) xmlFree(uri->user);
566 uri->user = NULL;
567 if (uri->path != NULL) xmlFree(uri->path);
568 uri->path = NULL;
569 if (uri->fragment != NULL) xmlFree(uri->fragment);
570 uri->fragment = NULL;
571 if (uri->opaque != NULL) xmlFree(uri->opaque);
572 uri->opaque = NULL;
573 if (uri->authority != NULL) xmlFree(uri->authority);
574 uri->authority = NULL;
575 if (uri->query != NULL) xmlFree(uri->query);
576 uri->query = NULL;
577}
578
579/**
580 * xmlFreeURI:
581 * @uri: pointer to an xmlURI
582 *
583 * Free up the xmlURI struct
584 */
585void
586xmlFreeURI(xmlURIPtr uri) {
587 if (uri == NULL) return;
588
589 if (uri->scheme != NULL) xmlFree(uri->scheme);
590 if (uri->server != NULL) xmlFree(uri->server);
591 if (uri->user != NULL) xmlFree(uri->user);
592 if (uri->path != NULL) xmlFree(uri->path);
593 if (uri->fragment != NULL) xmlFree(uri->fragment);
594 if (uri->opaque != NULL) xmlFree(uri->opaque);
595 if (uri->authority != NULL) xmlFree(uri->authority);
596 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000597 xmlFree(uri);
598}
599
600/************************************************************************
601 * *
602 * Helper functions *
603 * *
604 ************************************************************************/
605
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of '/' following a segment
 * are collapsed to a single '/' on the way; leading '/' characters are
 * kept as they are.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
	return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
      return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
	/*
	 * c) All occurrences of "./", where "." is a complete path segment,
	 *    are removed from the buffer string.
	 */
	if ((cur[0] == '.') && (cur[1] == '/')) {
	    cur += 2;
	    /* '//' normalization should be done at this point too */
	    while (cur[0] == '/')
		cur++;
	    continue;
	}

	/*
	 * d) If the buffer string ends with "." as a complete path segment,
	 *    that "." is removed.
	 */
	if ((cur[0] == '.') && (cur[1] == '\0'))
	    break;

	/* Otherwise keep the segment.  */
	while (cur[0] != '/') {
            if (cur[0] == '\0')
              goto done_cd;
	    (out++)[0] = (cur++)[0];
	}
	/* normalize // : only the last '/' of a run is copied */
	while ((cur[0] == '/') && (cur[1] == '/'))
	    cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
	return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
          ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
          break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
          cur = segp;
          continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
          cur[0] = '\0';
          break;
        }
        /* The source and destination overlap, so strcpy(cur, segp + 3)
         * cannot be used; copy forward by hand instead.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
          ;

        /* If there are no previous segments, then keep going from here.
         * That is the case when "cur" is the very start of the buffer, or
         * when only '/' characters precede it.  Testing (segp == path)
         * alone is not enough: it is also true when the previous segment
         * is a single character located at the start of the buffer, and
         * that segment must be re-examined ("a/b/../../c" -> "c").
         */
        if (cur == path)
          continue;
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
          ;
        if (segp[0] == '/')
          continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
          --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.  This is only done when the
     * path starts with '/'.
     */
    if (path[0] == '/') {
      cur = path;
      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
             && ((cur[3] == '/') || (cur[3] == '\0')))
	cur += 3;

      if (cur != path) {
	out = path;
	while (cur[0] != '\0')
          (out++)[0] = (cur++)[0];
	out[0] = 0;
      }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000793
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(digit || lower || upper);
}
801
Owen Taylor3473f882001-02-23 17:55:21 +0000802/**
803 * xmlURIUnescapeString:
804 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000805 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000806 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000807 *
Daniel Veillarda44294f2007-04-24 08:57:54 +0000808 * Unescaping routine, but does not check that the string is an URI. The
809 * output is a direct unsigned char translation of %XX values (no encoding)
Daniel Veillard79187652007-04-24 10:19:52 +0000810 * Note that the length of the result can only be smaller or same size as
811 * the input string.
Owen Taylor3473f882001-02-23 17:55:21 +0000812 *
Daniel Veillard79187652007-04-24 10:19:52 +0000813 * Returns a copy of the string, but unescaped, will return NULL only in case
814 * of error
Owen Taylor3473f882001-02-23 17:55:21 +0000815 */
816char *
817xmlURIUnescapeString(const char *str, int len, char *target) {
818 char *ret, *out;
819 const char *in;
820
821 if (str == NULL)
822 return(NULL);
823 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000824 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000825
826 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000827 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000828 if (ret == NULL) {
829 xmlGenericError(xmlGenericErrorContext,
830 "xmlURIUnescapeString: out of memory\n");
831 return(NULL);
832 }
833 } else
834 ret = target;
835 in = str;
836 out = ret;
837 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000838 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000839 in++;
840 if ((*in >= '0') && (*in <= '9'))
841 *out = (*in - '0');
842 else if ((*in >= 'a') && (*in <= 'f'))
843 *out = (*in - 'a') + 10;
844 else if ((*in >= 'A') && (*in <= 'F'))
845 *out = (*in - 'A') + 10;
846 in++;
847 if ((*in >= '0') && (*in <= '9'))
848 *out = *out * 16 + (*in - '0');
849 else if ((*in >= 'a') && (*in <= 'f'))
850 *out = *out * 16 + (*in - 'a') + 10;
851 else if ((*in >= 'A') && (*in <= 'F'))
852 *out = *out * 16 + (*in - 'A') + 10;
853 in++;
854 len -= 3;
855 out++;
856 } else {
857 *out++ = *in++;
858 len--;
859 }
860 }
861 *out = 0;
862 return(ret);
863}
864
865/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000866 * xmlURIEscapeStr:
867 * @str: string to escape
868 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000869 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000870 * This routine escapes a string to hex, ignoring reserved characters (a-z)
871 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000872 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000873 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000874 */
875xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000876xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
877 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000878 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000879
Owen Taylor3473f882001-02-23 17:55:21 +0000880 unsigned int len, out;
881
882 if (str == NULL)
883 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000884 if (str[0] == 0)
885 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000886 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000887 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000888
889 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000890 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000891 if (ret == NULL) {
892 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000893 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000894 return(NULL);
895 }
896 in = (const xmlChar *) str;
897 out = 0;
898 while(*in != 0) {
899 if (len - out <= 3) {
900 len += 20;
901 ret = (xmlChar *) xmlRealloc(ret, len);
902 if (ret == NULL) {
903 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000904 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000905 return(NULL);
906 }
907 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000908
909 ch = *in;
910
Daniel Veillardeb475a32002-04-14 22:00:22 +0000911 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000912 unsigned char val;
913 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000914 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000915 if (val <= 9)
916 ret[out++] = '0' + val;
917 else
918 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000919 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000920 if (val <= 9)
921 ret[out++] = '0' + val;
922 else
923 ret[out++] = 'A' + val - 0xA;
924 in++;
925 } else {
926 ret[out++] = *in++;
927 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000928
Owen Taylor3473f882001-02-23 17:55:21 +0000929 }
930 ret[out] = 0;
931 return(ret);
932}
933
Daniel Veillard8514c672001-05-23 10:29:12 +0000934/**
935 * xmlURIEscape:
936 * @str: the string of the URI to escape
937 *
 * Escaping routine, does not do validity checks !
 * It will try to escape the chars needing this, but this is heuristic
 * based: it's impossible to be sure.
 *
 * Returns a copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000943 *
944 * 25 May 2001
945 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
946 * according to RFC2396.
947 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000948 */
949xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000950xmlURIEscape(const xmlChar * str)
951{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000952 xmlChar *ret, *segment = NULL;
953 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000954 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000955
Daniel Veillard6278fb52001-05-25 07:38:41 +0000956#define NULLCHK(p) if(!p) { \
957 xmlGenericError(xmlGenericErrorContext, \
958 "xmlURIEscape: out of memory\n"); \
959 return NULL; }
960
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000961 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000962 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000963
964 uri = xmlCreateURI();
965 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000966 /*
967 * Allow escaping errors in the unescaped form
968 */
969 uri->cleanup = 1;
970 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000971 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000972 xmlFreeURI(uri);
973 return (NULL);
974 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000975 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000976
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000977 if (!uri)
978 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000979
980 ret = NULL;
981
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000982 if (uri->scheme) {
983 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
984 NULLCHK(segment)
985 ret = xmlStrcat(ret, segment);
986 ret = xmlStrcat(ret, BAD_CAST ":");
987 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000988 }
989
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000990 if (uri->authority) {
991 segment =
992 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
993 NULLCHK(segment)
994 ret = xmlStrcat(ret, BAD_CAST "//");
995 ret = xmlStrcat(ret, segment);
996 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000997 }
998
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000999 if (uri->user) {
1000 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1001 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001002 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001003 ret = xmlStrcat(ret, segment);
1004 ret = xmlStrcat(ret, BAD_CAST "@");
1005 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001006 }
1007
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001008 if (uri->server) {
1009 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1010 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001011 if (uri->user == NULL)
1012 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001013 ret = xmlStrcat(ret, segment);
1014 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001015 }
1016
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001017 if (uri->port) {
1018 xmlChar port[10];
1019
Daniel Veillard43d3f612001-11-10 11:57:23 +00001020 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001021 ret = xmlStrcat(ret, BAD_CAST ":");
1022 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001023 }
1024
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001025 if (uri->path) {
1026 segment =
1027 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1028 NULLCHK(segment)
1029 ret = xmlStrcat(ret, segment);
1030 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001031 }
1032
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001033 if (uri->query) {
1034 segment =
1035 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1036 NULLCHK(segment)
1037 ret = xmlStrcat(ret, BAD_CAST "?");
1038 ret = xmlStrcat(ret, segment);
1039 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001040 }
1041
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001042 if (uri->opaque) {
1043 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1044 NULLCHK(segment)
1045 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001046 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001047 }
1048
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001049 if (uri->fragment) {
1050 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1051 NULLCHK(segment)
1052 ret = xmlStrcat(ret, BAD_CAST "#");
1053 ret = xmlStrcat(ret, segment);
1054 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001055 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001056
1057 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001058#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001059
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001060 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001061}
1062
Owen Taylor3473f882001-02-23 17:55:21 +00001063/************************************************************************
1064 * *
1065 * Escaped URI parsing *
1066 * *
1067 ************************************************************************/
1068
1069/**
1070 * xmlParseURIFragment:
1071 * @uri: pointer to an URI structure
1072 * @str: pointer to the string to analyze
1073 *
1074 * Parse an URI fragment string and fills in the appropriate fields
1075 * of the @uri structure.
1076 *
1077 * fragment = *uric
1078 *
1079 * Returns 0 or the error code
1080 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001081static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001082xmlParseURIFragment(xmlURIPtr uri, const char **str)
1083{
Daniel Veillard30e76072006-03-09 14:13:55 +00001084 const char *cur;
1085
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001086 if (str == NULL)
1087 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001088
Daniel Veillard30e76072006-03-09 14:13:55 +00001089 cur = *str;
1090
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001091 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001092 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001093 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001094 if (uri->fragment != NULL)
1095 xmlFree(uri->fragment);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001096 if (uri->cleanup & 2)
1097 uri->fragment = STRNDUP(*str, cur - *str);
1098 else
1099 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001100 }
1101 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001102 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001103}
1104
1105/**
1106 * xmlParseURIQuery:
1107 * @uri: pointer to an URI structure
1108 * @str: pointer to the string to analyze
1109 *
1110 * Parse the query part of an URI
1111 *
1112 * query = *uric
1113 *
1114 * Returns 0 or the error code
1115 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001116static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001117xmlParseURIQuery(xmlURIPtr uri, const char **str)
1118{
Daniel Veillard30e76072006-03-09 14:13:55 +00001119 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001120
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001121 if (str == NULL)
1122 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001123
Daniel Veillard30e76072006-03-09 14:13:55 +00001124 cur = *str;
1125
Daniel Veillard336a8e12005-08-07 10:46:19 +00001126 while ((IS_URIC(cur)) ||
1127 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001128 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001129 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001130 if (uri->query != NULL)
1131 xmlFree(uri->query);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001132 if (uri->cleanup & 2)
1133 uri->query = STRNDUP(*str, cur - *str);
1134 else
1135 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001136 }
1137 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001138 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001139}
1140
1141/**
1142 * xmlParseURIScheme:
1143 * @uri: pointer to an URI structure
1144 * @str: pointer to the string to analyze
1145 *
1146 * Parse an URI scheme
1147 *
1148 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1149 *
1150 * Returns 0 or the error code
1151 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001152static int
Owen Taylor3473f882001-02-23 17:55:21 +00001153xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1154 const char *cur;
1155
1156 if (str == NULL)
1157 return(-1);
1158
1159 cur = *str;
1160 if (!IS_ALPHA(*cur))
1161 return(2);
1162 cur++;
1163 while (IS_SCHEME(*cur)) cur++;
1164 if (uri != NULL) {
1165 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001166 uri->scheme = STRNDUP(*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001167 }
1168 *str = cur;
1169 return(0);
1170}
1171
1172/**
1173 * xmlParseURIOpaquePart:
1174 * @uri: pointer to an URI structure
1175 * @str: pointer to the string to analyze
1176 *
1177 * Parse an URI opaque part
1178 *
1179 * opaque_part = uric_no_slash *uric
1180 *
1181 * Returns 0 or the error code
1182 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001183static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001184xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1185{
Owen Taylor3473f882001-02-23 17:55:21 +00001186 const char *cur;
1187
1188 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001189 return (-1);
1190
Owen Taylor3473f882001-02-23 17:55:21 +00001191 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001192 if (!((IS_URIC_NO_SLASH(cur)) ||
1193 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001194 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001195 }
1196 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001197 while ((IS_URIC(cur)) ||
1198 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001199 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001200 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001201 if (uri->opaque != NULL)
1202 xmlFree(uri->opaque);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001203 if (uri->cleanup & 2)
1204 uri->opaque = STRNDUP(*str, cur - *str);
1205 else
1206 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001207 }
1208 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001209 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001210}
1211
1212/**
1213 * xmlParseURIServer:
1214 * @uri: pointer to an URI structure
1215 * @str: pointer to the string to analyze
1216 *
1217 * Parse a server subpart of an URI, it's a finer grain analysis
1218 * of the authority part.
1219 *
1220 * server = [ [ userinfo "@" ] hostport ]
1221 * userinfo = *( unreserved | escaped |
1222 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1223 * hostport = host [ ":" port ]
William M. Brack015ccb22005-02-13 08:18:52 +00001224 * host = hostname | IPv4address | IPv6reference
Owen Taylor3473f882001-02-23 17:55:21 +00001225 * hostname = *( domainlabel "." ) toplabel [ "." ]
1226 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1227 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
William M. Brack015ccb22005-02-13 08:18:52 +00001228 * IPv6reference = "[" IPv6address "]"
1229 * IPv6address = hexpart [ ":" IPv4address ]
1230 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1231 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1232 * hexseq = hex4 *( ":" hex4)
1233 * hex4 = 1*4hexdig
Owen Taylor3473f882001-02-23 17:55:21 +00001234 * port = *digit
1235 *
1236 * Returns 0 or the error code
1237 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001238static int
Owen Taylor3473f882001-02-23 17:55:21 +00001239xmlParseURIServer(xmlURIPtr uri, const char **str) {
1240 const char *cur;
1241 const char *host, *tmp;
William M. Brack015ccb22005-02-13 08:18:52 +00001242 const int IPV4max = 4;
1243 const int IPV6max = 8;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001244 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001245
1246 if (str == NULL)
1247 return(-1);
1248
1249 cur = *str;
1250
1251 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001252 * is there a userinfo ?
Owen Taylor3473f882001-02-23 17:55:21 +00001253 */
1254 while (IS_USERINFO(cur)) NEXT(cur);
1255 if (*cur == '@') {
1256 if (uri != NULL) {
1257 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001258 if (uri->cleanup & 2)
1259 uri->path = STRNDUP(*str, cur - *str);
1260 else
1261 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001262 }
1263 cur++;
1264 } else {
1265 if (uri != NULL) {
1266 if (uri->user != NULL) xmlFree(uri->user);
1267 uri->user = NULL;
1268 }
1269 cur = *str;
1270 }
1271 /*
1272 * This can be empty in the case where there is no server
1273 */
1274 host = cur;
1275 if (*cur == '/') {
1276 if (uri != NULL) {
1277 if (uri->authority != NULL) xmlFree(uri->authority);
1278 uri->authority = NULL;
1279 if (uri->server != NULL) xmlFree(uri->server);
1280 uri->server = NULL;
1281 uri->port = 0;
1282 }
1283 return(0);
1284 }
1285 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001286 * host part of hostport can denote an IPV4 address, an IPV6 address
1287 * or an unresolved name. Check the IP first, its easier to detect
1288 * errors if wrong one.
1289 * An IPV6 address must start with a '[' and end with a ']'.
Owen Taylor3473f882001-02-23 17:55:21 +00001290 */
William M. Brack015ccb22005-02-13 08:18:52 +00001291 if (*cur == '[') {
1292 int compress=0;
1293 cur++;
1294 for (oct = 0; oct < IPV6max; ++oct) {
1295 if (*cur == ':') {
1296 if (compress)
1297 return(3); /* multiple compression attempted */
1298 if (!oct) { /* initial char is compression */
1299 if (*++cur != ':')
1300 return(3);
1301 }
1302 compress = 1; /* set compression-encountered flag */
1303 cur++; /* skip over the second ':' */
1304 continue;
1305 }
1306 while(IS_HEX(*cur)) cur++;
1307 if (oct == (IPV6max-1))
1308 continue;
1309 if (*cur != ':')
1310 break;
1311 cur++;
1312 }
1313 if ((!compress) && (oct != IPV6max))
1314 return(3);
1315 if (*cur != ']')
1316 return(3);
1317 if (uri != NULL) {
1318 if (uri->server != NULL) xmlFree(uri->server);
1319 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1320 (cur-host)-1);
1321 }
1322 cur++;
1323 } else {
1324 /*
1325 * Not IPV6, maybe IPV4
1326 */
1327 for (oct = 0; oct < IPV4max; ++oct) {
1328 if (*cur == '.')
1329 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1330 while(IS_DIGIT(*cur)) cur++;
1331 if (oct == (IPV4max-1))
1332 continue;
1333 if (*cur != '.')
1334 break;
1335 cur++;
1336 }
Owen Taylor3473f882001-02-23 17:55:21 +00001337 }
William M. Brack015ccb22005-02-13 08:18:52 +00001338 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1339 IS_ALPHA(*cur))) {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001340 /* maybe host_name */
1341 if (!IS_ALPHANUM(*cur))
1342 return(4); /* e.g. http://xml.$oft */
1343 do {
1344 do ++cur; while (IS_ALPHANUM(*cur));
1345 if (*cur == '-') {
1346 --cur;
1347 if (*cur == '.')
1348 return(5); /* e.g. http://xml.-soft */
1349 ++cur;
1350 continue;
1351 }
1352 if (*cur == '.') {
1353 --cur;
1354 if (*cur == '-')
1355 return(6); /* e.g. http://xml-.soft */
1356 if (*cur == '.')
1357 return(7); /* e.g. http://xml..soft */
1358 ++cur;
1359 continue;
1360 }
1361 break;
1362 } while (1);
1363 tmp = cur;
1364 if (tmp[-1] == '.')
1365 --tmp; /* e.g. http://xml.$Oft/ */
1366 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1367 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1368 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001369 }
Owen Taylor3473f882001-02-23 17:55:21 +00001370 if (uri != NULL) {
1371 if (uri->authority != NULL) xmlFree(uri->authority);
1372 uri->authority = NULL;
William M. Brack015ccb22005-02-13 08:18:52 +00001373 if (host[0] != '[') { /* it's not an IPV6 addr */
1374 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001375 if (uri->cleanup & 2)
1376 uri->server = STRNDUP(host, cur - host);
1377 else
1378 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
William M. Brack015ccb22005-02-13 08:18:52 +00001379 }
Owen Taylor3473f882001-02-23 17:55:21 +00001380 }
Owen Taylor3473f882001-02-23 17:55:21 +00001381 /*
1382 * finish by checking for a port presence.
1383 */
1384 if (*cur == ':') {
1385 cur++;
1386 if (IS_DIGIT(*cur)) {
1387 if (uri != NULL)
1388 uri->port = 0;
1389 while (IS_DIGIT(*cur)) {
1390 if (uri != NULL)
1391 uri->port = uri->port * 10 + (*cur - '0');
1392 cur++;
1393 }
1394 }
1395 }
1396 *str = cur;
1397 return(0);
1398}
1399
1400/**
1401 * xmlParseURIRelSegment:
1402 * @uri: pointer to an URI structure
1403 * @str: pointer to the string to analyze
1404 *
1405 * Parse an URI relative segment
1406 *
1407 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1408 * "+" | "$" | "," )
1409 *
1410 * Returns 0 or the error code
1411 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001413xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1414{
Owen Taylor3473f882001-02-23 17:55:21 +00001415 const char *cur;
1416
1417 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001418 return (-1);
1419
Owen Taylor3473f882001-02-23 17:55:21 +00001420 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001421 if (!((IS_SEGMENT(cur)) ||
1422 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001423 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001424 }
1425 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001426 while ((IS_SEGMENT(cur)) ||
1427 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001428 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001429 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001430 if (uri->path != NULL)
1431 xmlFree(uri->path);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001432 if (uri->cleanup & 2)
1433 uri->path = STRNDUP(*str, cur - *str);
1434 else
1435 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 }
1437 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001438 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001439}
1440
1441/**
1442 * xmlParseURIPathSegments:
1443 * @uri: pointer to an URI structure
1444 * @str: pointer to the string to analyze
1445 * @slash: should we add a leading slash
1446 *
1447 * Parse an URI set of path segments
1448 *
1449 * path_segments = segment *( "/" segment )
1450 * segment = *pchar *( ";" param )
1451 * param = *pchar
1452 *
1453 * Returns 0 or the error code
1454 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001455static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001456xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1457{
Owen Taylor3473f882001-02-23 17:55:21 +00001458 const char *cur;
1459
1460 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001461 return (-1);
1462
Owen Taylor3473f882001-02-23 17:55:21 +00001463 cur = *str;
1464
1465 do {
Daniel Veillard336a8e12005-08-07 10:46:19 +00001466 while ((IS_PCHAR(cur)) ||
1467 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001468 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001469 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001470 cur++;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001471 while ((IS_PCHAR(cur)) ||
1472 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001473 NEXT(cur);
1474 }
1475 if (*cur != '/')
1476 break;
1477 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001478 } while (1);
1479 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001480 int len, len2 = 0;
1481 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001482
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001483 /*
1484 * Concat the set of path segments to the current path
1485 */
1486 len = cur - *str;
1487 if (slash)
1488 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001489
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001490 if (uri->path != NULL) {
1491 len2 = strlen(uri->path);
1492 len += len2;
1493 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001494 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001495 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001496 xmlGenericError(xmlGenericErrorContext,
1497 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001498 *str = cur;
1499 return (-1);
1500 }
1501 if (uri->path != NULL)
1502 memcpy(path, uri->path, len2);
1503 if (slash) {
1504 path[len2] = '/';
1505 len2++;
1506 }
1507 path[len2] = 0;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001508 if (cur - *str > 0) {
1509 if (uri->cleanup & 2) {
1510 memcpy(&path[len2], *str, cur - *str);
1511 path[len2 + (cur - *str)] = 0;
1512 } else
1513 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1514 }
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001515 if (uri->path != NULL)
1516 xmlFree(uri->path);
1517 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001518 }
1519 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001520 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001521}
1522
1523/**
1524 * xmlParseURIAuthority:
1525 * @uri: pointer to an URI structure
1526 * @str: pointer to the string to analyze
1527 *
1528 * Parse the authority part of an URI.
1529 *
1530 * authority = server | reg_name
1531 * server = [ [ userinfo "@" ] hostport ]
1532 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1533 * "@" | "&" | "=" | "+" )
1534 *
1535 * Note : this is completely ambiguous since reg_name is allowed to
1536 * use the full set of chars in use by server:
1537 *
1538 * 3.2.1. Registry-based Naming Authority
1539 *
1540 * The structure of a registry-based naming authority is specific
1541 * to the URI scheme, but constrained to the allowed characters
1542 * for an authority component.
1543 *
1544 * Returns 0 or the error code
1545 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001546static int
Owen Taylor3473f882001-02-23 17:55:21 +00001547xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1548 const char *cur;
1549 int ret;
1550
1551 if (str == NULL)
1552 return(-1);
1553
1554 cur = *str;
1555
1556 /*
1557 * try first to parse it as a server string.
1558 */
1559 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001560 if ((ret == 0) && (*str != NULL) &&
1561 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001562 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001563 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001564
1565 /*
1566 * failed, fallback to reg_name
1567 */
1568 if (!IS_REG_NAME(cur)) {
1569 return(5);
1570 }
1571 NEXT(cur);
1572 while (IS_REG_NAME(cur)) NEXT(cur);
1573 if (uri != NULL) {
1574 if (uri->server != NULL) xmlFree(uri->server);
1575 uri->server = NULL;
1576 if (uri->user != NULL) xmlFree(uri->user);
1577 uri->user = NULL;
1578 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001579 if (uri->cleanup & 2)
1580 uri->authority = STRNDUP(*str, cur - *str);
1581 else
1582 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001583 }
1584 *str = cur;
1585 return(0);
1586}
1587
1588/**
1589 * xmlParseURIHierPart:
1590 * @uri: pointer to an URI structure
1591 * @str: pointer to the string to analyze
1592 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001593 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001594 *
1595 * hier_part = ( net_path | abs_path ) [ "?" query ]
1596 * abs_path = "/" path_segments
1597 * net_path = "//" authority [ abs_path ]
1598 *
1599 * Returns 0 or the error code
1600 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001601static int
Owen Taylor3473f882001-02-23 17:55:21 +00001602xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1603 int ret;
1604 const char *cur;
1605
1606 if (str == NULL)
1607 return(-1);
1608
1609 cur = *str;
1610
1611 if ((cur[0] == '/') && (cur[1] == '/')) {
1612 cur += 2;
1613 ret = xmlParseURIAuthority(uri, &cur);
1614 if (ret != 0)
1615 return(ret);
1616 if (cur[0] == '/') {
1617 cur++;
1618 ret = xmlParseURIPathSegments(uri, &cur, 1);
1619 }
1620 } else if (cur[0] == '/') {
1621 cur++;
1622 ret = xmlParseURIPathSegments(uri, &cur, 1);
1623 } else {
1624 return(4);
1625 }
1626 if (ret != 0)
1627 return(ret);
1628 if (*cur == '?') {
1629 cur++;
1630 ret = xmlParseURIQuery(uri, &cur);
1631 if (ret != 0)
1632 return(ret);
1633 }
1634 *str = cur;
1635 return(0);
1636}
1637
1638/**
1639 * xmlParseAbsoluteURI:
1640 * @uri: pointer to an URI structure
1641 * @str: pointer to the string to analyze
1642 *
1643 * Parse an URI reference string and fills in the appropriate fields
1644 * of the @uri structure
1645 *
1646 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1647 *
1648 * Returns 0 or the error code
1649 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001650static int
Owen Taylor3473f882001-02-23 17:55:21 +00001651xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1652 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001653 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001654
1655 if (str == NULL)
1656 return(-1);
1657
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001658 cur = *str;
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660 ret = xmlParseURIScheme(uri, str);
1661 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001662 if (**str != ':') {
1663 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001664 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001665 }
Owen Taylor3473f882001-02-23 17:55:21 +00001666 (*str)++;
1667 if (**str == '/')
1668 return(xmlParseURIHierPart(uri, str));
1669 return(xmlParseURIOpaquePart(uri, str));
1670}
1671
1672/**
1673 * xmlParseRelativeURI:
1674 * @uri: pointer to an URI structure
1675 * @str: pointer to the string to analyze
1676 *
1677 * Parse an relative URI string and fills in the appropriate fields
1678 * of the @uri structure
1679 *
1680 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1681 * abs_path = "/" path_segments
1682 * net_path = "//" authority [ abs_path ]
1683 * rel_path = rel_segment [ abs_path ]
1684 *
1685 * Returns 0 or the error code
1686 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001687static int
Owen Taylor3473f882001-02-23 17:55:21 +00001688xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1689 int ret = 0;
1690 const char *cur;
1691
1692 if (str == NULL)
1693 return(-1);
1694
1695 cur = *str;
1696 if ((cur[0] == '/') && (cur[1] == '/')) {
1697 cur += 2;
1698 ret = xmlParseURIAuthority(uri, &cur);
1699 if (ret != 0)
1700 return(ret);
1701 if (cur[0] == '/') {
1702 cur++;
1703 ret = xmlParseURIPathSegments(uri, &cur, 1);
1704 }
1705 } else if (cur[0] == '/') {
1706 cur++;
1707 ret = xmlParseURIPathSegments(uri, &cur, 1);
1708 } else if (cur[0] != '#' && cur[0] != '?') {
1709 ret = xmlParseURIRelSegment(uri, &cur);
1710 if (ret != 0)
1711 return(ret);
1712 if (cur[0] == '/') {
1713 cur++;
1714 ret = xmlParseURIPathSegments(uri, &cur, 1);
1715 }
1716 }
1717 if (ret != 0)
1718 return(ret);
1719 if (*cur == '?') {
1720 cur++;
1721 ret = xmlParseURIQuery(uri, &cur);
1722 if (ret != 0)
1723 return(ret);
1724 }
1725 *str = cur;
1726 return(ret);
1727}
1728
1729/**
1730 * xmlParseURIReference:
1731 * @uri: pointer to an URI structure
1732 * @str: the string to analyze
1733 *
1734 * Parse an URI reference string and fills in the appropriate fields
1735 * of the @uri structure
1736 *
1737 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1738 *
1739 * Returns 0 or the error code
1740 */
1741int
1742xmlParseURIReference(xmlURIPtr uri, const char *str) {
1743 int ret;
1744 const char *tmp = str;
1745
1746 if (str == NULL)
1747 return(-1);
1748 xmlCleanURI(uri);
1749
1750 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001751 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001752 * it fails.
1753 */
1754 ret = xmlParseAbsoluteURI(uri, &str);
1755 if (ret != 0) {
1756 xmlCleanURI(uri);
1757 str = tmp;
1758 ret = xmlParseRelativeURI(uri, &str);
1759 }
1760 if (ret != 0) {
1761 xmlCleanURI(uri);
1762 return(ret);
1763 }
1764
1765 if (*str == '#') {
1766 str++;
1767 ret = xmlParseURIFragment(uri, &str);
1768 if (ret != 0) return(ret);
1769 }
1770 if (*str != 0) {
1771 xmlCleanURI(uri);
1772 return(1);
1773 }
1774 return(0);
1775}
1776
1777/**
1778 * xmlParseURI:
1779 * @str: the URI string to analyze
1780 *
1781 * Parse an URI
1782 *
1783 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1784 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001785 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001786 */
1787xmlURIPtr
1788xmlParseURI(const char *str) {
1789 xmlURIPtr uri;
1790 int ret;
1791
1792 if (str == NULL)
1793 return(NULL);
1794 uri = xmlCreateURI();
1795 if (uri != NULL) {
1796 ret = xmlParseURIReference(uri, str);
1797 if (ret) {
1798 xmlFreeURI(uri);
1799 return(NULL);
1800 }
1801 }
1802 return(uri);
1803}
1804
Daniel Veillard336a8e12005-08-07 10:46:19 +00001805/**
1806 * xmlParseURIRaw:
1807 * @str: the URI string to analyze
1808 * @raw: if 1 unescaping of URI pieces are disabled
1809 *
1810 * Parse an URI but allows to keep intact the original fragments.
1811 *
1812 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1813 *
1814 * Returns a newly built xmlURIPtr or NULL in case of error
1815 */
1816xmlURIPtr
1817xmlParseURIRaw(const char *str, int raw) {
1818 xmlURIPtr uri;
1819 int ret;
1820
1821 if (str == NULL)
1822 return(NULL);
1823 uri = xmlCreateURI();
1824 if (uri != NULL) {
1825 if (raw) {
1826 uri->cleanup |= 2;
1827 }
1828 ret = xmlParseURIReference(uri, str);
1829 if (ret) {
1830 xmlFreeURI(uri);
1831 return(NULL);
1832 }
1833 }
1834 return(uri);
1835}
1836
Owen Taylor3473f882001-02-23 17:55:21 +00001837/************************************************************************
1838 * *
1839 * Public functions *
1840 * *
1841 ************************************************************************/
1842
1843/**
1844 * xmlBuildURI:
1845 * @URI: the URI instance found in the document
1846 * @base: the base value
1847 *
 * Computes the final URI of the reference done by checking that
1849 * the given URI is valid, and building the final URI using the
1850 * base URI. This is processed according to section 5.2 of the
1851 * RFC 2396
1852 *
1853 * 5.2. Resolving Relative References to Absolute Form
1854 *
1855 * Returns a new URI string (to be freed by the caller) or NULL in case
1856 * of error.
1857 */
1858xmlChar *
1859xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1860 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001861 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001862 xmlURIPtr ref = NULL;
1863 xmlURIPtr bas = NULL;
1864 xmlURIPtr res = NULL;
1865
1866 /*
1867 * 1) The URI reference is parsed into the potential four components and
1868 * fragment identifier, as described in Section 4.3.
1869 *
1870 * NOTE that a completely empty URI is treated by modern browsers
1871 * as a reference to "." rather than as a synonym for the current
1872 * URI. Should we do that here?
1873 */
1874 if (URI == NULL)
1875 ret = -1;
1876 else {
1877 if (*URI) {
1878 ref = xmlCreateURI();
1879 if (ref == NULL)
1880 goto done;
1881 ret = xmlParseURIReference(ref, (const char *) URI);
1882 }
1883 else
1884 ret = 0;
1885 }
1886 if (ret != 0)
1887 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001888 if ((ref != NULL) && (ref->scheme != NULL)) {
1889 /*
1890 * The URI is absolute don't modify.
1891 */
1892 val = xmlStrdup(URI);
1893 goto done;
1894 }
Owen Taylor3473f882001-02-23 17:55:21 +00001895 if (base == NULL)
1896 ret = -1;
1897 else {
1898 bas = xmlCreateURI();
1899 if (bas == NULL)
1900 goto done;
1901 ret = xmlParseURIReference(bas, (const char *) base);
1902 }
1903 if (ret != 0) {
1904 if (ref)
1905 val = xmlSaveUri(ref);
1906 goto done;
1907 }
1908 if (ref == NULL) {
1909 /*
1910 * the base fragment must be ignored
1911 */
1912 if (bas->fragment != NULL) {
1913 xmlFree(bas->fragment);
1914 bas->fragment = NULL;
1915 }
1916 val = xmlSaveUri(bas);
1917 goto done;
1918 }
1919
1920 /*
1921 * 2) If the path component is empty and the scheme, authority, and
1922 * query components are undefined, then it is a reference to the
1923 * current document and we are done. Otherwise, the reference URI's
1924 * query and fragment components are defined as found (or not found)
1925 * within the URI reference and not inherited from the base URI.
1926 *
1927 * NOTE that in modern browsers, the parsing differs from the above
1928 * in the following aspect: the query component is allowed to be
1929 * defined while still treating this as a reference to the current
1930 * document.
1931 */
1932 res = xmlCreateURI();
1933 if (res == NULL)
1934 goto done;
1935 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1936 ((ref->authority == NULL) && (ref->server == NULL))) {
1937 if (bas->scheme != NULL)
1938 res->scheme = xmlMemStrdup(bas->scheme);
1939 if (bas->authority != NULL)
1940 res->authority = xmlMemStrdup(bas->authority);
1941 else if (bas->server != NULL) {
1942 res->server = xmlMemStrdup(bas->server);
1943 if (bas->user != NULL)
1944 res->user = xmlMemStrdup(bas->user);
1945 res->port = bas->port;
1946 }
1947 if (bas->path != NULL)
1948 res->path = xmlMemStrdup(bas->path);
1949 if (ref->query != NULL)
1950 res->query = xmlMemStrdup(ref->query);
1951 else if (bas->query != NULL)
1952 res->query = xmlMemStrdup(bas->query);
1953 if (ref->fragment != NULL)
1954 res->fragment = xmlMemStrdup(ref->fragment);
1955 goto step_7;
1956 }
Owen Taylor3473f882001-02-23 17:55:21 +00001957
1958 /*
1959 * 3) If the scheme component is defined, indicating that the reference
1960 * starts with a scheme name, then the reference is interpreted as an
1961 * absolute URI and we are done. Otherwise, the reference URI's
1962 * scheme is inherited from the base URI's scheme component.
1963 */
1964 if (ref->scheme != NULL) {
1965 val = xmlSaveUri(ref);
1966 goto done;
1967 }
1968 if (bas->scheme != NULL)
1969 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001970
1971 if (ref->query != NULL)
1972 res->query = xmlMemStrdup(ref->query);
1973 if (ref->fragment != NULL)
1974 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001975
1976 /*
1977 * 4) If the authority component is defined, then the reference is a
1978 * network-path and we skip to step 7. Otherwise, the reference
1979 * URI's authority is inherited from the base URI's authority
1980 * component, which will also be undefined if the URI scheme does not
1981 * use an authority component.
1982 */
1983 if ((ref->authority != NULL) || (ref->server != NULL)) {
1984 if (ref->authority != NULL)
1985 res->authority = xmlMemStrdup(ref->authority);
1986 else {
1987 res->server = xmlMemStrdup(ref->server);
1988 if (ref->user != NULL)
1989 res->user = xmlMemStrdup(ref->user);
1990 res->port = ref->port;
1991 }
1992 if (ref->path != NULL)
1993 res->path = xmlMemStrdup(ref->path);
1994 goto step_7;
1995 }
1996 if (bas->authority != NULL)
1997 res->authority = xmlMemStrdup(bas->authority);
1998 else if (bas->server != NULL) {
1999 res->server = xmlMemStrdup(bas->server);
2000 if (bas->user != NULL)
2001 res->user = xmlMemStrdup(bas->user);
2002 res->port = bas->port;
2003 }
2004
2005 /*
2006 * 5) If the path component begins with a slash character ("/"), then
2007 * the reference is an absolute-path and we skip to step 7.
2008 */
2009 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2010 res->path = xmlMemStrdup(ref->path);
2011 goto step_7;
2012 }
2013
2014
2015 /*
2016 * 6) If this step is reached, then we are resolving a relative-path
2017 * reference. The relative path needs to be merged with the base
2018 * URI's path. Although there are many ways to do this, we will
2019 * describe a simple method using a separate string buffer.
2020 *
2021 * Allocate a buffer large enough for the result string.
2022 */
2023 len = 2; /* extra / and 0 */
2024 if (ref->path != NULL)
2025 len += strlen(ref->path);
2026 if (bas->path != NULL)
2027 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002028 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002029 if (res->path == NULL) {
2030 xmlGenericError(xmlGenericErrorContext,
2031 "xmlBuildURI: out of memory\n");
2032 goto done;
2033 }
2034 res->path[0] = 0;
2035
2036 /*
2037 * a) All but the last segment of the base URI's path component is
2038 * copied to the buffer. In other words, any characters after the
2039 * last (right-most) slash character, if any, are excluded.
2040 */
2041 cur = 0;
2042 out = 0;
2043 if (bas->path != NULL) {
2044 while (bas->path[cur] != 0) {
2045 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2046 cur++;
2047 if (bas->path[cur] == 0)
2048 break;
2049
2050 cur++;
2051 while (out < cur) {
2052 res->path[out] = bas->path[out];
2053 out++;
2054 }
2055 }
2056 }
2057 res->path[out] = 0;
2058
2059 /*
2060 * b) The reference's path component is appended to the buffer
2061 * string.
2062 */
2063 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002064 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002065 /*
2066 * Ensure the path includes a '/'
2067 */
2068 if ((out == 0) && (bas->server != NULL))
2069 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002070 while (ref->path[indx] != 0) {
2071 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002072 }
2073 }
2074 res->path[out] = 0;
2075
2076 /*
2077 * Steps c) to h) are really path normalization steps
2078 */
2079 xmlNormalizeURIPath(res->path);
2080
2081step_7:
2082
2083 /*
2084 * 7) The resulting URI components, including any inherited from the
2085 * base URI, are recombined to give the absolute form of the URI
2086 * reference.
2087 */
2088 val = xmlSaveUri(res);
2089
2090done:
2091 if (ref != NULL)
2092 xmlFreeURI(ref);
2093 if (bas != NULL)
2094 xmlFreeURI(bas);
2095 if (res != NULL)
2096 xmlFreeURI(res);
2097 return(val);
2098}
2099
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002100/**
William M. Brackf7789b12004-06-07 08:57:27 +00002101 * xmlBuildRelativeURI:
2102 * @URI: the URI reference under consideration
2103 * @base: the base value
2104 *
2105 * Expresses the URI of the reference in terms relative to the
2106 * base. Some examples of this operation include:
2107 * base = "http://site1.com/docs/book1.html"
2108 * URI input URI returned
2109 * docs/pic1.gif pic1.gif
2110 * docs/img/pic1.gif img/pic1.gif
2111 * img/pic1.gif ../img/pic1.gif
2112 * http://site1.com/docs/pic1.gif pic1.gif
2113 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2114 *
2115 * base = "docs/book1.html"
2116 * URI input URI returned
2117 * docs/pic1.gif pic1.gif
2118 * docs/img/pic1.gif img/pic1.gif
2119 * img/pic1.gif ../img/pic1.gif
2120 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2121 *
2122 *
2123 * Note: if the URI reference is really wierd or complicated, it may be
2124 * worthwhile to first convert it into a "nice" one by calling
2125 * xmlBuildURI (using 'base') before calling this routine,
2126 * since this routine (for reasonable efficiency) assumes URI has
2127 * already been through some validation.
2128 *
2129 * Returns a new URI string (to be freed by the caller) or NULL in case
2130 * error.
2131 */
2132xmlChar *
2133xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2134{
2135 xmlChar *val = NULL;
2136 int ret;
2137 int ix;
2138 int pos = 0;
2139 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002140 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002141 xmlURIPtr ref = NULL;
2142 xmlURIPtr bas = NULL;
2143 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002144 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002145
2146 if ((URI == NULL) || (*URI == 0))
2147 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002148
2149 /*
2150 * First parse URI into a standard form
2151 */
2152 ref = xmlCreateURI ();
2153 if (ref == NULL)
2154 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002155 /* If URI not already in "relative" form */
2156 if (URI[0] != '.') {
2157 ret = xmlParseURIReference (ref, (const char *) URI);
2158 if (ret != 0)
2159 goto done; /* Error in URI, return NULL */
2160 } else
2161 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002162
2163 /*
2164 * Next parse base into the same standard form
2165 */
2166 if ((base == NULL) || (*base == 0)) {
2167 val = xmlStrdup (URI);
2168 goto done;
2169 }
2170 bas = xmlCreateURI ();
2171 if (bas == NULL)
2172 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002173 if (base[0] != '.') {
2174 ret = xmlParseURIReference (bas, (const char *) base);
2175 if (ret != 0)
2176 goto done; /* Error in base, return NULL */
2177 } else
2178 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002179
2180 /*
2181 * If the scheme / server on the URI differs from the base,
2182 * just return the URI
2183 */
2184 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002185 ((bas->scheme == NULL) ||
2186 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2187 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002188 val = xmlStrdup (URI);
2189 goto done;
2190 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002191 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2192 val = xmlStrdup(BAD_CAST "");
2193 goto done;
2194 }
2195 if (bas->path == NULL) {
2196 val = xmlStrdup((xmlChar *)ref->path);
2197 goto done;
2198 }
2199 if (ref->path == NULL) {
2200 ref->path = (char *) "/";
2201 remove_path = 1;
2202 }
William M. Brackf7789b12004-06-07 08:57:27 +00002203
2204 /*
2205 * At this point (at last!) we can compare the two paths
2206 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002207 * First we take care of the special case where either of the
2208 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002209 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002210 if (bas->path == NULL) {
2211 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002212 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002213 if (*uptr == '/')
2214 uptr++;
2215 val = xmlStrdup(uptr);
2216 }
2217 goto done;
2218 }
William M. Brackf7789b12004-06-07 08:57:27 +00002219 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002220 if (ref->path == NULL) {
2221 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002222 if (bptr[ix] == '/')
2223 nbslash++;
2224 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002225 uptr = NULL;
2226 len = 1; /* this is for a string terminator only */
2227 } else {
2228 /*
2229 * Next we compare the two strings and find where they first differ
2230 */
2231 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2232 pos += 2;
2233 if ((*bptr == '.') && (bptr[1] == '/'))
2234 bptr += 2;
2235 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2236 bptr++;
2237 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2238 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002239
William M. Brack820d5ed2005-09-14 05:24:27 +00002240 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002241 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002242 goto done; /* (I can't imagine why anyone would do this) */
2243 }
2244
2245 /*
2246 * In URI, "back up" to the last '/' encountered. This will be the
2247 * beginning of the "unique" suffix of URI
2248 */
2249 ix = pos;
2250 if ((ref->path[ix] == '/') && (ix > 0))
2251 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002252 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2253 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002254 for (; ix > 0; ix--) {
2255 if (ref->path[ix] == '/')
2256 break;
2257 }
2258 if (ix == 0) {
2259 uptr = (xmlChar *)ref->path;
2260 } else {
2261 ix++;
2262 uptr = (xmlChar *)&ref->path[ix];
2263 }
2264
2265 /*
2266 * In base, count the number of '/' from the differing point
2267 */
2268 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2269 for (; bptr[ix] != 0; ix++) {
2270 if (bptr[ix] == '/')
2271 nbslash++;
2272 }
2273 }
2274 len = xmlStrlen (uptr) + 1;
2275 }
2276
William M. Brackf7789b12004-06-07 08:57:27 +00002277 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002278 if (uptr != NULL)
2279 val = xmlStrdup (uptr);
William M. Brackf7789b12004-06-07 08:57:27 +00002280 goto done;
2281 }
William M. Brackf7789b12004-06-07 08:57:27 +00002282
2283 /*
2284 * Allocate just enough space for the returned string -
2285 * length of the remainder of the URI, plus enough space
2286 * for the "../" groups, plus one for the terminator
2287 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002288 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002289 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002290 xmlGenericError(xmlGenericErrorContext,
2291 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002292 goto done;
2293 }
2294 vptr = val;
2295 /*
2296 * Put in as many "../" as needed
2297 */
2298 for (; nbslash>0; nbslash--) {
2299 *vptr++ = '.';
2300 *vptr++ = '.';
2301 *vptr++ = '/';
2302 }
2303 /*
2304 * Finish up with the end of the URI
2305 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002306 if (uptr != NULL) {
2307 if ((vptr > val) && (len > 0) &&
2308 (uptr[0] == '/') && (vptr[-1] == '/')) {
2309 memcpy (vptr, uptr + 1, len - 1);
2310 vptr[len - 2] = 0;
2311 } else {
2312 memcpy (vptr, uptr, len);
2313 vptr[len - 1] = 0;
2314 }
2315 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002316 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002317 }
William M. Brackf7789b12004-06-07 08:57:27 +00002318
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002319done:
William M. Brackf7789b12004-06-07 08:57:27 +00002320 /*
2321 * Free the working variables
2322 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002323 if (remove_path != 0)
2324 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002325 if (ref != NULL)
2326 xmlFreeURI (ref);
2327 if (bas != NULL)
2328 xmlFreeURI (bas);
2329
2330 return val;
2331}
2332
2333/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002334 * xmlCanonicPath:
2335 * @path: the resource locator in a filesystem notation
2336 *
2337 * Constructs a canonic path from the specified path.
2338 *
2339 * Returns a new canonic path, or a duplicate of the path parameter if the
2340 * construction fails. The caller is responsible for freeing the memory occupied
2341 * by the returned string. If there is insufficient memory available, or the
2342 * argument is NULL, the function returns NULL.
2343 */
2344#define IS_WINDOWS_PATH(p) \
2345 ((p != NULL) && \
2346 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2347 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2348 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002349xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002350xmlCanonicPath(const xmlChar *path)
2351{
William M. Brack22242272007-01-27 07:59:37 +00002352/*
2353 * For Windows implementations, additional work needs to be done to
2354 * replace backslashes in pathnames with "forward slashes"
2355 */
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002356#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002357 int len = 0;
2358 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002359 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002360#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002361 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002362 xmlChar *ret;
2363 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002364
2365 if (path == NULL)
2366 return(NULL);
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002367 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002368 xmlFreeURI(uri);
2369 return xmlStrdup(path);
2370 }
2371
William M. Brack22242272007-01-27 07:59:37 +00002372 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002373 absuri = xmlStrstr(path, BAD_CAST "://");
2374 if (absuri != NULL) {
2375 int l, j;
2376 unsigned char c;
2377 xmlChar *escURI;
2378
2379 /*
2380 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002381 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002382 * part matches a protocol.
2383 */
2384 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002385 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002386 if ((l <= 0) || (l > 20))
2387 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002388 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002389 for (j = 0;j < l;j++) {
2390 c = path[j];
2391 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2392 goto path_processing;
2393 }
2394
William M. Brack22242272007-01-27 07:59:37 +00002395 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002396 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2397 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002398 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002399 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002400 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002401 if (uri != NULL) {
2402 xmlFreeURI(uri);
2403 return escURI;
2404 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002405 }
2406 }
2407
2408path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002409/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002410#if defined(_WIN32) && !defined(__CYGWIN__)
2411 /*
William M. Brack22242272007-01-27 07:59:37 +00002412 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002413 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002414 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002415 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002416 return(NULL);
2417 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002418
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002419 len = xmlStrlen(path);
2420 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002421 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002422 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002423 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002424 uri->path = xmlMallocAtomic(len + 2);
2425 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002426 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002427 return(NULL);
2428 }
William M. Brack22242272007-01-27 07:59:37 +00002429 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002430 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002431 p = uri->path + 1;
2432 strncpy(p, path, len + 1);
2433 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002434 uri->path = xmlStrdup(path);
2435 if (uri->path == NULL) {
2436 xmlFreeURI(uri);
2437 return(NULL);
2438 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002439 p = uri->path;
2440 }
William M. Brack22242272007-01-27 07:59:37 +00002441 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002442 while (*p != '\0') {
2443 if (*p == '\\')
2444 *p = '/';
2445 p++;
2446 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002447
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002448 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002449 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002450 } else {
2451 ret = xmlSaveUri(uri);
2452 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002453
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002454 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002455#else
2456 ret = xmlStrdup((const xmlChar *) path);
2457#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002458 return(ret);
2459}
Owen Taylor3473f882001-02-23 17:55:21 +00002460
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002461/**
2462 * xmlPathToURI:
2463 * @path: the resource locator in a filesystem notation
2464 *
2465 * Constructs an URI expressing the existing path
2466 *
2467 * Returns a new URI, or a duplicate of the path parameter if the
2468 * construction fails. The caller is responsible for freeing the memory
2469 * occupied by the returned string. If there is insufficient memory available,
2470 * or the argument is NULL, the function returns NULL.
2471 */
2472xmlChar *
2473xmlPathToURI(const xmlChar *path)
2474{
2475 xmlURIPtr uri;
2476 xmlURI temp;
2477 xmlChar *ret, *cal;
2478
2479 if (path == NULL)
2480 return(NULL);
2481
2482 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2483 xmlFreeURI(uri);
2484 return xmlStrdup(path);
2485 }
2486 cal = xmlCanonicPath(path);
2487 if (cal == NULL)
2488 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002489#if defined(_WIN32) && !defined(__CYGWIN__)
2490 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2491 If 'cal' is a valid URI allready then we are done here, as continuing would make
2492 it invalid. */
2493 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2494 xmlFreeURI(uri);
2495 return cal;
2496 }
2497 /* 'cal' can contain a relative path with backslashes. If that is processed
2498 by xmlSaveURI, they will be escaped and the external entity loader machinery
2499 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2500 ret = cal;
2501 while (*ret != '\0') {
2502 if (*ret == '\\')
2503 *ret = '/';
2504 ret++;
2505 }
2506#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002507 memset(&temp, 0, sizeof(temp));
2508 temp.path = (char *) cal;
2509 ret = xmlSaveUri(&temp);
2510 xmlFree(cal);
2511 return(ret);
2512}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002513#define bottom_uri
2514#include "elfgcchack.h"