/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 2396, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
/************************************************************************
 *									*
 *		Macros to differentiate various character type		*
 *			directly extracted from RFC 2396		*
 *									*
 ************************************************************************/

/*
 * Macros taking "x" test a single character value; macros taking "p" test
 * the character(s) found at a pointer, because they may have to look at a
 * complete "%XX" escape sequence.  All of them evaluate their argument
 * several times, so never pass an expression with side effects.
 */

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* Drop any IS_DIGIT inherited from an included header before redefining. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||	\
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The && chain short-circuits, so (p)[1] and (p)[2] are only read once the
 * preceding character is known not to be the string terminator.
 */
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	    ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||	\
	    ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||	\
	    ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	    ((*(p) == ',')))

/*
 * rel_segment   = 1*( unreserved | escaped |
 *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	    ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	    ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||	\
	    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *                      "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	    ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||	\
	    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	    ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	    (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * NOTE: the macro additionally accepts "[" and "]".  They are part of the
 * "reserved" set above, but are still matched here as well.
 */
#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||		\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' is taken to
 * start a 3 character "%XX" escape, anything else is a single character.
 * The argument is fully parenthesized so that expressions such as
 * NEXT(*pp) advance the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
177
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/************************************************************************
 *									*
 *			Generic URI structure functions			*
 *									*
 ************************************************************************/
193
194/**
195 * xmlCreateURI:
196 *
197 * Simply creates an empty xmlURI
198 *
199 * Returns the new structure or NULL in case of error
200 */
201xmlURIPtr
202xmlCreateURI(void) {
203 xmlURIPtr ret;
204
205 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
206 if (ret == NULL) {
207 xmlGenericError(xmlGenericErrorContext,
208 "xmlCreateURI: out of memory\n");
209 return(NULL);
210 }
211 memset(ret, 0, sizeof(xmlURI));
212 return(ret);
213}
214
215/**
216 * xmlSaveUri:
217 * @uri: pointer to an xmlURI
218 *
219 * Save the URI as an escaped string
220 *
221 * Returns a new string (to be deallocated by caller)
222 */
223xmlChar *
224xmlSaveUri(xmlURIPtr uri) {
225 xmlChar *ret = NULL;
226 const char *p;
227 int len;
228 int max;
229
230 if (uri == NULL) return(NULL);
231
232
233 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000234 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000235 if (ret == NULL) {
236 xmlGenericError(xmlGenericErrorContext,
237 "xmlSaveUri: out of memory\n");
238 return(NULL);
239 }
240 len = 0;
241
242 if (uri->scheme != NULL) {
243 p = uri->scheme;
244 while (*p != 0) {
245 if (len >= max) {
246 max *= 2;
247 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
248 if (ret == NULL) {
249 xmlGenericError(xmlGenericErrorContext,
250 "xmlSaveUri: out of memory\n");
251 return(NULL);
252 }
253 }
254 ret[len++] = *p++;
255 }
256 if (len >= max) {
257 max *= 2;
258 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
259 if (ret == NULL) {
260 xmlGenericError(xmlGenericErrorContext,
261 "xmlSaveUri: out of memory\n");
262 return(NULL);
263 }
264 }
265 ret[len++] = ':';
266 }
267 if (uri->opaque != NULL) {
268 p = uri->opaque;
269 while (*p != 0) {
270 if (len + 3 >= max) {
271 max *= 2;
272 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
273 if (ret == NULL) {
274 xmlGenericError(xmlGenericErrorContext,
275 "xmlSaveUri: out of memory\n");
276 return(NULL);
277 }
278 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000279 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000280 ret[len++] = *p++;
281 else {
282 int val = *(unsigned char *)p++;
283 int hi = val / 0x10, lo = val % 0x10;
284 ret[len++] = '%';
285 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
286 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
287 }
288 }
Owen Taylor3473f882001-02-23 17:55:21 +0000289 } else {
290 if (uri->server != NULL) {
291 if (len + 3 >= max) {
292 max *= 2;
293 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
294 if (ret == NULL) {
295 xmlGenericError(xmlGenericErrorContext,
296 "xmlSaveUri: out of memory\n");
297 return(NULL);
298 }
299 }
300 ret[len++] = '/';
301 ret[len++] = '/';
302 if (uri->user != NULL) {
303 p = uri->user;
304 while (*p != 0) {
305 if (len + 3 >= max) {
306 max *= 2;
307 ret = (xmlChar *) xmlRealloc(ret,
308 (max + 1) * sizeof(xmlChar));
309 if (ret == NULL) {
310 xmlGenericError(xmlGenericErrorContext,
311 "xmlSaveUri: out of memory\n");
312 return(NULL);
313 }
314 }
315 if ((IS_UNRESERVED(*(p))) ||
316 ((*(p) == ';')) || ((*(p) == ':')) ||
317 ((*(p) == '&')) || ((*(p) == '=')) ||
318 ((*(p) == '+')) || ((*(p) == '$')) ||
319 ((*(p) == ',')))
320 ret[len++] = *p++;
321 else {
322 int val = *(unsigned char *)p++;
323 int hi = val / 0x10, lo = val % 0x10;
324 ret[len++] = '%';
325 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
326 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
327 }
328 }
329 if (len + 3 >= max) {
330 max *= 2;
331 ret = (xmlChar *) xmlRealloc(ret,
332 (max + 1) * sizeof(xmlChar));
333 if (ret == NULL) {
334 xmlGenericError(xmlGenericErrorContext,
335 "xmlSaveUri: out of memory\n");
336 return(NULL);
337 }
338 }
339 ret[len++] = '@';
340 }
341 p = uri->server;
342 while (*p != 0) {
343 if (len >= max) {
344 max *= 2;
345 ret = (xmlChar *) xmlRealloc(ret,
346 (max + 1) * sizeof(xmlChar));
347 if (ret == NULL) {
348 xmlGenericError(xmlGenericErrorContext,
349 "xmlSaveUri: out of memory\n");
350 return(NULL);
351 }
352 }
353 ret[len++] = *p++;
354 }
355 if (uri->port > 0) {
356 if (len + 10 >= max) {
357 max *= 2;
358 ret = (xmlChar *) xmlRealloc(ret,
359 (max + 1) * sizeof(xmlChar));
360 if (ret == NULL) {
361 xmlGenericError(xmlGenericErrorContext,
362 "xmlSaveUri: out of memory\n");
363 return(NULL);
364 }
365 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000366 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000367 }
368 } else if (uri->authority != NULL) {
369 if (len + 3 >= max) {
370 max *= 2;
371 ret = (xmlChar *) xmlRealloc(ret,
372 (max + 1) * sizeof(xmlChar));
373 if (ret == NULL) {
374 xmlGenericError(xmlGenericErrorContext,
375 "xmlSaveUri: out of memory\n");
376 return(NULL);
377 }
378 }
379 ret[len++] = '/';
380 ret[len++] = '/';
381 p = uri->authority;
382 while (*p != 0) {
383 if (len + 3 >= max) {
384 max *= 2;
385 ret = (xmlChar *) xmlRealloc(ret,
386 (max + 1) * sizeof(xmlChar));
387 if (ret == NULL) {
388 xmlGenericError(xmlGenericErrorContext,
389 "xmlSaveUri: out of memory\n");
390 return(NULL);
391 }
392 }
393 if ((IS_UNRESERVED(*(p))) ||
394 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
395 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
396 ((*(p) == '=')) || ((*(p) == '+')))
397 ret[len++] = *p++;
398 else {
399 int val = *(unsigned char *)p++;
400 int hi = val / 0x10, lo = val % 0x10;
401 ret[len++] = '%';
402 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
403 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
404 }
405 }
406 } else if (uri->scheme != NULL) {
407 if (len + 3 >= max) {
408 max *= 2;
409 ret = (xmlChar *) xmlRealloc(ret,
410 (max + 1) * sizeof(xmlChar));
411 if (ret == NULL) {
412 xmlGenericError(xmlGenericErrorContext,
413 "xmlSaveUri: out of memory\n");
414 return(NULL);
415 }
416 }
417 ret[len++] = '/';
418 ret[len++] = '/';
419 }
420 if (uri->path != NULL) {
421 p = uri->path;
422 while (*p != 0) {
423 if (len + 3 >= max) {
424 max *= 2;
425 ret = (xmlChar *) xmlRealloc(ret,
426 (max + 1) * sizeof(xmlChar));
427 if (ret == NULL) {
428 xmlGenericError(xmlGenericErrorContext,
429 "xmlSaveUri: out of memory\n");
430 return(NULL);
431 }
432 }
433 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
434 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
435 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
436 ((*(p) == ',')))
437 ret[len++] = *p++;
438 else {
439 int val = *(unsigned char *)p++;
440 int hi = val / 0x10, lo = val % 0x10;
441 ret[len++] = '%';
442 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
443 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
444 }
445 }
446 }
447 if (uri->query != NULL) {
448 if (len + 3 >= max) {
449 max *= 2;
450 ret = (xmlChar *) xmlRealloc(ret,
451 (max + 1) * sizeof(xmlChar));
452 if (ret == NULL) {
453 xmlGenericError(xmlGenericErrorContext,
454 "xmlSaveUri: out of memory\n");
455 return(NULL);
456 }
457 }
458 ret[len++] = '?';
459 p = uri->query;
460 while (*p != 0) {
461 if (len + 3 >= max) {
462 max *= 2;
463 ret = (xmlChar *) xmlRealloc(ret,
464 (max + 1) * sizeof(xmlChar));
465 if (ret == NULL) {
466 xmlGenericError(xmlGenericErrorContext,
467 "xmlSaveUri: out of memory\n");
468 return(NULL);
469 }
470 }
471 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
472 ret[len++] = *p++;
473 else {
474 int val = *(unsigned char *)p++;
475 int hi = val / 0x10, lo = val % 0x10;
476 ret[len++] = '%';
477 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
478 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
479 }
480 }
481 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000482 }
483 if (uri->fragment != NULL) {
484 if (len + 3 >= max) {
485 max *= 2;
486 ret = (xmlChar *) xmlRealloc(ret,
487 (max + 1) * sizeof(xmlChar));
488 if (ret == NULL) {
489 xmlGenericError(xmlGenericErrorContext,
490 "xmlSaveUri: out of memory\n");
491 return(NULL);
492 }
493 }
494 ret[len++] = '#';
495 p = uri->fragment;
496 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000497 if (len + 3 >= max) {
498 max *= 2;
499 ret = (xmlChar *) xmlRealloc(ret,
500 (max + 1) * sizeof(xmlChar));
501 if (ret == NULL) {
502 xmlGenericError(xmlGenericErrorContext,
503 "xmlSaveUri: out of memory\n");
504 return(NULL);
505 }
506 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000507 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
508 ret[len++] = *p++;
509 else {
510 int val = *(unsigned char *)p++;
511 int hi = val / 0x10, lo = val % 0x10;
512 ret[len++] = '%';
513 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
514 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000515 }
516 }
Owen Taylor3473f882001-02-23 17:55:21 +0000517 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000518 if (len >= max) {
519 max *= 2;
520 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
521 if (ret == NULL) {
522 xmlGenericError(xmlGenericErrorContext,
523 "xmlSaveUri: out of memory\n");
524 return(NULL);
525 }
526 }
527 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000528 return(ret);
529}
530
531/**
532 * xmlPrintURI:
533 * @stream: a FILE* for the output
534 * @uri: pointer to an xmlURI
535 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000536 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000537 */
538void
539xmlPrintURI(FILE *stream, xmlURIPtr uri) {
540 xmlChar *out;
541
542 out = xmlSaveUri(uri);
543 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000544 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000545 xmlFree(out);
546 }
547}
548
549/**
550 * xmlCleanURI:
551 * @uri: pointer to an xmlURI
552 *
553 * Make sure the xmlURI struct is free of content
554 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000555static void
Owen Taylor3473f882001-02-23 17:55:21 +0000556xmlCleanURI(xmlURIPtr uri) {
557 if (uri == NULL) return;
558
559 if (uri->scheme != NULL) xmlFree(uri->scheme);
560 uri->scheme = NULL;
561 if (uri->server != NULL) xmlFree(uri->server);
562 uri->server = NULL;
563 if (uri->user != NULL) xmlFree(uri->user);
564 uri->user = NULL;
565 if (uri->path != NULL) xmlFree(uri->path);
566 uri->path = NULL;
567 if (uri->fragment != NULL) xmlFree(uri->fragment);
568 uri->fragment = NULL;
569 if (uri->opaque != NULL) xmlFree(uri->opaque);
570 uri->opaque = NULL;
571 if (uri->authority != NULL) xmlFree(uri->authority);
572 uri->authority = NULL;
573 if (uri->query != NULL) xmlFree(uri->query);
574 uri->query = NULL;
575}
576
577/**
578 * xmlFreeURI:
579 * @uri: pointer to an xmlURI
580 *
581 * Free up the xmlURI struct
582 */
583void
584xmlFreeURI(xmlURIPtr uri) {
585 if (uri == NULL) return;
586
587 if (uri->scheme != NULL) xmlFree(uri->scheme);
588 if (uri->server != NULL) xmlFree(uri->server);
589 if (uri->user != NULL) xmlFree(uri->user);
590 if (uri->path != NULL) xmlFree(uri->path);
591 if (uri->fragment != NULL) xmlFree(uri->fragment);
592 if (uri->opaque != NULL) xmlFree(uri->opaque);
593 if (uri->authority != NULL) xmlFree(uri->authority);
594 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000595 xmlFree(uri);
596}
597
/************************************************************************
 *									*
 *			Helper functions				*
 *									*
 ************************************************************************/
603
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  In addition, runs of "/" found
 * after the first segment are collapsed into a single "/".
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads, "out" writes; out never gets ahead of cur.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Skip backwards over the "/" separator(s) in front of "cur".
         * If that reaches the start of the buffer there is no previous
         * segment, so keep going from here.  The test looks at segp[-1]
         * rather than stepping onto the character first: otherwise a one
         * character first segment (as in "a/b/../../c") would be mistaken
         * for "no previous segment" and its "a/.." left in place.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* segp[-1] is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000791
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an hexadecimal digit (0-9, a-f, A-F), 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
799
Owen Taylor3473f882001-02-23 17:55:21 +0000800/**
801 * xmlURIUnescapeString:
802 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000803 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000805 *
806 * Unescaping routine, does not do validity checks !
807 * Output is direct unsigned char translation of %XX values (no encoding)
808 *
809 * Returns an copy of the string, but unescaped
810 */
811char *
812xmlURIUnescapeString(const char *str, int len, char *target) {
813 char *ret, *out;
814 const char *in;
815
816 if (str == NULL)
817 return(NULL);
818 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000819 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000820
821 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000822 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000823 if (ret == NULL) {
824 xmlGenericError(xmlGenericErrorContext,
825 "xmlURIUnescapeString: out of memory\n");
826 return(NULL);
827 }
828 } else
829 ret = target;
830 in = str;
831 out = ret;
832 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000833 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000834 in++;
835 if ((*in >= '0') && (*in <= '9'))
836 *out = (*in - '0');
837 else if ((*in >= 'a') && (*in <= 'f'))
838 *out = (*in - 'a') + 10;
839 else if ((*in >= 'A') && (*in <= 'F'))
840 *out = (*in - 'A') + 10;
841 in++;
842 if ((*in >= '0') && (*in <= '9'))
843 *out = *out * 16 + (*in - '0');
844 else if ((*in >= 'a') && (*in <= 'f'))
845 *out = *out * 16 + (*in - 'a') + 10;
846 else if ((*in >= 'A') && (*in <= 'F'))
847 *out = *out * 16 + (*in - 'A') + 10;
848 in++;
849 len -= 3;
850 out++;
851 } else {
852 *out++ = *in++;
853 len--;
854 }
855 }
856 *out = 0;
857 return(ret);
858}
859
860/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000861 * xmlURIEscapeStr:
862 * @str: string to escape
863 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000864 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000865 * This routine escapes a string to hex, ignoring reserved characters (a-z)
866 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000867 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000868 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000869 */
870xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000871xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
872 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000873 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000874
Owen Taylor3473f882001-02-23 17:55:21 +0000875 unsigned int len, out;
876
877 if (str == NULL)
878 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000879 if (str[0] == 0)
880 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000881 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000882 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000883
884 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000885 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000886 if (ret == NULL) {
887 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000888 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000889 return(NULL);
890 }
891 in = (const xmlChar *) str;
892 out = 0;
893 while(*in != 0) {
894 if (len - out <= 3) {
895 len += 20;
896 ret = (xmlChar *) xmlRealloc(ret, len);
897 if (ret == NULL) {
898 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000899 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000900 return(NULL);
901 }
902 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000903
904 ch = *in;
905
Daniel Veillardeb475a32002-04-14 22:00:22 +0000906 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000907 unsigned char val;
908 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000909 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000910 if (val <= 9)
911 ret[out++] = '0' + val;
912 else
913 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000914 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000915 if (val <= 9)
916 ret[out++] = '0' + val;
917 else
918 ret[out++] = 'A' + val - 0xA;
919 in++;
920 } else {
921 ret[out++] = *in++;
922 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000923
Owen Taylor3473f882001-02-23 17:55:21 +0000924 }
925 ret[out] = 0;
926 return(ret);
927}
928
/**
 * xmlURIEscape:
 * @str:  the string of the URI to escape
 *
 * Escaping routine, does not do validity checks !
 * It will try to escape the chars needing this, but this is heuristic
 * based, so it is impossible to be sure.
 *
 * Returns a copy of the string, but escaped
 *
 * 25 May 2001
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
 * according to RFC2396.
 *   - Carl Douglas
 */
944xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000945xmlURIEscape(const xmlChar * str)
946{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000947 xmlChar *ret, *segment = NULL;
948 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000949 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000950
Daniel Veillard6278fb52001-05-25 07:38:41 +0000951#define NULLCHK(p) if(!p) { \
952 xmlGenericError(xmlGenericErrorContext, \
953 "xmlURIEscape: out of memory\n"); \
954 return NULL; }
955
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000956 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000957 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000958
959 uri = xmlCreateURI();
960 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000961 /*
962 * Allow escaping errors in the unescaped form
963 */
964 uri->cleanup = 1;
965 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000966 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000967 xmlFreeURI(uri);
968 return (NULL);
969 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000970 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000971
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000972 if (!uri)
973 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000974
975 ret = NULL;
976
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000977 if (uri->scheme) {
978 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
979 NULLCHK(segment)
980 ret = xmlStrcat(ret, segment);
981 ret = xmlStrcat(ret, BAD_CAST ":");
982 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000983 }
984
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000985 if (uri->authority) {
986 segment =
987 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
988 NULLCHK(segment)
989 ret = xmlStrcat(ret, BAD_CAST "//");
990 ret = xmlStrcat(ret, segment);
991 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000992 }
993
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000994 if (uri->user) {
995 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
996 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +0000997 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000998 ret = xmlStrcat(ret, segment);
999 ret = xmlStrcat(ret, BAD_CAST "@");
1000 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001001 }
1002
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001003 if (uri->server) {
1004 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1005 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001006 if (uri->user == NULL)
1007 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001008 ret = xmlStrcat(ret, segment);
1009 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001010 }
1011
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001012 if (uri->port) {
1013 xmlChar port[10];
1014
Daniel Veillard43d3f612001-11-10 11:57:23 +00001015 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001016 ret = xmlStrcat(ret, BAD_CAST ":");
1017 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001018 }
1019
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001020 if (uri->path) {
1021 segment =
1022 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1023 NULLCHK(segment)
1024 ret = xmlStrcat(ret, segment);
1025 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001026 }
1027
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001028 if (uri->query) {
1029 segment =
1030 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1031 NULLCHK(segment)
1032 ret = xmlStrcat(ret, BAD_CAST "?");
1033 ret = xmlStrcat(ret, segment);
1034 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001035 }
1036
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001037 if (uri->opaque) {
1038 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1039 NULLCHK(segment)
1040 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001041 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001042 }
1043
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001044 if (uri->fragment) {
1045 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1046 NULLCHK(segment)
1047 ret = xmlStrcat(ret, BAD_CAST "#");
1048 ret = xmlStrcat(ret, segment);
1049 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001050 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001051
1052 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001053#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001054
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001055 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001056}
1057
Owen Taylor3473f882001-02-23 17:55:21 +00001058/************************************************************************
1059 * *
1060 * Escaped URI parsing *
1061 * *
1062 ************************************************************************/
1063
1064/**
1065 * xmlParseURIFragment:
1066 * @uri: pointer to an URI structure
1067 * @str: pointer to the string to analyze
1068 *
1069 * Parse an URI fragment string and fills in the appropriate fields
1070 * of the @uri structure.
1071 *
1072 * fragment = *uric
1073 *
1074 * Returns 0 or the error code
1075 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001076static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001077xmlParseURIFragment(xmlURIPtr uri, const char **str)
1078{
Owen Taylor3473f882001-02-23 17:55:21 +00001079 const char *cur = *str;
1080
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001081 if (str == NULL)
1082 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001083
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001084 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001085 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001086 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001087 if (uri->fragment != NULL)
1088 xmlFree(uri->fragment);
1089 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001090 }
1091 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001092 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001093}
1094
1095/**
1096 * xmlParseURIQuery:
1097 * @uri: pointer to an URI structure
1098 * @str: pointer to the string to analyze
1099 *
1100 * Parse the query part of an URI
1101 *
1102 * query = *uric
1103 *
1104 * Returns 0 or the error code
1105 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001106static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001107xmlParseURIQuery(xmlURIPtr uri, const char **str)
1108{
Owen Taylor3473f882001-02-23 17:55:21 +00001109 const char *cur = *str;
1110
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001111 if (str == NULL)
1112 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001113
Daniel Veillard9231ff92003-03-23 22:00:51 +00001114 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001115 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001116 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001117 if (uri->query != NULL)
1118 xmlFree(uri->query);
1119 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001120 }
1121 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001122 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001123}
1124
1125/**
1126 * xmlParseURIScheme:
1127 * @uri: pointer to an URI structure
1128 * @str: pointer to the string to analyze
1129 *
1130 * Parse an URI scheme
1131 *
1132 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1133 *
1134 * Returns 0 or the error code
1135 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001136static int
Owen Taylor3473f882001-02-23 17:55:21 +00001137xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1138 const char *cur;
1139
1140 if (str == NULL)
1141 return(-1);
1142
1143 cur = *str;
1144 if (!IS_ALPHA(*cur))
1145 return(2);
1146 cur++;
1147 while (IS_SCHEME(*cur)) cur++;
1148 if (uri != NULL) {
1149 if (uri->scheme != NULL) xmlFree(uri->scheme);
1150 /* !!! strndup */
1151 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1152 }
1153 *str = cur;
1154 return(0);
1155}
1156
1157/**
1158 * xmlParseURIOpaquePart:
1159 * @uri: pointer to an URI structure
1160 * @str: pointer to the string to analyze
1161 *
1162 * Parse an URI opaque part
1163 *
1164 * opaque_part = uric_no_slash *uric
1165 *
1166 * Returns 0 or the error code
1167 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001168static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001169xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1170{
Owen Taylor3473f882001-02-23 17:55:21 +00001171 const char *cur;
1172
1173 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001174 return (-1);
1175
Owen Taylor3473f882001-02-23 17:55:21 +00001176 cur = *str;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001177 if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001178 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001179 }
1180 NEXT(cur);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001181 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001182 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001183 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001184 if (uri->opaque != NULL)
1185 xmlFree(uri->opaque);
1186 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001187 }
1188 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001189 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001190}
1191
1192/**
1193 * xmlParseURIServer:
1194 * @uri: pointer to an URI structure
1195 * @str: pointer to the string to analyze
1196 *
1197 * Parse a server subpart of an URI, it's a finer grain analysis
1198 * of the authority part.
1199 *
1200 * server = [ [ userinfo "@" ] hostport ]
1201 * userinfo = *( unreserved | escaped |
1202 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1203 * hostport = host [ ":" port ]
William M. Brack015ccb22005-02-13 08:18:52 +00001204 * host = hostname | IPv4address | IPv6reference
Owen Taylor3473f882001-02-23 17:55:21 +00001205 * hostname = *( domainlabel "." ) toplabel [ "." ]
1206 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1207 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
William M. Brack015ccb22005-02-13 08:18:52 +00001208 * IPv6reference = "[" IPv6address "]"
1209 * IPv6address = hexpart [ ":" IPv4address ]
1210 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1211 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1212 * hexseq = hex4 *( ":" hex4)
1213 * hex4 = 1*4hexdig
Owen Taylor3473f882001-02-23 17:55:21 +00001214 * port = *digit
1215 *
1216 * Returns 0 or the error code
1217 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001218static int
Owen Taylor3473f882001-02-23 17:55:21 +00001219xmlParseURIServer(xmlURIPtr uri, const char **str) {
1220 const char *cur;
1221 const char *host, *tmp;
William M. Brack015ccb22005-02-13 08:18:52 +00001222 const int IPV4max = 4;
1223 const int IPV6max = 8;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001224 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001225
1226 if (str == NULL)
1227 return(-1);
1228
1229 cur = *str;
1230
1231 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001232 * is there a userinfo ?
Owen Taylor3473f882001-02-23 17:55:21 +00001233 */
1234 while (IS_USERINFO(cur)) NEXT(cur);
1235 if (*cur == '@') {
1236 if (uri != NULL) {
1237 if (uri->user != NULL) xmlFree(uri->user);
1238 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1239 }
1240 cur++;
1241 } else {
1242 if (uri != NULL) {
1243 if (uri->user != NULL) xmlFree(uri->user);
1244 uri->user = NULL;
1245 }
1246 cur = *str;
1247 }
1248 /*
1249 * This can be empty in the case where there is no server
1250 */
1251 host = cur;
1252 if (*cur == '/') {
1253 if (uri != NULL) {
1254 if (uri->authority != NULL) xmlFree(uri->authority);
1255 uri->authority = NULL;
1256 if (uri->server != NULL) xmlFree(uri->server);
1257 uri->server = NULL;
1258 uri->port = 0;
1259 }
1260 return(0);
1261 }
1262 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001263 * host part of hostport can denote an IPV4 address, an IPV6 address
1264 * or an unresolved name. Check the IP first, its easier to detect
1265 * errors if wrong one.
1266 * An IPV6 address must start with a '[' and end with a ']'.
Owen Taylor3473f882001-02-23 17:55:21 +00001267 */
William M. Brack015ccb22005-02-13 08:18:52 +00001268 if (*cur == '[') {
1269 int compress=0;
1270 cur++;
1271 for (oct = 0; oct < IPV6max; ++oct) {
1272 if (*cur == ':') {
1273 if (compress)
1274 return(3); /* multiple compression attempted */
1275 if (!oct) { /* initial char is compression */
1276 if (*++cur != ':')
1277 return(3);
1278 }
1279 compress = 1; /* set compression-encountered flag */
1280 cur++; /* skip over the second ':' */
1281 continue;
1282 }
1283 while(IS_HEX(*cur)) cur++;
1284 if (oct == (IPV6max-1))
1285 continue;
1286 if (*cur != ':')
1287 break;
1288 cur++;
1289 }
1290 if ((!compress) && (oct != IPV6max))
1291 return(3);
1292 if (*cur != ']')
1293 return(3);
1294 if (uri != NULL) {
1295 if (uri->server != NULL) xmlFree(uri->server);
1296 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1297 (cur-host)-1);
1298 }
1299 cur++;
1300 } else {
1301 /*
1302 * Not IPV6, maybe IPV4
1303 */
1304 for (oct = 0; oct < IPV4max; ++oct) {
1305 if (*cur == '.')
1306 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1307 while(IS_DIGIT(*cur)) cur++;
1308 if (oct == (IPV4max-1))
1309 continue;
1310 if (*cur != '.')
1311 break;
1312 cur++;
1313 }
Owen Taylor3473f882001-02-23 17:55:21 +00001314 }
William M. Brack015ccb22005-02-13 08:18:52 +00001315 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1316 IS_ALPHA(*cur))) {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001317 /* maybe host_name */
1318 if (!IS_ALPHANUM(*cur))
1319 return(4); /* e.g. http://xml.$oft */
1320 do {
1321 do ++cur; while (IS_ALPHANUM(*cur));
1322 if (*cur == '-') {
1323 --cur;
1324 if (*cur == '.')
1325 return(5); /* e.g. http://xml.-soft */
1326 ++cur;
1327 continue;
1328 }
1329 if (*cur == '.') {
1330 --cur;
1331 if (*cur == '-')
1332 return(6); /* e.g. http://xml-.soft */
1333 if (*cur == '.')
1334 return(7); /* e.g. http://xml..soft */
1335 ++cur;
1336 continue;
1337 }
1338 break;
1339 } while (1);
1340 tmp = cur;
1341 if (tmp[-1] == '.')
1342 --tmp; /* e.g. http://xml.$Oft/ */
1343 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1344 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1345 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001346 }
Owen Taylor3473f882001-02-23 17:55:21 +00001347 if (uri != NULL) {
1348 if (uri->authority != NULL) xmlFree(uri->authority);
1349 uri->authority = NULL;
William M. Brack015ccb22005-02-13 08:18:52 +00001350 if (host[0] != '[') { /* it's not an IPV6 addr */
1351 if (uri->server != NULL) xmlFree(uri->server);
1352 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1353 }
Owen Taylor3473f882001-02-23 17:55:21 +00001354 }
Owen Taylor3473f882001-02-23 17:55:21 +00001355 /*
1356 * finish by checking for a port presence.
1357 */
1358 if (*cur == ':') {
1359 cur++;
1360 if (IS_DIGIT(*cur)) {
1361 if (uri != NULL)
1362 uri->port = 0;
1363 while (IS_DIGIT(*cur)) {
1364 if (uri != NULL)
1365 uri->port = uri->port * 10 + (*cur - '0');
1366 cur++;
1367 }
1368 }
1369 }
1370 *str = cur;
1371 return(0);
1372}
1373
1374/**
1375 * xmlParseURIRelSegment:
1376 * @uri: pointer to an URI structure
1377 * @str: pointer to the string to analyze
1378 *
1379 * Parse an URI relative segment
1380 *
1381 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1382 * "+" | "$" | "," )
1383 *
1384 * Returns 0 or the error code
1385 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001386static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001387xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1388{
Owen Taylor3473f882001-02-23 17:55:21 +00001389 const char *cur;
1390
1391 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001392 return (-1);
1393
Owen Taylor3473f882001-02-23 17:55:21 +00001394 cur = *str;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001395 if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001396 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001397 }
1398 NEXT(cur);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001399 while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001400 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001401 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001402 if (uri->path != NULL)
1403 xmlFree(uri->path);
1404 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001405 }
1406 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001407 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001408}
1409
1410/**
1411 * xmlParseURIPathSegments:
1412 * @uri: pointer to an URI structure
1413 * @str: pointer to the string to analyze
1414 * @slash: should we add a leading slash
1415 *
1416 * Parse an URI set of path segments
1417 *
1418 * path_segments = segment *( "/" segment )
1419 * segment = *pchar *( ";" param )
1420 * param = *pchar
1421 *
1422 * Returns 0 or the error code
1423 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001424static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001425xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1426{
Owen Taylor3473f882001-02-23 17:55:21 +00001427 const char *cur;
1428
1429 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001430 return (-1);
1431
Owen Taylor3473f882001-02-23 17:55:21 +00001432 cur = *str;
1433
1434 do {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001435 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001436 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001437 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001438 cur++;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001439 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001440 NEXT(cur);
1441 }
1442 if (*cur != '/')
1443 break;
1444 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001445 } while (1);
1446 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001447 int len, len2 = 0;
1448 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001449
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001450 /*
1451 * Concat the set of path segments to the current path
1452 */
1453 len = cur - *str;
1454 if (slash)
1455 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001456
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001457 if (uri->path != NULL) {
1458 len2 = strlen(uri->path);
1459 len += len2;
1460 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001461 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001462 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001463 xmlGenericError(xmlGenericErrorContext,
1464 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001465 *str = cur;
1466 return (-1);
1467 }
1468 if (uri->path != NULL)
1469 memcpy(path, uri->path, len2);
1470 if (slash) {
1471 path[len2] = '/';
1472 len2++;
1473 }
1474 path[len2] = 0;
1475 if (cur - *str > 0)
1476 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1477 if (uri->path != NULL)
1478 xmlFree(uri->path);
1479 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001480 }
1481 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001482 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001483}
1484
1485/**
1486 * xmlParseURIAuthority:
1487 * @uri: pointer to an URI structure
1488 * @str: pointer to the string to analyze
1489 *
1490 * Parse the authority part of an URI.
1491 *
1492 * authority = server | reg_name
1493 * server = [ [ userinfo "@" ] hostport ]
1494 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1495 * "@" | "&" | "=" | "+" )
1496 *
1497 * Note : this is completely ambiguous since reg_name is allowed to
1498 * use the full set of chars in use by server:
1499 *
1500 * 3.2.1. Registry-based Naming Authority
1501 *
1502 * The structure of a registry-based naming authority is specific
1503 * to the URI scheme, but constrained to the allowed characters
1504 * for an authority component.
1505 *
1506 * Returns 0 or the error code
1507 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001508static int
Owen Taylor3473f882001-02-23 17:55:21 +00001509xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1510 const char *cur;
1511 int ret;
1512
1513 if (str == NULL)
1514 return(-1);
1515
1516 cur = *str;
1517
1518 /*
1519 * try first to parse it as a server string.
1520 */
1521 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001522 if ((ret == 0) && (*str != NULL) &&
1523 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001524 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001525 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001526
1527 /*
1528 * failed, fallback to reg_name
1529 */
1530 if (!IS_REG_NAME(cur)) {
1531 return(5);
1532 }
1533 NEXT(cur);
1534 while (IS_REG_NAME(cur)) NEXT(cur);
1535 if (uri != NULL) {
1536 if (uri->server != NULL) xmlFree(uri->server);
1537 uri->server = NULL;
1538 if (uri->user != NULL) xmlFree(uri->user);
1539 uri->user = NULL;
1540 if (uri->authority != NULL) xmlFree(uri->authority);
1541 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1542 }
1543 *str = cur;
1544 return(0);
1545}
1546
1547/**
1548 * xmlParseURIHierPart:
1549 * @uri: pointer to an URI structure
1550 * @str: pointer to the string to analyze
1551 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001552 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001553 *
1554 * hier_part = ( net_path | abs_path ) [ "?" query ]
1555 * abs_path = "/" path_segments
1556 * net_path = "//" authority [ abs_path ]
1557 *
1558 * Returns 0 or the error code
1559 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001560static int
Owen Taylor3473f882001-02-23 17:55:21 +00001561xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1562 int ret;
1563 const char *cur;
1564
1565 if (str == NULL)
1566 return(-1);
1567
1568 cur = *str;
1569
1570 if ((cur[0] == '/') && (cur[1] == '/')) {
1571 cur += 2;
1572 ret = xmlParseURIAuthority(uri, &cur);
1573 if (ret != 0)
1574 return(ret);
1575 if (cur[0] == '/') {
1576 cur++;
1577 ret = xmlParseURIPathSegments(uri, &cur, 1);
1578 }
1579 } else if (cur[0] == '/') {
1580 cur++;
1581 ret = xmlParseURIPathSegments(uri, &cur, 1);
1582 } else {
1583 return(4);
1584 }
1585 if (ret != 0)
1586 return(ret);
1587 if (*cur == '?') {
1588 cur++;
1589 ret = xmlParseURIQuery(uri, &cur);
1590 if (ret != 0)
1591 return(ret);
1592 }
1593 *str = cur;
1594 return(0);
1595}
1596
1597/**
1598 * xmlParseAbsoluteURI:
1599 * @uri: pointer to an URI structure
1600 * @str: pointer to the string to analyze
1601 *
1602 * Parse an URI reference string and fills in the appropriate fields
1603 * of the @uri structure
1604 *
1605 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1606 *
1607 * Returns 0 or the error code
1608 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001609static int
Owen Taylor3473f882001-02-23 17:55:21 +00001610xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1611 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001612 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001613
1614 if (str == NULL)
1615 return(-1);
1616
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001617 cur = *str;
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 ret = xmlParseURIScheme(uri, str);
1620 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001621 if (**str != ':') {
1622 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001623 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001624 }
Owen Taylor3473f882001-02-23 17:55:21 +00001625 (*str)++;
1626 if (**str == '/')
1627 return(xmlParseURIHierPart(uri, str));
1628 return(xmlParseURIOpaquePart(uri, str));
1629}
1630
1631/**
1632 * xmlParseRelativeURI:
1633 * @uri: pointer to an URI structure
1634 * @str: pointer to the string to analyze
1635 *
1636 * Parse an relative URI string and fills in the appropriate fields
1637 * of the @uri structure
1638 *
1639 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1640 * abs_path = "/" path_segments
1641 * net_path = "//" authority [ abs_path ]
1642 * rel_path = rel_segment [ abs_path ]
1643 *
1644 * Returns 0 or the error code
1645 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001646static int
Owen Taylor3473f882001-02-23 17:55:21 +00001647xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1648 int ret = 0;
1649 const char *cur;
1650
1651 if (str == NULL)
1652 return(-1);
1653
1654 cur = *str;
1655 if ((cur[0] == '/') && (cur[1] == '/')) {
1656 cur += 2;
1657 ret = xmlParseURIAuthority(uri, &cur);
1658 if (ret != 0)
1659 return(ret);
1660 if (cur[0] == '/') {
1661 cur++;
1662 ret = xmlParseURIPathSegments(uri, &cur, 1);
1663 }
1664 } else if (cur[0] == '/') {
1665 cur++;
1666 ret = xmlParseURIPathSegments(uri, &cur, 1);
1667 } else if (cur[0] != '#' && cur[0] != '?') {
1668 ret = xmlParseURIRelSegment(uri, &cur);
1669 if (ret != 0)
1670 return(ret);
1671 if (cur[0] == '/') {
1672 cur++;
1673 ret = xmlParseURIPathSegments(uri, &cur, 1);
1674 }
1675 }
1676 if (ret != 0)
1677 return(ret);
1678 if (*cur == '?') {
1679 cur++;
1680 ret = xmlParseURIQuery(uri, &cur);
1681 if (ret != 0)
1682 return(ret);
1683 }
1684 *str = cur;
1685 return(ret);
1686}
1687
1688/**
1689 * xmlParseURIReference:
1690 * @uri: pointer to an URI structure
1691 * @str: the string to analyze
1692 *
1693 * Parse an URI reference string and fills in the appropriate fields
1694 * of the @uri structure
1695 *
1696 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1697 *
1698 * Returns 0 or the error code
1699 */
1700int
1701xmlParseURIReference(xmlURIPtr uri, const char *str) {
1702 int ret;
1703 const char *tmp = str;
1704
1705 if (str == NULL)
1706 return(-1);
1707 xmlCleanURI(uri);
1708
1709 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001710 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001711 * it fails.
1712 */
1713 ret = xmlParseAbsoluteURI(uri, &str);
1714 if (ret != 0) {
1715 xmlCleanURI(uri);
1716 str = tmp;
1717 ret = xmlParseRelativeURI(uri, &str);
1718 }
1719 if (ret != 0) {
1720 xmlCleanURI(uri);
1721 return(ret);
1722 }
1723
1724 if (*str == '#') {
1725 str++;
1726 ret = xmlParseURIFragment(uri, &str);
1727 if (ret != 0) return(ret);
1728 }
1729 if (*str != 0) {
1730 xmlCleanURI(uri);
1731 return(1);
1732 }
1733 return(0);
1734}
1735
1736/**
1737 * xmlParseURI:
1738 * @str: the URI string to analyze
1739 *
1740 * Parse an URI
1741 *
1742 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1743 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001744 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001745 */
1746xmlURIPtr
1747xmlParseURI(const char *str) {
1748 xmlURIPtr uri;
1749 int ret;
1750
1751 if (str == NULL)
1752 return(NULL);
1753 uri = xmlCreateURI();
1754 if (uri != NULL) {
1755 ret = xmlParseURIReference(uri, str);
1756 if (ret) {
1757 xmlFreeURI(uri);
1758 return(NULL);
1759 }
1760 }
1761 return(uri);
1762}
1763
1764/************************************************************************
1765 * *
1766 * Public functions *
1767 * *
1768 ************************************************************************/
1769
1770/**
1771 * xmlBuildURI:
1772 * @URI: the URI instance found in the document
1773 * @base: the base value
1774 *
 * Computes the final URI of the reference done by checking that
1776 * the given URI is valid, and building the final URI using the
1777 * base URI. This is processed according to section 5.2 of the
1778 * RFC 2396
1779 *
1780 * 5.2. Resolving Relative References to Absolute Form
1781 *
1782 * Returns a new URI string (to be freed by the caller) or NULL in case
1783 * of error.
1784 */
1785xmlChar *
1786xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1787 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001789 xmlURIPtr ref = NULL;
1790 xmlURIPtr bas = NULL;
1791 xmlURIPtr res = NULL;
1792
1793 /*
1794 * 1) The URI reference is parsed into the potential four components and
1795 * fragment identifier, as described in Section 4.3.
1796 *
1797 * NOTE that a completely empty URI is treated by modern browsers
1798 * as a reference to "." rather than as a synonym for the current
1799 * URI. Should we do that here?
1800 */
1801 if (URI == NULL)
1802 ret = -1;
1803 else {
1804 if (*URI) {
1805 ref = xmlCreateURI();
1806 if (ref == NULL)
1807 goto done;
1808 ret = xmlParseURIReference(ref, (const char *) URI);
1809 }
1810 else
1811 ret = 0;
1812 }
1813 if (ret != 0)
1814 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001815 if ((ref != NULL) && (ref->scheme != NULL)) {
1816 /*
1817 * The URI is absolute don't modify.
1818 */
1819 val = xmlStrdup(URI);
1820 goto done;
1821 }
Owen Taylor3473f882001-02-23 17:55:21 +00001822 if (base == NULL)
1823 ret = -1;
1824 else {
1825 bas = xmlCreateURI();
1826 if (bas == NULL)
1827 goto done;
1828 ret = xmlParseURIReference(bas, (const char *) base);
1829 }
1830 if (ret != 0) {
1831 if (ref)
1832 val = xmlSaveUri(ref);
1833 goto done;
1834 }
1835 if (ref == NULL) {
1836 /*
1837 * the base fragment must be ignored
1838 */
1839 if (bas->fragment != NULL) {
1840 xmlFree(bas->fragment);
1841 bas->fragment = NULL;
1842 }
1843 val = xmlSaveUri(bas);
1844 goto done;
1845 }
1846
1847 /*
1848 * 2) If the path component is empty and the scheme, authority, and
1849 * query components are undefined, then it is a reference to the
1850 * current document and we are done. Otherwise, the reference URI's
1851 * query and fragment components are defined as found (or not found)
1852 * within the URI reference and not inherited from the base URI.
1853 *
1854 * NOTE that in modern browsers, the parsing differs from the above
1855 * in the following aspect: the query component is allowed to be
1856 * defined while still treating this as a reference to the current
1857 * document.
1858 */
1859 res = xmlCreateURI();
1860 if (res == NULL)
1861 goto done;
1862 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1863 ((ref->authority == NULL) && (ref->server == NULL))) {
1864 if (bas->scheme != NULL)
1865 res->scheme = xmlMemStrdup(bas->scheme);
1866 if (bas->authority != NULL)
1867 res->authority = xmlMemStrdup(bas->authority);
1868 else if (bas->server != NULL) {
1869 res->server = xmlMemStrdup(bas->server);
1870 if (bas->user != NULL)
1871 res->user = xmlMemStrdup(bas->user);
1872 res->port = bas->port;
1873 }
1874 if (bas->path != NULL)
1875 res->path = xmlMemStrdup(bas->path);
1876 if (ref->query != NULL)
1877 res->query = xmlMemStrdup(ref->query);
1878 else if (bas->query != NULL)
1879 res->query = xmlMemStrdup(bas->query);
1880 if (ref->fragment != NULL)
1881 res->fragment = xmlMemStrdup(ref->fragment);
1882 goto step_7;
1883 }
Owen Taylor3473f882001-02-23 17:55:21 +00001884
1885 /*
1886 * 3) If the scheme component is defined, indicating that the reference
1887 * starts with a scheme name, then the reference is interpreted as an
1888 * absolute URI and we are done. Otherwise, the reference URI's
1889 * scheme is inherited from the base URI's scheme component.
1890 */
1891 if (ref->scheme != NULL) {
1892 val = xmlSaveUri(ref);
1893 goto done;
1894 }
1895 if (bas->scheme != NULL)
1896 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001897
1898 if (ref->query != NULL)
1899 res->query = xmlMemStrdup(ref->query);
1900 if (ref->fragment != NULL)
1901 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001902
1903 /*
1904 * 4) If the authority component is defined, then the reference is a
1905 * network-path and we skip to step 7. Otherwise, the reference
1906 * URI's authority is inherited from the base URI's authority
1907 * component, which will also be undefined if the URI scheme does not
1908 * use an authority component.
1909 */
1910 if ((ref->authority != NULL) || (ref->server != NULL)) {
1911 if (ref->authority != NULL)
1912 res->authority = xmlMemStrdup(ref->authority);
1913 else {
1914 res->server = xmlMemStrdup(ref->server);
1915 if (ref->user != NULL)
1916 res->user = xmlMemStrdup(ref->user);
1917 res->port = ref->port;
1918 }
1919 if (ref->path != NULL)
1920 res->path = xmlMemStrdup(ref->path);
1921 goto step_7;
1922 }
1923 if (bas->authority != NULL)
1924 res->authority = xmlMemStrdup(bas->authority);
1925 else if (bas->server != NULL) {
1926 res->server = xmlMemStrdup(bas->server);
1927 if (bas->user != NULL)
1928 res->user = xmlMemStrdup(bas->user);
1929 res->port = bas->port;
1930 }
1931
1932 /*
1933 * 5) If the path component begins with a slash character ("/"), then
1934 * the reference is an absolute-path and we skip to step 7.
1935 */
1936 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1937 res->path = xmlMemStrdup(ref->path);
1938 goto step_7;
1939 }
1940
1941
1942 /*
1943 * 6) If this step is reached, then we are resolving a relative-path
1944 * reference. The relative path needs to be merged with the base
1945 * URI's path. Although there are many ways to do this, we will
1946 * describe a simple method using a separate string buffer.
1947 *
1948 * Allocate a buffer large enough for the result string.
1949 */
1950 len = 2; /* extra / and 0 */
1951 if (ref->path != NULL)
1952 len += strlen(ref->path);
1953 if (bas->path != NULL)
1954 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001955 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001956 if (res->path == NULL) {
1957 xmlGenericError(xmlGenericErrorContext,
1958 "xmlBuildURI: out of memory\n");
1959 goto done;
1960 }
1961 res->path[0] = 0;
1962
1963 /*
1964 * a) All but the last segment of the base URI's path component is
1965 * copied to the buffer. In other words, any characters after the
1966 * last (right-most) slash character, if any, are excluded.
1967 */
1968 cur = 0;
1969 out = 0;
1970 if (bas->path != NULL) {
1971 while (bas->path[cur] != 0) {
1972 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1973 cur++;
1974 if (bas->path[cur] == 0)
1975 break;
1976
1977 cur++;
1978 while (out < cur) {
1979 res->path[out] = bas->path[out];
1980 out++;
1981 }
1982 }
1983 }
1984 res->path[out] = 0;
1985
1986 /*
1987 * b) The reference's path component is appended to the buffer
1988 * string.
1989 */
1990 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001991 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001992 /*
1993 * Ensure the path includes a '/'
1994 */
1995 if ((out == 0) && (bas->server != NULL))
1996 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001997 while (ref->path[indx] != 0) {
1998 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00001999 }
2000 }
2001 res->path[out] = 0;
2002
2003 /*
2004 * Steps c) to h) are really path normalization steps
2005 */
2006 xmlNormalizeURIPath(res->path);
2007
2008step_7:
2009
2010 /*
2011 * 7) The resulting URI components, including any inherited from the
2012 * base URI, are recombined to give the absolute form of the URI
2013 * reference.
2014 */
2015 val = xmlSaveUri(res);
2016
2017done:
2018 if (ref != NULL)
2019 xmlFreeURI(ref);
2020 if (bas != NULL)
2021 xmlFreeURI(bas);
2022 if (res != NULL)
2023 xmlFreeURI(res);
2024 return(val);
2025}
2026
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002027/**
William M. Brackf7789b12004-06-07 08:57:27 +00002028 * xmlBuildRelativeURI:
2029 * @URI: the URI reference under consideration
2030 * @base: the base value
2031 *
2032 * Expresses the URI of the reference in terms relative to the
2033 * base. Some examples of this operation include:
2034 * base = "http://site1.com/docs/book1.html"
2035 * URI input URI returned
2036 * docs/pic1.gif pic1.gif
2037 * docs/img/pic1.gif img/pic1.gif
2038 * img/pic1.gif ../img/pic1.gif
2039 * http://site1.com/docs/pic1.gif pic1.gif
2040 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2041 *
2042 * base = "docs/book1.html"
2043 * URI input URI returned
2044 * docs/pic1.gif pic1.gif
2045 * docs/img/pic1.gif img/pic1.gif
2046 * img/pic1.gif ../img/pic1.gif
2047 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2048 *
2049 *
2050 * Note: if the URI reference is really wierd or complicated, it may be
2051 * worthwhile to first convert it into a "nice" one by calling
2052 * xmlBuildURI (using 'base') before calling this routine,
2053 * since this routine (for reasonable efficiency) assumes URI has
2054 * already been through some validation.
2055 *
2056 * Returns a new URI string (to be freed by the caller) or NULL in case
2057 * error.
2058 */
2059xmlChar *
2060xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2061{
2062 xmlChar *val = NULL;
2063 int ret;
2064 int ix;
2065 int pos = 0;
2066 int nbslash = 0;
2067 xmlURIPtr ref = NULL;
2068 xmlURIPtr bas = NULL;
2069 xmlChar *bptr, *uptr, *vptr;
2070
2071 if ((URI == NULL) || (*URI == 0))
2072 return NULL;
2073 /*
2074 * Special case - if URI starts with '.', we assume it's already
2075 * in relative form, so nothing to do.
2076 */
2077 if (*URI == '.') {
2078 val = xmlStrdup (URI);
2079 goto done;
2080 }
2081
2082 /*
2083 * First parse URI into a standard form
2084 */
2085 ref = xmlCreateURI ();
2086 if (ref == NULL)
2087 return NULL;
2088 ret = xmlParseURIReference (ref, (const char *) URI);
2089 if (ret != 0)
2090 goto done; /* Error in URI, return NULL */
2091
2092 /*
2093 * Next parse base into the same standard form
2094 */
2095 if ((base == NULL) || (*base == 0)) {
2096 val = xmlStrdup (URI);
2097 goto done;
2098 }
2099 bas = xmlCreateURI ();
2100 if (bas == NULL)
2101 goto done;
2102 ret = xmlParseURIReference (bas, (const char *) base);
2103 if (ret != 0)
2104 goto done; /* Error in base, return NULL */
2105
2106 /*
2107 * If the scheme / server on the URI differs from the base,
2108 * just return the URI
2109 */
2110 if ((ref->scheme != NULL) &&
2111 ((bas->scheme == NULL) ||
2112 xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme) ||
2113 xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server))) {
2114 val = xmlStrdup (URI);
2115 goto done;
2116 }
2117
2118 /*
2119 * At this point (at last!) we can compare the two paths
2120 *
2121 * First we compare the two strings and find where they first differ
2122 */
2123 bptr = (xmlChar *)bas->path;
William M. Brackf20fbf72004-06-25 05:49:08 +00002124 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2125 pos += 2;
2126 if ((*bptr == '.') && (bptr[1] == '/'))
2127 bptr += 2;
2128 else if ((*bptr == '/') && (ref->path[pos] != '/'))
William M. Brackf7789b12004-06-07 08:57:27 +00002129 bptr++;
2130 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2131 pos++;
2132
2133 if (bptr[pos] == ref->path[pos]) {
2134 val = NULL; /* if no differences, return NULL */
2135 goto done; /* (I can't imagine why anyone would do this) */
2136 }
2137
2138 /*
2139 * In URI, "back up" to the last '/' encountered. This will be the
2140 * beginning of the "unique" suffix of URI
2141 */
2142 ix = pos;
2143 if ((ref->path[ix] == '/') && (ix > 0))
2144 ix--;
2145 for (; ix > 0; ix--) {
2146 if (ref->path[ix] == '/')
2147 break;
2148 }
William M. Brackf2a657a2004-10-27 16:33:09 +00002149 if (ix == 0) {
William M. Brackf7789b12004-06-07 08:57:27 +00002150 uptr = (xmlChar *)ref->path;
William M. Brackf2a657a2004-10-27 16:33:09 +00002151 } else {
2152 ix++;
2153 uptr = (xmlChar *)&ref->path[ix];
2154 }
William M. Brackf7789b12004-06-07 08:57:27 +00002155
2156 /*
2157 * In base, count the number of '/' from the differing point
2158 */
2159 if (bptr[pos] != ref->path[pos]) { /* check for trivial URI == base */
2160 for (; bptr[ix] != 0; ix++) {
2161 if (bptr[ix] == '/')
2162 nbslash++;
2163 }
2164 }
2165
2166 if (nbslash == 0) {
2167 val = xmlStrdup (uptr);
2168 goto done;
2169 }
William M. Brackf7789b12004-06-07 08:57:27 +00002170
2171 /*
2172 * Allocate just enough space for the returned string -
2173 * length of the remainder of the URI, plus enough space
2174 * for the "../" groups, plus one for the terminator
2175 */
2176 ix = xmlStrlen (uptr) + 1;
2177 val = (xmlChar *) xmlMalloc (ix + 3 * nbslash);
2178 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002179 xmlGenericError(xmlGenericErrorContext,
2180 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002181 goto done;
2182 }
2183 vptr = val;
2184 /*
2185 * Put in as many "../" as needed
2186 */
2187 for (; nbslash>0; nbslash--) {
2188 *vptr++ = '.';
2189 *vptr++ = '.';
2190 *vptr++ = '/';
2191 }
2192 /*
2193 * Finish up with the end of the URI
2194 */
2195 memcpy (vptr, uptr, ix);
2196
2197 done:
2198 /*
2199 * Free the working variables
2200 */
2201 if (ref != NULL)
2202 xmlFreeURI (ref);
2203 if (bas != NULL)
2204 xmlFreeURI (bas);
2205
2206 return val;
2207}
2208
2209/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002210 * xmlCanonicPath:
2211 * @path: the resource locator in a filesystem notation
2212 *
2213 * Constructs a canonic path from the specified path.
2214 *
2215 * Returns a new canonic path, or a duplicate of the path parameter if the
2216 * construction fails. The caller is responsible for freeing the memory occupied
2217 * by the returned string. If there is insufficient memory available, or the
2218 * argument is NULL, the function returns NULL.
2219 */
2220#define IS_WINDOWS_PATH(p) \
2221 ((p != NULL) && \
2222 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2223 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2224 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2225xmlChar*
2226xmlCanonicPath(const xmlChar *path)
2227{
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002228#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002229 int len = 0;
2230 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002231 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002232#endif
2233 xmlChar *ret;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002234 xmlURIPtr uri;
2235
2236 if (path == NULL)
2237 return(NULL);
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002238 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002239 xmlFreeURI(uri);
2240 return xmlStrdup(path);
2241 }
2242
2243 uri = xmlCreateURI();
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002244 if (uri == NULL) {
2245 return(NULL);
2246 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002247
Igor Zlatkovicce076162003-02-23 13:39:39 +00002248#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002249 len = xmlStrlen(path);
2250 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2251 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack42331a92004-07-29 07:07:16 +00002252 uri->path = xmlMallocAtomic(len + 2); /* FIXME - check alloc! */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002253 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002254 p = uri->path + 1;
2255 strncpy(p, path, len + 1);
2256 } else {
William M. Bracka3215c72004-07-31 16:24:01 +00002257 uri->path = xmlStrdup(path); /* FIXME - check alloc! */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002258 p = uri->path;
2259 }
2260 while (*p != '\0') {
2261 if (*p == '\\')
2262 *p = '/';
2263 p++;
2264 }
2265#else
Daniel Veillard42f12e92003-03-07 18:32:59 +00002266 uri->path = (char *) xmlStrdup((const xmlChar *) path);
Igor Zlatkovicce076162003-02-23 13:39:39 +00002267#endif
William M. Bracka3215c72004-07-31 16:24:01 +00002268 if (uri->path == NULL) {
2269 xmlFreeURI(uri);
2270 return(NULL);
2271 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002272 ret = xmlSaveUri(uri);
2273 xmlFreeURI(uri);
2274 return(ret);
2275}
Owen Taylor3473f882001-02-23 17:55:21 +00002276