blob: d386e1d3072847f3ed1af5960dfe49b91aed5b3e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#include <string.h>
14
15#include <libxml/xmlmemory.h>
16#include <libxml/uri.h>
17#include <libxml/xmlerror.h>
18
/************************************************************************
 *                                                                      *
 *      Macros to differentiate the various character types,            *
 *      directly extracted from RFC 2396                                *
 *                                                                      *
 ************************************************************************/

/*
 * Macros taking "x" test a single character value; macros taking "p"
 * take a pointer to the character, because the "escaped" production
 * needs to look at the two characters following a "%".
 */

/* lowalpha = "a" | "b" | ... | "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" | "B" | ... | "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" | "1" | ... | "9" */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A" ... "F" | "a" ... "f" */
#define IS_HEX(x)                                                       \
    ((IS_DIGIT(x)) ||                                                   \
     (((x) >= 'a') && ((x) <= 'f')) ||                                  \
     (((x) >= 'A') && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||                    \
     ((x) == ':') || ((x) == '@') || ((x) == '&') ||                    \
     ((x) == '=') || ((x) == '+') || ((x) == '$') ||                    \
     ((x) == ','))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/* scheme = alpha *( alpha | digit | "+" | "-" | "." ) */
#define IS_SCHEME(x)                                                    \
    ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||                                  \
     ((x) == '+') || ((x) == '-') || ((x) == '.'))

/* escaped = "%" hex hex */
#define IS_ESCAPED(p)                                                   \
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/* uric = reserved | unreserved | escaped */
#define IS_URIC(p)                                                      \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p))))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p)                                             \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||           \
     ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||           \
     ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_PCHAR(p)                                                     \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)                                                   \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p)                                                  \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)                                                  \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))
156
/*
 * Advance the pointer lvalue "p" to the next URI character: when it
 * points at a "%" the whole three-byte escape sequence is skipped,
 * otherwise it moves forward by one byte.
 *
 * The macro does not verify that two hex digits follow the "%"; it is
 * meant to be used after one of the IS_xxx(p) tests has accepted the
 * current position.  The argument is evaluated more than once, so it
 * must be free of side effects; it is fully parenthesized so that
 * lvalue expressions such as *pp are advanced correctly.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
162
163/*
164 * Productions from the spec.
165 *
166 * authority = server | reg_name
167 * reg_name = 1*( unreserved | escaped | "$" | "," |
168 * ";" | ":" | "@" | "&" | "=" | "+" )
169 *
170 * path = [ abs_path | opaque_part ]
171 */
172
173/************************************************************************
174 * *
175 * Generic URI structure functions *
176 * *
177 ************************************************************************/
178
179/**
180 * xmlCreateURI:
181 *
182 * Simply creates an empty xmlURI
183 *
184 * Returns the new structure or NULL in case of error
185 */
186xmlURIPtr
187xmlCreateURI(void) {
188 xmlURIPtr ret;
189
190 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
191 if (ret == NULL) {
192 xmlGenericError(xmlGenericErrorContext,
193 "xmlCreateURI: out of memory\n");
194 return(NULL);
195 }
196 memset(ret, 0, sizeof(xmlURI));
197 return(ret);
198}
199
200/**
201 * xmlSaveUri:
202 * @uri: pointer to an xmlURI
203 *
204 * Save the URI as an escaped string
205 *
206 * Returns a new string (to be deallocated by caller)
207 */
208xmlChar *
209xmlSaveUri(xmlURIPtr uri) {
210 xmlChar *ret = NULL;
211 const char *p;
212 int len;
213 int max;
214
215 if (uri == NULL) return(NULL);
216
217
218 max = 80;
219 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
220 if (ret == NULL) {
221 xmlGenericError(xmlGenericErrorContext,
222 "xmlSaveUri: out of memory\n");
223 return(NULL);
224 }
225 len = 0;
226
227 if (uri->scheme != NULL) {
228 p = uri->scheme;
229 while (*p != 0) {
230 if (len >= max) {
231 max *= 2;
232 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
233 if (ret == NULL) {
234 xmlGenericError(xmlGenericErrorContext,
235 "xmlSaveUri: out of memory\n");
236 return(NULL);
237 }
238 }
239 ret[len++] = *p++;
240 }
241 if (len >= max) {
242 max *= 2;
243 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
244 if (ret == NULL) {
245 xmlGenericError(xmlGenericErrorContext,
246 "xmlSaveUri: out of memory\n");
247 return(NULL);
248 }
249 }
250 ret[len++] = ':';
251 }
252 if (uri->opaque != NULL) {
253 p = uri->opaque;
254 while (*p != 0) {
255 if (len + 3 >= max) {
256 max *= 2;
257 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
258 if (ret == NULL) {
259 xmlGenericError(xmlGenericErrorContext,
260 "xmlSaveUri: out of memory\n");
261 return(NULL);
262 }
263 }
264 if ((IS_UNRESERVED(*(p))) ||
265 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
266 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
267 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
268 ret[len++] = *p++;
269 else {
270 int val = *(unsigned char *)p++;
271 int hi = val / 0x10, lo = val % 0x10;
272 ret[len++] = '%';
273 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
274 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
275 }
276 }
277 if (len >= max) {
278 max *= 2;
279 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
280 if (ret == NULL) {
281 xmlGenericError(xmlGenericErrorContext,
282 "xmlSaveUri: out of memory\n");
283 return(NULL);
284 }
285 }
286 ret[len++] = 0;
287 } else {
288 if (uri->server != NULL) {
289 if (len + 3 >= max) {
290 max *= 2;
291 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
292 if (ret == NULL) {
293 xmlGenericError(xmlGenericErrorContext,
294 "xmlSaveUri: out of memory\n");
295 return(NULL);
296 }
297 }
298 ret[len++] = '/';
299 ret[len++] = '/';
300 if (uri->user != NULL) {
301 p = uri->user;
302 while (*p != 0) {
303 if (len + 3 >= max) {
304 max *= 2;
305 ret = (xmlChar *) xmlRealloc(ret,
306 (max + 1) * sizeof(xmlChar));
307 if (ret == NULL) {
308 xmlGenericError(xmlGenericErrorContext,
309 "xmlSaveUri: out of memory\n");
310 return(NULL);
311 }
312 }
313 if ((IS_UNRESERVED(*(p))) ||
314 ((*(p) == ';')) || ((*(p) == ':')) ||
315 ((*(p) == '&')) || ((*(p) == '=')) ||
316 ((*(p) == '+')) || ((*(p) == '$')) ||
317 ((*(p) == ',')))
318 ret[len++] = *p++;
319 else {
320 int val = *(unsigned char *)p++;
321 int hi = val / 0x10, lo = val % 0x10;
322 ret[len++] = '%';
323 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
324 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
325 }
326 }
327 if (len + 3 >= max) {
328 max *= 2;
329 ret = (xmlChar *) xmlRealloc(ret,
330 (max + 1) * sizeof(xmlChar));
331 if (ret == NULL) {
332 xmlGenericError(xmlGenericErrorContext,
333 "xmlSaveUri: out of memory\n");
334 return(NULL);
335 }
336 }
337 ret[len++] = '@';
338 }
339 p = uri->server;
340 while (*p != 0) {
341 if (len >= max) {
342 max *= 2;
343 ret = (xmlChar *) xmlRealloc(ret,
344 (max + 1) * sizeof(xmlChar));
345 if (ret == NULL) {
346 xmlGenericError(xmlGenericErrorContext,
347 "xmlSaveUri: out of memory\n");
348 return(NULL);
349 }
350 }
351 ret[len++] = *p++;
352 }
353 if (uri->port > 0) {
354 if (len + 10 >= max) {
355 max *= 2;
356 ret = (xmlChar *) xmlRealloc(ret,
357 (max + 1) * sizeof(xmlChar));
358 if (ret == NULL) {
359 xmlGenericError(xmlGenericErrorContext,
360 "xmlSaveUri: out of memory\n");
361 return(NULL);
362 }
363 }
364 len += sprintf((char *) &ret[len], ":%d", uri->port);
365 }
366 } else if (uri->authority != NULL) {
367 if (len + 3 >= max) {
368 max *= 2;
369 ret = (xmlChar *) xmlRealloc(ret,
370 (max + 1) * sizeof(xmlChar));
371 if (ret == NULL) {
372 xmlGenericError(xmlGenericErrorContext,
373 "xmlSaveUri: out of memory\n");
374 return(NULL);
375 }
376 }
377 ret[len++] = '/';
378 ret[len++] = '/';
379 p = uri->authority;
380 while (*p != 0) {
381 if (len + 3 >= max) {
382 max *= 2;
383 ret = (xmlChar *) xmlRealloc(ret,
384 (max + 1) * sizeof(xmlChar));
385 if (ret == NULL) {
386 xmlGenericError(xmlGenericErrorContext,
387 "xmlSaveUri: out of memory\n");
388 return(NULL);
389 }
390 }
391 if ((IS_UNRESERVED(*(p))) ||
392 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
393 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
394 ((*(p) == '=')) || ((*(p) == '+')))
395 ret[len++] = *p++;
396 else {
397 int val = *(unsigned char *)p++;
398 int hi = val / 0x10, lo = val % 0x10;
399 ret[len++] = '%';
400 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
401 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
402 }
403 }
404 } else if (uri->scheme != NULL) {
405 if (len + 3 >= max) {
406 max *= 2;
407 ret = (xmlChar *) xmlRealloc(ret,
408 (max + 1) * sizeof(xmlChar));
409 if (ret == NULL) {
410 xmlGenericError(xmlGenericErrorContext,
411 "xmlSaveUri: out of memory\n");
412 return(NULL);
413 }
414 }
415 ret[len++] = '/';
416 ret[len++] = '/';
417 }
418 if (uri->path != NULL) {
419 p = uri->path;
420 while (*p != 0) {
421 if (len + 3 >= max) {
422 max *= 2;
423 ret = (xmlChar *) xmlRealloc(ret,
424 (max + 1) * sizeof(xmlChar));
425 if (ret == NULL) {
426 xmlGenericError(xmlGenericErrorContext,
427 "xmlSaveUri: out of memory\n");
428 return(NULL);
429 }
430 }
431 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
432 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
433 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
434 ((*(p) == ',')))
435 ret[len++] = *p++;
436 else {
437 int val = *(unsigned char *)p++;
438 int hi = val / 0x10, lo = val % 0x10;
439 ret[len++] = '%';
440 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
441 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
442 }
443 }
444 }
445 if (uri->query != NULL) {
446 if (len + 3 >= max) {
447 max *= 2;
448 ret = (xmlChar *) xmlRealloc(ret,
449 (max + 1) * sizeof(xmlChar));
450 if (ret == NULL) {
451 xmlGenericError(xmlGenericErrorContext,
452 "xmlSaveUri: out of memory\n");
453 return(NULL);
454 }
455 }
456 ret[len++] = '?';
457 p = uri->query;
458 while (*p != 0) {
459 if (len + 3 >= max) {
460 max *= 2;
461 ret = (xmlChar *) xmlRealloc(ret,
462 (max + 1) * sizeof(xmlChar));
463 if (ret == NULL) {
464 xmlGenericError(xmlGenericErrorContext,
465 "xmlSaveUri: out of memory\n");
466 return(NULL);
467 }
468 }
469 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
470 ret[len++] = *p++;
471 else {
472 int val = *(unsigned char *)p++;
473 int hi = val / 0x10, lo = val % 0x10;
474 ret[len++] = '%';
475 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
476 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
477 }
478 }
479 }
480 if (uri->fragment != NULL) {
481 if (len + 3 >= max) {
482 max *= 2;
483 ret = (xmlChar *) xmlRealloc(ret,
484 (max + 1) * sizeof(xmlChar));
485 if (ret == NULL) {
486 xmlGenericError(xmlGenericErrorContext,
487 "xmlSaveUri: out of memory\n");
488 return(NULL);
489 }
490 }
491 ret[len++] = '#';
492 p = uri->fragment;
493 while (*p != 0) {
494 if (len + 3 >= max) {
495 max *= 2;
496 ret = (xmlChar *) xmlRealloc(ret,
497 (max + 1) * sizeof(xmlChar));
498 if (ret == NULL) {
499 xmlGenericError(xmlGenericErrorContext,
500 "xmlSaveUri: out of memory\n");
501 return(NULL);
502 }
503 }
504 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
505 ret[len++] = *p++;
506 else {
507 int val = *(unsigned char *)p++;
508 int hi = val / 0x10, lo = val % 0x10;
509 ret[len++] = '%';
510 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
511 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
512 }
513 }
514 }
515 if (len >= max) {
516 max *= 2;
517 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
518 if (ret == NULL) {
519 xmlGenericError(xmlGenericErrorContext,
520 "xmlSaveUri: out of memory\n");
521 return(NULL);
522 }
523 }
524 ret[len++] = 0;
525 }
526 return(ret);
527}
528
529/**
530 * xmlPrintURI:
531 * @stream: a FILE* for the output
532 * @uri: pointer to an xmlURI
533 *
534 * Prints the URI in the stream @steam.
535 */
536void
537xmlPrintURI(FILE *stream, xmlURIPtr uri) {
538 xmlChar *out;
539
540 out = xmlSaveUri(uri);
541 if (out != NULL) {
542 fprintf(stream, "%s", out);
543 xmlFree(out);
544 }
545}
546
547/**
548 * xmlCleanURI:
549 * @uri: pointer to an xmlURI
550 *
551 * Make sure the xmlURI struct is free of content
552 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000553static void
Owen Taylor3473f882001-02-23 17:55:21 +0000554xmlCleanURI(xmlURIPtr uri) {
555 if (uri == NULL) return;
556
557 if (uri->scheme != NULL) xmlFree(uri->scheme);
558 uri->scheme = NULL;
559 if (uri->server != NULL) xmlFree(uri->server);
560 uri->server = NULL;
561 if (uri->user != NULL) xmlFree(uri->user);
562 uri->user = NULL;
563 if (uri->path != NULL) xmlFree(uri->path);
564 uri->path = NULL;
565 if (uri->fragment != NULL) xmlFree(uri->fragment);
566 uri->fragment = NULL;
567 if (uri->opaque != NULL) xmlFree(uri->opaque);
568 uri->opaque = NULL;
569 if (uri->authority != NULL) xmlFree(uri->authority);
570 uri->authority = NULL;
571 if (uri->query != NULL) xmlFree(uri->query);
572 uri->query = NULL;
573}
574
575/**
576 * xmlFreeURI:
577 * @uri: pointer to an xmlURI
578 *
579 * Free up the xmlURI struct
580 */
581void
582xmlFreeURI(xmlURIPtr uri) {
583 if (uri == NULL) return;
584
585 if (uri->scheme != NULL) xmlFree(uri->scheme);
586 if (uri->server != NULL) xmlFree(uri->server);
587 if (uri->user != NULL) xmlFree(uri->user);
588 if (uri->path != NULL) xmlFree(uri->path);
589 if (uri->fragment != NULL) xmlFree(uri->fragment);
590 if (uri->opaque != NULL) xmlFree(uri->opaque);
591 if (uri->authority != NULL) xmlFree(uri->authority);
592 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000593 xmlFree(uri);
594}
595
596/************************************************************************
597 * *
598 * Helper functions *
599 * *
600 ************************************************************************/
601
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  In addition, runs of "/" found
 * after the first non-empty segment are collapsed to a single "/"
 * (leading slashes are kept as they are).
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * First pass, cases (c) and (d): "cur" reads, "out" writes, and at the
     * top of the loop "cur" always sits on the first char of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), the string is collapsed
     * every time something is removed, so a single "current position"
     * pointer is enough.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment. */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the current segment is "..", or if the next segment _isn't_
         * "..", keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy() must not be used. */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* If there are no previous segments, then keep going from here. */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error.  Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
921
922/**
923 * xmlURIUnescapeString:
924 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000925 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Owen Taylor3473f882001-02-23 17:55:21 +0000926 * @target: optionnal destination buffer
927 *
928 * Unescaping routine, does not do validity checks !
929 * Output is direct unsigned char translation of %XX values (no encoding)
930 *
931 * Returns an copy of the string, but unescaped
932 */
933char *
934xmlURIUnescapeString(const char *str, int len, char *target) {
935 char *ret, *out;
936 const char *in;
937
938 if (str == NULL)
939 return(NULL);
940 if (len <= 0) len = strlen(str);
941 if (len <= 0) return(NULL);
942
943 if (target == NULL) {
944 ret = (char *) xmlMalloc(len + 1);
945 if (ret == NULL) {
946 xmlGenericError(xmlGenericErrorContext,
947 "xmlURIUnescapeString: out of memory\n");
948 return(NULL);
949 }
950 } else
951 ret = target;
952 in = str;
953 out = ret;
954 while(len > 0) {
955 if (*in == '%') {
956 in++;
957 if ((*in >= '0') && (*in <= '9'))
958 *out = (*in - '0');
959 else if ((*in >= 'a') && (*in <= 'f'))
960 *out = (*in - 'a') + 10;
961 else if ((*in >= 'A') && (*in <= 'F'))
962 *out = (*in - 'A') + 10;
963 in++;
964 if ((*in >= '0') && (*in <= '9'))
965 *out = *out * 16 + (*in - '0');
966 else if ((*in >= 'a') && (*in <= 'f'))
967 *out = *out * 16 + (*in - 'a') + 10;
968 else if ((*in >= 'A') && (*in <= 'F'))
969 *out = *out * 16 + (*in - 'A') + 10;
970 in++;
971 len -= 3;
972 out++;
973 } else {
974 *out++ = *in++;
975 len--;
976 }
977 }
978 *out = 0;
979 return(ret);
980}
981
982/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000983 * xmlURIEscapeStr:
984 * @str: string to escape
985 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000986 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000987 * This routine escapes a string to hex, ignoring reserved characters (a-z)
988 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000989 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000990 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000991 */
992xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000993xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
994 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000995 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000996
Owen Taylor3473f882001-02-23 17:55:21 +0000997 unsigned int len, out;
998
999 if (str == NULL)
1000 return(NULL);
1001 len = xmlStrlen(str);
1002 if (len <= 0) return(NULL);
1003
1004 len += 20;
1005 ret = (xmlChar *) xmlMalloc(len);
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext,
1008 "xmlURIEscape: out of memory\n");
1009 return(NULL);
1010 }
1011 in = (const xmlChar *) str;
1012 out = 0;
1013 while(*in != 0) {
1014 if (len - out <= 3) {
1015 len += 20;
1016 ret = (xmlChar *) xmlRealloc(ret, len);
1017 if (ret == NULL) {
1018 xmlGenericError(xmlGenericErrorContext,
1019 "xmlURIEscape: out of memory\n");
1020 return(NULL);
1021 }
1022 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001023
1024 ch = *in;
1025
1026 if ( (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch)) ) {
Owen Taylor3473f882001-02-23 17:55:21 +00001027 unsigned char val;
1028 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001029 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001030 if (val <= 9)
1031 ret[out++] = '0' + val;
1032 else
1033 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001034 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001035 if (val <= 9)
1036 ret[out++] = '0' + val;
1037 else
1038 ret[out++] = 'A' + val - 0xA;
1039 in++;
1040 } else {
1041 ret[out++] = *in++;
1042 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001043
Owen Taylor3473f882001-02-23 17:55:21 +00001044 }
1045 ret[out] = 0;
1046 return(ret);
1047}
1048
Daniel Veillard8514c672001-05-23 10:29:12 +00001049/**
1050 * xmlURIEscape:
1051 * @str: the string of the URI to escape
1052 *
1053 * Escaping routine, does not do validity checks !
1054 * It will try to escape the chars needing this, but this is heuristic
1055 * based it's impossible to be sure.
1056 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001057 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001058 *
1059 * 25 May 2001
1060 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1061 * according to RFC2396.
1062 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001063 */
1064xmlChar *
1065xmlURIEscape(const xmlChar *str) {
Daniel Veillard6278fb52001-05-25 07:38:41 +00001066 xmlChar *ret, *segment = NULL;
1067 xmlURIPtr uri;
Daniel Veillard8514c672001-05-23 10:29:12 +00001068
Daniel Veillard6278fb52001-05-25 07:38:41 +00001069#define NULLCHK(p) if(!p) { \
1070 xmlGenericError(xmlGenericErrorContext, \
1071 "xmlURIEscape: out of memory\n"); \
1072 return NULL; }
1073
1074 uri = xmlParseURI( (const char *) str);
1075
1076 if(!uri)
1077 return NULL;
1078
1079 ret = NULL;
1080
1081 if(uri->scheme) {
1082 segment = xmlURIEscapeStr( BAD_CAST uri->scheme, BAD_CAST "+-.");
1083 NULLCHK(segment)
1084 xmlStrcat(ret, segment);
1085 xmlStrcat(ret, BAD_CAST ":");
1086 xmlFree(segment);
1087 }
1088
1089 if(uri->authority) {
1090 segment = xmlURIEscapeStr( BAD_CAST uri->authority, BAD_CAST "/?;:@");
1091 NULLCHK(segment)
1092 xmlStrcat(ret, BAD_CAST "//");
1093 xmlStrcat(ret, segment);
1094 xmlFree(segment);
1095 }
1096
1097 if(uri->user) {
1098 segment = xmlURIEscapeStr( BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1099 NULLCHK(segment)
1100 xmlStrcat(ret, segment);
1101 xmlStrcat(ret, BAD_CAST "@");
1102 xmlFree(segment);
1103 }
1104
1105 if(uri->server) {
1106 segment = xmlURIEscapeStr( BAD_CAST uri->server, BAD_CAST "/?;:@");
1107 NULLCHK(segment)
1108 xmlStrcat(ret, BAD_CAST "//");
1109 xmlStrcat(ret, segment);
1110 xmlFree(segment);
1111 }
1112
1113 if(uri->port) {
1114 xmlChar port[10];
Daniel Veillarde95e2392001-06-06 10:46:28 +00001115 snprintf((char *) segment, 10, "%d", uri->port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001116 xmlStrcat(ret, BAD_CAST ":");
1117 xmlStrcat(ret, port);
1118 xmlFree(segment);
1119 }
1120
1121 if(uri->path) {
1122 segment = xmlURIEscapeStr( BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1123 NULLCHK(segment)
1124 xmlStrcat(ret, segment);
1125 xmlFree(segment);
1126 }
1127
1128 if(uri->query) {
1129 segment = xmlURIEscapeStr( BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1130 NULLCHK(segment)
1131 xmlStrcat(ret, BAD_CAST "?");
1132 xmlStrcat(ret, segment);
1133 xmlFree(segment);
1134 }
1135
1136 if(uri->opaque) {
1137 segment = xmlURIEscapeStr( BAD_CAST uri->opaque, BAD_CAST "");
1138 NULLCHK(segment)
1139 xmlStrcat(ret, segment);
1140 xmlStrcat(ret, BAD_CAST ":");
1141 xmlFree(segment);
1142 }
1143
1144 if(uri->fragment) {
1145 segment = xmlURIEscapeStr( BAD_CAST uri->fragment, BAD_CAST "#");
1146 NULLCHK(segment)
1147 xmlStrcat(ret, BAD_CAST "#");
1148 xmlStrcat(ret, segment);
1149 xmlFree(segment);
1150 }
1151
1152#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001153
1154 return(ret);
1155}
1156
Owen Taylor3473f882001-02-23 17:55:21 +00001157/************************************************************************
1158 * *
1159 * Escaped URI parsing *
1160 * *
1161 ************************************************************************/
1162
1163/**
1164 * xmlParseURIFragment:
1165 * @uri: pointer to an URI structure
1166 * @str: pointer to the string to analyze
1167 *
1168 * Parse an URI fragment string and fills in the appropriate fields
1169 * of the @uri structure.
1170 *
1171 * fragment = *uric
1172 *
1173 * Returns 0 or the error code
1174 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001175static int
Owen Taylor3473f882001-02-23 17:55:21 +00001176xmlParseURIFragment(xmlURIPtr uri, const char **str) {
1177 const char *cur = *str;
1178
1179 if (str == NULL) return(-1);
1180
1181 while (IS_URIC(cur)) NEXT(cur);
1182 if (uri != NULL) {
1183 if (uri->fragment != NULL) xmlFree(uri->fragment);
1184 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1185 }
1186 *str = cur;
1187 return(0);
1188}
1189
1190/**
1191 * xmlParseURIQuery:
1192 * @uri: pointer to an URI structure
1193 * @str: pointer to the string to analyze
1194 *
1195 * Parse the query part of an URI
1196 *
1197 * query = *uric
1198 *
1199 * Returns 0 or the error code
1200 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001201static int
Owen Taylor3473f882001-02-23 17:55:21 +00001202xmlParseURIQuery(xmlURIPtr uri, const char **str) {
1203 const char *cur = *str;
1204
1205 if (str == NULL) return(-1);
1206
1207 while (IS_URIC(cur)) NEXT(cur);
1208 if (uri != NULL) {
1209 if (uri->query != NULL) xmlFree(uri->query);
1210 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1211 }
1212 *str = cur;
1213 return(0);
1214}
1215
1216/**
1217 * xmlParseURIScheme:
1218 * @uri: pointer to an URI structure
1219 * @str: pointer to the string to analyze
1220 *
1221 * Parse an URI scheme
1222 *
1223 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1224 *
1225 * Returns 0 or the error code
1226 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001227static int
Owen Taylor3473f882001-02-23 17:55:21 +00001228xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1229 const char *cur;
1230
1231 if (str == NULL)
1232 return(-1);
1233
1234 cur = *str;
1235 if (!IS_ALPHA(*cur))
1236 return(2);
1237 cur++;
1238 while (IS_SCHEME(*cur)) cur++;
1239 if (uri != NULL) {
1240 if (uri->scheme != NULL) xmlFree(uri->scheme);
1241 /* !!! strndup */
1242 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1243 }
1244 *str = cur;
1245 return(0);
1246}
1247
1248/**
1249 * xmlParseURIOpaquePart:
1250 * @uri: pointer to an URI structure
1251 * @str: pointer to the string to analyze
1252 *
1253 * Parse an URI opaque part
1254 *
1255 * opaque_part = uric_no_slash *uric
1256 *
1257 * Returns 0 or the error code
1258 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001259static int
Owen Taylor3473f882001-02-23 17:55:21 +00001260xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
1261 const char *cur;
1262
1263 if (str == NULL)
1264 return(-1);
1265
1266 cur = *str;
1267 if (!IS_URIC_NO_SLASH(cur)) {
1268 return(3);
1269 }
1270 NEXT(cur);
1271 while (IS_URIC(cur)) NEXT(cur);
1272 if (uri != NULL) {
1273 if (uri->opaque != NULL) xmlFree(uri->opaque);
1274 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1275 }
1276 *str = cur;
1277 return(0);
1278}
1279
1280/**
1281 * xmlParseURIServer:
1282 * @uri: pointer to an URI structure
1283 * @str: pointer to the string to analyze
1284 *
1285 * Parse a server subpart of an URI, it's a finer grain analysis
1286 * of the authority part.
1287 *
1288 * server = [ [ userinfo "@" ] hostport ]
1289 * userinfo = *( unreserved | escaped |
1290 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1291 * hostport = host [ ":" port ]
1292 * host = hostname | IPv4address
1293 * hostname = *( domainlabel "." ) toplabel [ "." ]
1294 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1295 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1296 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1297 * port = *digit
1298 *
1299 * Returns 0 or the error code
1300 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001301static int
Owen Taylor3473f882001-02-23 17:55:21 +00001302xmlParseURIServer(xmlURIPtr uri, const char **str) {
1303 const char *cur;
1304 const char *host, *tmp;
1305
1306 if (str == NULL)
1307 return(-1);
1308
1309 cur = *str;
1310
1311 /*
1312 * is there an userinfo ?
1313 */
1314 while (IS_USERINFO(cur)) NEXT(cur);
1315 if (*cur == '@') {
1316 if (uri != NULL) {
1317 if (uri->user != NULL) xmlFree(uri->user);
1318 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1319 }
1320 cur++;
1321 } else {
1322 if (uri != NULL) {
1323 if (uri->user != NULL) xmlFree(uri->user);
1324 uri->user = NULL;
1325 }
1326 cur = *str;
1327 }
1328 /*
1329 * This can be empty in the case where there is no server
1330 */
1331 host = cur;
1332 if (*cur == '/') {
1333 if (uri != NULL) {
1334 if (uri->authority != NULL) xmlFree(uri->authority);
1335 uri->authority = NULL;
1336 if (uri->server != NULL) xmlFree(uri->server);
1337 uri->server = NULL;
1338 uri->port = 0;
1339 }
1340 return(0);
1341 }
1342 /*
1343 * host part of hostport can derive either an IPV4 address
1344 * or an unresolved name. Check the IP first, it easier to detect
1345 * errors if wrong one
1346 */
1347 if (IS_DIGIT(*cur)) {
1348 while(IS_DIGIT(*cur)) cur++;
1349 if (*cur != '.')
1350 goto host_name;
1351 cur++;
1352 if (!IS_DIGIT(*cur))
1353 goto host_name;
1354 while(IS_DIGIT(*cur)) cur++;
1355 if (*cur != '.')
1356 goto host_name;
1357 cur++;
1358 if (!IS_DIGIT(*cur))
1359 goto host_name;
1360 while(IS_DIGIT(*cur)) cur++;
1361 if (*cur != '.')
1362 goto host_name;
1363 cur++;
1364 if (!IS_DIGIT(*cur))
1365 goto host_name;
1366 while(IS_DIGIT(*cur)) cur++;
1367 if (uri != NULL) {
1368 if (uri->authority != NULL) xmlFree(uri->authority);
1369 uri->authority = NULL;
1370 if (uri->server != NULL) xmlFree(uri->server);
1371 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1372 }
1373 goto host_done;
1374 }
1375host_name:
1376 /*
1377 * the hostname production as-is is a parser nightmare.
1378 * simplify it to
1379 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1380 * and just make sure the last label starts with a non numeric char.
1381 */
1382 if (!IS_ALPHANUM(*cur))
1383 return(6);
1384 while (IS_ALPHANUM(*cur)) {
1385 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1386 if (*cur == '.')
1387 cur++;
1388 }
1389 tmp = cur;
1390 tmp--;
1391 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1392 tmp++;
1393 if (!IS_ALPHA(*tmp))
1394 return(7);
1395 if (uri != NULL) {
1396 if (uri->authority != NULL) xmlFree(uri->authority);
1397 uri->authority = NULL;
1398 if (uri->server != NULL) xmlFree(uri->server);
1399 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1400 }
1401
1402host_done:
1403
1404 /*
1405 * finish by checking for a port presence.
1406 */
1407 if (*cur == ':') {
1408 cur++;
1409 if (IS_DIGIT(*cur)) {
1410 if (uri != NULL)
1411 uri->port = 0;
1412 while (IS_DIGIT(*cur)) {
1413 if (uri != NULL)
1414 uri->port = uri->port * 10 + (*cur - '0');
1415 cur++;
1416 }
1417 }
1418 }
1419 *str = cur;
1420 return(0);
1421}
1422
1423/**
1424 * xmlParseURIRelSegment:
1425 * @uri: pointer to an URI structure
1426 * @str: pointer to the string to analyze
1427 *
1428 * Parse an URI relative segment
1429 *
1430 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1431 * "+" | "$" | "," )
1432 *
1433 * Returns 0 or the error code
1434 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001435static int
Owen Taylor3473f882001-02-23 17:55:21 +00001436xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
1437 const char *cur;
1438
1439 if (str == NULL)
1440 return(-1);
1441
1442 cur = *str;
1443 if (!IS_SEGMENT(cur)) {
1444 return(3);
1445 }
1446 NEXT(cur);
1447 while (IS_SEGMENT(cur)) NEXT(cur);
1448 if (uri != NULL) {
1449 if (uri->path != NULL) xmlFree(uri->path);
1450 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1451 }
1452 *str = cur;
1453 return(0);
1454}
1455
1456/**
1457 * xmlParseURIPathSegments:
1458 * @uri: pointer to an URI structure
1459 * @str: pointer to the string to analyze
1460 * @slash: should we add a leading slash
1461 *
1462 * Parse an URI set of path segments
1463 *
1464 * path_segments = segment *( "/" segment )
1465 * segment = *pchar *( ";" param )
1466 * param = *pchar
1467 *
1468 * Returns 0 or the error code
1469 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001470static int
Owen Taylor3473f882001-02-23 17:55:21 +00001471xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
1472 const char *cur;
1473
1474 if (str == NULL)
1475 return(-1);
1476
1477 cur = *str;
1478
1479 do {
1480 while (IS_PCHAR(cur)) NEXT(cur);
1481 if (*cur == ';') {
1482 cur++;
1483 while (IS_PCHAR(cur)) NEXT(cur);
1484 }
1485 if (*cur != '/') break;
1486 cur++;
1487 } while (1);
1488 if (uri != NULL) {
1489 int len, len2 = 0;
1490 char *path;
1491
1492 /*
1493 * Concat the set of path segments to the current path
1494 */
1495 len = cur - *str;
1496 if (slash)
1497 len++;
1498
1499 if (uri->path != NULL) {
1500 len2 = strlen(uri->path);
1501 len += len2;
1502 }
1503 path = (char *) xmlMalloc(len + 1);
1504 if (path == NULL) {
1505 xmlGenericError(xmlGenericErrorContext,
1506 "xmlParseURIPathSegments: out of memory\n");
1507 *str = cur;
1508 return(-1);
1509 }
1510 if (uri->path != NULL)
1511 memcpy(path, uri->path, len2);
1512 if (slash) {
1513 path[len2] = '/';
1514 len2++;
1515 }
1516 path[len2] = 0;
1517 if (cur - *str > 0)
1518 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1519 if (uri->path != NULL)
1520 xmlFree(uri->path);
1521 uri->path = path;
1522 }
1523 *str = cur;
1524 return(0);
1525}
1526
1527/**
1528 * xmlParseURIAuthority:
1529 * @uri: pointer to an URI structure
1530 * @str: pointer to the string to analyze
1531 *
1532 * Parse the authority part of an URI.
1533 *
1534 * authority = server | reg_name
1535 * server = [ [ userinfo "@" ] hostport ]
1536 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1537 * "@" | "&" | "=" | "+" )
1538 *
1539 * Note : this is completely ambiguous since reg_name is allowed to
1540 * use the full set of chars in use by server:
1541 *
1542 * 3.2.1. Registry-based Naming Authority
1543 *
1544 * The structure of a registry-based naming authority is specific
1545 * to the URI scheme, but constrained to the allowed characters
1546 * for an authority component.
1547 *
1548 * Returns 0 or the error code
1549 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001550static int
Owen Taylor3473f882001-02-23 17:55:21 +00001551xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1552 const char *cur;
1553 int ret;
1554
1555 if (str == NULL)
1556 return(-1);
1557
1558 cur = *str;
1559
1560 /*
1561 * try first to parse it as a server string.
1562 */
1563 ret = xmlParseURIServer(uri, str);
1564 if (ret == 0)
1565 return(0);
1566
1567 /*
1568 * failed, fallback to reg_name
1569 */
1570 if (!IS_REG_NAME(cur)) {
1571 return(5);
1572 }
1573 NEXT(cur);
1574 while (IS_REG_NAME(cur)) NEXT(cur);
1575 if (uri != NULL) {
1576 if (uri->server != NULL) xmlFree(uri->server);
1577 uri->server = NULL;
1578 if (uri->user != NULL) xmlFree(uri->user);
1579 uri->user = NULL;
1580 if (uri->authority != NULL) xmlFree(uri->authority);
1581 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1582 }
1583 *str = cur;
1584 return(0);
1585}
1586
1587/**
1588 * xmlParseURIHierPart:
1589 * @uri: pointer to an URI structure
1590 * @str: pointer to the string to analyze
1591 *
1592 * Parse an URI hirarchical part
1593 *
1594 * hier_part = ( net_path | abs_path ) [ "?" query ]
1595 * abs_path = "/" path_segments
1596 * net_path = "//" authority [ abs_path ]
1597 *
1598 * Returns 0 or the error code
1599 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001600static int
Owen Taylor3473f882001-02-23 17:55:21 +00001601xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1602 int ret;
1603 const char *cur;
1604
1605 if (str == NULL)
1606 return(-1);
1607
1608 cur = *str;
1609
1610 if ((cur[0] == '/') && (cur[1] == '/')) {
1611 cur += 2;
1612 ret = xmlParseURIAuthority(uri, &cur);
1613 if (ret != 0)
1614 return(ret);
1615 if (cur[0] == '/') {
1616 cur++;
1617 ret = xmlParseURIPathSegments(uri, &cur, 1);
1618 }
1619 } else if (cur[0] == '/') {
1620 cur++;
1621 ret = xmlParseURIPathSegments(uri, &cur, 1);
1622 } else {
1623 return(4);
1624 }
1625 if (ret != 0)
1626 return(ret);
1627 if (*cur == '?') {
1628 cur++;
1629 ret = xmlParseURIQuery(uri, &cur);
1630 if (ret != 0)
1631 return(ret);
1632 }
1633 *str = cur;
1634 return(0);
1635}
1636
1637/**
1638 * xmlParseAbsoluteURI:
1639 * @uri: pointer to an URI structure
1640 * @str: pointer to the string to analyze
1641 *
1642 * Parse an URI reference string and fills in the appropriate fields
1643 * of the @uri structure
1644 *
1645 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1646 *
1647 * Returns 0 or the error code
1648 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001649static int
Owen Taylor3473f882001-02-23 17:55:21 +00001650xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1651 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001652 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001653
1654 if (str == NULL)
1655 return(-1);
1656
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001657 cur = *str;
1658
Owen Taylor3473f882001-02-23 17:55:21 +00001659 ret = xmlParseURIScheme(uri, str);
1660 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001661 if (**str != ':') {
1662 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001663 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001664 }
Owen Taylor3473f882001-02-23 17:55:21 +00001665 (*str)++;
1666 if (**str == '/')
1667 return(xmlParseURIHierPart(uri, str));
1668 return(xmlParseURIOpaquePart(uri, str));
1669}
1670
1671/**
1672 * xmlParseRelativeURI:
1673 * @uri: pointer to an URI structure
1674 * @str: pointer to the string to analyze
1675 *
1676 * Parse an relative URI string and fills in the appropriate fields
1677 * of the @uri structure
1678 *
1679 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1680 * abs_path = "/" path_segments
1681 * net_path = "//" authority [ abs_path ]
1682 * rel_path = rel_segment [ abs_path ]
1683 *
1684 * Returns 0 or the error code
1685 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001686static int
Owen Taylor3473f882001-02-23 17:55:21 +00001687xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1688 int ret = 0;
1689 const char *cur;
1690
1691 if (str == NULL)
1692 return(-1);
1693
1694 cur = *str;
1695 if ((cur[0] == '/') && (cur[1] == '/')) {
1696 cur += 2;
1697 ret = xmlParseURIAuthority(uri, &cur);
1698 if (ret != 0)
1699 return(ret);
1700 if (cur[0] == '/') {
1701 cur++;
1702 ret = xmlParseURIPathSegments(uri, &cur, 1);
1703 }
1704 } else if (cur[0] == '/') {
1705 cur++;
1706 ret = xmlParseURIPathSegments(uri, &cur, 1);
1707 } else if (cur[0] != '#' && cur[0] != '?') {
1708 ret = xmlParseURIRelSegment(uri, &cur);
1709 if (ret != 0)
1710 return(ret);
1711 if (cur[0] == '/') {
1712 cur++;
1713 ret = xmlParseURIPathSegments(uri, &cur, 1);
1714 }
1715 }
1716 if (ret != 0)
1717 return(ret);
1718 if (*cur == '?') {
1719 cur++;
1720 ret = xmlParseURIQuery(uri, &cur);
1721 if (ret != 0)
1722 return(ret);
1723 }
1724 *str = cur;
1725 return(ret);
1726}
1727
1728/**
1729 * xmlParseURIReference:
1730 * @uri: pointer to an URI structure
1731 * @str: the string to analyze
1732 *
1733 * Parse an URI reference string and fills in the appropriate fields
1734 * of the @uri structure
1735 *
1736 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1737 *
1738 * Returns 0 or the error code
1739 */
1740int
1741xmlParseURIReference(xmlURIPtr uri, const char *str) {
1742 int ret;
1743 const char *tmp = str;
1744
1745 if (str == NULL)
1746 return(-1);
1747 xmlCleanURI(uri);
1748
1749 /*
1750 * Try first to parse aboslute refs, then fallback to relative if
1751 * it fails.
1752 */
1753 ret = xmlParseAbsoluteURI(uri, &str);
1754 if (ret != 0) {
1755 xmlCleanURI(uri);
1756 str = tmp;
1757 ret = xmlParseRelativeURI(uri, &str);
1758 }
1759 if (ret != 0) {
1760 xmlCleanURI(uri);
1761 return(ret);
1762 }
1763
1764 if (*str == '#') {
1765 str++;
1766 ret = xmlParseURIFragment(uri, &str);
1767 if (ret != 0) return(ret);
1768 }
1769 if (*str != 0) {
1770 xmlCleanURI(uri);
1771 return(1);
1772 }
1773 return(0);
1774}
1775
1776/**
1777 * xmlParseURI:
1778 * @str: the URI string to analyze
1779 *
1780 * Parse an URI
1781 *
1782 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1783 *
1784 * Returns a newly build xmlURIPtr or NULL in case of error
1785 */
1786xmlURIPtr
1787xmlParseURI(const char *str) {
1788 xmlURIPtr uri;
1789 int ret;
1790
1791 if (str == NULL)
1792 return(NULL);
1793 uri = xmlCreateURI();
1794 if (uri != NULL) {
1795 ret = xmlParseURIReference(uri, str);
1796 if (ret) {
1797 xmlFreeURI(uri);
1798 return(NULL);
1799 }
1800 }
1801 return(uri);
1802}
1803
1804/************************************************************************
1805 * *
1806 * Public functions *
1807 * *
1808 ************************************************************************/
1809
1810/**
1811 * xmlBuildURI:
1812 * @URI: the URI instance found in the document
1813 * @base: the base value
1814 *
1815 * Computes he final URI of the reference done by checking that
1816 * the given URI is valid, and building the final URI using the
1817 * base URI. This is processed according to section 5.2 of the
1818 * RFC 2396
1819 *
1820 * 5.2. Resolving Relative References to Absolute Form
1821 *
1822 * Returns a new URI string (to be freed by the caller) or NULL in case
1823 * of error.
1824 */
1825xmlChar *
1826xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1827 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001828 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlURIPtr ref = NULL;
1830 xmlURIPtr bas = NULL;
1831 xmlURIPtr res = NULL;
1832
1833 /*
1834 * 1) The URI reference is parsed into the potential four components and
1835 * fragment identifier, as described in Section 4.3.
1836 *
1837 * NOTE that a completely empty URI is treated by modern browsers
1838 * as a reference to "." rather than as a synonym for the current
1839 * URI. Should we do that here?
1840 */
1841 if (URI == NULL)
1842 ret = -1;
1843 else {
1844 if (*URI) {
1845 ref = xmlCreateURI();
1846 if (ref == NULL)
1847 goto done;
1848 ret = xmlParseURIReference(ref, (const char *) URI);
1849 }
1850 else
1851 ret = 0;
1852 }
1853 if (ret != 0)
1854 goto done;
1855 if (base == NULL)
1856 ret = -1;
1857 else {
1858 bas = xmlCreateURI();
1859 if (bas == NULL)
1860 goto done;
1861 ret = xmlParseURIReference(bas, (const char *) base);
1862 }
1863 if (ret != 0) {
1864 if (ref)
1865 val = xmlSaveUri(ref);
1866 goto done;
1867 }
1868 if (ref == NULL) {
1869 /*
1870 * the base fragment must be ignored
1871 */
1872 if (bas->fragment != NULL) {
1873 xmlFree(bas->fragment);
1874 bas->fragment = NULL;
1875 }
1876 val = xmlSaveUri(bas);
1877 goto done;
1878 }
1879
1880 /*
1881 * 2) If the path component is empty and the scheme, authority, and
1882 * query components are undefined, then it is a reference to the
1883 * current document and we are done. Otherwise, the reference URI's
1884 * query and fragment components are defined as found (or not found)
1885 * within the URI reference and not inherited from the base URI.
1886 *
1887 * NOTE that in modern browsers, the parsing differs from the above
1888 * in the following aspect: the query component is allowed to be
1889 * defined while still treating this as a reference to the current
1890 * document.
1891 */
1892 res = xmlCreateURI();
1893 if (res == NULL)
1894 goto done;
1895 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1896 ((ref->authority == NULL) && (ref->server == NULL))) {
1897 if (bas->scheme != NULL)
1898 res->scheme = xmlMemStrdup(bas->scheme);
1899 if (bas->authority != NULL)
1900 res->authority = xmlMemStrdup(bas->authority);
1901 else if (bas->server != NULL) {
1902 res->server = xmlMemStrdup(bas->server);
1903 if (bas->user != NULL)
1904 res->user = xmlMemStrdup(bas->user);
1905 res->port = bas->port;
1906 }
1907 if (bas->path != NULL)
1908 res->path = xmlMemStrdup(bas->path);
1909 if (ref->query != NULL)
1910 res->query = xmlMemStrdup(ref->query);
1911 else if (bas->query != NULL)
1912 res->query = xmlMemStrdup(bas->query);
1913 if (ref->fragment != NULL)
1914 res->fragment = xmlMemStrdup(ref->fragment);
1915 goto step_7;
1916 }
1917
1918 if (ref->query != NULL)
1919 res->query = xmlMemStrdup(ref->query);
1920 if (ref->fragment != NULL)
1921 res->fragment = xmlMemStrdup(ref->fragment);
1922
1923 /*
1924 * 3) If the scheme component is defined, indicating that the reference
1925 * starts with a scheme name, then the reference is interpreted as an
1926 * absolute URI and we are done. Otherwise, the reference URI's
1927 * scheme is inherited from the base URI's scheme component.
1928 */
1929 if (ref->scheme != NULL) {
1930 val = xmlSaveUri(ref);
1931 goto done;
1932 }
1933 if (bas->scheme != NULL)
1934 res->scheme = xmlMemStrdup(bas->scheme);
1935
1936 /*
1937 * 4) If the authority component is defined, then the reference is a
1938 * network-path and we skip to step 7. Otherwise, the reference
1939 * URI's authority is inherited from the base URI's authority
1940 * component, which will also be undefined if the URI scheme does not
1941 * use an authority component.
1942 */
1943 if ((ref->authority != NULL) || (ref->server != NULL)) {
1944 if (ref->authority != NULL)
1945 res->authority = xmlMemStrdup(ref->authority);
1946 else {
1947 res->server = xmlMemStrdup(ref->server);
1948 if (ref->user != NULL)
1949 res->user = xmlMemStrdup(ref->user);
1950 res->port = ref->port;
1951 }
1952 if (ref->path != NULL)
1953 res->path = xmlMemStrdup(ref->path);
1954 goto step_7;
1955 }
1956 if (bas->authority != NULL)
1957 res->authority = xmlMemStrdup(bas->authority);
1958 else if (bas->server != NULL) {
1959 res->server = xmlMemStrdup(bas->server);
1960 if (bas->user != NULL)
1961 res->user = xmlMemStrdup(bas->user);
1962 res->port = bas->port;
1963 }
1964
1965 /*
1966 * 5) If the path component begins with a slash character ("/"), then
1967 * the reference is an absolute-path and we skip to step 7.
1968 */
1969 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1970 res->path = xmlMemStrdup(ref->path);
1971 goto step_7;
1972 }
1973
1974
1975 /*
1976 * 6) If this step is reached, then we are resolving a relative-path
1977 * reference. The relative path needs to be merged with the base
1978 * URI's path. Although there are many ways to do this, we will
1979 * describe a simple method using a separate string buffer.
1980 *
1981 * Allocate a buffer large enough for the result string.
1982 */
1983 len = 2; /* extra / and 0 */
1984 if (ref->path != NULL)
1985 len += strlen(ref->path);
1986 if (bas->path != NULL)
1987 len += strlen(bas->path);
1988 res->path = (char *) xmlMalloc(len);
1989 if (res->path == NULL) {
1990 xmlGenericError(xmlGenericErrorContext,
1991 "xmlBuildURI: out of memory\n");
1992 goto done;
1993 }
1994 res->path[0] = 0;
1995
1996 /*
1997 * a) All but the last segment of the base URI's path component is
1998 * copied to the buffer. In other words, any characters after the
1999 * last (right-most) slash character, if any, are excluded.
2000 */
2001 cur = 0;
2002 out = 0;
2003 if (bas->path != NULL) {
2004 while (bas->path[cur] != 0) {
2005 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2006 cur++;
2007 if (bas->path[cur] == 0)
2008 break;
2009
2010 cur++;
2011 while (out < cur) {
2012 res->path[out] = bas->path[out];
2013 out++;
2014 }
2015 }
2016 }
2017 res->path[out] = 0;
2018
2019 /*
2020 * b) The reference's path component is appended to the buffer
2021 * string.
2022 */
2023 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002024 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002025 /*
2026 * Ensure the path includes a '/'
2027 */
2028 if ((out == 0) && (bas->server != NULL))
2029 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002030 while (ref->path[indx] != 0) {
2031 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002032 }
2033 }
2034 res->path[out] = 0;
2035
2036 /*
2037 * Steps c) to h) are really path normalization steps
2038 */
2039 xmlNormalizeURIPath(res->path);
2040
2041step_7:
2042
2043 /*
2044 * 7) The resulting URI components, including any inherited from the
2045 * base URI, are recombined to give the absolute form of the URI
2046 * reference.
2047 */
2048 val = xmlSaveUri(res);
2049
2050done:
2051 if (ref != NULL)
2052 xmlFreeURI(ref);
2053 if (bas != NULL)
2054 xmlFreeURI(bas);
2055 if (res != NULL)
2056 xmlFreeURI(res);
2057 return(val);
2058}
2059
2060