blob: b4554606e5684c56685f89cacae172b0259829e4 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
8 * Daniel.Veillard@w3.org
9 */
10
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef WIN32
14#define INCLUDE_WINSOCK
Owen Taylor3473f882001-02-23 17:55:21 +000015#endif
16
Owen Taylor3473f882001-02-23 17:55:21 +000017#include <string.h>
18
19#include <libxml/xmlmemory.h>
20#include <libxml/uri.h>
21#include <libxml/xmlerror.h>
22
/************************************************************************
 *									*
 *		Macros to differentiate various character types	*
 *			directly extracted from RFC 2396		*
 *									*
 ************************************************************************/

/*
 * NOTE: all the macros below are plain textual macros and evaluate their
 * argument several times; never pass an expression with side effects.
 * The IS_xxx(x) forms take a character, the IS_xxx(p) forms take a pointer
 * to the current position in the string.
 */

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */

#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */

#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))


/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
	((x) == '+') || ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The && chain short-circuits, so (p)[1] and (p)[2] are only read when the
 * preceding characters matched (and therefore were not the terminator).
 */

#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                        "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	        ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
	        ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
	        ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
	        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
	        ((*(p) == ',')))

/*
 * rel_segment   = 1*( unreserved | escaped |
 *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */

#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
          ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	  ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	  ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */

#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	              ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */

#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *                      "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
       ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */

#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	            (IS_RESERVED(*(p))))

/*
 * Skip to next pointer char, handle escaped sequences.
 *
 * @p must be a modifiable pointer lvalue. A '%' always advances by 3, so
 * this must only be used after the IS_xxx(p) test for the current position
 * succeeded (which guarantees a complete "%" hex hex sequence).
 */

#define NEXT(p) ((*(p) == '%')? (p) += 3 : (p)++)
166
167/*
168 * Productions from the spec.
169 *
170 * authority = server | reg_name
171 * reg_name = 1*( unreserved | escaped | "$" | "," |
172 * ";" | ":" | "@" | "&" | "=" | "+" )
173 *
174 * path = [ abs_path | opaque_part ]
175 */
176
177/************************************************************************
178 * *
179 * Generic URI structure functions *
180 * *
181 ************************************************************************/
182
183/**
184 * xmlCreateURI:
185 *
186 * Simply creates an empty xmlURI
187 *
188 * Returns the new structure or NULL in case of error
189 */
190xmlURIPtr
191xmlCreateURI(void) {
192 xmlURIPtr ret;
193
194 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
195 if (ret == NULL) {
196 xmlGenericError(xmlGenericErrorContext,
197 "xmlCreateURI: out of memory\n");
198 return(NULL);
199 }
200 memset(ret, 0, sizeof(xmlURI));
201 return(ret);
202}
203
204/**
205 * xmlSaveUri:
206 * @uri: pointer to an xmlURI
207 *
208 * Save the URI as an escaped string
209 *
210 * Returns a new string (to be deallocated by caller)
211 */
212xmlChar *
213xmlSaveUri(xmlURIPtr uri) {
214 xmlChar *ret = NULL;
215 const char *p;
216 int len;
217 int max;
218
219 if (uri == NULL) return(NULL);
220
221
222 max = 80;
223 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
224 if (ret == NULL) {
225 xmlGenericError(xmlGenericErrorContext,
226 "xmlSaveUri: out of memory\n");
227 return(NULL);
228 }
229 len = 0;
230
231 if (uri->scheme != NULL) {
232 p = uri->scheme;
233 while (*p != 0) {
234 if (len >= max) {
235 max *= 2;
236 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 }
243 ret[len++] = *p++;
244 }
245 if (len >= max) {
246 max *= 2;
247 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
248 if (ret == NULL) {
249 xmlGenericError(xmlGenericErrorContext,
250 "xmlSaveUri: out of memory\n");
251 return(NULL);
252 }
253 }
254 ret[len++] = ':';
255 }
256 if (uri->opaque != NULL) {
257 p = uri->opaque;
258 while (*p != 0) {
259 if (len + 3 >= max) {
260 max *= 2;
261 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
262 if (ret == NULL) {
263 xmlGenericError(xmlGenericErrorContext,
264 "xmlSaveUri: out of memory\n");
265 return(NULL);
266 }
267 }
268 if ((IS_UNRESERVED(*(p))) ||
269 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
270 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
271 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
272 ret[len++] = *p++;
273 else {
274 int val = *(unsigned char *)p++;
275 int hi = val / 0x10, lo = val % 0x10;
276 ret[len++] = '%';
277 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
278 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
279 }
280 }
281 if (len >= max) {
282 max *= 2;
283 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
284 if (ret == NULL) {
285 xmlGenericError(xmlGenericErrorContext,
286 "xmlSaveUri: out of memory\n");
287 return(NULL);
288 }
289 }
290 ret[len++] = 0;
291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
368 len += sprintf((char *) &ret[len], ":%d", uri->port);
369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
429 if (ret == NULL) {
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
432 return(NULL);
433 }
434 }
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
438 ((*(p) == ',')))
439 ret[len++] = *p++;
440 else {
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
443 ret[len++] = '%';
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446 }
447 }
448 }
449 if (uri->query != NULL) {
450 if (len + 3 >= max) {
451 max *= 2;
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
454 if (ret == NULL) {
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
457 return(NULL);
458 }
459 }
460 ret[len++] = '?';
461 p = uri->query;
462 while (*p != 0) {
463 if (len + 3 >= max) {
464 max *= 2;
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
467 if (ret == NULL) {
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
470 return(NULL);
471 }
472 }
473 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
474 ret[len++] = *p++;
475 else {
476 int val = *(unsigned char *)p++;
477 int hi = val / 0x10, lo = val % 0x10;
478 ret[len++] = '%';
479 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
480 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
481 }
482 }
483 }
484 if (uri->fragment != NULL) {
485 if (len + 3 >= max) {
486 max *= 2;
487 ret = (xmlChar *) xmlRealloc(ret,
488 (max + 1) * sizeof(xmlChar));
489 if (ret == NULL) {
490 xmlGenericError(xmlGenericErrorContext,
491 "xmlSaveUri: out of memory\n");
492 return(NULL);
493 }
494 }
495 ret[len++] = '#';
496 p = uri->fragment;
497 while (*p != 0) {
498 if (len + 3 >= max) {
499 max *= 2;
500 ret = (xmlChar *) xmlRealloc(ret,
501 (max + 1) * sizeof(xmlChar));
502 if (ret == NULL) {
503 xmlGenericError(xmlGenericErrorContext,
504 "xmlSaveUri: out of memory\n");
505 return(NULL);
506 }
507 }
508 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
509 ret[len++] = *p++;
510 else {
511 int val = *(unsigned char *)p++;
512 int hi = val / 0x10, lo = val % 0x10;
513 ret[len++] = '%';
514 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
515 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
516 }
517 }
518 }
519 if (len >= max) {
520 max *= 2;
521 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
522 if (ret == NULL) {
523 xmlGenericError(xmlGenericErrorContext,
524 "xmlSaveUri: out of memory\n");
525 return(NULL);
526 }
527 }
528 ret[len++] = 0;
529 }
530 return(ret);
531}
532
533/**
534 * xmlPrintURI:
535 * @stream: a FILE* for the output
536 * @uri: pointer to an xmlURI
537 *
538 * Prints the URI in the stream @steam.
539 */
540void
541xmlPrintURI(FILE *stream, xmlURIPtr uri) {
542 xmlChar *out;
543
544 out = xmlSaveUri(uri);
545 if (out != NULL) {
546 fprintf(stream, "%s", out);
547 xmlFree(out);
548 }
549}
550
551/**
552 * xmlCleanURI:
553 * @uri: pointer to an xmlURI
554 *
555 * Make sure the xmlURI struct is free of content
556 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000557static void
Owen Taylor3473f882001-02-23 17:55:21 +0000558xmlCleanURI(xmlURIPtr uri) {
559 if (uri == NULL) return;
560
561 if (uri->scheme != NULL) xmlFree(uri->scheme);
562 uri->scheme = NULL;
563 if (uri->server != NULL) xmlFree(uri->server);
564 uri->server = NULL;
565 if (uri->user != NULL) xmlFree(uri->user);
566 uri->user = NULL;
567 if (uri->path != NULL) xmlFree(uri->path);
568 uri->path = NULL;
569 if (uri->fragment != NULL) xmlFree(uri->fragment);
570 uri->fragment = NULL;
571 if (uri->opaque != NULL) xmlFree(uri->opaque);
572 uri->opaque = NULL;
573 if (uri->authority != NULL) xmlFree(uri->authority);
574 uri->authority = NULL;
575 if (uri->query != NULL) xmlFree(uri->query);
576 uri->query = NULL;
577}
578
579/**
580 * xmlFreeURI:
581 * @uri: pointer to an xmlURI
582 *
583 * Free up the xmlURI struct
584 */
585void
586xmlFreeURI(xmlURIPtr uri) {
587 if (uri == NULL) return;
588
589 if (uri->scheme != NULL) xmlFree(uri->scheme);
590 if (uri->server != NULL) xmlFree(uri->server);
591 if (uri->user != NULL) xmlFree(uri->user);
592 if (uri->path != NULL) xmlFree(uri->path);
593 if (uri->fragment != NULL) xmlFree(uri->fragment);
594 if (uri->opaque != NULL) xmlFree(uri->opaque);
595 if (uri->authority != NULL) xmlFree(uri->authority);
596 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000597 xmlFree(uri);
598}
599
600/************************************************************************
601 * *
602 * Helper functions *
603 * *
604 ************************************************************************/
605
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is always at the first character of a segment at the top of
     * the loop; "out" is where the next kept character is written.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* and its trailing '/' */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap: strcpy() would be undefined
         * behavior here, memmove() is the call that allows it.
         */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* Skip back over the "/" chars preceding "cur".  If that reaches
         * the start of the buffer there is no previous segment, so keep
         * going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp - 1" is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        /* The cur[0] test stops the scan on the terminator; without it a
         * path ending in "/.." made the loop read past the end of string.
         */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path)
            memmove(path, cur, strlen(cur) + 1);
    }

    return(0);
}
918
919/**
920 * xmlURIUnescapeString:
921 * @str: the string to unescape
922 * @len: the lenght in bytes to unescape (or <= 0 to indicate full string)
923 * @target: optionnal destination buffer
924 *
925 * Unescaping routine, does not do validity checks !
926 * Output is direct unsigned char translation of %XX values (no encoding)
927 *
928 * Returns an copy of the string, but unescaped
929 */
930char *
931xmlURIUnescapeString(const char *str, int len, char *target) {
932 char *ret, *out;
933 const char *in;
934
935 if (str == NULL)
936 return(NULL);
937 if (len <= 0) len = strlen(str);
938 if (len <= 0) return(NULL);
939
940 if (target == NULL) {
941 ret = (char *) xmlMalloc(len + 1);
942 if (ret == NULL) {
943 xmlGenericError(xmlGenericErrorContext,
944 "xmlURIUnescapeString: out of memory\n");
945 return(NULL);
946 }
947 } else
948 ret = target;
949 in = str;
950 out = ret;
951 while(len > 0) {
952 if (*in == '%') {
953 in++;
954 if ((*in >= '0') && (*in <= '9'))
955 *out = (*in - '0');
956 else if ((*in >= 'a') && (*in <= 'f'))
957 *out = (*in - 'a') + 10;
958 else if ((*in >= 'A') && (*in <= 'F'))
959 *out = (*in - 'A') + 10;
960 in++;
961 if ((*in >= '0') && (*in <= '9'))
962 *out = *out * 16 + (*in - '0');
963 else if ((*in >= 'a') && (*in <= 'f'))
964 *out = *out * 16 + (*in - 'a') + 10;
965 else if ((*in >= 'A') && (*in <= 'F'))
966 *out = *out * 16 + (*in - 'A') + 10;
967 in++;
968 len -= 3;
969 out++;
970 } else {
971 *out++ = *in++;
972 len--;
973 }
974 }
975 *out = 0;
976 return(ret);
977}
978
979/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000980 * xmlURIEscapeStr:
981 * @str: string to escape
982 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000983 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000984 * This routine escapes a string to hex, ignoring reserved characters (a-z)
985 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000986 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000987 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000988 */
989xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000990xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
991 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000992 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000993
Owen Taylor3473f882001-02-23 17:55:21 +0000994 unsigned int len, out;
995
996 if (str == NULL)
997 return(NULL);
998 len = xmlStrlen(str);
999 if (len <= 0) return(NULL);
1000
1001 len += 20;
1002 ret = (xmlChar *) xmlMalloc(len);
1003 if (ret == NULL) {
1004 xmlGenericError(xmlGenericErrorContext,
1005 "xmlURIEscape: out of memory\n");
1006 return(NULL);
1007 }
1008 in = (const xmlChar *) str;
1009 out = 0;
1010 while(*in != 0) {
1011 if (len - out <= 3) {
1012 len += 20;
1013 ret = (xmlChar *) xmlRealloc(ret, len);
1014 if (ret == NULL) {
1015 xmlGenericError(xmlGenericErrorContext,
1016 "xmlURIEscape: out of memory\n");
1017 return(NULL);
1018 }
1019 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001020
1021 ch = *in;
1022
1023 if ( (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch)) ) {
Owen Taylor3473f882001-02-23 17:55:21 +00001024 unsigned char val;
1025 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +00001026 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +00001027 if (val <= 9)
1028 ret[out++] = '0' + val;
1029 else
1030 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +00001031 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +00001032 if (val <= 9)
1033 ret[out++] = '0' + val;
1034 else
1035 ret[out++] = 'A' + val - 0xA;
1036 in++;
1037 } else {
1038 ret[out++] = *in++;
1039 }
Daniel Veillard8514c672001-05-23 10:29:12 +00001040
Owen Taylor3473f882001-02-23 17:55:21 +00001041 }
1042 ret[out] = 0;
1043 return(ret);
1044}
1045
Daniel Veillard8514c672001-05-23 10:29:12 +00001046/**
1047 * xmlURIEscape:
1048 * @str: the string of the URI to escape
1049 *
1050 * Escaping routine, does not do validity checks !
1051 * It will try to escape the chars needing this, but this is heuristic
1052 * based it's impossible to be sure.
1053 *
Daniel Veillard8514c672001-05-23 10:29:12 +00001054 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +00001055 *
1056 * 25 May 2001
1057 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1058 * according to RFC2396.
1059 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001060 */
1061xmlChar *
1062xmlURIEscape(const xmlChar *str) {
Daniel Veillard6278fb52001-05-25 07:38:41 +00001063 xmlChar *ret, *segment = NULL;
1064 xmlURIPtr uri;
Daniel Veillard8514c672001-05-23 10:29:12 +00001065
Daniel Veillard6278fb52001-05-25 07:38:41 +00001066#define NULLCHK(p) if(!p) { \
1067 xmlGenericError(xmlGenericErrorContext, \
1068 "xmlURIEscape: out of memory\n"); \
1069 return NULL; }
1070
1071 uri = xmlParseURI( (const char *) str);
1072
1073 if(!uri)
1074 return NULL;
1075
1076 ret = NULL;
1077
1078 if(uri->scheme) {
1079 segment = xmlURIEscapeStr( BAD_CAST uri->scheme, BAD_CAST "+-.");
1080 NULLCHK(segment)
1081 xmlStrcat(ret, segment);
1082 xmlStrcat(ret, BAD_CAST ":");
1083 xmlFree(segment);
1084 }
1085
1086 if(uri->authority) {
1087 segment = xmlURIEscapeStr( BAD_CAST uri->authority, BAD_CAST "/?;:@");
1088 NULLCHK(segment)
1089 xmlStrcat(ret, BAD_CAST "//");
1090 xmlStrcat(ret, segment);
1091 xmlFree(segment);
1092 }
1093
1094 if(uri->user) {
1095 segment = xmlURIEscapeStr( BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1096 NULLCHK(segment)
1097 xmlStrcat(ret, segment);
1098 xmlStrcat(ret, BAD_CAST "@");
1099 xmlFree(segment);
1100 }
1101
1102 if(uri->server) {
1103 segment = xmlURIEscapeStr( BAD_CAST uri->server, BAD_CAST "/?;:@");
1104 NULLCHK(segment)
1105 xmlStrcat(ret, BAD_CAST "//");
1106 xmlStrcat(ret, segment);
1107 xmlFree(segment);
1108 }
1109
1110 if(uri->port) {
1111 xmlChar port[10];
1112 snprintf(segment, 10, "%d", uri->port);
1113 xmlStrcat(ret, BAD_CAST ":");
1114 xmlStrcat(ret, port);
1115 xmlFree(segment);
1116 }
1117
1118 if(uri->path) {
1119 segment = xmlURIEscapeStr( BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1120 NULLCHK(segment)
1121 xmlStrcat(ret, segment);
1122 xmlFree(segment);
1123 }
1124
1125 if(uri->query) {
1126 segment = xmlURIEscapeStr( BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1127 NULLCHK(segment)
1128 xmlStrcat(ret, BAD_CAST "?");
1129 xmlStrcat(ret, segment);
1130 xmlFree(segment);
1131 }
1132
1133 if(uri->opaque) {
1134 segment = xmlURIEscapeStr( BAD_CAST uri->opaque, BAD_CAST "");
1135 NULLCHK(segment)
1136 xmlStrcat(ret, segment);
1137 xmlStrcat(ret, BAD_CAST ":");
1138 xmlFree(segment);
1139 }
1140
1141 if(uri->fragment) {
1142 segment = xmlURIEscapeStr( BAD_CAST uri->fragment, BAD_CAST "#");
1143 NULLCHK(segment)
1144 xmlStrcat(ret, BAD_CAST "#");
1145 xmlStrcat(ret, segment);
1146 xmlFree(segment);
1147 }
1148
1149#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001150
1151 return(ret);
1152}
1153
Owen Taylor3473f882001-02-23 17:55:21 +00001154/************************************************************************
1155 * *
1156 * Escaped URI parsing *
1157 * *
1158 ************************************************************************/
1159
1160/**
1161 * xmlParseURIFragment:
1162 * @uri: pointer to an URI structure
1163 * @str: pointer to the string to analyze
1164 *
1165 * Parse an URI fragment string and fills in the appropriate fields
1166 * of the @uri structure.
1167 *
1168 * fragment = *uric
1169 *
1170 * Returns 0 or the error code
1171 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001172static int
Owen Taylor3473f882001-02-23 17:55:21 +00001173xmlParseURIFragment(xmlURIPtr uri, const char **str) {
1174 const char *cur = *str;
1175
1176 if (str == NULL) return(-1);
1177
1178 while (IS_URIC(cur)) NEXT(cur);
1179 if (uri != NULL) {
1180 if (uri->fragment != NULL) xmlFree(uri->fragment);
1181 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1182 }
1183 *str = cur;
1184 return(0);
1185}
1186
1187/**
1188 * xmlParseURIQuery:
1189 * @uri: pointer to an URI structure
1190 * @str: pointer to the string to analyze
1191 *
1192 * Parse the query part of an URI
1193 *
1194 * query = *uric
1195 *
1196 * Returns 0 or the error code
1197 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001198static int
Owen Taylor3473f882001-02-23 17:55:21 +00001199xmlParseURIQuery(xmlURIPtr uri, const char **str) {
1200 const char *cur = *str;
1201
1202 if (str == NULL) return(-1);
1203
1204 while (IS_URIC(cur)) NEXT(cur);
1205 if (uri != NULL) {
1206 if (uri->query != NULL) xmlFree(uri->query);
1207 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1208 }
1209 *str = cur;
1210 return(0);
1211}
1212
1213/**
1214 * xmlParseURIScheme:
1215 * @uri: pointer to an URI structure
1216 * @str: pointer to the string to analyze
1217 *
1218 * Parse an URI scheme
1219 *
1220 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1221 *
1222 * Returns 0 or the error code
1223 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001224static int
Owen Taylor3473f882001-02-23 17:55:21 +00001225xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1226 const char *cur;
1227
1228 if (str == NULL)
1229 return(-1);
1230
1231 cur = *str;
1232 if (!IS_ALPHA(*cur))
1233 return(2);
1234 cur++;
1235 while (IS_SCHEME(*cur)) cur++;
1236 if (uri != NULL) {
1237 if (uri->scheme != NULL) xmlFree(uri->scheme);
1238 /* !!! strndup */
1239 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1240 }
1241 *str = cur;
1242 return(0);
1243}
1244
1245/**
1246 * xmlParseURIOpaquePart:
1247 * @uri: pointer to an URI structure
1248 * @str: pointer to the string to analyze
1249 *
1250 * Parse an URI opaque part
1251 *
1252 * opaque_part = uric_no_slash *uric
1253 *
1254 * Returns 0 or the error code
1255 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001256static int
Owen Taylor3473f882001-02-23 17:55:21 +00001257xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
1258 const char *cur;
1259
1260 if (str == NULL)
1261 return(-1);
1262
1263 cur = *str;
1264 if (!IS_URIC_NO_SLASH(cur)) {
1265 return(3);
1266 }
1267 NEXT(cur);
1268 while (IS_URIC(cur)) NEXT(cur);
1269 if (uri != NULL) {
1270 if (uri->opaque != NULL) xmlFree(uri->opaque);
1271 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1272 }
1273 *str = cur;
1274 return(0);
1275}
1276
1277/**
1278 * xmlParseURIServer:
1279 * @uri: pointer to an URI structure
1280 * @str: pointer to the string to analyze
1281 *
1282 * Parse a server subpart of an URI, it's a finer grain analysis
1283 * of the authority part.
1284 *
1285 * server = [ [ userinfo "@" ] hostport ]
1286 * userinfo = *( unreserved | escaped |
1287 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1288 * hostport = host [ ":" port ]
1289 * host = hostname | IPv4address
1290 * hostname = *( domainlabel "." ) toplabel [ "." ]
1291 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1292 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1293 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1294 * port = *digit
1295 *
1296 * Returns 0 or the error code
1297 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001298static int
Owen Taylor3473f882001-02-23 17:55:21 +00001299xmlParseURIServer(xmlURIPtr uri, const char **str) {
1300 const char *cur;
1301 const char *host, *tmp;
1302
1303 if (str == NULL)
1304 return(-1);
1305
1306 cur = *str;
1307
1308 /*
1309 * is there an userinfo ?
1310 */
1311 while (IS_USERINFO(cur)) NEXT(cur);
1312 if (*cur == '@') {
1313 if (uri != NULL) {
1314 if (uri->user != NULL) xmlFree(uri->user);
1315 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1316 }
1317 cur++;
1318 } else {
1319 if (uri != NULL) {
1320 if (uri->user != NULL) xmlFree(uri->user);
1321 uri->user = NULL;
1322 }
1323 cur = *str;
1324 }
1325 /*
1326 * This can be empty in the case where there is no server
1327 */
1328 host = cur;
1329 if (*cur == '/') {
1330 if (uri != NULL) {
1331 if (uri->authority != NULL) xmlFree(uri->authority);
1332 uri->authority = NULL;
1333 if (uri->server != NULL) xmlFree(uri->server);
1334 uri->server = NULL;
1335 uri->port = 0;
1336 }
1337 return(0);
1338 }
1339 /*
1340 * host part of hostport can derive either an IPV4 address
1341 * or an unresolved name. Check the IP first, it easier to detect
1342 * errors if wrong one
1343 */
1344 if (IS_DIGIT(*cur)) {
1345 while(IS_DIGIT(*cur)) cur++;
1346 if (*cur != '.')
1347 goto host_name;
1348 cur++;
1349 if (!IS_DIGIT(*cur))
1350 goto host_name;
1351 while(IS_DIGIT(*cur)) cur++;
1352 if (*cur != '.')
1353 goto host_name;
1354 cur++;
1355 if (!IS_DIGIT(*cur))
1356 goto host_name;
1357 while(IS_DIGIT(*cur)) cur++;
1358 if (*cur != '.')
1359 goto host_name;
1360 cur++;
1361 if (!IS_DIGIT(*cur))
1362 goto host_name;
1363 while(IS_DIGIT(*cur)) cur++;
1364 if (uri != NULL) {
1365 if (uri->authority != NULL) xmlFree(uri->authority);
1366 uri->authority = NULL;
1367 if (uri->server != NULL) xmlFree(uri->server);
1368 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1369 }
1370 goto host_done;
1371 }
1372host_name:
1373 /*
1374 * the hostname production as-is is a parser nightmare.
1375 * simplify it to
1376 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1377 * and just make sure the last label starts with a non numeric char.
1378 */
1379 if (!IS_ALPHANUM(*cur))
1380 return(6);
1381 while (IS_ALPHANUM(*cur)) {
1382 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1383 if (*cur == '.')
1384 cur++;
1385 }
1386 tmp = cur;
1387 tmp--;
1388 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1389 tmp++;
1390 if (!IS_ALPHA(*tmp))
1391 return(7);
1392 if (uri != NULL) {
1393 if (uri->authority != NULL) xmlFree(uri->authority);
1394 uri->authority = NULL;
1395 if (uri->server != NULL) xmlFree(uri->server);
1396 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1397 }
1398
1399host_done:
1400
1401 /*
1402 * finish by checking for a port presence.
1403 */
1404 if (*cur == ':') {
1405 cur++;
1406 if (IS_DIGIT(*cur)) {
1407 if (uri != NULL)
1408 uri->port = 0;
1409 while (IS_DIGIT(*cur)) {
1410 if (uri != NULL)
1411 uri->port = uri->port * 10 + (*cur - '0');
1412 cur++;
1413 }
1414 }
1415 }
1416 *str = cur;
1417 return(0);
1418}
1419
1420/**
1421 * xmlParseURIRelSegment:
1422 * @uri: pointer to an URI structure
1423 * @str: pointer to the string to analyze
1424 *
1425 * Parse an URI relative segment
1426 *
1427 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1428 * "+" | "$" | "," )
1429 *
1430 * Returns 0 or the error code
1431 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001432static int
Owen Taylor3473f882001-02-23 17:55:21 +00001433xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
1434 const char *cur;
1435
1436 if (str == NULL)
1437 return(-1);
1438
1439 cur = *str;
1440 if (!IS_SEGMENT(cur)) {
1441 return(3);
1442 }
1443 NEXT(cur);
1444 while (IS_SEGMENT(cur)) NEXT(cur);
1445 if (uri != NULL) {
1446 if (uri->path != NULL) xmlFree(uri->path);
1447 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1448 }
1449 *str = cur;
1450 return(0);
1451}
1452
1453/**
1454 * xmlParseURIPathSegments:
1455 * @uri: pointer to an URI structure
1456 * @str: pointer to the string to analyze
1457 * @slash: should we add a leading slash
1458 *
1459 * Parse an URI set of path segments
1460 *
1461 * path_segments = segment *( "/" segment )
1462 * segment = *pchar *( ";" param )
1463 * param = *pchar
1464 *
1465 * Returns 0 or the error code
1466 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001467static int
Owen Taylor3473f882001-02-23 17:55:21 +00001468xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
1469 const char *cur;
1470
1471 if (str == NULL)
1472 return(-1);
1473
1474 cur = *str;
1475
1476 do {
1477 while (IS_PCHAR(cur)) NEXT(cur);
1478 if (*cur == ';') {
1479 cur++;
1480 while (IS_PCHAR(cur)) NEXT(cur);
1481 }
1482 if (*cur != '/') break;
1483 cur++;
1484 } while (1);
1485 if (uri != NULL) {
1486 int len, len2 = 0;
1487 char *path;
1488
1489 /*
1490 * Concat the set of path segments to the current path
1491 */
1492 len = cur - *str;
1493 if (slash)
1494 len++;
1495
1496 if (uri->path != NULL) {
1497 len2 = strlen(uri->path);
1498 len += len2;
1499 }
1500 path = (char *) xmlMalloc(len + 1);
1501 if (path == NULL) {
1502 xmlGenericError(xmlGenericErrorContext,
1503 "xmlParseURIPathSegments: out of memory\n");
1504 *str = cur;
1505 return(-1);
1506 }
1507 if (uri->path != NULL)
1508 memcpy(path, uri->path, len2);
1509 if (slash) {
1510 path[len2] = '/';
1511 len2++;
1512 }
1513 path[len2] = 0;
1514 if (cur - *str > 0)
1515 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1516 if (uri->path != NULL)
1517 xmlFree(uri->path);
1518 uri->path = path;
1519 }
1520 *str = cur;
1521 return(0);
1522}
1523
1524/**
1525 * xmlParseURIAuthority:
1526 * @uri: pointer to an URI structure
1527 * @str: pointer to the string to analyze
1528 *
1529 * Parse the authority part of an URI.
1530 *
1531 * authority = server | reg_name
1532 * server = [ [ userinfo "@" ] hostport ]
1533 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1534 * "@" | "&" | "=" | "+" )
1535 *
1536 * Note : this is completely ambiguous since reg_name is allowed to
1537 * use the full set of chars in use by server:
1538 *
1539 * 3.2.1. Registry-based Naming Authority
1540 *
1541 * The structure of a registry-based naming authority is specific
1542 * to the URI scheme, but constrained to the allowed characters
1543 * for an authority component.
1544 *
1545 * Returns 0 or the error code
1546 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001547static int
Owen Taylor3473f882001-02-23 17:55:21 +00001548xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1549 const char *cur;
1550 int ret;
1551
1552 if (str == NULL)
1553 return(-1);
1554
1555 cur = *str;
1556
1557 /*
1558 * try first to parse it as a server string.
1559 */
1560 ret = xmlParseURIServer(uri, str);
1561 if (ret == 0)
1562 return(0);
1563
1564 /*
1565 * failed, fallback to reg_name
1566 */
1567 if (!IS_REG_NAME(cur)) {
1568 return(5);
1569 }
1570 NEXT(cur);
1571 while (IS_REG_NAME(cur)) NEXT(cur);
1572 if (uri != NULL) {
1573 if (uri->server != NULL) xmlFree(uri->server);
1574 uri->server = NULL;
1575 if (uri->user != NULL) xmlFree(uri->user);
1576 uri->user = NULL;
1577 if (uri->authority != NULL) xmlFree(uri->authority);
1578 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1579 }
1580 *str = cur;
1581 return(0);
1582}
1583
1584/**
1585 * xmlParseURIHierPart:
1586 * @uri: pointer to an URI structure
1587 * @str: pointer to the string to analyze
1588 *
1589 * Parse an URI hirarchical part
1590 *
1591 * hier_part = ( net_path | abs_path ) [ "?" query ]
1592 * abs_path = "/" path_segments
1593 * net_path = "//" authority [ abs_path ]
1594 *
1595 * Returns 0 or the error code
1596 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001597static int
Owen Taylor3473f882001-02-23 17:55:21 +00001598xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1599 int ret;
1600 const char *cur;
1601
1602 if (str == NULL)
1603 return(-1);
1604
1605 cur = *str;
1606
1607 if ((cur[0] == '/') && (cur[1] == '/')) {
1608 cur += 2;
1609 ret = xmlParseURIAuthority(uri, &cur);
1610 if (ret != 0)
1611 return(ret);
1612 if (cur[0] == '/') {
1613 cur++;
1614 ret = xmlParseURIPathSegments(uri, &cur, 1);
1615 }
1616 } else if (cur[0] == '/') {
1617 cur++;
1618 ret = xmlParseURIPathSegments(uri, &cur, 1);
1619 } else {
1620 return(4);
1621 }
1622 if (ret != 0)
1623 return(ret);
1624 if (*cur == '?') {
1625 cur++;
1626 ret = xmlParseURIQuery(uri, &cur);
1627 if (ret != 0)
1628 return(ret);
1629 }
1630 *str = cur;
1631 return(0);
1632}
1633
1634/**
1635 * xmlParseAbsoluteURI:
1636 * @uri: pointer to an URI structure
1637 * @str: pointer to the string to analyze
1638 *
1639 * Parse an URI reference string and fills in the appropriate fields
1640 * of the @uri structure
1641 *
1642 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1643 *
1644 * Returns 0 or the error code
1645 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001646static int
Owen Taylor3473f882001-02-23 17:55:21 +00001647xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1648 int ret;
1649
1650 if (str == NULL)
1651 return(-1);
1652
1653 ret = xmlParseURIScheme(uri, str);
1654 if (ret != 0) return(ret);
1655 if (**str != ':')
1656 return(1);
1657 (*str)++;
1658 if (**str == '/')
1659 return(xmlParseURIHierPart(uri, str));
1660 return(xmlParseURIOpaquePart(uri, str));
1661}
1662
1663/**
1664 * xmlParseRelativeURI:
1665 * @uri: pointer to an URI structure
1666 * @str: pointer to the string to analyze
1667 *
1668 * Parse an relative URI string and fills in the appropriate fields
1669 * of the @uri structure
1670 *
1671 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1672 * abs_path = "/" path_segments
1673 * net_path = "//" authority [ abs_path ]
1674 * rel_path = rel_segment [ abs_path ]
1675 *
1676 * Returns 0 or the error code
1677 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001678static int
Owen Taylor3473f882001-02-23 17:55:21 +00001679xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1680 int ret = 0;
1681 const char *cur;
1682
1683 if (str == NULL)
1684 return(-1);
1685
1686 cur = *str;
1687 if ((cur[0] == '/') && (cur[1] == '/')) {
1688 cur += 2;
1689 ret = xmlParseURIAuthority(uri, &cur);
1690 if (ret != 0)
1691 return(ret);
1692 if (cur[0] == '/') {
1693 cur++;
1694 ret = xmlParseURIPathSegments(uri, &cur, 1);
1695 }
1696 } else if (cur[0] == '/') {
1697 cur++;
1698 ret = xmlParseURIPathSegments(uri, &cur, 1);
1699 } else if (cur[0] != '#' && cur[0] != '?') {
1700 ret = xmlParseURIRelSegment(uri, &cur);
1701 if (ret != 0)
1702 return(ret);
1703 if (cur[0] == '/') {
1704 cur++;
1705 ret = xmlParseURIPathSegments(uri, &cur, 1);
1706 }
1707 }
1708 if (ret != 0)
1709 return(ret);
1710 if (*cur == '?') {
1711 cur++;
1712 ret = xmlParseURIQuery(uri, &cur);
1713 if (ret != 0)
1714 return(ret);
1715 }
1716 *str = cur;
1717 return(ret);
1718}
1719
1720/**
1721 * xmlParseURIReference:
1722 * @uri: pointer to an URI structure
1723 * @str: the string to analyze
1724 *
1725 * Parse an URI reference string and fills in the appropriate fields
1726 * of the @uri structure
1727 *
1728 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1729 *
1730 * Returns 0 or the error code
1731 */
1732int
1733xmlParseURIReference(xmlURIPtr uri, const char *str) {
1734 int ret;
1735 const char *tmp = str;
1736
1737 if (str == NULL)
1738 return(-1);
1739 xmlCleanURI(uri);
1740
1741 /*
1742 * Try first to parse aboslute refs, then fallback to relative if
1743 * it fails.
1744 */
1745 ret = xmlParseAbsoluteURI(uri, &str);
1746 if (ret != 0) {
1747 xmlCleanURI(uri);
1748 str = tmp;
1749 ret = xmlParseRelativeURI(uri, &str);
1750 }
1751 if (ret != 0) {
1752 xmlCleanURI(uri);
1753 return(ret);
1754 }
1755
1756 if (*str == '#') {
1757 str++;
1758 ret = xmlParseURIFragment(uri, &str);
1759 if (ret != 0) return(ret);
1760 }
1761 if (*str != 0) {
1762 xmlCleanURI(uri);
1763 return(1);
1764 }
1765 return(0);
1766}
1767
1768/**
1769 * xmlParseURI:
1770 * @str: the URI string to analyze
1771 *
1772 * Parse an URI
1773 *
1774 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1775 *
1776 * Returns a newly build xmlURIPtr or NULL in case of error
1777 */
1778xmlURIPtr
1779xmlParseURI(const char *str) {
1780 xmlURIPtr uri;
1781 int ret;
1782
1783 if (str == NULL)
1784 return(NULL);
1785 uri = xmlCreateURI();
1786 if (uri != NULL) {
1787 ret = xmlParseURIReference(uri, str);
1788 if (ret) {
1789 xmlFreeURI(uri);
1790 return(NULL);
1791 }
1792 }
1793 return(uri);
1794}
1795
1796/************************************************************************
1797 * *
1798 * Public functions *
1799 * *
1800 ************************************************************************/
1801
1802/**
1803 * xmlBuildURI:
1804 * @URI: the URI instance found in the document
1805 * @base: the base value
1806 *
1807 * Computes he final URI of the reference done by checking that
1808 * the given URI is valid, and building the final URI using the
1809 * base URI. This is processed according to section 5.2 of the
1810 * RFC 2396
1811 *
1812 * 5.2. Resolving Relative References to Absolute Form
1813 *
1814 * Returns a new URI string (to be freed by the caller) or NULL in case
1815 * of error.
1816 */
1817xmlChar *
1818xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1819 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001821 xmlURIPtr ref = NULL;
1822 xmlURIPtr bas = NULL;
1823 xmlURIPtr res = NULL;
1824
1825 /*
1826 * 1) The URI reference is parsed into the potential four components and
1827 * fragment identifier, as described in Section 4.3.
1828 *
1829 * NOTE that a completely empty URI is treated by modern browsers
1830 * as a reference to "." rather than as a synonym for the current
1831 * URI. Should we do that here?
1832 */
1833 if (URI == NULL)
1834 ret = -1;
1835 else {
1836 if (*URI) {
1837 ref = xmlCreateURI();
1838 if (ref == NULL)
1839 goto done;
1840 ret = xmlParseURIReference(ref, (const char *) URI);
1841 }
1842 else
1843 ret = 0;
1844 }
1845 if (ret != 0)
1846 goto done;
1847 if (base == NULL)
1848 ret = -1;
1849 else {
1850 bas = xmlCreateURI();
1851 if (bas == NULL)
1852 goto done;
1853 ret = xmlParseURIReference(bas, (const char *) base);
1854 }
1855 if (ret != 0) {
1856 if (ref)
1857 val = xmlSaveUri(ref);
1858 goto done;
1859 }
1860 if (ref == NULL) {
1861 /*
1862 * the base fragment must be ignored
1863 */
1864 if (bas->fragment != NULL) {
1865 xmlFree(bas->fragment);
1866 bas->fragment = NULL;
1867 }
1868 val = xmlSaveUri(bas);
1869 goto done;
1870 }
1871
1872 /*
1873 * 2) If the path component is empty and the scheme, authority, and
1874 * query components are undefined, then it is a reference to the
1875 * current document and we are done. Otherwise, the reference URI's
1876 * query and fragment components are defined as found (or not found)
1877 * within the URI reference and not inherited from the base URI.
1878 *
1879 * NOTE that in modern browsers, the parsing differs from the above
1880 * in the following aspect: the query component is allowed to be
1881 * defined while still treating this as a reference to the current
1882 * document.
1883 */
1884 res = xmlCreateURI();
1885 if (res == NULL)
1886 goto done;
1887 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1888 ((ref->authority == NULL) && (ref->server == NULL))) {
1889 if (bas->scheme != NULL)
1890 res->scheme = xmlMemStrdup(bas->scheme);
1891 if (bas->authority != NULL)
1892 res->authority = xmlMemStrdup(bas->authority);
1893 else if (bas->server != NULL) {
1894 res->server = xmlMemStrdup(bas->server);
1895 if (bas->user != NULL)
1896 res->user = xmlMemStrdup(bas->user);
1897 res->port = bas->port;
1898 }
1899 if (bas->path != NULL)
1900 res->path = xmlMemStrdup(bas->path);
1901 if (ref->query != NULL)
1902 res->query = xmlMemStrdup(ref->query);
1903 else if (bas->query != NULL)
1904 res->query = xmlMemStrdup(bas->query);
1905 if (ref->fragment != NULL)
1906 res->fragment = xmlMemStrdup(ref->fragment);
1907 goto step_7;
1908 }
1909
1910 if (ref->query != NULL)
1911 res->query = xmlMemStrdup(ref->query);
1912 if (ref->fragment != NULL)
1913 res->fragment = xmlMemStrdup(ref->fragment);
1914
1915 /*
1916 * 3) If the scheme component is defined, indicating that the reference
1917 * starts with a scheme name, then the reference is interpreted as an
1918 * absolute URI and we are done. Otherwise, the reference URI's
1919 * scheme is inherited from the base URI's scheme component.
1920 */
1921 if (ref->scheme != NULL) {
1922 val = xmlSaveUri(ref);
1923 goto done;
1924 }
1925 if (bas->scheme != NULL)
1926 res->scheme = xmlMemStrdup(bas->scheme);
1927
1928 /*
1929 * 4) If the authority component is defined, then the reference is a
1930 * network-path and we skip to step 7. Otherwise, the reference
1931 * URI's authority is inherited from the base URI's authority
1932 * component, which will also be undefined if the URI scheme does not
1933 * use an authority component.
1934 */
1935 if ((ref->authority != NULL) || (ref->server != NULL)) {
1936 if (ref->authority != NULL)
1937 res->authority = xmlMemStrdup(ref->authority);
1938 else {
1939 res->server = xmlMemStrdup(ref->server);
1940 if (ref->user != NULL)
1941 res->user = xmlMemStrdup(ref->user);
1942 res->port = ref->port;
1943 }
1944 if (ref->path != NULL)
1945 res->path = xmlMemStrdup(ref->path);
1946 goto step_7;
1947 }
1948 if (bas->authority != NULL)
1949 res->authority = xmlMemStrdup(bas->authority);
1950 else if (bas->server != NULL) {
1951 res->server = xmlMemStrdup(bas->server);
1952 if (bas->user != NULL)
1953 res->user = xmlMemStrdup(bas->user);
1954 res->port = bas->port;
1955 }
1956
1957 /*
1958 * 5) If the path component begins with a slash character ("/"), then
1959 * the reference is an absolute-path and we skip to step 7.
1960 */
1961 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1962 res->path = xmlMemStrdup(ref->path);
1963 goto step_7;
1964 }
1965
1966
1967 /*
1968 * 6) If this step is reached, then we are resolving a relative-path
1969 * reference. The relative path needs to be merged with the base
1970 * URI's path. Although there are many ways to do this, we will
1971 * describe a simple method using a separate string buffer.
1972 *
1973 * Allocate a buffer large enough for the result string.
1974 */
1975 len = 2; /* extra / and 0 */
1976 if (ref->path != NULL)
1977 len += strlen(ref->path);
1978 if (bas->path != NULL)
1979 len += strlen(bas->path);
1980 res->path = (char *) xmlMalloc(len);
1981 if (res->path == NULL) {
1982 xmlGenericError(xmlGenericErrorContext,
1983 "xmlBuildURI: out of memory\n");
1984 goto done;
1985 }
1986 res->path[0] = 0;
1987
1988 /*
1989 * a) All but the last segment of the base URI's path component is
1990 * copied to the buffer. In other words, any characters after the
1991 * last (right-most) slash character, if any, are excluded.
1992 */
1993 cur = 0;
1994 out = 0;
1995 if (bas->path != NULL) {
1996 while (bas->path[cur] != 0) {
1997 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1998 cur++;
1999 if (bas->path[cur] == 0)
2000 break;
2001
2002 cur++;
2003 while (out < cur) {
2004 res->path[out] = bas->path[out];
2005 out++;
2006 }
2007 }
2008 }
2009 res->path[out] = 0;
2010
2011 /*
2012 * b) The reference's path component is appended to the buffer
2013 * string.
2014 */
2015 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002016 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017 /*
2018 * Ensure the path includes a '/'
2019 */
2020 if ((out == 0) && (bas->server != NULL))
2021 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002022 while (ref->path[indx] != 0) {
2023 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002024 }
2025 }
2026 res->path[out] = 0;
2027
2028 /*
2029 * Steps c) to h) are really path normalization steps
2030 */
2031 xmlNormalizeURIPath(res->path);
2032
2033step_7:
2034
2035 /*
2036 * 7) The resulting URI components, including any inherited from the
2037 * base URI, are recombined to give the absolute form of the URI
2038 * reference.
2039 */
2040 val = xmlSaveUri(res);
2041
2042done:
2043 if (ref != NULL)
2044 xmlFreeURI(ref);
2045 if (bas != NULL)
2046 xmlFreeURI(bas);
2047 if (res != NULL)
2048 xmlFreeURI(res);
2049 return(val);
2050}
2051
2052