blob: e0c96ddd22ef5aa81966d606de4d44f60fa4571c [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFC 2396
5 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
21/************************************************************************
22 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000023 * Macros to differentiate various character type *
Owen Taylor3473f882001-02-23 17:55:21 +000024 * directly extracted from RFC 2396 *
25 * *
26 ************************************************************************/
27
/*
 * Character-class predicates transcribed from the RFC 2396 grammar.
 *
 * Macros with an `x` parameter take a character value; macros with a
 * `p` parameter take a pointer and test the character it points to
 * (IS_ESCAPED additionally reads p[1] and p[2] when *p is '%').
 * Every macro may evaluate its argument more than once, so arguments
 * must be free of side effects.
 */

/* lowalpha = "a" | "b" | ... | "z" */
#define IS_LOWALPHA(x) \
    (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" | "B" | ... | "Z" */
#define IS_UPALPHA(x) \
    (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) \
    (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" | "1" | ... | "9" */
#define IS_DIGIT(x) \
    (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) \
    (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A" .. "F" | "a" .. "f" */
#define IS_HEX(x) \
    ((IS_DIGIT(x)) || \
     (((x) >= 'a') && ((x) <= 'f')) || \
     (((x) >= 'A') && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x) \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || \
     ((x) == '!') || ((x) == '~') || ((x) == '*') || \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_RESERVED(x) \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || \
     ((x) == ':') || ((x) == '@') || ((x) == '&') || \
     ((x) == '=') || ((x) == '+') || ((x) == '$') || \
     ((x) == ','))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) \
    (IS_ALPHANUM(x) || IS_MARK(x))

/* escaped = "%" hex hex */
#define IS_ESCAPED(p) \
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_PCHAR(p) \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
     ((*(p) == ',')))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 * i.e. pchar plus ";" and "?".
 */
#define IS_URIC_NO_SLASH(p) \
    ((IS_PCHAR(p)) || ((*(p) == ';')) || ((*(p) == '?')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
     ((*(p) == ',')))

/* scheme = alpha *( alpha | digit | "+" | "-" | "." ) */
#define IS_SCHEME(x) \
    ((IS_ALPHANUM(x)) || \
     ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 * i.e. pchar plus ";".
 */
#define IS_REG_NAME(p) \
    ((IS_PCHAR(p)) || ((*(p) == ';')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
     ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
     ((*(p) == ',')))

/* uric = reserved | unreserved | escaped */
#define IS_URIC(p) \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p))))

/* unwise = "{" | "}" | "|" | backslash | "^" | "[" | "]" | "`" */
#define IS_UNWISE(p) \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
     ((*(p) == ']')) || ((*(p) == '`')))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000167
/*
 * Advance the pointer lvalue `p` by one URI character: three bytes when
 * it points at '%' (the start of a "%XX" escape), one byte otherwise.
 *
 * The macro does not verify that two characters actually follow the
 * '%'; callers are expected to have validated the escape first (for
 * example with IS_ESCAPED).  The argument is fully parenthesized so that
 * expressions such as NEXT(*pp) update the intended object.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
173
174/*
175 * Productions from the spec.
176 *
177 * authority = server | reg_name
178 * reg_name = 1*( unreserved | escaped | "$" | "," |
179 * ";" | ":" | "@" | "&" | "=" | "+" )
180 *
181 * path = [ abs_path | opaque_part ]
182 */
183
184/************************************************************************
185 * *
186 * Generic URI structure functions *
187 * *
188 ************************************************************************/
189
190/**
191 * xmlCreateURI:
192 *
193 * Simply creates an empty xmlURI
194 *
195 * Returns the new structure or NULL in case of error
196 */
197xmlURIPtr
198xmlCreateURI(void) {
199 xmlURIPtr ret;
200
201 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
202 if (ret == NULL) {
203 xmlGenericError(xmlGenericErrorContext,
204 "xmlCreateURI: out of memory\n");
205 return(NULL);
206 }
207 memset(ret, 0, sizeof(xmlURI));
208 return(ret);
209}
210
211/**
212 * xmlSaveUri:
213 * @uri: pointer to an xmlURI
214 *
215 * Save the URI as an escaped string
216 *
217 * Returns a new string (to be deallocated by caller)
218 */
219xmlChar *
220xmlSaveUri(xmlURIPtr uri) {
221 xmlChar *ret = NULL;
222 const char *p;
223 int len;
224 int max;
225
226 if (uri == NULL) return(NULL);
227
228
229 max = 80;
230 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
231 if (ret == NULL) {
232 xmlGenericError(xmlGenericErrorContext,
233 "xmlSaveUri: out of memory\n");
234 return(NULL);
235 }
236 len = 0;
237
238 if (uri->scheme != NULL) {
239 p = uri->scheme;
240 while (*p != 0) {
241 if (len >= max) {
242 max *= 2;
243 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
244 if (ret == NULL) {
245 xmlGenericError(xmlGenericErrorContext,
246 "xmlSaveUri: out of memory\n");
247 return(NULL);
248 }
249 }
250 ret[len++] = *p++;
251 }
252 if (len >= max) {
253 max *= 2;
254 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
255 if (ret == NULL) {
256 xmlGenericError(xmlGenericErrorContext,
257 "xmlSaveUri: out of memory\n");
258 return(NULL);
259 }
260 }
261 ret[len++] = ':';
262 }
263 if (uri->opaque != NULL) {
264 p = uri->opaque;
265 while (*p != 0) {
266 if (len + 3 >= max) {
267 max *= 2;
268 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
269 if (ret == NULL) {
270 xmlGenericError(xmlGenericErrorContext,
271 "xmlSaveUri: out of memory\n");
272 return(NULL);
273 }
274 }
275 if ((IS_UNRESERVED(*(p))) ||
276 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
277 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
278 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
279 ret[len++] = *p++;
280 else {
281 int val = *(unsigned char *)p++;
282 int hi = val / 0x10, lo = val % 0x10;
283 ret[len++] = '%';
284 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
285 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
286 }
287 }
Owen Taylor3473f882001-02-23 17:55:21 +0000288 } else {
289 if (uri->server != NULL) {
290 if (len + 3 >= max) {
291 max *= 2;
292 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
293 if (ret == NULL) {
294 xmlGenericError(xmlGenericErrorContext,
295 "xmlSaveUri: out of memory\n");
296 return(NULL);
297 }
298 }
299 ret[len++] = '/';
300 ret[len++] = '/';
301 if (uri->user != NULL) {
302 p = uri->user;
303 while (*p != 0) {
304 if (len + 3 >= max) {
305 max *= 2;
306 ret = (xmlChar *) xmlRealloc(ret,
307 (max + 1) * sizeof(xmlChar));
308 if (ret == NULL) {
309 xmlGenericError(xmlGenericErrorContext,
310 "xmlSaveUri: out of memory\n");
311 return(NULL);
312 }
313 }
314 if ((IS_UNRESERVED(*(p))) ||
315 ((*(p) == ';')) || ((*(p) == ':')) ||
316 ((*(p) == '&')) || ((*(p) == '=')) ||
317 ((*(p) == '+')) || ((*(p) == '$')) ||
318 ((*(p) == ',')))
319 ret[len++] = *p++;
320 else {
321 int val = *(unsigned char *)p++;
322 int hi = val / 0x10, lo = val % 0x10;
323 ret[len++] = '%';
324 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
325 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
326 }
327 }
328 if (len + 3 >= max) {
329 max *= 2;
330 ret = (xmlChar *) xmlRealloc(ret,
331 (max + 1) * sizeof(xmlChar));
332 if (ret == NULL) {
333 xmlGenericError(xmlGenericErrorContext,
334 "xmlSaveUri: out of memory\n");
335 return(NULL);
336 }
337 }
338 ret[len++] = '@';
339 }
340 p = uri->server;
341 while (*p != 0) {
342 if (len >= max) {
343 max *= 2;
344 ret = (xmlChar *) xmlRealloc(ret,
345 (max + 1) * sizeof(xmlChar));
346 if (ret == NULL) {
347 xmlGenericError(xmlGenericErrorContext,
348 "xmlSaveUri: out of memory\n");
349 return(NULL);
350 }
351 }
352 ret[len++] = *p++;
353 }
354 if (uri->port > 0) {
355 if (len + 10 >= max) {
356 max *= 2;
357 ret = (xmlChar *) xmlRealloc(ret,
358 (max + 1) * sizeof(xmlChar));
359 if (ret == NULL) {
360 xmlGenericError(xmlGenericErrorContext,
361 "xmlSaveUri: out of memory\n");
362 return(NULL);
363 }
364 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000365 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000366 }
367 } else if (uri->authority != NULL) {
368 if (len + 3 >= max) {
369 max *= 2;
370 ret = (xmlChar *) xmlRealloc(ret,
371 (max + 1) * sizeof(xmlChar));
372 if (ret == NULL) {
373 xmlGenericError(xmlGenericErrorContext,
374 "xmlSaveUri: out of memory\n");
375 return(NULL);
376 }
377 }
378 ret[len++] = '/';
379 ret[len++] = '/';
380 p = uri->authority;
381 while (*p != 0) {
382 if (len + 3 >= max) {
383 max *= 2;
384 ret = (xmlChar *) xmlRealloc(ret,
385 (max + 1) * sizeof(xmlChar));
386 if (ret == NULL) {
387 xmlGenericError(xmlGenericErrorContext,
388 "xmlSaveUri: out of memory\n");
389 return(NULL);
390 }
391 }
392 if ((IS_UNRESERVED(*(p))) ||
393 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
394 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
395 ((*(p) == '=')) || ((*(p) == '+')))
396 ret[len++] = *p++;
397 else {
398 int val = *(unsigned char *)p++;
399 int hi = val / 0x10, lo = val % 0x10;
400 ret[len++] = '%';
401 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
402 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
403 }
404 }
405 } else if (uri->scheme != NULL) {
406 if (len + 3 >= max) {
407 max *= 2;
408 ret = (xmlChar *) xmlRealloc(ret,
409 (max + 1) * sizeof(xmlChar));
410 if (ret == NULL) {
411 xmlGenericError(xmlGenericErrorContext,
412 "xmlSaveUri: out of memory\n");
413 return(NULL);
414 }
415 }
416 ret[len++] = '/';
417 ret[len++] = '/';
418 }
419 if (uri->path != NULL) {
420 p = uri->path;
421 while (*p != 0) {
422 if (len + 3 >= max) {
423 max *= 2;
424 ret = (xmlChar *) xmlRealloc(ret,
425 (max + 1) * sizeof(xmlChar));
426 if (ret == NULL) {
427 xmlGenericError(xmlGenericErrorContext,
428 "xmlSaveUri: out of memory\n");
429 return(NULL);
430 }
431 }
432 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
433 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
434 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
435 ((*(p) == ',')))
436 ret[len++] = *p++;
437 else {
438 int val = *(unsigned char *)p++;
439 int hi = val / 0x10, lo = val % 0x10;
440 ret[len++] = '%';
441 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
442 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
443 }
444 }
445 }
446 if (uri->query != NULL) {
447 if (len + 3 >= max) {
448 max *= 2;
449 ret = (xmlChar *) xmlRealloc(ret,
450 (max + 1) * sizeof(xmlChar));
451 if (ret == NULL) {
452 xmlGenericError(xmlGenericErrorContext,
453 "xmlSaveUri: out of memory\n");
454 return(NULL);
455 }
456 }
457 ret[len++] = '?';
458 p = uri->query;
459 while (*p != 0) {
460 if (len + 3 >= max) {
461 max *= 2;
462 ret = (xmlChar *) xmlRealloc(ret,
463 (max + 1) * sizeof(xmlChar));
464 if (ret == NULL) {
465 xmlGenericError(xmlGenericErrorContext,
466 "xmlSaveUri: out of memory\n");
467 return(NULL);
468 }
469 }
470 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
471 ret[len++] = *p++;
472 else {
473 int val = *(unsigned char *)p++;
474 int hi = val / 0x10, lo = val % 0x10;
475 ret[len++] = '%';
476 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
477 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
478 }
479 }
480 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000481 }
482 if (uri->fragment != NULL) {
483 if (len + 3 >= max) {
484 max *= 2;
485 ret = (xmlChar *) xmlRealloc(ret,
486 (max + 1) * sizeof(xmlChar));
487 if (ret == NULL) {
488 xmlGenericError(xmlGenericErrorContext,
489 "xmlSaveUri: out of memory\n");
490 return(NULL);
491 }
492 }
493 ret[len++] = '#';
494 p = uri->fragment;
495 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000496 if (len + 3 >= max) {
497 max *= 2;
498 ret = (xmlChar *) xmlRealloc(ret,
499 (max + 1) * sizeof(xmlChar));
500 if (ret == NULL) {
501 xmlGenericError(xmlGenericErrorContext,
502 "xmlSaveUri: out of memory\n");
503 return(NULL);
504 }
505 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000506 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
507 ret[len++] = *p++;
508 else {
509 int val = *(unsigned char *)p++;
510 int hi = val / 0x10, lo = val % 0x10;
511 ret[len++] = '%';
512 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
513 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000514 }
515 }
Owen Taylor3473f882001-02-23 17:55:21 +0000516 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000517 if (len >= max) {
518 max *= 2;
519 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
520 if (ret == NULL) {
521 xmlGenericError(xmlGenericErrorContext,
522 "xmlSaveUri: out of memory\n");
523 return(NULL);
524 }
525 }
526 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000527 return(ret);
528}
529
530/**
531 * xmlPrintURI:
532 * @stream: a FILE* for the output
533 * @uri: pointer to an xmlURI
534 *
535 * Prints the URI in the stream @steam.
536 */
537void
538xmlPrintURI(FILE *stream, xmlURIPtr uri) {
539 xmlChar *out;
540
541 out = xmlSaveUri(uri);
542 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000543 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000544 xmlFree(out);
545 }
546}
547
548/**
549 * xmlCleanURI:
550 * @uri: pointer to an xmlURI
551 *
552 * Make sure the xmlURI struct is free of content
553 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000554static void
Owen Taylor3473f882001-02-23 17:55:21 +0000555xmlCleanURI(xmlURIPtr uri) {
556 if (uri == NULL) return;
557
558 if (uri->scheme != NULL) xmlFree(uri->scheme);
559 uri->scheme = NULL;
560 if (uri->server != NULL) xmlFree(uri->server);
561 uri->server = NULL;
562 if (uri->user != NULL) xmlFree(uri->user);
563 uri->user = NULL;
564 if (uri->path != NULL) xmlFree(uri->path);
565 uri->path = NULL;
566 if (uri->fragment != NULL) xmlFree(uri->fragment);
567 uri->fragment = NULL;
568 if (uri->opaque != NULL) xmlFree(uri->opaque);
569 uri->opaque = NULL;
570 if (uri->authority != NULL) xmlFree(uri->authority);
571 uri->authority = NULL;
572 if (uri->query != NULL) xmlFree(uri->query);
573 uri->query = NULL;
574}
575
576/**
577 * xmlFreeURI:
578 * @uri: pointer to an xmlURI
579 *
580 * Free up the xmlURI struct
581 */
582void
583xmlFreeURI(xmlURIPtr uri) {
584 if (uri == NULL) return;
585
586 if (uri->scheme != NULL) xmlFree(uri->scheme);
587 if (uri->server != NULL) xmlFree(uri->server);
588 if (uri->user != NULL) xmlFree(uri->user);
589 if (uri->path != NULL) xmlFree(uri->path);
590 if (uri->fragment != NULL) xmlFree(uri->fragment);
591 if (uri->opaque != NULL) xmlFree(uri->opaque);
592 if (uri->authority != NULL) xmlFree(uri->authority);
593 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000594 xmlFree(uri);
595}
596
597/************************************************************************
598 * *
599 * Helper functions *
600 * *
601 ************************************************************************/
602
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of "/" between segments are
 * collapsed to a single "/" as well.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the original text, "out" writes the compacted text;
     * "out" never gets ahead of "cur".
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* Copy the single remaining "/" separator. */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination lie in the same buffer and overlap, so
         * strcpy() must not be used here; memmove() handles the overlap.
         * The "+ 1" moves the terminating 0 as well.
         */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still marks the end of the text produced by steps (c)/(d);
     * the string only got shorter since, so this write stays in bounds.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000786
787/**
788 * xmlURIUnescapeString:
789 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000790 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000791 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000792 *
793 * Unescaping routine, does not do validity checks !
794 * Output is direct unsigned char translation of %XX values (no encoding)
795 *
796 * Returns an copy of the string, but unescaped
797 */
798char *
799xmlURIUnescapeString(const char *str, int len, char *target) {
800 char *ret, *out;
801 const char *in;
802
803 if (str == NULL)
804 return(NULL);
805 if (len <= 0) len = strlen(str);
806 if (len <= 0) return(NULL);
807
808 if (target == NULL) {
809 ret = (char *) xmlMalloc(len + 1);
810 if (ret == NULL) {
811 xmlGenericError(xmlGenericErrorContext,
812 "xmlURIUnescapeString: out of memory\n");
813 return(NULL);
814 }
815 } else
816 ret = target;
817 in = str;
818 out = ret;
819 while(len > 0) {
820 if (*in == '%') {
821 in++;
822 if ((*in >= '0') && (*in <= '9'))
823 *out = (*in - '0');
824 else if ((*in >= 'a') && (*in <= 'f'))
825 *out = (*in - 'a') + 10;
826 else if ((*in >= 'A') && (*in <= 'F'))
827 *out = (*in - 'A') + 10;
828 in++;
829 if ((*in >= '0') && (*in <= '9'))
830 *out = *out * 16 + (*in - '0');
831 else if ((*in >= 'a') && (*in <= 'f'))
832 *out = *out * 16 + (*in - 'a') + 10;
833 else if ((*in >= 'A') && (*in <= 'F'))
834 *out = *out * 16 + (*in - 'A') + 10;
835 in++;
836 len -= 3;
837 out++;
838 } else {
839 *out++ = *in++;
840 len--;
841 }
842 }
843 *out = 0;
844 return(ret);
845}
846
847/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000848 * xmlURIEscapeStr:
849 * @str: string to escape
850 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000851 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000852 * This routine escapes a string to hex, ignoring reserved characters (a-z)
853 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000854 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000855 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000856 */
857xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000858xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
859 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000860 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000861
Owen Taylor3473f882001-02-23 17:55:21 +0000862 unsigned int len, out;
863
864 if (str == NULL)
865 return(NULL);
866 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000867 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000868
869 len += 20;
870 ret = (xmlChar *) xmlMalloc(len);
871 if (ret == NULL) {
872 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000873 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000874 return(NULL);
875 }
876 in = (const xmlChar *) str;
877 out = 0;
878 while(*in != 0) {
879 if (len - out <= 3) {
880 len += 20;
881 ret = (xmlChar *) xmlRealloc(ret, len);
882 if (ret == NULL) {
883 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000884 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000885 return(NULL);
886 }
887 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000888
889 ch = *in;
890
Daniel Veillardeb475a32002-04-14 22:00:22 +0000891 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000892 unsigned char val;
893 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000894 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000895 if (val <= 9)
896 ret[out++] = '0' + val;
897 else
898 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000899 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000900 if (val <= 9)
901 ret[out++] = '0' + val;
902 else
903 ret[out++] = 'A' + val - 0xA;
904 in++;
905 } else {
906 ret[out++] = *in++;
907 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000908
Owen Taylor3473f882001-02-23 17:55:21 +0000909 }
910 ret[out] = 0;
911 return(ret);
912}
913
Daniel Veillard8514c672001-05-23 10:29:12 +0000914/**
915 * xmlURIEscape:
916 * @str: the string of the URI to escape
917 *
918 * Escaping routine, does not do validity checks !
919 * It will try to escape the chars needing this, but this is heuristic
920 * based it's impossible to be sure.
921 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000922 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000923 *
924 * 25 May 2001
925 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
926 * according to RFC2396.
927 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000928 */
929xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000930xmlURIEscape(const xmlChar * str)
931{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000932 xmlChar *ret, *segment = NULL;
933 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000934 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000935
Daniel Veillard6278fb52001-05-25 07:38:41 +0000936#define NULLCHK(p) if(!p) { \
937 xmlGenericError(xmlGenericErrorContext, \
938 "xmlURIEscape: out of memory\n"); \
939 return NULL; }
940
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000941 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000942 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000943
944 uri = xmlCreateURI();
945 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000946 /*
947 * Allow escaping errors in the unescaped form
948 */
949 uri->cleanup = 1;
950 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000951 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000952 xmlFreeURI(uri);
953 return (NULL);
954 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000955 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000956
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000957 if (!uri)
958 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000959
960 ret = NULL;
961
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000962 if (uri->scheme) {
963 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
964 NULLCHK(segment)
965 ret = xmlStrcat(ret, segment);
966 ret = xmlStrcat(ret, BAD_CAST ":");
967 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000968 }
969
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000970 if (uri->authority) {
971 segment =
972 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
973 NULLCHK(segment)
974 ret = xmlStrcat(ret, BAD_CAST "//");
975 ret = xmlStrcat(ret, segment);
976 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000977 }
978
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000979 if (uri->user) {
980 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
981 NULLCHK(segment)
982 ret = xmlStrcat(ret, segment);
983 ret = xmlStrcat(ret, BAD_CAST "@");
984 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000985 }
986
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000987 if (uri->server) {
988 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
989 NULLCHK(segment)
990 ret = xmlStrcat(ret, BAD_CAST "//");
991 ret = xmlStrcat(ret, segment);
992 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000993 }
994
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000995 if (uri->port) {
996 xmlChar port[10];
997
Daniel Veillard43d3f612001-11-10 11:57:23 +0000998 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000999 ret = xmlStrcat(ret, BAD_CAST ":");
1000 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001001 }
1002
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001003 if (uri->path) {
1004 segment =
1005 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1006 NULLCHK(segment)
1007 ret = xmlStrcat(ret, segment);
1008 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001009 }
1010
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001011 if (uri->query) {
1012 segment =
1013 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1014 NULLCHK(segment)
1015 ret = xmlStrcat(ret, BAD_CAST "?");
1016 ret = xmlStrcat(ret, segment);
1017 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001018 }
1019
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001020 if (uri->opaque) {
1021 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1022 NULLCHK(segment)
1023 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001024 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001025 }
1026
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001027 if (uri->fragment) {
1028 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1029 NULLCHK(segment)
1030 ret = xmlStrcat(ret, BAD_CAST "#");
1031 ret = xmlStrcat(ret, segment);
1032 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001033 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001034
1035 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001036#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001037
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001038 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001039}
1040
Owen Taylor3473f882001-02-23 17:55:21 +00001041/************************************************************************
1042 * *
1043 * Escaped URI parsing *
1044 * *
1045 ************************************************************************/
1046
1047/**
1048 * xmlParseURIFragment:
1049 * @uri: pointer to an URI structure
1050 * @str: pointer to the string to analyze
1051 *
1052 * Parse an URI fragment string and fills in the appropriate fields
1053 * of the @uri structure.
1054 *
1055 * fragment = *uric
1056 *
1057 * Returns 0 or the error code
1058 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001059static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001060xmlParseURIFragment(xmlURIPtr uri, const char **str)
1061{
Owen Taylor3473f882001-02-23 17:55:21 +00001062 const char *cur = *str;
1063
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001064 if (str == NULL)
1065 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001066
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001067 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001068 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001069 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001070 if (uri->fragment != NULL)
1071 xmlFree(uri->fragment);
1072 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001073 }
1074 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001075 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001076}
1077
1078/**
1079 * xmlParseURIQuery:
1080 * @uri: pointer to an URI structure
1081 * @str: pointer to the string to analyze
1082 *
1083 * Parse the query part of an URI
1084 *
1085 * query = *uric
1086 *
1087 * Returns 0 or the error code
1088 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001089static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001090xmlParseURIQuery(xmlURIPtr uri, const char **str)
1091{
Owen Taylor3473f882001-02-23 17:55:21 +00001092 const char *cur = *str;
1093
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001094 if (str == NULL)
1095 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001096
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001097 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1098 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001099 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001100 if (uri->query != NULL)
1101 xmlFree(uri->query);
1102 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001103 }
1104 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001105 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001106}
1107
1108/**
1109 * xmlParseURIScheme:
1110 * @uri: pointer to an URI structure
1111 * @str: pointer to the string to analyze
1112 *
1113 * Parse an URI scheme
1114 *
1115 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1116 *
1117 * Returns 0 or the error code
1118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001119static int
Owen Taylor3473f882001-02-23 17:55:21 +00001120xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1121 const char *cur;
1122
1123 if (str == NULL)
1124 return(-1);
1125
1126 cur = *str;
1127 if (!IS_ALPHA(*cur))
1128 return(2);
1129 cur++;
1130 while (IS_SCHEME(*cur)) cur++;
1131 if (uri != NULL) {
1132 if (uri->scheme != NULL) xmlFree(uri->scheme);
1133 /* !!! strndup */
1134 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1135 }
1136 *str = cur;
1137 return(0);
1138}
1139
1140/**
1141 * xmlParseURIOpaquePart:
1142 * @uri: pointer to an URI structure
1143 * @str: pointer to the string to analyze
1144 *
1145 * Parse an URI opaque part
1146 *
1147 * opaque_part = uric_no_slash *uric
1148 *
1149 * Returns 0 or the error code
1150 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001151static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001152xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1153{
Owen Taylor3473f882001-02-23 17:55:21 +00001154 const char *cur;
1155
1156 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001157 return (-1);
1158
Owen Taylor3473f882001-02-23 17:55:21 +00001159 cur = *str;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001160 if (!(IS_URIC_NO_SLASH(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001161 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001162 }
1163 NEXT(cur);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001164 while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1165 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001166 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001167 if (uri->opaque != NULL)
1168 xmlFree(uri->opaque);
1169 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001170 }
1171 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001172 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001173}
1174
1175/**
1176 * xmlParseURIServer:
1177 * @uri: pointer to an URI structure
1178 * @str: pointer to the string to analyze
1179 *
1180 * Parse a server subpart of an URI, it's a finer grain analysis
1181 * of the authority part.
1182 *
1183 * server = [ [ userinfo "@" ] hostport ]
1184 * userinfo = *( unreserved | escaped |
1185 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1186 * hostport = host [ ":" port ]
1187 * host = hostname | IPv4address
1188 * hostname = *( domainlabel "." ) toplabel [ "." ]
1189 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1190 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1191 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1192 * port = *digit
1193 *
1194 * Returns 0 or the error code
1195 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001196static int
Owen Taylor3473f882001-02-23 17:55:21 +00001197xmlParseURIServer(xmlURIPtr uri, const char **str) {
1198 const char *cur;
1199 const char *host, *tmp;
1200
1201 if (str == NULL)
1202 return(-1);
1203
1204 cur = *str;
1205
1206 /*
1207 * is there an userinfo ?
1208 */
1209 while (IS_USERINFO(cur)) NEXT(cur);
1210 if (*cur == '@') {
1211 if (uri != NULL) {
1212 if (uri->user != NULL) xmlFree(uri->user);
1213 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1214 }
1215 cur++;
1216 } else {
1217 if (uri != NULL) {
1218 if (uri->user != NULL) xmlFree(uri->user);
1219 uri->user = NULL;
1220 }
1221 cur = *str;
1222 }
1223 /*
1224 * This can be empty in the case where there is no server
1225 */
1226 host = cur;
1227 if (*cur == '/') {
1228 if (uri != NULL) {
1229 if (uri->authority != NULL) xmlFree(uri->authority);
1230 uri->authority = NULL;
1231 if (uri->server != NULL) xmlFree(uri->server);
1232 uri->server = NULL;
1233 uri->port = 0;
1234 }
1235 return(0);
1236 }
1237 /*
1238 * host part of hostport can derive either an IPV4 address
1239 * or an unresolved name. Check the IP first, it easier to detect
1240 * errors if wrong one
1241 */
1242 if (IS_DIGIT(*cur)) {
1243 while(IS_DIGIT(*cur)) cur++;
1244 if (*cur != '.')
1245 goto host_name;
1246 cur++;
1247 if (!IS_DIGIT(*cur))
1248 goto host_name;
1249 while(IS_DIGIT(*cur)) cur++;
1250 if (*cur != '.')
1251 goto host_name;
1252 cur++;
1253 if (!IS_DIGIT(*cur))
1254 goto host_name;
1255 while(IS_DIGIT(*cur)) cur++;
1256 if (*cur != '.')
1257 goto host_name;
1258 cur++;
1259 if (!IS_DIGIT(*cur))
1260 goto host_name;
1261 while(IS_DIGIT(*cur)) cur++;
1262 if (uri != NULL) {
1263 if (uri->authority != NULL) xmlFree(uri->authority);
1264 uri->authority = NULL;
1265 if (uri->server != NULL) xmlFree(uri->server);
1266 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1267 }
1268 goto host_done;
1269 }
1270host_name:
1271 /*
1272 * the hostname production as-is is a parser nightmare.
1273 * simplify it to
1274 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1275 * and just make sure the last label starts with a non numeric char.
1276 */
1277 if (!IS_ALPHANUM(*cur))
1278 return(6);
1279 while (IS_ALPHANUM(*cur)) {
1280 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1281 if (*cur == '.')
1282 cur++;
1283 }
1284 tmp = cur;
1285 tmp--;
1286 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1287 tmp++;
1288 if (!IS_ALPHA(*tmp))
1289 return(7);
1290 if (uri != NULL) {
1291 if (uri->authority != NULL) xmlFree(uri->authority);
1292 uri->authority = NULL;
1293 if (uri->server != NULL) xmlFree(uri->server);
1294 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1295 }
1296
1297host_done:
1298
1299 /*
1300 * finish by checking for a port presence.
1301 */
1302 if (*cur == ':') {
1303 cur++;
1304 if (IS_DIGIT(*cur)) {
1305 if (uri != NULL)
1306 uri->port = 0;
1307 while (IS_DIGIT(*cur)) {
1308 if (uri != NULL)
1309 uri->port = uri->port * 10 + (*cur - '0');
1310 cur++;
1311 }
1312 }
1313 }
1314 *str = cur;
1315 return(0);
1316}
1317
1318/**
1319 * xmlParseURIRelSegment:
1320 * @uri: pointer to an URI structure
1321 * @str: pointer to the string to analyze
1322 *
1323 * Parse an URI relative segment
1324 *
1325 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1326 * "+" | "$" | "," )
1327 *
1328 * Returns 0 or the error code
1329 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001330static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001331xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1332{
Owen Taylor3473f882001-02-23 17:55:21 +00001333 const char *cur;
1334
1335 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001336 return (-1);
1337
Owen Taylor3473f882001-02-23 17:55:21 +00001338 cur = *str;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001339 if (!(IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) {
1340 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001341 }
1342 NEXT(cur);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001343 while (IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1344 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001345 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001346 if (uri->path != NULL)
1347 xmlFree(uri->path);
1348 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001349 }
1350 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001351 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001352}
1353
1354/**
1355 * xmlParseURIPathSegments:
1356 * @uri: pointer to an URI structure
1357 * @str: pointer to the string to analyze
1358 * @slash: should we add a leading slash
1359 *
1360 * Parse an URI set of path segments
1361 *
1362 * path_segments = segment *( "/" segment )
1363 * segment = *pchar *( ";" param )
1364 * param = *pchar
1365 *
1366 * Returns 0 or the error code
1367 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001368static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001369xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1370{
Owen Taylor3473f882001-02-23 17:55:21 +00001371 const char *cur;
1372
1373 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001374 return (-1);
1375
Owen Taylor3473f882001-02-23 17:55:21 +00001376 cur = *str;
1377
1378 do {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001379 while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1380 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001381 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001382 cur++;
1383 while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))
1384 NEXT(cur);
1385 }
1386 if (*cur != '/')
1387 break;
1388 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001389 } while (1);
1390 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001391 int len, len2 = 0;
1392 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001393
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001394 /*
1395 * Concat the set of path segments to the current path
1396 */
1397 len = cur - *str;
1398 if (slash)
1399 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001400
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001401 if (uri->path != NULL) {
1402 len2 = strlen(uri->path);
1403 len += len2;
1404 }
Owen Taylor3473f882001-02-23 17:55:21 +00001405 path = (char *) xmlMalloc(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001406 if (path == NULL) {
1407 xmlGenericError(xmlGenericErrorContext,
1408 "xmlParseURIPathSegments: out of memory\n");
1409 *str = cur;
1410 return (-1);
1411 }
1412 if (uri->path != NULL)
1413 memcpy(path, uri->path, len2);
1414 if (slash) {
1415 path[len2] = '/';
1416 len2++;
1417 }
1418 path[len2] = 0;
1419 if (cur - *str > 0)
1420 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1421 if (uri->path != NULL)
1422 xmlFree(uri->path);
1423 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001424 }
1425 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001426 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001427}
1428
1429/**
1430 * xmlParseURIAuthority:
1431 * @uri: pointer to an URI structure
1432 * @str: pointer to the string to analyze
1433 *
1434 * Parse the authority part of an URI.
1435 *
1436 * authority = server | reg_name
1437 * server = [ [ userinfo "@" ] hostport ]
1438 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1439 * "@" | "&" | "=" | "+" )
1440 *
1441 * Note : this is completely ambiguous since reg_name is allowed to
1442 * use the full set of chars in use by server:
1443 *
1444 * 3.2.1. Registry-based Naming Authority
1445 *
1446 * The structure of a registry-based naming authority is specific
1447 * to the URI scheme, but constrained to the allowed characters
1448 * for an authority component.
1449 *
1450 * Returns 0 or the error code
1451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001452static int
Owen Taylor3473f882001-02-23 17:55:21 +00001453xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1454 const char *cur;
1455 int ret;
1456
1457 if (str == NULL)
1458 return(-1);
1459
1460 cur = *str;
1461
1462 /*
1463 * try first to parse it as a server string.
1464 */
1465 ret = xmlParseURIServer(uri, str);
1466 if (ret == 0)
1467 return(0);
1468
1469 /*
1470 * failed, fallback to reg_name
1471 */
1472 if (!IS_REG_NAME(cur)) {
1473 return(5);
1474 }
1475 NEXT(cur);
1476 while (IS_REG_NAME(cur)) NEXT(cur);
1477 if (uri != NULL) {
1478 if (uri->server != NULL) xmlFree(uri->server);
1479 uri->server = NULL;
1480 if (uri->user != NULL) xmlFree(uri->user);
1481 uri->user = NULL;
1482 if (uri->authority != NULL) xmlFree(uri->authority);
1483 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1484 }
1485 *str = cur;
1486 return(0);
1487}
1488
1489/**
1490 * xmlParseURIHierPart:
1491 * @uri: pointer to an URI structure
1492 * @str: pointer to the string to analyze
1493 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001494 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001495 *
1496 * hier_part = ( net_path | abs_path ) [ "?" query ]
1497 * abs_path = "/" path_segments
1498 * net_path = "//" authority [ abs_path ]
1499 *
1500 * Returns 0 or the error code
1501 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001502static int
Owen Taylor3473f882001-02-23 17:55:21 +00001503xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1504 int ret;
1505 const char *cur;
1506
1507 if (str == NULL)
1508 return(-1);
1509
1510 cur = *str;
1511
1512 if ((cur[0] == '/') && (cur[1] == '/')) {
1513 cur += 2;
1514 ret = xmlParseURIAuthority(uri, &cur);
1515 if (ret != 0)
1516 return(ret);
1517 if (cur[0] == '/') {
1518 cur++;
1519 ret = xmlParseURIPathSegments(uri, &cur, 1);
1520 }
1521 } else if (cur[0] == '/') {
1522 cur++;
1523 ret = xmlParseURIPathSegments(uri, &cur, 1);
1524 } else {
1525 return(4);
1526 }
1527 if (ret != 0)
1528 return(ret);
1529 if (*cur == '?') {
1530 cur++;
1531 ret = xmlParseURIQuery(uri, &cur);
1532 if (ret != 0)
1533 return(ret);
1534 }
1535 *str = cur;
1536 return(0);
1537}
1538
1539/**
1540 * xmlParseAbsoluteURI:
1541 * @uri: pointer to an URI structure
1542 * @str: pointer to the string to analyze
1543 *
1544 * Parse an URI reference string and fills in the appropriate fields
1545 * of the @uri structure
1546 *
1547 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1548 *
1549 * Returns 0 or the error code
1550 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001551static int
Owen Taylor3473f882001-02-23 17:55:21 +00001552xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1553 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001554 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556 if (str == NULL)
1557 return(-1);
1558
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001559 cur = *str;
1560
Owen Taylor3473f882001-02-23 17:55:21 +00001561 ret = xmlParseURIScheme(uri, str);
1562 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001563 if (**str != ':') {
1564 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001565 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001566 }
Owen Taylor3473f882001-02-23 17:55:21 +00001567 (*str)++;
1568 if (**str == '/')
1569 return(xmlParseURIHierPart(uri, str));
1570 return(xmlParseURIOpaquePart(uri, str));
1571}
1572
1573/**
1574 * xmlParseRelativeURI:
1575 * @uri: pointer to an URI structure
1576 * @str: pointer to the string to analyze
1577 *
1578 * Parse an relative URI string and fills in the appropriate fields
1579 * of the @uri structure
1580 *
1581 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1582 * abs_path = "/" path_segments
1583 * net_path = "//" authority [ abs_path ]
1584 * rel_path = rel_segment [ abs_path ]
1585 *
1586 * Returns 0 or the error code
1587 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001588static int
Owen Taylor3473f882001-02-23 17:55:21 +00001589xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1590 int ret = 0;
1591 const char *cur;
1592
1593 if (str == NULL)
1594 return(-1);
1595
1596 cur = *str;
1597 if ((cur[0] == '/') && (cur[1] == '/')) {
1598 cur += 2;
1599 ret = xmlParseURIAuthority(uri, &cur);
1600 if (ret != 0)
1601 return(ret);
1602 if (cur[0] == '/') {
1603 cur++;
1604 ret = xmlParseURIPathSegments(uri, &cur, 1);
1605 }
1606 } else if (cur[0] == '/') {
1607 cur++;
1608 ret = xmlParseURIPathSegments(uri, &cur, 1);
1609 } else if (cur[0] != '#' && cur[0] != '?') {
1610 ret = xmlParseURIRelSegment(uri, &cur);
1611 if (ret != 0)
1612 return(ret);
1613 if (cur[0] == '/') {
1614 cur++;
1615 ret = xmlParseURIPathSegments(uri, &cur, 1);
1616 }
1617 }
1618 if (ret != 0)
1619 return(ret);
1620 if (*cur == '?') {
1621 cur++;
1622 ret = xmlParseURIQuery(uri, &cur);
1623 if (ret != 0)
1624 return(ret);
1625 }
1626 *str = cur;
1627 return(ret);
1628}
1629
1630/**
1631 * xmlParseURIReference:
1632 * @uri: pointer to an URI structure
1633 * @str: the string to analyze
1634 *
1635 * Parse an URI reference string and fills in the appropriate fields
1636 * of the @uri structure
1637 *
1638 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1639 *
1640 * Returns 0 or the error code
1641 */
1642int
1643xmlParseURIReference(xmlURIPtr uri, const char *str) {
1644 int ret;
1645 const char *tmp = str;
1646
1647 if (str == NULL)
1648 return(-1);
1649 xmlCleanURI(uri);
1650
1651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001652 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001653 * it fails.
1654 */
1655 ret = xmlParseAbsoluteURI(uri, &str);
1656 if (ret != 0) {
1657 xmlCleanURI(uri);
1658 str = tmp;
1659 ret = xmlParseRelativeURI(uri, &str);
1660 }
1661 if (ret != 0) {
1662 xmlCleanURI(uri);
1663 return(ret);
1664 }
1665
1666 if (*str == '#') {
1667 str++;
1668 ret = xmlParseURIFragment(uri, &str);
1669 if (ret != 0) return(ret);
1670 }
1671 if (*str != 0) {
1672 xmlCleanURI(uri);
1673 return(1);
1674 }
1675 return(0);
1676}
1677
1678/**
1679 * xmlParseURI:
1680 * @str: the URI string to analyze
1681 *
1682 * Parse an URI
1683 *
1684 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1685 *
1686 * Returns a newly build xmlURIPtr or NULL in case of error
1687 */
1688xmlURIPtr
1689xmlParseURI(const char *str) {
1690 xmlURIPtr uri;
1691 int ret;
1692
1693 if (str == NULL)
1694 return(NULL);
1695 uri = xmlCreateURI();
1696 if (uri != NULL) {
1697 ret = xmlParseURIReference(uri, str);
1698 if (ret) {
1699 xmlFreeURI(uri);
1700 return(NULL);
1701 }
1702 }
1703 return(uri);
1704}
1705
1706/************************************************************************
1707 * *
1708 * Public functions *
1709 * *
1710 ************************************************************************/
1711
1712/**
1713 * xmlBuildURI:
1714 * @URI: the URI instance found in the document
1715 * @base: the base value
1716 *
1717 * Computes he final URI of the reference done by checking that
1718 * the given URI is valid, and building the final URI using the
1719 * base URI. This is processed according to section 5.2 of the
1720 * RFC 2396
1721 *
1722 * 5.2. Resolving Relative References to Absolute Form
1723 *
1724 * Returns a new URI string (to be freed by the caller) or NULL in case
1725 * of error.
1726 */
1727xmlChar *
1728xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1729 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001730 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001731 xmlURIPtr ref = NULL;
1732 xmlURIPtr bas = NULL;
1733 xmlURIPtr res = NULL;
1734
1735 /*
1736 * 1) The URI reference is parsed into the potential four components and
1737 * fragment identifier, as described in Section 4.3.
1738 *
1739 * NOTE that a completely empty URI is treated by modern browsers
1740 * as a reference to "." rather than as a synonym for the current
1741 * URI. Should we do that here?
1742 */
1743 if (URI == NULL)
1744 ret = -1;
1745 else {
1746 if (*URI) {
1747 ref = xmlCreateURI();
1748 if (ref == NULL)
1749 goto done;
1750 ret = xmlParseURIReference(ref, (const char *) URI);
1751 }
1752 else
1753 ret = 0;
1754 }
1755 if (ret != 0)
1756 goto done;
1757 if (base == NULL)
1758 ret = -1;
1759 else {
1760 bas = xmlCreateURI();
1761 if (bas == NULL)
1762 goto done;
1763 ret = xmlParseURIReference(bas, (const char *) base);
1764 }
1765 if (ret != 0) {
1766 if (ref)
1767 val = xmlSaveUri(ref);
1768 goto done;
1769 }
1770 if (ref == NULL) {
1771 /*
1772 * the base fragment must be ignored
1773 */
1774 if (bas->fragment != NULL) {
1775 xmlFree(bas->fragment);
1776 bas->fragment = NULL;
1777 }
1778 val = xmlSaveUri(bas);
1779 goto done;
1780 }
1781
1782 /*
1783 * 2) If the path component is empty and the scheme, authority, and
1784 * query components are undefined, then it is a reference to the
1785 * current document and we are done. Otherwise, the reference URI's
1786 * query and fragment components are defined as found (or not found)
1787 * within the URI reference and not inherited from the base URI.
1788 *
1789 * NOTE that in modern browsers, the parsing differs from the above
1790 * in the following aspect: the query component is allowed to be
1791 * defined while still treating this as a reference to the current
1792 * document.
1793 */
1794 res = xmlCreateURI();
1795 if (res == NULL)
1796 goto done;
1797 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1798 ((ref->authority == NULL) && (ref->server == NULL))) {
1799 if (bas->scheme != NULL)
1800 res->scheme = xmlMemStrdup(bas->scheme);
1801 if (bas->authority != NULL)
1802 res->authority = xmlMemStrdup(bas->authority);
1803 else if (bas->server != NULL) {
1804 res->server = xmlMemStrdup(bas->server);
1805 if (bas->user != NULL)
1806 res->user = xmlMemStrdup(bas->user);
1807 res->port = bas->port;
1808 }
1809 if (bas->path != NULL)
1810 res->path = xmlMemStrdup(bas->path);
1811 if (ref->query != NULL)
1812 res->query = xmlMemStrdup(ref->query);
1813 else if (bas->query != NULL)
1814 res->query = xmlMemStrdup(bas->query);
1815 if (ref->fragment != NULL)
1816 res->fragment = xmlMemStrdup(ref->fragment);
1817 goto step_7;
1818 }
1819
1820 if (ref->query != NULL)
1821 res->query = xmlMemStrdup(ref->query);
1822 if (ref->fragment != NULL)
1823 res->fragment = xmlMemStrdup(ref->fragment);
1824
1825 /*
1826 * 3) If the scheme component is defined, indicating that the reference
1827 * starts with a scheme name, then the reference is interpreted as an
1828 * absolute URI and we are done. Otherwise, the reference URI's
1829 * scheme is inherited from the base URI's scheme component.
1830 */
1831 if (ref->scheme != NULL) {
1832 val = xmlSaveUri(ref);
1833 goto done;
1834 }
1835 if (bas->scheme != NULL)
1836 res->scheme = xmlMemStrdup(bas->scheme);
1837
1838 /*
1839 * 4) If the authority component is defined, then the reference is a
1840 * network-path and we skip to step 7. Otherwise, the reference
1841 * URI's authority is inherited from the base URI's authority
1842 * component, which will also be undefined if the URI scheme does not
1843 * use an authority component.
1844 */
1845 if ((ref->authority != NULL) || (ref->server != NULL)) {
1846 if (ref->authority != NULL)
1847 res->authority = xmlMemStrdup(ref->authority);
1848 else {
1849 res->server = xmlMemStrdup(ref->server);
1850 if (ref->user != NULL)
1851 res->user = xmlMemStrdup(ref->user);
1852 res->port = ref->port;
1853 }
1854 if (ref->path != NULL)
1855 res->path = xmlMemStrdup(ref->path);
1856 goto step_7;
1857 }
1858 if (bas->authority != NULL)
1859 res->authority = xmlMemStrdup(bas->authority);
1860 else if (bas->server != NULL) {
1861 res->server = xmlMemStrdup(bas->server);
1862 if (bas->user != NULL)
1863 res->user = xmlMemStrdup(bas->user);
1864 res->port = bas->port;
1865 }
1866
1867 /*
1868 * 5) If the path component begins with a slash character ("/"), then
1869 * the reference is an absolute-path and we skip to step 7.
1870 */
1871 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1872 res->path = xmlMemStrdup(ref->path);
1873 goto step_7;
1874 }
1875
1876
1877 /*
1878 * 6) If this step is reached, then we are resolving a relative-path
1879 * reference. The relative path needs to be merged with the base
1880 * URI's path. Although there are many ways to do this, we will
1881 * describe a simple method using a separate string buffer.
1882 *
1883 * Allocate a buffer large enough for the result string.
1884 */
1885 len = 2; /* extra / and 0 */
1886 if (ref->path != NULL)
1887 len += strlen(ref->path);
1888 if (bas->path != NULL)
1889 len += strlen(bas->path);
1890 res->path = (char *) xmlMalloc(len);
1891 if (res->path == NULL) {
1892 xmlGenericError(xmlGenericErrorContext,
1893 "xmlBuildURI: out of memory\n");
1894 goto done;
1895 }
1896 res->path[0] = 0;
1897
1898 /*
1899 * a) All but the last segment of the base URI's path component is
1900 * copied to the buffer. In other words, any characters after the
1901 * last (right-most) slash character, if any, are excluded.
1902 */
1903 cur = 0;
1904 out = 0;
1905 if (bas->path != NULL) {
1906 while (bas->path[cur] != 0) {
1907 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1908 cur++;
1909 if (bas->path[cur] == 0)
1910 break;
1911
1912 cur++;
1913 while (out < cur) {
1914 res->path[out] = bas->path[out];
1915 out++;
1916 }
1917 }
1918 }
1919 res->path[out] = 0;
1920
1921 /*
1922 * b) The reference's path component is appended to the buffer
1923 * string.
1924 */
1925 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001926 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001927 /*
1928 * Ensure the path includes a '/'
1929 */
1930 if ((out == 0) && (bas->server != NULL))
1931 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001932 while (ref->path[indx] != 0) {
1933 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00001934 }
1935 }
1936 res->path[out] = 0;
1937
1938 /*
1939 * Steps c) to h) are really path normalization steps
1940 */
1941 xmlNormalizeURIPath(res->path);
1942
1943step_7:
1944
1945 /*
1946 * 7) The resulting URI components, including any inherited from the
1947 * base URI, are recombined to give the absolute form of the URI
1948 * reference.
1949 */
1950 val = xmlSaveUri(res);
1951
1952done:
1953 if (ref != NULL)
1954 xmlFreeURI(ref);
1955 if (bas != NULL)
1956 xmlFreeURI(bas);
1957 if (res != NULL)
1958 xmlFreeURI(res);
1959 return(val);
1960}
1961
1962