blob: fd6b58825b6add3c106145e571bf91854519599f [file] [log] [blame]
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFC 2396
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
21/************************************************************************
22 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000023 * Macros to differentiate various character type *
Owen Taylor3473f882001-02-23 17:55:21 +000024 * directly extracted from RFC 2396 *
25 * *
26 ************************************************************************/
27
/*
 * Character classification for the RFC 2396 grammar.
 *
 * Macros whose parameter is named x classify a single character value;
 * macros whose parameter is named p look at the character(s) starting
 * at pointer p. Arguments may be evaluated more than once.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9"; any earlier definition of IS_DIGIT is discarded */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A" .. "F" | "a" .. "f" */
#define IS_HEX(x)                                                       \
    ((IS_DIGIT(x)) ||                                                   \
     (((x) >= 'a') && ((x) <= 'f')) ||                                  \
     (((x) >= 'A') && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||                    \
     ((x) == ':') || ((x) == '@') || ((x) == '&') ||                    \
     ((x) == '=') || ((x) == '+') || ((x) == '$') ||                    \
     ((x) == ','))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/* escaped = "%" hex hex */
#define IS_ESCAPED(p)                                                   \
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p)                                             \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||           \
     ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||           \
     ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_PCHAR(p)                                                     \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)                                                   \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/* scheme = alpha *( alpha | digit | "+" | "-" | "." ) */
#define IS_SCHEME(x)                                                    \
    ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||                                  \
     ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p)                                                  \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)                                                  \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/* uric = reserved | unreserved | escaped */
#define IS_URIC(p)                                                      \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p))))

/* unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000169
/*
 * Advance pointer p to the next URI character: three bytes forward when
 * p points at a '%' (the start of an escape sequence), one byte forward
 * otherwise. The macro does not check that two more bytes actually
 * follow the '%'. p must be a modifiable lvalue and is evaluated more
 * than once; it is fully parenthesized so that expressions such as
 * NEXT(*pp) advance *pp rather than pp.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
175
176/*
177 * Productions from the spec.
178 *
179 * authority = server | reg_name
180 * reg_name = 1*( unreserved | escaped | "$" | "," |
181 * ";" | ":" | "@" | "&" | "=" | "+" )
182 *
183 * path = [ abs_path | opaque_part ]
184 */
185
186/************************************************************************
187 * *
188 * Generic URI structure functions *
189 * *
190 ************************************************************************/
191
192/**
193 * xmlCreateURI:
194 *
195 * Simply creates an empty xmlURI
196 *
197 * Returns the new structure or NULL in case of error
198 */
199xmlURIPtr
200xmlCreateURI(void) {
201 xmlURIPtr ret;
202
203 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
204 if (ret == NULL) {
205 xmlGenericError(xmlGenericErrorContext,
206 "xmlCreateURI: out of memory\n");
207 return(NULL);
208 }
209 memset(ret, 0, sizeof(xmlURI));
210 return(ret);
211}
212
213/**
214 * xmlSaveUri:
215 * @uri: pointer to an xmlURI
216 *
217 * Save the URI as an escaped string
218 *
219 * Returns a new string (to be deallocated by caller)
220 */
221xmlChar *
222xmlSaveUri(xmlURIPtr uri) {
223 xmlChar *ret = NULL;
224 const char *p;
225 int len;
226 int max;
227
228 if (uri == NULL) return(NULL);
229
230
231 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000232 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000233 if (ret == NULL) {
234 xmlGenericError(xmlGenericErrorContext,
235 "xmlSaveUri: out of memory\n");
236 return(NULL);
237 }
238 len = 0;
239
240 if (uri->scheme != NULL) {
241 p = uri->scheme;
242 while (*p != 0) {
243 if (len >= max) {
244 max *= 2;
245 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
246 if (ret == NULL) {
247 xmlGenericError(xmlGenericErrorContext,
248 "xmlSaveUri: out of memory\n");
249 return(NULL);
250 }
251 }
252 ret[len++] = *p++;
253 }
254 if (len >= max) {
255 max *= 2;
256 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
257 if (ret == NULL) {
258 xmlGenericError(xmlGenericErrorContext,
259 "xmlSaveUri: out of memory\n");
260 return(NULL);
261 }
262 }
263 ret[len++] = ':';
264 }
265 if (uri->opaque != NULL) {
266 p = uri->opaque;
267 while (*p != 0) {
268 if (len + 3 >= max) {
269 max *= 2;
270 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
271 if (ret == NULL) {
272 xmlGenericError(xmlGenericErrorContext,
273 "xmlSaveUri: out of memory\n");
274 return(NULL);
275 }
276 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000277 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000278 ret[len++] = *p++;
279 else {
280 int val = *(unsigned char *)p++;
281 int hi = val / 0x10, lo = val % 0x10;
282 ret[len++] = '%';
283 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
284 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
285 }
286 }
Owen Taylor3473f882001-02-23 17:55:21 +0000287 } else {
288 if (uri->server != NULL) {
289 if (len + 3 >= max) {
290 max *= 2;
291 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
292 if (ret == NULL) {
293 xmlGenericError(xmlGenericErrorContext,
294 "xmlSaveUri: out of memory\n");
295 return(NULL);
296 }
297 }
298 ret[len++] = '/';
299 ret[len++] = '/';
300 if (uri->user != NULL) {
301 p = uri->user;
302 while (*p != 0) {
303 if (len + 3 >= max) {
304 max *= 2;
305 ret = (xmlChar *) xmlRealloc(ret,
306 (max + 1) * sizeof(xmlChar));
307 if (ret == NULL) {
308 xmlGenericError(xmlGenericErrorContext,
309 "xmlSaveUri: out of memory\n");
310 return(NULL);
311 }
312 }
313 if ((IS_UNRESERVED(*(p))) ||
314 ((*(p) == ';')) || ((*(p) == ':')) ||
315 ((*(p) == '&')) || ((*(p) == '=')) ||
316 ((*(p) == '+')) || ((*(p) == '$')) ||
317 ((*(p) == ',')))
318 ret[len++] = *p++;
319 else {
320 int val = *(unsigned char *)p++;
321 int hi = val / 0x10, lo = val % 0x10;
322 ret[len++] = '%';
323 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
324 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
325 }
326 }
327 if (len + 3 >= max) {
328 max *= 2;
329 ret = (xmlChar *) xmlRealloc(ret,
330 (max + 1) * sizeof(xmlChar));
331 if (ret == NULL) {
332 xmlGenericError(xmlGenericErrorContext,
333 "xmlSaveUri: out of memory\n");
334 return(NULL);
335 }
336 }
337 ret[len++] = '@';
338 }
339 p = uri->server;
340 while (*p != 0) {
341 if (len >= max) {
342 max *= 2;
343 ret = (xmlChar *) xmlRealloc(ret,
344 (max + 1) * sizeof(xmlChar));
345 if (ret == NULL) {
346 xmlGenericError(xmlGenericErrorContext,
347 "xmlSaveUri: out of memory\n");
348 return(NULL);
349 }
350 }
351 ret[len++] = *p++;
352 }
353 if (uri->port > 0) {
354 if (len + 10 >= max) {
355 max *= 2;
356 ret = (xmlChar *) xmlRealloc(ret,
357 (max + 1) * sizeof(xmlChar));
358 if (ret == NULL) {
359 xmlGenericError(xmlGenericErrorContext,
360 "xmlSaveUri: out of memory\n");
361 return(NULL);
362 }
363 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000364 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000365 }
366 } else if (uri->authority != NULL) {
367 if (len + 3 >= max) {
368 max *= 2;
369 ret = (xmlChar *) xmlRealloc(ret,
370 (max + 1) * sizeof(xmlChar));
371 if (ret == NULL) {
372 xmlGenericError(xmlGenericErrorContext,
373 "xmlSaveUri: out of memory\n");
374 return(NULL);
375 }
376 }
377 ret[len++] = '/';
378 ret[len++] = '/';
379 p = uri->authority;
380 while (*p != 0) {
381 if (len + 3 >= max) {
382 max *= 2;
383 ret = (xmlChar *) xmlRealloc(ret,
384 (max + 1) * sizeof(xmlChar));
385 if (ret == NULL) {
386 xmlGenericError(xmlGenericErrorContext,
387 "xmlSaveUri: out of memory\n");
388 return(NULL);
389 }
390 }
391 if ((IS_UNRESERVED(*(p))) ||
392 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
393 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
394 ((*(p) == '=')) || ((*(p) == '+')))
395 ret[len++] = *p++;
396 else {
397 int val = *(unsigned char *)p++;
398 int hi = val / 0x10, lo = val % 0x10;
399 ret[len++] = '%';
400 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
401 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
402 }
403 }
404 } else if (uri->scheme != NULL) {
405 if (len + 3 >= max) {
406 max *= 2;
407 ret = (xmlChar *) xmlRealloc(ret,
408 (max + 1) * sizeof(xmlChar));
409 if (ret == NULL) {
410 xmlGenericError(xmlGenericErrorContext,
411 "xmlSaveUri: out of memory\n");
412 return(NULL);
413 }
414 }
415 ret[len++] = '/';
416 ret[len++] = '/';
417 }
418 if (uri->path != NULL) {
419 p = uri->path;
420 while (*p != 0) {
421 if (len + 3 >= max) {
422 max *= 2;
423 ret = (xmlChar *) xmlRealloc(ret,
424 (max + 1) * sizeof(xmlChar));
425 if (ret == NULL) {
426 xmlGenericError(xmlGenericErrorContext,
427 "xmlSaveUri: out of memory\n");
428 return(NULL);
429 }
430 }
431 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
432 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
433 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
434 ((*(p) == ',')))
435 ret[len++] = *p++;
436 else {
437 int val = *(unsigned char *)p++;
438 int hi = val / 0x10, lo = val % 0x10;
439 ret[len++] = '%';
440 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
441 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
442 }
443 }
444 }
445 if (uri->query != NULL) {
446 if (len + 3 >= max) {
447 max *= 2;
448 ret = (xmlChar *) xmlRealloc(ret,
449 (max + 1) * sizeof(xmlChar));
450 if (ret == NULL) {
451 xmlGenericError(xmlGenericErrorContext,
452 "xmlSaveUri: out of memory\n");
453 return(NULL);
454 }
455 }
456 ret[len++] = '?';
457 p = uri->query;
458 while (*p != 0) {
459 if (len + 3 >= max) {
460 max *= 2;
461 ret = (xmlChar *) xmlRealloc(ret,
462 (max + 1) * sizeof(xmlChar));
463 if (ret == NULL) {
464 xmlGenericError(xmlGenericErrorContext,
465 "xmlSaveUri: out of memory\n");
466 return(NULL);
467 }
468 }
469 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
470 ret[len++] = *p++;
471 else {
472 int val = *(unsigned char *)p++;
473 int hi = val / 0x10, lo = val % 0x10;
474 ret[len++] = '%';
475 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
476 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
477 }
478 }
479 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000480 }
481 if (uri->fragment != NULL) {
482 if (len + 3 >= max) {
483 max *= 2;
484 ret = (xmlChar *) xmlRealloc(ret,
485 (max + 1) * sizeof(xmlChar));
486 if (ret == NULL) {
487 xmlGenericError(xmlGenericErrorContext,
488 "xmlSaveUri: out of memory\n");
489 return(NULL);
490 }
491 }
492 ret[len++] = '#';
493 p = uri->fragment;
494 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000495 if (len + 3 >= max) {
496 max *= 2;
497 ret = (xmlChar *) xmlRealloc(ret,
498 (max + 1) * sizeof(xmlChar));
499 if (ret == NULL) {
500 xmlGenericError(xmlGenericErrorContext,
501 "xmlSaveUri: out of memory\n");
502 return(NULL);
503 }
504 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000505 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
506 ret[len++] = *p++;
507 else {
508 int val = *(unsigned char *)p++;
509 int hi = val / 0x10, lo = val % 0x10;
510 ret[len++] = '%';
511 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
512 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000513 }
514 }
Owen Taylor3473f882001-02-23 17:55:21 +0000515 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000516 if (len >= max) {
517 max *= 2;
518 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
519 if (ret == NULL) {
520 xmlGenericError(xmlGenericErrorContext,
521 "xmlSaveUri: out of memory\n");
522 return(NULL);
523 }
524 }
525 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 return(ret);
527}
528
529/**
530 * xmlPrintURI:
531 * @stream: a FILE* for the output
532 * @uri: pointer to an xmlURI
533 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000534 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000535 */
536void
537xmlPrintURI(FILE *stream, xmlURIPtr uri) {
538 xmlChar *out;
539
540 out = xmlSaveUri(uri);
541 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000542 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000543 xmlFree(out);
544 }
545}
546
547/**
548 * xmlCleanURI:
549 * @uri: pointer to an xmlURI
550 *
551 * Make sure the xmlURI struct is free of content
552 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000553static void
Owen Taylor3473f882001-02-23 17:55:21 +0000554xmlCleanURI(xmlURIPtr uri) {
555 if (uri == NULL) return;
556
557 if (uri->scheme != NULL) xmlFree(uri->scheme);
558 uri->scheme = NULL;
559 if (uri->server != NULL) xmlFree(uri->server);
560 uri->server = NULL;
561 if (uri->user != NULL) xmlFree(uri->user);
562 uri->user = NULL;
563 if (uri->path != NULL) xmlFree(uri->path);
564 uri->path = NULL;
565 if (uri->fragment != NULL) xmlFree(uri->fragment);
566 uri->fragment = NULL;
567 if (uri->opaque != NULL) xmlFree(uri->opaque);
568 uri->opaque = NULL;
569 if (uri->authority != NULL) xmlFree(uri->authority);
570 uri->authority = NULL;
571 if (uri->query != NULL) xmlFree(uri->query);
572 uri->query = NULL;
573}
574
575/**
576 * xmlFreeURI:
577 * @uri: pointer to an xmlURI
578 *
579 * Free up the xmlURI struct
580 */
581void
582xmlFreeURI(xmlURIPtr uri) {
583 if (uri == NULL) return;
584
585 if (uri->scheme != NULL) xmlFree(uri->scheme);
586 if (uri->server != NULL) xmlFree(uri->server);
587 if (uri->user != NULL) xmlFree(uri->user);
588 if (uri->path != NULL) xmlFree(uri->path);
589 if (uri->fragment != NULL) xmlFree(uri->fragment);
590 if (uri->opaque != NULL) xmlFree(uri->opaque);
591 if (uri->authority != NULL) xmlFree(uri->authority);
592 if (uri->query != NULL) xmlFree(uri->query);
Owen Taylor3473f882001-02-23 17:55:21 +0000593 xmlFree(uri);
594}
595
596/************************************************************************
597 * *
598 * Helper functions *
599 * *
600 ************************************************************************/
601
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" between segments are
 * collapsed to a single "/" as well; leading "/" characters are kept.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0 or an error code (-1 when @path is NULL)
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
        return(-1);

    /*
     * Skip all initial "/" chars to reach the first non-empty segment.
     * A path made only of "/" characters is left untouched.
     */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /* Everything before the first segment is kept as is. */
    wr = rd;

    /*
     * First pass, cases (c) and (d). At the top of the loop rd always
     * points to the first character of a segment.
     */
    while (*rd != '\0') {
        if (*rd == '.') {
            /*
             * c) All occurrences of "./", where "." is a complete path
             *    segment, are removed; extra "/" following it are
             *    dropped too.
             */
            if (rd[1] == '/') {
                rd += 2;
                while (*rd == '/')
                    rd++;
                continue;
            }
            /*
             * d) If the buffer ends with "." as a complete path segment,
             *    that "." is removed.
             */
            if (rd[1] == '\0')
                break;
        }

        /* Any other segment is kept. */
        while ((*rd != '/') && (*rd != '\0'))
            *wr++ = *rd++;
        if (*rd == '\0')
            break;

        /* rd is on a "/": collapse a run of them and keep only one. */
        while (rd[1] == '/')
            rd++;
        *wr++ = *rd++;
    }
    *wr = '\0';

    /* Back to the beginning of the first segment for the second pass. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed, leftmost
     *    match first, until no match remains.
     *
     * f) If the buffer ends with "<segment>/..", where <segment> is a
     *    complete path segment not equal to "..", that "<segment>/.."
     *    is removed.
     *
     * The string is compacted each time something is removed, so a single
     * "current segment" pointer (rd) is enough.
     */
    for (;;) {
        char *next, *dst, *src, *prev;
        int curIsDotDot, nextIsDotDot;

        /* Find the segment following the current one. */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* The last segment can not be followed by "..": we are done. */
        if (*next == '\0')
            break;
        next++;

        curIsDotDot = (rd[0] == '.') && (rd[1] == '.') && (next == rd + 3);
        nextIsDotDot = (next[0] == '.') && (next[1] == '.') &&
                       ((next[2] == '/') || (next[2] == '\0'));

        /* Nothing to remove here, move on to the next segment. */
        if (curIsDotDot || !nextIsDotDot) {
            rd = next;
            continue;
        }

        /* Case (f): "<segment>/.." ends the buffer, cut it off. */
        if (next[2] == '\0') {
            *rd = '\0';
            break;
        }

        /*
         * Case (e): shift the tail over "<segment>/../". Source and
         * destination overlap, so copy byte by byte instead of strcpy.
         */
        dst = rd;
        src = next + 3;
        while ((*dst++ = *src++) != 0)
            ;

        /*
         * Back up to the previous segment, if there is one, and examine
         * it again: this is what makes "foo/bar/../.." collapse fully.
         * Step backwards over the separating "/" characters first.
         */
        prev = rd;
        while ((prev > path) && (*--prev == '/'))
            ;

        /*
         * No previous segment: either rd is at the very start of the
         * path, or only "/" characters precede it. Note that prev may
         * legitimately equal path while pointing at a one character
         * segment (as in "a/b/../.."), which must be re-examined.
         */
        if ((prev == rd) || (*prev == '/'))
            continue;

        /* prev is on the last char of the previous segment: find its start. */
        rd = prev;
        while ((rd > path) && (rd[-1] != '/'))
            rd--;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. For an absolute path those leading
     *    "/.." components are discarded from the final path.
     */
    if (path[0] == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.')
               && ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path) {
            wr = path;
            while (*rd != '\0')
                *wr++ = *rd++;
            *wr = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000789
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
797
Owen Taylor3473f882001-02-23 17:55:21 +0000798/**
799 * xmlURIUnescapeString:
800 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000801 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000802 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000803 *
804 * Unescaping routine, does not do validity checks !
805 * Output is direct unsigned char translation of %XX values (no encoding)
806 *
807 * Returns an copy of the string, but unescaped
808 */
809char *
810xmlURIUnescapeString(const char *str, int len, char *target) {
811 char *ret, *out;
812 const char *in;
813
814 if (str == NULL)
815 return(NULL);
816 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000817 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000818
819 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000820 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000821 if (ret == NULL) {
822 xmlGenericError(xmlGenericErrorContext,
823 "xmlURIUnescapeString: out of memory\n");
824 return(NULL);
825 }
826 } else
827 ret = target;
828 in = str;
829 out = ret;
830 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000831 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000832 in++;
833 if ((*in >= '0') && (*in <= '9'))
834 *out = (*in - '0');
835 else if ((*in >= 'a') && (*in <= 'f'))
836 *out = (*in - 'a') + 10;
837 else if ((*in >= 'A') && (*in <= 'F'))
838 *out = (*in - 'A') + 10;
839 in++;
840 if ((*in >= '0') && (*in <= '9'))
841 *out = *out * 16 + (*in - '0');
842 else if ((*in >= 'a') && (*in <= 'f'))
843 *out = *out * 16 + (*in - 'a') + 10;
844 else if ((*in >= 'A') && (*in <= 'F'))
845 *out = *out * 16 + (*in - 'A') + 10;
846 in++;
847 len -= 3;
848 out++;
849 } else {
850 *out++ = *in++;
851 len--;
852 }
853 }
854 *out = 0;
855 return(ret);
856}
857
858/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000859 * xmlURIEscapeStr:
860 * @str: string to escape
861 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000862 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000863 * This routine escapes a string to hex, ignoring reserved characters (a-z)
864 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000865 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000866 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000867 */
868xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000869xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
870 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000871 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000872
Owen Taylor3473f882001-02-23 17:55:21 +0000873 unsigned int len, out;
874
875 if (str == NULL)
876 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000877 if (str[0] == 0)
878 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000879 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000880 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000881
882 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000883 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (ret == NULL) {
885 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000886 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000887 return(NULL);
888 }
889 in = (const xmlChar *) str;
890 out = 0;
891 while(*in != 0) {
892 if (len - out <= 3) {
893 len += 20;
894 ret = (xmlChar *) xmlRealloc(ret, len);
895 if (ret == NULL) {
896 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000897 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000898 return(NULL);
899 }
900 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000901
902 ch = *in;
903
Daniel Veillardeb475a32002-04-14 22:00:22 +0000904 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000905 unsigned char val;
906 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000907 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000908 if (val <= 9)
909 ret[out++] = '0' + val;
910 else
911 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000912 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 if (val <= 9)
914 ret[out++] = '0' + val;
915 else
916 ret[out++] = 'A' + val - 0xA;
917 in++;
918 } else {
919 ret[out++] = *in++;
920 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000921
Owen Taylor3473f882001-02-23 17:55:21 +0000922 }
923 ret[out] = 0;
924 return(ret);
925}
926
Daniel Veillard8514c672001-05-23 10:29:12 +0000927/**
928 * xmlURIEscape:
929 * @str: the string of the URI to escape
930 *
931 * Escaping routine, does not do validity checks !
932 * It will try to escape the chars needing this, but this is heuristic
933 * based it's impossible to be sure.
934 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000935 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000936 *
937 * 25 May 2001
938 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
939 * according to RFC2396.
940 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000941 */
942xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000943xmlURIEscape(const xmlChar * str)
944{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000945 xmlChar *ret, *segment = NULL;
946 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000947 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000948
Daniel Veillard6278fb52001-05-25 07:38:41 +0000949#define NULLCHK(p) if(!p) { \
950 xmlGenericError(xmlGenericErrorContext, \
951 "xmlURIEscape: out of memory\n"); \
952 return NULL; }
953
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000954 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000955 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000956
957 uri = xmlCreateURI();
958 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000959 /*
960 * Allow escaping errors in the unescaped form
961 */
962 uri->cleanup = 1;
963 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000964 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000965 xmlFreeURI(uri);
966 return (NULL);
967 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000968 }
Daniel Veillard6278fb52001-05-25 07:38:41 +0000969
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000970 if (!uri)
971 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +0000972
973 ret = NULL;
974
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000975 if (uri->scheme) {
976 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
977 NULLCHK(segment)
978 ret = xmlStrcat(ret, segment);
979 ret = xmlStrcat(ret, BAD_CAST ":");
980 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000981 }
982
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000983 if (uri->authority) {
984 segment =
985 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
986 NULLCHK(segment)
987 ret = xmlStrcat(ret, BAD_CAST "//");
988 ret = xmlStrcat(ret, segment);
989 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000990 }
991
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000992 if (uri->user) {
993 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
994 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +0000995 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000996 ret = xmlStrcat(ret, segment);
997 ret = xmlStrcat(ret, BAD_CAST "@");
998 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +0000999 }
1000
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001001 if (uri->server) {
1002 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1003 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001004 if (uri->user == NULL)
1005 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001006 ret = xmlStrcat(ret, segment);
1007 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001008 }
1009
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001010 if (uri->port) {
1011 xmlChar port[10];
1012
Daniel Veillard43d3f612001-11-10 11:57:23 +00001013 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001014 ret = xmlStrcat(ret, BAD_CAST ":");
1015 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001016 }
1017
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001018 if (uri->path) {
1019 segment =
1020 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1021 NULLCHK(segment)
1022 ret = xmlStrcat(ret, segment);
1023 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001024 }
1025
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001026 if (uri->query) {
1027 segment =
1028 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1029 NULLCHK(segment)
1030 ret = xmlStrcat(ret, BAD_CAST "?");
1031 ret = xmlStrcat(ret, segment);
1032 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001033 }
1034
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001035 if (uri->opaque) {
1036 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1037 NULLCHK(segment)
1038 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001039 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001040 }
1041
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001042 if (uri->fragment) {
1043 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1044 NULLCHK(segment)
1045 ret = xmlStrcat(ret, BAD_CAST "#");
1046 ret = xmlStrcat(ret, segment);
1047 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001048 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001049
1050 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001051#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001052
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001053 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001054}
1055
Owen Taylor3473f882001-02-23 17:55:21 +00001056/************************************************************************
1057 * *
1058 * Escaped URI parsing *
1059 * *
1060 ************************************************************************/
1061
1062/**
1063 * xmlParseURIFragment:
1064 * @uri: pointer to an URI structure
1065 * @str: pointer to the string to analyze
1066 *
1067 * Parse an URI fragment string and fills in the appropriate fields
1068 * of the @uri structure.
1069 *
1070 * fragment = *uric
1071 *
1072 * Returns 0 or the error code
1073 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001074static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001075xmlParseURIFragment(xmlURIPtr uri, const char **str)
1076{
Owen Taylor3473f882001-02-23 17:55:21 +00001077 const char *cur = *str;
1078
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001079 if (str == NULL)
1080 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001081
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001082 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001083 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001084 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001085 if (uri->fragment != NULL)
1086 xmlFree(uri->fragment);
1087 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001088 }
1089 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001090 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001091}
1092
1093/**
1094 * xmlParseURIQuery:
1095 * @uri: pointer to an URI structure
1096 * @str: pointer to the string to analyze
1097 *
1098 * Parse the query part of an URI
1099 *
1100 * query = *uric
1101 *
1102 * Returns 0 or the error code
1103 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001104static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001105xmlParseURIQuery(xmlURIPtr uri, const char **str)
1106{
Owen Taylor3473f882001-02-23 17:55:21 +00001107 const char *cur = *str;
1108
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001109 if (str == NULL)
1110 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001111
Daniel Veillard9231ff92003-03-23 22:00:51 +00001112 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001113 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001114 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001115 if (uri->query != NULL)
1116 xmlFree(uri->query);
1117 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001118 }
1119 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001120 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001121}
1122
1123/**
1124 * xmlParseURIScheme:
1125 * @uri: pointer to an URI structure
1126 * @str: pointer to the string to analyze
1127 *
1128 * Parse an URI scheme
1129 *
1130 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1131 *
1132 * Returns 0 or the error code
1133 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001134static int
Owen Taylor3473f882001-02-23 17:55:21 +00001135xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1136 const char *cur;
1137
1138 if (str == NULL)
1139 return(-1);
1140
1141 cur = *str;
1142 if (!IS_ALPHA(*cur))
1143 return(2);
1144 cur++;
1145 while (IS_SCHEME(*cur)) cur++;
1146 if (uri != NULL) {
1147 if (uri->scheme != NULL) xmlFree(uri->scheme);
1148 /* !!! strndup */
1149 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
1150 }
1151 *str = cur;
1152 return(0);
1153}
1154
1155/**
1156 * xmlParseURIOpaquePart:
1157 * @uri: pointer to an URI structure
1158 * @str: pointer to the string to analyze
1159 *
1160 * Parse an URI opaque part
1161 *
1162 * opaque_part = uric_no_slash *uric
1163 *
1164 * Returns 0 or the error code
1165 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001166static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001167xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1168{
Owen Taylor3473f882001-02-23 17:55:21 +00001169 const char *cur;
1170
1171 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001172 return (-1);
1173
Owen Taylor3473f882001-02-23 17:55:21 +00001174 cur = *str;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001175 if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001176 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001177 }
1178 NEXT(cur);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001179 while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001180 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001181 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001182 if (uri->opaque != NULL)
1183 xmlFree(uri->opaque);
1184 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001185 }
1186 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001187 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001188}
1189
1190/**
1191 * xmlParseURIServer:
1192 * @uri: pointer to an URI structure
1193 * @str: pointer to the string to analyze
1194 *
1195 * Parse a server subpart of an URI, it's a finer grain analysis
1196 * of the authority part.
1197 *
1198 * server = [ [ userinfo "@" ] hostport ]
1199 * userinfo = *( unreserved | escaped |
1200 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1201 * hostport = host [ ":" port ]
1202 * host = hostname | IPv4address
1203 * hostname = *( domainlabel "." ) toplabel [ "." ]
1204 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1205 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1206 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1207 * port = *digit
1208 *
1209 * Returns 0 or the error code
1210 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001211static int
Owen Taylor3473f882001-02-23 17:55:21 +00001212xmlParseURIServer(xmlURIPtr uri, const char **str) {
1213 const char *cur;
1214 const char *host, *tmp;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001215 const int IPmax = 4;
1216 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001217
1218 if (str == NULL)
1219 return(-1);
1220
1221 cur = *str;
1222
1223 /*
1224 * is there an userinfo ?
1225 */
1226 while (IS_USERINFO(cur)) NEXT(cur);
1227 if (*cur == '@') {
1228 if (uri != NULL) {
1229 if (uri->user != NULL) xmlFree(uri->user);
1230 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1231 }
1232 cur++;
1233 } else {
1234 if (uri != NULL) {
1235 if (uri->user != NULL) xmlFree(uri->user);
1236 uri->user = NULL;
1237 }
1238 cur = *str;
1239 }
1240 /*
1241 * This can be empty in the case where there is no server
1242 */
1243 host = cur;
1244 if (*cur == '/') {
1245 if (uri != NULL) {
1246 if (uri->authority != NULL) xmlFree(uri->authority);
1247 uri->authority = NULL;
1248 if (uri->server != NULL) xmlFree(uri->server);
1249 uri->server = NULL;
1250 uri->port = 0;
1251 }
1252 return(0);
1253 }
1254 /*
1255 * host part of hostport can derive either an IPV4 address
1256 * or an unresolved name. Check the IP first, it easier to detect
1257 * errors if wrong one
1258 */
Daniel Veillard9231ff92003-03-23 22:00:51 +00001259 for (oct = 0; oct < IPmax; ++oct) {
1260 if (*cur == '.')
1261 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001262 while(IS_DIGIT(*cur)) cur++;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001263 if (oct == (IPmax-1))
1264 continue;
1265 if (*cur != '.')
1266 break;
1267 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001268 }
Daniel Veillard9231ff92003-03-23 22:00:51 +00001269 if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) {
1270 /* maybe host_name */
1271 if (!IS_ALPHANUM(*cur))
1272 return(4); /* e.g. http://xml.$oft */
1273 do {
1274 do ++cur; while (IS_ALPHANUM(*cur));
1275 if (*cur == '-') {
1276 --cur;
1277 if (*cur == '.')
1278 return(5); /* e.g. http://xml.-soft */
1279 ++cur;
1280 continue;
1281 }
1282 if (*cur == '.') {
1283 --cur;
1284 if (*cur == '-')
1285 return(6); /* e.g. http://xml-.soft */
1286 if (*cur == '.')
1287 return(7); /* e.g. http://xml..soft */
1288 ++cur;
1289 continue;
1290 }
1291 break;
1292 } while (1);
1293 tmp = cur;
1294 if (tmp[-1] == '.')
1295 --tmp; /* e.g. http://xml.$Oft/ */
1296 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1297 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1298 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001299 }
Owen Taylor3473f882001-02-23 17:55:21 +00001300 if (uri != NULL) {
1301 if (uri->authority != NULL) xmlFree(uri->authority);
1302 uri->authority = NULL;
1303 if (uri->server != NULL) xmlFree(uri->server);
1304 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1305 }
Owen Taylor3473f882001-02-23 17:55:21 +00001306 /*
1307 * finish by checking for a port presence.
1308 */
1309 if (*cur == ':') {
1310 cur++;
1311 if (IS_DIGIT(*cur)) {
1312 if (uri != NULL)
1313 uri->port = 0;
1314 while (IS_DIGIT(*cur)) {
1315 if (uri != NULL)
1316 uri->port = uri->port * 10 + (*cur - '0');
1317 cur++;
1318 }
1319 }
1320 }
1321 *str = cur;
1322 return(0);
1323}
1324
1325/**
1326 * xmlParseURIRelSegment:
1327 * @uri: pointer to an URI structure
1328 * @str: pointer to the string to analyze
1329 *
1330 * Parse an URI relative segment
1331 *
1332 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1333 * "+" | "$" | "," )
1334 *
1335 * Returns 0 or the error code
1336 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001337static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001338xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1339{
Owen Taylor3473f882001-02-23 17:55:21 +00001340 const char *cur;
1341
1342 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001343 return (-1);
1344
Owen Taylor3473f882001-02-23 17:55:21 +00001345 cur = *str;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001346 if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001347 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001348 }
1349 NEXT(cur);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001350 while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001351 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001352 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001353 if (uri->path != NULL)
1354 xmlFree(uri->path);
1355 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001356 }
1357 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001358 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001359}
1360
1361/**
1362 * xmlParseURIPathSegments:
1363 * @uri: pointer to an URI structure
1364 * @str: pointer to the string to analyze
1365 * @slash: should we add a leading slash
1366 *
1367 * Parse an URI set of path segments
1368 *
1369 * path_segments = segment *( "/" segment )
1370 * segment = *pchar *( ";" param )
1371 * param = *pchar
1372 *
1373 * Returns 0 or the error code
1374 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001375static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001376xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1377{
Owen Taylor3473f882001-02-23 17:55:21 +00001378 const char *cur;
1379
1380 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001381 return (-1);
1382
Owen Taylor3473f882001-02-23 17:55:21 +00001383 cur = *str;
1384
1385 do {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001386 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001387 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001388 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001389 cur++;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001390 while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001391 NEXT(cur);
1392 }
1393 if (*cur != '/')
1394 break;
1395 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001396 } while (1);
1397 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001398 int len, len2 = 0;
1399 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001400
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001401 /*
1402 * Concat the set of path segments to the current path
1403 */
1404 len = cur - *str;
1405 if (slash)
1406 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001407
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001408 if (uri->path != NULL) {
1409 len2 = strlen(uri->path);
1410 len += len2;
1411 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001412 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001413 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001414 xmlGenericError(xmlGenericErrorContext,
1415 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001416 *str = cur;
1417 return (-1);
1418 }
1419 if (uri->path != NULL)
1420 memcpy(path, uri->path, len2);
1421 if (slash) {
1422 path[len2] = '/';
1423 len2++;
1424 }
1425 path[len2] = 0;
1426 if (cur - *str > 0)
1427 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1428 if (uri->path != NULL)
1429 xmlFree(uri->path);
1430 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001431 }
1432 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001433 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001434}
1435
1436/**
1437 * xmlParseURIAuthority:
1438 * @uri: pointer to an URI structure
1439 * @str: pointer to the string to analyze
1440 *
1441 * Parse the authority part of an URI.
1442 *
1443 * authority = server | reg_name
1444 * server = [ [ userinfo "@" ] hostport ]
1445 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1446 * "@" | "&" | "=" | "+" )
1447 *
1448 * Note : this is completely ambiguous since reg_name is allowed to
1449 * use the full set of chars in use by server:
1450 *
1451 * 3.2.1. Registry-based Naming Authority
1452 *
1453 * The structure of a registry-based naming authority is specific
1454 * to the URI scheme, but constrained to the allowed characters
1455 * for an authority component.
1456 *
1457 * Returns 0 or the error code
1458 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001459static int
Owen Taylor3473f882001-02-23 17:55:21 +00001460xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1461 const char *cur;
1462 int ret;
1463
1464 if (str == NULL)
1465 return(-1);
1466
1467 cur = *str;
1468
1469 /*
1470 * try first to parse it as a server string.
1471 */
1472 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001473 if ((ret == 0) && (*str != NULL) &&
1474 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001475 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001476 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001477
1478 /*
1479 * failed, fallback to reg_name
1480 */
1481 if (!IS_REG_NAME(cur)) {
1482 return(5);
1483 }
1484 NEXT(cur);
1485 while (IS_REG_NAME(cur)) NEXT(cur);
1486 if (uri != NULL) {
1487 if (uri->server != NULL) xmlFree(uri->server);
1488 uri->server = NULL;
1489 if (uri->user != NULL) xmlFree(uri->user);
1490 uri->user = NULL;
1491 if (uri->authority != NULL) xmlFree(uri->authority);
1492 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1493 }
1494 *str = cur;
1495 return(0);
1496}
1497
1498/**
1499 * xmlParseURIHierPart:
1500 * @uri: pointer to an URI structure
1501 * @str: pointer to the string to analyze
1502 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001503 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001504 *
1505 * hier_part = ( net_path | abs_path ) [ "?" query ]
1506 * abs_path = "/" path_segments
1507 * net_path = "//" authority [ abs_path ]
1508 *
1509 * Returns 0 or the error code
1510 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001511static int
Owen Taylor3473f882001-02-23 17:55:21 +00001512xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1513 int ret;
1514 const char *cur;
1515
1516 if (str == NULL)
1517 return(-1);
1518
1519 cur = *str;
1520
1521 if ((cur[0] == '/') && (cur[1] == '/')) {
1522 cur += 2;
1523 ret = xmlParseURIAuthority(uri, &cur);
1524 if (ret != 0)
1525 return(ret);
1526 if (cur[0] == '/') {
1527 cur++;
1528 ret = xmlParseURIPathSegments(uri, &cur, 1);
1529 }
1530 } else if (cur[0] == '/') {
1531 cur++;
1532 ret = xmlParseURIPathSegments(uri, &cur, 1);
1533 } else {
1534 return(4);
1535 }
1536 if (ret != 0)
1537 return(ret);
1538 if (*cur == '?') {
1539 cur++;
1540 ret = xmlParseURIQuery(uri, &cur);
1541 if (ret != 0)
1542 return(ret);
1543 }
1544 *str = cur;
1545 return(0);
1546}
1547
1548/**
1549 * xmlParseAbsoluteURI:
1550 * @uri: pointer to an URI structure
1551 * @str: pointer to the string to analyze
1552 *
1553 * Parse an URI reference string and fills in the appropriate fields
1554 * of the @uri structure
1555 *
1556 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1557 *
1558 * Returns 0 or the error code
1559 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001560static int
Owen Taylor3473f882001-02-23 17:55:21 +00001561xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1562 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001563 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001564
1565 if (str == NULL)
1566 return(-1);
1567
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001568 cur = *str;
1569
Owen Taylor3473f882001-02-23 17:55:21 +00001570 ret = xmlParseURIScheme(uri, str);
1571 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001572 if (**str != ':') {
1573 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001574 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001575 }
Owen Taylor3473f882001-02-23 17:55:21 +00001576 (*str)++;
1577 if (**str == '/')
1578 return(xmlParseURIHierPart(uri, str));
1579 return(xmlParseURIOpaquePart(uri, str));
1580}
1581
1582/**
1583 * xmlParseRelativeURI:
1584 * @uri: pointer to an URI structure
1585 * @str: pointer to the string to analyze
1586 *
1587 * Parse an relative URI string and fills in the appropriate fields
1588 * of the @uri structure
1589 *
1590 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1591 * abs_path = "/" path_segments
1592 * net_path = "//" authority [ abs_path ]
1593 * rel_path = rel_segment [ abs_path ]
1594 *
1595 * Returns 0 or the error code
1596 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001597static int
Owen Taylor3473f882001-02-23 17:55:21 +00001598xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1599 int ret = 0;
1600 const char *cur;
1601
1602 if (str == NULL)
1603 return(-1);
1604
1605 cur = *str;
1606 if ((cur[0] == '/') && (cur[1] == '/')) {
1607 cur += 2;
1608 ret = xmlParseURIAuthority(uri, &cur);
1609 if (ret != 0)
1610 return(ret);
1611 if (cur[0] == '/') {
1612 cur++;
1613 ret = xmlParseURIPathSegments(uri, &cur, 1);
1614 }
1615 } else if (cur[0] == '/') {
1616 cur++;
1617 ret = xmlParseURIPathSegments(uri, &cur, 1);
1618 } else if (cur[0] != '#' && cur[0] != '?') {
1619 ret = xmlParseURIRelSegment(uri, &cur);
1620 if (ret != 0)
1621 return(ret);
1622 if (cur[0] == '/') {
1623 cur++;
1624 ret = xmlParseURIPathSegments(uri, &cur, 1);
1625 }
1626 }
1627 if (ret != 0)
1628 return(ret);
1629 if (*cur == '?') {
1630 cur++;
1631 ret = xmlParseURIQuery(uri, &cur);
1632 if (ret != 0)
1633 return(ret);
1634 }
1635 *str = cur;
1636 return(ret);
1637}
1638
1639/**
1640 * xmlParseURIReference:
1641 * @uri: pointer to an URI structure
1642 * @str: the string to analyze
1643 *
1644 * Parse an URI reference string and fills in the appropriate fields
1645 * of the @uri structure
1646 *
1647 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1648 *
1649 * Returns 0 or the error code
1650 */
1651int
1652xmlParseURIReference(xmlURIPtr uri, const char *str) {
1653 int ret;
1654 const char *tmp = str;
1655
1656 if (str == NULL)
1657 return(-1);
1658 xmlCleanURI(uri);
1659
1660 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001661 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001662 * it fails.
1663 */
1664 ret = xmlParseAbsoluteURI(uri, &str);
1665 if (ret != 0) {
1666 xmlCleanURI(uri);
1667 str = tmp;
1668 ret = xmlParseRelativeURI(uri, &str);
1669 }
1670 if (ret != 0) {
1671 xmlCleanURI(uri);
1672 return(ret);
1673 }
1674
1675 if (*str == '#') {
1676 str++;
1677 ret = xmlParseURIFragment(uri, &str);
1678 if (ret != 0) return(ret);
1679 }
1680 if (*str != 0) {
1681 xmlCleanURI(uri);
1682 return(1);
1683 }
1684 return(0);
1685}
1686
1687/**
1688 * xmlParseURI:
1689 * @str: the URI string to analyze
1690 *
1691 * Parse an URI
1692 *
1693 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1694 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001695 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001696 */
1697xmlURIPtr
1698xmlParseURI(const char *str) {
1699 xmlURIPtr uri;
1700 int ret;
1701
1702 if (str == NULL)
1703 return(NULL);
1704 uri = xmlCreateURI();
1705 if (uri != NULL) {
1706 ret = xmlParseURIReference(uri, str);
1707 if (ret) {
1708 xmlFreeURI(uri);
1709 return(NULL);
1710 }
1711 }
1712 return(uri);
1713}
1714
1715/************************************************************************
1716 * *
1717 * Public functions *
1718 * *
1719 ************************************************************************/
1720
1721/**
1722 * xmlBuildURI:
1723 * @URI: the URI instance found in the document
1724 * @base: the base value
1725 *
1726 * Computes he final URI of the reference done by checking that
1727 * the given URI is valid, and building the final URI using the
1728 * base URI. This is processed according to section 5.2 of the
1729 * RFC 2396
1730 *
1731 * 5.2. Resolving Relative References to Absolute Form
1732 *
1733 * Returns a new URI string (to be freed by the caller) or NULL in case
1734 * of error.
1735 */
1736xmlChar *
1737xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1738 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001739 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 xmlURIPtr ref = NULL;
1741 xmlURIPtr bas = NULL;
1742 xmlURIPtr res = NULL;
1743
1744 /*
1745 * 1) The URI reference is parsed into the potential four components and
1746 * fragment identifier, as described in Section 4.3.
1747 *
1748 * NOTE that a completely empty URI is treated by modern browsers
1749 * as a reference to "." rather than as a synonym for the current
1750 * URI. Should we do that here?
1751 */
1752 if (URI == NULL)
1753 ret = -1;
1754 else {
1755 if (*URI) {
1756 ref = xmlCreateURI();
1757 if (ref == NULL)
1758 goto done;
1759 ret = xmlParseURIReference(ref, (const char *) URI);
1760 }
1761 else
1762 ret = 0;
1763 }
1764 if (ret != 0)
1765 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001766 if ((ref != NULL) && (ref->scheme != NULL)) {
1767 /*
1768 * The URI is absolute don't modify.
1769 */
1770 val = xmlStrdup(URI);
1771 goto done;
1772 }
Owen Taylor3473f882001-02-23 17:55:21 +00001773 if (base == NULL)
1774 ret = -1;
1775 else {
1776 bas = xmlCreateURI();
1777 if (bas == NULL)
1778 goto done;
1779 ret = xmlParseURIReference(bas, (const char *) base);
1780 }
1781 if (ret != 0) {
1782 if (ref)
1783 val = xmlSaveUri(ref);
1784 goto done;
1785 }
1786 if (ref == NULL) {
1787 /*
1788 * the base fragment must be ignored
1789 */
1790 if (bas->fragment != NULL) {
1791 xmlFree(bas->fragment);
1792 bas->fragment = NULL;
1793 }
1794 val = xmlSaveUri(bas);
1795 goto done;
1796 }
1797
1798 /*
1799 * 2) If the path component is empty and the scheme, authority, and
1800 * query components are undefined, then it is a reference to the
1801 * current document and we are done. Otherwise, the reference URI's
1802 * query and fragment components are defined as found (or not found)
1803 * within the URI reference and not inherited from the base URI.
1804 *
1805 * NOTE that in modern browsers, the parsing differs from the above
1806 * in the following aspect: the query component is allowed to be
1807 * defined while still treating this as a reference to the current
1808 * document.
1809 */
1810 res = xmlCreateURI();
1811 if (res == NULL)
1812 goto done;
1813 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1814 ((ref->authority == NULL) && (ref->server == NULL))) {
1815 if (bas->scheme != NULL)
1816 res->scheme = xmlMemStrdup(bas->scheme);
1817 if (bas->authority != NULL)
1818 res->authority = xmlMemStrdup(bas->authority);
1819 else if (bas->server != NULL) {
1820 res->server = xmlMemStrdup(bas->server);
1821 if (bas->user != NULL)
1822 res->user = xmlMemStrdup(bas->user);
1823 res->port = bas->port;
1824 }
1825 if (bas->path != NULL)
1826 res->path = xmlMemStrdup(bas->path);
1827 if (ref->query != NULL)
1828 res->query = xmlMemStrdup(ref->query);
1829 else if (bas->query != NULL)
1830 res->query = xmlMemStrdup(bas->query);
1831 if (ref->fragment != NULL)
1832 res->fragment = xmlMemStrdup(ref->fragment);
1833 goto step_7;
1834 }
Owen Taylor3473f882001-02-23 17:55:21 +00001835
1836 /*
1837 * 3) If the scheme component is defined, indicating that the reference
1838 * starts with a scheme name, then the reference is interpreted as an
1839 * absolute URI and we are done. Otherwise, the reference URI's
1840 * scheme is inherited from the base URI's scheme component.
1841 */
1842 if (ref->scheme != NULL) {
1843 val = xmlSaveUri(ref);
1844 goto done;
1845 }
1846 if (bas->scheme != NULL)
1847 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00001848
1849 if (ref->query != NULL)
1850 res->query = xmlMemStrdup(ref->query);
1851 if (ref->fragment != NULL)
1852 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00001853
1854 /*
1855 * 4) If the authority component is defined, then the reference is a
1856 * network-path and we skip to step 7. Otherwise, the reference
1857 * URI's authority is inherited from the base URI's authority
1858 * component, which will also be undefined if the URI scheme does not
1859 * use an authority component.
1860 */
1861 if ((ref->authority != NULL) || (ref->server != NULL)) {
1862 if (ref->authority != NULL)
1863 res->authority = xmlMemStrdup(ref->authority);
1864 else {
1865 res->server = xmlMemStrdup(ref->server);
1866 if (ref->user != NULL)
1867 res->user = xmlMemStrdup(ref->user);
1868 res->port = ref->port;
1869 }
1870 if (ref->path != NULL)
1871 res->path = xmlMemStrdup(ref->path);
1872 goto step_7;
1873 }
1874 if (bas->authority != NULL)
1875 res->authority = xmlMemStrdup(bas->authority);
1876 else if (bas->server != NULL) {
1877 res->server = xmlMemStrdup(bas->server);
1878 if (bas->user != NULL)
1879 res->user = xmlMemStrdup(bas->user);
1880 res->port = bas->port;
1881 }
1882
1883 /*
1884 * 5) If the path component begins with a slash character ("/"), then
1885 * the reference is an absolute-path and we skip to step 7.
1886 */
1887 if ((ref->path != NULL) && (ref->path[0] == '/')) {
1888 res->path = xmlMemStrdup(ref->path);
1889 goto step_7;
1890 }
1891
1892
1893 /*
1894 * 6) If this step is reached, then we are resolving a relative-path
1895 * reference. The relative path needs to be merged with the base
1896 * URI's path. Although there are many ways to do this, we will
1897 * describe a simple method using a separate string buffer.
1898 *
1899 * Allocate a buffer large enough for the result string.
1900 */
1901 len = 2; /* extra / and 0 */
1902 if (ref->path != NULL)
1903 len += strlen(ref->path);
1904 if (bas->path != NULL)
1905 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001906 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00001907 if (res->path == NULL) {
1908 xmlGenericError(xmlGenericErrorContext,
1909 "xmlBuildURI: out of memory\n");
1910 goto done;
1911 }
1912 res->path[0] = 0;
1913
1914 /*
1915 * a) All but the last segment of the base URI's path component is
1916 * copied to the buffer. In other words, any characters after the
1917 * last (right-most) slash character, if any, are excluded.
1918 */
1919 cur = 0;
1920 out = 0;
1921 if (bas->path != NULL) {
1922 while (bas->path[cur] != 0) {
1923 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1924 cur++;
1925 if (bas->path[cur] == 0)
1926 break;
1927
1928 cur++;
1929 while (out < cur) {
1930 res->path[out] = bas->path[out];
1931 out++;
1932 }
1933 }
1934 }
1935 res->path[out] = 0;
1936
1937 /*
1938 * b) The reference's path component is appended to the buffer
1939 * string.
1940 */
1941 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001942 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001943 /*
1944 * Ensure the path includes a '/'
1945 */
1946 if ((out == 0) && (bas->server != NULL))
1947 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001948 while (ref->path[indx] != 0) {
1949 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00001950 }
1951 }
1952 res->path[out] = 0;
1953
1954 /*
1955 * Steps c) to h) are really path normalization steps
1956 */
1957 xmlNormalizeURIPath(res->path);
1958
1959step_7:
1960
1961 /*
1962 * 7) The resulting URI components, including any inherited from the
1963 * base URI, are recombined to give the absolute form of the URI
1964 * reference.
1965 */
1966 val = xmlSaveUri(res);
1967
1968done:
1969 if (ref != NULL)
1970 xmlFreeURI(ref);
1971 if (bas != NULL)
1972 xmlFreeURI(bas);
1973 if (res != NULL)
1974 xmlFreeURI(res);
1975 return(val);
1976}
1977
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00001978/**
William M. Brackf7789b12004-06-07 08:57:27 +00001979 * xmlBuildRelativeURI:
1980 * @URI: the URI reference under consideration
1981 * @base: the base value
1982 *
1983 * Expresses the URI of the reference in terms relative to the
1984 * base. Some examples of this operation include:
1985 * base = "http://site1.com/docs/book1.html"
1986 * URI input URI returned
1987 * docs/pic1.gif pic1.gif
1988 * docs/img/pic1.gif img/pic1.gif
1989 * img/pic1.gif ../img/pic1.gif
1990 * http://site1.com/docs/pic1.gif pic1.gif
1991 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
1992 *
1993 * base = "docs/book1.html"
1994 * URI input URI returned
1995 * docs/pic1.gif pic1.gif
1996 * docs/img/pic1.gif img/pic1.gif
1997 * img/pic1.gif ../img/pic1.gif
1998 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
1999 *
2000 *
2001 * Note: if the URI reference is really wierd or complicated, it may be
2002 * worthwhile to first convert it into a "nice" one by calling
2003 * xmlBuildURI (using 'base') before calling this routine,
2004 * since this routine (for reasonable efficiency) assumes URI has
2005 * already been through some validation.
2006 *
2007 * Returns a new URI string (to be freed by the caller) or NULL in case
2008 * error.
2009 */
2010xmlChar *
2011xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2012{
2013 xmlChar *val = NULL;
2014 int ret;
2015 int ix;
2016 int pos = 0;
2017 int nbslash = 0;
2018 xmlURIPtr ref = NULL;
2019 xmlURIPtr bas = NULL;
2020 xmlChar *bptr, *uptr, *vptr;
2021
2022 if ((URI == NULL) || (*URI == 0))
2023 return NULL;
2024 /*
2025 * Special case - if URI starts with '.', we assume it's already
2026 * in relative form, so nothing to do.
2027 */
2028 if (*URI == '.') {
2029 val = xmlStrdup (URI);
2030 goto done;
2031 }
2032
2033 /*
2034 * First parse URI into a standard form
2035 */
2036 ref = xmlCreateURI ();
2037 if (ref == NULL)
2038 return NULL;
2039 ret = xmlParseURIReference (ref, (const char *) URI);
2040 if (ret != 0)
2041 goto done; /* Error in URI, return NULL */
2042
2043 /*
2044 * Next parse base into the same standard form
2045 */
2046 if ((base == NULL) || (*base == 0)) {
2047 val = xmlStrdup (URI);
2048 goto done;
2049 }
2050 bas = xmlCreateURI ();
2051 if (bas == NULL)
2052 goto done;
2053 ret = xmlParseURIReference (bas, (const char *) base);
2054 if (ret != 0)
2055 goto done; /* Error in base, return NULL */
2056
2057 /*
2058 * If the scheme / server on the URI differs from the base,
2059 * just return the URI
2060 */
2061 if ((ref->scheme != NULL) &&
2062 ((bas->scheme == NULL) ||
2063 xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme) ||
2064 xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server))) {
2065 val = xmlStrdup (URI);
2066 goto done;
2067 }
2068
2069 /*
2070 * At this point (at last!) we can compare the two paths
2071 *
2072 * First we compare the two strings and find where they first differ
2073 */
2074 bptr = (xmlChar *)bas->path;
William M. Brackf20fbf72004-06-25 05:49:08 +00002075 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2076 pos += 2;
2077 if ((*bptr == '.') && (bptr[1] == '/'))
2078 bptr += 2;
2079 else if ((*bptr == '/') && (ref->path[pos] != '/'))
William M. Brackf7789b12004-06-07 08:57:27 +00002080 bptr++;
2081 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2082 pos++;
2083
2084 if (bptr[pos] == ref->path[pos]) {
2085 val = NULL; /* if no differences, return NULL */
2086 goto done; /* (I can't imagine why anyone would do this) */
2087 }
2088
2089 /*
2090 * In URI, "back up" to the last '/' encountered. This will be the
2091 * beginning of the "unique" suffix of URI
2092 */
2093 ix = pos;
2094 if ((ref->path[ix] == '/') && (ix > 0))
2095 ix--;
2096 for (; ix > 0; ix--) {
2097 if (ref->path[ix] == '/')
2098 break;
2099 }
William M. Brackf2a657a2004-10-27 16:33:09 +00002100 if (ix == 0) {
William M. Brackf7789b12004-06-07 08:57:27 +00002101 uptr = (xmlChar *)ref->path;
William M. Brackf2a657a2004-10-27 16:33:09 +00002102 } else {
2103 ix++;
2104 uptr = (xmlChar *)&ref->path[ix];
2105 }
William M. Brackf7789b12004-06-07 08:57:27 +00002106
2107 /*
2108 * In base, count the number of '/' from the differing point
2109 */
2110 if (bptr[pos] != ref->path[pos]) { /* check for trivial URI == base */
2111 for (; bptr[ix] != 0; ix++) {
2112 if (bptr[ix] == '/')
2113 nbslash++;
2114 }
2115 }
2116
2117 if (nbslash == 0) {
2118 val = xmlStrdup (uptr);
2119 goto done;
2120 }
William M. Brackf7789b12004-06-07 08:57:27 +00002121
2122 /*
2123 * Allocate just enough space for the returned string -
2124 * length of the remainder of the URI, plus enough space
2125 * for the "../" groups, plus one for the terminator
2126 */
2127 ix = xmlStrlen (uptr) + 1;
2128 val = (xmlChar *) xmlMalloc (ix + 3 * nbslash);
2129 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002130 xmlGenericError(xmlGenericErrorContext,
2131 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002132 goto done;
2133 }
2134 vptr = val;
2135 /*
2136 * Put in as many "../" as needed
2137 */
2138 for (; nbslash>0; nbslash--) {
2139 *vptr++ = '.';
2140 *vptr++ = '.';
2141 *vptr++ = '/';
2142 }
2143 /*
2144 * Finish up with the end of the URI
2145 */
2146 memcpy (vptr, uptr, ix);
2147
2148 done:
2149 /*
2150 * Free the working variables
2151 */
2152 if (ref != NULL)
2153 xmlFreeURI (ref);
2154 if (bas != NULL)
2155 xmlFreeURI (bas);
2156
2157 return val;
2158}
2159
2160/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002161 * xmlCanonicPath:
2162 * @path: the resource locator in a filesystem notation
2163 *
2164 * Constructs a canonic path from the specified path.
2165 *
2166 * Returns a new canonic path, or a duplicate of the path parameter if the
2167 * construction fails. The caller is responsible for freeing the memory occupied
2168 * by the returned string. If there is insufficient memory available, or the
2169 * argument is NULL, the function returns NULL.
2170 */
2171#define IS_WINDOWS_PATH(p) \
2172 ((p != NULL) && \
2173 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2174 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2175 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2176xmlChar*
2177xmlCanonicPath(const xmlChar *path)
2178{
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002179#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002180 int len = 0;
2181 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002182 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002183#endif
2184 xmlChar *ret;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002185 xmlURIPtr uri;
2186
2187 if (path == NULL)
2188 return(NULL);
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002189 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002190 xmlFreeURI(uri);
2191 return xmlStrdup(path);
2192 }
2193
2194 uri = xmlCreateURI();
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002195 if (uri == NULL) {
2196 return(NULL);
2197 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002198
Igor Zlatkovicce076162003-02-23 13:39:39 +00002199#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002200 len = xmlStrlen(path);
2201 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2202 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack42331a92004-07-29 07:07:16 +00002203 uri->path = xmlMallocAtomic(len + 2); /* FIXME - check alloc! */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002204 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002205 p = uri->path + 1;
2206 strncpy(p, path, len + 1);
2207 } else {
William M. Bracka3215c72004-07-31 16:24:01 +00002208 uri->path = xmlStrdup(path); /* FIXME - check alloc! */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002209 p = uri->path;
2210 }
2211 while (*p != '\0') {
2212 if (*p == '\\')
2213 *p = '/';
2214 p++;
2215 }
2216#else
Daniel Veillard42f12e92003-03-07 18:32:59 +00002217 uri->path = (char *) xmlStrdup((const xmlChar *) path);
Igor Zlatkovicce076162003-02-23 13:39:39 +00002218#endif
William M. Bracka3215c72004-07-31 16:24:01 +00002219 if (uri->path == NULL) {
2220 xmlFreeURI(uri);
2221 return(NULL);
2222 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002223 ret = xmlSaveUri(uri);
2224 xmlFreeURI(uri);
2225 return(ret);
2226}
Owen Taylor3473f882001-02-23 17:55:21 +00002227