blob: cf3768b753751f796e987db41b563d0f88045773 [file] [log] [blame]
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 2396, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
10
Daniel Veillard34ce8be2002-03-18 19:37:11 +000011#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000012#include "libxml.h"
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000018#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <libxml/xmlerror.h>
20
/************************************************************************
 *                                                                      *
 *      Macros to differentiate various character type                  *
 *              directly extracted from RFC 2396                        *
 *                                                                      *
 ************************************************************************/

/*
 * Conventions used below:
 *  - macros taking "x" test a single character value;
 *  - macros taking "p" test the character(s) a pointer refers to, and
 *    may read p[1] and p[2] when *p is '%'.
 * Every macro evaluates its argument more than once, so arguments must
 * be free of side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* Drop any IS_DIGIT definition inherited from an earlier header. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
                   (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||     \
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||     \
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * p[1] and p[2] are only read when the preceding test succeeded, so a
 * NUL terminated string is never read past its terminator.
 */
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&             \
                       (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
    ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||            \
    ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||            \
    ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||        \
    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||            \
    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||            \
    ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||      \
    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||            \
    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||            \
    ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * Tests one character of the scheme body; the "first character must be
 * alpha" rule is not checked by this macro.
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||                 \
    ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||     \
    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||            \
    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||            \
    ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||     \
    ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||            \
    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||            \
    ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||         \
                    (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * This macro additionally accepts "[" and "]", which IS_RESERVED above
 * matches as well.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * Skip to next pointer char, handle escaped sequences: p is advanced by
 * three bytes when it points at '%', by one byte otherwise.  Only the
 * '%' itself is tested here, not the two characters that follow it.
 * The argument is parenthesized so that an lvalue expression such as
 * *pp advances the pointed-to cursor rather than pp itself.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s through xmlStrndup, typed as char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
189
/************************************************************************
 *                                                                      *
 *                      Generic URI structure functions                 *
 *                                                                      *
 ************************************************************************/
195
196/**
197 * xmlCreateURI:
198 *
199 * Simply creates an empty xmlURI
200 *
201 * Returns the new structure or NULL in case of error
202 */
203xmlURIPtr
204xmlCreateURI(void) {
205 xmlURIPtr ret;
206
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208 if (ret == NULL) {
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
211 return(NULL);
212 }
213 memset(ret, 0, sizeof(xmlURI));
214 return(ret);
215}
216
217/**
218 * xmlSaveUri:
219 * @uri: pointer to an xmlURI
220 *
221 * Save the URI as an escaped string
222 *
223 * Returns a new string (to be deallocated by caller)
224 */
225xmlChar *
226xmlSaveUri(xmlURIPtr uri) {
227 xmlChar *ret = NULL;
228 const char *p;
229 int len;
230 int max;
231
232 if (uri == NULL) return(NULL);
233
234
235 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 len = 0;
243
244 if (uri->scheme != NULL) {
245 p = uri->scheme;
246 while (*p != 0) {
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = *p++;
257 }
258 if (len >= max) {
259 max *= 2;
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
261 if (ret == NULL) {
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
264 return(NULL);
265 }
266 }
267 ret[len++] = ':';
268 }
269 if (uri->opaque != NULL) {
270 p = uri->opaque;
271 while (*p != 0) {
272 if (len + 3 >= max) {
273 max *= 2;
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
275 if (ret == NULL) {
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
278 return(NULL);
279 }
280 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000282 ret[len++] = *p++;
283 else {
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
286 ret[len++] = '%';
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289 }
290 }
Owen Taylor3473f882001-02-23 17:55:21 +0000291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
429 if (ret == NULL) {
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
432 return(NULL);
433 }
434 }
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
438 ((*(p) == ',')))
439 ret[len++] = *p++;
440 else {
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
443 ret[len++] = '%';
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446 }
447 }
448 }
Daniel Veillarda1413b82007-04-26 08:33:28 +0000449 if (uri->query_raw != NULL) {
450 if (len + 1 >= max) {
451 max *= 2;
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
454 if (ret == NULL) {
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
457 return(NULL);
458 }
459 }
460 ret[len++] = '?';
461 p = uri->query_raw;
462 while (*p != 0) {
463 if (len + 1 >= max) {
464 max *= 2;
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
467 if (ret == NULL) {
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
470 return(NULL);
471 }
472 }
473 ret[len++] = *p++;
474 }
475 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000476 if (len + 3 >= max) {
477 max *= 2;
478 ret = (xmlChar *) xmlRealloc(ret,
479 (max + 1) * sizeof(xmlChar));
480 if (ret == NULL) {
481 xmlGenericError(xmlGenericErrorContext,
482 "xmlSaveUri: out of memory\n");
483 return(NULL);
484 }
485 }
486 ret[len++] = '?';
487 p = uri->query;
488 while (*p != 0) {
489 if (len + 3 >= max) {
490 max *= 2;
491 ret = (xmlChar *) xmlRealloc(ret,
492 (max + 1) * sizeof(xmlChar));
493 if (ret == NULL) {
494 xmlGenericError(xmlGenericErrorContext,
495 "xmlSaveUri: out of memory\n");
496 return(NULL);
497 }
498 }
499 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
500 ret[len++] = *p++;
501 else {
502 int val = *(unsigned char *)p++;
503 int hi = val / 0x10, lo = val % 0x10;
504 ret[len++] = '%';
505 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
506 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
507 }
508 }
509 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000510 }
511 if (uri->fragment != NULL) {
512 if (len + 3 >= max) {
513 max *= 2;
514 ret = (xmlChar *) xmlRealloc(ret,
515 (max + 1) * sizeof(xmlChar));
516 if (ret == NULL) {
517 xmlGenericError(xmlGenericErrorContext,
518 "xmlSaveUri: out of memory\n");
519 return(NULL);
520 }
521 }
522 ret[len++] = '#';
523 p = uri->fragment;
524 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000525 if (len + 3 >= max) {
526 max *= 2;
527 ret = (xmlChar *) xmlRealloc(ret,
528 (max + 1) * sizeof(xmlChar));
529 if (ret == NULL) {
530 xmlGenericError(xmlGenericErrorContext,
531 "xmlSaveUri: out of memory\n");
532 return(NULL);
533 }
534 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000535 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
536 ret[len++] = *p++;
537 else {
538 int val = *(unsigned char *)p++;
539 int hi = val / 0x10, lo = val % 0x10;
540 ret[len++] = '%';
541 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
542 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000543 }
544 }
Owen Taylor3473f882001-02-23 17:55:21 +0000545 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000546 if (len >= max) {
547 max *= 2;
548 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
549 if (ret == NULL) {
550 xmlGenericError(xmlGenericErrorContext,
551 "xmlSaveUri: out of memory\n");
552 return(NULL);
553 }
554 }
555 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000556 return(ret);
557}
558
559/**
560 * xmlPrintURI:
561 * @stream: a FILE* for the output
562 * @uri: pointer to an xmlURI
563 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000564 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000565 */
566void
567xmlPrintURI(FILE *stream, xmlURIPtr uri) {
568 xmlChar *out;
569
570 out = xmlSaveUri(uri);
571 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000572 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000573 xmlFree(out);
574 }
575}
576
577/**
578 * xmlCleanURI:
579 * @uri: pointer to an xmlURI
580 *
581 * Make sure the xmlURI struct is free of content
582 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000583static void
Owen Taylor3473f882001-02-23 17:55:21 +0000584xmlCleanURI(xmlURIPtr uri) {
585 if (uri == NULL) return;
586
587 if (uri->scheme != NULL) xmlFree(uri->scheme);
588 uri->scheme = NULL;
589 if (uri->server != NULL) xmlFree(uri->server);
590 uri->server = NULL;
591 if (uri->user != NULL) xmlFree(uri->user);
592 uri->user = NULL;
593 if (uri->path != NULL) xmlFree(uri->path);
594 uri->path = NULL;
595 if (uri->fragment != NULL) xmlFree(uri->fragment);
596 uri->fragment = NULL;
597 if (uri->opaque != NULL) xmlFree(uri->opaque);
598 uri->opaque = NULL;
599 if (uri->authority != NULL) xmlFree(uri->authority);
600 uri->authority = NULL;
601 if (uri->query != NULL) xmlFree(uri->query);
602 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +0000603 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
604 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000605}
606
607/**
608 * xmlFreeURI:
609 * @uri: pointer to an xmlURI
610 *
611 * Free up the xmlURI struct
612 */
613void
614xmlFreeURI(xmlURIPtr uri) {
615 if (uri == NULL) return;
616
617 if (uri->scheme != NULL) xmlFree(uri->scheme);
618 if (uri->server != NULL) xmlFree(uri->server);
619 if (uri->user != NULL) xmlFree(uri->user);
620 if (uri->path != NULL) xmlFree(uri->path);
621 if (uri->fragment != NULL) xmlFree(uri->fragment);
622 if (uri->opaque != NULL) xmlFree(uri->opaque);
623 if (uri->authority != NULL) xmlFree(uri->authority);
624 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +0000625 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +0000626 xmlFree(uri);
627}
628
/************************************************************************
 *                                                                      *
 *                      Helper functions                                *
 *                                                                      *
 ************************************************************************/
634
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of "/" between segments are
 * collapsed to a single "/" as well; leading "/" characters are kept
 * untouched.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).  At the top
     * of the loop "cur" always sits on the first character of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize //: skip all but the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining "/" */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so copy byte by byte instead of
         * calling strcpy.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Look for the last character of a previous segment, skipping the
         * "/" characters that precede "cur".
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        /* There is no previous segment when "cur" is the start of the
         * buffer or when only "/" characters precede it; keep going from
         * here.  Testing segp[0] rather than "segp == path" matters: a
         * one character segment at the very start of the buffer also ends
         * the scan at "path" and must still be backed up to.
         */
        if ((cur == path) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path, but only for paths that start
     * with "/".
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000822
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            return(1);
        default:
            return(0);
    }
}
830
Owen Taylor3473f882001-02-23 17:55:21 +0000831/**
832 * xmlURIUnescapeString:
833 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000834 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000835 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000836 *
Daniel Veillarda44294f2007-04-24 08:57:54 +0000837 * Unescaping routine, but does not check that the string is an URI. The
838 * output is a direct unsigned char translation of %XX values (no encoding)
Daniel Veillard79187652007-04-24 10:19:52 +0000839 * Note that the length of the result can only be smaller or same size as
840 * the input string.
Owen Taylor3473f882001-02-23 17:55:21 +0000841 *
Daniel Veillard79187652007-04-24 10:19:52 +0000842 * Returns a copy of the string, but unescaped, will return NULL only in case
843 * of error
Owen Taylor3473f882001-02-23 17:55:21 +0000844 */
845char *
846xmlURIUnescapeString(const char *str, int len, char *target) {
847 char *ret, *out;
848 const char *in;
849
850 if (str == NULL)
851 return(NULL);
852 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000853 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000854
855 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000856 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000857 if (ret == NULL) {
858 xmlGenericError(xmlGenericErrorContext,
859 "xmlURIUnescapeString: out of memory\n");
860 return(NULL);
861 }
862 } else
863 ret = target;
864 in = str;
865 out = ret;
866 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000867 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000868 in++;
869 if ((*in >= '0') && (*in <= '9'))
870 *out = (*in - '0');
871 else if ((*in >= 'a') && (*in <= 'f'))
872 *out = (*in - 'a') + 10;
873 else if ((*in >= 'A') && (*in <= 'F'))
874 *out = (*in - 'A') + 10;
875 in++;
876 if ((*in >= '0') && (*in <= '9'))
877 *out = *out * 16 + (*in - '0');
878 else if ((*in >= 'a') && (*in <= 'f'))
879 *out = *out * 16 + (*in - 'a') + 10;
880 else if ((*in >= 'A') && (*in <= 'F'))
881 *out = *out * 16 + (*in - 'A') + 10;
882 in++;
883 len -= 3;
884 out++;
885 } else {
886 *out++ = *in++;
887 len--;
888 }
889 }
890 *out = 0;
891 return(ret);
892}
893
894/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000895 * xmlURIEscapeStr:
896 * @str: string to escape
897 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000898 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000899 * This routine escapes a string to hex, ignoring reserved characters (a-z)
900 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000901 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000902 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000903 */
904xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000905xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
906 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000907 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000908
Owen Taylor3473f882001-02-23 17:55:21 +0000909 unsigned int len, out;
910
911 if (str == NULL)
912 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000913 if (str[0] == 0)
914 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000915 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000916 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000917
918 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000919 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000920 if (ret == NULL) {
921 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000922 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000923 return(NULL);
924 }
925 in = (const xmlChar *) str;
926 out = 0;
927 while(*in != 0) {
928 if (len - out <= 3) {
929 len += 20;
930 ret = (xmlChar *) xmlRealloc(ret, len);
931 if (ret == NULL) {
932 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000933 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000934 return(NULL);
935 }
936 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000937
938 ch = *in;
939
Daniel Veillardeb475a32002-04-14 22:00:22 +0000940 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000941 unsigned char val;
942 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000943 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000944 if (val <= 9)
945 ret[out++] = '0' + val;
946 else
947 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000948 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000949 if (val <= 9)
950 ret[out++] = '0' + val;
951 else
952 ret[out++] = 'A' + val - 0xA;
953 in++;
954 } else {
955 ret[out++] = *in++;
956 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000957
Owen Taylor3473f882001-02-23 17:55:21 +0000958 }
959 ret[out] = 0;
960 return(ret);
961}
962
Daniel Veillard8514c672001-05-23 10:29:12 +0000963/**
964 * xmlURIEscape:
965 * @str: the string of the URI to escape
966 *
967 * Escaping routine, does not do validity checks !
968 * It will try to escape the chars needing this, but this is heuristic
969 * based it's impossible to be sure.
970 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000971 * Returns an copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000972 *
973 * 25 May 2001
974 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
975 * according to RFC2396.
976 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +0000977 */
978xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000979xmlURIEscape(const xmlChar * str)
980{
Daniel Veillard6278fb52001-05-25 07:38:41 +0000981 xmlChar *ret, *segment = NULL;
982 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000983 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +0000984
Daniel Veillard6278fb52001-05-25 07:38:41 +0000985#define NULLCHK(p) if(!p) { \
986 xmlGenericError(xmlGenericErrorContext, \
987 "xmlURIEscape: out of memory\n"); \
988 return NULL; }
989
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000990 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000991 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000992
993 uri = xmlCreateURI();
994 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +0000995 /*
996 * Allow escaping errors in the unescaped form
997 */
998 uri->cleanup = 1;
999 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001000 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001001 xmlFreeURI(uri);
1002 return (NULL);
1003 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001004 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001005
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001006 if (!uri)
1007 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001008
1009 ret = NULL;
1010
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001011 if (uri->scheme) {
1012 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1013 NULLCHK(segment)
1014 ret = xmlStrcat(ret, segment);
1015 ret = xmlStrcat(ret, BAD_CAST ":");
1016 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001017 }
1018
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001019 if (uri->authority) {
1020 segment =
1021 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1022 NULLCHK(segment)
1023 ret = xmlStrcat(ret, BAD_CAST "//");
1024 ret = xmlStrcat(ret, segment);
1025 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001026 }
1027
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001028 if (uri->user) {
1029 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1030 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001031 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001032 ret = xmlStrcat(ret, segment);
1033 ret = xmlStrcat(ret, BAD_CAST "@");
1034 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001035 }
1036
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001037 if (uri->server) {
1038 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1039 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001040 if (uri->user == NULL)
1041 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001042 ret = xmlStrcat(ret, segment);
1043 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001044 }
1045
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001046 if (uri->port) {
1047 xmlChar port[10];
1048
Daniel Veillard43d3f612001-11-10 11:57:23 +00001049 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001050 ret = xmlStrcat(ret, BAD_CAST ":");
1051 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001052 }
1053
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001054 if (uri->path) {
1055 segment =
1056 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1057 NULLCHK(segment)
1058 ret = xmlStrcat(ret, segment);
1059 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001060 }
1061
Daniel Veillarda1413b82007-04-26 08:33:28 +00001062 if (uri->query_raw) {
1063 ret = xmlStrcat(ret, BAD_CAST "?");
1064 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1065 }
1066 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001067 segment =
1068 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1069 NULLCHK(segment)
1070 ret = xmlStrcat(ret, BAD_CAST "?");
1071 ret = xmlStrcat(ret, segment);
1072 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001073 }
1074
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001075 if (uri->opaque) {
1076 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1077 NULLCHK(segment)
1078 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001079 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001080 }
1081
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001082 if (uri->fragment) {
1083 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1084 NULLCHK(segment)
1085 ret = xmlStrcat(ret, BAD_CAST "#");
1086 ret = xmlStrcat(ret, segment);
1087 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001088 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001089
1090 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001091#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001092
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001093 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001094}
1095
Owen Taylor3473f882001-02-23 17:55:21 +00001096/************************************************************************
1097 * *
1098 * Escaped URI parsing *
1099 * *
1100 ************************************************************************/
1101
1102/**
1103 * xmlParseURIFragment:
1104 * @uri: pointer to an URI structure
1105 * @str: pointer to the string to analyze
1106 *
1107 * Parse an URI fragment string and fills in the appropriate fields
1108 * of the @uri structure.
1109 *
1110 * fragment = *uric
1111 *
1112 * Returns 0 or the error code
1113 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001114static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001115xmlParseURIFragment(xmlURIPtr uri, const char **str)
1116{
Daniel Veillard30e76072006-03-09 14:13:55 +00001117 const char *cur;
1118
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001119 if (str == NULL)
1120 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001121
Daniel Veillard30e76072006-03-09 14:13:55 +00001122 cur = *str;
1123
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001124 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001125 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001126 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001127 if (uri->fragment != NULL)
1128 xmlFree(uri->fragment);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001129 if (uri->cleanup & 2)
1130 uri->fragment = STRNDUP(*str, cur - *str);
1131 else
1132 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001133 }
1134 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001135 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001136}
1137
1138/**
1139 * xmlParseURIQuery:
1140 * @uri: pointer to an URI structure
1141 * @str: pointer to the string to analyze
1142 *
1143 * Parse the query part of an URI
1144 *
1145 * query = *uric
1146 *
1147 * Returns 0 or the error code
1148 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001149static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001150xmlParseURIQuery(xmlURIPtr uri, const char **str)
1151{
Daniel Veillard30e76072006-03-09 14:13:55 +00001152 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001153
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001154 if (str == NULL)
1155 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001156
Daniel Veillard30e76072006-03-09 14:13:55 +00001157 cur = *str;
1158
Daniel Veillard336a8e12005-08-07 10:46:19 +00001159 while ((IS_URIC(cur)) ||
1160 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001161 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001162 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001163 if (uri->query != NULL)
1164 xmlFree(uri->query);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001165 if (uri->cleanup & 2)
1166 uri->query = STRNDUP(*str, cur - *str);
1167 else
1168 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001169
1170 /* Save the raw bytes of the query as well.
1171 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
1172 */
1173 if (uri->query_raw != NULL)
1174 xmlFree (uri->query_raw);
1175 uri->query_raw = STRNDUP (*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 }
1177 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001178 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001179}
1180
1181/**
1182 * xmlParseURIScheme:
1183 * @uri: pointer to an URI structure
1184 * @str: pointer to the string to analyze
1185 *
1186 * Parse an URI scheme
1187 *
1188 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1189 *
1190 * Returns 0 or the error code
1191 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001192static int
Owen Taylor3473f882001-02-23 17:55:21 +00001193xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1194 const char *cur;
1195
1196 if (str == NULL)
1197 return(-1);
1198
1199 cur = *str;
1200 if (!IS_ALPHA(*cur))
1201 return(2);
1202 cur++;
1203 while (IS_SCHEME(*cur)) cur++;
1204 if (uri != NULL) {
1205 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001206 uri->scheme = STRNDUP(*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001207 }
1208 *str = cur;
1209 return(0);
1210}
1211
1212/**
1213 * xmlParseURIOpaquePart:
1214 * @uri: pointer to an URI structure
1215 * @str: pointer to the string to analyze
1216 *
1217 * Parse an URI opaque part
1218 *
1219 * opaque_part = uric_no_slash *uric
1220 *
1221 * Returns 0 or the error code
1222 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001223static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001224xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1225{
Owen Taylor3473f882001-02-23 17:55:21 +00001226 const char *cur;
1227
1228 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001229 return (-1);
1230
Owen Taylor3473f882001-02-23 17:55:21 +00001231 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001232 if (!((IS_URIC_NO_SLASH(cur)) ||
1233 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001234 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001235 }
1236 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001237 while ((IS_URIC(cur)) ||
1238 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001239 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001240 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001241 if (uri->opaque != NULL)
1242 xmlFree(uri->opaque);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001243 if (uri->cleanup & 2)
1244 uri->opaque = STRNDUP(*str, cur - *str);
1245 else
1246 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001247 }
1248 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001249 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001250}
1251
1252/**
1253 * xmlParseURIServer:
1254 * @uri: pointer to an URI structure
1255 * @str: pointer to the string to analyze
1256 *
1257 * Parse a server subpart of an URI, it's a finer grain analysis
1258 * of the authority part.
1259 *
1260 * server = [ [ userinfo "@" ] hostport ]
1261 * userinfo = *( unreserved | escaped |
1262 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1263 * hostport = host [ ":" port ]
William M. Brack015ccb22005-02-13 08:18:52 +00001264 * host = hostname | IPv4address | IPv6reference
Owen Taylor3473f882001-02-23 17:55:21 +00001265 * hostname = *( domainlabel "." ) toplabel [ "." ]
1266 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1267 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
William M. Brack015ccb22005-02-13 08:18:52 +00001268 * IPv6reference = "[" IPv6address "]"
1269 * IPv6address = hexpart [ ":" IPv4address ]
1270 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1271 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1272 * hexseq = hex4 *( ":" hex4)
1273 * hex4 = 1*4hexdig
Owen Taylor3473f882001-02-23 17:55:21 +00001274 * port = *digit
1275 *
1276 * Returns 0 or the error code
1277 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001278static int
Owen Taylor3473f882001-02-23 17:55:21 +00001279xmlParseURIServer(xmlURIPtr uri, const char **str) {
1280 const char *cur;
1281 const char *host, *tmp;
William M. Brack015ccb22005-02-13 08:18:52 +00001282 const int IPV4max = 4;
1283 const int IPV6max = 8;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001284 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001285
1286 if (str == NULL)
1287 return(-1);
1288
1289 cur = *str;
1290
1291 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001292 * is there a userinfo ?
Owen Taylor3473f882001-02-23 17:55:21 +00001293 */
1294 while (IS_USERINFO(cur)) NEXT(cur);
1295 if (*cur == '@') {
1296 if (uri != NULL) {
1297 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001298 if (uri->cleanup & 2)
Daniel Veillarde61d75f2007-05-28 14:16:33 +00001299 uri->user = STRNDUP(*str, cur - *str);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001300 else
1301 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001302 }
1303 cur++;
1304 } else {
1305 if (uri != NULL) {
1306 if (uri->user != NULL) xmlFree(uri->user);
1307 uri->user = NULL;
1308 }
1309 cur = *str;
1310 }
1311 /*
1312 * This can be empty in the case where there is no server
1313 */
1314 host = cur;
1315 if (*cur == '/') {
1316 if (uri != NULL) {
1317 if (uri->authority != NULL) xmlFree(uri->authority);
1318 uri->authority = NULL;
1319 if (uri->server != NULL) xmlFree(uri->server);
1320 uri->server = NULL;
1321 uri->port = 0;
1322 }
1323 return(0);
1324 }
1325 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001326 * host part of hostport can denote an IPV4 address, an IPV6 address
1327 * or an unresolved name. Check the IP first, its easier to detect
1328 * errors if wrong one.
1329 * An IPV6 address must start with a '[' and end with a ']'.
Owen Taylor3473f882001-02-23 17:55:21 +00001330 */
William M. Brack015ccb22005-02-13 08:18:52 +00001331 if (*cur == '[') {
1332 int compress=0;
1333 cur++;
1334 for (oct = 0; oct < IPV6max; ++oct) {
1335 if (*cur == ':') {
1336 if (compress)
1337 return(3); /* multiple compression attempted */
1338 if (!oct) { /* initial char is compression */
1339 if (*++cur != ':')
1340 return(3);
1341 }
1342 compress = 1; /* set compression-encountered flag */
1343 cur++; /* skip over the second ':' */
1344 continue;
1345 }
1346 while(IS_HEX(*cur)) cur++;
1347 if (oct == (IPV6max-1))
1348 continue;
1349 if (*cur != ':')
1350 break;
1351 cur++;
1352 }
1353 if ((!compress) && (oct != IPV6max))
1354 return(3);
1355 if (*cur != ']')
1356 return(3);
1357 if (uri != NULL) {
1358 if (uri->server != NULL) xmlFree(uri->server);
1359 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1360 (cur-host)-1);
1361 }
1362 cur++;
1363 } else {
1364 /*
1365 * Not IPV6, maybe IPV4
1366 */
1367 for (oct = 0; oct < IPV4max; ++oct) {
1368 if (*cur == '.')
1369 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1370 while(IS_DIGIT(*cur)) cur++;
1371 if (oct == (IPV4max-1))
1372 continue;
1373 if (*cur != '.')
1374 break;
1375 cur++;
1376 }
Owen Taylor3473f882001-02-23 17:55:21 +00001377 }
William M. Brack015ccb22005-02-13 08:18:52 +00001378 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1379 IS_ALPHA(*cur))) {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001380 /* maybe host_name */
1381 if (!IS_ALPHANUM(*cur))
1382 return(4); /* e.g. http://xml.$oft */
1383 do {
1384 do ++cur; while (IS_ALPHANUM(*cur));
1385 if (*cur == '-') {
1386 --cur;
1387 if (*cur == '.')
1388 return(5); /* e.g. http://xml.-soft */
1389 ++cur;
1390 continue;
1391 }
1392 if (*cur == '.') {
1393 --cur;
1394 if (*cur == '-')
1395 return(6); /* e.g. http://xml-.soft */
1396 if (*cur == '.')
1397 return(7); /* e.g. http://xml..soft */
1398 ++cur;
1399 continue;
1400 }
1401 break;
1402 } while (1);
1403 tmp = cur;
1404 if (tmp[-1] == '.')
1405 --tmp; /* e.g. http://xml.$Oft/ */
1406 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1407 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1408 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001409 }
Owen Taylor3473f882001-02-23 17:55:21 +00001410 if (uri != NULL) {
1411 if (uri->authority != NULL) xmlFree(uri->authority);
1412 uri->authority = NULL;
William M. Brack015ccb22005-02-13 08:18:52 +00001413 if (host[0] != '[') { /* it's not an IPV6 addr */
1414 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001415 if (uri->cleanup & 2)
1416 uri->server = STRNDUP(host, cur - host);
1417 else
1418 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
William M. Brack015ccb22005-02-13 08:18:52 +00001419 }
Owen Taylor3473f882001-02-23 17:55:21 +00001420 }
Owen Taylor3473f882001-02-23 17:55:21 +00001421 /*
1422 * finish by checking for a port presence.
1423 */
1424 if (*cur == ':') {
1425 cur++;
1426 if (IS_DIGIT(*cur)) {
1427 if (uri != NULL)
1428 uri->port = 0;
1429 while (IS_DIGIT(*cur)) {
1430 if (uri != NULL)
1431 uri->port = uri->port * 10 + (*cur - '0');
1432 cur++;
1433 }
1434 }
1435 }
1436 *str = cur;
1437 return(0);
1438}
1439
1440/**
1441 * xmlParseURIRelSegment:
1442 * @uri: pointer to an URI structure
1443 * @str: pointer to the string to analyze
1444 *
1445 * Parse an URI relative segment
1446 *
1447 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1448 * "+" | "$" | "," )
1449 *
1450 * Returns 0 or the error code
1451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001452static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001453xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1454{
Owen Taylor3473f882001-02-23 17:55:21 +00001455 const char *cur;
1456
1457 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001458 return (-1);
1459
Owen Taylor3473f882001-02-23 17:55:21 +00001460 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001461 if (!((IS_SEGMENT(cur)) ||
1462 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001463 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001464 }
1465 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001466 while ((IS_SEGMENT(cur)) ||
1467 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001468 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001469 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001470 if (uri->path != NULL)
1471 xmlFree(uri->path);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001472 if (uri->cleanup & 2)
1473 uri->path = STRNDUP(*str, cur - *str);
1474 else
1475 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001476 }
1477 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001478 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001479}
1480
1481/**
1482 * xmlParseURIPathSegments:
1483 * @uri: pointer to an URI structure
1484 * @str: pointer to the string to analyze
1485 * @slash: should we add a leading slash
1486 *
1487 * Parse an URI set of path segments
1488 *
1489 * path_segments = segment *( "/" segment )
1490 * segment = *pchar *( ";" param )
1491 * param = *pchar
1492 *
1493 * Returns 0 or the error code
1494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001496xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1497{
Owen Taylor3473f882001-02-23 17:55:21 +00001498 const char *cur;
1499
1500 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001501 return (-1);
1502
Owen Taylor3473f882001-02-23 17:55:21 +00001503 cur = *str;
1504
1505 do {
Daniel Veillard336a8e12005-08-07 10:46:19 +00001506 while ((IS_PCHAR(cur)) ||
1507 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001508 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001509 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001510 cur++;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001511 while ((IS_PCHAR(cur)) ||
1512 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001513 NEXT(cur);
1514 }
1515 if (*cur != '/')
1516 break;
1517 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001518 } while (1);
1519 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001520 int len, len2 = 0;
1521 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001522
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001523 /*
1524 * Concat the set of path segments to the current path
1525 */
1526 len = cur - *str;
1527 if (slash)
1528 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001529
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001530 if (uri->path != NULL) {
1531 len2 = strlen(uri->path);
1532 len += len2;
1533 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001534 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001535 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001536 xmlGenericError(xmlGenericErrorContext,
1537 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001538 *str = cur;
1539 return (-1);
1540 }
1541 if (uri->path != NULL)
1542 memcpy(path, uri->path, len2);
1543 if (slash) {
1544 path[len2] = '/';
1545 len2++;
1546 }
1547 path[len2] = 0;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001548 if (cur - *str > 0) {
1549 if (uri->cleanup & 2) {
1550 memcpy(&path[len2], *str, cur - *str);
1551 path[len2 + (cur - *str)] = 0;
1552 } else
1553 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1554 }
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001555 if (uri->path != NULL)
1556 xmlFree(uri->path);
1557 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001558 }
1559 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001560 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001561}
1562
1563/**
1564 * xmlParseURIAuthority:
1565 * @uri: pointer to an URI structure
1566 * @str: pointer to the string to analyze
1567 *
1568 * Parse the authority part of an URI.
1569 *
1570 * authority = server | reg_name
1571 * server = [ [ userinfo "@" ] hostport ]
1572 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1573 * "@" | "&" | "=" | "+" )
1574 *
1575 * Note : this is completely ambiguous since reg_name is allowed to
1576 * use the full set of chars in use by server:
1577 *
1578 * 3.2.1. Registry-based Naming Authority
1579 *
1580 * The structure of a registry-based naming authority is specific
1581 * to the URI scheme, but constrained to the allowed characters
1582 * for an authority component.
1583 *
1584 * Returns 0 or the error code
1585 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001586static int
Owen Taylor3473f882001-02-23 17:55:21 +00001587xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1588 const char *cur;
1589 int ret;
1590
1591 if (str == NULL)
1592 return(-1);
1593
1594 cur = *str;
1595
1596 /*
1597 * try first to parse it as a server string.
1598 */
1599 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001600 if ((ret == 0) && (*str != NULL) &&
1601 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001602 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001603 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001604
1605 /*
1606 * failed, fallback to reg_name
1607 */
1608 if (!IS_REG_NAME(cur)) {
1609 return(5);
1610 }
1611 NEXT(cur);
1612 while (IS_REG_NAME(cur)) NEXT(cur);
1613 if (uri != NULL) {
1614 if (uri->server != NULL) xmlFree(uri->server);
1615 uri->server = NULL;
1616 if (uri->user != NULL) xmlFree(uri->user);
1617 uri->user = NULL;
1618 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001619 if (uri->cleanup & 2)
1620 uri->authority = STRNDUP(*str, cur - *str);
1621 else
1622 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001623 }
1624 *str = cur;
1625 return(0);
1626}
1627
1628/**
1629 * xmlParseURIHierPart:
1630 * @uri: pointer to an URI structure
1631 * @str: pointer to the string to analyze
1632 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001633 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001634 *
1635 * hier_part = ( net_path | abs_path ) [ "?" query ]
1636 * abs_path = "/" path_segments
1637 * net_path = "//" authority [ abs_path ]
1638 *
1639 * Returns 0 or the error code
1640 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001641static int
Owen Taylor3473f882001-02-23 17:55:21 +00001642xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1643 int ret;
1644 const char *cur;
1645
1646 if (str == NULL)
1647 return(-1);
1648
1649 cur = *str;
1650
1651 if ((cur[0] == '/') && (cur[1] == '/')) {
1652 cur += 2;
1653 ret = xmlParseURIAuthority(uri, &cur);
1654 if (ret != 0)
1655 return(ret);
1656 if (cur[0] == '/') {
1657 cur++;
1658 ret = xmlParseURIPathSegments(uri, &cur, 1);
1659 }
1660 } else if (cur[0] == '/') {
1661 cur++;
1662 ret = xmlParseURIPathSegments(uri, &cur, 1);
1663 } else {
1664 return(4);
1665 }
1666 if (ret != 0)
1667 return(ret);
1668 if (*cur == '?') {
1669 cur++;
1670 ret = xmlParseURIQuery(uri, &cur);
1671 if (ret != 0)
1672 return(ret);
1673 }
1674 *str = cur;
1675 return(0);
1676}
1677
1678/**
1679 * xmlParseAbsoluteURI:
1680 * @uri: pointer to an URI structure
1681 * @str: pointer to the string to analyze
1682 *
1683 * Parse an URI reference string and fills in the appropriate fields
1684 * of the @uri structure
1685 *
1686 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1687 *
1688 * Returns 0 or the error code
1689 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001690static int
Owen Taylor3473f882001-02-23 17:55:21 +00001691xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1692 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001693 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001694
1695 if (str == NULL)
1696 return(-1);
1697
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001698 cur = *str;
1699
Owen Taylor3473f882001-02-23 17:55:21 +00001700 ret = xmlParseURIScheme(uri, str);
1701 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001702 if (**str != ':') {
1703 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001705 }
Owen Taylor3473f882001-02-23 17:55:21 +00001706 (*str)++;
1707 if (**str == '/')
1708 return(xmlParseURIHierPart(uri, str));
1709 return(xmlParseURIOpaquePart(uri, str));
1710}
1711
1712/**
1713 * xmlParseRelativeURI:
1714 * @uri: pointer to an URI structure
1715 * @str: pointer to the string to analyze
1716 *
1717 * Parse an relative URI string and fills in the appropriate fields
1718 * of the @uri structure
1719 *
1720 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1721 * abs_path = "/" path_segments
1722 * net_path = "//" authority [ abs_path ]
1723 * rel_path = rel_segment [ abs_path ]
1724 *
1725 * Returns 0 or the error code
1726 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001727static int
Owen Taylor3473f882001-02-23 17:55:21 +00001728xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1729 int ret = 0;
1730 const char *cur;
1731
1732 if (str == NULL)
1733 return(-1);
1734
1735 cur = *str;
1736 if ((cur[0] == '/') && (cur[1] == '/')) {
1737 cur += 2;
1738 ret = xmlParseURIAuthority(uri, &cur);
1739 if (ret != 0)
1740 return(ret);
1741 if (cur[0] == '/') {
1742 cur++;
1743 ret = xmlParseURIPathSegments(uri, &cur, 1);
1744 }
1745 } else if (cur[0] == '/') {
1746 cur++;
1747 ret = xmlParseURIPathSegments(uri, &cur, 1);
1748 } else if (cur[0] != '#' && cur[0] != '?') {
1749 ret = xmlParseURIRelSegment(uri, &cur);
1750 if (ret != 0)
1751 return(ret);
1752 if (cur[0] == '/') {
1753 cur++;
1754 ret = xmlParseURIPathSegments(uri, &cur, 1);
1755 }
1756 }
1757 if (ret != 0)
1758 return(ret);
1759 if (*cur == '?') {
1760 cur++;
1761 ret = xmlParseURIQuery(uri, &cur);
1762 if (ret != 0)
1763 return(ret);
1764 }
1765 *str = cur;
1766 return(ret);
1767}
1768
1769/**
1770 * xmlParseURIReference:
1771 * @uri: pointer to an URI structure
1772 * @str: the string to analyze
1773 *
1774 * Parse an URI reference string and fills in the appropriate fields
1775 * of the @uri structure
1776 *
1777 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1778 *
1779 * Returns 0 or the error code
1780 */
1781int
1782xmlParseURIReference(xmlURIPtr uri, const char *str) {
1783 int ret;
1784 const char *tmp = str;
1785
1786 if (str == NULL)
1787 return(-1);
1788 xmlCleanURI(uri);
1789
1790 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001791 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001792 * it fails.
1793 */
1794 ret = xmlParseAbsoluteURI(uri, &str);
1795 if (ret != 0) {
1796 xmlCleanURI(uri);
1797 str = tmp;
1798 ret = xmlParseRelativeURI(uri, &str);
1799 }
1800 if (ret != 0) {
1801 xmlCleanURI(uri);
1802 return(ret);
1803 }
1804
1805 if (*str == '#') {
1806 str++;
1807 ret = xmlParseURIFragment(uri, &str);
1808 if (ret != 0) return(ret);
1809 }
1810 if (*str != 0) {
1811 xmlCleanURI(uri);
1812 return(1);
1813 }
1814 return(0);
1815}
1816
1817/**
1818 * xmlParseURI:
1819 * @str: the URI string to analyze
1820 *
1821 * Parse an URI
1822 *
1823 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1824 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001825 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001826 */
1827xmlURIPtr
1828xmlParseURI(const char *str) {
1829 xmlURIPtr uri;
1830 int ret;
1831
1832 if (str == NULL)
1833 return(NULL);
1834 uri = xmlCreateURI();
1835 if (uri != NULL) {
1836 ret = xmlParseURIReference(uri, str);
1837 if (ret) {
1838 xmlFreeURI(uri);
1839 return(NULL);
1840 }
1841 }
1842 return(uri);
1843}
1844
Daniel Veillard336a8e12005-08-07 10:46:19 +00001845/**
1846 * xmlParseURIRaw:
1847 * @str: the URI string to analyze
1848 * @raw: if 1 unescaping of URI pieces are disabled
1849 *
1850 * Parse an URI but allows to keep intact the original fragments.
1851 *
1852 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1853 *
1854 * Returns a newly built xmlURIPtr or NULL in case of error
1855 */
1856xmlURIPtr
1857xmlParseURIRaw(const char *str, int raw) {
1858 xmlURIPtr uri;
1859 int ret;
1860
1861 if (str == NULL)
1862 return(NULL);
1863 uri = xmlCreateURI();
1864 if (uri != NULL) {
1865 if (raw) {
1866 uri->cleanup |= 2;
1867 }
1868 ret = xmlParseURIReference(uri, str);
1869 if (ret) {
1870 xmlFreeURI(uri);
1871 return(NULL);
1872 }
1873 }
1874 return(uri);
1875}
1876
Owen Taylor3473f882001-02-23 17:55:21 +00001877/************************************************************************
1878 * *
1879 * Public functions *
1880 * *
1881 ************************************************************************/
1882
1883/**
1884 * xmlBuildURI:
1885 * @URI: the URI instance found in the document
1886 * @base: the base value
1887 *
1888 * Computes he final URI of the reference done by checking that
1889 * the given URI is valid, and building the final URI using the
1890 * base URI. This is processed according to section 5.2 of the
1891 * RFC 2396
1892 *
1893 * 5.2. Resolving Relative References to Absolute Form
1894 *
1895 * Returns a new URI string (to be freed by the caller) or NULL in case
1896 * of error.
1897 */
1898xmlChar *
1899xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1900 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001901 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001902 xmlURIPtr ref = NULL;
1903 xmlURIPtr bas = NULL;
1904 xmlURIPtr res = NULL;
1905
1906 /*
1907 * 1) The URI reference is parsed into the potential four components and
1908 * fragment identifier, as described in Section 4.3.
1909 *
1910 * NOTE that a completely empty URI is treated by modern browsers
1911 * as a reference to "." rather than as a synonym for the current
1912 * URI. Should we do that here?
1913 */
1914 if (URI == NULL)
1915 ret = -1;
1916 else {
1917 if (*URI) {
1918 ref = xmlCreateURI();
1919 if (ref == NULL)
1920 goto done;
1921 ret = xmlParseURIReference(ref, (const char *) URI);
1922 }
1923 else
1924 ret = 0;
1925 }
1926 if (ret != 0)
1927 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001928 if ((ref != NULL) && (ref->scheme != NULL)) {
1929 /*
1930 * The URI is absolute don't modify.
1931 */
1932 val = xmlStrdup(URI);
1933 goto done;
1934 }
Owen Taylor3473f882001-02-23 17:55:21 +00001935 if (base == NULL)
1936 ret = -1;
1937 else {
1938 bas = xmlCreateURI();
1939 if (bas == NULL)
1940 goto done;
1941 ret = xmlParseURIReference(bas, (const char *) base);
1942 }
1943 if (ret != 0) {
1944 if (ref)
1945 val = xmlSaveUri(ref);
1946 goto done;
1947 }
1948 if (ref == NULL) {
1949 /*
1950 * the base fragment must be ignored
1951 */
1952 if (bas->fragment != NULL) {
1953 xmlFree(bas->fragment);
1954 bas->fragment = NULL;
1955 }
1956 val = xmlSaveUri(bas);
1957 goto done;
1958 }
1959
1960 /*
1961 * 2) If the path component is empty and the scheme, authority, and
1962 * query components are undefined, then it is a reference to the
1963 * current document and we are done. Otherwise, the reference URI's
1964 * query and fragment components are defined as found (or not found)
1965 * within the URI reference and not inherited from the base URI.
1966 *
1967 * NOTE that in modern browsers, the parsing differs from the above
1968 * in the following aspect: the query component is allowed to be
1969 * defined while still treating this as a reference to the current
1970 * document.
1971 */
1972 res = xmlCreateURI();
1973 if (res == NULL)
1974 goto done;
1975 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1976 ((ref->authority == NULL) && (ref->server == NULL))) {
1977 if (bas->scheme != NULL)
1978 res->scheme = xmlMemStrdup(bas->scheme);
1979 if (bas->authority != NULL)
1980 res->authority = xmlMemStrdup(bas->authority);
1981 else if (bas->server != NULL) {
1982 res->server = xmlMemStrdup(bas->server);
1983 if (bas->user != NULL)
1984 res->user = xmlMemStrdup(bas->user);
1985 res->port = bas->port;
1986 }
1987 if (bas->path != NULL)
1988 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001989 if (ref->query_raw != NULL)
1990 res->query_raw = xmlMemStrdup (ref->query_raw);
1991 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00001992 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001993 else if (bas->query_raw != NULL)
1994 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00001995 else if (bas->query != NULL)
1996 res->query = xmlMemStrdup(bas->query);
1997 if (ref->fragment != NULL)
1998 res->fragment = xmlMemStrdup(ref->fragment);
1999 goto step_7;
2000 }
Owen Taylor3473f882001-02-23 17:55:21 +00002001
2002 /*
2003 * 3) If the scheme component is defined, indicating that the reference
2004 * starts with a scheme name, then the reference is interpreted as an
2005 * absolute URI and we are done. Otherwise, the reference URI's
2006 * scheme is inherited from the base URI's scheme component.
2007 */
2008 if (ref->scheme != NULL) {
2009 val = xmlSaveUri(ref);
2010 goto done;
2011 }
2012 if (bas->scheme != NULL)
2013 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00002014
Daniel Veillarda1413b82007-04-26 08:33:28 +00002015 if (ref->query_raw != NULL)
2016 res->query_raw = xmlMemStrdup(ref->query_raw);
2017 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002018 res->query = xmlMemStrdup(ref->query);
2019 if (ref->fragment != NULL)
2020 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002021
2022 /*
2023 * 4) If the authority component is defined, then the reference is a
2024 * network-path and we skip to step 7. Otherwise, the reference
2025 * URI's authority is inherited from the base URI's authority
2026 * component, which will also be undefined if the URI scheme does not
2027 * use an authority component.
2028 */
2029 if ((ref->authority != NULL) || (ref->server != NULL)) {
2030 if (ref->authority != NULL)
2031 res->authority = xmlMemStrdup(ref->authority);
2032 else {
2033 res->server = xmlMemStrdup(ref->server);
2034 if (ref->user != NULL)
2035 res->user = xmlMemStrdup(ref->user);
2036 res->port = ref->port;
2037 }
2038 if (ref->path != NULL)
2039 res->path = xmlMemStrdup(ref->path);
2040 goto step_7;
2041 }
2042 if (bas->authority != NULL)
2043 res->authority = xmlMemStrdup(bas->authority);
2044 else if (bas->server != NULL) {
2045 res->server = xmlMemStrdup(bas->server);
2046 if (bas->user != NULL)
2047 res->user = xmlMemStrdup(bas->user);
2048 res->port = bas->port;
2049 }
2050
2051 /*
2052 * 5) If the path component begins with a slash character ("/"), then
2053 * the reference is an absolute-path and we skip to step 7.
2054 */
2055 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2056 res->path = xmlMemStrdup(ref->path);
2057 goto step_7;
2058 }
2059
2060
2061 /*
2062 * 6) If this step is reached, then we are resolving a relative-path
2063 * reference. The relative path needs to be merged with the base
2064 * URI's path. Although there are many ways to do this, we will
2065 * describe a simple method using a separate string buffer.
2066 *
2067 * Allocate a buffer large enough for the result string.
2068 */
2069 len = 2; /* extra / and 0 */
2070 if (ref->path != NULL)
2071 len += strlen(ref->path);
2072 if (bas->path != NULL)
2073 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002074 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002075 if (res->path == NULL) {
2076 xmlGenericError(xmlGenericErrorContext,
2077 "xmlBuildURI: out of memory\n");
2078 goto done;
2079 }
2080 res->path[0] = 0;
2081
2082 /*
2083 * a) All but the last segment of the base URI's path component is
2084 * copied to the buffer. In other words, any characters after the
2085 * last (right-most) slash character, if any, are excluded.
2086 */
2087 cur = 0;
2088 out = 0;
2089 if (bas->path != NULL) {
2090 while (bas->path[cur] != 0) {
2091 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2092 cur++;
2093 if (bas->path[cur] == 0)
2094 break;
2095
2096 cur++;
2097 while (out < cur) {
2098 res->path[out] = bas->path[out];
2099 out++;
2100 }
2101 }
2102 }
2103 res->path[out] = 0;
2104
2105 /*
2106 * b) The reference's path component is appended to the buffer
2107 * string.
2108 */
2109 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002110 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002111 /*
2112 * Ensure the path includes a '/'
2113 */
2114 if ((out == 0) && (bas->server != NULL))
2115 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002116 while (ref->path[indx] != 0) {
2117 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002118 }
2119 }
2120 res->path[out] = 0;
2121
2122 /*
2123 * Steps c) to h) are really path normalization steps
2124 */
2125 xmlNormalizeURIPath(res->path);
2126
2127step_7:
2128
2129 /*
2130 * 7) The resulting URI components, including any inherited from the
2131 * base URI, are recombined to give the absolute form of the URI
2132 * reference.
2133 */
2134 val = xmlSaveUri(res);
2135
2136done:
2137 if (ref != NULL)
2138 xmlFreeURI(ref);
2139 if (bas != NULL)
2140 xmlFreeURI(bas);
2141 if (res != NULL)
2142 xmlFreeURI(res);
2143 return(val);
2144}
2145
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002146/**
William M. Brackf7789b12004-06-07 08:57:27 +00002147 * xmlBuildRelativeURI:
2148 * @URI: the URI reference under consideration
2149 * @base: the base value
2150 *
2151 * Expresses the URI of the reference in terms relative to the
2152 * base. Some examples of this operation include:
2153 * base = "http://site1.com/docs/book1.html"
2154 * URI input URI returned
2155 * docs/pic1.gif pic1.gif
2156 * docs/img/pic1.gif img/pic1.gif
2157 * img/pic1.gif ../img/pic1.gif
2158 * http://site1.com/docs/pic1.gif pic1.gif
2159 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2160 *
2161 * base = "docs/book1.html"
2162 * URI input URI returned
2163 * docs/pic1.gif pic1.gif
2164 * docs/img/pic1.gif img/pic1.gif
2165 * img/pic1.gif ../img/pic1.gif
2166 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2167 *
2168 *
2169 * Note: if the URI reference is really wierd or complicated, it may be
2170 * worthwhile to first convert it into a "nice" one by calling
2171 * xmlBuildURI (using 'base') before calling this routine,
2172 * since this routine (for reasonable efficiency) assumes URI has
2173 * already been through some validation.
2174 *
2175 * Returns a new URI string (to be freed by the caller) or NULL in case
2176 * error.
2177 */
2178xmlChar *
2179xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2180{
2181 xmlChar *val = NULL;
2182 int ret;
2183 int ix;
2184 int pos = 0;
2185 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002186 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002187 xmlURIPtr ref = NULL;
2188 xmlURIPtr bas = NULL;
2189 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002190 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002191
2192 if ((URI == NULL) || (*URI == 0))
2193 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002194
2195 /*
2196 * First parse URI into a standard form
2197 */
2198 ref = xmlCreateURI ();
2199 if (ref == NULL)
2200 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002201 /* If URI not already in "relative" form */
2202 if (URI[0] != '.') {
2203 ret = xmlParseURIReference (ref, (const char *) URI);
2204 if (ret != 0)
2205 goto done; /* Error in URI, return NULL */
2206 } else
2207 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002208
2209 /*
2210 * Next parse base into the same standard form
2211 */
2212 if ((base == NULL) || (*base == 0)) {
2213 val = xmlStrdup (URI);
2214 goto done;
2215 }
2216 bas = xmlCreateURI ();
2217 if (bas == NULL)
2218 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002219 if (base[0] != '.') {
2220 ret = xmlParseURIReference (bas, (const char *) base);
2221 if (ret != 0)
2222 goto done; /* Error in base, return NULL */
2223 } else
2224 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002225
2226 /*
2227 * If the scheme / server on the URI differs from the base,
2228 * just return the URI
2229 */
2230 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002231 ((bas->scheme == NULL) ||
2232 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2233 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002234 val = xmlStrdup (URI);
2235 goto done;
2236 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002237 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2238 val = xmlStrdup(BAD_CAST "");
2239 goto done;
2240 }
2241 if (bas->path == NULL) {
2242 val = xmlStrdup((xmlChar *)ref->path);
2243 goto done;
2244 }
2245 if (ref->path == NULL) {
2246 ref->path = (char *) "/";
2247 remove_path = 1;
2248 }
William M. Brackf7789b12004-06-07 08:57:27 +00002249
2250 /*
2251 * At this point (at last!) we can compare the two paths
2252 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002253 * First we take care of the special case where either of the
2254 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002255 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002256 if (bas->path == NULL) {
2257 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002258 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002259 if (*uptr == '/')
2260 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002261 /* exception characters from xmlSaveUri */
2262 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002263 }
2264 goto done;
2265 }
William M. Brackf7789b12004-06-07 08:57:27 +00002266 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002267 if (ref->path == NULL) {
2268 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002269 if (bptr[ix] == '/')
2270 nbslash++;
2271 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002272 uptr = NULL;
2273 len = 1; /* this is for a string terminator only */
2274 } else {
2275 /*
2276 * Next we compare the two strings and find where they first differ
2277 */
2278 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2279 pos += 2;
2280 if ((*bptr == '.') && (bptr[1] == '/'))
2281 bptr += 2;
2282 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2283 bptr++;
2284 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2285 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002286
William M. Brack820d5ed2005-09-14 05:24:27 +00002287 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002288 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002289 goto done; /* (I can't imagine why anyone would do this) */
2290 }
2291
2292 /*
2293 * In URI, "back up" to the last '/' encountered. This will be the
2294 * beginning of the "unique" suffix of URI
2295 */
2296 ix = pos;
2297 if ((ref->path[ix] == '/') && (ix > 0))
2298 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002299 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2300 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002301 for (; ix > 0; ix--) {
2302 if (ref->path[ix] == '/')
2303 break;
2304 }
2305 if (ix == 0) {
2306 uptr = (xmlChar *)ref->path;
2307 } else {
2308 ix++;
2309 uptr = (xmlChar *)&ref->path[ix];
2310 }
2311
2312 /*
2313 * In base, count the number of '/' from the differing point
2314 */
2315 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2316 for (; bptr[ix] != 0; ix++) {
2317 if (bptr[ix] == '/')
2318 nbslash++;
2319 }
2320 }
2321 len = xmlStrlen (uptr) + 1;
2322 }
2323
William M. Brackf7789b12004-06-07 08:57:27 +00002324 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002325 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002326 /* exception characters from xmlSaveUri */
2327 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002328 goto done;
2329 }
William M. Brackf7789b12004-06-07 08:57:27 +00002330
2331 /*
2332 * Allocate just enough space for the returned string -
2333 * length of the remainder of the URI, plus enough space
2334 * for the "../" groups, plus one for the terminator
2335 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002336 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002337 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002338 xmlGenericError(xmlGenericErrorContext,
2339 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002340 goto done;
2341 }
2342 vptr = val;
2343 /*
2344 * Put in as many "../" as needed
2345 */
2346 for (; nbslash>0; nbslash--) {
2347 *vptr++ = '.';
2348 *vptr++ = '.';
2349 *vptr++ = '/';
2350 }
2351 /*
2352 * Finish up with the end of the URI
2353 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002354 if (uptr != NULL) {
2355 if ((vptr > val) && (len > 0) &&
2356 (uptr[0] == '/') && (vptr[-1] == '/')) {
2357 memcpy (vptr, uptr + 1, len - 1);
2358 vptr[len - 2] = 0;
2359 } else {
2360 memcpy (vptr, uptr, len);
2361 vptr[len - 1] = 0;
2362 }
2363 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002364 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002365 }
William M. Brackf7789b12004-06-07 08:57:27 +00002366
William M. Brack50420192007-07-20 01:09:08 +00002367 /* escape the freshly-built path */
2368 vptr = val;
2369 /* exception characters from xmlSaveUri */
2370 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2371 xmlFree(vptr);
2372
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002373done:
William M. Brackf7789b12004-06-07 08:57:27 +00002374 /*
2375 * Free the working variables
2376 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002377 if (remove_path != 0)
2378 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002379 if (ref != NULL)
2380 xmlFreeURI (ref);
2381 if (bas != NULL)
2382 xmlFreeURI (bas);
2383
2384 return val;
2385}
2386
2387/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002388 * xmlCanonicPath:
2389 * @path: the resource locator in a filesystem notation
2390 *
2391 * Constructs a canonic path from the specified path.
2392 *
2393 * Returns a new canonic path, or a duplicate of the path parameter if the
2394 * construction fails. The caller is responsible for freeing the memory occupied
2395 * by the returned string. If there is insufficient memory available, or the
2396 * argument is NULL, the function returns NULL.
2397 */
2398#define IS_WINDOWS_PATH(p) \
2399 ((p != NULL) && \
2400 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2401 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2402 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002403xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002404xmlCanonicPath(const xmlChar *path)
2405{
William M. Brack22242272007-01-27 07:59:37 +00002406/*
2407 * For Windows implementations, additional work needs to be done to
2408 * replace backslashes in pathnames with "forward slashes"
2409 */
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002410#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002411 int len = 0;
2412 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002413 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002414#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002415 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002416 xmlChar *ret;
2417 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002418
2419 if (path == NULL)
2420 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002421
2422 /* sanitize filename starting with // so it can be used as URI */
2423 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2424 path++;
2425
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002426 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002427 xmlFreeURI(uri);
2428 return xmlStrdup(path);
2429 }
2430
William M. Brack22242272007-01-27 07:59:37 +00002431 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002432 absuri = xmlStrstr(path, BAD_CAST "://");
2433 if (absuri != NULL) {
2434 int l, j;
2435 unsigned char c;
2436 xmlChar *escURI;
2437
2438 /*
2439 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002440 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002441 * part matches a protocol.
2442 */
2443 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002444 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002445 if ((l <= 0) || (l > 20))
2446 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002447 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002448 for (j = 0;j < l;j++) {
2449 c = path[j];
2450 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2451 goto path_processing;
2452 }
2453
William M. Brack22242272007-01-27 07:59:37 +00002454 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002455 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2456 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002457 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002458 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002459 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002460 if (uri != NULL) {
2461 xmlFreeURI(uri);
2462 return escURI;
2463 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002464 }
2465 }
2466
2467path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002468/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002469#if defined(_WIN32) && !defined(__CYGWIN__)
2470 /*
William M. Brack22242272007-01-27 07:59:37 +00002471 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002472 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002473 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002474 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002475 return(NULL);
2476 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002477
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002478 len = xmlStrlen(path);
2479 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002480 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002481 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002482 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002483 uri->path = xmlMallocAtomic(len + 2);
2484 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002485 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002486 return(NULL);
2487 }
William M. Brack22242272007-01-27 07:59:37 +00002488 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002489 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002490 p = uri->path + 1;
2491 strncpy(p, path, len + 1);
2492 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002493 uri->path = xmlStrdup(path);
2494 if (uri->path == NULL) {
2495 xmlFreeURI(uri);
2496 return(NULL);
2497 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002498 p = uri->path;
2499 }
William M. Brack22242272007-01-27 07:59:37 +00002500 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002501 while (*p != '\0') {
2502 if (*p == '\\')
2503 *p = '/';
2504 p++;
2505 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002506
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002507 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002508 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002509 } else {
2510 ret = xmlSaveUri(uri);
2511 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002512
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002513 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002514#else
2515 ret = xmlStrdup((const xmlChar *) path);
2516#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002517 return(ret);
2518}
Owen Taylor3473f882001-02-23 17:55:21 +00002519
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002520/**
2521 * xmlPathToURI:
2522 * @path: the resource locator in a filesystem notation
2523 *
2524 * Constructs an URI expressing the existing path
2525 *
2526 * Returns a new URI, or a duplicate of the path parameter if the
2527 * construction fails. The caller is responsible for freeing the memory
2528 * occupied by the returned string. If there is insufficient memory available,
2529 * or the argument is NULL, the function returns NULL.
2530 */
2531xmlChar *
2532xmlPathToURI(const xmlChar *path)
2533{
2534 xmlURIPtr uri;
2535 xmlURI temp;
2536 xmlChar *ret, *cal;
2537
2538 if (path == NULL)
2539 return(NULL);
2540
2541 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2542 xmlFreeURI(uri);
2543 return xmlStrdup(path);
2544 }
2545 cal = xmlCanonicPath(path);
2546 if (cal == NULL)
2547 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002548#if defined(_WIN32) && !defined(__CYGWIN__)
2549 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2550 If 'cal' is a valid URI allready then we are done here, as continuing would make
2551 it invalid. */
2552 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2553 xmlFreeURI(uri);
2554 return cal;
2555 }
2556 /* 'cal' can contain a relative path with backslashes. If that is processed
2557 by xmlSaveURI, they will be escaped and the external entity loader machinery
2558 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2559 ret = cal;
2560 while (*ret != '\0') {
2561 if (*ret == '\\')
2562 *ret = '/';
2563 ret++;
2564 }
2565#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002566 memset(&temp, 0, sizeof(temp));
2567 temp.path = (char *) cal;
2568 ret = xmlSaveUri(&temp);
2569 xmlFree(cal);
2570 return(ret);
2571}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002572#define bottom_uri
2573#include "elfgcchack.h"