blob: fafd11258a174e9be785afc7cf06918e4c6a3950 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/**
2 * uri.c: set of generic URI related routines
3 *
William M. Brack015ccb22005-02-13 08:18:52 +00004 * Reference: RFCs 2396, 2732 and 2373
Owen Taylor3473f882001-02-23 17:55:21 +00005 *
6 * See Copyright for the status of this software.
7 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00008 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00009 */
10
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
/************************************************************************
 *                                                                      *
 *          Macros to differentiate various character types            *
 *          directly extracted from RFC 2396                            *
 *                                                                      *
 ************************************************************************/

/*
 * Macros taking "x" classify a single character value; macros taking
 * "p" classify the character a pointer refers to, and may also look at
 * p[1] and p[2] when *p is '%'. Arguments are evaluated more than once,
 * so they must be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | ... | "9"
 *
 * Any IS_DIGIT definition already in effect is replaced.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x)                                                       \
    (IS_DIGIT(x) ||                                                     \
     (('a' <= (x)) && ((x) <= 'f')) ||                                  \
     (('A' <= (x)) && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * p[1] and p[2] are only read when *p is '%'.
 */
#define IS_ESCAPED(p)                                                   \
    ((*(p) == '%') && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p)                                                     \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 *
 * i.e. pchar plus ";" and "?".
 */
#define IS_URIC_NO_SLASH(p)                                             \
    ((IS_PCHAR(p)) || ((*(p) == ';')) || ((*(p) == '?')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p)                                                   \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * Only the per-character test is done here; the "first character is
 * alpha" rule is not part of this macro.
 */
#define IS_SCHEME(x)                                                    \
    ((IS_ALPHANUM(x)) || ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * i.e. pchar plus ";".
 */
#define IS_REG_NAME(p) ((IS_PCHAR(p)) || ((*(p) == ';')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p)                                                  \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||                        \
     ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||           \
     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||           \
     ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p)                                                      \
    ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * The macro additionally accepts "[" and "]", which IS_RESERVED() above
 * accepts as well.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))
Daniel Veillardbb6808e2001-10-29 23:59:27 +0000171
/*
 * Advance the pointer @p to the next URI character: by three bytes when
 * it currently points at '%' (the start of an escape sequence), by one
 * byte otherwise. The two bytes following the '%' are not checked here.
 *
 * @p must be a modifiable pointer lvalue; it is evaluated more than once,
 * so it must not have side effects. The argument is fully parenthesized
 * so that expressions such as NEXT(*pp) update the intended object.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
177
178/*
179 * Productions from the spec.
180 *
181 * authority = server | reg_name
182 * reg_name = 1*( unreserved | escaped | "$" | "," |
183 * ";" | ":" | "@" | "&" | "=" | "+" )
184 *
185 * path = [ abs_path | opaque_part ]
186 */
187
/*
 * Duplicate at most @n bytes of the char string @s through xmlStrndup()
 * and hand the copy back as a char pointer. The expansion is wrapped in
 * parentheses so the cast applies before any operator the caller adds.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
189
Owen Taylor3473f882001-02-23 17:55:21 +0000190/************************************************************************
191 * *
192 * Generic URI structure functions *
193 * *
194 ************************************************************************/
195
196/**
197 * xmlCreateURI:
198 *
199 * Simply creates an empty xmlURI
200 *
201 * Returns the new structure or NULL in case of error
202 */
203xmlURIPtr
204xmlCreateURI(void) {
205 xmlURIPtr ret;
206
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208 if (ret == NULL) {
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
211 return(NULL);
212 }
213 memset(ret, 0, sizeof(xmlURI));
214 return(ret);
215}
216
217/**
218 * xmlSaveUri:
219 * @uri: pointer to an xmlURI
220 *
221 * Save the URI as an escaped string
222 *
223 * Returns a new string (to be deallocated by caller)
224 */
225xmlChar *
226xmlSaveUri(xmlURIPtr uri) {
227 xmlChar *ret = NULL;
228 const char *p;
229 int len;
230 int max;
231
232 if (uri == NULL) return(NULL);
233
234
235 max = 80;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +0000237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 len = 0;
243
244 if (uri->scheme != NULL) {
245 p = uri->scheme;
246 while (*p != 0) {
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = *p++;
257 }
258 if (len >= max) {
259 max *= 2;
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
261 if (ret == NULL) {
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
264 return(NULL);
265 }
266 }
267 ret[len++] = ':';
268 }
269 if (uri->opaque != NULL) {
270 p = uri->opaque;
271 while (*p != 0) {
272 if (len + 3 >= max) {
273 max *= 2;
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
275 if (ret == NULL) {
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
278 return(NULL);
279 }
280 }
Daniel Veillard9231ff92003-03-23 22:00:51 +0000281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
Owen Taylor3473f882001-02-23 17:55:21 +0000282 ret[len++] = *p++;
283 else {
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
286 ret[len++] = '%';
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289 }
290 }
Owen Taylor3473f882001-02-23 17:55:21 +0000291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
Aleksey Sanin49cc9752002-06-14 17:07:10 +0000368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
Owen Taylor3473f882001-02-23 17:55:21 +0000369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
Daniel Veillarde54c3172008-03-25 13:22:41 +0000424 /*
425 * the colon in file:///d: should not be escaped or
426 * Windows accesses fail later.
427 */
428 if ((uri->scheme != NULL) &&
429 (p[0] == '/') &&
430 (((p[1] >= 'a') && (p[1] <= 'z')) ||
431 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
432 (p[2] == ':') &&
433 (xmlStrEqual(uri->scheme, BAD_CAST "file"))) {
434 if (len + 3 >= max) {
435 max *= 2;
436 ret = (xmlChar *) xmlRealloc(ret,
437 (max + 1) * sizeof(xmlChar));
438 if (ret == NULL) {
439 xmlGenericError(xmlGenericErrorContext,
440 "xmlSaveUri: out of memory\n");
441 return(NULL);
442 }
443 }
444 ret[len++] = *p++;
445 ret[len++] = *p++;
446 ret[len++] = *p++;
447 }
Owen Taylor3473f882001-02-23 17:55:21 +0000448 while (*p != 0) {
449 if (len + 3 >= max) {
450 max *= 2;
451 ret = (xmlChar *) xmlRealloc(ret,
452 (max + 1) * sizeof(xmlChar));
453 if (ret == NULL) {
454 xmlGenericError(xmlGenericErrorContext,
455 "xmlSaveUri: out of memory\n");
456 return(NULL);
457 }
458 }
459 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
460 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
461 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
462 ((*(p) == ',')))
463 ret[len++] = *p++;
464 else {
465 int val = *(unsigned char *)p++;
466 int hi = val / 0x10, lo = val % 0x10;
467 ret[len++] = '%';
468 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
469 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
470 }
471 }
472 }
Daniel Veillarda1413b82007-04-26 08:33:28 +0000473 if (uri->query_raw != NULL) {
474 if (len + 1 >= max) {
475 max *= 2;
476 ret = (xmlChar *) xmlRealloc(ret,
477 (max + 1) * sizeof(xmlChar));
478 if (ret == NULL) {
479 xmlGenericError(xmlGenericErrorContext,
480 "xmlSaveUri: out of memory\n");
481 return(NULL);
482 }
483 }
484 ret[len++] = '?';
485 p = uri->query_raw;
486 while (*p != 0) {
487 if (len + 1 >= max) {
488 max *= 2;
489 ret = (xmlChar *) xmlRealloc(ret,
490 (max + 1) * sizeof(xmlChar));
491 if (ret == NULL) {
492 xmlGenericError(xmlGenericErrorContext,
493 "xmlSaveUri: out of memory\n");
494 return(NULL);
495 }
496 }
497 ret[len++] = *p++;
498 }
499 } else if (uri->query != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000500 if (len + 3 >= max) {
501 max *= 2;
502 ret = (xmlChar *) xmlRealloc(ret,
503 (max + 1) * sizeof(xmlChar));
504 if (ret == NULL) {
505 xmlGenericError(xmlGenericErrorContext,
506 "xmlSaveUri: out of memory\n");
507 return(NULL);
508 }
509 }
510 ret[len++] = '?';
511 p = uri->query;
512 while (*p != 0) {
513 if (len + 3 >= max) {
514 max *= 2;
515 ret = (xmlChar *) xmlRealloc(ret,
516 (max + 1) * sizeof(xmlChar));
517 if (ret == NULL) {
518 xmlGenericError(xmlGenericErrorContext,
519 "xmlSaveUri: out of memory\n");
520 return(NULL);
521 }
522 }
523 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
524 ret[len++] = *p++;
525 else {
526 int val = *(unsigned char *)p++;
527 int hi = val / 0x10, lo = val % 0x10;
528 ret[len++] = '%';
529 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
530 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
531 }
532 }
533 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000534 }
535 if (uri->fragment != NULL) {
536 if (len + 3 >= max) {
537 max *= 2;
538 ret = (xmlChar *) xmlRealloc(ret,
539 (max + 1) * sizeof(xmlChar));
540 if (ret == NULL) {
541 xmlGenericError(xmlGenericErrorContext,
542 "xmlSaveUri: out of memory\n");
543 return(NULL);
544 }
545 }
546 ret[len++] = '#';
547 p = uri->fragment;
548 while (*p != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000549 if (len + 3 >= max) {
550 max *= 2;
551 ret = (xmlChar *) xmlRealloc(ret,
552 (max + 1) * sizeof(xmlChar));
553 if (ret == NULL) {
554 xmlGenericError(xmlGenericErrorContext,
555 "xmlSaveUri: out of memory\n");
556 return(NULL);
557 }
558 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000559 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
560 ret[len++] = *p++;
561 else {
562 int val = *(unsigned char *)p++;
563 int hi = val / 0x10, lo = val % 0x10;
564 ret[len++] = '%';
565 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
566 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Owen Taylor3473f882001-02-23 17:55:21 +0000567 }
568 }
Owen Taylor3473f882001-02-23 17:55:21 +0000569 }
Daniel Veillardfdd27d22002-11-28 11:55:38 +0000570 if (len >= max) {
571 max *= 2;
572 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
573 if (ret == NULL) {
574 xmlGenericError(xmlGenericErrorContext,
575 "xmlSaveUri: out of memory\n");
576 return(NULL);
577 }
578 }
579 ret[len++] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000580 return(ret);
581}
582
583/**
584 * xmlPrintURI:
585 * @stream: a FILE* for the output
586 * @uri: pointer to an xmlURI
587 *
William M. Brackf3cf1a12005-01-06 02:25:59 +0000588 * Prints the URI in the stream @stream.
Owen Taylor3473f882001-02-23 17:55:21 +0000589 */
590void
591xmlPrintURI(FILE *stream, xmlURIPtr uri) {
592 xmlChar *out;
593
594 out = xmlSaveUri(uri);
595 if (out != NULL) {
Daniel Veillardea7751d2002-12-20 00:16:24 +0000596 fprintf(stream, "%s", (char *) out);
Owen Taylor3473f882001-02-23 17:55:21 +0000597 xmlFree(out);
598 }
599}
600
601/**
602 * xmlCleanURI:
603 * @uri: pointer to an xmlURI
604 *
605 * Make sure the xmlURI struct is free of content
606 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000607static void
Owen Taylor3473f882001-02-23 17:55:21 +0000608xmlCleanURI(xmlURIPtr uri) {
609 if (uri == NULL) return;
610
611 if (uri->scheme != NULL) xmlFree(uri->scheme);
612 uri->scheme = NULL;
613 if (uri->server != NULL) xmlFree(uri->server);
614 uri->server = NULL;
615 if (uri->user != NULL) xmlFree(uri->user);
616 uri->user = NULL;
617 if (uri->path != NULL) xmlFree(uri->path);
618 uri->path = NULL;
619 if (uri->fragment != NULL) xmlFree(uri->fragment);
620 uri->fragment = NULL;
621 if (uri->opaque != NULL) xmlFree(uri->opaque);
622 uri->opaque = NULL;
623 if (uri->authority != NULL) xmlFree(uri->authority);
624 uri->authority = NULL;
625 if (uri->query != NULL) xmlFree(uri->query);
626 uri->query = NULL;
Daniel Veillarda1413b82007-04-26 08:33:28 +0000627 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
628 uri->query_raw = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000629}
630
631/**
632 * xmlFreeURI:
633 * @uri: pointer to an xmlURI
634 *
635 * Free up the xmlURI struct
636 */
637void
638xmlFreeURI(xmlURIPtr uri) {
639 if (uri == NULL) return;
640
641 if (uri->scheme != NULL) xmlFree(uri->scheme);
642 if (uri->server != NULL) xmlFree(uri->server);
643 if (uri->user != NULL) xmlFree(uri->user);
644 if (uri->path != NULL) xmlFree(uri->path);
645 if (uri->fragment != NULL) xmlFree(uri->fragment);
646 if (uri->opaque != NULL) xmlFree(uri->opaque);
647 if (uri->authority != NULL) xmlFree(uri->authority);
648 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +0000649 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +0000650 xmlFree(uri);
651}
652
653/************************************************************************
654 * *
655 * Helper functions *
656 * *
657 ************************************************************************/
658
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" between segments are
 * collapsed to a single "/" along the way.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so strcpy() must not be
         * used; copy forward byte by byte, terminator included.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the "/" separator(s) in front of "cur".  If that
         * reaches the start of the buffer, nothing but slashes precedes
         * "cur": there is no previous segment, so keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp - 1" is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".  This also works when the previous segment
         * is a single character located at the very start of the buffer.
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only done for paths that
     * start with "/").
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Owen Taylor3473f882001-02-23 17:55:21 +0000846
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, and 0 otherwise
 */
static int is_hex(char c) {
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(digit || lower || upper);
}
854
Owen Taylor3473f882001-02-23 17:55:21 +0000855/**
856 * xmlURIUnescapeString:
857 * @str: the string to unescape
Daniel Veillard60087f32001-10-10 09:45:09 +0000858 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000859 * @target: optional destination buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000860 *
Daniel Veillarda44294f2007-04-24 08:57:54 +0000861 * Unescaping routine, but does not check that the string is an URI. The
862 * output is a direct unsigned char translation of %XX values (no encoding)
Daniel Veillard79187652007-04-24 10:19:52 +0000863 * Note that the length of the result can only be smaller or same size as
864 * the input string.
Owen Taylor3473f882001-02-23 17:55:21 +0000865 *
Daniel Veillard79187652007-04-24 10:19:52 +0000866 * Returns a copy of the string, but unescaped, will return NULL only in case
867 * of error
Owen Taylor3473f882001-02-23 17:55:21 +0000868 */
869char *
870xmlURIUnescapeString(const char *str, int len, char *target) {
871 char *ret, *out;
872 const char *in;
873
874 if (str == NULL)
875 return(NULL);
876 if (len <= 0) len = strlen(str);
Daniel Veillardd2298792003-02-14 16:54:11 +0000877 if (len < 0) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000878
879 if (target == NULL) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000880 ret = (char *) xmlMallocAtomic(len + 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000881 if (ret == NULL) {
882 xmlGenericError(xmlGenericErrorContext,
883 "xmlURIUnescapeString: out of memory\n");
884 return(NULL);
885 }
886 } else
887 ret = target;
888 in = str;
889 out = ret;
890 while(len > 0) {
Daniel Veillard8399ff32004-09-22 21:57:53 +0000891 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000892 in++;
893 if ((*in >= '0') && (*in <= '9'))
894 *out = (*in - '0');
895 else if ((*in >= 'a') && (*in <= 'f'))
896 *out = (*in - 'a') + 10;
897 else if ((*in >= 'A') && (*in <= 'F'))
898 *out = (*in - 'A') + 10;
899 in++;
900 if ((*in >= '0') && (*in <= '9'))
901 *out = *out * 16 + (*in - '0');
902 else if ((*in >= 'a') && (*in <= 'f'))
903 *out = *out * 16 + (*in - 'a') + 10;
904 else if ((*in >= 'A') && (*in <= 'F'))
905 *out = *out * 16 + (*in - 'A') + 10;
906 in++;
907 len -= 3;
908 out++;
909 } else {
910 *out++ = *in++;
911 len--;
912 }
913 }
914 *out = 0;
915 return(ret);
916}
917
918/**
Daniel Veillard8514c672001-05-23 10:29:12 +0000919 * xmlURIEscapeStr:
920 * @str: string to escape
921 * @list: exception list string of chars not to escape
Owen Taylor3473f882001-02-23 17:55:21 +0000922 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000923 * This routine escapes a string to hex, ignoring reserved characters (a-z)
924 * and the characters in the exception list.
Owen Taylor3473f882001-02-23 17:55:21 +0000925 *
Daniel Veillard8514c672001-05-23 10:29:12 +0000926 * Returns a new escaped string or NULL in case of error.
Owen Taylor3473f882001-02-23 17:55:21 +0000927 */
928xmlChar *
Daniel Veillard8514c672001-05-23 10:29:12 +0000929xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
930 xmlChar *ret, ch;
Owen Taylor3473f882001-02-23 17:55:21 +0000931 const xmlChar *in;
Daniel Veillard8514c672001-05-23 10:29:12 +0000932
Owen Taylor3473f882001-02-23 17:55:21 +0000933 unsigned int len, out;
934
935 if (str == NULL)
936 return(NULL);
William M. Brackf3cf1a12005-01-06 02:25:59 +0000937 if (str[0] == 0)
938 return(xmlStrdup(str));
Owen Taylor3473f882001-02-23 17:55:21 +0000939 len = xmlStrlen(str);
Daniel Veillarde645e8c2002-10-22 17:35:37 +0000940 if (!(len > 0)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000941
942 len += 20;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000943 ret = (xmlChar *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +0000944 if (ret == NULL) {
945 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000946 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000947 return(NULL);
948 }
949 in = (const xmlChar *) str;
950 out = 0;
951 while(*in != 0) {
952 if (len - out <= 3) {
953 len += 20;
954 ret = (xmlChar *) xmlRealloc(ret, len);
955 if (ret == NULL) {
956 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000957 "xmlURIEscapeStr: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000958 return(NULL);
959 }
960 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000961
962 ch = *in;
963
Daniel Veillardeb475a32002-04-14 22:00:22 +0000964 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000965 unsigned char val;
966 ret[out++] = '%';
Daniel Veillard8514c672001-05-23 10:29:12 +0000967 val = ch >> 4;
Owen Taylor3473f882001-02-23 17:55:21 +0000968 if (val <= 9)
969 ret[out++] = '0' + val;
970 else
971 ret[out++] = 'A' + val - 0xA;
Daniel Veillard8514c672001-05-23 10:29:12 +0000972 val = ch & 0xF;
Owen Taylor3473f882001-02-23 17:55:21 +0000973 if (val <= 9)
974 ret[out++] = '0' + val;
975 else
976 ret[out++] = 'A' + val - 0xA;
977 in++;
978 } else {
979 ret[out++] = *in++;
980 }
Daniel Veillard8514c672001-05-23 10:29:12 +0000981
Owen Taylor3473f882001-02-23 17:55:21 +0000982 }
983 ret[out] = 0;
984 return(ret);
985}
986
Daniel Veillard8514c672001-05-23 10:29:12 +0000987/**
988 * xmlURIEscape:
989 * @str: the string of the URI to escape
990 *
 * Escaping routine, does not do validity checks!
 * It tries to escape the characters that need it, but since this is
 * heuristic-based it is impossible to be sure of the result.
 *
 * Returns a copy of the string, but escaped
Daniel Veillard6278fb52001-05-25 07:38:41 +0000996 *
997 * 25 May 2001
998 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
999 * according to RFC2396.
1000 * - Carl Douglas
Daniel Veillard8514c672001-05-23 10:29:12 +00001001 */
1002xmlChar *
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001003xmlURIEscape(const xmlChar * str)
1004{
Daniel Veillard6278fb52001-05-25 07:38:41 +00001005 xmlChar *ret, *segment = NULL;
1006 xmlURIPtr uri;
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001007 int ret2;
Daniel Veillard8514c672001-05-23 10:29:12 +00001008
Daniel Veillard6278fb52001-05-25 07:38:41 +00001009#define NULLCHK(p) if(!p) { \
1010 xmlGenericError(xmlGenericErrorContext, \
1011 "xmlURIEscape: out of memory\n"); \
1012 return NULL; }
1013
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001014 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001015 return (NULL);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001016
1017 uri = xmlCreateURI();
1018 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001019 /*
1020 * Allow escaping errors in the unescaped form
1021 */
1022 uri->cleanup = 1;
1023 ret2 = xmlParseURIReference(uri, (const char *)str);
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001024 if (ret2) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001025 xmlFreeURI(uri);
1026 return (NULL);
1027 }
Daniel Veillardbb6808e2001-10-29 23:59:27 +00001028 }
Daniel Veillard6278fb52001-05-25 07:38:41 +00001029
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001030 if (!uri)
1031 return NULL;
Daniel Veillard6278fb52001-05-25 07:38:41 +00001032
1033 ret = NULL;
1034
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001035 if (uri->scheme) {
1036 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1037 NULLCHK(segment)
1038 ret = xmlStrcat(ret, segment);
1039 ret = xmlStrcat(ret, BAD_CAST ":");
1040 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001041 }
1042
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001043 if (uri->authority) {
1044 segment =
1045 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1046 NULLCHK(segment)
1047 ret = xmlStrcat(ret, BAD_CAST "//");
1048 ret = xmlStrcat(ret, segment);
1049 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001050 }
1051
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001052 if (uri->user) {
1053 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1054 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001055 ret = xmlStrcat(ret,BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001056 ret = xmlStrcat(ret, segment);
1057 ret = xmlStrcat(ret, BAD_CAST "@");
1058 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001059 }
1060
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001061 if (uri->server) {
1062 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1063 NULLCHK(segment)
Daniel Veillard0a194582004-04-01 20:09:22 +00001064 if (uri->user == NULL)
1065 ret = xmlStrcat(ret, BAD_CAST "//");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001066 ret = xmlStrcat(ret, segment);
1067 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001068 }
1069
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001070 if (uri->port) {
1071 xmlChar port[10];
1072
Daniel Veillard43d3f612001-11-10 11:57:23 +00001073 snprintf((char *) port, 10, "%d", uri->port);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001074 ret = xmlStrcat(ret, BAD_CAST ":");
1075 ret = xmlStrcat(ret, port);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001076 }
1077
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001078 if (uri->path) {
1079 segment =
1080 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1081 NULLCHK(segment)
1082 ret = xmlStrcat(ret, segment);
1083 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001084 }
1085
Daniel Veillarda1413b82007-04-26 08:33:28 +00001086 if (uri->query_raw) {
1087 ret = xmlStrcat(ret, BAD_CAST "?");
1088 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1089 }
1090 else if (uri->query) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001091 segment =
1092 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1093 NULLCHK(segment)
1094 ret = xmlStrcat(ret, BAD_CAST "?");
1095 ret = xmlStrcat(ret, segment);
1096 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001097 }
1098
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001099 if (uri->opaque) {
1100 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1101 NULLCHK(segment)
1102 ret = xmlStrcat(ret, segment);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001103 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001104 }
1105
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001106 if (uri->fragment) {
1107 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1108 NULLCHK(segment)
1109 ret = xmlStrcat(ret, BAD_CAST "#");
1110 ret = xmlStrcat(ret, segment);
1111 xmlFree(segment);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001112 }
Daniel Veillard43d3f612001-11-10 11:57:23 +00001113
1114 xmlFreeURI(uri);
Daniel Veillard6278fb52001-05-25 07:38:41 +00001115#undef NULLCHK
Daniel Veillard8514c672001-05-23 10:29:12 +00001116
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001117 return (ret);
Daniel Veillard8514c672001-05-23 10:29:12 +00001118}
1119
Owen Taylor3473f882001-02-23 17:55:21 +00001120/************************************************************************
1121 * *
1122 * Escaped URI parsing *
1123 * *
1124 ************************************************************************/
1125
1126/**
1127 * xmlParseURIFragment:
1128 * @uri: pointer to an URI structure
1129 * @str: pointer to the string to analyze
1130 *
1131 * Parse an URI fragment string and fills in the appropriate fields
1132 * of the @uri structure.
1133 *
1134 * fragment = *uric
1135 *
1136 * Returns 0 or the error code
1137 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001138static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001139xmlParseURIFragment(xmlURIPtr uri, const char **str)
1140{
Daniel Veillard30e76072006-03-09 14:13:55 +00001141 const char *cur;
1142
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001143 if (str == NULL)
1144 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001145
Daniel Veillard30e76072006-03-09 14:13:55 +00001146 cur = *str;
1147
Daniel Veillardfdd27d22002-11-28 11:55:38 +00001148 while (IS_URIC(cur) || IS_UNWISE(cur))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001149 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001150 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001151 if (uri->fragment != NULL)
1152 xmlFree(uri->fragment);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001153 if (uri->cleanup & 2)
1154 uri->fragment = STRNDUP(*str, cur - *str);
1155 else
1156 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001157 }
1158 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001159 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001160}
1161
1162/**
1163 * xmlParseURIQuery:
1164 * @uri: pointer to an URI structure
1165 * @str: pointer to the string to analyze
1166 *
1167 * Parse the query part of an URI
1168 *
1169 * query = *uric
1170 *
1171 * Returns 0 or the error code
1172 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001173static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001174xmlParseURIQuery(xmlURIPtr uri, const char **str)
1175{
Daniel Veillard30e76072006-03-09 14:13:55 +00001176 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001177
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001178 if (str == NULL)
1179 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001180
Daniel Veillard30e76072006-03-09 14:13:55 +00001181 cur = *str;
1182
Daniel Veillard336a8e12005-08-07 10:46:19 +00001183 while ((IS_URIC(cur)) ||
1184 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001185 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001186 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001187 if (uri->query != NULL)
1188 xmlFree(uri->query);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001189 if (uri->cleanup & 2)
1190 uri->query = STRNDUP(*str, cur - *str);
1191 else
1192 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillarda1413b82007-04-26 08:33:28 +00001193
1194 /* Save the raw bytes of the query as well.
1195 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
1196 */
1197 if (uri->query_raw != NULL)
1198 xmlFree (uri->query_raw);
1199 uri->query_raw = STRNDUP (*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001200 }
1201 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001202 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001203}
1204
1205/**
1206 * xmlParseURIScheme:
1207 * @uri: pointer to an URI structure
1208 * @str: pointer to the string to analyze
1209 *
1210 * Parse an URI scheme
1211 *
1212 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1213 *
1214 * Returns 0 or the error code
1215 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001216static int
Owen Taylor3473f882001-02-23 17:55:21 +00001217xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1218 const char *cur;
1219
1220 if (str == NULL)
1221 return(-1);
1222
1223 cur = *str;
1224 if (!IS_ALPHA(*cur))
1225 return(2);
1226 cur++;
1227 while (IS_SCHEME(*cur)) cur++;
1228 if (uri != NULL) {
1229 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001230 uri->scheme = STRNDUP(*str, cur - *str);
Owen Taylor3473f882001-02-23 17:55:21 +00001231 }
1232 *str = cur;
1233 return(0);
1234}
1235
1236/**
1237 * xmlParseURIOpaquePart:
1238 * @uri: pointer to an URI structure
1239 * @str: pointer to the string to analyze
1240 *
1241 * Parse an URI opaque part
1242 *
1243 * opaque_part = uric_no_slash *uric
1244 *
1245 * Returns 0 or the error code
1246 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001247static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001248xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1249{
Owen Taylor3473f882001-02-23 17:55:21 +00001250 const char *cur;
1251
1252 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001253 return (-1);
1254
Owen Taylor3473f882001-02-23 17:55:21 +00001255 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001256 if (!((IS_URIC_NO_SLASH(cur)) ||
1257 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001258 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001261 while ((IS_URIC(cur)) ||
1262 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001263 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001264 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001265 if (uri->opaque != NULL)
1266 xmlFree(uri->opaque);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001267 if (uri->cleanup & 2)
1268 uri->opaque = STRNDUP(*str, cur - *str);
1269 else
1270 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001271 }
1272 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001273 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001274}
1275
1276/**
1277 * xmlParseURIServer:
1278 * @uri: pointer to an URI structure
1279 * @str: pointer to the string to analyze
1280 *
1281 * Parse a server subpart of an URI, it's a finer grain analysis
1282 * of the authority part.
1283 *
1284 * server = [ [ userinfo "@" ] hostport ]
1285 * userinfo = *( unreserved | escaped |
1286 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1287 * hostport = host [ ":" port ]
William M. Brack015ccb22005-02-13 08:18:52 +00001288 * host = hostname | IPv4address | IPv6reference
Owen Taylor3473f882001-02-23 17:55:21 +00001289 * hostname = *( domainlabel "." ) toplabel [ "." ]
1290 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1291 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
William M. Brack015ccb22005-02-13 08:18:52 +00001292 * IPv6reference = "[" IPv6address "]"
1293 * IPv6address = hexpart [ ":" IPv4address ]
1294 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1295 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1296 * hexseq = hex4 *( ":" hex4)
1297 * hex4 = 1*4hexdig
Owen Taylor3473f882001-02-23 17:55:21 +00001298 * port = *digit
1299 *
1300 * Returns 0 or the error code
1301 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001302static int
Owen Taylor3473f882001-02-23 17:55:21 +00001303xmlParseURIServer(xmlURIPtr uri, const char **str) {
1304 const char *cur;
1305 const char *host, *tmp;
William M. Brack015ccb22005-02-13 08:18:52 +00001306 const int IPV4max = 4;
1307 const int IPV6max = 8;
Daniel Veillard9231ff92003-03-23 22:00:51 +00001308 int oct;
Owen Taylor3473f882001-02-23 17:55:21 +00001309
1310 if (str == NULL)
1311 return(-1);
1312
1313 cur = *str;
1314
1315 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001316 * is there a userinfo ?
Owen Taylor3473f882001-02-23 17:55:21 +00001317 */
1318 while (IS_USERINFO(cur)) NEXT(cur);
1319 if (*cur == '@') {
1320 if (uri != NULL) {
1321 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001322 if (uri->cleanup & 2)
Daniel Veillarde61d75f2007-05-28 14:16:33 +00001323 uri->user = STRNDUP(*str, cur - *str);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001324 else
1325 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001326 }
1327 cur++;
1328 } else {
1329 if (uri != NULL) {
1330 if (uri->user != NULL) xmlFree(uri->user);
1331 uri->user = NULL;
1332 }
1333 cur = *str;
1334 }
1335 /*
1336 * This can be empty in the case where there is no server
1337 */
1338 host = cur;
1339 if (*cur == '/') {
1340 if (uri != NULL) {
1341 if (uri->authority != NULL) xmlFree(uri->authority);
1342 uri->authority = NULL;
1343 if (uri->server != NULL) xmlFree(uri->server);
1344 uri->server = NULL;
1345 uri->port = 0;
1346 }
1347 return(0);
1348 }
1349 /*
William M. Brack015ccb22005-02-13 08:18:52 +00001350 * host part of hostport can denote an IPV4 address, an IPV6 address
1351 * or an unresolved name. Check the IP first, its easier to detect
1352 * errors if wrong one.
1353 * An IPV6 address must start with a '[' and end with a ']'.
Owen Taylor3473f882001-02-23 17:55:21 +00001354 */
William M. Brack015ccb22005-02-13 08:18:52 +00001355 if (*cur == '[') {
1356 int compress=0;
1357 cur++;
1358 for (oct = 0; oct < IPV6max; ++oct) {
1359 if (*cur == ':') {
1360 if (compress)
1361 return(3); /* multiple compression attempted */
1362 if (!oct) { /* initial char is compression */
1363 if (*++cur != ':')
1364 return(3);
1365 }
1366 compress = 1; /* set compression-encountered flag */
1367 cur++; /* skip over the second ':' */
1368 continue;
1369 }
1370 while(IS_HEX(*cur)) cur++;
1371 if (oct == (IPV6max-1))
1372 continue;
1373 if (*cur != ':')
1374 break;
1375 cur++;
1376 }
1377 if ((!compress) && (oct != IPV6max))
1378 return(3);
1379 if (*cur != ']')
1380 return(3);
1381 if (uri != NULL) {
1382 if (uri->server != NULL) xmlFree(uri->server);
1383 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1384 (cur-host)-1);
1385 }
1386 cur++;
1387 } else {
1388 /*
1389 * Not IPV6, maybe IPV4
1390 */
1391 for (oct = 0; oct < IPV4max; ++oct) {
1392 if (*cur == '.')
1393 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1394 while(IS_DIGIT(*cur)) cur++;
1395 if (oct == (IPV4max-1))
1396 continue;
1397 if (*cur != '.')
1398 break;
1399 cur++;
1400 }
Owen Taylor3473f882001-02-23 17:55:21 +00001401 }
William M. Brack015ccb22005-02-13 08:18:52 +00001402 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1403 IS_ALPHA(*cur))) {
Daniel Veillard9231ff92003-03-23 22:00:51 +00001404 /* maybe host_name */
1405 if (!IS_ALPHANUM(*cur))
1406 return(4); /* e.g. http://xml.$oft */
1407 do {
1408 do ++cur; while (IS_ALPHANUM(*cur));
1409 if (*cur == '-') {
1410 --cur;
1411 if (*cur == '.')
1412 return(5); /* e.g. http://xml.-soft */
1413 ++cur;
1414 continue;
1415 }
1416 if (*cur == '.') {
1417 --cur;
1418 if (*cur == '-')
1419 return(6); /* e.g. http://xml-.soft */
1420 if (*cur == '.')
1421 return(7); /* e.g. http://xml..soft */
1422 ++cur;
1423 continue;
1424 }
1425 break;
1426 } while (1);
1427 tmp = cur;
1428 if (tmp[-1] == '.')
1429 --tmp; /* e.g. http://xml.$Oft/ */
1430 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1431 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1432 return(8); /* e.g. http://xmlsOft.0rg/ */
Owen Taylor3473f882001-02-23 17:55:21 +00001433 }
Owen Taylor3473f882001-02-23 17:55:21 +00001434 if (uri != NULL) {
1435 if (uri->authority != NULL) xmlFree(uri->authority);
1436 uri->authority = NULL;
William M. Brack015ccb22005-02-13 08:18:52 +00001437 if (host[0] != '[') { /* it's not an IPV6 addr */
1438 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001439 if (uri->cleanup & 2)
1440 uri->server = STRNDUP(host, cur - host);
1441 else
1442 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
William M. Brack015ccb22005-02-13 08:18:52 +00001443 }
Owen Taylor3473f882001-02-23 17:55:21 +00001444 }
Owen Taylor3473f882001-02-23 17:55:21 +00001445 /*
1446 * finish by checking for a port presence.
1447 */
1448 if (*cur == ':') {
1449 cur++;
1450 if (IS_DIGIT(*cur)) {
1451 if (uri != NULL)
1452 uri->port = 0;
1453 while (IS_DIGIT(*cur)) {
1454 if (uri != NULL)
1455 uri->port = uri->port * 10 + (*cur - '0');
1456 cur++;
1457 }
1458 }
1459 }
1460 *str = cur;
1461 return(0);
1462}
1463
1464/**
1465 * xmlParseURIRelSegment:
1466 * @uri: pointer to an URI structure
1467 * @str: pointer to the string to analyze
1468 *
1469 * Parse an URI relative segment
1470 *
1471 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1472 * "+" | "$" | "," )
1473 *
1474 * Returns 0 or the error code
1475 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001476static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001477xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1478{
Owen Taylor3473f882001-02-23 17:55:21 +00001479 const char *cur;
1480
1481 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001482 return (-1);
1483
Owen Taylor3473f882001-02-23 17:55:21 +00001484 cur = *str;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001485 if (!((IS_SEGMENT(cur)) ||
1486 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001487 return (3);
Owen Taylor3473f882001-02-23 17:55:21 +00001488 }
1489 NEXT(cur);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001490 while ((IS_SEGMENT(cur)) ||
1491 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001492 NEXT(cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001493 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001494 if (uri->path != NULL)
1495 xmlFree(uri->path);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001496 if (uri->cleanup & 2)
1497 uri->path = STRNDUP(*str, cur - *str);
1498 else
1499 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001500 }
1501 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001502 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001503}
1504
1505/**
1506 * xmlParseURIPathSegments:
1507 * @uri: pointer to an URI structure
1508 * @str: pointer to the string to analyze
1509 * @slash: should we add a leading slash
1510 *
1511 * Parse an URI set of path segments
1512 *
1513 * path_segments = segment *( "/" segment )
1514 * segment = *pchar *( ";" param )
1515 * param = *pchar
1516 *
1517 * Returns 0 or the error code
1518 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001519static int
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001520xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1521{
Owen Taylor3473f882001-02-23 17:55:21 +00001522 const char *cur;
1523
1524 if (str == NULL)
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001525 return (-1);
1526
Owen Taylor3473f882001-02-23 17:55:21 +00001527 cur = *str;
1528
1529 do {
Daniel Veillard336a8e12005-08-07 10:46:19 +00001530 while ((IS_PCHAR(cur)) ||
1531 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001532 NEXT(cur);
Daniel Veillard234bc4e2002-05-24 11:03:05 +00001533 while (*cur == ';') {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001534 cur++;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001535 while ((IS_PCHAR(cur)) ||
1536 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001537 NEXT(cur);
1538 }
1539 if (*cur != '/')
1540 break;
1541 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001542 } while (1);
1543 if (uri != NULL) {
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001544 int len, len2 = 0;
1545 char *path;
Owen Taylor3473f882001-02-23 17:55:21 +00001546
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001547 /*
1548 * Concat the set of path segments to the current path
1549 */
1550 len = cur - *str;
1551 if (slash)
1552 len++;
Owen Taylor3473f882001-02-23 17:55:21 +00001553
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001554 if (uri->path != NULL) {
1555 len2 = strlen(uri->path);
1556 len += len2;
1557 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001558 path = (char *) xmlMallocAtomic(len + 1);
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001559 if (path == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001560 xmlGenericError(xmlGenericErrorContext,
1561 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001562 *str = cur;
1563 return (-1);
1564 }
1565 if (uri->path != NULL)
1566 memcpy(path, uri->path, len2);
1567 if (slash) {
1568 path[len2] = '/';
1569 len2++;
1570 }
1571 path[len2] = 0;
Daniel Veillard336a8e12005-08-07 10:46:19 +00001572 if (cur - *str > 0) {
1573 if (uri->cleanup & 2) {
1574 memcpy(&path[len2], *str, cur - *str);
1575 path[len2 + (cur - *str)] = 0;
1576 } else
1577 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1578 }
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001579 if (uri->path != NULL)
1580 xmlFree(uri->path);
1581 uri->path = path;
Owen Taylor3473f882001-02-23 17:55:21 +00001582 }
1583 *str = cur;
Daniel Veillard4def3bd2001-10-30 09:47:47 +00001584 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001585}
1586
1587/**
1588 * xmlParseURIAuthority:
1589 * @uri: pointer to an URI structure
1590 * @str: pointer to the string to analyze
1591 *
1592 * Parse the authority part of an URI.
1593 *
1594 * authority = server | reg_name
1595 * server = [ [ userinfo "@" ] hostport ]
1596 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1597 * "@" | "&" | "=" | "+" )
1598 *
1599 * Note : this is completely ambiguous since reg_name is allowed to
1600 * use the full set of chars in use by server:
1601 *
1602 * 3.2.1. Registry-based Naming Authority
1603 *
1604 * The structure of a registry-based naming authority is specific
1605 * to the URI scheme, but constrained to the allowed characters
1606 * for an authority component.
1607 *
1608 * Returns 0 or the error code
1609 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001610static int
Owen Taylor3473f882001-02-23 17:55:21 +00001611xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1612 const char *cur;
1613 int ret;
1614
1615 if (str == NULL)
1616 return(-1);
1617
1618 cur = *str;
1619
1620 /*
1621 * try first to parse it as a server string.
1622 */
1623 ret = xmlParseURIServer(uri, str);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001624 if ((ret == 0) && (*str != NULL) &&
1625 ((**str == 0) || (**str == '/') || (**str == '?')))
Owen Taylor3473f882001-02-23 17:55:21 +00001626 return(0);
Daniel Veillard42f12e92003-03-07 18:32:59 +00001627 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001628
1629 /*
1630 * failed, fallback to reg_name
1631 */
1632 if (!IS_REG_NAME(cur)) {
1633 return(5);
1634 }
1635 NEXT(cur);
1636 while (IS_REG_NAME(cur)) NEXT(cur);
1637 if (uri != NULL) {
1638 if (uri->server != NULL) xmlFree(uri->server);
1639 uri->server = NULL;
1640 if (uri->user != NULL) xmlFree(uri->user);
1641 uri->user = NULL;
1642 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard336a8e12005-08-07 10:46:19 +00001643 if (uri->cleanup & 2)
1644 uri->authority = STRNDUP(*str, cur - *str);
1645 else
1646 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001647 }
1648 *str = cur;
1649 return(0);
1650}
1651
1652/**
1653 * xmlParseURIHierPart:
1654 * @uri: pointer to an URI structure
1655 * @str: pointer to the string to analyze
1656 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001657 * Parse an URI hierarchical part
Owen Taylor3473f882001-02-23 17:55:21 +00001658 *
1659 * hier_part = ( net_path | abs_path ) [ "?" query ]
1660 * abs_path = "/" path_segments
1661 * net_path = "//" authority [ abs_path ]
1662 *
1663 * Returns 0 or the error code
1664 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001665static int
Owen Taylor3473f882001-02-23 17:55:21 +00001666xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1667 int ret;
1668 const char *cur;
1669
1670 if (str == NULL)
1671 return(-1);
1672
1673 cur = *str;
1674
1675 if ((cur[0] == '/') && (cur[1] == '/')) {
1676 cur += 2;
1677 ret = xmlParseURIAuthority(uri, &cur);
1678 if (ret != 0)
1679 return(ret);
1680 if (cur[0] == '/') {
1681 cur++;
1682 ret = xmlParseURIPathSegments(uri, &cur, 1);
1683 }
1684 } else if (cur[0] == '/') {
1685 cur++;
1686 ret = xmlParseURIPathSegments(uri, &cur, 1);
1687 } else {
1688 return(4);
1689 }
1690 if (ret != 0)
1691 return(ret);
1692 if (*cur == '?') {
1693 cur++;
1694 ret = xmlParseURIQuery(uri, &cur);
1695 if (ret != 0)
1696 return(ret);
1697 }
1698 *str = cur;
1699 return(0);
1700}
1701
1702/**
1703 * xmlParseAbsoluteURI:
1704 * @uri: pointer to an URI structure
1705 * @str: pointer to the string to analyze
1706 *
1707 * Parse an URI reference string and fills in the appropriate fields
1708 * of the @uri structure
1709 *
1710 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1711 *
1712 * Returns 0 or the error code
1713 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001714static int
Owen Taylor3473f882001-02-23 17:55:21 +00001715xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1716 int ret;
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001717 const char *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001718
1719 if (str == NULL)
1720 return(-1);
1721
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001722 cur = *str;
1723
Owen Taylor3473f882001-02-23 17:55:21 +00001724 ret = xmlParseURIScheme(uri, str);
1725 if (ret != 0) return(ret);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001726 if (**str != ':') {
1727 *str = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001728 return(1);
Daniel Veillard20ee8c02001-10-05 09:18:14 +00001729 }
Owen Taylor3473f882001-02-23 17:55:21 +00001730 (*str)++;
1731 if (**str == '/')
1732 return(xmlParseURIHierPart(uri, str));
1733 return(xmlParseURIOpaquePart(uri, str));
1734}
1735
1736/**
1737 * xmlParseRelativeURI:
1738 * @uri: pointer to an URI structure
1739 * @str: pointer to the string to analyze
1740 *
1741 * Parse an relative URI string and fills in the appropriate fields
1742 * of the @uri structure
1743 *
1744 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1745 * abs_path = "/" path_segments
1746 * net_path = "//" authority [ abs_path ]
1747 * rel_path = rel_segment [ abs_path ]
1748 *
1749 * Returns 0 or the error code
1750 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001751static int
Owen Taylor3473f882001-02-23 17:55:21 +00001752xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1753 int ret = 0;
1754 const char *cur;
1755
1756 if (str == NULL)
1757 return(-1);
1758
1759 cur = *str;
1760 if ((cur[0] == '/') && (cur[1] == '/')) {
1761 cur += 2;
1762 ret = xmlParseURIAuthority(uri, &cur);
1763 if (ret != 0)
1764 return(ret);
1765 if (cur[0] == '/') {
1766 cur++;
1767 ret = xmlParseURIPathSegments(uri, &cur, 1);
1768 }
1769 } else if (cur[0] == '/') {
1770 cur++;
1771 ret = xmlParseURIPathSegments(uri, &cur, 1);
1772 } else if (cur[0] != '#' && cur[0] != '?') {
1773 ret = xmlParseURIRelSegment(uri, &cur);
1774 if (ret != 0)
1775 return(ret);
1776 if (cur[0] == '/') {
1777 cur++;
1778 ret = xmlParseURIPathSegments(uri, &cur, 1);
1779 }
1780 }
1781 if (ret != 0)
1782 return(ret);
1783 if (*cur == '?') {
1784 cur++;
1785 ret = xmlParseURIQuery(uri, &cur);
1786 if (ret != 0)
1787 return(ret);
1788 }
1789 *str = cur;
1790 return(ret);
1791}
1792
1793/**
1794 * xmlParseURIReference:
1795 * @uri: pointer to an URI structure
1796 * @str: the string to analyze
1797 *
1798 * Parse an URI reference string and fills in the appropriate fields
1799 * of the @uri structure
1800 *
1801 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1802 *
1803 * Returns 0 or the error code
1804 */
1805int
1806xmlParseURIReference(xmlURIPtr uri, const char *str) {
1807 int ret;
1808 const char *tmp = str;
1809
1810 if (str == NULL)
1811 return(-1);
1812 xmlCleanURI(uri);
1813
1814 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001815 * Try first to parse absolute refs, then fallback to relative if
Owen Taylor3473f882001-02-23 17:55:21 +00001816 * it fails.
1817 */
1818 ret = xmlParseAbsoluteURI(uri, &str);
1819 if (ret != 0) {
1820 xmlCleanURI(uri);
1821 str = tmp;
1822 ret = xmlParseRelativeURI(uri, &str);
1823 }
1824 if (ret != 0) {
1825 xmlCleanURI(uri);
1826 return(ret);
1827 }
1828
1829 if (*str == '#') {
1830 str++;
1831 ret = xmlParseURIFragment(uri, &str);
1832 if (ret != 0) return(ret);
1833 }
1834 if (*str != 0) {
1835 xmlCleanURI(uri);
1836 return(1);
1837 }
1838 return(0);
1839}
1840
1841/**
1842 * xmlParseURI:
1843 * @str: the URI string to analyze
1844 *
1845 * Parse an URI
1846 *
1847 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1848 *
William M. Brackf3cf1a12005-01-06 02:25:59 +00001849 * Returns a newly built xmlURIPtr or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00001850 */
1851xmlURIPtr
1852xmlParseURI(const char *str) {
1853 xmlURIPtr uri;
1854 int ret;
1855
1856 if (str == NULL)
1857 return(NULL);
1858 uri = xmlCreateURI();
1859 if (uri != NULL) {
1860 ret = xmlParseURIReference(uri, str);
1861 if (ret) {
1862 xmlFreeURI(uri);
1863 return(NULL);
1864 }
1865 }
1866 return(uri);
1867}
1868
Daniel Veillard336a8e12005-08-07 10:46:19 +00001869/**
1870 * xmlParseURIRaw:
1871 * @str: the URI string to analyze
1872 * @raw: if 1 unescaping of URI pieces are disabled
1873 *
1874 * Parse an URI but allows to keep intact the original fragments.
1875 *
1876 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1877 *
1878 * Returns a newly built xmlURIPtr or NULL in case of error
1879 */
1880xmlURIPtr
1881xmlParseURIRaw(const char *str, int raw) {
1882 xmlURIPtr uri;
1883 int ret;
1884
1885 if (str == NULL)
1886 return(NULL);
1887 uri = xmlCreateURI();
1888 if (uri != NULL) {
1889 if (raw) {
1890 uri->cleanup |= 2;
1891 }
1892 ret = xmlParseURIReference(uri, str);
1893 if (ret) {
1894 xmlFreeURI(uri);
1895 return(NULL);
1896 }
1897 }
1898 return(uri);
1899}
1900
Owen Taylor3473f882001-02-23 17:55:21 +00001901/************************************************************************
1902 * *
1903 * Public functions *
1904 * *
1905 ************************************************************************/
1906
1907/**
1908 * xmlBuildURI:
1909 * @URI: the URI instance found in the document
1910 * @base: the base value
1911 *
1912 * Computes he final URI of the reference done by checking that
1913 * the given URI is valid, and building the final URI using the
1914 * base URI. This is processed according to section 5.2 of the
1915 * RFC 2396
1916 *
1917 * 5.2. Resolving Relative References to Absolute Form
1918 *
1919 * Returns a new URI string (to be freed by the caller) or NULL in case
1920 * of error.
1921 */
1922xmlChar *
1923xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1924 xmlChar *val = NULL;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001925 int ret, len, indx, cur, out;
Owen Taylor3473f882001-02-23 17:55:21 +00001926 xmlURIPtr ref = NULL;
1927 xmlURIPtr bas = NULL;
1928 xmlURIPtr res = NULL;
1929
1930 /*
1931 * 1) The URI reference is parsed into the potential four components and
1932 * fragment identifier, as described in Section 4.3.
1933 *
1934 * NOTE that a completely empty URI is treated by modern browsers
1935 * as a reference to "." rather than as a synonym for the current
1936 * URI. Should we do that here?
1937 */
1938 if (URI == NULL)
1939 ret = -1;
1940 else {
1941 if (*URI) {
1942 ref = xmlCreateURI();
1943 if (ref == NULL)
1944 goto done;
1945 ret = xmlParseURIReference(ref, (const char *) URI);
1946 }
1947 else
1948 ret = 0;
1949 }
1950 if (ret != 0)
1951 goto done;
Daniel Veillard7b4b2f92003-01-06 13:11:20 +00001952 if ((ref != NULL) && (ref->scheme != NULL)) {
1953 /*
1954 * The URI is absolute don't modify.
1955 */
1956 val = xmlStrdup(URI);
1957 goto done;
1958 }
Owen Taylor3473f882001-02-23 17:55:21 +00001959 if (base == NULL)
1960 ret = -1;
1961 else {
1962 bas = xmlCreateURI();
1963 if (bas == NULL)
1964 goto done;
1965 ret = xmlParseURIReference(bas, (const char *) base);
1966 }
1967 if (ret != 0) {
1968 if (ref)
1969 val = xmlSaveUri(ref);
1970 goto done;
1971 }
1972 if (ref == NULL) {
1973 /*
1974 * the base fragment must be ignored
1975 */
1976 if (bas->fragment != NULL) {
1977 xmlFree(bas->fragment);
1978 bas->fragment = NULL;
1979 }
1980 val = xmlSaveUri(bas);
1981 goto done;
1982 }
1983
1984 /*
1985 * 2) If the path component is empty and the scheme, authority, and
1986 * query components are undefined, then it is a reference to the
1987 * current document and we are done. Otherwise, the reference URI's
1988 * query and fragment components are defined as found (or not found)
1989 * within the URI reference and not inherited from the base URI.
1990 *
1991 * NOTE that in modern browsers, the parsing differs from the above
1992 * in the following aspect: the query component is allowed to be
1993 * defined while still treating this as a reference to the current
1994 * document.
1995 */
1996 res = xmlCreateURI();
1997 if (res == NULL)
1998 goto done;
1999 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2000 ((ref->authority == NULL) && (ref->server == NULL))) {
2001 if (bas->scheme != NULL)
2002 res->scheme = xmlMemStrdup(bas->scheme);
2003 if (bas->authority != NULL)
2004 res->authority = xmlMemStrdup(bas->authority);
2005 else if (bas->server != NULL) {
2006 res->server = xmlMemStrdup(bas->server);
2007 if (bas->user != NULL)
2008 res->user = xmlMemStrdup(bas->user);
2009 res->port = bas->port;
2010 }
2011 if (bas->path != NULL)
2012 res->path = xmlMemStrdup(bas->path);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002013 if (ref->query_raw != NULL)
2014 res->query_raw = xmlMemStrdup (ref->query_raw);
2015 else if (ref->query != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00002016 res->query = xmlMemStrdup(ref->query);
Daniel Veillarda1413b82007-04-26 08:33:28 +00002017 else if (bas->query_raw != NULL)
2018 res->query_raw = xmlMemStrdup(bas->query_raw);
Owen Taylor3473f882001-02-23 17:55:21 +00002019 else if (bas->query != NULL)
2020 res->query = xmlMemStrdup(bas->query);
2021 if (ref->fragment != NULL)
2022 res->fragment = xmlMemStrdup(ref->fragment);
2023 goto step_7;
2024 }
Owen Taylor3473f882001-02-23 17:55:21 +00002025
2026 /*
2027 * 3) If the scheme component is defined, indicating that the reference
2028 * starts with a scheme name, then the reference is interpreted as an
2029 * absolute URI and we are done. Otherwise, the reference URI's
2030 * scheme is inherited from the base URI's scheme component.
2031 */
2032 if (ref->scheme != NULL) {
2033 val = xmlSaveUri(ref);
2034 goto done;
2035 }
2036 if (bas->scheme != NULL)
2037 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillard9231ff92003-03-23 22:00:51 +00002038
Daniel Veillarda1413b82007-04-26 08:33:28 +00002039 if (ref->query_raw != NULL)
2040 res->query_raw = xmlMemStrdup(ref->query_raw);
2041 else if (ref->query != NULL)
Daniel Veillard9231ff92003-03-23 22:00:51 +00002042 res->query = xmlMemStrdup(ref->query);
2043 if (ref->fragment != NULL)
2044 res->fragment = xmlMemStrdup(ref->fragment);
Owen Taylor3473f882001-02-23 17:55:21 +00002045
2046 /*
2047 * 4) If the authority component is defined, then the reference is a
2048 * network-path and we skip to step 7. Otherwise, the reference
2049 * URI's authority is inherited from the base URI's authority
2050 * component, which will also be undefined if the URI scheme does not
2051 * use an authority component.
2052 */
2053 if ((ref->authority != NULL) || (ref->server != NULL)) {
2054 if (ref->authority != NULL)
2055 res->authority = xmlMemStrdup(ref->authority);
2056 else {
2057 res->server = xmlMemStrdup(ref->server);
2058 if (ref->user != NULL)
2059 res->user = xmlMemStrdup(ref->user);
2060 res->port = ref->port;
2061 }
2062 if (ref->path != NULL)
2063 res->path = xmlMemStrdup(ref->path);
2064 goto step_7;
2065 }
2066 if (bas->authority != NULL)
2067 res->authority = xmlMemStrdup(bas->authority);
2068 else if (bas->server != NULL) {
2069 res->server = xmlMemStrdup(bas->server);
2070 if (bas->user != NULL)
2071 res->user = xmlMemStrdup(bas->user);
2072 res->port = bas->port;
2073 }
2074
2075 /*
2076 * 5) If the path component begins with a slash character ("/"), then
2077 * the reference is an absolute-path and we skip to step 7.
2078 */
2079 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2080 res->path = xmlMemStrdup(ref->path);
2081 goto step_7;
2082 }
2083
2084
2085 /*
2086 * 6) If this step is reached, then we are resolving a relative-path
2087 * reference. The relative path needs to be merged with the base
2088 * URI's path. Although there are many ways to do this, we will
2089 * describe a simple method using a separate string buffer.
2090 *
2091 * Allocate a buffer large enough for the result string.
2092 */
2093 len = 2; /* extra / and 0 */
2094 if (ref->path != NULL)
2095 len += strlen(ref->path);
2096 if (bas->path != NULL)
2097 len += strlen(bas->path);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002098 res->path = (char *) xmlMallocAtomic(len);
Owen Taylor3473f882001-02-23 17:55:21 +00002099 if (res->path == NULL) {
2100 xmlGenericError(xmlGenericErrorContext,
2101 "xmlBuildURI: out of memory\n");
2102 goto done;
2103 }
2104 res->path[0] = 0;
2105
2106 /*
2107 * a) All but the last segment of the base URI's path component is
2108 * copied to the buffer. In other words, any characters after the
2109 * last (right-most) slash character, if any, are excluded.
2110 */
2111 cur = 0;
2112 out = 0;
2113 if (bas->path != NULL) {
2114 while (bas->path[cur] != 0) {
2115 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2116 cur++;
2117 if (bas->path[cur] == 0)
2118 break;
2119
2120 cur++;
2121 while (out < cur) {
2122 res->path[out] = bas->path[out];
2123 out++;
2124 }
2125 }
2126 }
2127 res->path[out] = 0;
2128
2129 /*
2130 * b) The reference's path component is appended to the buffer
2131 * string.
2132 */
2133 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002134 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002135 /*
2136 * Ensure the path includes a '/'
2137 */
2138 if ((out == 0) && (bas->server != NULL))
2139 res->path[out++] = '/';
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002140 while (ref->path[indx] != 0) {
2141 res->path[out++] = ref->path[indx++];
Owen Taylor3473f882001-02-23 17:55:21 +00002142 }
2143 }
2144 res->path[out] = 0;
2145
2146 /*
2147 * Steps c) to h) are really path normalization steps
2148 */
2149 xmlNormalizeURIPath(res->path);
2150
2151step_7:
2152
2153 /*
2154 * 7) The resulting URI components, including any inherited from the
2155 * base URI, are recombined to give the absolute form of the URI
2156 * reference.
2157 */
2158 val = xmlSaveUri(res);
2159
2160done:
2161 if (ref != NULL)
2162 xmlFreeURI(ref);
2163 if (bas != NULL)
2164 xmlFreeURI(bas);
2165 if (res != NULL)
2166 xmlFreeURI(res);
2167 return(val);
2168}
2169
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002170/**
William M. Brackf7789b12004-06-07 08:57:27 +00002171 * xmlBuildRelativeURI:
2172 * @URI: the URI reference under consideration
2173 * @base: the base value
2174 *
2175 * Expresses the URI of the reference in terms relative to the
2176 * base. Some examples of this operation include:
2177 * base = "http://site1.com/docs/book1.html"
2178 * URI input URI returned
2179 * docs/pic1.gif pic1.gif
2180 * docs/img/pic1.gif img/pic1.gif
2181 * img/pic1.gif ../img/pic1.gif
2182 * http://site1.com/docs/pic1.gif pic1.gif
2183 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2184 *
2185 * base = "docs/book1.html"
2186 * URI input URI returned
2187 * docs/pic1.gif pic1.gif
2188 * docs/img/pic1.gif img/pic1.gif
2189 * img/pic1.gif ../img/pic1.gif
2190 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2191 *
2192 *
2193 * Note: if the URI reference is really wierd or complicated, it may be
2194 * worthwhile to first convert it into a "nice" one by calling
2195 * xmlBuildURI (using 'base') before calling this routine,
2196 * since this routine (for reasonable efficiency) assumes URI has
2197 * already been through some validation.
2198 *
2199 * Returns a new URI string (to be freed by the caller) or NULL in case
2200 * error.
2201 */
2202xmlChar *
2203xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2204{
2205 xmlChar *val = NULL;
2206 int ret;
2207 int ix;
2208 int pos = 0;
2209 int nbslash = 0;
William M. Brack820d5ed2005-09-14 05:24:27 +00002210 int len;
William M. Brackf7789b12004-06-07 08:57:27 +00002211 xmlURIPtr ref = NULL;
2212 xmlURIPtr bas = NULL;
2213 xmlChar *bptr, *uptr, *vptr;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002214 int remove_path = 0;
William M. Brackf7789b12004-06-07 08:57:27 +00002215
2216 if ((URI == NULL) || (*URI == 0))
2217 return NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002218
2219 /*
2220 * First parse URI into a standard form
2221 */
2222 ref = xmlCreateURI ();
2223 if (ref == NULL)
2224 return NULL;
William M. Brack38c4b332005-07-25 18:39:34 +00002225 /* If URI not already in "relative" form */
2226 if (URI[0] != '.') {
2227 ret = xmlParseURIReference (ref, (const char *) URI);
2228 if (ret != 0)
2229 goto done; /* Error in URI, return NULL */
2230 } else
2231 ref->path = (char *)xmlStrdup(URI);
William M. Brackf7789b12004-06-07 08:57:27 +00002232
2233 /*
2234 * Next parse base into the same standard form
2235 */
2236 if ((base == NULL) || (*base == 0)) {
2237 val = xmlStrdup (URI);
2238 goto done;
2239 }
2240 bas = xmlCreateURI ();
2241 if (bas == NULL)
2242 goto done;
William M. Brack38c4b332005-07-25 18:39:34 +00002243 if (base[0] != '.') {
2244 ret = xmlParseURIReference (bas, (const char *) base);
2245 if (ret != 0)
2246 goto done; /* Error in base, return NULL */
2247 } else
2248 bas->path = (char *)xmlStrdup(base);
William M. Brackf7789b12004-06-07 08:57:27 +00002249
2250 /*
2251 * If the scheme / server on the URI differs from the base,
2252 * just return the URI
2253 */
2254 if ((ref->scheme != NULL) &&
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002255 ((bas->scheme == NULL) ||
2256 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2257 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
William M. Brackf7789b12004-06-07 08:57:27 +00002258 val = xmlStrdup (URI);
2259 goto done;
2260 }
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002261 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2262 val = xmlStrdup(BAD_CAST "");
2263 goto done;
2264 }
2265 if (bas->path == NULL) {
2266 val = xmlStrdup((xmlChar *)ref->path);
2267 goto done;
2268 }
2269 if (ref->path == NULL) {
2270 ref->path = (char *) "/";
2271 remove_path = 1;
2272 }
William M. Brackf7789b12004-06-07 08:57:27 +00002273
2274 /*
2275 * At this point (at last!) we can compare the two paths
2276 *
William M. Brack820d5ed2005-09-14 05:24:27 +00002277 * First we take care of the special case where either of the
2278 * two path components may be missing (bug 316224)
William M. Brackf7789b12004-06-07 08:57:27 +00002279 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002280 if (bas->path == NULL) {
2281 if (ref->path != NULL) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002282 uptr = (xmlChar *) ref->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002283 if (*uptr == '/')
2284 uptr++;
William M. Brack50420192007-07-20 01:09:08 +00002285 /* exception characters from xmlSaveUri */
2286 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brack820d5ed2005-09-14 05:24:27 +00002287 }
2288 goto done;
2289 }
William M. Brackf7789b12004-06-07 08:57:27 +00002290 bptr = (xmlChar *)bas->path;
William M. Brack820d5ed2005-09-14 05:24:27 +00002291 if (ref->path == NULL) {
2292 for (ix = 0; bptr[ix] != 0; ix++) {
William M. Brackf7789b12004-06-07 08:57:27 +00002293 if (bptr[ix] == '/')
2294 nbslash++;
2295 }
William M. Brack820d5ed2005-09-14 05:24:27 +00002296 uptr = NULL;
2297 len = 1; /* this is for a string terminator only */
2298 } else {
2299 /*
2300 * Next we compare the two strings and find where they first differ
2301 */
2302 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2303 pos += 2;
2304 if ((*bptr == '.') && (bptr[1] == '/'))
2305 bptr += 2;
2306 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2307 bptr++;
2308 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2309 pos++;
William M. Brackf7789b12004-06-07 08:57:27 +00002310
William M. Brack820d5ed2005-09-14 05:24:27 +00002311 if (bptr[pos] == ref->path[pos]) {
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002312 val = xmlStrdup(BAD_CAST "");
William M. Brack820d5ed2005-09-14 05:24:27 +00002313 goto done; /* (I can't imagine why anyone would do this) */
2314 }
2315
2316 /*
2317 * In URI, "back up" to the last '/' encountered. This will be the
2318 * beginning of the "unique" suffix of URI
2319 */
2320 ix = pos;
2321 if ((ref->path[ix] == '/') && (ix > 0))
2322 ix--;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002323 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2324 ix -= 2;
William M. Brack820d5ed2005-09-14 05:24:27 +00002325 for (; ix > 0; ix--) {
2326 if (ref->path[ix] == '/')
2327 break;
2328 }
2329 if (ix == 0) {
2330 uptr = (xmlChar *)ref->path;
2331 } else {
2332 ix++;
2333 uptr = (xmlChar *)&ref->path[ix];
2334 }
2335
2336 /*
2337 * In base, count the number of '/' from the differing point
2338 */
2339 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2340 for (; bptr[ix] != 0; ix++) {
2341 if (bptr[ix] == '/')
2342 nbslash++;
2343 }
2344 }
2345 len = xmlStrlen (uptr) + 1;
2346 }
2347
William M. Brackf7789b12004-06-07 08:57:27 +00002348 if (nbslash == 0) {
William M. Brack820d5ed2005-09-14 05:24:27 +00002349 if (uptr != NULL)
William M. Brack50420192007-07-20 01:09:08 +00002350 /* exception characters from xmlSaveUri */
2351 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
William M. Brackf7789b12004-06-07 08:57:27 +00002352 goto done;
2353 }
William M. Brackf7789b12004-06-07 08:57:27 +00002354
2355 /*
2356 * Allocate just enough space for the returned string -
2357 * length of the remainder of the URI, plus enough space
2358 * for the "../" groups, plus one for the terminator
2359 */
William M. Brack820d5ed2005-09-14 05:24:27 +00002360 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
William M. Brackf7789b12004-06-07 08:57:27 +00002361 if (val == NULL) {
William M. Brack42331a92004-07-29 07:07:16 +00002362 xmlGenericError(xmlGenericErrorContext,
2363 "xmlBuildRelativeURI: out of memory\n");
William M. Brackf7789b12004-06-07 08:57:27 +00002364 goto done;
2365 }
2366 vptr = val;
2367 /*
2368 * Put in as many "../" as needed
2369 */
2370 for (; nbslash>0; nbslash--) {
2371 *vptr++ = '.';
2372 *vptr++ = '.';
2373 *vptr++ = '/';
2374 }
2375 /*
2376 * Finish up with the end of the URI
2377 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002378 if (uptr != NULL) {
2379 if ((vptr > val) && (len > 0) &&
2380 (uptr[0] == '/') && (vptr[-1] == '/')) {
2381 memcpy (vptr, uptr + 1, len - 1);
2382 vptr[len - 2] = 0;
2383 } else {
2384 memcpy (vptr, uptr, len);
2385 vptr[len - 1] = 0;
2386 }
2387 } else {
William M. Brack820d5ed2005-09-14 05:24:27 +00002388 vptr[len - 1] = 0;
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002389 }
William M. Brackf7789b12004-06-07 08:57:27 +00002390
William M. Brack50420192007-07-20 01:09:08 +00002391 /* escape the freshly-built path */
2392 vptr = val;
2393 /* exception characters from xmlSaveUri */
2394 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2395 xmlFree(vptr);
2396
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002397done:
William M. Brackf7789b12004-06-07 08:57:27 +00002398 /*
2399 * Free the working variables
2400 */
Daniel Veillard0f7b3312005-09-15 14:15:20 +00002401 if (remove_path != 0)
2402 ref->path = NULL;
William M. Brackf7789b12004-06-07 08:57:27 +00002403 if (ref != NULL)
2404 xmlFreeURI (ref);
2405 if (bas != NULL)
2406 xmlFreeURI (bas);
2407
2408 return val;
2409}
2410
2411/**
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002412 * xmlCanonicPath:
2413 * @path: the resource locator in a filesystem notation
2414 *
2415 * Constructs a canonic path from the specified path.
2416 *
2417 * Returns a new canonic path, or a duplicate of the path parameter if the
2418 * construction fails. The caller is responsible for freeing the memory occupied
2419 * by the returned string. If there is insufficient memory available, or the
2420 * argument is NULL, the function returns NULL.
2421 */
2422#define IS_WINDOWS_PATH(p) \
2423 ((p != NULL) && \
2424 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2425 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2426 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002427xmlChar *
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002428xmlCanonicPath(const xmlChar *path)
2429{
William M. Brack22242272007-01-27 07:59:37 +00002430/*
2431 * For Windows implementations, additional work needs to be done to
2432 * replace backslashes in pathnames with "forward slashes"
2433 */
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002434#if defined(_WIN32) && !defined(__CYGWIN__)
Igor Zlatkovicce076162003-02-23 13:39:39 +00002435 int len = 0;
2436 int i = 0;
Igor Zlatkovicce076162003-02-23 13:39:39 +00002437 xmlChar *p = NULL;
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002438#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002439 xmlURIPtr uri;
Daniel Veillard336a8e12005-08-07 10:46:19 +00002440 xmlChar *ret;
2441 const xmlChar *absuri;
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002442
2443 if (path == NULL)
2444 return(NULL);
Daniel Veillard69f8a132008-02-05 08:37:56 +00002445
2446 /* sanitize filename starting with // so it can be used as URI */
2447 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2448 path++;
2449
Daniel Veillardc64b8e92003-02-24 11:47:13 +00002450 if ((uri = xmlParseURI((const char *) path)) != NULL) {
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002451 xmlFreeURI(uri);
2452 return xmlStrdup(path);
2453 }
2454
William M. Brack22242272007-01-27 07:59:37 +00002455 /* Check if this is an "absolute uri" */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002456 absuri = xmlStrstr(path, BAD_CAST "://");
2457 if (absuri != NULL) {
2458 int l, j;
2459 unsigned char c;
2460 xmlChar *escURI;
2461
2462 /*
2463 * this looks like an URI where some parts have not been
William M. Brack22242272007-01-27 07:59:37 +00002464 * escaped leading to a parsing problem. Check that the first
Daniel Veillard336a8e12005-08-07 10:46:19 +00002465 * part matches a protocol.
2466 */
2467 l = absuri - path;
William M. Brack22242272007-01-27 07:59:37 +00002468 /* Bypass if first part (part before the '://') is > 20 chars */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002469 if ((l <= 0) || (l > 20))
2470 goto path_processing;
William M. Brack22242272007-01-27 07:59:37 +00002471 /* Bypass if any non-alpha characters are present in first part */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002472 for (j = 0;j < l;j++) {
2473 c = path[j];
2474 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2475 goto path_processing;
2476 }
2477
William M. Brack22242272007-01-27 07:59:37 +00002478 /* Escape all except the characters specified in the supplied path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002479 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2480 if (escURI != NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002481 /* Try parsing the escaped path */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002482 uri = xmlParseURI((const char *) escURI);
William M. Brack22242272007-01-27 07:59:37 +00002483 /* If successful, return the escaped string */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002484 if (uri != NULL) {
2485 xmlFreeURI(uri);
2486 return escURI;
2487 }
Daniel Veillard336a8e12005-08-07 10:46:19 +00002488 }
2489 }
2490
2491path_processing:
William M. Brack22242272007-01-27 07:59:37 +00002492/* For Windows implementations, replace backslashes with 'forward slashes' */
Daniel Veillard336a8e12005-08-07 10:46:19 +00002493#if defined(_WIN32) && !defined(__CYGWIN__)
2494 /*
William M. Brack22242272007-01-27 07:59:37 +00002495 * Create a URI structure
Daniel Veillard336a8e12005-08-07 10:46:19 +00002496 */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002497 uri = xmlCreateURI();
William M. Brack22242272007-01-27 07:59:37 +00002498 if (uri == NULL) { /* Guard against 'out of memory' */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002499 return(NULL);
2500 }
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002501
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002502 len = xmlStrlen(path);
2503 if ((len > 2) && IS_WINDOWS_PATH(path)) {
William M. Brack22242272007-01-27 07:59:37 +00002504 /* make the scheme 'file' */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002505 uri->scheme = xmlStrdup(BAD_CAST "file");
William M. Brack22242272007-01-27 07:59:37 +00002506 /* allocate space for leading '/' + path + string terminator */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002507 uri->path = xmlMallocAtomic(len + 2);
2508 if (uri->path == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002509 xmlFreeURI(uri); /* Guard agains 'out of memory' */
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002510 return(NULL);
2511 }
William M. Brack22242272007-01-27 07:59:37 +00002512 /* Put in leading '/' plus path */
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002513 uri->path[0] = '/';
Igor Zlatkovicce076162003-02-23 13:39:39 +00002514 p = uri->path + 1;
2515 strncpy(p, path, len + 1);
2516 } else {
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002517 uri->path = xmlStrdup(path);
2518 if (uri->path == NULL) {
2519 xmlFreeURI(uri);
2520 return(NULL);
2521 }
Igor Zlatkovicce076162003-02-23 13:39:39 +00002522 p = uri->path;
2523 }
William M. Brack22242272007-01-27 07:59:37 +00002524 /* Now change all occurences of '\' to '/' */
Igor Zlatkovicce076162003-02-23 13:39:39 +00002525 while (*p != '\0') {
2526 if (*p == '\\')
2527 *p = '/';
2528 p++;
2529 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002530
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002531 if (uri->scheme == NULL) {
William M. Brack22242272007-01-27 07:59:37 +00002532 ret = xmlStrdup((const xmlChar *) uri->path);
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002533 } else {
2534 ret = xmlSaveUri(uri);
2535 }
Daniel Veillard8f3392e2006-02-03 09:45:10 +00002536
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002537 xmlFreeURI(uri);
Daniel Veillard336a8e12005-08-07 10:46:19 +00002538#else
2539 ret = xmlStrdup((const xmlChar *) path);
2540#endif
Igor Zlatkovicf2238e62003-02-19 14:50:35 +00002541 return(ret);
2542}
Owen Taylor3473f882001-02-23 17:55:21 +00002543
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002544/**
2545 * xmlPathToURI:
2546 * @path: the resource locator in a filesystem notation
2547 *
2548 * Constructs an URI expressing the existing path
2549 *
2550 * Returns a new URI, or a duplicate of the path parameter if the
2551 * construction fails. The caller is responsible for freeing the memory
2552 * occupied by the returned string. If there is insufficient memory available,
2553 * or the argument is NULL, the function returns NULL.
2554 */
2555xmlChar *
2556xmlPathToURI(const xmlChar *path)
2557{
2558 xmlURIPtr uri;
2559 xmlURI temp;
2560 xmlChar *ret, *cal;
2561
2562 if (path == NULL)
2563 return(NULL);
2564
2565 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2566 xmlFreeURI(uri);
2567 return xmlStrdup(path);
2568 }
2569 cal = xmlCanonicPath(path);
2570 if (cal == NULL)
2571 return(NULL);
Daniel Veillard481dcfc2006-11-06 08:54:18 +00002572#if defined(_WIN32) && !defined(__CYGWIN__)
2573 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2574 If 'cal' is a valid URI allready then we are done here, as continuing would make
2575 it invalid. */
2576 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2577 xmlFreeURI(uri);
2578 return cal;
2579 }
2580 /* 'cal' can contain a relative path with backslashes. If that is processed
2581 by xmlSaveURI, they will be escaped and the external entity loader machinery
2582 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2583 ret = cal;
2584 while (*ret != '\0') {
2585 if (*ret == '\\')
2586 *ret = '/';
2587 ret++;
2588 }
2589#endif
Daniel Veillardb8efdda2006-10-10 12:37:14 +00002590 memset(&temp, 0, sizeof(temp));
2591 temp.path = (char *) cal;
2592 ret = xmlSaveUri(&temp);
2593 xmlFree(cal);
2594 return(ret);
2595}
Daniel Veillard5d4644e2005-04-01 13:11:58 +00002596#define bottom_uri
2597#include "elfgcchack.h"