blob: cb1c215c20658c8ad548a43e2ec21edecf9904c9 [file] [log] [blame]
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFC 2396
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
10
11#ifdef WIN32
12#define INCLUDE_WINSOCK
13#include "win32config.h"
14#else
15#include "config.h"
16#endif
17
18#include <stdio.h>
19#include <string.h>
20
Daniel Veillard361d8452000-04-03 19:48:13 +000021#include <libxml/xmlmemory.h>
22#include <libxml/uri.h>
Daniel Veillard3dd82e72000-03-20 11:48:04 +000023
/*
 * Character classification macros for the RFC 2396 grammar.
 *
 * Macros taking (x) classify a single character value; macros taking (p)
 * take a pointer to the current character because they may need to look
 * at the two characters following a '%'.
 *
 * All of them are plain macros and evaluate their argument several times:
 * never pass an expression with side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */

#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */

#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))


/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
	((x) == '+') || ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * Thanks to the short-circuit evaluation, (p)[1] is only read when *(p)
 * is '%' and (p)[2] only when (p)[1] is an hex digit, so a NUL terminated
 * string is never read past its terminator.
 */

#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	        ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
	        ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
	        ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
	        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
	        ((*(p) == ',')))

/*
 * rel_segment   = 1*( unreserved | escaped |
 *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */

#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	  ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	  ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	  ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */

#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	              ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */

#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *                      "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
       ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */

#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	            (IS_RESERVED(*(p))))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances
 * the pointer by 3, anything else by 1. The macro does not check that
 * the '%' is followed by two hex digits, it is meant to be used right
 * after one of the IS_xxx(p) tests above accepted the current position.
 *
 * The argument is fully parenthesized so that an lvalue expression such
 * as *pp advances the pointed-to pointer and not pp itself.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
160
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000170
171/**
172 * xmlCreateURI:
173 *
174 * Simply creates an empty xmlURI
175 *
176 * Returns the new structure or NULL in case of error
177 */
178xmlURIPtr
179xmlCreateURI(void) {
180 xmlURIPtr ret;
181
182 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
183 if (ret == NULL) {
184 fprintf(stderr, "xmlCreateURI: out of memory\n");
185 return(NULL);
186 }
187 memset(ret, 0, sizeof(xmlURI));
188 return(ret);
189}
190
191/**
Daniel Veillardec303412000-03-24 13:41:54 +0000192 * xmlSaveUri:
193 * @uri: pointer to an xmlURI
194 *
195 * Save the URI as an escaped string
196 *
197 * Returns a new string (to be deallocated by caller)
198 */
199xmlChar *
200xmlSaveUri(xmlURIPtr uri) {
201 xmlChar *ret = NULL;
202 const char *p;
203 int len;
204 int max;
205
206 if (uri == NULL) return(NULL);
207
208
209 max = 80;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000210 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000211 if (ret == NULL) {
212 fprintf(stderr, "xmlSaveUri: out of memory\n");
213 return(NULL);
214 }
215 len = 0;
216
217 if (uri->scheme != NULL) {
218 p = uri->scheme;
219 while (*p != 0) {
220 if (len >= max) {
221 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000222 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000223 if (ret == NULL) {
224 fprintf(stderr, "xmlSaveUri: out of memory\n");
225 return(NULL);
226 }
227 }
228 ret[len++] = *p++;
229 }
230 if (len >= max) {
231 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000232 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000233 if (ret == NULL) {
234 fprintf(stderr, "xmlSaveUri: out of memory\n");
235 return(NULL);
236 }
237 }
238 ret[len++] = ':';
239 }
240 if (uri->opaque != NULL) {
241 p = uri->opaque;
242 while (*p != 0) {
243 if (len + 3 >= max) {
244 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000245 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000246 if (ret == NULL) {
247 fprintf(stderr, "xmlSaveUri: out of memory\n");
248 return(NULL);
249 }
250 }
251 if ((IS_UNRESERVED(*(p))) ||
252 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
253 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
254 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
255 ret[len++] = *p++;
256 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000257 int val = *(unsigned char *)p++;
258 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000259 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000260 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
261 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000262 }
263 }
264 if (len >= max) {
265 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000266 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000267 if (ret == NULL) {
268 fprintf(stderr, "xmlSaveUri: out of memory\n");
269 return(NULL);
270 }
271 }
272 ret[len++] = 0;
273 } else {
Daniel Veillard361d8452000-04-03 19:48:13 +0000274 if (uri->server != NULL) {
275 if (len + 3 >= max) {
276 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000277 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000278 if (ret == NULL) {
279 fprintf(stderr, "xmlSaveUri: out of memory\n");
280 return(NULL);
281 }
282 }
283 ret[len++] = '/';
284 ret[len++] = '/';
285 if (uri->user != NULL) {
286 p = uri->user;
287 while (*p != 0) {
288 if (len + 3 >= max) {
289 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000290 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000291 if (ret == NULL) {
292 fprintf(stderr, "xmlSaveUri: out of memory\n");
293 return(NULL);
294 }
295 }
296 if ((IS_UNRESERVED(*(p))) ||
297 ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||
298 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
299 ((*(p) == ',')))
300 ret[len++] = *p++;
301 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000302 int val = *(unsigned char *)p++;
303 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillard361d8452000-04-03 19:48:13 +0000304 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000305 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
306 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillard361d8452000-04-03 19:48:13 +0000307 }
308 }
309 if (len + 3 >= max) {
310 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000311 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000312 if (ret == NULL) {
313 fprintf(stderr, "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 ret[len++] = '@';
318 }
319 p = uri->server;
320 while (*p != 0) {
321 if (len >= max) {
322 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000323 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000324 if (ret == NULL) {
325 fprintf(stderr, "xmlSaveUri: out of memory\n");
326 return(NULL);
327 }
328 }
329 ret[len++] = *p++;
330 }
331 if (uri->port > 0) {
332 if (len + 10 >= max) {
333 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000334 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000335 if (ret == NULL) {
336 fprintf(stderr, "xmlSaveUri: out of memory\n");
337 return(NULL);
338 }
339 }
340 len += sprintf((char *) &ret[len], ":%d", uri->port);
341 }
342 } else if (uri->authority != NULL) {
Daniel Veillardec303412000-03-24 13:41:54 +0000343 if (len + 3 >= max) {
344 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000345 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000346 if (ret == NULL) {
347 fprintf(stderr, "xmlSaveUri: out of memory\n");
348 return(NULL);
349 }
350 }
351 ret[len++] = '/';
352 ret[len++] = '/';
353 p = uri->authority;
354 while (*p != 0) {
355 if (len + 3 >= max) {
356 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000357 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000358 if (ret == NULL) {
359 fprintf(stderr, "xmlSaveUri: out of memory\n");
360 return(NULL);
361 }
362 }
363 if ((IS_UNRESERVED(*(p))) ||
364 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
365 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
366 ((*(p) == '=')) || ((*(p) == '+')))
367 ret[len++] = *p++;
368 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000369 int val = *(unsigned char *)p++;
370 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000371 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000372 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
373 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000374 }
375 }
Daniel Veillard740abf52000-10-02 23:04:54 +0000376 } else if (uri->scheme != NULL) {
377 if (len + 3 >= max) {
378 max *= 2;
379 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
380 if (ret == NULL) {
381 fprintf(stderr, "xmlSaveUri: out of memory\n");
382 return(NULL);
383 }
384 }
385 ret[len++] = '/';
386 ret[len++] = '/';
Daniel Veillardec303412000-03-24 13:41:54 +0000387 }
388 if (uri->path != NULL) {
389 p = uri->path;
390 while (*p != 0) {
391 if (len + 3 >= max) {
392 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000393 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000394 if (ret == NULL) {
395 fprintf(stderr, "xmlSaveUri: out of memory\n");
396 return(NULL);
397 }
398 }
399 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
400 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
401 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
402 ((*(p) == ',')))
403 ret[len++] = *p++;
404 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000405 int val = *(unsigned char *)p++;
406 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000407 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000408 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
409 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000410 }
411 }
412 }
413 if (uri->query != NULL) {
414 if (len + 3 >= max) {
415 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000416 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000417 if (ret == NULL) {
418 fprintf(stderr, "xmlSaveUri: out of memory\n");
419 return(NULL);
420 }
421 }
422 ret[len++] = '?';
423 p = uri->query;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000427 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000428 if (ret == NULL) {
429 fprintf(stderr, "xmlSaveUri: out of memory\n");
430 return(NULL);
431 }
432 }
433 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
434 ret[len++] = *p++;
435 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000436 int val = *(unsigned char *)p++;
437 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000438 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000439 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
440 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000441 }
442 }
443 }
444 if (uri->fragment != NULL) {
445 if (len + 3 >= max) {
446 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000447 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000448 if (ret == NULL) {
449 fprintf(stderr, "xmlSaveUri: out of memory\n");
450 return(NULL);
451 }
452 }
453 ret[len++] = '#';
454 p = uri->fragment;
455 while (*p != 0) {
456 if (len + 3 >= max) {
457 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000458 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000459 if (ret == NULL) {
460 fprintf(stderr, "xmlSaveUri: out of memory\n");
461 return(NULL);
462 }
463 }
464 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
465 ret[len++] = *p++;
466 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000467 int val = *(unsigned char *)p++;
468 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000469 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000470 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
471 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000472 }
473 }
474 }
475 if (len >= max) {
476 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000477 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000478 if (ret == NULL) {
479 fprintf(stderr, "xmlSaveUri: out of memory\n");
480 return(NULL);
481 }
482 }
483 ret[len++] = 0;
484 }
485 return(ret);
486}
487
488/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000489 * xmlPrintURI:
490 * @stream: a FILE* for the output
491 * @uri: pointer to an xmlURI
492 *
493 * Prints the URI in the stream @steam.
494 */
495void
496xmlPrintURI(FILE *stream, xmlURIPtr uri) {
Daniel Veillardec303412000-03-24 13:41:54 +0000497 xmlChar *out;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000498
Daniel Veillardec303412000-03-24 13:41:54 +0000499 out = xmlSaveUri(uri);
500 if (out != NULL) {
501 fprintf(stream, "%s", out);
502 xmlFree(out);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000503 }
504}
505
506/**
507 * xmlCleanURI:
508 * @uri: pointer to an xmlURI
509 *
510 * Make sure the xmlURI struct is free of content
511 */
512void
513xmlCleanURI(xmlURIPtr uri) {
514 if (uri == NULL) return;
515
516 if (uri->scheme != NULL) xmlFree(uri->scheme);
517 uri->scheme = NULL;
518 if (uri->server != NULL) xmlFree(uri->server);
519 uri->server = NULL;
Daniel Veillard361d8452000-04-03 19:48:13 +0000520 if (uri->user != NULL) xmlFree(uri->user);
521 uri->user = NULL;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000522 if (uri->path != NULL) xmlFree(uri->path);
523 uri->path = NULL;
524 if (uri->fragment != NULL) xmlFree(uri->fragment);
525 uri->fragment = NULL;
526 if (uri->opaque != NULL) xmlFree(uri->opaque);
527 uri->opaque = NULL;
528 if (uri->authority != NULL) xmlFree(uri->authority);
529 uri->authority = NULL;
530 if (uri->query != NULL) xmlFree(uri->query);
531 uri->query = NULL;
532}
533
534/**
535 * xmlFreeURI:
536 * @uri: pointer to an xmlURI
537 *
538 * Free up the xmlURI struct
539 */
540void
541xmlFreeURI(xmlURIPtr uri) {
542 if (uri == NULL) return;
543
544 if (uri->scheme != NULL) xmlFree(uri->scheme);
545 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard361d8452000-04-03 19:48:13 +0000546 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000547 if (uri->path != NULL) xmlFree(uri->path);
548 if (uri->fragment != NULL) xmlFree(uri->fragment);
549 if (uri->opaque != NULL) xmlFree(uri->opaque);
550 if (uri->authority != NULL) xmlFree(uri->authority);
551 if (uri->query != NULL) xmlFree(uri->query);
552 memset(uri, -1, sizeof(xmlURI));
553 xmlFree(uri);
554}
555
/*
 * xmlURIHexDigitValue:
 * @c:  a character
 *
 * Returns the value of the hexadecimal digit @c, or -1 if @c is not
 *         an hexadecimal digit
 */
static int
xmlURIHexDigitValue(int c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return(c - 'a' + 10);
    if ((c >= 'A') && (c <= 'F'))
        return(c - 'A' + 10);
    return(-1);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, does not do validity checks !
 * Output is direct unsigned char translation of %XX values (no encoding)
 *
 * A '%' is decoded only if it is followed, within the @len bytes to
 * process, by two hexadecimal digits. Otherwise it is copied unchanged:
 * the routine never reads past @len and never emits an undefined byte.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated and must
 * be freed by the caller; otherwise @target must hold at least @len + 1
 * bytes. The output is always 0 terminated; note that "%00" decodes to
 * an embedded 0 byte.
 *
 * Returns a copy of the string, but unescaped, or NULL if @str is NULL,
 *         if there is nothing to unescape (empty string) or in case of
 *         memory error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len <= 0) return(NULL);

    if (target == NULL) {
        ret = (char *) xmlMalloc(len + 1);
        if (ret == NULL) {
            fprintf(stderr, "xmlURIUnescapeString: out of memory\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while (len > 0) {
        int hi = -1, lo = -1;

        /* only look at in[1] and in[2] when they are part of the input */
        if ((len >= 3) && (*in == '%')) {
            hi = xmlURIHexDigitValue(in[1]);
            if (hi >= 0)
                lo = xmlURIHexDigitValue(in[2]);
        }
        if (lo >= 0) {
            *out++ = (char) (hi * 16 + lo);
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
614
615
616/**
617 * xmlParseURIFragment:
618 * @uri: pointer to an URI structure
619 * @str: pointer to the string to analyze
620 *
621 * Parse an URI fragment string and fills in the appropriate fields
622 * of the @uri structure.
623 *
624 * fragment = *uric
625 *
626 * Returns 0 or the error code
627 */
628int
629xmlParseURIFragment(xmlURIPtr uri, const char **str) {
630 const char *cur = *str;
631
632 if (str == NULL) return(-1);
633
634 while (IS_URIC(cur)) NEXT(cur);
635 if (uri != NULL) {
636 if (uri->fragment != NULL) xmlFree(uri->fragment);
Daniel Veillard361d8452000-04-03 19:48:13 +0000637 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000638 }
639 *str = cur;
640 return(0);
641}
642
643/**
644 * xmlParseURIQuery:
645 * @uri: pointer to an URI structure
646 * @str: pointer to the string to analyze
647 *
648 * Parse the query part of an URI
649 *
650 * query = *uric
651 *
652 * Returns 0 or the error code
653 */
654int
655xmlParseURIQuery(xmlURIPtr uri, const char **str) {
656 const char *cur = *str;
657
658 if (str == NULL) return(-1);
659
660 while (IS_URIC(cur)) NEXT(cur);
661 if (uri != NULL) {
662 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillard361d8452000-04-03 19:48:13 +0000663 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000664 }
665 *str = cur;
666 return(0);
667}
668
669/**
670 * xmlParseURIScheme:
671 * @uri: pointer to an URI structure
672 * @str: pointer to the string to analyze
673 *
674 * Parse an URI scheme
675 *
676 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
677 *
678 * Returns 0 or the error code
679 */
680int
681xmlParseURIScheme(xmlURIPtr uri, const char **str) {
682 const char *cur;
683
684 if (str == NULL)
685 return(-1);
686
687 cur = *str;
688 if (!IS_ALPHA(*cur))
689 return(2);
690 cur++;
691 while (IS_SCHEME(*cur)) cur++;
692 if (uri != NULL) {
693 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillard361d8452000-04-03 19:48:13 +0000694 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); /* !!! strndup */
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000695 }
696 *str = cur;
697 return(0);
698}
699
700/**
701 * xmlParseURIOpaquePart:
702 * @uri: pointer to an URI structure
703 * @str: pointer to the string to analyze
704 *
705 * Parse an URI opaque part
706 *
707 * opaque_part = uric_no_slash *uric
708 *
709 * Returns 0 or the error code
710 */
711int
712xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
713 const char *cur;
714
715 if (str == NULL)
716 return(-1);
717
718 cur = *str;
719 if (!IS_URIC_NO_SLASH(cur)) {
720 return(3);
721 }
722 NEXT(cur);
723 while (IS_URIC(cur)) NEXT(cur);
724 if (uri != NULL) {
725 if (uri->opaque != NULL) xmlFree(uri->opaque);
Daniel Veillard361d8452000-04-03 19:48:13 +0000726 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000727 }
728 *str = cur;
729 return(0);
730}
731
732/**
Daniel Veillard361d8452000-04-03 19:48:13 +0000733 * xmlParseURIServer:
734 * @uri: pointer to an URI structure
735 * @str: pointer to the string to analyze
736 *
737 * Parse a server subpart of an URI, it's a finer grain analysis
738 * of the authority part.
739 *
740 * server = [ [ userinfo "@" ] hostport ]
741 * userinfo = *( unreserved | escaped |
742 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
743 * hostport = host [ ":" port ]
744 * host = hostname | IPv4address
745 * hostname = *( domainlabel "." ) toplabel [ "." ]
746 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
747 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
748 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
749 * port = *digit
750 *
751 * Returns 0 or the error code
752 */
753int
754xmlParseURIServer(xmlURIPtr uri, const char **str) {
755 const char *cur;
756 const char *host, *tmp;
757
758 if (str == NULL)
759 return(-1);
760
761 cur = *str;
762
763 /*
764 * is there an userinfo ?
765 */
766 while (IS_USERINFO(cur)) NEXT(cur);
767 if (*cur == '@') {
768 if (uri != NULL) {
769 if (uri->user != NULL) xmlFree(uri->user);
770 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
771 }
772 cur++;
773 } else {
774 if (uri != NULL) {
775 if (uri->user != NULL) xmlFree(uri->user);
776 uri->user = NULL;
777 }
778 cur = *str;
779 }
780 /*
Daniel Veillard740abf52000-10-02 23:04:54 +0000781 * This can be empty in the case where there is no server
782 */
783 host = cur;
784 if (*cur == '/') {
785 if (uri != NULL) {
786 if (uri->authority != NULL) xmlFree(uri->authority);
787 uri->authority = NULL;
788 if (uri->server != NULL) xmlFree(uri->server);
789 uri->server = NULL;
790 uri->port = 0;
791 }
792 return(0);
793 }
794 /*
Daniel Veillard361d8452000-04-03 19:48:13 +0000795 * host part of hostport can derive either an IPV4 address
796 * or an unresolved name. Check the IP first, it easier to detect
797 * errors if wrong one
798 */
Daniel Veillard361d8452000-04-03 19:48:13 +0000799 if (IS_DIGIT(*cur)) {
800 while(IS_DIGIT(*cur)) cur++;
801 if (*cur != '.')
802 goto host_name;
803 cur++;
804 if (!IS_DIGIT(*cur))
805 goto host_name;
806 while(IS_DIGIT(*cur)) cur++;
807 if (*cur != '.')
808 goto host_name;
809 cur++;
810 if (!IS_DIGIT(*cur))
811 goto host_name;
812 while(IS_DIGIT(*cur)) cur++;
813 if (*cur != '.')
814 goto host_name;
815 cur++;
816 if (!IS_DIGIT(*cur))
817 goto host_name;
818 while(IS_DIGIT(*cur)) cur++;
819 if (uri != NULL) {
820 if (uri->authority != NULL) xmlFree(uri->authority);
821 uri->authority = NULL;
822 if (uri->server != NULL) xmlFree(uri->server);
823 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
824 }
825 goto host_done;
826 }
827host_name:
828 /*
829 * the hostname production as-is is a parser nightmare.
830 * simplify it to
831 * hostname = *( domainlabel "." ) domainlabel [ "." ]
832 * and just make sure the last label starts with a non numeric char.
833 */
834 if (!IS_ALPHANUM(*cur))
835 return(6);
836 while (IS_ALPHANUM(*cur)) {
837 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
838 if (*cur == '.')
839 cur++;
840 }
841 tmp = cur;
842 tmp--;
843 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
844 tmp++;
845 if (!IS_ALPHA(*tmp))
846 return(7);
847 if (uri != NULL) {
848 if (uri->authority != NULL) xmlFree(uri->authority);
849 uri->authority = NULL;
850 if (uri->server != NULL) xmlFree(uri->server);
851 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
852 }
853
854host_done:
855
856 /*
857 * finish by checking for a port presence.
858 */
859 if (*cur == ':') {
860 cur++;
861 if (IS_DIGIT(*cur)) {
862 if (uri != NULL)
863 uri->port = 0;
864 while (IS_DIGIT(*cur)) {
865 if (uri != NULL)
866 uri->port = uri->port * 10 + (*cur - '0');
867 cur++;
868 }
869 }
870 }
871 *str = cur;
872 return(0);
873}
874
875/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000876 * xmlParseURIRelSegment:
877 * @uri: pointer to an URI structure
878 * @str: pointer to the string to analyze
879 *
880 * Parse an URI relative segment
881 *
882 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
883 * "+" | "$" | "," )
884 *
885 * Returns 0 or the error code
886 */
887int
888xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
889 const char *cur;
890
891 if (str == NULL)
892 return(-1);
893
894 cur = *str;
895 if (!IS_SEGMENT(cur)) {
896 return(3);
897 }
898 NEXT(cur);
899 while (IS_SEGMENT(cur)) NEXT(cur);
900 if (uri != NULL) {
901 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard361d8452000-04-03 19:48:13 +0000902 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000903 }
904 *str = cur;
905 return(0);
906}
907
908/**
909 * xmlParseURIPathSegments:
910 * @uri: pointer to an URI structure
911 * @str: pointer to the string to analyze
912 * @slash: should we add a leading slash
913 *
914 * Parse an URI set of path segments
915 *
916 * path_segments = segment *( "/" segment )
917 * segment = *pchar *( ";" param )
918 * param = *pchar
919 *
920 * Returns 0 or the error code
921 */
922int
923xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
924 const char *cur;
925
926 if (str == NULL)
927 return(-1);
928
929 cur = *str;
930
931 do {
932 while (IS_PCHAR(cur)) NEXT(cur);
933 if (*cur == ';') {
934 cur++;
935 while (IS_PCHAR(cur)) NEXT(cur);
936 }
937 if (*cur != '/') break;
938 cur++;
939 } while (1);
940 if (uri != NULL) {
941 int len, len2 = 0;
942 char *path;
943
944 /*
945 * Concat the set of path segments to the current path
946 */
947 len = cur - *str;
948 if (slash)
949 len++;
950
951 if (uri->path != NULL) {
952 len2 = strlen(uri->path);
953 len += len2;
954 }
955 path = (char *) xmlMalloc(len + 1);
956 if (path == NULL) {
957 fprintf(stderr, "xmlParseURIPathSegments: out of memory\n");
958 *str = cur;
959 return(-1);
960 }
961 if (uri->path != NULL)
962 memcpy(path, uri->path, len2);
963 if (slash) {
964 path[len2] = '/';
965 len2++;
966 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000967 path[len2] = 0;
968 if (cur - *str > 0)
969 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000970 if (uri->path != NULL)
971 xmlFree(uri->path);
972 uri->path = path;
973 }
974 *str = cur;
975 return(0);
976}
977
978/**
979 * xmlParseURIAuthority:
980 * @uri: pointer to an URI structure
981 * @str: pointer to the string to analyze
982 *
983 * Parse the authority part of an URI.
984 *
985 * authority = server | reg_name
986 * server = [ [ userinfo "@" ] hostport ]
987 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
988 * "@" | "&" | "=" | "+" )
989 *
990 * Note : this is completely ambiguous since reg_name is allowed to
991 * use the full set of chars in use by server:
992 *
993 * 3.2.1. Registry-based Naming Authority
994 *
995 * The structure of a registry-based naming authority is specific
996 * to the URI scheme, but constrained to the allowed characters
997 * for an authority component.
998 *
999 * Returns 0 or the error code
1000 */
1001int
1002xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1003 const char *cur;
Daniel Veillard361d8452000-04-03 19:48:13 +00001004 int ret;
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001005
1006 if (str == NULL)
1007 return(-1);
1008
1009 cur = *str;
Daniel Veillard361d8452000-04-03 19:48:13 +00001010
1011 /*
1012 * try first to parse it as a server string.
1013 */
1014 ret = xmlParseURIServer(uri, str);
1015 if (ret == 0)
1016 return(0);
1017
1018 /*
1019 * failed, fallback to reg_name
1020 */
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001021 if (!IS_REG_NAME(cur)) {
1022 return(5);
1023 }
1024 NEXT(cur);
1025 while (IS_REG_NAME(cur)) NEXT(cur);
1026 if (uri != NULL) {
Daniel Veillard361d8452000-04-03 19:48:13 +00001027 if (uri->server != NULL) xmlFree(uri->server);
1028 uri->server = NULL;
1029 if (uri->user != NULL) xmlFree(uri->user);
1030 uri->user = NULL;
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001031 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard361d8452000-04-03 19:48:13 +00001032 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001033 }
1034 *str = cur;
1035 return(0);
1036}
1037
1038/**
1039 * xmlParseURIHierPart:
1040 * @uri: pointer to an URI structure
1041 * @str: pointer to the string to analyze
1042 *
1043 * Parse an URI hirarchical part
1044 *
1045 * hier_part = ( net_path | abs_path ) [ "?" query ]
1046 * abs_path = "/" path_segments
1047 * net_path = "//" authority [ abs_path ]
1048 *
1049 * Returns 0 or the error code
1050 */
1051int
1052xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1053 int ret;
1054 const char *cur;
1055
1056 if (str == NULL)
1057 return(-1);
1058
1059 cur = *str;
1060
1061 if ((cur[0] == '/') && (cur[1] == '/')) {
1062 cur += 2;
1063 ret = xmlParseURIAuthority(uri, &cur);
1064 if (ret != 0)
1065 return(ret);
1066 if (cur[0] == '/') {
1067 cur++;
1068 ret = xmlParseURIPathSegments(uri, &cur, 1);
1069 }
1070 } else if (cur[0] == '/') {
1071 cur++;
1072 ret = xmlParseURIPathSegments(uri, &cur, 1);
1073 } else {
1074 return(4);
1075 }
1076 if (ret != 0)
1077 return(ret);
1078 if (*cur == '?') {
1079 cur++;
1080 ret = xmlParseURIQuery(uri, &cur);
1081 if (ret != 0)
1082 return(ret);
1083 }
1084 *str = cur;
1085 return(0);
1086}
1087
1088/**
1089 * xmlParseAbsoluteURI:
1090 * @uri: pointer to an URI structure
1091 * @str: pointer to the string to analyze
1092 *
1093 * Parse an URI reference string and fills in the appropriate fields
1094 * of the @uri structure
1095 *
1096 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1097 *
1098 * Returns 0 or the error code
1099 */
1100int
1101xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1102 int ret;
1103
1104 if (str == NULL)
1105 return(-1);
1106
1107 ret = xmlParseURIScheme(uri, str);
1108 if (ret != 0) return(ret);
1109 if (**str != ':')
1110 return(1);
1111 (*str)++;
1112 if (**str == '/')
1113 return(xmlParseURIHierPart(uri, str));
1114 return(xmlParseURIOpaquePart(uri, str));
1115}
1116
1117/**
1118 * xmlParseRelativeURI:
1119 * @uri: pointer to an URI structure
1120 * @str: pointer to the string to analyze
1121 *
1122 * Parse an relative URI string and fills in the appropriate fields
1123 * of the @uri structure
1124 *
1125 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1126 * abs_path = "/" path_segments
1127 * net_path = "//" authority [ abs_path ]
1128 * rel_path = rel_segment [ abs_path ]
1129 *
1130 * Returns 0 or the error code
1131 */
1132int
1133xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1134 int ret = 0;
1135 const char *cur;
1136
1137 if (str == NULL)
1138 return(-1);
1139
1140 cur = *str;
1141 if ((cur[0] == '/') && (cur[1] == '/')) {
1142 cur += 2;
1143 ret = xmlParseURIAuthority(uri, &cur);
1144 if (ret != 0)
1145 return(ret);
1146 if (cur[0] == '/') {
1147 cur++;
1148 ret = xmlParseURIPathSegments(uri, &cur, 1);
1149 }
1150 } else if (cur[0] == '/') {
1151 cur++;
1152 ret = xmlParseURIPathSegments(uri, &cur, 1);
Daniel Veillard98a79162000-09-04 11:15:39 +00001153 } else if (cur[0] != '#' && cur[0] != '?') {
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001154 ret = xmlParseURIRelSegment(uri, &cur);
1155 if (ret != 0)
1156 return(ret);
1157 if (cur[0] == '/') {
1158 cur++;
1159 ret = xmlParseURIPathSegments(uri, &cur, 1);
1160 }
1161 }
1162 if (ret != 0)
1163 return(ret);
1164 if (*cur == '?') {
1165 cur++;
1166 ret = xmlParseURIQuery(uri, &cur);
1167 if (ret != 0)
1168 return(ret);
1169 }
1170 *str = cur;
1171 return(ret);
1172}
1173
1174/**
1175 * xmlParseURIReference:
1176 * @uri: pointer to an URI structure
1177 * @str: the string to analyze
1178 *
1179 * Parse an URI reference string and fills in the appropriate fields
1180 * of the @uri structure
1181 *
1182 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1183 *
1184 * Returns 0 or the error code
1185 */
1186int
1187xmlParseURIReference(xmlURIPtr uri, const char *str) {
1188 int ret;
1189 const char *tmp = str;
1190
1191 if (str == NULL)
1192 return(-1);
1193 xmlCleanURI(uri);
1194
1195 /*
1196 * Try first to parse aboslute refs, then fallback to relative if
1197 * it fails.
1198 */
1199 ret = xmlParseAbsoluteURI(uri, &str);
1200 if (ret != 0) {
1201 xmlCleanURI(uri);
1202 str = tmp;
1203 ret = xmlParseRelativeURI(uri, &str);
1204 }
1205 if (ret != 0) {
1206 xmlCleanURI(uri);
1207 return(ret);
1208 }
1209
1210 if (*str == '#') {
1211 str++;
1212 ret = xmlParseURIFragment(uri, &str);
1213 if (ret != 0) return(ret);
1214 }
1215 if (*str != 0) {
1216 xmlCleanURI(uri);
1217 return(1);
1218 }
1219 return(0);
1220}
1221
1222/**
Daniel Veillard496a1cf2000-05-03 14:20:55 +00001223 * xmlParseURI:
1224 * @str: the URI string to analyze
1225 *
1226 * Parse an URI
1227 *
1228 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1229 *
1230 * Returns a newly build xmlURIPtr or NULL in case of error
1231 */
1232xmlURIPtr
1233xmlParseURI(const char *str) {
1234 xmlURIPtr uri;
1235 int ret;
1236
1237 if (str == NULL)
1238 return(NULL);
1239 uri = xmlCreateURI();
1240 if (uri != NULL) {
1241 ret = xmlParseURIReference(uri, str);
1242 if (ret) {
1243 xmlFreeURI(uri);
1244 return(NULL);
1245 }
1246 }
1247 return(uri);
1248}
1249
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps (RFC 2396 section 5.2, steps 6c to 6g)
 * to a path string.
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Nothing before (and including) the first '/' of @path is examined by
 * the "." and ".." removal passes; a path without any '/' is returned
 * untouched.
 *
 * Returns 0 or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    int cur, out;

    if (path == NULL)
        return(-1);

    /* locate the first '/'; without one there is nothing to normalize */
    cur = 0;
    while ((path[cur] != 0) && (path[cur] != '/')) cur++;
    if (path[cur] == 0)
        return(0);

    /* we are positioned at the beginning of the first segment */
    cur++;
    out = cur;

    /*
     * First pass: drop the "." segments. out never runs ahead of cur.
     */
    while (path[cur] != 0) {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((path[cur] == '.') && (path[cur + 1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((path[cur] == '.') && (path[cur + 1] == 0))
            break;

        /* copy the segment, then its trailing '/' if there is one */
        while ((path[cur] != 0) && (path[cur] != '/')) {
            path[out++] = path[cur++];
        }
        if (path[cur] == '/')
            path[out++] = path[cur++];
    }
    /*
     * Terminate at the write position whatever the way the loop ended,
     * so that a trailing "./" (which leaves through the c) case) is
     * really cut off.
     */
    path[out] = 0;

    /* second pass restarts at the first segment of the shortened string */
    cur = 0;
    while ((path[cur] != 0) && (path[cur] != '/')) cur++;
    if (path[cur] == 0)
        return(0);
    cur++;
    out = cur;

    /*
     * Second pass: resolve the ".." segments.
     */
    while (path[cur] != 0) {
        /*
         * e) All occurrences of "<segment>/../", where <segment> is a
         *    complete path segment not equal to "..", are removed from the
         *    buffer string.  Removal of these path segments is performed
         *    iteratively, removing the leftmost matching pattern on each
         *    iteration, until no matching pattern remains.
         *
         * NOTE(review): the "not equal to .." test inspects path[out],
         * path[out - 1] and path[out - 2]; it is kept exactly as it was.
         */
        if ((cur > 1) && (out > 1) &&
            (path[cur] == '/') && (path[cur + 1] == '.') &&
            (path[cur + 2] == '.') && (path[cur + 3] == '/') &&
            ((path[out] != '.') || (path[out - 1] != '.') ||
             (path[out - 2] != '/'))) {
            cur += 3;
            out --;
            while ((out > 0) && (path[out] != '/')) { out --; }
            path[out] = 0;
            continue;
        }

        /*
         * f) If the buffer string ends with "<segment>/..", where <segment>
         *    is a complete path segment not equal to "..", that
         *    "<segment>/.." is removed.
         */
        if ((path[cur] == '/') && (path[cur + 1] == '.') &&
            (path[cur + 2] == '.') && (path[cur + 3] == 0) &&
            ((path[out] != '.') || (path[out - 1] != '.') ||
             (path[out - 2] != '/'))) {
            /*
             * Step onto the terminator, not past it: "/.." is three
             * characters long and the loop test must read the final 0.
             */
            cur += 3;
            out --;
            while ((out > 0) && (path[out - 1] != '/')) { out --; }
            path[out] = 0;
            continue;
        }

        path[out++] = path[cur++]; /* / or regular char */
    }
    path[out] = 0;

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path. Only complete ".." segments
     * qualify: the two dots must be followed by '/' or the end of the
     * string, so a name such as "..foo" is left alone.
     */
    cur = 0;
    while ((path[cur] == '/') && (path[cur + 1] == '.') &&
           (path[cur + 2] == '.') &&
           ((path[cur + 3] == '/') || (path[cur + 3] == 0)))
        cur += 3;
    if (cur != 0) {
        out = 0;
        while (path[cur] != 0) path[out++] = path[cur++];
        path[out] = 0;
    }
    return(0);
}
1380
1381/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001382 * xmlBuildURI:
1383 * @URI: the URI instance found in the document
1384 * @base: the base value
1385 *
1386 * Computes he final URI of the reference done by checking that
1387 * the given URI is valid, and building the final URI using the
1388 * base URI. This is processed according to section 5.2 of the
1389 * RFC 2396
1390 *
1391 * 5.2. Resolving Relative References to Absolute Form
1392 *
Daniel Veillardec303412000-03-24 13:41:54 +00001393 * Returns a new URI string (to be freed by the caller) or NULL in case
1394 * of error.
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001395 */
1396xmlChar *
1397xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
Daniel Veillardec303412000-03-24 13:41:54 +00001398 xmlChar *val = NULL;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001399 int ret, ret2, len, index, cur, out;
Daniel Veillardec303412000-03-24 13:41:54 +00001400 xmlURIPtr ref = NULL;
1401 xmlURIPtr bas = NULL;
1402 xmlURIPtr res = NULL;
1403
Daniel Veillardec303412000-03-24 13:41:54 +00001404 /*
1405 * 1) The URI reference is parsed into the potential four components and
1406 * fragment identifier, as described in Section 4.3.
Daniel Veillard90e11312000-09-05 10:42:32 +00001407 *
1408 * NOTE that a completely empty URI is treated by modern browsers
1409 * as a reference to "." rather than as a synonym for the current
1410 * URI. Should we do that here?
Daniel Veillardec303412000-03-24 13:41:54 +00001411 */
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001412 if (URI == NULL)
1413 ret = -1;
1414 else {
1415 ref = xmlCreateURI();
1416 if (ref == NULL)
Daniel Veillard98a79162000-09-04 11:15:39 +00001417 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001418 if (*URI)
1419 ret = xmlParseURIReference(ref, (const char *) URI);
1420 else
1421 ret = -1;
Daniel Veillard98a79162000-09-04 11:15:39 +00001422 }
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001423 if (base == NULL)
1424 ret2 = -1;
1425 else {
1426 bas = xmlCreateURI();
1427 if (bas == NULL)
1428 goto done;
1429 ret2 = xmlParseURIReference(bas, (const char *) base);
1430 }
1431 if ((ret != 0) && (ret2 != 0))
Daniel Veillardec303412000-03-24 13:41:54 +00001432 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001433 if (ret != 0) {
1434 /*
1435 * the base fragment must be ignored
1436 */
1437 if (bas->fragment != NULL) {
1438 xmlFree(bas->fragment);
1439 bas->fragment = NULL;
1440 }
1441 val = xmlSaveUri(bas);
Daniel Veillardec303412000-03-24 13:41:54 +00001442 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001443 }
1444 if (ret2 != 0) {
1445 val = xmlSaveUri(ref);
1446 goto done;
1447 }
1448
Daniel Veillardec303412000-03-24 13:41:54 +00001449
1450 /*
1451 * 2) If the path component is empty and the scheme, authority, and
1452 * query components are undefined, then it is a reference to the
Daniel Veillard90e11312000-09-05 10:42:32 +00001453 * current document and we are done. Otherwise, the reference URI's
1454 * query and fragment components are defined as found (or not found)
1455 * within the URI reference and not inherited from the base URI.
Daniel Veillard98a79162000-09-04 11:15:39 +00001456 *
Daniel Veillard90e11312000-09-05 10:42:32 +00001457 * NOTE that in modern browsers, the parsing differs from the above
1458 * in the following aspect: the query component is allowed to be
1459 * defined while still treating this as a reference to the current
1460 * document.
Daniel Veillardec303412000-03-24 13:41:54 +00001461 */
1462 res = xmlCreateURI();
1463 if (res == NULL)
1464 goto done;
1465 if ((ref->scheme == NULL) && (ref->path == NULL) &&
Daniel Veillard90e11312000-09-05 10:42:32 +00001466 ((ref->authority == NULL) && (ref->server == NULL))) {
1467 if (bas->scheme != NULL)
1468 res->scheme = xmlMemStrdup(bas->scheme);
1469 if (bas->authority != NULL)
1470 res->authority = xmlMemStrdup(bas->authority);
1471 else if (bas->server != NULL) {
1472 res->server = xmlMemStrdup(bas->server);
1473 if (bas->user != NULL)
1474 res->user = xmlMemStrdup(bas->user);
1475 res->port = bas->port;
1476 }
1477 if (bas->path != NULL)
1478 res->path = xmlMemStrdup(bas->path);
1479 if (ref->query != NULL)
1480 res->query = xmlMemStrdup(ref->query);
1481 else if (bas->query != NULL)
1482 res->query = xmlMemStrdup(bas->query);
1483 if (ref->fragment != NULL)
1484 res->fragment = xmlMemStrdup(ref->fragment);
1485 goto step_7;
Daniel Veillardec303412000-03-24 13:41:54 +00001486 }
Daniel Veillard98a79162000-09-04 11:15:39 +00001487
Daniel Veillard90e11312000-09-05 10:42:32 +00001488 if (ref->query != NULL)
1489 res->query = xmlMemStrdup(ref->query);
1490 if (ref->fragment != NULL)
1491 res->fragment = xmlMemStrdup(ref->fragment);
Daniel Veillardec303412000-03-24 13:41:54 +00001492
1493 /*
1494 * 3) If the scheme component is defined, indicating that the reference
1495 * starts with a scheme name, then the reference is interpreted as an
1496 * absolute URI and we are done. Otherwise, the reference URI's
1497 * scheme is inherited from the base URI's scheme component.
1498 */
1499 if (ref->scheme != NULL) {
1500 val = xmlSaveUri(ref);
1501 goto done;
1502 }
Daniel Veillardbe803962000-06-28 23:40:59 +00001503 if (bas->scheme != NULL)
1504 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillardec303412000-03-24 13:41:54 +00001505
1506 /*
1507 * 4) If the authority component is defined, then the reference is a
1508 * network-path and we skip to step 7. Otherwise, the reference
1509 * URI's authority is inherited from the base URI's authority
1510 * component, which will also be undefined if the URI scheme does not
1511 * use an authority component.
1512 */
Daniel Veillard361d8452000-04-03 19:48:13 +00001513 if ((ref->authority != NULL) || (ref->server != NULL)) {
1514 if (ref->authority != NULL)
1515 res->authority = xmlMemStrdup(ref->authority);
1516 else {
1517 res->server = xmlMemStrdup(ref->server);
1518 if (ref->user != NULL)
1519 res->user = xmlMemStrdup(ref->user);
1520 res->port = ref->port;
1521 }
Daniel Veillardec303412000-03-24 13:41:54 +00001522 if (ref->path != NULL)
1523 res->path = xmlMemStrdup(ref->path);
Daniel Veillardec303412000-03-24 13:41:54 +00001524 goto step_7;
1525 }
1526 if (bas->authority != NULL)
1527 res->authority = xmlMemStrdup(bas->authority);
Daniel Veillard361d8452000-04-03 19:48:13 +00001528 else if (bas->server != NULL) {
1529 res->server = xmlMemStrdup(bas->server);
1530 if (bas->user != NULL)
1531 res->user = xmlMemStrdup(bas->user);
1532 res->port = bas->port;
1533 }
Daniel Veillardec303412000-03-24 13:41:54 +00001534
1535 /*
1536 * 5) If the path component begins with a slash character ("/"), then
1537 * the reference is an absolute-path and we skip to step 7.
1538 */
Daniel Veillard90e11312000-09-05 10:42:32 +00001539 if ((ref->path != NULL) && (ref->path[0] == '/')) {
Daniel Veillardec303412000-03-24 13:41:54 +00001540 res->path = xmlMemStrdup(ref->path);
Daniel Veillardec303412000-03-24 13:41:54 +00001541 goto step_7;
1542 }
1543
1544
1545 /*
1546 * 6) If this step is reached, then we are resolving a relative-path
1547 * reference. The relative path needs to be merged with the base
1548 * URI's path. Although there are many ways to do this, we will
1549 * describe a simple method using a separate string buffer.
1550 *
1551 * Allocate a buffer large enough for the result string.
1552 */
1553 len = 2; /* extra / and 0 */
1554 if (ref->path != NULL)
1555 len += strlen(ref->path);
1556 if (bas->path != NULL)
1557 len += strlen(bas->path);
1558 res->path = (char *) xmlMalloc(len);
1559 if (res->path == NULL) {
1560 fprintf(stderr, "xmlBuildURI: out of memory\n");
1561 goto done;
1562 }
1563 res->path[0] = 0;
1564
1565 /*
1566 * a) All but the last segment of the base URI's path component is
1567 * copied to the buffer. In other words, any characters after the
1568 * last (right-most) slash character, if any, are excluded.
1569 */
1570 cur = 0;
1571 out = 0;
1572 if (bas->path != NULL) {
1573 while (bas->path[cur] != 0) {
1574 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1575 cur++;
1576 if (bas->path[cur] == 0)
1577 break;
1578
1579 cur++;
1580 while (out < cur) {
1581 res->path[out] = bas->path[out];
1582 out++;
1583 }
1584 }
1585 }
1586 res->path[out] = 0;
1587
1588 /*
1589 * b) The reference's path component is appended to the buffer
1590 * string.
1591 */
Daniel Veillard8ddb5a72000-09-23 10:28:52 +00001592 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillardec303412000-03-24 13:41:54 +00001593 index = 0;
Daniel Veillard52402ce2000-08-22 23:36:12 +00001594 /*
1595 * Ensure the path includes a '/'
1596 */
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001597 if ((out == 0) && (bas->server != NULL))
Daniel Veillard52402ce2000-08-22 23:36:12 +00001598 res->path[out++] = '/';
Daniel Veillardec303412000-03-24 13:41:54 +00001599 while (ref->path[index] != 0) {
1600 res->path[out++] = ref->path[index++];
1601 }
1602 }
1603 res->path[out] = 0;
1604
1605 /*
1606 * Steps c) to h) are really path normalization steps
1607 */
1608 xmlNormalizeURIPath(res->path);
1609
1610step_7:
1611
1612 /*
1613 * 7) The resulting URI components, including any inherited from the
1614 * base URI, are recombined to give the absolute form of the URI
1615 * reference.
1616 */
1617 val = xmlSaveUri(res);
1618
1619done:
1620 if (ref != NULL)
1621 xmlFreeURI(ref);
Daniel Veillard39c7d712000-09-10 16:14:55 +00001622 if (bas != NULL)
Daniel Veillardec303412000-03-24 13:41:54 +00001623 xmlFreeURI(bas);
1624 if (res != NULL)
1625 xmlFreeURI(res);
1626 return(val);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001627}
1628
1629