/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFC 2396
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
10
11#ifdef WIN32
12#define INCLUDE_WINSOCK
13#include "win32config.h"
14#else
15#include "config.h"
16#endif
17
18#include <stdio.h>
19#include <string.h>
20
Daniel Veillard361d8452000-04-03 19:48:13 +000021#include <libxml/xmlmemory.h>
22#include <libxml/uri.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000023#include <libxml/xmlerror.h>
Daniel Veillard3dd82e72000-03-20 11:48:04 +000024
/*
 * Character classes of RFC 2396.
 *
 * Macros whose parameter is `x` classify one character value; macros
 * whose parameter is `p` inspect the character(s) found at a pointer,
 * because an escape sequence is three characters long.  Arguments can be
 * evaluated several times and must therefore be free of side effects.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9" */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* hex = digit | "A" .. "F" | "a" .. "f" */
#define IS_HEX(x) (IS_DIGIT(x) || \
    (('a' <= (x)) && ((x) <= 'f')) || \
    (('A' <= (x)) && ((x) <= 'F')))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
    ((x) == '(') || ((x) == ')'))

/* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
    ((x) == '+') || ((x) == '$') || ((x) == ','))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/* escaped = "%" hex hex */
#define IS_ESCAPED(p) ((*(p) == '%') && IS_HEX((p)[1]) && IS_HEX((p)[2]))

/* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," */
#define IS_PCHAR(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) || \
    (*(p) == ':') || (*(p) == '@') || (*(p) == '&') || \
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || \
    (*(p) == ','))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 * i.e. the pchar set extended with ";" and "?".
 */
#define IS_URIC_NO_SLASH(p) (IS_PCHAR(p) || \
    (*(p) == ';') || (*(p) == '?'))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) || \
    (*(p) == ';') || (*(p) == '@') || (*(p) == '&') || \
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || \
    (*(p) == ','))

/* scheme = alpha *( alpha | digit | "+" | "-" | "." ) */
#define IS_SCHEME(x) (IS_ALPHANUM(x) || \
    ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 * i.e. the pchar set extended with ";".
 */
#define IS_REG_NAME(p) (IS_PCHAR(p) || (*(p) == ';'))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) (IS_UNRESERVED(*(p)) || IS_ESCAPED(p) || \
    (*(p) == ';') || (*(p) == ':') || (*(p) == '&') || \
    (*(p) == '=') || (*(p) == '+') || (*(p) == '$') || \
    (*(p) == ','))

/* uric = reserved | unreserved | escaped */
#define IS_URIC(p) (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)) || \
    IS_ESCAPED(p))
155
/*
 * Advance the pointer variable `p` to the next character: three
 * characters forward when it points at '%' (an escape sequence), one
 * character otherwise.
 *
 * `p` must be a modifiable lvalue and is evaluated more than once.  The
 * argument is fully parenthesized so that an expression such as
 * NEXT(*str) advances the pointed-to cursor and not `str` itself.
 * The macro does not verify that two characters follow the '%'.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
161
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 *    path          = [ abs_path | opaque_part ]
 */

172/**
173 * xmlCreateURI:
174 *
175 * Simply creates an empty xmlURI
176 *
177 * Returns the new structure or NULL in case of error
178 */
179xmlURIPtr
180xmlCreateURI(void) {
181 xmlURIPtr ret;
182
183 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
184 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000185 xmlGenericError(xmlGenericErrorContext,
186 "xmlCreateURI: out of memory\n");
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000187 return(NULL);
188 }
189 memset(ret, 0, sizeof(xmlURI));
190 return(ret);
191}
192
193/**
Daniel Veillardec303412000-03-24 13:41:54 +0000194 * xmlSaveUri:
195 * @uri: pointer to an xmlURI
196 *
197 * Save the URI as an escaped string
198 *
199 * Returns a new string (to be deallocated by caller)
200 */
201xmlChar *
202xmlSaveUri(xmlURIPtr uri) {
203 xmlChar *ret = NULL;
204 const char *p;
205 int len;
206 int max;
207
208 if (uri == NULL) return(NULL);
209
210
211 max = 80;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000212 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000213 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000214 xmlGenericError(xmlGenericErrorContext,
215 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000216 return(NULL);
217 }
218 len = 0;
219
220 if (uri->scheme != NULL) {
221 p = uri->scheme;
222 while (*p != 0) {
223 if (len >= max) {
224 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000225 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000226 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000227 xmlGenericError(xmlGenericErrorContext,
228 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000229 return(NULL);
230 }
231 }
232 ret[len++] = *p++;
233 }
234 if (len >= max) {
235 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000236 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000237 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000240 return(NULL);
241 }
242 }
243 ret[len++] = ':';
244 }
245 if (uri->opaque != NULL) {
246 p = uri->opaque;
247 while (*p != 0) {
248 if (len + 3 >= max) {
249 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000250 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000251 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000252 xmlGenericError(xmlGenericErrorContext,
253 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000254 return(NULL);
255 }
256 }
257 if ((IS_UNRESERVED(*(p))) ||
258 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
259 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
260 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
261 ret[len++] = *p++;
262 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000263 int val = *(unsigned char *)p++;
264 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000265 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000266 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
267 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000268 }
269 }
270 if (len >= max) {
271 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000272 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000273 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000274 xmlGenericError(xmlGenericErrorContext,
275 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000276 return(NULL);
277 }
278 }
279 ret[len++] = 0;
280 } else {
Daniel Veillard361d8452000-04-03 19:48:13 +0000281 if (uri->server != NULL) {
282 if (len + 3 >= max) {
283 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000284 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000285 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000286 xmlGenericError(xmlGenericErrorContext,
287 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000288 return(NULL);
289 }
290 }
291 ret[len++] = '/';
292 ret[len++] = '/';
293 if (uri->user != NULL) {
294 p = uri->user;
295 while (*p != 0) {
296 if (len + 3 >= max) {
297 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000298 ret = (xmlChar *) xmlRealloc(ret,
299 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000300 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000301 xmlGenericError(xmlGenericErrorContext,
302 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000303 return(NULL);
304 }
305 }
306 if ((IS_UNRESERVED(*(p))) ||
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000307 ((*(p) == ';')) || ((*(p) == ':')) ||
308 ((*(p) == '&')) || ((*(p) == '=')) ||
309 ((*(p) == '+')) || ((*(p) == '$')) ||
Daniel Veillard361d8452000-04-03 19:48:13 +0000310 ((*(p) == ',')))
311 ret[len++] = *p++;
312 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000313 int val = *(unsigned char *)p++;
314 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillard361d8452000-04-03 19:48:13 +0000315 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000316 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
317 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillard361d8452000-04-03 19:48:13 +0000318 }
319 }
320 if (len + 3 >= max) {
321 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000322 ret = (xmlChar *) xmlRealloc(ret,
323 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000324 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000325 xmlGenericError(xmlGenericErrorContext,
326 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000327 return(NULL);
328 }
329 }
330 ret[len++] = '@';
331 }
332 p = uri->server;
333 while (*p != 0) {
334 if (len >= max) {
335 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000336 ret = (xmlChar *) xmlRealloc(ret,
337 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000338 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000339 xmlGenericError(xmlGenericErrorContext,
340 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000341 return(NULL);
342 }
343 }
344 ret[len++] = *p++;
345 }
346 if (uri->port > 0) {
347 if (len + 10 >= max) {
348 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000349 ret = (xmlChar *) xmlRealloc(ret,
350 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000351 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000352 xmlGenericError(xmlGenericErrorContext,
353 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000354 return(NULL);
355 }
356 }
357 len += sprintf((char *) &ret[len], ":%d", uri->port);
358 }
359 } else if (uri->authority != NULL) {
Daniel Veillardec303412000-03-24 13:41:54 +0000360 if (len + 3 >= max) {
361 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000362 ret = (xmlChar *) xmlRealloc(ret,
363 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000364 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000365 xmlGenericError(xmlGenericErrorContext,
366 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000367 return(NULL);
368 }
369 }
370 ret[len++] = '/';
371 ret[len++] = '/';
372 p = uri->authority;
373 while (*p != 0) {
374 if (len + 3 >= max) {
375 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000376 ret = (xmlChar *) xmlRealloc(ret,
377 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000378 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000379 xmlGenericError(xmlGenericErrorContext,
380 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000381 return(NULL);
382 }
383 }
384 if ((IS_UNRESERVED(*(p))) ||
385 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
386 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
387 ((*(p) == '=')) || ((*(p) == '+')))
388 ret[len++] = *p++;
389 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000390 int val = *(unsigned char *)p++;
391 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000392 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000393 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
394 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000395 }
396 }
Daniel Veillard740abf52000-10-02 23:04:54 +0000397 } else if (uri->scheme != NULL) {
398 if (len + 3 >= max) {
399 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000400 ret = (xmlChar *) xmlRealloc(ret,
401 (max + 1) * sizeof(xmlChar));
Daniel Veillard740abf52000-10-02 23:04:54 +0000402 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000403 xmlGenericError(xmlGenericErrorContext,
404 "xmlSaveUri: out of memory\n");
Daniel Veillard740abf52000-10-02 23:04:54 +0000405 return(NULL);
406 }
407 }
408 ret[len++] = '/';
409 ret[len++] = '/';
Daniel Veillardec303412000-03-24 13:41:54 +0000410 }
411 if (uri->path != NULL) {
412 p = uri->path;
413 while (*p != 0) {
414 if (len + 3 >= max) {
415 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000416 ret = (xmlChar *) xmlRealloc(ret,
417 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000418 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000419 xmlGenericError(xmlGenericErrorContext,
420 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000421 return(NULL);
422 }
423 }
424 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
425 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
426 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
427 ((*(p) == ',')))
428 ret[len++] = *p++;
429 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000430 int val = *(unsigned char *)p++;
431 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000432 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000433 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
434 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000435 }
436 }
437 }
438 if (uri->query != NULL) {
439 if (len + 3 >= max) {
440 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000441 ret = (xmlChar *) xmlRealloc(ret,
442 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000443 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000444 xmlGenericError(xmlGenericErrorContext,
445 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000446 return(NULL);
447 }
448 }
449 ret[len++] = '?';
450 p = uri->query;
451 while (*p != 0) {
452 if (len + 3 >= max) {
453 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000454 ret = (xmlChar *) xmlRealloc(ret,
455 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000456 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000457 xmlGenericError(xmlGenericErrorContext,
458 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000459 return(NULL);
460 }
461 }
462 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
463 ret[len++] = *p++;
464 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000465 int val = *(unsigned char *)p++;
466 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000467 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000468 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
469 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000470 }
471 }
472 }
473 if (uri->fragment != NULL) {
474 if (len + 3 >= max) {
475 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000476 ret = (xmlChar *) xmlRealloc(ret,
477 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000478 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000479 xmlGenericError(xmlGenericErrorContext,
480 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000481 return(NULL);
482 }
483 }
484 ret[len++] = '#';
485 p = uri->fragment;
486 while (*p != 0) {
487 if (len + 3 >= max) {
488 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000489 ret = (xmlChar *) xmlRealloc(ret,
490 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000491 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000492 xmlGenericError(xmlGenericErrorContext,
493 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000494 return(NULL);
495 }
496 }
497 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
498 ret[len++] = *p++;
499 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000500 int val = *(unsigned char *)p++;
501 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000502 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000503 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
504 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000505 }
506 }
507 }
508 if (len >= max) {
509 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000510 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000511 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000512 xmlGenericError(xmlGenericErrorContext,
513 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000514 return(NULL);
515 }
516 }
517 ret[len++] = 0;
518 }
519 return(ret);
520}
521
522/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000523 * xmlPrintURI:
524 * @stream: a FILE* for the output
525 * @uri: pointer to an xmlURI
526 *
527 * Prints the URI in the stream @steam.
528 */
529void
530xmlPrintURI(FILE *stream, xmlURIPtr uri) {
Daniel Veillardec303412000-03-24 13:41:54 +0000531 xmlChar *out;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000532
Daniel Veillardec303412000-03-24 13:41:54 +0000533 out = xmlSaveUri(uri);
534 if (out != NULL) {
535 fprintf(stream, "%s", out);
536 xmlFree(out);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000537 }
538}
539
540/**
541 * xmlCleanURI:
542 * @uri: pointer to an xmlURI
543 *
544 * Make sure the xmlURI struct is free of content
545 */
546void
547xmlCleanURI(xmlURIPtr uri) {
548 if (uri == NULL) return;
549
550 if (uri->scheme != NULL) xmlFree(uri->scheme);
551 uri->scheme = NULL;
552 if (uri->server != NULL) xmlFree(uri->server);
553 uri->server = NULL;
Daniel Veillard361d8452000-04-03 19:48:13 +0000554 if (uri->user != NULL) xmlFree(uri->user);
555 uri->user = NULL;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000556 if (uri->path != NULL) xmlFree(uri->path);
557 uri->path = NULL;
558 if (uri->fragment != NULL) xmlFree(uri->fragment);
559 uri->fragment = NULL;
560 if (uri->opaque != NULL) xmlFree(uri->opaque);
561 uri->opaque = NULL;
562 if (uri->authority != NULL) xmlFree(uri->authority);
563 uri->authority = NULL;
564 if (uri->query != NULL) xmlFree(uri->query);
565 uri->query = NULL;
566}
567
568/**
569 * xmlFreeURI:
570 * @uri: pointer to an xmlURI
571 *
572 * Free up the xmlURI struct
573 */
574void
575xmlFreeURI(xmlURIPtr uri) {
576 if (uri == NULL) return;
577
578 if (uri->scheme != NULL) xmlFree(uri->scheme);
579 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard361d8452000-04-03 19:48:13 +0000580 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000581 if (uri->path != NULL) xmlFree(uri->path);
582 if (uri->fragment != NULL) xmlFree(uri->fragment);
583 if (uri->opaque != NULL) xmlFree(uri->opaque);
584 if (uri->authority != NULL) xmlFree(uri->authority);
585 if (uri->query != NULL) xmlFree(uri->query);
586 memset(uri, -1, sizeof(xmlURI));
587 xmlFree(uri);
588}
589
590/**
Daniel Veillard361d8452000-04-03 19:48:13 +0000591 * xmlURIUnescapeString:
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000592 * @str: the string to unescape
593 * @len: the lenght in bytes to unescape (or <= 0 to indicate full string)
594 * @target: optionnal destination buffer
595 *
596 * Unescaping routine, does not do validity checks !
Daniel Veillardec303412000-03-24 13:41:54 +0000597 * Output is direct unsigned char translation of %XX values (no encoding)
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000598 *
599 * Returns an copy of the string, but unescaped
600 */
601char *
Daniel Veillard361d8452000-04-03 19:48:13 +0000602xmlURIUnescapeString(const char *str, int len, char *target) {
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000603 char *ret, *out;
604 const char *in;
605
606 if (str == NULL)
607 return(NULL);
608 if (len <= 0) len = strlen(str);
609 if (len <= 0) return(NULL);
610
611 if (target == NULL) {
612 ret = (char *) xmlMalloc(len + 1);
613 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000614 xmlGenericError(xmlGenericErrorContext,
615 "xmlURIUnescapeString: out of memory\n");
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000616 return(NULL);
617 }
618 } else
619 ret = target;
620 in = str;
621 out = ret;
622 while(len > 0) {
623 if (*in == '%') {
624 in++;
625 if ((*in >= '0') && (*in <= '9'))
626 *out = (*in - '0');
627 else if ((*in >= 'a') && (*in <= 'f'))
628 *out = (*in - 'a') + 10;
629 else if ((*in >= 'A') && (*in <= 'F'))
630 *out = (*in - 'A') + 10;
631 in++;
632 if ((*in >= '0') && (*in <= '9'))
633 *out = *out * 16 + (*in - '0');
634 else if ((*in >= 'a') && (*in <= 'f'))
635 *out = *out * 16 + (*in - 'a') + 10;
636 else if ((*in >= 'A') && (*in <= 'F'))
637 *out = *out * 16 + (*in - 'A') + 10;
638 in++;
639 len -= 3;
Daniel Veillardec303412000-03-24 13:41:54 +0000640 out++;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000641 } else {
642 *out++ = *in++;
643 len--;
644 }
645 }
646 *out = 0;
647 return(ret);
648}
649
650
651/**
652 * xmlParseURIFragment:
653 * @uri: pointer to an URI structure
654 * @str: pointer to the string to analyze
655 *
656 * Parse an URI fragment string and fills in the appropriate fields
657 * of the @uri structure.
658 *
659 * fragment = *uric
660 *
661 * Returns 0 or the error code
662 */
663int
664xmlParseURIFragment(xmlURIPtr uri, const char **str) {
665 const char *cur = *str;
666
667 if (str == NULL) return(-1);
668
669 while (IS_URIC(cur)) NEXT(cur);
670 if (uri != NULL) {
671 if (uri->fragment != NULL) xmlFree(uri->fragment);
Daniel Veillard361d8452000-04-03 19:48:13 +0000672 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000673 }
674 *str = cur;
675 return(0);
676}
677
678/**
679 * xmlParseURIQuery:
680 * @uri: pointer to an URI structure
681 * @str: pointer to the string to analyze
682 *
683 * Parse the query part of an URI
684 *
685 * query = *uric
686 *
687 * Returns 0 or the error code
688 */
689int
690xmlParseURIQuery(xmlURIPtr uri, const char **str) {
691 const char *cur = *str;
692
693 if (str == NULL) return(-1);
694
695 while (IS_URIC(cur)) NEXT(cur);
696 if (uri != NULL) {
697 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillard361d8452000-04-03 19:48:13 +0000698 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000699 }
700 *str = cur;
701 return(0);
702}
703
704/**
705 * xmlParseURIScheme:
706 * @uri: pointer to an URI structure
707 * @str: pointer to the string to analyze
708 *
709 * Parse an URI scheme
710 *
711 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
712 *
713 * Returns 0 or the error code
714 */
715int
716xmlParseURIScheme(xmlURIPtr uri, const char **str) {
717 const char *cur;
718
719 if (str == NULL)
720 return(-1);
721
722 cur = *str;
723 if (!IS_ALPHA(*cur))
724 return(2);
725 cur++;
726 while (IS_SCHEME(*cur)) cur++;
727 if (uri != NULL) {
728 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000729 /* !!! strndup */
730 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000731 }
732 *str = cur;
733 return(0);
734}
735
736/**
737 * xmlParseURIOpaquePart:
738 * @uri: pointer to an URI structure
739 * @str: pointer to the string to analyze
740 *
741 * Parse an URI opaque part
742 *
743 * opaque_part = uric_no_slash *uric
744 *
745 * Returns 0 or the error code
746 */
747int
748xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
749 const char *cur;
750
751 if (str == NULL)
752 return(-1);
753
754 cur = *str;
755 if (!IS_URIC_NO_SLASH(cur)) {
756 return(3);
757 }
758 NEXT(cur);
759 while (IS_URIC(cur)) NEXT(cur);
760 if (uri != NULL) {
761 if (uri->opaque != NULL) xmlFree(uri->opaque);
Daniel Veillard361d8452000-04-03 19:48:13 +0000762 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000763 }
764 *str = cur;
765 return(0);
766}
767
768/**
Daniel Veillard361d8452000-04-03 19:48:13 +0000769 * xmlParseURIServer:
770 * @uri: pointer to an URI structure
771 * @str: pointer to the string to analyze
772 *
773 * Parse a server subpart of an URI, it's a finer grain analysis
774 * of the authority part.
775 *
776 * server = [ [ userinfo "@" ] hostport ]
777 * userinfo = *( unreserved | escaped |
778 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
779 * hostport = host [ ":" port ]
780 * host = hostname | IPv4address
781 * hostname = *( domainlabel "." ) toplabel [ "." ]
782 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
783 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
784 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
785 * port = *digit
786 *
787 * Returns 0 or the error code
788 */
789int
790xmlParseURIServer(xmlURIPtr uri, const char **str) {
791 const char *cur;
792 const char *host, *tmp;
793
794 if (str == NULL)
795 return(-1);
796
797 cur = *str;
798
799 /*
800 * is there an userinfo ?
801 */
802 while (IS_USERINFO(cur)) NEXT(cur);
803 if (*cur == '@') {
804 if (uri != NULL) {
805 if (uri->user != NULL) xmlFree(uri->user);
806 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
807 }
808 cur++;
809 } else {
810 if (uri != NULL) {
811 if (uri->user != NULL) xmlFree(uri->user);
812 uri->user = NULL;
813 }
814 cur = *str;
815 }
816 /*
Daniel Veillard740abf52000-10-02 23:04:54 +0000817 * This can be empty in the case where there is no server
818 */
819 host = cur;
820 if (*cur == '/') {
821 if (uri != NULL) {
822 if (uri->authority != NULL) xmlFree(uri->authority);
823 uri->authority = NULL;
824 if (uri->server != NULL) xmlFree(uri->server);
825 uri->server = NULL;
826 uri->port = 0;
827 }
828 return(0);
829 }
830 /*
Daniel Veillard361d8452000-04-03 19:48:13 +0000831 * host part of hostport can derive either an IPV4 address
832 * or an unresolved name. Check the IP first, it easier to detect
833 * errors if wrong one
834 */
Daniel Veillard361d8452000-04-03 19:48:13 +0000835 if (IS_DIGIT(*cur)) {
836 while(IS_DIGIT(*cur)) cur++;
837 if (*cur != '.')
838 goto host_name;
839 cur++;
840 if (!IS_DIGIT(*cur))
841 goto host_name;
842 while(IS_DIGIT(*cur)) cur++;
843 if (*cur != '.')
844 goto host_name;
845 cur++;
846 if (!IS_DIGIT(*cur))
847 goto host_name;
848 while(IS_DIGIT(*cur)) cur++;
849 if (*cur != '.')
850 goto host_name;
851 cur++;
852 if (!IS_DIGIT(*cur))
853 goto host_name;
854 while(IS_DIGIT(*cur)) cur++;
855 if (uri != NULL) {
856 if (uri->authority != NULL) xmlFree(uri->authority);
857 uri->authority = NULL;
858 if (uri->server != NULL) xmlFree(uri->server);
859 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
860 }
861 goto host_done;
862 }
863host_name:
864 /*
865 * the hostname production as-is is a parser nightmare.
866 * simplify it to
867 * hostname = *( domainlabel "." ) domainlabel [ "." ]
868 * and just make sure the last label starts with a non numeric char.
869 */
870 if (!IS_ALPHANUM(*cur))
871 return(6);
872 while (IS_ALPHANUM(*cur)) {
873 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
874 if (*cur == '.')
875 cur++;
876 }
877 tmp = cur;
878 tmp--;
879 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
880 tmp++;
881 if (!IS_ALPHA(*tmp))
882 return(7);
883 if (uri != NULL) {
884 if (uri->authority != NULL) xmlFree(uri->authority);
885 uri->authority = NULL;
886 if (uri->server != NULL) xmlFree(uri->server);
887 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
888 }
889
890host_done:
891
892 /*
893 * finish by checking for a port presence.
894 */
895 if (*cur == ':') {
896 cur++;
897 if (IS_DIGIT(*cur)) {
898 if (uri != NULL)
899 uri->port = 0;
900 while (IS_DIGIT(*cur)) {
901 if (uri != NULL)
902 uri->port = uri->port * 10 + (*cur - '0');
903 cur++;
904 }
905 }
906 }
907 *str = cur;
908 return(0);
909}
910
911/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000912 * xmlParseURIRelSegment:
913 * @uri: pointer to an URI structure
914 * @str: pointer to the string to analyze
915 *
916 * Parse an URI relative segment
917 *
918 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
919 * "+" | "$" | "," )
920 *
921 * Returns 0 or the error code
922 */
923int
924xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
925 const char *cur;
926
927 if (str == NULL)
928 return(-1);
929
930 cur = *str;
931 if (!IS_SEGMENT(cur)) {
932 return(3);
933 }
934 NEXT(cur);
935 while (IS_SEGMENT(cur)) NEXT(cur);
936 if (uri != NULL) {
937 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard361d8452000-04-03 19:48:13 +0000938 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000939 }
940 *str = cur;
941 return(0);
942}
943
944/**
945 * xmlParseURIPathSegments:
946 * @uri: pointer to an URI structure
947 * @str: pointer to the string to analyze
948 * @slash: should we add a leading slash
949 *
950 * Parse an URI set of path segments
951 *
952 * path_segments = segment *( "/" segment )
953 * segment = *pchar *( ";" param )
954 * param = *pchar
955 *
956 * Returns 0 or the error code
957 */
958int
959xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
960 const char *cur;
961
962 if (str == NULL)
963 return(-1);
964
965 cur = *str;
966
967 do {
968 while (IS_PCHAR(cur)) NEXT(cur);
969 if (*cur == ';') {
970 cur++;
971 while (IS_PCHAR(cur)) NEXT(cur);
972 }
973 if (*cur != '/') break;
974 cur++;
975 } while (1);
976 if (uri != NULL) {
977 int len, len2 = 0;
978 char *path;
979
980 /*
981 * Concat the set of path segments to the current path
982 */
983 len = cur - *str;
984 if (slash)
985 len++;
986
987 if (uri->path != NULL) {
988 len2 = strlen(uri->path);
989 len += len2;
990 }
991 path = (char *) xmlMalloc(len + 1);
992 if (path == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000993 xmlGenericError(xmlGenericErrorContext,
994 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000995 *str = cur;
996 return(-1);
997 }
998 if (uri->path != NULL)
999 memcpy(path, uri->path, len2);
1000 if (slash) {
1001 path[len2] = '/';
1002 len2++;
1003 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001004 path[len2] = 0;
1005 if (cur - *str > 0)
1006 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001007 if (uri->path != NULL)
1008 xmlFree(uri->path);
1009 uri->path = path;
1010 }
1011 *str = cur;
1012 return(0);
1013}
1014
1015/**
1016 * xmlParseURIAuthority:
1017 * @uri: pointer to an URI structure
1018 * @str: pointer to the string to analyze
1019 *
1020 * Parse the authority part of an URI.
1021 *
1022 * authority = server | reg_name
1023 * server = [ [ userinfo "@" ] hostport ]
1024 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1025 * "@" | "&" | "=" | "+" )
1026 *
1027 * Note : this is completely ambiguous since reg_name is allowed to
1028 * use the full set of chars in use by server:
1029 *
1030 * 3.2.1. Registry-based Naming Authority
1031 *
1032 * The structure of a registry-based naming authority is specific
1033 * to the URI scheme, but constrained to the allowed characters
1034 * for an authority component.
1035 *
1036 * Returns 0 or the error code
1037 */
1038int
1039xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1040 const char *cur;
Daniel Veillard361d8452000-04-03 19:48:13 +00001041 int ret;
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001042
1043 if (str == NULL)
1044 return(-1);
1045
1046 cur = *str;
Daniel Veillard361d8452000-04-03 19:48:13 +00001047
1048 /*
1049 * try first to parse it as a server string.
1050 */
1051 ret = xmlParseURIServer(uri, str);
1052 if (ret == 0)
1053 return(0);
1054
1055 /*
1056 * failed, fallback to reg_name
1057 */
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001058 if (!IS_REG_NAME(cur)) {
1059 return(5);
1060 }
1061 NEXT(cur);
1062 while (IS_REG_NAME(cur)) NEXT(cur);
1063 if (uri != NULL) {
Daniel Veillard361d8452000-04-03 19:48:13 +00001064 if (uri->server != NULL) xmlFree(uri->server);
1065 uri->server = NULL;
1066 if (uri->user != NULL) xmlFree(uri->user);
1067 uri->user = NULL;
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001068 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard361d8452000-04-03 19:48:13 +00001069 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001070 }
1071 *str = cur;
1072 return(0);
1073}
1074
1075/**
1076 * xmlParseURIHierPart:
1077 * @uri: pointer to an URI structure
1078 * @str: pointer to the string to analyze
1079 *
1080 * Parse an URI hirarchical part
1081 *
1082 * hier_part = ( net_path | abs_path ) [ "?" query ]
1083 * abs_path = "/" path_segments
1084 * net_path = "//" authority [ abs_path ]
1085 *
1086 * Returns 0 or the error code
1087 */
1088int
1089xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1090 int ret;
1091 const char *cur;
1092
1093 if (str == NULL)
1094 return(-1);
1095
1096 cur = *str;
1097
1098 if ((cur[0] == '/') && (cur[1] == '/')) {
1099 cur += 2;
1100 ret = xmlParseURIAuthority(uri, &cur);
1101 if (ret != 0)
1102 return(ret);
1103 if (cur[0] == '/') {
1104 cur++;
1105 ret = xmlParseURIPathSegments(uri, &cur, 1);
1106 }
1107 } else if (cur[0] == '/') {
1108 cur++;
1109 ret = xmlParseURIPathSegments(uri, &cur, 1);
1110 } else {
1111 return(4);
1112 }
1113 if (ret != 0)
1114 return(ret);
1115 if (*cur == '?') {
1116 cur++;
1117 ret = xmlParseURIQuery(uri, &cur);
1118 if (ret != 0)
1119 return(ret);
1120 }
1121 *str = cur;
1122 return(0);
1123}
1124
1125/**
1126 * xmlParseAbsoluteURI:
1127 * @uri: pointer to an URI structure
1128 * @str: pointer to the string to analyze
1129 *
1130 * Parse an URI reference string and fills in the appropriate fields
1131 * of the @uri structure
1132 *
1133 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1134 *
1135 * Returns 0 or the error code
1136 */
1137int
1138xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1139 int ret;
1140
1141 if (str == NULL)
1142 return(-1);
1143
1144 ret = xmlParseURIScheme(uri, str);
1145 if (ret != 0) return(ret);
1146 if (**str != ':')
1147 return(1);
1148 (*str)++;
1149 if (**str == '/')
1150 return(xmlParseURIHierPart(uri, str));
1151 return(xmlParseURIOpaquePart(uri, str));
1152}
1153
1154/**
1155 * xmlParseRelativeURI:
1156 * @uri: pointer to an URI structure
1157 * @str: pointer to the string to analyze
1158 *
1159 * Parse an relative URI string and fills in the appropriate fields
1160 * of the @uri structure
1161 *
1162 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1163 * abs_path = "/" path_segments
1164 * net_path = "//" authority [ abs_path ]
1165 * rel_path = rel_segment [ abs_path ]
1166 *
1167 * Returns 0 or the error code
1168 */
1169int
1170xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1171 int ret = 0;
1172 const char *cur;
1173
1174 if (str == NULL)
1175 return(-1);
1176
1177 cur = *str;
1178 if ((cur[0] == '/') && (cur[1] == '/')) {
1179 cur += 2;
1180 ret = xmlParseURIAuthority(uri, &cur);
1181 if (ret != 0)
1182 return(ret);
1183 if (cur[0] == '/') {
1184 cur++;
1185 ret = xmlParseURIPathSegments(uri, &cur, 1);
1186 }
1187 } else if (cur[0] == '/') {
1188 cur++;
1189 ret = xmlParseURIPathSegments(uri, &cur, 1);
Daniel Veillard98a79162000-09-04 11:15:39 +00001190 } else if (cur[0] != '#' && cur[0] != '?') {
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001191 ret = xmlParseURIRelSegment(uri, &cur);
1192 if (ret != 0)
1193 return(ret);
1194 if (cur[0] == '/') {
1195 cur++;
1196 ret = xmlParseURIPathSegments(uri, &cur, 1);
1197 }
1198 }
1199 if (ret != 0)
1200 return(ret);
1201 if (*cur == '?') {
1202 cur++;
1203 ret = xmlParseURIQuery(uri, &cur);
1204 if (ret != 0)
1205 return(ret);
1206 }
1207 *str = cur;
1208 return(ret);
1209}
1210
1211/**
1212 * xmlParseURIReference:
1213 * @uri: pointer to an URI structure
1214 * @str: the string to analyze
1215 *
1216 * Parse an URI reference string and fills in the appropriate fields
1217 * of the @uri structure
1218 *
1219 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1220 *
1221 * Returns 0 or the error code
1222 */
1223int
1224xmlParseURIReference(xmlURIPtr uri, const char *str) {
1225 int ret;
1226 const char *tmp = str;
1227
1228 if (str == NULL)
1229 return(-1);
1230 xmlCleanURI(uri);
1231
1232 /*
1233 * Try first to parse aboslute refs, then fallback to relative if
1234 * it fails.
1235 */
1236 ret = xmlParseAbsoluteURI(uri, &str);
1237 if (ret != 0) {
1238 xmlCleanURI(uri);
1239 str = tmp;
1240 ret = xmlParseRelativeURI(uri, &str);
1241 }
1242 if (ret != 0) {
1243 xmlCleanURI(uri);
1244 return(ret);
1245 }
1246
1247 if (*str == '#') {
1248 str++;
1249 ret = xmlParseURIFragment(uri, &str);
1250 if (ret != 0) return(ret);
1251 }
1252 if (*str != 0) {
1253 xmlCleanURI(uri);
1254 return(1);
1255 }
1256 return(0);
1257}
1258
1259/**
Daniel Veillard496a1cf2000-05-03 14:20:55 +00001260 * xmlParseURI:
1261 * @str: the URI string to analyze
1262 *
1263 * Parse an URI
1264 *
1265 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1266 *
1267 * Returns a newly build xmlURIPtr or NULL in case of error
1268 */
1269xmlURIPtr
1270xmlParseURI(const char *str) {
1271 xmlURIPtr uri;
1272 int ret;
1273
1274 if (str == NULL)
1275 return(NULL);
1276 uri = xmlCreateURI();
1277 if (uri != NULL) {
1278 ret = xmlParseURIReference(uri, str);
1279 if (ret) {
1280 xmlFreeURI(uri);
1281 return(NULL);
1282 }
1283 }
1284 return(uri);
1285}
1286
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the normalization steps c) to g) of RFC 2396 section 5.2
 * step 6 to a path string. Normalization occurs directly on the
 * string, no new allocation is done and the result is never longer
 * than the input.
 *
 * Everything up to and including the first '/' is kept verbatim, only
 * the segments found after it are normalized; a string without any '/'
 * is left unchanged.
 *
 * Returns 0 or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *in;       /* next character to examine */
    char *out;      /* next position to write, always out <= in */
    char *start;    /* first normalized position, out never goes below */

    if (path == NULL)
        return(-1);

    in = path;
    while ((*in != 0) && (*in != '/'))
        in++;
    if (*in == 0)
        return(0);

    /* we are positioned at the beginning of the first segment */
    in++;
    start = in;
    out = in;

    /*
     * Analyze each segment in sequence. Every segment kept so far is
     * stored with its trailing '/', so the kept output between start
     * and out is either empty or ends with a '/'.
     */
    while (*in != 0) {
        char *end = in;
        int isDot, isDotDot;

        while ((*end != 0) && (*end != '/'))
            end++;
        isDot = ((end - in) == 1) && (in[0] == '.');
        isDotDot = ((end - in) == 2) && (in[0] == '.') && (in[1] == '.');

        /*
         * e) All occurrences of "<segment>/../", where <segment> is a
         *    complete path segment not equal to "..", are removed from
         *    the buffer string.
         * f) If the buffer string ends with "<segment>/..", where
         *    <segment> is a complete path segment not equal to "..",
         *    that "<segment>/.." is removed.
         *
         * Both are handled by dropping the last kept segment, which
         * also gives the iterative leftmost removal asked by e).
         */
        if (isDotDot && (out > start)) {
            char *prev = out - 1;   /* the '/' closing the last segment */

            while ((prev > start) && (prev[-1] != '/'))
                prev--;
            if (((out - prev) != 3) || (prev[0] != '.') ||
                (prev[1] != '.')) {
                out = prev;
                isDot = 1;          /* the ".." itself is dropped too */
            }
        }

        /*
         * c) All occurrences of "./", where "." is a complete path
         *    segment, are removed from the buffer string.
         * d) If the buffer string ends with "." as a complete path
         *    segment, that "." is removed.
         */
        if (isDot) {
            in = (*end == '/') ? end + 1 : end;
            continue;
        }

        /* keep the segment, with its trailing slash if there is one */
        while (in < end)
            *out++ = *in++;
        if (*in == '/')
            *out++ = *in++;
    }
    *out = 0;

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path. Only complete ".." segments
     * are concerned: a segment merely starting with ".." is kept.
     */
    in = path;
    while ((in[0] == '/') && (in[1] == '.') && (in[2] == '.') &&
           ((in[3] == '/') || (in[3] == 0)))
        in += 3;
    if (in != path) {
        out = path;
        while (*in != 0)
            *out++ = *in++;
        *out = 0;
    }
    return(0);
}
1417
1418/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001419 * xmlBuildURI:
1420 * @URI: the URI instance found in the document
1421 * @base: the base value
1422 *
1423 * Computes he final URI of the reference done by checking that
1424 * the given URI is valid, and building the final URI using the
1425 * base URI. This is processed according to section 5.2 of the
1426 * RFC 2396
1427 *
1428 * 5.2. Resolving Relative References to Absolute Form
1429 *
Daniel Veillardec303412000-03-24 13:41:54 +00001430 * Returns a new URI string (to be freed by the caller) or NULL in case
1431 * of error.
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001432 */
1433xmlChar *
1434xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
Daniel Veillardec303412000-03-24 13:41:54 +00001435 xmlChar *val = NULL;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001436 int ret, ret2, len, index, cur, out;
Daniel Veillardec303412000-03-24 13:41:54 +00001437 xmlURIPtr ref = NULL;
1438 xmlURIPtr bas = NULL;
1439 xmlURIPtr res = NULL;
1440
Daniel Veillardec303412000-03-24 13:41:54 +00001441 /*
1442 * 1) The URI reference is parsed into the potential four components and
1443 * fragment identifier, as described in Section 4.3.
Daniel Veillard90e11312000-09-05 10:42:32 +00001444 *
1445 * NOTE that a completely empty URI is treated by modern browsers
1446 * as a reference to "." rather than as a synonym for the current
1447 * URI. Should we do that here?
Daniel Veillardec303412000-03-24 13:41:54 +00001448 */
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001449 if (URI == NULL)
1450 ret = -1;
1451 else {
1452 ref = xmlCreateURI();
1453 if (ref == NULL)
Daniel Veillard98a79162000-09-04 11:15:39 +00001454 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001455 if (*URI)
1456 ret = xmlParseURIReference(ref, (const char *) URI);
1457 else
1458 ret = -1;
Daniel Veillard98a79162000-09-04 11:15:39 +00001459 }
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001460 if (base == NULL)
1461 ret2 = -1;
1462 else {
1463 bas = xmlCreateURI();
1464 if (bas == NULL)
1465 goto done;
1466 ret2 = xmlParseURIReference(bas, (const char *) base);
1467 }
1468 if ((ret != 0) && (ret2 != 0))
Daniel Veillardec303412000-03-24 13:41:54 +00001469 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001470 if (ret != 0) {
1471 /*
1472 * the base fragment must be ignored
1473 */
1474 if (bas->fragment != NULL) {
1475 xmlFree(bas->fragment);
1476 bas->fragment = NULL;
1477 }
1478 val = xmlSaveUri(bas);
Daniel Veillardec303412000-03-24 13:41:54 +00001479 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001480 }
1481 if (ret2 != 0) {
1482 val = xmlSaveUri(ref);
1483 goto done;
1484 }
1485
Daniel Veillardec303412000-03-24 13:41:54 +00001486
1487 /*
1488 * 2) If the path component is empty and the scheme, authority, and
1489 * query components are undefined, then it is a reference to the
Daniel Veillard90e11312000-09-05 10:42:32 +00001490 * current document and we are done. Otherwise, the reference URI's
1491 * query and fragment components are defined as found (or not found)
1492 * within the URI reference and not inherited from the base URI.
Daniel Veillard98a79162000-09-04 11:15:39 +00001493 *
Daniel Veillard90e11312000-09-05 10:42:32 +00001494 * NOTE that in modern browsers, the parsing differs from the above
1495 * in the following aspect: the query component is allowed to be
1496 * defined while still treating this as a reference to the current
1497 * document.
Daniel Veillardec303412000-03-24 13:41:54 +00001498 */
1499 res = xmlCreateURI();
1500 if (res == NULL)
1501 goto done;
1502 if ((ref->scheme == NULL) && (ref->path == NULL) &&
Daniel Veillard90e11312000-09-05 10:42:32 +00001503 ((ref->authority == NULL) && (ref->server == NULL))) {
1504 if (bas->scheme != NULL)
1505 res->scheme = xmlMemStrdup(bas->scheme);
1506 if (bas->authority != NULL)
1507 res->authority = xmlMemStrdup(bas->authority);
1508 else if (bas->server != NULL) {
1509 res->server = xmlMemStrdup(bas->server);
1510 if (bas->user != NULL)
1511 res->user = xmlMemStrdup(bas->user);
1512 res->port = bas->port;
1513 }
1514 if (bas->path != NULL)
1515 res->path = xmlMemStrdup(bas->path);
1516 if (ref->query != NULL)
1517 res->query = xmlMemStrdup(ref->query);
1518 else if (bas->query != NULL)
1519 res->query = xmlMemStrdup(bas->query);
1520 if (ref->fragment != NULL)
1521 res->fragment = xmlMemStrdup(ref->fragment);
1522 goto step_7;
Daniel Veillardec303412000-03-24 13:41:54 +00001523 }
Daniel Veillard98a79162000-09-04 11:15:39 +00001524
Daniel Veillard90e11312000-09-05 10:42:32 +00001525 if (ref->query != NULL)
1526 res->query = xmlMemStrdup(ref->query);
1527 if (ref->fragment != NULL)
1528 res->fragment = xmlMemStrdup(ref->fragment);
Daniel Veillardec303412000-03-24 13:41:54 +00001529
1530 /*
1531 * 3) If the scheme component is defined, indicating that the reference
1532 * starts with a scheme name, then the reference is interpreted as an
1533 * absolute URI and we are done. Otherwise, the reference URI's
1534 * scheme is inherited from the base URI's scheme component.
1535 */
1536 if (ref->scheme != NULL) {
1537 val = xmlSaveUri(ref);
1538 goto done;
1539 }
Daniel Veillardbe803962000-06-28 23:40:59 +00001540 if (bas->scheme != NULL)
1541 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillardec303412000-03-24 13:41:54 +00001542
1543 /*
1544 * 4) If the authority component is defined, then the reference is a
1545 * network-path and we skip to step 7. Otherwise, the reference
1546 * URI's authority is inherited from the base URI's authority
1547 * component, which will also be undefined if the URI scheme does not
1548 * use an authority component.
1549 */
Daniel Veillard361d8452000-04-03 19:48:13 +00001550 if ((ref->authority != NULL) || (ref->server != NULL)) {
1551 if (ref->authority != NULL)
1552 res->authority = xmlMemStrdup(ref->authority);
1553 else {
1554 res->server = xmlMemStrdup(ref->server);
1555 if (ref->user != NULL)
1556 res->user = xmlMemStrdup(ref->user);
1557 res->port = ref->port;
1558 }
Daniel Veillardec303412000-03-24 13:41:54 +00001559 if (ref->path != NULL)
1560 res->path = xmlMemStrdup(ref->path);
Daniel Veillardec303412000-03-24 13:41:54 +00001561 goto step_7;
1562 }
1563 if (bas->authority != NULL)
1564 res->authority = xmlMemStrdup(bas->authority);
Daniel Veillard361d8452000-04-03 19:48:13 +00001565 else if (bas->server != NULL) {
1566 res->server = xmlMemStrdup(bas->server);
1567 if (bas->user != NULL)
1568 res->user = xmlMemStrdup(bas->user);
1569 res->port = bas->port;
1570 }
Daniel Veillardec303412000-03-24 13:41:54 +00001571
1572 /*
1573 * 5) If the path component begins with a slash character ("/"), then
1574 * the reference is an absolute-path and we skip to step 7.
1575 */
Daniel Veillard90e11312000-09-05 10:42:32 +00001576 if ((ref->path != NULL) && (ref->path[0] == '/')) {
Daniel Veillardec303412000-03-24 13:41:54 +00001577 res->path = xmlMemStrdup(ref->path);
Daniel Veillardec303412000-03-24 13:41:54 +00001578 goto step_7;
1579 }
1580
1581
1582 /*
1583 * 6) If this step is reached, then we are resolving a relative-path
1584 * reference. The relative path needs to be merged with the base
1585 * URI's path. Although there are many ways to do this, we will
1586 * describe a simple method using a separate string buffer.
1587 *
1588 * Allocate a buffer large enough for the result string.
1589 */
1590 len = 2; /* extra / and 0 */
1591 if (ref->path != NULL)
1592 len += strlen(ref->path);
1593 if (bas->path != NULL)
1594 len += strlen(bas->path);
1595 res->path = (char *) xmlMalloc(len);
1596 if (res->path == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001597 xmlGenericError(xmlGenericErrorContext,
1598 "xmlBuildURI: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +00001599 goto done;
1600 }
1601 res->path[0] = 0;
1602
1603 /*
1604 * a) All but the last segment of the base URI's path component is
1605 * copied to the buffer. In other words, any characters after the
1606 * last (right-most) slash character, if any, are excluded.
1607 */
1608 cur = 0;
1609 out = 0;
1610 if (bas->path != NULL) {
1611 while (bas->path[cur] != 0) {
1612 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1613 cur++;
1614 if (bas->path[cur] == 0)
1615 break;
1616
1617 cur++;
1618 while (out < cur) {
1619 res->path[out] = bas->path[out];
1620 out++;
1621 }
1622 }
1623 }
1624 res->path[out] = 0;
1625
1626 /*
1627 * b) The reference's path component is appended to the buffer
1628 * string.
1629 */
Daniel Veillard8ddb5a72000-09-23 10:28:52 +00001630 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillardec303412000-03-24 13:41:54 +00001631 index = 0;
Daniel Veillard52402ce2000-08-22 23:36:12 +00001632 /*
1633 * Ensure the path includes a '/'
1634 */
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001635 if ((out == 0) && (bas->server != NULL))
Daniel Veillard52402ce2000-08-22 23:36:12 +00001636 res->path[out++] = '/';
Daniel Veillardec303412000-03-24 13:41:54 +00001637 while (ref->path[index] != 0) {
1638 res->path[out++] = ref->path[index++];
1639 }
1640 }
1641 res->path[out] = 0;
1642
1643 /*
1644 * Steps c) to h) are really path normalization steps
1645 */
1646 xmlNormalizeURIPath(res->path);
1647
1648step_7:
1649
1650 /*
1651 * 7) The resulting URI components, including any inherited from the
1652 * base URI, are recombined to give the absolute form of the URI
1653 * reference.
1654 */
1655 val = xmlSaveUri(res);
1656
1657done:
1658 if (ref != NULL)
1659 xmlFreeURI(ref);
Daniel Veillard39c7d712000-09-10 16:14:55 +00001660 if (bas != NULL)
Daniel Veillardec303412000-03-24 13:41:54 +00001661 xmlFreeURI(bas);
1662 if (res != NULL)
1663 xmlFreeURI(res);
1664 return(val);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001665}
1666
1667