/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFC 2396
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
10
11#ifdef WIN32
12#define INCLUDE_WINSOCK
13#include "win32config.h"
14#else
15#include "config.h"
16#endif
17
18#include <stdio.h>
19#include <string.h>
20
Daniel Veillard361d8452000-04-03 19:48:13 +000021#include <libxml/xmlmemory.h>
22#include <libxml/uri.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000023#include <libxml/xmlerror.h>
Daniel Veillard3dd82e72000-03-20 11:48:04 +000024
/************************************************************************
 *                                                                      *
 *          Macros to differentiate various character types,            *
 *               directly extracted from RFC 2396                       *
 *                                                                      *
 ************************************************************************/

/*
 * Conventions used below:
 *   - macros taking "x" test a single character value;
 *   - macros taking "p" test the character(s) found at pointer p, because
 *     an escaped sequence spans three characters.
 * Every macro evaluates its argument more than once, so only pass
 * expressions without side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
        (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
        ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
        ((x) == '(') || ((x) == ')'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ','))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The && chain short-circuits, so (p)[2] is only read when (p)[1] is a
 * hex digit (and therefore not the string terminator).
 */
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \
        (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
        ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
        ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
        ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
        ((*(p) == ',')))

/*
 * rel_segment = 1*( unreserved | escaped |
 *                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
        ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \
        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
        ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 *
 * Only the "following characters" part of the production is tested here;
 * the leading alpha has to be checked separately with IS_ALPHA().
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \
        ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
        ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \
        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \
        ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *               "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
        ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \
        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
        ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
        (IS_RESERVED(*(p))))

/*
 * Skip to next pointer char, handle escaped sequences.
 *
 * p must be a modifiable pointer lvalue.  A '%' always advances by three
 * characters without checking the two that follow, so callers are expected
 * to have validated the sequence first (e.g. with IS_ESCAPED()).
 * The argument is fully parenthesized so that NEXT(*pp) advances *pp and
 * not pp itself.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
168
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 *    path          = [ abs_path | opaque_part ]
 */
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000178
/************************************************************************
 *                                                                      *
 *                  Generic URI structure functions                     *
 *                                                                      *
 ************************************************************************/
184
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000185/**
186 * xmlCreateURI:
187 *
188 * Simply creates an empty xmlURI
189 *
190 * Returns the new structure or NULL in case of error
191 */
192xmlURIPtr
193xmlCreateURI(void) {
194 xmlURIPtr ret;
195
196 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
197 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000198 xmlGenericError(xmlGenericErrorContext,
199 "xmlCreateURI: out of memory\n");
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000200 return(NULL);
201 }
202 memset(ret, 0, sizeof(xmlURI));
203 return(ret);
204}
205
206/**
Daniel Veillardec303412000-03-24 13:41:54 +0000207 * xmlSaveUri:
208 * @uri: pointer to an xmlURI
209 *
210 * Save the URI as an escaped string
211 *
212 * Returns a new string (to be deallocated by caller)
213 */
214xmlChar *
215xmlSaveUri(xmlURIPtr uri) {
216 xmlChar *ret = NULL;
217 const char *p;
218 int len;
219 int max;
220
221 if (uri == NULL) return(NULL);
222
223
224 max = 80;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000225 ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000226 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000227 xmlGenericError(xmlGenericErrorContext,
228 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000229 return(NULL);
230 }
231 len = 0;
232
233 if (uri->scheme != NULL) {
234 p = uri->scheme;
235 while (*p != 0) {
236 if (len >= max) {
237 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000238 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000239 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000240 xmlGenericError(xmlGenericErrorContext,
241 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000242 return(NULL);
243 }
244 }
245 ret[len++] = *p++;
246 }
247 if (len >= max) {
248 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000250 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000253 return(NULL);
254 }
255 }
256 ret[len++] = ':';
257 }
258 if (uri->opaque != NULL) {
259 p = uri->opaque;
260 while (*p != 0) {
261 if (len + 3 >= max) {
262 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000263 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000264 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000265 xmlGenericError(xmlGenericErrorContext,
266 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000267 return(NULL);
268 }
269 }
270 if ((IS_UNRESERVED(*(p))) ||
271 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||
272 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
273 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
274 ret[len++] = *p++;
275 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000276 int val = *(unsigned char *)p++;
277 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000278 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000279 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
280 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000281 }
282 }
283 if (len >= max) {
284 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000285 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000286 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000287 xmlGenericError(xmlGenericErrorContext,
288 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000289 return(NULL);
290 }
291 }
292 ret[len++] = 0;
293 } else {
Daniel Veillard361d8452000-04-03 19:48:13 +0000294 if (uri->server != NULL) {
295 if (len + 3 >= max) {
296 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000297 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000298 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000299 xmlGenericError(xmlGenericErrorContext,
300 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000301 return(NULL);
302 }
303 }
304 ret[len++] = '/';
305 ret[len++] = '/';
306 if (uri->user != NULL) {
307 p = uri->user;
308 while (*p != 0) {
309 if (len + 3 >= max) {
310 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000311 ret = (xmlChar *) xmlRealloc(ret,
312 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000313 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000314 xmlGenericError(xmlGenericErrorContext,
315 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000316 return(NULL);
317 }
318 }
319 if ((IS_UNRESERVED(*(p))) ||
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000320 ((*(p) == ';')) || ((*(p) == ':')) ||
321 ((*(p) == '&')) || ((*(p) == '=')) ||
322 ((*(p) == '+')) || ((*(p) == '$')) ||
Daniel Veillard361d8452000-04-03 19:48:13 +0000323 ((*(p) == ',')))
324 ret[len++] = *p++;
325 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000326 int val = *(unsigned char *)p++;
327 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillard361d8452000-04-03 19:48:13 +0000328 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000329 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
330 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillard361d8452000-04-03 19:48:13 +0000331 }
332 }
333 if (len + 3 >= max) {
334 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000335 ret = (xmlChar *) xmlRealloc(ret,
336 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000337 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000338 xmlGenericError(xmlGenericErrorContext,
339 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000340 return(NULL);
341 }
342 }
343 ret[len++] = '@';
344 }
345 p = uri->server;
346 while (*p != 0) {
347 if (len >= max) {
348 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000349 ret = (xmlChar *) xmlRealloc(ret,
350 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000351 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000352 xmlGenericError(xmlGenericErrorContext,
353 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000354 return(NULL);
355 }
356 }
357 ret[len++] = *p++;
358 }
359 if (uri->port > 0) {
360 if (len + 10 >= max) {
361 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000362 ret = (xmlChar *) xmlRealloc(ret,
363 (max + 1) * sizeof(xmlChar));
Daniel Veillard361d8452000-04-03 19:48:13 +0000364 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000365 xmlGenericError(xmlGenericErrorContext,
366 "xmlSaveUri: out of memory\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000367 return(NULL);
368 }
369 }
370 len += sprintf((char *) &ret[len], ":%d", uri->port);
371 }
372 } else if (uri->authority != NULL) {
Daniel Veillardec303412000-03-24 13:41:54 +0000373 if (len + 3 >= max) {
374 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000375 ret = (xmlChar *) xmlRealloc(ret,
376 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000377 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000378 xmlGenericError(xmlGenericErrorContext,
379 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000380 return(NULL);
381 }
382 }
383 ret[len++] = '/';
384 ret[len++] = '/';
385 p = uri->authority;
386 while (*p != 0) {
387 if (len + 3 >= max) {
388 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000389 ret = (xmlChar *) xmlRealloc(ret,
390 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000391 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000392 xmlGenericError(xmlGenericErrorContext,
393 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000394 return(NULL);
395 }
396 }
397 if ((IS_UNRESERVED(*(p))) ||
398 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
399 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
400 ((*(p) == '=')) || ((*(p) == '+')))
401 ret[len++] = *p++;
402 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000403 int val = *(unsigned char *)p++;
404 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000405 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000406 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
407 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000408 }
409 }
Daniel Veillard740abf52000-10-02 23:04:54 +0000410 } else if (uri->scheme != NULL) {
411 if (len + 3 >= max) {
412 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000413 ret = (xmlChar *) xmlRealloc(ret,
414 (max + 1) * sizeof(xmlChar));
Daniel Veillard740abf52000-10-02 23:04:54 +0000415 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000416 xmlGenericError(xmlGenericErrorContext,
417 "xmlSaveUri: out of memory\n");
Daniel Veillard740abf52000-10-02 23:04:54 +0000418 return(NULL);
419 }
420 }
421 ret[len++] = '/';
422 ret[len++] = '/';
Daniel Veillardec303412000-03-24 13:41:54 +0000423 }
424 if (uri->path != NULL) {
425 p = uri->path;
426 while (*p != 0) {
427 if (len + 3 >= max) {
428 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000429 ret = (xmlChar *) xmlRealloc(ret,
430 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000431 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000432 xmlGenericError(xmlGenericErrorContext,
433 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000434 return(NULL);
435 }
436 }
437 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
438 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
439 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
440 ((*(p) == ',')))
441 ret[len++] = *p++;
442 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000443 int val = *(unsigned char *)p++;
444 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000445 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000446 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
447 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000448 }
449 }
450 }
451 if (uri->query != NULL) {
452 if (len + 3 >= max) {
453 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000454 ret = (xmlChar *) xmlRealloc(ret,
455 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000456 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000457 xmlGenericError(xmlGenericErrorContext,
458 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000459 return(NULL);
460 }
461 }
462 ret[len++] = '?';
463 p = uri->query;
464 while (*p != 0) {
465 if (len + 3 >= max) {
466 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000467 ret = (xmlChar *) xmlRealloc(ret,
468 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000469 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000470 xmlGenericError(xmlGenericErrorContext,
471 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000472 return(NULL);
473 }
474 }
475 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
476 ret[len++] = *p++;
477 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000478 int val = *(unsigned char *)p++;
479 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000480 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000481 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
482 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000483 }
484 }
485 }
486 if (uri->fragment != NULL) {
487 if (len + 3 >= max) {
488 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000489 ret = (xmlChar *) xmlRealloc(ret,
490 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000491 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000492 xmlGenericError(xmlGenericErrorContext,
493 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000494 return(NULL);
495 }
496 }
497 ret[len++] = '#';
498 p = uri->fragment;
499 while (*p != 0) {
500 if (len + 3 >= max) {
501 max *= 2;
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000502 ret = (xmlChar *) xmlRealloc(ret,
503 (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000504 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000505 xmlGenericError(xmlGenericErrorContext,
506 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000507 return(NULL);
508 }
509 }
510 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
511 ret[len++] = *p++;
512 else {
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000513 int val = *(unsigned char *)p++;
514 int hi = val / 0x10, lo = val % 0x10;
Daniel Veillardec303412000-03-24 13:41:54 +0000515 ret[len++] = '%';
Daniel Veillard4fb87ee2000-09-19 12:25:59 +0000516 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
517 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
Daniel Veillardec303412000-03-24 13:41:54 +0000518 }
519 }
520 }
521 if (len >= max) {
522 max *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000523 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
Daniel Veillardec303412000-03-24 13:41:54 +0000524 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000525 xmlGenericError(xmlGenericErrorContext,
526 "xmlSaveUri: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +0000527 return(NULL);
528 }
529 }
530 ret[len++] = 0;
531 }
532 return(ret);
533}
534
535/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000536 * xmlPrintURI:
537 * @stream: a FILE* for the output
538 * @uri: pointer to an xmlURI
539 *
540 * Prints the URI in the stream @steam.
541 */
542void
543xmlPrintURI(FILE *stream, xmlURIPtr uri) {
Daniel Veillardec303412000-03-24 13:41:54 +0000544 xmlChar *out;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000545
Daniel Veillardec303412000-03-24 13:41:54 +0000546 out = xmlSaveUri(uri);
547 if (out != NULL) {
548 fprintf(stream, "%s", out);
549 xmlFree(out);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000550 }
551}
552
553/**
554 * xmlCleanURI:
555 * @uri: pointer to an xmlURI
556 *
557 * Make sure the xmlURI struct is free of content
558 */
559void
560xmlCleanURI(xmlURIPtr uri) {
561 if (uri == NULL) return;
562
563 if (uri->scheme != NULL) xmlFree(uri->scheme);
564 uri->scheme = NULL;
565 if (uri->server != NULL) xmlFree(uri->server);
566 uri->server = NULL;
Daniel Veillard361d8452000-04-03 19:48:13 +0000567 if (uri->user != NULL) xmlFree(uri->user);
568 uri->user = NULL;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000569 if (uri->path != NULL) xmlFree(uri->path);
570 uri->path = NULL;
571 if (uri->fragment != NULL) xmlFree(uri->fragment);
572 uri->fragment = NULL;
573 if (uri->opaque != NULL) xmlFree(uri->opaque);
574 uri->opaque = NULL;
575 if (uri->authority != NULL) xmlFree(uri->authority);
576 uri->authority = NULL;
577 if (uri->query != NULL) xmlFree(uri->query);
578 uri->query = NULL;
579}
580
581/**
582 * xmlFreeURI:
583 * @uri: pointer to an xmlURI
584 *
585 * Free up the xmlURI struct
586 */
587void
588xmlFreeURI(xmlURIPtr uri) {
589 if (uri == NULL) return;
590
591 if (uri->scheme != NULL) xmlFree(uri->scheme);
592 if (uri->server != NULL) xmlFree(uri->server);
Daniel Veillard361d8452000-04-03 19:48:13 +0000593 if (uri->user != NULL) xmlFree(uri->user);
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000594 if (uri->path != NULL) xmlFree(uri->path);
595 if (uri->fragment != NULL) xmlFree(uri->fragment);
596 if (uri->opaque != NULL) xmlFree(uri->opaque);
597 if (uri->authority != NULL) xmlFree(uri->authority);
598 if (uri->query != NULL) xmlFree(uri->query);
599 memset(uri, -1, sizeof(xmlURI));
600 xmlFree(uri);
601}
602
/************************************************************************
 *                                                                      *
 *                          Helper functions                            *
 *                                                                      *
 ************************************************************************/
608
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the input, "out" writes the compacted result; both point
     * to the start of a segment at the top of each iteration.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment, including its trailing '/'.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy() must not be used.  */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* Look backwards for the last character of a previous segment.
         * There is none when "cur" is already at the start of the buffer,
         * or when only '/' characters precede it; then keep going from
         * here.  A previous segment that starts at the very beginning of a
         * relative path (e.g. the "a" of "a/b/../..") does count.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((segp == cur) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        /* Testing cur[0] first stops the scan on the terminator once a
         * final "/.." has been consumed, instead of reading beyond it.
         */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
Daniel Veillard9e8bfae2000-11-06 16:43:11 +0000921
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000922/**
Daniel Veillard361d8452000-04-03 19:48:13 +0000923 * xmlURIUnescapeString:
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000924 * @str: the string to unescape
925 * @len: the lenght in bytes to unescape (or <= 0 to indicate full string)
926 * @target: optionnal destination buffer
927 *
928 * Unescaping routine, does not do validity checks !
Daniel Veillardec303412000-03-24 13:41:54 +0000929 * Output is direct unsigned char translation of %XX values (no encoding)
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000930 *
931 * Returns an copy of the string, but unescaped
932 */
933char *
Daniel Veillard361d8452000-04-03 19:48:13 +0000934xmlURIUnescapeString(const char *str, int len, char *target) {
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000935 char *ret, *out;
936 const char *in;
937
938 if (str == NULL)
939 return(NULL);
940 if (len <= 0) len = strlen(str);
941 if (len <= 0) return(NULL);
942
943 if (target == NULL) {
944 ret = (char *) xmlMalloc(len + 1);
945 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000946 xmlGenericError(xmlGenericErrorContext,
947 "xmlURIUnescapeString: out of memory\n");
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000948 return(NULL);
949 }
950 } else
951 ret = target;
952 in = str;
953 out = ret;
954 while(len > 0) {
955 if (*in == '%') {
956 in++;
957 if ((*in >= '0') && (*in <= '9'))
958 *out = (*in - '0');
959 else if ((*in >= 'a') && (*in <= 'f'))
960 *out = (*in - 'a') + 10;
961 else if ((*in >= 'A') && (*in <= 'F'))
962 *out = (*in - 'A') + 10;
963 in++;
964 if ((*in >= '0') && (*in <= '9'))
965 *out = *out * 16 + (*in - '0');
966 else if ((*in >= 'a') && (*in <= 'f'))
967 *out = *out * 16 + (*in - 'a') + 10;
968 else if ((*in >= 'A') && (*in <= 'F'))
969 *out = *out * 16 + (*in - 'A') + 10;
970 in++;
971 len -= 3;
Daniel Veillardec303412000-03-24 13:41:54 +0000972 out++;
Daniel Veillard3dd82e72000-03-20 11:48:04 +0000973 } else {
974 *out++ = *in++;
975 len--;
976 }
977 }
978 *out = 0;
979 return(ret);
980}
981
Daniel Veillard9e8bfae2000-11-06 16:43:11 +0000982/**
983 * xmlURIEscape:
984 * @str: the string of the URI to escape
985 *
986 * Escaping routine, does not do validity checks !
987 * It will try to escape the chars needing this, but this is heuristic
988 * based it's impossible to be sure.
989 *
990 * Returns an copy of the string, but escaped
991 */
992xmlChar *
993xmlURIEscape(const xmlChar *str) {
994 xmlChar *ret;
995 const xmlChar *in;
996 unsigned int len, out;
997
998 if (str == NULL)
999 return(NULL);
1000 len = xmlStrlen(str);
1001 if (len <= 0) return(NULL);
1002
1003 len += 20;
1004 ret = (xmlChar *) xmlMalloc(len);
1005 if (ret == NULL) {
1006 xmlGenericError(xmlGenericErrorContext,
1007 "xmlURIEscape: out of memory\n");
1008 return(NULL);
1009 }
1010 in = (const xmlChar *) str;
1011 out = 0;
1012 while(*in != 0) {
1013 if (len - out <= 3) {
1014 len += 20;
1015 ret = (xmlChar *) xmlRealloc(ret, len);
1016 if (ret == NULL) {
1017 xmlGenericError(xmlGenericErrorContext,
1018 "xmlURIEscape: out of memory\n");
1019 return(NULL);
1020 }
1021 }
1022 if ((!IS_UNRESERVED(*in)) && (*in != ':') && (*in != '/') &&
1023 (*in != '?') && (*in != '#')) {
1024 unsigned char val;
1025 ret[out++] = '%';
1026 val = *in >> 4;
1027 if (val <= 9)
1028 ret[out++] = '0' + val;
1029 else
1030 ret[out++] = 'A' + val - 0xA;
1031 val = *in & 0xF;
1032 if (val <= 9)
1033 ret[out++] = '0' + val;
1034 else
1035 ret[out++] = 'A' + val - 0xA;
1036 in++;
1037 } else {
1038 ret[out++] = *in++;
1039 }
1040 }
1041 ret[out] = 0;
1042 return(ret);
1043}
1044
1045/************************************************************************
1046 * *
1047 * Escaped URI parsing *
1048 * *
1049 ************************************************************************/
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001050
1051/**
1052 * xmlParseURIFragment:
1053 * @uri: pointer to an URI structure
1054 * @str: pointer to the string to analyze
1055 *
1056 * Parse an URI fragment string and fills in the appropriate fields
1057 * of the @uri structure.
1058 *
1059 * fragment = *uric
1060 *
1061 * Returns 0 or the error code
1062 */
1063int
1064xmlParseURIFragment(xmlURIPtr uri, const char **str) {
1065 const char *cur = *str;
1066
1067 if (str == NULL) return(-1);
1068
1069 while (IS_URIC(cur)) NEXT(cur);
1070 if (uri != NULL) {
1071 if (uri->fragment != NULL) xmlFree(uri->fragment);
Daniel Veillard361d8452000-04-03 19:48:13 +00001072 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001073 }
1074 *str = cur;
1075 return(0);
1076}
1077
1078/**
1079 * xmlParseURIQuery:
1080 * @uri: pointer to an URI structure
1081 * @str: pointer to the string to analyze
1082 *
1083 * Parse the query part of an URI
1084 *
1085 * query = *uric
1086 *
1087 * Returns 0 or the error code
1088 */
1089int
1090xmlParseURIQuery(xmlURIPtr uri, const char **str) {
1091 const char *cur = *str;
1092
1093 if (str == NULL) return(-1);
1094
1095 while (IS_URIC(cur)) NEXT(cur);
1096 if (uri != NULL) {
1097 if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillard361d8452000-04-03 19:48:13 +00001098 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001099 }
1100 *str = cur;
1101 return(0);
1102}
1103
1104/**
1105 * xmlParseURIScheme:
1106 * @uri: pointer to an URI structure
1107 * @str: pointer to the string to analyze
1108 *
1109 * Parse an URI scheme
1110 *
1111 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1112 *
1113 * Returns 0 or the error code
1114 */
1115int
1116xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1117 const char *cur;
1118
1119 if (str == NULL)
1120 return(-1);
1121
1122 cur = *str;
1123 if (!IS_ALPHA(*cur))
1124 return(2);
1125 cur++;
1126 while (IS_SCHEME(*cur)) cur++;
1127 if (uri != NULL) {
1128 if (uri->scheme != NULL) xmlFree(uri->scheme);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001129 /* !!! strndup */
1130 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001131 }
1132 *str = cur;
1133 return(0);
1134}
1135
1136/**
1137 * xmlParseURIOpaquePart:
1138 * @uri: pointer to an URI structure
1139 * @str: pointer to the string to analyze
1140 *
1141 * Parse an URI opaque part
1142 *
1143 * opaque_part = uric_no_slash *uric
1144 *
1145 * Returns 0 or the error code
1146 */
1147int
1148xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
1149 const char *cur;
1150
1151 if (str == NULL)
1152 return(-1);
1153
1154 cur = *str;
1155 if (!IS_URIC_NO_SLASH(cur)) {
1156 return(3);
1157 }
1158 NEXT(cur);
1159 while (IS_URIC(cur)) NEXT(cur);
1160 if (uri != NULL) {
1161 if (uri->opaque != NULL) xmlFree(uri->opaque);
Daniel Veillard361d8452000-04-03 19:48:13 +00001162 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001163 }
1164 *str = cur;
1165 return(0);
1166}
1167
1168/**
Daniel Veillard361d8452000-04-03 19:48:13 +00001169 * xmlParseURIServer:
1170 * @uri: pointer to an URI structure
1171 * @str: pointer to the string to analyze
1172 *
1173 * Parse a server subpart of an URI, it's a finer grain analysis
1174 * of the authority part.
1175 *
1176 * server = [ [ userinfo "@" ] hostport ]
1177 * userinfo = *( unreserved | escaped |
1178 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1179 * hostport = host [ ":" port ]
1180 * host = hostname | IPv4address
1181 * hostname = *( domainlabel "." ) toplabel [ "." ]
1182 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1183 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1184 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
1185 * port = *digit
1186 *
1187 * Returns 0 or the error code
1188 */
1189int
1190xmlParseURIServer(xmlURIPtr uri, const char **str) {
1191 const char *cur;
1192 const char *host, *tmp;
1193
1194 if (str == NULL)
1195 return(-1);
1196
1197 cur = *str;
1198
1199 /*
1200 * is there an userinfo ?
1201 */
1202 while (IS_USERINFO(cur)) NEXT(cur);
1203 if (*cur == '@') {
1204 if (uri != NULL) {
1205 if (uri->user != NULL) xmlFree(uri->user);
1206 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1207 }
1208 cur++;
1209 } else {
1210 if (uri != NULL) {
1211 if (uri->user != NULL) xmlFree(uri->user);
1212 uri->user = NULL;
1213 }
1214 cur = *str;
1215 }
1216 /*
Daniel Veillard740abf52000-10-02 23:04:54 +00001217 * This can be empty in the case where there is no server
1218 */
1219 host = cur;
1220 if (*cur == '/') {
1221 if (uri != NULL) {
1222 if (uri->authority != NULL) xmlFree(uri->authority);
1223 uri->authority = NULL;
1224 if (uri->server != NULL) xmlFree(uri->server);
1225 uri->server = NULL;
1226 uri->port = 0;
1227 }
1228 return(0);
1229 }
1230 /*
Daniel Veillard361d8452000-04-03 19:48:13 +00001231 * host part of hostport can derive either an IPV4 address
1232 * or an unresolved name. Check the IP first, it easier to detect
1233 * errors if wrong one
1234 */
Daniel Veillard361d8452000-04-03 19:48:13 +00001235 if (IS_DIGIT(*cur)) {
1236 while(IS_DIGIT(*cur)) cur++;
1237 if (*cur != '.')
1238 goto host_name;
1239 cur++;
1240 if (!IS_DIGIT(*cur))
1241 goto host_name;
1242 while(IS_DIGIT(*cur)) cur++;
1243 if (*cur != '.')
1244 goto host_name;
1245 cur++;
1246 if (!IS_DIGIT(*cur))
1247 goto host_name;
1248 while(IS_DIGIT(*cur)) cur++;
1249 if (*cur != '.')
1250 goto host_name;
1251 cur++;
1252 if (!IS_DIGIT(*cur))
1253 goto host_name;
1254 while(IS_DIGIT(*cur)) cur++;
1255 if (uri != NULL) {
1256 if (uri->authority != NULL) xmlFree(uri->authority);
1257 uri->authority = NULL;
1258 if (uri->server != NULL) xmlFree(uri->server);
1259 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1260 }
1261 goto host_done;
1262 }
1263host_name:
1264 /*
1265 * the hostname production as-is is a parser nightmare.
1266 * simplify it to
1267 * hostname = *( domainlabel "." ) domainlabel [ "." ]
1268 * and just make sure the last label starts with a non numeric char.
1269 */
1270 if (!IS_ALPHANUM(*cur))
1271 return(6);
1272 while (IS_ALPHANUM(*cur)) {
1273 while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++;
1274 if (*cur == '.')
1275 cur++;
1276 }
1277 tmp = cur;
1278 tmp--;
1279 while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--;
1280 tmp++;
1281 if (!IS_ALPHA(*tmp))
1282 return(7);
1283 if (uri != NULL) {
1284 if (uri->authority != NULL) xmlFree(uri->authority);
1285 uri->authority = NULL;
1286 if (uri->server != NULL) xmlFree(uri->server);
1287 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1288 }
1289
1290host_done:
1291
1292 /*
1293 * finish by checking for a port presence.
1294 */
1295 if (*cur == ':') {
1296 cur++;
1297 if (IS_DIGIT(*cur)) {
1298 if (uri != NULL)
1299 uri->port = 0;
1300 while (IS_DIGIT(*cur)) {
1301 if (uri != NULL)
1302 uri->port = uri->port * 10 + (*cur - '0');
1303 cur++;
1304 }
1305 }
1306 }
1307 *str = cur;
1308 return(0);
1309}
1310
1311/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001312 * xmlParseURIRelSegment:
1313 * @uri: pointer to an URI structure
1314 * @str: pointer to the string to analyze
1315 *
1316 * Parse an URI relative segment
1317 *
1318 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1319 * "+" | "$" | "," )
1320 *
1321 * Returns 0 or the error code
1322 */
1323int
1324xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
1325 const char *cur;
1326
1327 if (str == NULL)
1328 return(-1);
1329
1330 cur = *str;
1331 if (!IS_SEGMENT(cur)) {
1332 return(3);
1333 }
1334 NEXT(cur);
1335 while (IS_SEGMENT(cur)) NEXT(cur);
1336 if (uri != NULL) {
1337 if (uri->path != NULL) xmlFree(uri->path);
Daniel Veillard361d8452000-04-03 19:48:13 +00001338 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001339 }
1340 *str = cur;
1341 return(0);
1342}
1343
1344/**
1345 * xmlParseURIPathSegments:
1346 * @uri: pointer to an URI structure
1347 * @str: pointer to the string to analyze
1348 * @slash: should we add a leading slash
1349 *
1350 * Parse an URI set of path segments
1351 *
1352 * path_segments = segment *( "/" segment )
1353 * segment = *pchar *( ";" param )
1354 * param = *pchar
1355 *
1356 * Returns 0 or the error code
1357 */
1358int
1359xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
1360 const char *cur;
1361
1362 if (str == NULL)
1363 return(-1);
1364
1365 cur = *str;
1366
1367 do {
1368 while (IS_PCHAR(cur)) NEXT(cur);
1369 if (*cur == ';') {
1370 cur++;
1371 while (IS_PCHAR(cur)) NEXT(cur);
1372 }
1373 if (*cur != '/') break;
1374 cur++;
1375 } while (1);
1376 if (uri != NULL) {
1377 int len, len2 = 0;
1378 char *path;
1379
1380 /*
1381 * Concat the set of path segments to the current path
1382 */
1383 len = cur - *str;
1384 if (slash)
1385 len++;
1386
1387 if (uri->path != NULL) {
1388 len2 = strlen(uri->path);
1389 len += len2;
1390 }
1391 path = (char *) xmlMalloc(len + 1);
1392 if (path == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001393 xmlGenericError(xmlGenericErrorContext,
1394 "xmlParseURIPathSegments: out of memory\n");
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001395 *str = cur;
1396 return(-1);
1397 }
1398 if (uri->path != NULL)
1399 memcpy(path, uri->path, len2);
1400 if (slash) {
1401 path[len2] = '/';
1402 len2++;
1403 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001404 path[len2] = 0;
1405 if (cur - *str > 0)
1406 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001407 if (uri->path != NULL)
1408 xmlFree(uri->path);
1409 uri->path = path;
1410 }
1411 *str = cur;
1412 return(0);
1413}
1414
1415/**
1416 * xmlParseURIAuthority:
1417 * @uri: pointer to an URI structure
1418 * @str: pointer to the string to analyze
1419 *
1420 * Parse the authority part of an URI.
1421 *
1422 * authority = server | reg_name
1423 * server = [ [ userinfo "@" ] hostport ]
1424 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1425 * "@" | "&" | "=" | "+" )
1426 *
1427 * Note : this is completely ambiguous since reg_name is allowed to
1428 * use the full set of chars in use by server:
1429 *
1430 * 3.2.1. Registry-based Naming Authority
1431 *
1432 * The structure of a registry-based naming authority is specific
1433 * to the URI scheme, but constrained to the allowed characters
1434 * for an authority component.
1435 *
1436 * Returns 0 or the error code
1437 */
1438int
1439xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1440 const char *cur;
Daniel Veillard361d8452000-04-03 19:48:13 +00001441 int ret;
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001442
1443 if (str == NULL)
1444 return(-1);
1445
1446 cur = *str;
Daniel Veillard361d8452000-04-03 19:48:13 +00001447
1448 /*
1449 * try first to parse it as a server string.
1450 */
1451 ret = xmlParseURIServer(uri, str);
1452 if (ret == 0)
1453 return(0);
1454
1455 /*
1456 * failed, fallback to reg_name
1457 */
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001458 if (!IS_REG_NAME(cur)) {
1459 return(5);
1460 }
1461 NEXT(cur);
1462 while (IS_REG_NAME(cur)) NEXT(cur);
1463 if (uri != NULL) {
Daniel Veillard361d8452000-04-03 19:48:13 +00001464 if (uri->server != NULL) xmlFree(uri->server);
1465 uri->server = NULL;
1466 if (uri->user != NULL) xmlFree(uri->user);
1467 uri->user = NULL;
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001468 if (uri->authority != NULL) xmlFree(uri->authority);
Daniel Veillard361d8452000-04-03 19:48:13 +00001469 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001470 }
1471 *str = cur;
1472 return(0);
1473}
1474
1475/**
1476 * xmlParseURIHierPart:
1477 * @uri: pointer to an URI structure
1478 * @str: pointer to the string to analyze
1479 *
1480 * Parse an URI hirarchical part
1481 *
1482 * hier_part = ( net_path | abs_path ) [ "?" query ]
1483 * abs_path = "/" path_segments
1484 * net_path = "//" authority [ abs_path ]
1485 *
1486 * Returns 0 or the error code
1487 */
1488int
1489xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1490 int ret;
1491 const char *cur;
1492
1493 if (str == NULL)
1494 return(-1);
1495
1496 cur = *str;
1497
1498 if ((cur[0] == '/') && (cur[1] == '/')) {
1499 cur += 2;
1500 ret = xmlParseURIAuthority(uri, &cur);
1501 if (ret != 0)
1502 return(ret);
1503 if (cur[0] == '/') {
1504 cur++;
1505 ret = xmlParseURIPathSegments(uri, &cur, 1);
1506 }
1507 } else if (cur[0] == '/') {
1508 cur++;
1509 ret = xmlParseURIPathSegments(uri, &cur, 1);
1510 } else {
1511 return(4);
1512 }
1513 if (ret != 0)
1514 return(ret);
1515 if (*cur == '?') {
1516 cur++;
1517 ret = xmlParseURIQuery(uri, &cur);
1518 if (ret != 0)
1519 return(ret);
1520 }
1521 *str = cur;
1522 return(0);
1523}
1524
1525/**
1526 * xmlParseAbsoluteURI:
1527 * @uri: pointer to an URI structure
1528 * @str: pointer to the string to analyze
1529 *
1530 * Parse an URI reference string and fills in the appropriate fields
1531 * of the @uri structure
1532 *
1533 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1534 *
1535 * Returns 0 or the error code
1536 */
1537int
1538xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1539 int ret;
1540
1541 if (str == NULL)
1542 return(-1);
1543
1544 ret = xmlParseURIScheme(uri, str);
1545 if (ret != 0) return(ret);
1546 if (**str != ':')
1547 return(1);
1548 (*str)++;
1549 if (**str == '/')
1550 return(xmlParseURIHierPart(uri, str));
1551 return(xmlParseURIOpaquePart(uri, str));
1552}
1553
1554/**
1555 * xmlParseRelativeURI:
1556 * @uri: pointer to an URI structure
1557 * @str: pointer to the string to analyze
1558 *
1559 * Parse an relative URI string and fills in the appropriate fields
1560 * of the @uri structure
1561 *
1562 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1563 * abs_path = "/" path_segments
1564 * net_path = "//" authority [ abs_path ]
1565 * rel_path = rel_segment [ abs_path ]
1566 *
1567 * Returns 0 or the error code
1568 */
1569int
1570xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1571 int ret = 0;
1572 const char *cur;
1573
1574 if (str == NULL)
1575 return(-1);
1576
1577 cur = *str;
1578 if ((cur[0] == '/') && (cur[1] == '/')) {
1579 cur += 2;
1580 ret = xmlParseURIAuthority(uri, &cur);
1581 if (ret != 0)
1582 return(ret);
1583 if (cur[0] == '/') {
1584 cur++;
1585 ret = xmlParseURIPathSegments(uri, &cur, 1);
1586 }
1587 } else if (cur[0] == '/') {
1588 cur++;
1589 ret = xmlParseURIPathSegments(uri, &cur, 1);
Daniel Veillard98a79162000-09-04 11:15:39 +00001590 } else if (cur[0] != '#' && cur[0] != '?') {
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001591 ret = xmlParseURIRelSegment(uri, &cur);
1592 if (ret != 0)
1593 return(ret);
1594 if (cur[0] == '/') {
1595 cur++;
1596 ret = xmlParseURIPathSegments(uri, &cur, 1);
1597 }
1598 }
1599 if (ret != 0)
1600 return(ret);
1601 if (*cur == '?') {
1602 cur++;
1603 ret = xmlParseURIQuery(uri, &cur);
1604 if (ret != 0)
1605 return(ret);
1606 }
1607 *str = cur;
1608 return(ret);
1609}
1610
1611/**
1612 * xmlParseURIReference:
1613 * @uri: pointer to an URI structure
1614 * @str: the string to analyze
1615 *
1616 * Parse an URI reference string and fills in the appropriate fields
1617 * of the @uri structure
1618 *
1619 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1620 *
1621 * Returns 0 or the error code
1622 */
1623int
1624xmlParseURIReference(xmlURIPtr uri, const char *str) {
1625 int ret;
1626 const char *tmp = str;
1627
1628 if (str == NULL)
1629 return(-1);
1630 xmlCleanURI(uri);
1631
1632 /*
1633 * Try first to parse aboslute refs, then fallback to relative if
1634 * it fails.
1635 */
1636 ret = xmlParseAbsoluteURI(uri, &str);
1637 if (ret != 0) {
1638 xmlCleanURI(uri);
1639 str = tmp;
1640 ret = xmlParseRelativeURI(uri, &str);
1641 }
1642 if (ret != 0) {
1643 xmlCleanURI(uri);
1644 return(ret);
1645 }
1646
1647 if (*str == '#') {
1648 str++;
1649 ret = xmlParseURIFragment(uri, &str);
1650 if (ret != 0) return(ret);
1651 }
1652 if (*str != 0) {
1653 xmlCleanURI(uri);
1654 return(1);
1655 }
1656 return(0);
1657}
1658
1659/**
Daniel Veillard496a1cf2000-05-03 14:20:55 +00001660 * xmlParseURI:
1661 * @str: the URI string to analyze
1662 *
1663 * Parse an URI
1664 *
1665 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1666 *
1667 * Returns a newly build xmlURIPtr or NULL in case of error
1668 */
1669xmlURIPtr
1670xmlParseURI(const char *str) {
1671 xmlURIPtr uri;
1672 int ret;
1673
1674 if (str == NULL)
1675 return(NULL);
1676 uri = xmlCreateURI();
1677 if (uri != NULL) {
1678 ret = xmlParseURIReference(uri, str);
1679 if (ret) {
1680 xmlFreeURI(uri);
1681 return(NULL);
1682 }
1683 }
1684 return(uri);
1685}
1686
Daniel Veillard9e8bfae2000-11-06 16:43:11 +00001687/************************************************************************
1688 * *
1689 * Public functions *
1690 * *
1691 ************************************************************************/
Daniel Veillardec303412000-03-24 13:41:54 +00001692
1693/**
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001694 * xmlBuildURI:
1695 * @URI: the URI instance found in the document
1696 * @base: the base value
1697 *
1698 * Computes he final URI of the reference done by checking that
1699 * the given URI is valid, and building the final URI using the
1700 * base URI. This is processed according to section 5.2 of the
1701 * RFC 2396
1702 *
1703 * 5.2. Resolving Relative References to Absolute Form
1704 *
Daniel Veillardec303412000-03-24 13:41:54 +00001705 * Returns a new URI string (to be freed by the caller) or NULL in case
1706 * of error.
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001707 */
1708xmlChar *
1709xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
Daniel Veillardec303412000-03-24 13:41:54 +00001710 xmlChar *val = NULL;
Daniel Veillardbd20df72000-10-29 17:53:40 +00001711 int ret, len, index, cur, out;
Daniel Veillardec303412000-03-24 13:41:54 +00001712 xmlURIPtr ref = NULL;
1713 xmlURIPtr bas = NULL;
1714 xmlURIPtr res = NULL;
1715
Daniel Veillardec303412000-03-24 13:41:54 +00001716 /*
1717 * 1) The URI reference is parsed into the potential four components and
1718 * fragment identifier, as described in Section 4.3.
Daniel Veillard90e11312000-09-05 10:42:32 +00001719 *
1720 * NOTE that a completely empty URI is treated by modern browsers
1721 * as a reference to "." rather than as a synonym for the current
1722 * URI. Should we do that here?
Daniel Veillardec303412000-03-24 13:41:54 +00001723 */
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001724 if (URI == NULL)
1725 ret = -1;
1726 else {
Daniel Veillardbd20df72000-10-29 17:53:40 +00001727 if (*URI) {
1728 ref = xmlCreateURI();
1729 if (ref == NULL)
1730 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001731 ret = xmlParseURIReference(ref, (const char *) URI);
Daniel Veillardbd20df72000-10-29 17:53:40 +00001732 }
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001733 else
Daniel Veillardbd20df72000-10-29 17:53:40 +00001734 ret = 0;
Daniel Veillard98a79162000-09-04 11:15:39 +00001735 }
Daniel Veillardbd20df72000-10-29 17:53:40 +00001736 if (ret != 0)
1737 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001738 if (base == NULL)
Daniel Veillardbd20df72000-10-29 17:53:40 +00001739 ret = -1;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001740 else {
1741 bas = xmlCreateURI();
1742 if (bas == NULL)
1743 goto done;
Daniel Veillardbd20df72000-10-29 17:53:40 +00001744 ret = xmlParseURIReference(bas, (const char *) base);
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001745 }
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001746 if (ret != 0) {
Daniel Veillardbd20df72000-10-29 17:53:40 +00001747 if (ref)
1748 val = xmlSaveUri(ref);
1749 goto done;
1750 }
1751 if (ref == NULL) {
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001752 /*
1753 * the base fragment must be ignored
1754 */
1755 if (bas->fragment != NULL) {
1756 xmlFree(bas->fragment);
1757 bas->fragment = NULL;
1758 }
1759 val = xmlSaveUri(bas);
Daniel Veillardec303412000-03-24 13:41:54 +00001760 goto done;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001761 }
Daniel Veillardec303412000-03-24 13:41:54 +00001762
1763 /*
1764 * 2) If the path component is empty and the scheme, authority, and
1765 * query components are undefined, then it is a reference to the
Daniel Veillard90e11312000-09-05 10:42:32 +00001766 * current document and we are done. Otherwise, the reference URI's
1767 * query and fragment components are defined as found (or not found)
1768 * within the URI reference and not inherited from the base URI.
Daniel Veillard98a79162000-09-04 11:15:39 +00001769 *
Daniel Veillard90e11312000-09-05 10:42:32 +00001770 * NOTE that in modern browsers, the parsing differs from the above
1771 * in the following aspect: the query component is allowed to be
1772 * defined while still treating this as a reference to the current
1773 * document.
Daniel Veillardec303412000-03-24 13:41:54 +00001774 */
1775 res = xmlCreateURI();
1776 if (res == NULL)
1777 goto done;
1778 if ((ref->scheme == NULL) && (ref->path == NULL) &&
Daniel Veillard90e11312000-09-05 10:42:32 +00001779 ((ref->authority == NULL) && (ref->server == NULL))) {
1780 if (bas->scheme != NULL)
1781 res->scheme = xmlMemStrdup(bas->scheme);
1782 if (bas->authority != NULL)
1783 res->authority = xmlMemStrdup(bas->authority);
1784 else if (bas->server != NULL) {
1785 res->server = xmlMemStrdup(bas->server);
1786 if (bas->user != NULL)
1787 res->user = xmlMemStrdup(bas->user);
1788 res->port = bas->port;
1789 }
1790 if (bas->path != NULL)
1791 res->path = xmlMemStrdup(bas->path);
1792 if (ref->query != NULL)
1793 res->query = xmlMemStrdup(ref->query);
1794 else if (bas->query != NULL)
1795 res->query = xmlMemStrdup(bas->query);
1796 if (ref->fragment != NULL)
1797 res->fragment = xmlMemStrdup(ref->fragment);
1798 goto step_7;
Daniel Veillardec303412000-03-24 13:41:54 +00001799 }
Daniel Veillard98a79162000-09-04 11:15:39 +00001800
Daniel Veillard90e11312000-09-05 10:42:32 +00001801 if (ref->query != NULL)
1802 res->query = xmlMemStrdup(ref->query);
1803 if (ref->fragment != NULL)
1804 res->fragment = xmlMemStrdup(ref->fragment);
Daniel Veillardec303412000-03-24 13:41:54 +00001805
1806 /*
1807 * 3) If the scheme component is defined, indicating that the reference
1808 * starts with a scheme name, then the reference is interpreted as an
1809 * absolute URI and we are done. Otherwise, the reference URI's
1810 * scheme is inherited from the base URI's scheme component.
1811 */
1812 if (ref->scheme != NULL) {
1813 val = xmlSaveUri(ref);
1814 goto done;
1815 }
Daniel Veillardbe803962000-06-28 23:40:59 +00001816 if (bas->scheme != NULL)
1817 res->scheme = xmlMemStrdup(bas->scheme);
Daniel Veillardec303412000-03-24 13:41:54 +00001818
1819 /*
1820 * 4) If the authority component is defined, then the reference is a
1821 * network-path and we skip to step 7. Otherwise, the reference
1822 * URI's authority is inherited from the base URI's authority
1823 * component, which will also be undefined if the URI scheme does not
1824 * use an authority component.
1825 */
Daniel Veillard361d8452000-04-03 19:48:13 +00001826 if ((ref->authority != NULL) || (ref->server != NULL)) {
1827 if (ref->authority != NULL)
1828 res->authority = xmlMemStrdup(ref->authority);
1829 else {
1830 res->server = xmlMemStrdup(ref->server);
1831 if (ref->user != NULL)
1832 res->user = xmlMemStrdup(ref->user);
1833 res->port = ref->port;
1834 }
Daniel Veillardec303412000-03-24 13:41:54 +00001835 if (ref->path != NULL)
1836 res->path = xmlMemStrdup(ref->path);
Daniel Veillardec303412000-03-24 13:41:54 +00001837 goto step_7;
1838 }
1839 if (bas->authority != NULL)
1840 res->authority = xmlMemStrdup(bas->authority);
Daniel Veillard361d8452000-04-03 19:48:13 +00001841 else if (bas->server != NULL) {
1842 res->server = xmlMemStrdup(bas->server);
1843 if (bas->user != NULL)
1844 res->user = xmlMemStrdup(bas->user);
1845 res->port = bas->port;
1846 }
Daniel Veillardec303412000-03-24 13:41:54 +00001847
1848 /*
1849 * 5) If the path component begins with a slash character ("/"), then
1850 * the reference is an absolute-path and we skip to step 7.
1851 */
Daniel Veillard90e11312000-09-05 10:42:32 +00001852 if ((ref->path != NULL) && (ref->path[0] == '/')) {
Daniel Veillardec303412000-03-24 13:41:54 +00001853 res->path = xmlMemStrdup(ref->path);
Daniel Veillardec303412000-03-24 13:41:54 +00001854 goto step_7;
1855 }
1856
1857
1858 /*
1859 * 6) If this step is reached, then we are resolving a relative-path
1860 * reference. The relative path needs to be merged with the base
1861 * URI's path. Although there are many ways to do this, we will
1862 * describe a simple method using a separate string buffer.
1863 *
1864 * Allocate a buffer large enough for the result string.
1865 */
1866 len = 2; /* extra / and 0 */
1867 if (ref->path != NULL)
1868 len += strlen(ref->path);
1869 if (bas->path != NULL)
1870 len += strlen(bas->path);
1871 res->path = (char *) xmlMalloc(len);
1872 if (res->path == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001873 xmlGenericError(xmlGenericErrorContext,
1874 "xmlBuildURI: out of memory\n");
Daniel Veillardec303412000-03-24 13:41:54 +00001875 goto done;
1876 }
1877 res->path[0] = 0;
1878
1879 /*
1880 * a) All but the last segment of the base URI's path component is
1881 * copied to the buffer. In other words, any characters after the
1882 * last (right-most) slash character, if any, are excluded.
1883 */
1884 cur = 0;
1885 out = 0;
1886 if (bas->path != NULL) {
1887 while (bas->path[cur] != 0) {
1888 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1889 cur++;
1890 if (bas->path[cur] == 0)
1891 break;
1892
1893 cur++;
1894 while (out < cur) {
1895 res->path[out] = bas->path[out];
1896 out++;
1897 }
1898 }
1899 }
1900 res->path[out] = 0;
1901
1902 /*
1903 * b) The reference's path component is appended to the buffer
1904 * string.
1905 */
Daniel Veillard8ddb5a72000-09-23 10:28:52 +00001906 if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillardec303412000-03-24 13:41:54 +00001907 index = 0;
Daniel Veillard52402ce2000-08-22 23:36:12 +00001908 /*
1909 * Ensure the path includes a '/'
1910 */
Daniel Veillardf09e7e32000-10-01 15:53:30 +00001911 if ((out == 0) && (bas->server != NULL))
Daniel Veillard52402ce2000-08-22 23:36:12 +00001912 res->path[out++] = '/';
Daniel Veillardec303412000-03-24 13:41:54 +00001913 while (ref->path[index] != 0) {
1914 res->path[out++] = ref->path[index++];
1915 }
1916 }
1917 res->path[out] = 0;
1918
1919 /*
1920 * Steps c) to h) are really path normalization steps
1921 */
1922 xmlNormalizeURIPath(res->path);
1923
1924step_7:
1925
1926 /*
1927 * 7) The resulting URI components, including any inherited from the
1928 * base URI, are recombined to give the absolute form of the URI
1929 * reference.
1930 */
1931 val = xmlSaveUri(res);
1932
1933done:
1934 if (ref != NULL)
1935 xmlFreeURI(ref);
Daniel Veillard39c7d712000-09-10 16:14:55 +00001936 if (bas != NULL)
Daniel Veillardec303412000-03-24 13:41:54 +00001937 xmlFreeURI(bas);
1938 if (res != NULL)
1939 xmlFreeURI(res);
1940 return(val);
Daniel Veillard3dd82e72000-03-20 11:48:04 +00001941}
1942
1943