Blame - uri.c - platform/external/libxml2

blob: 183bef331025c1178906d92a88c02ee146d5532e [file] [log] [blame]

Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1	/**
				2	* uri.c: set of generic URI related routines
				3	*
				4	* Reference: RFC 2396
				5	*
				6	* See Copyright for the status of this software.
				7	*
				8	* Daniel.Veillard@w3.org
				9	*/
				10
				11	#ifdef WIN32
				12	#define INCLUDE_WINSOCK
				13	#include "win32config.h"
				14	#else
				15	#include "config.h"
				16	#endif
				17
				18	#include <stdio.h>
				19	#include <string.h>
				20
				21	#include <libxml/xmlmemory.h>
				22	#include <libxml/uri.h>
				23	#include <libxml/xmlerror.h>
				24
				25	/************************************************************************
				26	* *
				27	* Macros to differentiate various character types *
				28	* directly extracted from RFC 2396 *
				29	* *
				30	************************************************************************/
				31
				32	/*
				33	* alpha = lowalpha \| upalpha
				34	*/
				35	#define IS_ALPHA(x) (IS_LOWALPHA(x) \|\| IS_UPALPHA(x))
				36
				37
				38	/*
				39	* lowalpha = "a" \| "b" \| "c" \| "d" \| "e" \| "f" \| "g" \| "h" \| "i" \| "j" \|
				40	* "k" \| "l" \| "m" \| "n" \| "o" \| "p" \| "q" \| "r" \| "s" \| "t" \|
				41	* "u" \| "v" \| "w" \| "x" \| "y" \| "z"
				42	*/
				43
				44	#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
				45
				46	/*
				47	* upalpha = "A" \| "B" \| "C" \| "D" \| "E" \| "F" \| "G" \| "H" \| "I" \| "J" \|
				48	* "K" \| "L" \| "M" \| "N" \| "O" \| "P" \| "Q" \| "R" \| "S" \| "T" \|
				49	* "U" \| "V" \| "W" \| "X" \| "Y" \| "Z"
				50	*/
				51	#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
				52
				53	/*
				54	* digit = "0" \| "1" \| "2" \| "3" \| "4" \| "5" \| "6" \| "7" \| "8" \| "9"
				55	*/
				56
				57	#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
				58
				59	/*
				60	* alphanum = alpha \| digit
				61	*/
				62
				63	#define IS_ALPHANUM(x) (IS_ALPHA(x) \|\| IS_DIGIT(x))
				64
				65	/*
				66	* hex = digit \| "A" \| "B" \| "C" \| "D" \| "E" \| "F" \|
				67	* "a" \| "b" \| "c" \| "d" \| "e" \| "f"
				68	*/
				69
				70	#define IS_HEX(x) ((IS_DIGIT(x)) \|\| (((x) >= 'a') && ((x) <= 'f')) \|\| \
				71	(((x) >= 'A') && ((x) <= 'F')))
				72
				73	/*
				74	* mark = "-" \| "_" \| "." \| "!" \| "~" \| "*" \| "'" \| "(" \| ")"
				75	*/
				76
				77	#define IS_MARK(x) (((x) == '-') \|\| ((x) == '_') \|\| ((x) == '.') \|\| \
				78	((x) == '!') \|\| ((x) == '~') \|\| ((x) == '*') \|\| ((x) == '\'') \|\| \
				79	((x) == '(') \|\| ((x) == ')'))
				80
				81
				82	/*
				83	* reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| ","
				84	*/
				85
				86	#define IS_RESERVED(x) (((x) == ';') \|\| ((x) == '/') \|\| ((x) == '?') \|\| \
				87	((x) == ':') \|\| ((x) == '@') \|\| ((x) == '&') \|\| ((x) == '=') \|\| \
				88	((x) == '+') \|\| ((x) == '$') \|\| ((x) == ','))
				89
				90	/*
				91	* unreserved = alphanum \| mark
				92	*/
				93
				94	#define IS_UNRESERVED(x) (IS_ALPHANUM(x) \|\| IS_MARK(x))
				95
				96	/*
				97	* escaped = "%" hex hex
				98	*/
				99
				100	#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \
				101	(IS_HEX((p)[2])))
				102
				103	/*
				104	* uric_no_slash = unreserved \| escaped \| ";" \| "?" \| ":" \| "@" \|
				105	* "&" \| "=" \| "+" \| "$" \| ","
				106	*/
				107	#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) \|\| (IS_ESCAPED(p)) \|\|\
				108	(((p) == ';')) \|\| (((p) == '?')) \|\| ((*(p) == ':')) \|\|\
				109	(((p) == '@')) \|\| (((p) == '&')) \|\| ((*(p) == '=')) \|\|\
				110	(((p) == '+')) \|\| (((p) == '$')) \|\| ((*(p) == ',')))
				111
				112	/*
				113	* pchar = unreserved \| escaped \| ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| ","
				114	*/
				115	#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) \|\| (IS_ESCAPED(p)) \|\| \
				116	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|\
				117	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\|\
				118	((*(p) == ',')))
				119
				120	/*
				121	* rel_segment = 1*( unreserved \| escaped \|
				122	* ";" \| "@" \| "&" \| "=" \| "+" \| "$" \| "," )
				123	*/
				124
				125	#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) \|\| (IS_ESCAPED(p)) \|\| \
				126	(((p) == ';')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\| \
				127	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\| \
				128	((*(p) == ',')))
				129
				130	/*
				131	* scheme = alpha *( alpha \| digit \| "+" \| "-" \| "." )
				132	*/
				133
				134	#define IS_SCHEME(x) ((IS_ALPHA(x)) \|\| (IS_DIGIT(x)) \|\| \
				135	((x) == '+') \|\| ((x) == '-') \|\| ((x) == '.'))
				136
				137	/*
				138	* reg_name = 1*( unreserved \| escaped \| "$" \| "," \|
				139	* ";" \| ":" \| "@" \| "&" \| "=" \| "+" )
				140	*/
				141
				142	#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) \|\| (IS_ESCAPED(p)) \|\| \
				143	(((p) == '$')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\| \
				144	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\| \
				145	(((p) == '=')) \|\| (((p) == '+')))
				146
				147	/*
				148	* userinfo = *( unreserved \| escaped \| ";" \| ":" \| "&" \| "=" \|
				149	* "+" \| "$" \| "," )
				150	*/
				151	#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) \|\| (IS_ESCAPED(p)) \|\| \
				152	(((p) == ';')) \|\| (((p) == ':')) \|\| ((*(p) == '&')) \|\| \
				153	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\| \
				154	((*(p) == ',')))
				155
				156	/*
				157	* uric = reserved \| unreserved \| escaped
				158	*/
				159
				160	#define IS_URIC(p) ((IS_UNRESERVED(*(p))) \|\| (IS_ESCAPED(p)) \|\| \
				161	(IS_RESERVED(*(p))))
				162
				163	/*
				164	* Skip to next pointer char, handle escaped sequences
				165	*/
				166
				167	#define NEXT(p) ((*p == '%')? p += 3 : p++)
				168
				169	/*
				170	* Productions from the spec.
				171	*
				172	* authority = server \| reg_name
				173	* reg_name = 1*( unreserved \| escaped \| "$" \| "," \|
				174	* ";" \| ":" \| "@" \| "&" \| "=" \| "+" )
				175	*
				176	* path = [ abs_path \| opaque_part ]
				177	*/
				178
				179	/************************************************************************
				180	* *
				181	* Generic URI structure functions *
				182	* *
				183	************************************************************************/
				184
				185	/**
				186	* xmlCreateURI:
				187	*
				188	* Simply creates an empty xmlURI
				189	*
				190	* Returns the new structure or NULL in case of error
				191	*/
				192	xmlURIPtr
				193	xmlCreateURI(void) {
				194	xmlURIPtr ret;
				195
				196	ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
				197	if (ret == NULL) {
				198	xmlGenericError(xmlGenericErrorContext,
				199	"xmlCreateURI: out of memory\n");
				200	return(NULL);
				201	}
				202	memset(ret, 0, sizeof(xmlURI));
				203	return(ret);
				204	}
				205
				206	/**
				207	* xmlSaveUri:
				208	* @uri: pointer to an xmlURI
				209	*
				210	* Save the URI as an escaped string
				211	*
				212	* Returns a new string (to be deallocated by caller)
				213	*/
				214	xmlChar *
				215	xmlSaveUri(xmlURIPtr uri) {
				216	xmlChar *ret = NULL;
				217	const char *p;
				218	int len;
				219	int max;
				220
				221	if (uri == NULL) return(NULL);
				222
				223
				224	max = 80;
				225	ret = (xmlChar ) xmlMalloc((max + 1) sizeof(xmlChar));
				226	if (ret == NULL) {
				227	xmlGenericError(xmlGenericErrorContext,
				228	"xmlSaveUri: out of memory\n");
				229	return(NULL);
				230	}
				231	len = 0;
				232
				233	if (uri->scheme != NULL) {
				234	p = uri->scheme;
				235	while (*p != 0) {
				236	if (len >= max) {
				237	max *= 2;
				238	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
				239	if (ret == NULL) {
				240	xmlGenericError(xmlGenericErrorContext,
				241	"xmlSaveUri: out of memory\n");
				242	return(NULL);
				243	}
				244	}
				245	ret[len++] = *p++;
				246	}
				247	if (len >= max) {
				248	max *= 2;
				249	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
				250	if (ret == NULL) {
				251	xmlGenericError(xmlGenericErrorContext,
				252	"xmlSaveUri: out of memory\n");
				253	return(NULL);
				254	}
				255	}
				256	ret[len++] = ':';
				257	}
				258	if (uri->opaque != NULL) {
				259	p = uri->opaque;
				260	while (*p != 0) {
				261	if (len + 3 >= max) {
				262	max *= 2;
				263	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
				264	if (ret == NULL) {
				265	xmlGenericError(xmlGenericErrorContext,
				266	"xmlSaveUri: out of memory\n");
				267	return(NULL);
				268	}
				269	}
				270	if ((IS_UNRESERVED(*(p))) \|\|
				271	(((p) == ';')) \|\| (((p) == '?')) \|\| ((*(p) == ':')) \|\|
				272	(((p) == '@')) \|\| (((p) == '&')) \|\| ((*(p) == '=')) \|\|
				273	(((p) == '+')) \|\| (((p) == '$')) \|\| ((*(p) == ',')))
				274	ret[len++] = *p++;
				275	else {
				276	int val = (unsigned char )p++;
				277	int hi = val / 0x10, lo = val % 0x10;
				278	ret[len++] = '%';
				279	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				280	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				281	}
				282	}
				283	if (len >= max) {
				284	max *= 2;
				285	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
				286	if (ret == NULL) {
				287	xmlGenericError(xmlGenericErrorContext,
				288	"xmlSaveUri: out of memory\n");
				289	return(NULL);
				290	}
				291	}
				292	ret[len++] = 0;
				293	} else {
				294	if (uri->server != NULL) {
				295	if (len + 3 >= max) {
				296	max *= 2;
				297	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
				298	if (ret == NULL) {
				299	xmlGenericError(xmlGenericErrorContext,
				300	"xmlSaveUri: out of memory\n");
				301	return(NULL);
				302	}
				303	}
				304	ret[len++] = '/';
				305	ret[len++] = '/';
				306	if (uri->user != NULL) {
				307	p = uri->user;
				308	while (*p != 0) {
				309	if (len + 3 >= max) {
				310	max *= 2;
				311	ret = (xmlChar *) xmlRealloc(ret,
				312	(max + 1) * sizeof(xmlChar));
				313	if (ret == NULL) {
				314	xmlGenericError(xmlGenericErrorContext,
				315	"xmlSaveUri: out of memory\n");
				316	return(NULL);
				317	}
				318	}
				319	if ((IS_UNRESERVED(*(p))) \|\|
				320	(((p) == ';')) \|\| (((p) == ':')) \|\|
				321	(((p) == '&')) \|\| (((p) == '=')) \|\|
				322	(((p) == '+')) \|\| (((p) == '$')) \|\|
				323	((*(p) == ',')))
				324	ret[len++] = *p++;
				325	else {
				326	int val = (unsigned char )p++;
				327	int hi = val / 0x10, lo = val % 0x10;
				328	ret[len++] = '%';
				329	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				330	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				331	}
				332	}
				333	if (len + 3 >= max) {
				334	max *= 2;
				335	ret = (xmlChar *) xmlRealloc(ret,
				336	(max + 1) * sizeof(xmlChar));
				337	if (ret == NULL) {
				338	xmlGenericError(xmlGenericErrorContext,
				339	"xmlSaveUri: out of memory\n");
				340	return(NULL);
				341	}
				342	}
				343	ret[len++] = '@';
				344	}
				345	p = uri->server;
				346	while (*p != 0) {
				347	if (len >= max) {
				348	max *= 2;
				349	ret = (xmlChar *) xmlRealloc(ret,
				350	(max + 1) * sizeof(xmlChar));
				351	if (ret == NULL) {
				352	xmlGenericError(xmlGenericErrorContext,
				353	"xmlSaveUri: out of memory\n");
				354	return(NULL);
				355	}
				356	}
				357	ret[len++] = *p++;
				358	}
				359	if (uri->port > 0) {
				360	if (len + 10 >= max) {
				361	max *= 2;
				362	ret = (xmlChar *) xmlRealloc(ret,
				363	(max + 1) * sizeof(xmlChar));
				364	if (ret == NULL) {
				365	xmlGenericError(xmlGenericErrorContext,
				366	"xmlSaveUri: out of memory\n");
				367	return(NULL);
				368	}
				369	}
				370	len += sprintf((char *) &ret[len], ":%d", uri->port);
				371	}
				372	} else if (uri->authority != NULL) {
				373	if (len + 3 >= max) {
				374	max *= 2;
				375	ret = (xmlChar *) xmlRealloc(ret,
				376	(max + 1) * sizeof(xmlChar));
				377	if (ret == NULL) {
				378	xmlGenericError(xmlGenericErrorContext,
				379	"xmlSaveUri: out of memory\n");
				380	return(NULL);
				381	}
				382	}
				383	ret[len++] = '/';
				384	ret[len++] = '/';
				385	p = uri->authority;
				386	while (*p != 0) {
				387	if (len + 3 >= max) {
				388	max *= 2;
				389	ret = (xmlChar *) xmlRealloc(ret,
				390	(max + 1) * sizeof(xmlChar));
				391	if (ret == NULL) {
				392	xmlGenericError(xmlGenericErrorContext,
				393	"xmlSaveUri: out of memory\n");
				394	return(NULL);
				395	}
				396	}
				397	if ((IS_UNRESERVED(*(p))) \|\|
				398	(((p) == '$')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\|
				399	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
				400	(((p) == '=')) \|\| (((p) == '+')))
				401	ret[len++] = *p++;
				402	else {
				403	int val = (unsigned char )p++;
				404	int hi = val / 0x10, lo = val % 0x10;
				405	ret[len++] = '%';
				406	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				407	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				408	}
				409	}
				410	} else if (uri->scheme != NULL) {
				411	if (len + 3 >= max) {
				412	max *= 2;
				413	ret = (xmlChar *) xmlRealloc(ret,
				414	(max + 1) * sizeof(xmlChar));
				415	if (ret == NULL) {
				416	xmlGenericError(xmlGenericErrorContext,
				417	"xmlSaveUri: out of memory\n");
				418	return(NULL);
				419	}
				420	}
				421	ret[len++] = '/';
				422	ret[len++] = '/';
				423	}
				424	if (uri->path != NULL) {
				425	p = uri->path;
				426	while (*p != 0) {
				427	if (len + 3 >= max) {
				428	max *= 2;
				429	ret = (xmlChar *) xmlRealloc(ret,
				430	(max + 1) * sizeof(xmlChar));
				431	if (ret == NULL) {
				432	xmlGenericError(xmlGenericErrorContext,
				433	"xmlSaveUri: out of memory\n");
				434	return(NULL);
				435	}
				436	}
				437	if ((IS_UNRESERVED((p))) \|\| (((p) == '/')) \|\|
				438	(((p) == ';')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
				439	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\|
				440	((*(p) == ',')))
				441	ret[len++] = *p++;
				442	else {
				443	int val = (unsigned char )p++;
				444	int hi = val / 0x10, lo = val % 0x10;
				445	ret[len++] = '%';
				446	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				447	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				448	}
				449	}
				450	}
				451	if (uri->query != NULL) {
				452	if (len + 3 >= max) {
				453	max *= 2;
				454	ret = (xmlChar *) xmlRealloc(ret,
				455	(max + 1) * sizeof(xmlChar));
				456	if (ret == NULL) {
				457	xmlGenericError(xmlGenericErrorContext,
				458	"xmlSaveUri: out of memory\n");
				459	return(NULL);
				460	}
				461	}
				462	ret[len++] = '?';
				463	p = uri->query;
				464	while (*p != 0) {
				465	if (len + 3 >= max) {
				466	max *= 2;
				467	ret = (xmlChar *) xmlRealloc(ret,
				468	(max + 1) * sizeof(xmlChar));
				469	if (ret == NULL) {
				470	xmlGenericError(xmlGenericErrorContext,
				471	"xmlSaveUri: out of memory\n");
				472	return(NULL);
				473	}
				474	}
				475	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
				476	ret[len++] = *p++;
				477	else {
				478	int val = (unsigned char )p++;
				479	int hi = val / 0x10, lo = val % 0x10;
				480	ret[len++] = '%';
				481	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				482	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				483	}
				484	}
				485	}
				486	if (uri->fragment != NULL) {
				487	if (len + 3 >= max) {
				488	max *= 2;
				489	ret = (xmlChar *) xmlRealloc(ret,
				490	(max + 1) * sizeof(xmlChar));
				491	if (ret == NULL) {
				492	xmlGenericError(xmlGenericErrorContext,
				493	"xmlSaveUri: out of memory\n");
				494	return(NULL);
				495	}
				496	}
				497	ret[len++] = '#';
				498	p = uri->fragment;
				499	while (*p != 0) {
				500	if (len + 3 >= max) {
				501	max *= 2;
				502	ret = (xmlChar *) xmlRealloc(ret,
				503	(max + 1) * sizeof(xmlChar));
				504	if (ret == NULL) {
				505	xmlGenericError(xmlGenericErrorContext,
				506	"xmlSaveUri: out of memory\n");
				507	return(NULL);
				508	}
				509	}
				510	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
				511	ret[len++] = *p++;
				512	else {
				513	int val = (unsigned char )p++;
				514	int hi = val / 0x10, lo = val % 0x10;
				515	ret[len++] = '%';
				516	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				517	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				518	}
				519	}
				520	}
				521	if (len >= max) {
				522	max *= 2;
				523	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
				524	if (ret == NULL) {
				525	xmlGenericError(xmlGenericErrorContext,
				526	"xmlSaveUri: out of memory\n");
				527	return(NULL);
				528	}
				529	}
				530	ret[len++] = 0;
				531	}
				532	return(ret);
				533	}
				534
				535	/**
				536	* xmlPrintURI:
				537	* @stream: a FILE* for the output
				538	* @uri: pointer to an xmlURI
				539	*
				540	* Prints the URI in the stream @steam.
				541	*/
				542	void
				543	xmlPrintURI(FILE *stream, xmlURIPtr uri) {
				544	xmlChar *out;
				545
				546	out = xmlSaveUri(uri);
				547	if (out != NULL) {
				548	fprintf(stream, "%s", out);
				549	xmlFree(out);
				550	}
				551	}
				552
				553	/**
				554	* xmlCleanURI:
				555	* @uri: pointer to an xmlURI
				556	*
				557	* Make sure the xmlURI struct is free of content
				558	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	559	static void
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	560	xmlCleanURI(xmlURIPtr uri) {
				561	if (uri == NULL) return;
				562
				563	if (uri->scheme != NULL) xmlFree(uri->scheme);
				564	uri->scheme = NULL;
				565	if (uri->server != NULL) xmlFree(uri->server);
				566	uri->server = NULL;
				567	if (uri->user != NULL) xmlFree(uri->user);
				568	uri->user = NULL;
				569	if (uri->path != NULL) xmlFree(uri->path);
				570	uri->path = NULL;
				571	if (uri->fragment != NULL) xmlFree(uri->fragment);
				572	uri->fragment = NULL;
				573	if (uri->opaque != NULL) xmlFree(uri->opaque);
				574	uri->opaque = NULL;
				575	if (uri->authority != NULL) xmlFree(uri->authority);
				576	uri->authority = NULL;
				577	if (uri->query != NULL) xmlFree(uri->query);
				578	uri->query = NULL;
				579	}
				580
				581	/**
				582	* xmlFreeURI:
				583	* @uri: pointer to an xmlURI
				584	*
				585	* Free up the xmlURI struct
				586	*/
				587	void
				588	xmlFreeURI(xmlURIPtr uri) {
				589	if (uri == NULL) return;
				590
				591	if (uri->scheme != NULL) xmlFree(uri->scheme);
				592	if (uri->server != NULL) xmlFree(uri->server);
				593	if (uri->user != NULL) xmlFree(uri->user);
				594	if (uri->path != NULL) xmlFree(uri->path);
				595	if (uri->fragment != NULL) xmlFree(uri->fragment);
				596	if (uri->opaque != NULL) xmlFree(uri->opaque);
				597	if (uri->authority != NULL) xmlFree(uri->authority);
				598	if (uri->query != NULL) xmlFree(uri->query);
Daniel Veillard	48b2f89	2001-02-25 16:11:03 +0000	[diff] [blame]	599	MEM_CLEANUP(uri, sizeof(xmlURI));
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	600	xmlFree(uri);
				601	}
				602
				603	/************************************************************************
				604	* *
				605	* Helper functions *
				606	* *
				607	************************************************************************/
				608
				609	#if 0
				610	/**
				611	* xmlNormalizeURIPath:
				612	* @path: pointer to the path string
				613	*
				614	* applies the 5 normalization steps to a path string
				615	* Normalization occurs directly on the string, no new allocation is done
				616	*
				617	* Returns 0 or an error code
				618	*/
				619	int
				620	xmlNormalizeURIPath(char *path) {
				621	int cur, out;
				622
				623	if (path == NULL)
				624	return(-1);
				625	cur = 0;
				626	out = 0;
				627	while ((path[cur] != 0) && (path[cur] != '/')) cur++;
				628	if (path[cur] == 0)
				629	return(0);
				630
				631	/* we are positionned at the beginning of the first segment */
				632	cur++;
				633	out = cur;
				634
				635	/*
				636	* Analyze each segment in sequence.
				637	*/
				638	while (path[cur] != 0) {
				639	/*
				640	* c) All occurrences of "./", where "." is a complete path segment,
				641	* are removed from the buffer string.
				642	*/
				643	if ((path[cur] == '.') && (path[cur + 1] == '/')) {
				644	cur += 2;
				645	if (path[cur] == 0) {
				646	path[out++] = 0;
				647	}
				648	continue;
				649	}
				650
				651	/*
				652	* d) If the buffer string ends with "." as a complete path segment,
				653	* that "." is removed.
				654	*/
				655	if ((path[cur] == '.') && (path[cur + 1] == 0)) {
				656	path[out] = 0;
				657	break;
				658	}
				659
				660	/* read the segment */
				661	while ((path[cur] != 0) && (path[cur] != '/')) {
				662	path[out++] = path[cur++];
				663	}
				664	path[out++] = path[cur];
				665	if (path[cur] != 0) {
				666	cur++;
				667	}
				668	}
				669
				670	cur = 0;
				671	out = 0;
				672	while ((path[cur] != 0) && (path[cur] != '/')) cur++;
				673	if (path[cur] == 0)
				674	return(0);
				675	/* we are positionned at the beginning of the first segment */
				676	cur++;
				677	out = cur;
				678	/*
				679	* Analyze each segment in sequence.
				680	*/
				681	while (path[cur] != 0) {
				682	/*
				683	* e) All occurrences of "<segment>/../", where <segment> is a
				684	* complete path segment not equal to "..", are removed from the
				685	* buffer string. Removal of these path segments is performed
				686	* iteratively, removing the leftmost matching pattern on each
				687	* iteration, until no matching pattern remains.
				688	*/
				689	if ((cur > 1) && (out > 1) &&
				690	(path[cur] == '/') && (path[cur + 1] == '.') &&
				691	(path[cur + 2] == '.') && (path[cur + 3] == '/') &&
				692	((path[out] != '.') \|\| (path[out - 1] != '.') \|\|
				693	(path[out - 2] != '/'))) {
				694	cur += 3;
				695	out --;
				696	while ((out > 0) && (path[out] != '/')) { out --; }
				697	path[out] = 0;
				698	continue;
				699	}
				700
				701	/*
				702	* f) If the buffer string ends with "<segment>/..", where <segment>
				703	* is a complete path segment not equal to "..", that
				704	* "<segment>/.." is removed.
				705	*/
				706	if ((path[cur] == '/') && (path[cur + 1] == '.') &&
				707	(path[cur + 2] == '.') && (path[cur + 3] == 0) &&
				708	((path[out] != '.') \|\| (path[out - 1] != '.') \|\|
				709	(path[out - 2] != '/'))) {
				710	cur += 4;
				711	out --;
				712	while ((out > 0) && (path[out - 1] != '/')) { out --; }
				713	path[out] = 0;
				714	continue;
				715	}
				716
				717	path[out++] = path[cur++]; /* / or 0 */
				718	}
				719	path[out] = 0;
				720
				721	/*
				722	* g) If the resulting buffer string still begins with one or more
				723	* complete path segments of "..", then the reference is
				724	* considered to be in error. Implementations may handle this
				725	* error by retaining these components in the resolved path (i.e.,
				726	* treating them as part of the final URI), by removing them from
				727	* the resolved path (i.e., discarding relative levels above the
				728	* root), or by avoiding traversal of the reference.
				729	*
				730	* We discard them from the final path.
				731	*/
				732	cur = 0;
				733	while ((path[cur] == '/') && (path[cur + 1] == '.') &&
				734	(path[cur + 2] == '.'))
				735	cur += 3;
				736	if (cur != 0) {
				737	out = 0;
				738	while (path[cur] != 0) path[out++] = path[cur++];
				739	path[out] = 0;
				740	}
				741	return(0);
				742	}
				743	#else
				744	/**
				745	* xmlNormalizeURIPath:
				746	* @path: pointer to the path string
				747	*
				748	* Applies the 5 normalization steps to a path string--that is, RFC 2396
				749	* Section 5.2, steps 6.c through 6.g.
				750	*
				751	* Normalization occurs directly on the string, no new allocation is done
				752	*
				753	* Returns 0 or an error code
				754	*/
				755	int
				756	xmlNormalizeURIPath(char *path) {
				757	char cur, out;
				758
				759	if (path == NULL)
				760	return(-1);
				761
				762	/* Skip all initial "/" chars. We want to get to the beginning of the
				763	* first non-empty segment.
				764	*/
				765	cur = path;
				766	while (cur[0] == '/')
				767	++cur;
				768	if (cur[0] == '\0')
				769	return(0);
				770
				771	/* Keep everything we've seen so far. */
				772	out = cur;
				773
				774	/*
				775	* Analyze each segment in sequence for cases (c) and (d).
				776	*/
				777	while (cur[0] != '\0') {
				778	/*
				779	* c) All occurrences of "./", where "." is a complete path segment,
				780	* are removed from the buffer string.
				781	*/
				782	if ((cur[0] == '.') && (cur[1] == '/')) {
				783	cur += 2;
				784	continue;
				785	}
				786
				787	/*
				788	* d) If the buffer string ends with "." as a complete path segment,
				789	* that "." is removed.
				790	*/
				791	if ((cur[0] == '.') && (cur[1] == '\0'))
				792	break;
				793
				794	/* Otherwise keep the segment. */
				795	while (cur[0] != '/') {
				796	if (cur[0] == '\0')
				797	goto done_cd;
				798	(out++)[0] = (cur++)[0];
				799	}
				800	(out++)[0] = (cur++)[0];
				801	}
				802	done_cd:
				803	out[0] = '\0';
				804
				805	/* Reset to the beginning of the first segment for the next sequence. */
				806	cur = path;
				807	while (cur[0] == '/')
				808	++cur;
				809	if (cur[0] == '\0')
				810	return(0);
				811
				812	/*
				813	* Analyze each segment in sequence for cases (e) and (f).
				814	*
				815	* e) All occurrences of "<segment>/../", where <segment> is a
				816	* complete path segment not equal to "..", are removed from the
				817	* buffer string. Removal of these path segments is performed
				818	* iteratively, removing the leftmost matching pattern on each
				819	* iteration, until no matching pattern remains.
				820	*
				821	* f) If the buffer string ends with "<segment>/..", where <segment>
				822	* is a complete path segment not equal to "..", that
				823	* "<segment>/.." is removed.
				824	*
				825	* To satisfy the "iterative" clause in (e), we need to collapse the
				826	* string every time we find something that needs to be removed. Thus,
				827	* we don't need to keep two pointers into the string: we only need a
				828	* "current position" pointer.
				829	*/
				830	while (1) {
				831	char *segp;
				832
				833	/* At the beginning of each iteration of this loop, "cur" points to
				834	* the first character of the segment we want to examine.
				835	*/
				836
				837	/* Find the end of the current segment. */
				838	segp = cur;
				839	while ((segp[0] != '/') && (segp[0] != '\0'))
				840	++segp;
				841
				842	/* If this is the last segment, we're done (we need at least two
				843	* segments to meet the criteria for the (e) and (f) cases).
				844	*/
				845	if (segp[0] == '\0')
				846	break;
				847
				848	/* If the first segment is "..", or if the next segment _isn't_ "..",
				849	* keep this segment and try the next one.
				850	*/
				851	++segp;
				852	if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
				853	\|\| ((segp[0] != '.') \|\| (segp[1] != '.')
				854	\|\| ((segp[2] != '/') && (segp[2] != '\0')))) {
				855	cur = segp;
				856	continue;
				857	}
				858
				859	/* If we get here, remove this segment and the next one and back up
				860	* to the previous segment (if there is one), to implement the
				861	* "iteratively" clause. It's pretty much impossible to back up
				862	* while maintaining two pointers into the buffer, so just compact
				863	* the whole buffer now.
				864	*/
				865
				866	/* If this is the end of the buffer, we're done. */
				867	if (segp[2] == '\0') {
				868	cur[0] = '\0';
				869	break;
				870	}
				871	strcpy(cur, segp + 3);
				872
				873	/* If there are no previous segments, then keep going from here. */
				874	segp = cur;
				875	while ((segp > path) && ((--segp)[0] == '/'))
				876	;
				877	if (segp == path)
				878	continue;
				879
				880	/* "segp" is pointing to the end of a previous segment; find it's
				881	* start. We need to back up to the previous segment and start
				882	* over with that to handle things like "foo/bar/../..". If we
				883	* don't do this, then on the first pass we'll remove the "bar/..",
				884	* but be pointing at the second ".." so we won't realize we can also
				885	* remove the "foo/..".
				886	*/
				887	cur = segp;
				888	while ((cur > path) && (cur[-1] != '/'))
				889	--cur;
				890	}
				891	out[0] = '\0';
				892
				893	/*
				894	* g) If the resulting buffer string still begins with one or more
				895	* complete path segments of "..", then the reference is
				896	* considered to be in error. Implementations may handle this
				897	* error by retaining these components in the resolved path (i.e.,
				898	* treating them as part of the final URI), by removing them from
				899	* the resolved path (i.e., discarding relative levels above the
				900	* root), or by avoiding traversal of the reference.
				901	*
				902	* We discard them from the final path.
				903	*/
				904	if (path[0] == '/') {
				905	cur = path;
				906	while ((cur[1] == '.') && (cur[2] == '.')
				907	&& ((cur[3] == '/') \|\| (cur[3] == '\0')))
				908	cur += 3;
				909
				910	if (cur != path) {
				911	out = path;
				912	while (cur[0] != '\0')
				913	(out++)[0] = (cur++)[0];
				914	out[0] = 0;
				915	}
				916	}
				917
				918	return(0);
				919	}
				920	#endif
				921
				922	/**
				923	* xmlURIUnescapeString:
				924	* @str: the string to unescape
				925	* @len: the lenght in bytes to unescape (or <= 0 to indicate full string)
				926	* @target: optionnal destination buffer
				927	*
				928	* Unescaping routine, does not do validity checks !
				929	* Output is direct unsigned char translation of %XX values (no encoding)
				930	*
				931	* Returns an copy of the string, but unescaped
				932	*/
				933	char *
				934	xmlURIUnescapeString(const char str, int len, char target) {
				935	char ret, out;
				936	const char *in;
				937
				938	if (str == NULL)
				939	return(NULL);
				940	if (len <= 0) len = strlen(str);
				941	if (len <= 0) return(NULL);
				942
				943	if (target == NULL) {
				944	ret = (char *) xmlMalloc(len + 1);
				945	if (ret == NULL) {
				946	xmlGenericError(xmlGenericErrorContext,
				947	"xmlURIUnescapeString: out of memory\n");
				948	return(NULL);
				949	}
				950	} else
				951	ret = target;
				952	in = str;
				953	out = ret;
				954	while(len > 0) {
				955	if (*in == '%') {
				956	in++;
				957	if ((in >= '0') && (in <= '9'))
				958	out = (in - '0');
				959	else if ((in >= 'a') && (in <= 'f'))
				960	out = (in - 'a') + 10;
				961	else if ((in >= 'A') && (in <= 'F'))
				962	out = (in - 'A') + 10;
				963	in++;
				964	if ((in >= '0') && (in <= '9'))
				965	out = out * 16 + (*in - '0');
				966	else if ((in >= 'a') && (in <= 'f'))
				967	out = out * 16 + (*in - 'a') + 10;
				968	else if ((in >= 'A') && (in <= 'F'))
				969	out = out * 16 + (*in - 'A') + 10;
				970	in++;
				971	len -= 3;
				972	out++;
				973	} else {
				974	out++ = in++;
				975	len--;
				976	}
				977	}
				978	*out = 0;
				979	return(ret);
				980	}
				981
				982	/**
				983	* xmlURIEscape:
				984	* @str: the string of the URI to escape
				985	*
				986	* Escaping routine, does not do validity checks !
				987	* It will try to escape the chars needing this, but this is heuristic
				988	* based it's impossible to be sure.
				989	*
Daniel Veillard	146c912	2001-03-22 15:22:27 +0000	[diff] [blame]	990	* TODO: make the proper implementation of this function by calling
				991	* xmlParseURIReference() and escaping each section accordingly
				992	* to the rules (c.f. bug 51876)
				993	*
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	994	* Returns an copy of the string, but escaped
				995	*/
				996	xmlChar *
				997	xmlURIEscape(const xmlChar *str) {
				998	xmlChar *ret;
				999	const xmlChar *in;
				1000	unsigned int len, out;
				1001
				1002	if (str == NULL)
				1003	return(NULL);
				1004	len = xmlStrlen(str);
				1005	if (len <= 0) return(NULL);
				1006
				1007	len += 20;
				1008	ret = (xmlChar *) xmlMalloc(len);
				1009	if (ret == NULL) {
				1010	xmlGenericError(xmlGenericErrorContext,
				1011	"xmlURIEscape: out of memory\n");
				1012	return(NULL);
				1013	}
				1014	in = (const xmlChar *) str;
				1015	out = 0;
				1016	while(*in != 0) {
				1017	if (len - out <= 3) {
				1018	len += 20;
				1019	ret = (xmlChar *) xmlRealloc(ret, len);
				1020	if (ret == NULL) {
				1021	xmlGenericError(xmlGenericErrorContext,
				1022	"xmlURIEscape: out of memory\n");
				1023	return(NULL);
				1024	}
				1025	}
				1026	if ((!IS_UNRESERVED(in)) && (in != ':') && (*in != '/') &&
				1027	(in != '?') && (in != '#')) {
				1028	unsigned char val;
				1029	ret[out++] = '%';
				1030	val = *in >> 4;
				1031	if (val <= 9)
				1032	ret[out++] = '0' + val;
				1033	else
				1034	ret[out++] = 'A' + val - 0xA;
				1035	val = *in & 0xF;
				1036	if (val <= 9)
				1037	ret[out++] = '0' + val;
				1038	else
				1039	ret[out++] = 'A' + val - 0xA;
				1040	in++;
				1041	} else {
				1042	ret[out++] = *in++;
				1043	}
				1044	}
				1045	ret[out] = 0;
				1046	return(ret);
				1047	}
				1048
				1049	/************************************************************************
				1050	* *
				1051	* Escaped URI parsing *
				1052	* *
				1053	************************************************************************/
				1054
				1055	/**
				1056	* xmlParseURIFragment:
				1057	* @uri: pointer to an URI structure
				1058	* @str: pointer to the string to analyze
				1059	*
				1060	* Parse an URI fragment string and fills in the appropriate fields
				1061	* of the @uri structure.
				1062	*
				1063	* fragment = *uric
				1064	*
				1065	* Returns 0 or the error code
				1066	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1067	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1068	xmlParseURIFragment(xmlURIPtr uri, const char **str) {
				1069	const char cur = str;
				1070
				1071	if (str == NULL) return(-1);
				1072
				1073	while (IS_URIC(cur)) NEXT(cur);
				1074	if (uri != NULL) {
				1075	if (uri->fragment != NULL) xmlFree(uri->fragment);
				1076	uri->fragment = xmlURIUnescapeString(str, cur - str, NULL);
				1077	}
				1078	*str = cur;
				1079	return(0);
				1080	}
				1081
				1082	/**
				1083	* xmlParseURIQuery:
				1084	* @uri: pointer to an URI structure
				1085	* @str: pointer to the string to analyze
				1086	*
				1087	* Parse the query part of an URI
				1088	*
				1089	* query = *uric
				1090	*
				1091	* Returns 0 or the error code
				1092	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1093	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1094	xmlParseURIQuery(xmlURIPtr uri, const char **str) {
				1095	const char cur = str;
				1096
				1097	if (str == NULL) return(-1);
				1098
				1099	while (IS_URIC(cur)) NEXT(cur);
				1100	if (uri != NULL) {
				1101	if (uri->query != NULL) xmlFree(uri->query);
				1102	uri->query = xmlURIUnescapeString(str, cur - str, NULL);
				1103	}
				1104	*str = cur;
				1105	return(0);
				1106	}
				1107
				1108	/**
				1109	* xmlParseURIScheme:
				1110	* @uri: pointer to an URI structure
				1111	* @str: pointer to the string to analyze
				1112	*
				1113	* Parse an URI scheme
				1114	*
				1115	* scheme = alpha *( alpha \| digit \| "+" \| "-" \| "." )
				1116	*
				1117	* Returns 0 or the error code
				1118	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1119	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1120	xmlParseURIScheme(xmlURIPtr uri, const char **str) {
				1121	const char *cur;
				1122
				1123	if (str == NULL)
				1124	return(-1);
				1125
				1126	cur = *str;
				1127	if (!IS_ALPHA(*cur))
				1128	return(2);
				1129	cur++;
				1130	while (IS_SCHEME(*cur)) cur++;
				1131	if (uri != NULL) {
				1132	if (uri->scheme != NULL) xmlFree(uri->scheme);
				1133	/* !!! strndup */
				1134	uri->scheme = xmlURIUnescapeString(str, cur - str, NULL);
				1135	}
				1136	*str = cur;
				1137	return(0);
				1138	}
				1139
				1140	/**
				1141	* xmlParseURIOpaquePart:
				1142	* @uri: pointer to an URI structure
				1143	* @str: pointer to the string to analyze
				1144	*
				1145	* Parse an URI opaque part
				1146	*
				1147	* opaque_part = uric_no_slash *uric
				1148	*
				1149	* Returns 0 or the error code
				1150	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1151	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1152	xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) {
				1153	const char *cur;
				1154
				1155	if (str == NULL)
				1156	return(-1);
				1157
				1158	cur = *str;
				1159	if (!IS_URIC_NO_SLASH(cur)) {
				1160	return(3);
				1161	}
				1162	NEXT(cur);
				1163	while (IS_URIC(cur)) NEXT(cur);
				1164	if (uri != NULL) {
				1165	if (uri->opaque != NULL) xmlFree(uri->opaque);
				1166	uri->opaque = xmlURIUnescapeString(str, cur - str, NULL);
				1167	}
				1168	*str = cur;
				1169	return(0);
				1170	}
				1171
				1172	/**
				1173	* xmlParseURIServer:
				1174	* @uri: pointer to an URI structure
				1175	* @str: pointer to the string to analyze
				1176	*
				1177	* Parse a server subpart of an URI, it's a finer grain analysis
				1178	* of the authority part.
				1179	*
				1180	* server = [ [ userinfo "@" ] hostport ]
				1181	* userinfo = *( unreserved \| escaped \|
				1182	* ";" \| ":" \| "&" \| "=" \| "+" \| "$" \| "," )
				1183	* hostport = host [ ":" port ]
				1184	* host = hostname \| IPv4address
				1185	* hostname = *( domainlabel "." ) toplabel [ "." ]
				1186	* domainlabel = alphanum \| alphanum *( alphanum \| "-" ) alphanum
				1187	* toplabel = alpha \| alpha *( alphanum \| "-" ) alphanum
				1188	* IPv4address = 1digit "." 1digit "." 1digit "." 1digit
				1189	* port = *digit
				1190	*
				1191	* Returns 0 or the error code
				1192	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1193	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1194	xmlParseURIServer(xmlURIPtr uri, const char **str) {
				1195	const char *cur;
				1196	const char host, tmp;
				1197
				1198	if (str == NULL)
				1199	return(-1);
				1200
				1201	cur = *str;
				1202
				1203	/*
				1204	* is there an userinfo ?
				1205	*/
				1206	while (IS_USERINFO(cur)) NEXT(cur);
				1207	if (*cur == '@') {
				1208	if (uri != NULL) {
				1209	if (uri->user != NULL) xmlFree(uri->user);
				1210	uri->user = xmlURIUnescapeString(str, cur - str, NULL);
				1211	}
				1212	cur++;
				1213	} else {
				1214	if (uri != NULL) {
				1215	if (uri->user != NULL) xmlFree(uri->user);
				1216	uri->user = NULL;
				1217	}
				1218	cur = *str;
				1219	}
				1220	/*
				1221	* This can be empty in the case where there is no server
				1222	*/
				1223	host = cur;
				1224	if (*cur == '/') {
				1225	if (uri != NULL) {
				1226	if (uri->authority != NULL) xmlFree(uri->authority);
				1227	uri->authority = NULL;
				1228	if (uri->server != NULL) xmlFree(uri->server);
				1229	uri->server = NULL;
				1230	uri->port = 0;
				1231	}
				1232	return(0);
				1233	}
				1234	/*
				1235	* host part of hostport can derive either an IPV4 address
				1236	* or an unresolved name. Check the IP first, it easier to detect
				1237	* errors if wrong one
				1238	*/
				1239	if (IS_DIGIT(*cur)) {
				1240	while(IS_DIGIT(*cur)) cur++;
				1241	if (*cur != '.')
				1242	goto host_name;
				1243	cur++;
				1244	if (!IS_DIGIT(*cur))
				1245	goto host_name;
				1246	while(IS_DIGIT(*cur)) cur++;
				1247	if (*cur != '.')
				1248	goto host_name;
				1249	cur++;
				1250	if (!IS_DIGIT(*cur))
				1251	goto host_name;
				1252	while(IS_DIGIT(*cur)) cur++;
				1253	if (*cur != '.')
				1254	goto host_name;
				1255	cur++;
				1256	if (!IS_DIGIT(*cur))
				1257	goto host_name;
				1258	while(IS_DIGIT(*cur)) cur++;
				1259	if (uri != NULL) {
				1260	if (uri->authority != NULL) xmlFree(uri->authority);
				1261	uri->authority = NULL;
				1262	if (uri->server != NULL) xmlFree(uri->server);
				1263	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
				1264	}
				1265	goto host_done;
				1266	}
				1267	host_name:
				1268	/*
				1269	* the hostname production as-is is a parser nightmare.
				1270	* simplify it to
				1271	* hostname = *( domainlabel "." ) domainlabel [ "." ]
				1272	* and just make sure the last label starts with a non numeric char.
				1273	*/
				1274	if (!IS_ALPHANUM(*cur))
				1275	return(6);
				1276	while (IS_ALPHANUM(*cur)) {
				1277	while ((IS_ALPHANUM(cur)) \|\| (cur == '-')) cur++;
				1278	if (*cur == '.')
				1279	cur++;
				1280	}
				1281	tmp = cur;
				1282	tmp--;
				1283	while (IS_ALPHANUM(tmp) && (tmp != '.') && (tmp >= host)) tmp--;
				1284	tmp++;
				1285	if (!IS_ALPHA(*tmp))
				1286	return(7);
				1287	if (uri != NULL) {
				1288	if (uri->authority != NULL) xmlFree(uri->authority);
				1289	uri->authority = NULL;
				1290	if (uri->server != NULL) xmlFree(uri->server);
				1291	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
				1292	}
				1293
				1294	host_done:
				1295
				1296	/*
				1297	* finish by checking for a port presence.
				1298	*/
				1299	if (*cur == ':') {
				1300	cur++;
				1301	if (IS_DIGIT(*cur)) {
				1302	if (uri != NULL)
				1303	uri->port = 0;
				1304	while (IS_DIGIT(*cur)) {
				1305	if (uri != NULL)
				1306	uri->port = uri->port * 10 + (*cur - '0');
				1307	cur++;
				1308	}
				1309	}
				1310	}
				1311	*str = cur;
				1312	return(0);
				1313	}
				1314
				1315	/**
				1316	* xmlParseURIRelSegment:
				1317	* @uri: pointer to an URI structure
				1318	* @str: pointer to the string to analyze
				1319	*
				1320	* Parse an URI relative segment
				1321	*
				1322	* rel_segment = 1*( unreserved \| escaped \| ";" \| "@" \| "&" \| "=" \|
				1323	* "+" \| "$" \| "," )
				1324	*
				1325	* Returns 0 or the error code
				1326	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1327	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1328	xmlParseURIRelSegment(xmlURIPtr uri, const char **str) {
				1329	const char *cur;
				1330
				1331	if (str == NULL)
				1332	return(-1);
				1333
				1334	cur = *str;
				1335	if (!IS_SEGMENT(cur)) {
				1336	return(3);
				1337	}
				1338	NEXT(cur);
				1339	while (IS_SEGMENT(cur)) NEXT(cur);
				1340	if (uri != NULL) {
				1341	if (uri->path != NULL) xmlFree(uri->path);
				1342	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
				1343	}
				1344	*str = cur;
				1345	return(0);
				1346	}
				1347
				1348	/**
				1349	* xmlParseURIPathSegments:
				1350	* @uri: pointer to an URI structure
				1351	* @str: pointer to the string to analyze
				1352	* @slash: should we add a leading slash
				1353	*
				1354	* Parse an URI set of path segments
				1355	*
				1356	* path_segments = segment *( "/" segment )
				1357	* segment = pchar ( ";" param )
				1358	* param = *pchar
				1359	*
				1360	* Returns 0 or the error code
				1361	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1362	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1363	xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
				1364	const char *cur;
				1365
				1366	if (str == NULL)
				1367	return(-1);
				1368
				1369	cur = *str;
				1370
				1371	do {
				1372	while (IS_PCHAR(cur)) NEXT(cur);
				1373	if (*cur == ';') {
				1374	cur++;
				1375	while (IS_PCHAR(cur)) NEXT(cur);
				1376	}
				1377	if (*cur != '/') break;
				1378	cur++;
				1379	} while (1);
				1380	if (uri != NULL) {
				1381	int len, len2 = 0;
				1382	char *path;
				1383
				1384	/*
				1385	* Concat the set of path segments to the current path
				1386	*/
				1387	len = cur - *str;
				1388	if (slash)
				1389	len++;
				1390
				1391	if (uri->path != NULL) {
				1392	len2 = strlen(uri->path);
				1393	len += len2;
				1394	}
				1395	path = (char *) xmlMalloc(len + 1);
				1396	if (path == NULL) {
				1397	xmlGenericError(xmlGenericErrorContext,
				1398	"xmlParseURIPathSegments: out of memory\n");
				1399	*str = cur;
				1400	return(-1);
				1401	}
				1402	if (uri->path != NULL)
				1403	memcpy(path, uri->path, len2);
				1404	if (slash) {
				1405	path[len2] = '/';
				1406	len2++;
				1407	}
				1408	path[len2] = 0;
				1409	if (cur - *str > 0)
				1410	xmlURIUnescapeString(str, cur - str, &path[len2]);
				1411	if (uri->path != NULL)
				1412	xmlFree(uri->path);
				1413	uri->path = path;
				1414	}
				1415	*str = cur;
				1416	return(0);
				1417	}
				1418
				1419	/**
				1420	* xmlParseURIAuthority:
				1421	* @uri: pointer to an URI structure
				1422	* @str: pointer to the string to analyze
				1423	*
				1424	* Parse the authority part of an URI.
				1425	*
				1426	* authority = server \| reg_name
				1427	* server = [ [ userinfo "@" ] hostport ]
				1428	* reg_name = 1*( unreserved \| escaped \| "$" \| "," \| ";" \| ":" \|
				1429	* "@" \| "&" \| "=" \| "+" )
				1430	*
				1431	* Note : this is completely ambiguous since reg_name is allowed to
				1432	* use the full set of chars in use by server:
				1433	*
				1434	* 3.2.1. Registry-based Naming Authority
				1435	*
				1436	* The structure of a registry-based naming authority is specific
				1437	* to the URI scheme, but constrained to the allowed characters
				1438	* for an authority component.
				1439	*
				1440	* Returns 0 or the error code
				1441	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1442	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1443	xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
				1444	const char *cur;
				1445	int ret;
				1446
				1447	if (str == NULL)
				1448	return(-1);
				1449
				1450	cur = *str;
				1451
				1452	/*
				1453	* try first to parse it as a server string.
				1454	*/
				1455	ret = xmlParseURIServer(uri, str);
				1456	if (ret == 0)
				1457	return(0);
				1458
				1459	/*
				1460	* failed, fallback to reg_name
				1461	*/
				1462	if (!IS_REG_NAME(cur)) {
				1463	return(5);
				1464	}
				1465	NEXT(cur);
				1466	while (IS_REG_NAME(cur)) NEXT(cur);
				1467	if (uri != NULL) {
				1468	if (uri->server != NULL) xmlFree(uri->server);
				1469	uri->server = NULL;
				1470	if (uri->user != NULL) xmlFree(uri->user);
				1471	uri->user = NULL;
				1472	if (uri->authority != NULL) xmlFree(uri->authority);
				1473	uri->authority = xmlURIUnescapeString(str, cur - str, NULL);
				1474	}
				1475	*str = cur;
				1476	return(0);
				1477	}
				1478
				1479	/**
				1480	* xmlParseURIHierPart:
				1481	* @uri: pointer to an URI structure
				1482	* @str: pointer to the string to analyze
				1483	*
				1484	* Parse an URI hirarchical part
				1485	*
				1486	* hier_part = ( net_path \| abs_path ) [ "?" query ]
				1487	* abs_path = "/" path_segments
				1488	* net_path = "//" authority [ abs_path ]
				1489	*
				1490	* Returns 0 or the error code
				1491	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1492	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1493	xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
				1494	int ret;
				1495	const char *cur;
				1496
				1497	if (str == NULL)
				1498	return(-1);
				1499
				1500	cur = *str;
				1501
				1502	if ((cur[0] == '/') && (cur[1] == '/')) {
				1503	cur += 2;
				1504	ret = xmlParseURIAuthority(uri, &cur);
				1505	if (ret != 0)
				1506	return(ret);
				1507	if (cur[0] == '/') {
				1508	cur++;
				1509	ret = xmlParseURIPathSegments(uri, &cur, 1);
				1510	}
				1511	} else if (cur[0] == '/') {
				1512	cur++;
				1513	ret = xmlParseURIPathSegments(uri, &cur, 1);
				1514	} else {
				1515	return(4);
				1516	}
				1517	if (ret != 0)
				1518	return(ret);
				1519	if (*cur == '?') {
				1520	cur++;
				1521	ret = xmlParseURIQuery(uri, &cur);
				1522	if (ret != 0)
				1523	return(ret);
				1524	}
				1525	*str = cur;
				1526	return(0);
				1527	}
				1528
				1529	/**
				1530	* xmlParseAbsoluteURI:
				1531	* @uri: pointer to an URI structure
				1532	* @str: pointer to the string to analyze
				1533	*
				1534	* Parse an URI reference string and fills in the appropriate fields
				1535	* of the @uri structure
				1536	*
				1537	* absoluteURI = scheme ":" ( hier_part \| opaque_part )
				1538	*
				1539	* Returns 0 or the error code
				1540	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1541	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1542	xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
				1543	int ret;
				1544
				1545	if (str == NULL)
				1546	return(-1);
				1547
				1548	ret = xmlParseURIScheme(uri, str);
				1549	if (ret != 0) return(ret);
				1550	if (**str != ':')
				1551	return(1);
				1552	(*str)++;
				1553	if (**str == '/')
				1554	return(xmlParseURIHierPart(uri, str));
				1555	return(xmlParseURIOpaquePart(uri, str));
				1556	}
				1557
				1558	/**
				1559	* xmlParseRelativeURI:
				1560	* @uri: pointer to an URI structure
				1561	* @str: pointer to the string to analyze
				1562	*
				1563	* Parse an relative URI string and fills in the appropriate fields
				1564	* of the @uri structure
				1565	*
				1566	* relativeURI = ( net_path \| abs_path \| rel_path ) [ "?" query ]
				1567	* abs_path = "/" path_segments
				1568	* net_path = "//" authority [ abs_path ]
				1569	* rel_path = rel_segment [ abs_path ]
				1570	*
				1571	* Returns 0 or the error code
				1572	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1573	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1574	xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
				1575	int ret = 0;
				1576	const char *cur;
				1577
				1578	if (str == NULL)
				1579	return(-1);
				1580
				1581	cur = *str;
				1582	if ((cur[0] == '/') && (cur[1] == '/')) {
				1583	cur += 2;
				1584	ret = xmlParseURIAuthority(uri, &cur);
				1585	if (ret != 0)
				1586	return(ret);
				1587	if (cur[0] == '/') {
				1588	cur++;
				1589	ret = xmlParseURIPathSegments(uri, &cur, 1);
				1590	}
				1591	} else if (cur[0] == '/') {
				1592	cur++;
				1593	ret = xmlParseURIPathSegments(uri, &cur, 1);
				1594	} else if (cur[0] != '#' && cur[0] != '?') {
				1595	ret = xmlParseURIRelSegment(uri, &cur);
				1596	if (ret != 0)
				1597	return(ret);
				1598	if (cur[0] == '/') {
				1599	cur++;
				1600	ret = xmlParseURIPathSegments(uri, &cur, 1);
				1601	}
				1602	}
				1603	if (ret != 0)
				1604	return(ret);
				1605	if (*cur == '?') {
				1606	cur++;
				1607	ret = xmlParseURIQuery(uri, &cur);
				1608	if (ret != 0)
				1609	return(ret);
				1610	}
				1611	*str = cur;
				1612	return(ret);
				1613	}
				1614
				1615	/**
				1616	* xmlParseURIReference:
				1617	* @uri: pointer to an URI structure
				1618	* @str: the string to analyze
				1619	*
				1620	* Parse an URI reference string and fills in the appropriate fields
				1621	* of the @uri structure
				1622	*
				1623	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
				1624	*
				1625	* Returns 0 or the error code
				1626	*/
				1627	int
				1628	xmlParseURIReference(xmlURIPtr uri, const char *str) {
				1629	int ret;
				1630	const char *tmp = str;
				1631
				1632	if (str == NULL)
				1633	return(-1);
				1634	xmlCleanURI(uri);
				1635
				1636	/*
				1637	* Try first to parse aboslute refs, then fallback to relative if
				1638	* it fails.
				1639	*/
				1640	ret = xmlParseAbsoluteURI(uri, &str);
				1641	if (ret != 0) {
				1642	xmlCleanURI(uri);
				1643	str = tmp;
				1644	ret = xmlParseRelativeURI(uri, &str);
				1645	}
				1646	if (ret != 0) {
				1647	xmlCleanURI(uri);
				1648	return(ret);
				1649	}
				1650
				1651	if (*str == '#') {
				1652	str++;
				1653	ret = xmlParseURIFragment(uri, &str);
				1654	if (ret != 0) return(ret);
				1655	}
				1656	if (*str != 0) {
				1657	xmlCleanURI(uri);
				1658	return(1);
				1659	}
				1660	return(0);
				1661	}
				1662
				1663	/**
				1664	* xmlParseURI:
				1665	* @str: the URI string to analyze
				1666	*
				1667	* Parse an URI
				1668	*
				1669	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
				1670	*
				1671	* Returns a newly build xmlURIPtr or NULL in case of error
				1672	*/
				1673	xmlURIPtr
				1674	xmlParseURI(const char *str) {
				1675	xmlURIPtr uri;
				1676	int ret;
				1677
				1678	if (str == NULL)
				1679	return(NULL);
				1680	uri = xmlCreateURI();
				1681	if (uri != NULL) {
				1682	ret = xmlParseURIReference(uri, str);
				1683	if (ret) {
				1684	xmlFreeURI(uri);
				1685	return(NULL);
				1686	}
				1687	}
				1688	return(uri);
				1689	}
				1690
				1691	/************************************************************************
				1692	* *
				1693	* Public functions *
				1694	* *
				1695	************************************************************************/
				1696
				1697	/**
				1698	* xmlBuildURI:
				1699	* @URI: the URI instance found in the document
				1700	* @base: the base value
				1701	*
				1702	* Computes he final URI of the reference done by checking that
				1703	* the given URI is valid, and building the final URI using the
				1704	* base URI. This is processed according to section 5.2 of the
				1705	* RFC 2396
				1706	*
				1707	* 5.2. Resolving Relative References to Absolute Form
				1708	*
				1709	* Returns a new URI string (to be freed by the caller) or NULL in case
				1710	* of error.
				1711	*/
				1712	xmlChar *
				1713	xmlBuildURI(const xmlChar URI, const xmlChar base) {
				1714	xmlChar *val = NULL;
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1715	int ret, len, indx, cur, out;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1716	xmlURIPtr ref = NULL;
				1717	xmlURIPtr bas = NULL;
				1718	xmlURIPtr res = NULL;
				1719
				1720	/*
				1721	* 1) The URI reference is parsed into the potential four components and
				1722	* fragment identifier, as described in Section 4.3.
				1723	*
				1724	* NOTE that a completely empty URI is treated by modern browsers
				1725	* as a reference to "." rather than as a synonym for the current
				1726	* URI. Should we do that here?
				1727	*/
				1728	if (URI == NULL)
				1729	ret = -1;
				1730	else {
				1731	if (*URI) {
				1732	ref = xmlCreateURI();
				1733	if (ref == NULL)
				1734	goto done;
				1735	ret = xmlParseURIReference(ref, (const char *) URI);
				1736	}
				1737	else
				1738	ret = 0;
				1739	}
				1740	if (ret != 0)
				1741	goto done;
				1742	if (base == NULL)
				1743	ret = -1;
				1744	else {
				1745	bas = xmlCreateURI();
				1746	if (bas == NULL)
				1747	goto done;
				1748	ret = xmlParseURIReference(bas, (const char *) base);
				1749	}
				1750	if (ret != 0) {
				1751	if (ref)
				1752	val = xmlSaveUri(ref);
				1753	goto done;
				1754	}
				1755	if (ref == NULL) {
				1756	/*
				1757	* the base fragment must be ignored
				1758	*/
				1759	if (bas->fragment != NULL) {
				1760	xmlFree(bas->fragment);
				1761	bas->fragment = NULL;
				1762	}
				1763	val = xmlSaveUri(bas);
				1764	goto done;
				1765	}
				1766
				1767	/*
				1768	* 2) If the path component is empty and the scheme, authority, and
				1769	* query components are undefined, then it is a reference to the
				1770	* current document and we are done. Otherwise, the reference URI's
				1771	* query and fragment components are defined as found (or not found)
				1772	* within the URI reference and not inherited from the base URI.
				1773	*
				1774	* NOTE that in modern browsers, the parsing differs from the above
				1775	* in the following aspect: the query component is allowed to be
				1776	* defined while still treating this as a reference to the current
				1777	* document.
				1778	*/
				1779	res = xmlCreateURI();
				1780	if (res == NULL)
				1781	goto done;
				1782	if ((ref->scheme == NULL) && (ref->path == NULL) &&
				1783	((ref->authority == NULL) && (ref->server == NULL))) {
				1784	if (bas->scheme != NULL)
				1785	res->scheme = xmlMemStrdup(bas->scheme);
				1786	if (bas->authority != NULL)
				1787	res->authority = xmlMemStrdup(bas->authority);
				1788	else if (bas->server != NULL) {
				1789	res->server = xmlMemStrdup(bas->server);
				1790	if (bas->user != NULL)
				1791	res->user = xmlMemStrdup(bas->user);
				1792	res->port = bas->port;
				1793	}
				1794	if (bas->path != NULL)
				1795	res->path = xmlMemStrdup(bas->path);
				1796	if (ref->query != NULL)
				1797	res->query = xmlMemStrdup(ref->query);
				1798	else if (bas->query != NULL)
				1799	res->query = xmlMemStrdup(bas->query);
				1800	if (ref->fragment != NULL)
				1801	res->fragment = xmlMemStrdup(ref->fragment);
				1802	goto step_7;
				1803	}
				1804
				1805	if (ref->query != NULL)
				1806	res->query = xmlMemStrdup(ref->query);
				1807	if (ref->fragment != NULL)
				1808	res->fragment = xmlMemStrdup(ref->fragment);
				1809
				1810	/*
				1811	* 3) If the scheme component is defined, indicating that the reference
				1812	* starts with a scheme name, then the reference is interpreted as an
				1813	* absolute URI and we are done. Otherwise, the reference URI's
				1814	* scheme is inherited from the base URI's scheme component.
				1815	*/
				1816	if (ref->scheme != NULL) {
				1817	val = xmlSaveUri(ref);
				1818	goto done;
				1819	}
				1820	if (bas->scheme != NULL)
				1821	res->scheme = xmlMemStrdup(bas->scheme);
				1822
				1823	/*
				1824	* 4) If the authority component is defined, then the reference is a
				1825	* network-path and we skip to step 7. Otherwise, the reference
				1826	* URI's authority is inherited from the base URI's authority
				1827	* component, which will also be undefined if the URI scheme does not
				1828	* use an authority component.
				1829	*/
				1830	if ((ref->authority != NULL) \|\| (ref->server != NULL)) {
				1831	if (ref->authority != NULL)
				1832	res->authority = xmlMemStrdup(ref->authority);
				1833	else {
				1834	res->server = xmlMemStrdup(ref->server);
				1835	if (ref->user != NULL)
				1836	res->user = xmlMemStrdup(ref->user);
				1837	res->port = ref->port;
				1838	}
				1839	if (ref->path != NULL)
				1840	res->path = xmlMemStrdup(ref->path);
				1841	goto step_7;
				1842	}
				1843	if (bas->authority != NULL)
				1844	res->authority = xmlMemStrdup(bas->authority);
				1845	else if (bas->server != NULL) {
				1846	res->server = xmlMemStrdup(bas->server);
				1847	if (bas->user != NULL)
				1848	res->user = xmlMemStrdup(bas->user);
				1849	res->port = bas->port;
				1850	}
				1851
				1852	/*
				1853	* 5) If the path component begins with a slash character ("/"), then
				1854	* the reference is an absolute-path and we skip to step 7.
				1855	*/
				1856	if ((ref->path != NULL) && (ref->path[0] == '/')) {
				1857	res->path = xmlMemStrdup(ref->path);
				1858	goto step_7;
				1859	}
				1860
				1861
				1862	/*
				1863	* 6) If this step is reached, then we are resolving a relative-path
				1864	* reference. The relative path needs to be merged with the base
				1865	* URI's path. Although there are many ways to do this, we will
				1866	* describe a simple method using a separate string buffer.
				1867	*
				1868	* Allocate a buffer large enough for the result string.
				1869	*/
				1870	len = 2; /* extra / and 0 */
				1871	if (ref->path != NULL)
				1872	len += strlen(ref->path);
				1873	if (bas->path != NULL)
				1874	len += strlen(bas->path);
				1875	res->path = (char *) xmlMalloc(len);
				1876	if (res->path == NULL) {
				1877	xmlGenericError(xmlGenericErrorContext,
				1878	"xmlBuildURI: out of memory\n");
				1879	goto done;
				1880	}
				1881	res->path[0] = 0;
				1882
				1883	/*
				1884	* a) All but the last segment of the base URI's path component is
				1885	* copied to the buffer. In other words, any characters after the
				1886	* last (right-most) slash character, if any, are excluded.
				1887	*/
				1888	cur = 0;
				1889	out = 0;
				1890	if (bas->path != NULL) {
				1891	while (bas->path[cur] != 0) {
				1892	while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
				1893	cur++;
				1894	if (bas->path[cur] == 0)
				1895	break;
				1896
				1897	cur++;
				1898	while (out < cur) {
				1899	res->path[out] = bas->path[out];
				1900	out++;
				1901	}
				1902	}
				1903	}
				1904	res->path[out] = 0;
				1905
				1906	/*
				1907	* b) The reference's path component is appended to the buffer
				1908	* string.
				1909	*/
				1910	if (ref->path != NULL && ref->path[0] != 0) {
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1911	indx = 0;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1912	/*
				1913	* Ensure the path includes a '/'
				1914	*/
				1915	if ((out == 0) && (bas->server != NULL))
				1916	res->path[out++] = '/';
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame^]	1917	while (ref->path[indx] != 0) {
				1918	res->path[out++] = ref->path[indx++];
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1919	}
				1920	}
				1921	res->path[out] = 0;
				1922
				1923	/*
				1924	* Steps c) to h) are really path normalization steps
				1925	*/
				1926	xmlNormalizeURIPath(res->path);
				1927
				1928	step_7:
				1929
				1930	/*
				1931	* 7) The resulting URI components, including any inherited from the
				1932	* base URI, are recombined to give the absolute form of the URI
				1933	* reference.
				1934	*/
				1935	val = xmlSaveUri(res);
				1936
				1937	done:
				1938	if (ref != NULL)
				1939	xmlFreeURI(ref);
				1940	if (bas != NULL)
				1941	xmlFreeURI(bas);
				1942	if (res != NULL)
				1943	xmlFreeURI(res);
				1944	return(val);
				1945	}
				1946
				1947