Blame - xmlstring.c - platform/external/libxml2

blob: 5c6405342d1595ec3084056266dba08e95c3ca97 [file] [log] [blame]

William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	1	/*
				2	* string.c : an XML string utilities module
				3	*
				4	* This module provides various utility functions for manipulating
				5	* the xmlChar* type. All functions named xmlStr* have been moved here
				6	* from the parser.c file (their original home).
				7	*
				8	* See Copyright for the status of this software.
				9	*
				10	* UTF8 string routines from:
				11	* William Brack <wbrack@mmm.com.hk>
				12	*
				13	* daniel@veillard.com
				14	*/
				15
				16	#define IN_LIBXML
				17	#include "libxml.h"
				18
				19	#include <stdlib.h>
				20	#include <string.h>
				21	#include <libxml/xmlmemory.h>
				22	#include <libxml/parserInternals.h>
				23	#include <libxml/xmlstring.h>
				24
				25	/************************************************************************
				26	* *
				27	* Commodity functions to handle xmlChars *
				28	* *
				29	************************************************************************/
				30
				31	/**
				32	* xmlStrndup:
				33	* @cur: the input xmlChar *
				34	* @len: the len of @cur
				35	*
				36	* a strndup for array of xmlChar's
				37	*
				38	* Returns a new xmlChar * or NULL
				39	*/
				40	xmlChar *
				41	xmlStrndup(const xmlChar *cur, int len) {
				42	xmlChar *ret;
				43
				44	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				45	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				46	if (ret == NULL) {
				47	xmlErrMemory(NULL, NULL);
				48	return(NULL);
				49	}
				50	memcpy(ret, cur, len * sizeof(xmlChar));
				51	ret[len] = 0;
				52	return(ret);
				53	}
				54
				55	/**
				56	* xmlStrdup:
				57	* @cur: the input xmlChar *
				58	*
				59	* a strdup for array of xmlChar's. Since they are supposed to be
				60	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				61	* a termination mark of '0'.
				62	*
				63	* Returns a new xmlChar * or NULL
				64	*/
				65	xmlChar *
				66	xmlStrdup(const xmlChar *cur) {
				67	const xmlChar *p = cur;
				68
				69	if (cur == NULL) return(NULL);
				70	while (p != 0) p++; / non input consuming */
				71	return(xmlStrndup(cur, p - cur));
				72	}
				73
				74	/**
				75	* xmlCharStrndup:
				76	* @cur: the input char *
				77	* @len: the len of @cur
				78	*
				79	* a strndup for char's to xmlChar's
				80	*
				81	* Returns a new xmlChar * or NULL
				82	*/
				83
				84	xmlChar *
				85	xmlCharStrndup(const char *cur, int len) {
				86	int i;
				87	xmlChar *ret;
				88
				89	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				90	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				91	if (ret == NULL) {
				92	xmlErrMemory(NULL, NULL);
				93	return(NULL);
				94	}
				95	for (i = 0;i < len;i++)
				96	ret[i] = (xmlChar) cur[i];
				97	ret[len] = 0;
				98	return(ret);
				99	}
				100
				101	/**
				102	* xmlCharStrdup:
				103	* @cur: the input char *
				104	*
				105	* a strdup for char's to xmlChar's
				106	*
				107	* Returns a new xmlChar * or NULL
				108	*/
				109
				110	xmlChar *
				111	xmlCharStrdup(const char *cur) {
				112	const char *p = cur;
				113
				114	if (cur == NULL) return(NULL);
				115	while (p != '\0') p++; / non input consuming */
				116	return(xmlCharStrndup(cur, p - cur));
				117	}
				118
				119	/**
				120	* xmlStrcmp:
				121	* @str1: the first xmlChar *
				122	* @str2: the second xmlChar *
				123	*
				124	* a strcmp for xmlChar's
				125	*
				126	* Returns the integer result of the comparison
				127	*/
				128
				129	int
				130	xmlStrcmp(const xmlChar str1, const xmlChar str2) {
				131	register int tmp;
				132
				133	if (str1 == str2) return(0);
				134	if (str1 == NULL) return(-1);
				135	if (str2 == NULL) return(1);
				136	do {
				137	tmp = str1++ - str2;
				138	if (tmp != 0) return(tmp);
				139	} while (*str2++ != 0);
				140	return 0;
				141	}
				142
				143	/**
				144	* xmlStrEqual:
				145	* @str1: the first xmlChar *
				146	* @str2: the second xmlChar *
				147	*
				148	* Check if both string are equal of have same content
				149	* Should be a bit more readable and faster than xmlStrEqual()
				150	*
				151	* Returns 1 if they are equal, 0 if they are different
				152	*/
				153
				154	int
				155	xmlStrEqual(const xmlChar str1, const xmlChar str2) {
				156	if (str1 == str2) return(1);
				157	if (str1 == NULL) return(0);
				158	if (str2 == NULL) return(0);
				159	do {
				160	if (str1++ != str2) return(0);
				161	} while (*str2++);
				162	return(1);
				163	}
				164
				165	/**
				166	* xmlStrQEqual:
				167	* @pref: the prefix of the QName
				168	* @name: the localname of the QName
				169	* @str: the second xmlChar *
				170	*
				171	* Check if a QName is Equal to a given string
				172	*
				173	* Returns 1 if they are equal, 0 if they are different
				174	*/
				175
				176	int
				177	xmlStrQEqual(const xmlChar pref, const xmlChar name, const xmlChar *str) {
				178	if (pref == NULL) return(xmlStrEqual(name, str));
				179	if (name == NULL) return(0);
				180	if (str == NULL) return(0);
				181
				182	do {
				183	if (pref++ != str) return(0);
				184	} while ((str++) && (pref));
				185	if (*str++ != ':') return(0);
				186	do {
				187	if (name++ != str) return(0);
				188	} while (*str++);
				189	return(1);
				190	}
				191
				192	/**
				193	* xmlStrncmp:
				194	* @str1: the first xmlChar *
				195	* @str2: the second xmlChar *
				196	* @len: the max comparison length
				197	*
				198	* a strncmp for xmlChar's
				199	*
				200	* Returns the integer result of the comparison
				201	*/
				202
				203	int
				204	xmlStrncmp(const xmlChar str1, const xmlChar str2, int len) {
				205	register int tmp;
				206
				207	if (len <= 0) return(0);
				208	if (str1 == str2) return(0);
				209	if (str1 == NULL) return(-1);
				210	if (str2 == NULL) return(1);
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	211	#ifdef __GNUC__
				212	tmp = strncmp(str1, str2, len);
				213	return tmp;
				214	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	215	do {
				216	tmp = str1++ - str2;
				217	if (tmp != 0 \|\| --len == 0) return(tmp);
				218	} while (*str2++ != 0);
				219	return 0;
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	220	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	221	}
				222
				223	static const xmlChar casemap[256] = {
				224	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
				225	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
				226	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
				227	0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
				228	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
				229	0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
				230	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
				231	0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
				232	0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				233	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				234	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				235	0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
				236	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				237	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				238	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				239	0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
				240	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
				241	0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
				242	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
				243	0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
				244	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
				245	0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
				246	0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
				247	0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
				248	0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
				249	0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
				250	0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
				251	0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
				252	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
				253	0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
				254	0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
				255	0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
				256	};
				257
				258	/**
				259	* xmlStrcasecmp:
				260	* @str1: the first xmlChar *
				261	* @str2: the second xmlChar *
				262	*
				263	* a strcasecmp for xmlChar's
				264	*
				265	* Returns the integer result of the comparison
				266	*/
				267
				268	int
				269	xmlStrcasecmp(const xmlChar str1, const xmlChar str2) {
				270	register int tmp;
				271
				272	if (str1 == str2) return(0);
				273	if (str1 == NULL) return(-1);
				274	if (str2 == NULL) return(1);
				275	do {
				276	tmp = casemap[str1++] - casemap[str2];
				277	if (tmp != 0) return(tmp);
				278	} while (*str2++ != 0);
				279	return 0;
				280	}
				281
				282	/**
				283	* xmlStrncasecmp:
				284	* @str1: the first xmlChar *
				285	* @str2: the second xmlChar *
				286	* @len: the max comparison length
				287	*
				288	* a strncasecmp for xmlChar's
				289	*
				290	* Returns the integer result of the comparison
				291	*/
				292
				293	int
				294	xmlStrncasecmp(const xmlChar str1, const xmlChar str2, int len) {
				295	register int tmp;
				296
				297	if (len <= 0) return(0);
				298	if (str1 == str2) return(0);
				299	if (str1 == NULL) return(-1);
				300	if (str2 == NULL) return(1);
				301	do {
				302	tmp = casemap[str1++] - casemap[str2];
				303	if (tmp != 0 \|\| --len == 0) return(tmp);
				304	} while (*str2++ != 0);
				305	return 0;
				306	}
				307
				308	/**
				309	* xmlStrchr:
				310	* @str: the xmlChar * array
				311	* @val: the xmlChar to search
				312	*
				313	* a strchr for xmlChar's
				314	*
				315	* Returns the xmlChar * for the first occurrence or NULL.
				316	*/
				317
				318	const xmlChar *
				319	xmlStrchr(const xmlChar *str, xmlChar val) {
				320	if (str == NULL) return(NULL);
				321	while (str != 0) { / non input consuming */
				322	if (str == val) return((xmlChar ) str);
				323	str++;
				324	}
				325	return(NULL);
				326	}
				327
				328	/**
				329	* xmlStrstr:
				330	* @str: the xmlChar * array (haystack)
				331	* @val: the xmlChar to search (needle)
				332	*
				333	* a strstr for xmlChar's
				334	*
				335	* Returns the xmlChar * for the first occurrence or NULL.
				336	*/
				337
				338	const xmlChar *
				339	xmlStrstr(const xmlChar str, const xmlChar val) {
				340	int n;
				341
				342	if (str == NULL) return(NULL);
				343	if (val == NULL) return(NULL);
				344	n = xmlStrlen(val);
				345
				346	if (n == 0) return(str);
				347	while (str != 0) { / non input consuming */
				348	if (str == val) {
				349	if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
				350	}
				351	str++;
				352	}
				353	return(NULL);
				354	}
				355
				356	/**
				357	* xmlStrcasestr:
				358	* @str: the xmlChar * array (haystack)
				359	* @val: the xmlChar to search (needle)
				360	*
				361	* a case-ignoring strstr for xmlChar's
				362	*
				363	* Returns the xmlChar * for the first occurrence or NULL.
				364	*/
				365
				366	const xmlChar *
				367	xmlStrcasestr(const xmlChar str, xmlChar val) {
				368	int n;
				369
				370	if (str == NULL) return(NULL);
				371	if (val == NULL) return(NULL);
				372	n = xmlStrlen(val);
				373
				374	if (n == 0) return(str);
				375	while (str != 0) { / non input consuming */
				376	if (casemap[str] == casemap[val])
				377	if (!xmlStrncasecmp(str, val, n)) return(str);
				378	str++;
				379	}
				380	return(NULL);
				381	}
				382
				383	/**
				384	* xmlStrsub:
				385	* @str: the xmlChar * array (haystack)
				386	* @start: the index of the first char (zero based)
				387	* @len: the length of the substring
				388	*
				389	* Extract a substring of a given string
				390	*
				391	* Returns the xmlChar * for the first occurrence or NULL.
				392	*/
				393
				394	xmlChar *
				395	xmlStrsub(const xmlChar *str, int start, int len) {
				396	int i;
				397
				398	if (str == NULL) return(NULL);
				399	if (start < 0) return(NULL);
				400	if (len < 0) return(NULL);
				401
				402	for (i = 0;i < start;i++) {
				403	if (*str == 0) return(NULL);
				404	str++;
				405	}
				406	if (*str == 0) return(NULL);
				407	return(xmlStrndup(str, len));
				408	}
				409
				410	/**
				411	* xmlStrlen:
				412	* @str: the xmlChar * array
				413	*
				414	* length of a xmlChar's string
				415	*
				416	* Returns the number of xmlChar contained in the ARRAY.
				417	*/
				418
				419	int
				420	xmlStrlen(const xmlChar *str) {
				421	int len = 0;
				422
				423	if (str == NULL) return(0);
				424	while (str != 0) { / non input consuming */
				425	str++;
				426	len++;
				427	}
				428	return(len);
				429	}
				430
				431	/**
				432	* xmlStrncat:
				433	* @cur: the original xmlChar * array
				434	* @add: the xmlChar * array added
				435	* @len: the length of @add
				436	*
				437	* a strncat for array of xmlChar's, it will extend @cur with the len
				438	* first bytes of @add.
				439	*
				440	* Returns a new xmlChar *, the original @cur is reallocated if needed
				441	* and should not be freed
				442	*/
				443
				444	xmlChar *
				445	xmlStrncat(xmlChar cur, const xmlChar add, int len) {
				446	int size;
				447	xmlChar *ret;
				448
				449	if ((add == NULL) \|\| (len == 0))
				450	return(cur);
				451	if (cur == NULL)
				452	return(xmlStrndup(add, len));
				453
				454	size = xmlStrlen(cur);
				455	ret = (xmlChar ) xmlRealloc(cur, (size + len + 1) sizeof(xmlChar));
				456	if (ret == NULL) {
				457	xmlErrMemory(NULL, NULL);
				458	return(cur);
				459	}
				460	memcpy(&ret[size], add, len * sizeof(xmlChar));
				461	ret[size + len] = 0;
				462	return(ret);
				463	}
				464
				465	/**
				466	* xmlStrncatNew:
				467	* @str1: first xmlChar string
				468	* @str2: second xmlChar string
				469	* @len: the len of @str2
				470	*
				471	* same as xmlStrncat, but creates a new string. The original
				472	* two strings are not freed.
				473	*
				474	* Returns a new xmlChar * or NULL
				475	*/
				476	xmlChar *
				477	xmlStrncatNew(const xmlChar str1, const xmlChar str2, int len) {
				478	int size;
				479	xmlChar *ret;
				480
				481	if ((str2 == NULL) \|\| (len == 0))
				482	return(xmlStrdup(str1));
				483	if (str1 == NULL)
				484	return(xmlStrndup(str2, len));
				485
				486	size = xmlStrlen(str1);
				487	ret = (xmlChar ) xmlMalloc((size + len + 1) sizeof(xmlChar));
				488	if (ret == NULL) {
				489	xmlErrMemory(NULL, NULL);
				490	return(xmlStrndup(str1, size));
				491	}
				492	memcpy(ret, str1, size * sizeof(xmlChar));
				493	memcpy(&ret[size], str2, len * sizeof(xmlChar));
				494	ret[size + len] = 0;
				495	return(ret);
				496	}
				497
				498	/**
				499	* xmlStrcat:
				500	* @cur: the original xmlChar * array
				501	* @add: the xmlChar * array added
				502	*
				503	* a strcat for array of xmlChar's. Since they are supposed to be
				504	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				505	* a termination mark of '0'.
				506	*
				507	* Returns a new xmlChar * containing the concatenated string.
				508	*/
				509	xmlChar *
				510	xmlStrcat(xmlChar cur, const xmlChar add) {
				511	const xmlChar *p = add;
				512
				513	if (add == NULL) return(cur);
				514	if (cur == NULL)
				515	return(xmlStrdup(add));
				516
				517	while (p != 0) p++; / non input consuming */
				518	return(xmlStrncat(cur, add, p - add));
				519	}
				520
				521	/**
				522	* xmlStrPrintf:
				523	* @buf: the result buffer.
				524	* @len: the result buffer length.
				525	* @msg: the message with printf formatting.
				526	* @...: extra parameters for the message.
				527	*
				528	* Formats @msg and places result into @buf.
				529	*
				530	* Returns the number of characters written to @buf or -1 if an error occurs.
				531	*/
				532	int
				533	xmlStrPrintf(xmlChar buf, int len, const xmlChar msg, ...) {
				534	va_list args;
				535	int ret;
				536
				537	if((buf == NULL) \|\| (msg == NULL)) {
				538	return(-1);
				539	}
				540
				541	va_start(args, msg);
				542	ret = vsnprintf((char ) buf, len, (const char ) msg, args);
				543	va_end(args);
				544	buf[len - 1] = 0; /* be safe ! */
				545
				546	return(ret);
				547	}
				548
				549	/**
				550	* xmlStrVPrintf:
				551	* @buf: the result buffer.
				552	* @len: the result buffer length.
				553	* @msg: the message with printf formatting.
				554	* @ap: extra parameters for the message.
				555	*
				556	* Formats @msg and places result into @buf.
				557	*
				558	* Returns the number of characters written to @buf or -1 if an error occurs.
				559	*/
				560	int
				561	xmlStrVPrintf(xmlChar buf, int len, const xmlChar msg, va_list ap) {
				562	int ret;
				563
				564	if((buf == NULL) \|\| (msg == NULL)) {
				565	return(-1);
				566	}
				567
				568	ret = vsnprintf((char ) buf, len, (const char ) msg, ap);
				569	buf[len - 1] = 0; /* be safe ! */
				570
				571	return(ret);
				572	}
				573
/************************************************************************
 *                                                                      *
 *              Generic UTF8 handling routines                          *
 *                                                                      *
 * From rfc2044: encoding of the Unicode values on UTF-8:               *
 *                                                                      *
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
 * 0000 0000-0000 007F   0xxxxxxx                                       *
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
 * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx            *
 *                                                                      *
 * Values > 0xFFFF use the 4-byte form, which the length, decoding and  *
 * checking routines below accept.                                      *
 *                                                                      *
 ************************************************************************/
				588
				589
				590	/**
				591	* xmlUTF8Size:
				592	* @utf: pointer to the UTF8 character
				593	*
				594	* calculates the internal size of a UTF8 character
				595	*
				596	* returns the numbers of bytes in the character, -1 on format error
				597	*/
				598	int
				599	xmlUTF8Size(const xmlChar *utf) {
				600	xmlChar mask;
				601	int len;
				602
				603	if (utf == NULL)
				604	return -1;
				605	if (*utf < 0x80)
				606	return 1;
				607	/* check valid UTF8 character */
				608	if (!(*utf & 0x40))
				609	return -1;
				610	/* determine number of bytes in char */
				611	len = 2;
				612	for (mask=0x20; mask != 0; mask>>=1) {
				613	if (!(*utf & mask))
				614	return len;
				615	len++;
				616	}
				617	return -1;
				618	}
				619
				620	/**
				621	* xmlUTF8Charcmp:
				622	* @utf1: pointer to first UTF8 char
				623	* @utf2: pointer to second UTF8 char
				624	*
				625	* compares the two UCS4 values
				626	*
				627	* returns result of the compare as with xmlStrncmp
				628	*/
				629	int
				630	xmlUTF8Charcmp(const xmlChar utf1, const xmlChar utf2) {
				631
				632	if (utf1 == NULL ) {
				633	if (utf2 == NULL)
				634	return 0;
				635	return -1;
				636	}
				637	return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
				638	}
				639
				640	/**
				641	* xmlUTF8Strlen:
				642	* @utf: a sequence of UTF-8 encoded bytes
				643	*
				644	* compute the length of an UTF8 string, it doesn't do a full UTF8
				645	* checking of the content of the string.
				646	*
				647	* Returns the number of characters in the string or -1 in case of error
				648	*/
				649	int
				650	xmlUTF8Strlen(const xmlChar *utf) {
				651	int ret = 0;
				652
				653	if (utf == NULL)
				654	return(-1);
				655
				656	while (*utf != 0) {
				657	if (utf[0] & 0x80) {
				658	if ((utf[1] & 0xc0) != 0x80)
				659	return(-1);
				660	if ((utf[0] & 0xe0) == 0xe0) {
				661	if ((utf[2] & 0xc0) != 0x80)
				662	return(-1);
				663	if ((utf[0] & 0xf0) == 0xf0) {
				664	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				665	return(-1);
				666	utf += 4;
				667	} else {
				668	utf += 3;
				669	}
				670	} else {
				671	utf += 2;
				672	}
				673	} else {
				674	utf++;
				675	}
				676	ret++;
				677	}
				678	return(ret);
				679	}
				680
				681	/**
				682	* xmlGetUTF8Char:
				683	* @utf: a sequence of UTF-8 encoded bytes
				684	* @len: a pointer to @bytes len
				685	*
				686	* Read one UTF8 Char from @utf
				687	*
				688	* Returns the char value or -1 in case of error, and updates *len with the
				689	* number of bytes consumed
				690	*/
				691	int
				692	xmlGetUTF8Char(const unsigned char utf, int len) {
				693	unsigned int c;
				694
				695	if (utf == NULL)
				696	goto error;
				697	if (len == NULL)
				698	goto error;
				699	if (*len < 1)
				700	goto error;
				701
				702	c = utf[0];
				703	if (c & 0x80) {
				704	if (*len < 2)
				705	goto error;
				706	if ((utf[1] & 0xc0) != 0x80)
				707	goto error;
				708	if ((c & 0xe0) == 0xe0) {
				709	if (*len < 3)
				710	goto error;
				711	if ((utf[2] & 0xc0) != 0x80)
				712	goto error;
				713	if ((c & 0xf0) == 0xf0) {
				714	if (*len < 4)
				715	goto error;
				716	if ((c & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				717	goto error;
				718	*len = 4;
				719	/* 4-byte code */
				720	c = (utf[0] & 0x7) << 18;
				721	c \|= (utf[1] & 0x3f) << 12;
				722	c \|= (utf[2] & 0x3f) << 6;
				723	c \|= utf[3] & 0x3f;
				724	} else {
				725	/* 3-byte code */
				726	*len = 3;
				727	c = (utf[0] & 0xf) << 12;
				728	c \|= (utf[1] & 0x3f) << 6;
				729	c \|= utf[2] & 0x3f;
				730	}
				731	} else {
				732	/* 2-byte code */
				733	*len = 2;
				734	c = (utf[0] & 0x1f) << 6;
				735	c \|= utf[1] & 0x3f;
				736	}
				737	} else {
				738	/* 1-byte code */
				739	*len = 1;
				740	}
				741	return(c);
				742
				743	error:
				744	*len = 0;
				745	return(-1);
				746	}
				747
				748	/**
				749	* xmlCheckUTF8:
				750	* @utf: Pointer to putative UTF-8 encoded string.
				751	*
				752	* Checks @utf for being valid UTF-8. @utf is assumed to be
				753	* null-terminated. This function is not super-strict, as it will
				754	* allow longer UTF-8 sequences than necessary. Note that Java is
				755	* capable of producing these sequences if provoked. Also note, this
				756	* routine checks for the 4-byte maximum size, but does not check for
				757	* 0x10ffff maximum value.
				758	*
				759	* Return value: true if @utf is valid.
				760	**/
				761	int
				762	xmlCheckUTF8(const unsigned char *utf)
				763	{
				764	int ix;
				765	unsigned char c;
				766
				767	for (ix = 0; (c = utf[ix]);) {
				768	if (c & 0x80) {
				769	if ((utf[ix + 1] & 0xc0) != 0x80)
				770	return(0);
				771	if ((c & 0xe0) == 0xe0) {
				772	if ((utf[ix + 2] & 0xc0) != 0x80)
				773	return(0);
				774	if ((c & 0xf0) == 0xf0) {
				775	if ((c & 0xf8) != 0xf0 \|\| (utf[ix + 3] & 0xc0) != 0x80)
				776	return(0);
				777	ix += 4;
				778	/* 4-byte code */
				779	} else
				780	/* 3-byte code */
				781	ix += 3;
				782	} else
				783	/* 2-byte code */
				784	ix += 2;
				785	} else
				786	/* 1-byte code */
				787	ix++;
				788	}
				789	return(1);
				790	}
				791
				792	/**
				793	* xmlUTF8Strsize:
				794	* @utf: a sequence of UTF-8 encoded bytes
				795	* @len: the number of characters in the array
				796	*
				797	* storage size of an UTF8 string
				798	*
				799	* Returns the storage size of
				800	* the first 'len' characters of ARRAY
				801	*
				802	*/
				803
				804	int
				805	xmlUTF8Strsize(const xmlChar *utf, int len) {
				806	const xmlChar *ptr=utf;
				807	xmlChar ch;
				808
				809	if (len <= 0)
				810	return(0);
				811
				812	while ( len-- > 0) {
				813	if ( !*ptr )
				814	break;
				815	if ( (ch = *ptr++) & 0x80)
				816	while ( (ch<<=1) & 0x80 )
				817	ptr++;
				818	}
				819	return (ptr - utf);
				820	}
				821
				822
				823	/**
				824	* xmlUTF8Strndup:
				825	* @utf: the input UTF8 *
				826	* @len: the len of @utf (in chars)
				827	*
				828	* a strndup for array of UTF8's
				829	*
				830	* Returns a new UTF8 * or NULL
				831	*/
				832	xmlChar *
				833	xmlUTF8Strndup(const xmlChar *utf, int len) {
				834	xmlChar *ret;
				835	int i;
				836
				837	if ((utf == NULL) \|\| (len < 0)) return(NULL);
				838	i = xmlUTF8Strsize(utf, len);
				839	ret = (xmlChar ) xmlMallocAtomic((i + 1) sizeof(xmlChar));
				840	if (ret == NULL) {
				841	xmlGenericError(xmlGenericErrorContext,
				842	"malloc of %ld byte failed\n",
				843	(len + 1) * (long)sizeof(xmlChar));
				844	return(NULL);
				845	}
				846	memcpy(ret, utf, i * sizeof(xmlChar));
				847	ret[i] = 0;
				848	return(ret);
				849	}
				850
				851	/**
				852	* xmlUTF8Strpos:
				853	* @utf: the input UTF8 *
				854	* @pos: the position of the desired UTF8 char (in chars)
				855	*
				856	* a function to provide the equivalent of fetching a
				857	* character from a string array
				858	*
				859	* Returns a pointer to the UTF8 character or NULL
				860	*/
				861	xmlChar *
				862	xmlUTF8Strpos(const xmlChar *utf, int pos) {
				863	xmlChar ch;
				864
				865	if (utf == NULL) return(NULL);
				866	if ( (pos < 0) \|\| (pos >= xmlUTF8Strlen(utf)) )
				867	return(NULL);
				868	while (pos--) {
				869	if ((ch=*utf++) == 0) return(NULL);
				870	if ( ch & 0x80 ) {
				871	/* if not simple ascii, verify proper format */
				872	if ( (ch & 0xc0) != 0xc0 )
				873	return(NULL);
				874	/* then skip over remaining bytes for this char */
				875	while ( (ch <<= 1) & 0x80 )
				876	if ( (*utf++ & 0xc0) != 0x80 )
				877	return(NULL);
				878	}
				879	}
				880	return((xmlChar *)utf);
				881	}
				882
				883	/**
				884	* xmlUTF8Strloc:
				885	* @utf: the input UTF8 *
				886	* @utfchar: the UTF8 character to be found
				887	*
				888	* a function to provide the relative location of a UTF8 char
				889	*
				890	* Returns the relative character position of the desired char
				891	* or -1 if not found
				892	*/
				893	int
				894	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
				895	int i, size;
				896	xmlChar ch;
				897
				898	if (utf==NULL \|\| utfchar==NULL) return -1;
				899	size = xmlUTF8Strsize(utfchar, 1);
				900	for(i=0; (ch=*utf) != 0; i++) {
				901	if (xmlStrncmp(utf, utfchar, size)==0)
				902	return(i);
				903	utf++;
				904	if ( ch & 0x80 ) {
				905	/* if not simple ascii, verify proper format */
				906	if ( (ch & 0xc0) != 0xc0 )
				907	return(-1);
				908	/* then skip over remaining bytes for this char */
				909	while ( (ch <<= 1) & 0x80 )
				910	if ( (*utf++ & 0xc0) != 0x80 )
				911	return(-1);
				912	}
				913	}
				914
				915	return(-1);
				916	}
				917	/**
				918	* xmlUTF8Strsub:
				919	* @utf: a sequence of UTF-8 encoded bytes
				920	* @start: relative pos of first char
				921	* @len: total number to copy
				922	*
				923	* Create a substring from a given UTF-8 string
				924	* Note: positions are given in units of UTF-8 chars
				925	*
				926	* Returns a pointer to a newly created string
				927	* or NULL if any problem
				928	*/
				929
				930	xmlChar *
				931	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
				932	int i;
				933	xmlChar ch;
				934
				935	if (utf == NULL) return(NULL);
				936	if (start < 0) return(NULL);
				937	if (len < 0) return(NULL);
				938
				939	/*
				940	* Skip over any leading chars
				941	*/
				942	for (i = 0;i < start;i++) {
				943	if ((ch=*utf++) == 0) return(NULL);
				944	if ( ch & 0x80 ) {
				945	/* if not simple ascii, verify proper format */
				946	if ( (ch & 0xc0) != 0xc0 )
				947	return(NULL);
				948	/* then skip over remaining bytes for this char */
				949	while ( (ch <<= 1) & 0x80 )
				950	if ( (*utf++ & 0xc0) != 0x80 )
				951	return(NULL);
				952	}
				953	}
				954
				955	return(xmlUTF8Strndup(utf, len));
				956	}