Blame - xmlstring.c - platform/external/libxml2

blob: b47e13eb01cb4cf1a28ddc0192727a0746635dd7 [file] [log] [blame]

William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame^]	1	/*
				2	* string.c : an XML string utilities module
				3	*
				4	* This module provides various utility functions for manipulating
				5	* the xmlChar* type. All functions named xmlStr* have been moved here
				6	* from the parser.c file (their original home).
				7	*
				8	* See Copyright for the status of this software.
				9	*
				10	* UTF8 string routines from:
				11	* William Brack <wbrack@mmm.com.hk>
				12	*
				13	* daniel@veillard.com
				14	*/
				15
				16	#define IN_LIBXML
				17	#include "libxml.h"
				18
				19	#include <stdlib.h>
				20	#include <string.h>
				21	#include <libxml/xmlmemory.h>
				22	#include <libxml/parserInternals.h>
				23	#include <libxml/xmlstring.h>
				24
				25	/************************************************************************
				26	* *
				27	* Commodity functions to handle xmlChars *
				28	* *
				29	************************************************************************/
				30
				31	/**
				32	* xmlStrndup:
				33	* @cur: the input xmlChar *
				34	* @len: the len of @cur
				35	*
				36	* a strndup for array of xmlChar's
				37	*
				38	* Returns a new xmlChar * or NULL
				39	*/
				40	xmlChar *
				41	xmlStrndup(const xmlChar *cur, int len) {
				42	xmlChar *ret;
				43
				44	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				45	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				46	if (ret == NULL) {
				47	xmlErrMemory(NULL, NULL);
				48	return(NULL);
				49	}
				50	memcpy(ret, cur, len * sizeof(xmlChar));
				51	ret[len] = 0;
				52	return(ret);
				53	}
				54
				55	/**
				56	* xmlStrdup:
				57	* @cur: the input xmlChar *
				58	*
				59	* a strdup for array of xmlChar's. Since they are supposed to be
				60	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				61	* a termination mark of '0'.
				62	*
				63	* Returns a new xmlChar * or NULL
				64	*/
				65	xmlChar *
				66	xmlStrdup(const xmlChar *cur) {
				67	const xmlChar *p = cur;
				68
				69	if (cur == NULL) return(NULL);
				70	while (p != 0) p++; / non input consuming */
				71	return(xmlStrndup(cur, p - cur));
				72	}
				73
				74	/**
				75	* xmlCharStrndup:
				76	* @cur: the input char *
				77	* @len: the len of @cur
				78	*
				79	* a strndup for char's to xmlChar's
				80	*
				81	* Returns a new xmlChar * or NULL
				82	*/
				83
				84	xmlChar *
				85	xmlCharStrndup(const char *cur, int len) {
				86	int i;
				87	xmlChar *ret;
				88
				89	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				90	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				91	if (ret == NULL) {
				92	xmlErrMemory(NULL, NULL);
				93	return(NULL);
				94	}
				95	for (i = 0;i < len;i++)
				96	ret[i] = (xmlChar) cur[i];
				97	ret[len] = 0;
				98	return(ret);
				99	}
				100
				101	/**
				102	* xmlCharStrdup:
				103	* @cur: the input char *
				104	*
				105	* a strdup for char's to xmlChar's
				106	*
				107	* Returns a new xmlChar * or NULL
				108	*/
				109
				110	xmlChar *
				111	xmlCharStrdup(const char *cur) {
				112	const char *p = cur;
				113
				114	if (cur == NULL) return(NULL);
				115	while (p != '\0') p++; / non input consuming */
				116	return(xmlCharStrndup(cur, p - cur));
				117	}
				118
				119	/**
				120	* xmlStrcmp:
				121	* @str1: the first xmlChar *
				122	* @str2: the second xmlChar *
				123	*
				124	* a strcmp for xmlChar's
				125	*
				126	* Returns the integer result of the comparison
				127	*/
				128
				129	int
				130	xmlStrcmp(const xmlChar str1, const xmlChar str2) {
				131	register int tmp;
				132
				133	if (str1 == str2) return(0);
				134	if (str1 == NULL) return(-1);
				135	if (str2 == NULL) return(1);
				136	do {
				137	tmp = str1++ - str2;
				138	if (tmp != 0) return(tmp);
				139	} while (*str2++ != 0);
				140	return 0;
				141	}
				142
				143	/**
				144	* xmlStrEqual:
				145	* @str1: the first xmlChar *
				146	* @str2: the second xmlChar *
				147	*
				148	* Check if both string are equal of have same content
				149	* Should be a bit more readable and faster than xmlStrEqual()
				150	*
				151	* Returns 1 if they are equal, 0 if they are different
				152	*/
				153
				154	int
				155	xmlStrEqual(const xmlChar str1, const xmlChar str2) {
				156	if (str1 == str2) return(1);
				157	if (str1 == NULL) return(0);
				158	if (str2 == NULL) return(0);
				159	do {
				160	if (str1++ != str2) return(0);
				161	} while (*str2++);
				162	return(1);
				163	}
				164
				165	/**
				166	* xmlStrQEqual:
				167	* @pref: the prefix of the QName
				168	* @name: the localname of the QName
				169	* @str: the second xmlChar *
				170	*
				171	* Check if a QName is Equal to a given string
				172	*
				173	* Returns 1 if they are equal, 0 if they are different
				174	*/
				175
				176	int
				177	xmlStrQEqual(const xmlChar pref, const xmlChar name, const xmlChar *str) {
				178	if (pref == NULL) return(xmlStrEqual(name, str));
				179	if (name == NULL) return(0);
				180	if (str == NULL) return(0);
				181
				182	do {
				183	if (pref++ != str) return(0);
				184	} while ((str++) && (pref));
				185	if (*str++ != ':') return(0);
				186	do {
				187	if (name++ != str) return(0);
				188	} while (*str++);
				189	return(1);
				190	}
				191
				192	/**
				193	* xmlStrncmp:
				194	* @str1: the first xmlChar *
				195	* @str2: the second xmlChar *
				196	* @len: the max comparison length
				197	*
				198	* a strncmp for xmlChar's
				199	*
				200	* Returns the integer result of the comparison
				201	*/
				202
				203	int
				204	xmlStrncmp(const xmlChar str1, const xmlChar str2, int len) {
				205	register int tmp;
				206
				207	if (len <= 0) return(0);
				208	if (str1 == str2) return(0);
				209	if (str1 == NULL) return(-1);
				210	if (str2 == NULL) return(1);
				211	do {
				212	tmp = str1++ - str2;
				213	if (tmp != 0 \|\| --len == 0) return(tmp);
				214	} while (*str2++ != 0);
				215	return 0;
				216	}
				217
				218	static const xmlChar casemap[256] = {
				219	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
				220	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
				221	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
				222	0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
				223	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
				224	0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
				225	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
				226	0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
				227	0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				228	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				229	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				230	0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
				231	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				232	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				233	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				234	0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
				235	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
				236	0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
				237	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
				238	0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
				239	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
				240	0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
				241	0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
				242	0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
				243	0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
				244	0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
				245	0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
				246	0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
				247	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
				248	0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
				249	0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
				250	0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
				251	};
				252
				253	/**
				254	* xmlStrcasecmp:
				255	* @str1: the first xmlChar *
				256	* @str2: the second xmlChar *
				257	*
				258	* a strcasecmp for xmlChar's
				259	*
				260	* Returns the integer result of the comparison
				261	*/
				262
				263	int
				264	xmlStrcasecmp(const xmlChar str1, const xmlChar str2) {
				265	register int tmp;
				266
				267	if (str1 == str2) return(0);
				268	if (str1 == NULL) return(-1);
				269	if (str2 == NULL) return(1);
				270	do {
				271	tmp = casemap[str1++] - casemap[str2];
				272	if (tmp != 0) return(tmp);
				273	} while (*str2++ != 0);
				274	return 0;
				275	}
				276
				277	/**
				278	* xmlStrncasecmp:
				279	* @str1: the first xmlChar *
				280	* @str2: the second xmlChar *
				281	* @len: the max comparison length
				282	*
				283	* a strncasecmp for xmlChar's
				284	*
				285	* Returns the integer result of the comparison
				286	*/
				287
				288	int
				289	xmlStrncasecmp(const xmlChar str1, const xmlChar str2, int len) {
				290	register int tmp;
				291
				292	if (len <= 0) return(0);
				293	if (str1 == str2) return(0);
				294	if (str1 == NULL) return(-1);
				295	if (str2 == NULL) return(1);
				296	do {
				297	tmp = casemap[str1++] - casemap[str2];
				298	if (tmp != 0 \|\| --len == 0) return(tmp);
				299	} while (*str2++ != 0);
				300	return 0;
				301	}
				302
				303	/**
				304	* xmlStrchr:
				305	* @str: the xmlChar * array
				306	* @val: the xmlChar to search
				307	*
				308	* a strchr for xmlChar's
				309	*
				310	* Returns the xmlChar * for the first occurrence or NULL.
				311	*/
				312
				313	const xmlChar *
				314	xmlStrchr(const xmlChar *str, xmlChar val) {
				315	if (str == NULL) return(NULL);
				316	while (str != 0) { / non input consuming */
				317	if (str == val) return((xmlChar ) str);
				318	str++;
				319	}
				320	return(NULL);
				321	}
				322
				323	/**
				324	* xmlStrstr:
				325	* @str: the xmlChar * array (haystack)
				326	* @val: the xmlChar to search (needle)
				327	*
				328	* a strstr for xmlChar's
				329	*
				330	* Returns the xmlChar * for the first occurrence or NULL.
				331	*/
				332
				333	const xmlChar *
				334	xmlStrstr(const xmlChar str, const xmlChar val) {
				335	int n;
				336
				337	if (str == NULL) return(NULL);
				338	if (val == NULL) return(NULL);
				339	n = xmlStrlen(val);
				340
				341	if (n == 0) return(str);
				342	while (str != 0) { / non input consuming */
				343	if (str == val) {
				344	if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
				345	}
				346	str++;
				347	}
				348	return(NULL);
				349	}
				350
				351	/**
				352	* xmlStrcasestr:
				353	* @str: the xmlChar * array (haystack)
				354	* @val: the xmlChar to search (needle)
				355	*
				356	* a case-ignoring strstr for xmlChar's
				357	*
				358	* Returns the xmlChar * for the first occurrence or NULL.
				359	*/
				360
				361	const xmlChar *
				362	xmlStrcasestr(const xmlChar str, xmlChar val) {
				363	int n;
				364
				365	if (str == NULL) return(NULL);
				366	if (val == NULL) return(NULL);
				367	n = xmlStrlen(val);
				368
				369	if (n == 0) return(str);
				370	while (str != 0) { / non input consuming */
				371	if (casemap[str] == casemap[val])
				372	if (!xmlStrncasecmp(str, val, n)) return(str);
				373	str++;
				374	}
				375	return(NULL);
				376	}
				377
				378	/**
				379	* xmlStrsub:
				380	* @str: the xmlChar * array (haystack)
				381	* @start: the index of the first char (zero based)
				382	* @len: the length of the substring
				383	*
				384	* Extract a substring of a given string
				385	*
				386	* Returns the xmlChar * for the first occurrence or NULL.
				387	*/
				388
				389	xmlChar *
				390	xmlStrsub(const xmlChar *str, int start, int len) {
				391	int i;
				392
				393	if (str == NULL) return(NULL);
				394	if (start < 0) return(NULL);
				395	if (len < 0) return(NULL);
				396
				397	for (i = 0;i < start;i++) {
				398	if (*str == 0) return(NULL);
				399	str++;
				400	}
				401	if (*str == 0) return(NULL);
				402	return(xmlStrndup(str, len));
				403	}
				404
				405	/**
				406	* xmlStrlen:
				407	* @str: the xmlChar * array
				408	*
				409	* length of a xmlChar's string
				410	*
				411	* Returns the number of xmlChar contained in the ARRAY.
				412	*/
				413
				414	int
				415	xmlStrlen(const xmlChar *str) {
				416	int len = 0;
				417
				418	if (str == NULL) return(0);
				419	while (str != 0) { / non input consuming */
				420	str++;
				421	len++;
				422	}
				423	return(len);
				424	}
				425
				426	/**
				427	* xmlStrncat:
				428	* @cur: the original xmlChar * array
				429	* @add: the xmlChar * array added
				430	* @len: the length of @add
				431	*
				432	* a strncat for array of xmlChar's, it will extend @cur with the len
				433	* first bytes of @add.
				434	*
				435	* Returns a new xmlChar *, the original @cur is reallocated if needed
				436	* and should not be freed
				437	*/
				438
				439	xmlChar *
				440	xmlStrncat(xmlChar cur, const xmlChar add, int len) {
				441	int size;
				442	xmlChar *ret;
				443
				444	if ((add == NULL) \|\| (len == 0))
				445	return(cur);
				446	if (cur == NULL)
				447	return(xmlStrndup(add, len));
				448
				449	size = xmlStrlen(cur);
				450	ret = (xmlChar ) xmlRealloc(cur, (size + len + 1) sizeof(xmlChar));
				451	if (ret == NULL) {
				452	xmlErrMemory(NULL, NULL);
				453	return(cur);
				454	}
				455	memcpy(&ret[size], add, len * sizeof(xmlChar));
				456	ret[size + len] = 0;
				457	return(ret);
				458	}
				459
				460	/**
				461	* xmlStrncatNew:
				462	* @str1: first xmlChar string
				463	* @str2: second xmlChar string
				464	* @len: the len of @str2
				465	*
				466	* same as xmlStrncat, but creates a new string. The original
				467	* two strings are not freed.
				468	*
				469	* Returns a new xmlChar * or NULL
				470	*/
				471	xmlChar *
				472	xmlStrncatNew(const xmlChar str1, const xmlChar str2, int len) {
				473	int size;
				474	xmlChar *ret;
				475
				476	if ((str2 == NULL) \|\| (len == 0))
				477	return(xmlStrdup(str1));
				478	if (str1 == NULL)
				479	return(xmlStrndup(str2, len));
				480
				481	size = xmlStrlen(str1);
				482	ret = (xmlChar ) xmlMalloc((size + len + 1) sizeof(xmlChar));
				483	if (ret == NULL) {
				484	xmlErrMemory(NULL, NULL);
				485	return(xmlStrndup(str1, size));
				486	}
				487	memcpy(ret, str1, size * sizeof(xmlChar));
				488	memcpy(&ret[size], str2, len * sizeof(xmlChar));
				489	ret[size + len] = 0;
				490	return(ret);
				491	}
				492
				493	/**
				494	* xmlStrcat:
				495	* @cur: the original xmlChar * array
				496	* @add: the xmlChar * array added
				497	*
				498	* a strcat for array of xmlChar's. Since they are supposed to be
				499	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				500	* a termination mark of '0'.
				501	*
				502	* Returns a new xmlChar * containing the concatenated string.
				503	*/
				504	xmlChar *
				505	xmlStrcat(xmlChar cur, const xmlChar add) {
				506	const xmlChar *p = add;
				507
				508	if (add == NULL) return(cur);
				509	if (cur == NULL)
				510	return(xmlStrdup(add));
				511
				512	while (p != 0) p++; / non input consuming */
				513	return(xmlStrncat(cur, add, p - add));
				514	}
				515
				516	/**
				517	* xmlStrPrintf:
				518	* @buf: the result buffer.
				519	* @len: the result buffer length.
				520	* @msg: the message with printf formatting.
				521	* @...: extra parameters for the message.
				522	*
				523	* Formats @msg and places result into @buf.
				524	*
				525	* Returns the number of characters written to @buf or -1 if an error occurs.
				526	*/
				527	int
				528	xmlStrPrintf(xmlChar buf, int len, const xmlChar msg, ...) {
				529	va_list args;
				530	int ret;
				531
				532	if((buf == NULL) \|\| (msg == NULL)) {
				533	return(-1);
				534	}
				535
				536	va_start(args, msg);
				537	ret = vsnprintf((char ) buf, len, (const char ) msg, args);
				538	va_end(args);
				539	buf[len - 1] = 0; /* be safe ! */
				540
				541	return(ret);
				542	}
				543
				544	/**
				545	* xmlStrVPrintf:
				546	* @buf: the result buffer.
				547	* @len: the result buffer length.
				548	* @msg: the message with printf formatting.
				549	* @ap: extra parameters for the message.
				550	*
				551	* Formats @msg and places result into @buf.
				552	*
				553	* Returns the number of characters written to @buf or -1 if an error occurs.
				554	*/
				555	int
				556	xmlStrVPrintf(xmlChar buf, int len, const xmlChar msg, va_list ap) {
				557	int ret;
				558
				559	if((buf == NULL) \|\| (msg == NULL)) {
				560	return(-1);
				561	}
				562
				563	ret = vsnprintf((char ) buf, len, (const char ) msg, ap);
				564	buf[len - 1] = 0; /* be safe ! */
				565
				566	return(ret);
				567	}
				568
				569	/************************************************************************
				570	* *
				571	* Generic UTF8 handling routines *
				572	* *
				573	* From rfc2044: encoding of the Unicode values on UTF-8: *
				574	* *
				575	* UCS-4 range (hex.) UTF-8 octet sequence (binary) *
				576	* 0000 0000-0000 007F 0xxxxxxx *
				577	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
				578	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
				579	* *
				580	* I hope we won't use values > 0xFFFF anytime soon ! *
				581	* *
				582	************************************************************************/
				583
				584
				585	/**
				586	* xmlUTF8Size:
				587	* @utf: pointer to the UTF8 character
				588	*
				589	* calculates the internal size of a UTF8 character
				590	*
				591	* returns the numbers of bytes in the character, -1 on format error
				592	*/
				593	int
				594	xmlUTF8Size(const xmlChar *utf) {
				595	xmlChar mask;
				596	int len;
				597
				598	if (utf == NULL)
				599	return -1;
				600	if (*utf < 0x80)
				601	return 1;
				602	/* check valid UTF8 character */
				603	if (!(*utf & 0x40))
				604	return -1;
				605	/* determine number of bytes in char */
				606	len = 2;
				607	for (mask=0x20; mask != 0; mask>>=1) {
				608	if (!(*utf & mask))
				609	return len;
				610	len++;
				611	}
				612	return -1;
				613	}
				614
				615	/**
				616	* xmlUTF8Charcmp:
				617	* @utf1: pointer to first UTF8 char
				618	* @utf2: pointer to second UTF8 char
				619	*
				620	* compares the two UCS4 values
				621	*
				622	* returns result of the compare as with xmlStrncmp
				623	*/
				624	int
				625	xmlUTF8Charcmp(const xmlChar utf1, const xmlChar utf2) {
				626
				627	if (utf1 == NULL ) {
				628	if (utf2 == NULL)
				629	return 0;
				630	return -1;
				631	}
				632	return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
				633	}
				634
				635	/**
				636	* xmlUTF8Strlen:
				637	* @utf: a sequence of UTF-8 encoded bytes
				638	*
				639	* compute the length of an UTF8 string, it doesn't do a full UTF8
				640	* checking of the content of the string.
				641	*
				642	* Returns the number of characters in the string or -1 in case of error
				643	*/
				644	int
				645	xmlUTF8Strlen(const xmlChar *utf) {
				646	int ret = 0;
				647
				648	if (utf == NULL)
				649	return(-1);
				650
				651	while (*utf != 0) {
				652	if (utf[0] & 0x80) {
				653	if ((utf[1] & 0xc0) != 0x80)
				654	return(-1);
				655	if ((utf[0] & 0xe0) == 0xe0) {
				656	if ((utf[2] & 0xc0) != 0x80)
				657	return(-1);
				658	if ((utf[0] & 0xf0) == 0xf0) {
				659	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				660	return(-1);
				661	utf += 4;
				662	} else {
				663	utf += 3;
				664	}
				665	} else {
				666	utf += 2;
				667	}
				668	} else {
				669	utf++;
				670	}
				671	ret++;
				672	}
				673	return(ret);
				674	}
				675
				/**
				 * xmlGetUTF8Char:
				 * @utf:  a sequence of UTF-8 encoded bytes
				 * @len:  a pointer to @bytes len
				 *
				 * Read one UTF8 Char from @utf. On input *@len is the number of
				 * bytes available at @utf.
				 *
				 * Returns the char value or -1 in case of error, and updates *len with the
				 * number of bytes consumed (0 on error). A NULL @len is an error and
				 * is not written to.
				 */
				int
				xmlGetUTF8Char(const unsigned char *utf, int *len) {
				    unsigned int c;

				    if (utf == NULL)
				        goto error;
				    if (len == NULL)
				        goto error;
				    if (*len < 1)
				        goto error;

				    c = utf[0];
				    if (c & 0x80) {
				        if (*len < 2)
				            goto error;
				        if ((utf[1] & 0xc0) != 0x80)
				            goto error;
				        if ((c & 0xe0) == 0xe0) {
				            if (*len < 3)
				                goto error;
				            if ((utf[2] & 0xc0) != 0x80)
				                goto error;
				            if ((c & 0xf0) == 0xf0) {
				                if (*len < 4)
				                    goto error;
				                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
				                    goto error;
				                *len = 4;
				                /* 4-byte code */
				                c = (utf[0] & 0x7) << 18;
				                c |= (utf[1] & 0x3f) << 12;
				                c |= (utf[2] & 0x3f) << 6;
				                c |= utf[3] & 0x3f;
				            } else {
				                /* 3-byte code */
				                *len = 3;
				                c = (utf[0] & 0xf) << 12;
				                c |= (utf[1] & 0x3f) << 6;
				                c |= utf[2] & 0x3f;
				            }
				        } else {
				            /* 2-byte code */
				            *len = 2;
				            c = (utf[0] & 0x1f) << 6;
				            c |= utf[1] & 0x3f;
				        }
				    } else {
				        /* 1-byte code */
				        *len = 1;
				    }
				    return(c);

				error:
				    /* this label is also reached when len itself is NULL */
				    if (len != NULL)
				        *len = 0;
				    return(-1);
				}
				742
				/**
				 * xmlCheckUTF8:
				 * @utf: Pointer to putative UTF-8 encoded string.
				 *
				 * Checks @utf for being valid UTF-8. @utf is assumed to be
				 * null-terminated. This function is not super-strict, as it will
				 * allow longer UTF-8 sequences than necessary. Note that Java is
				 * capable of producing these sequences if provoked. Also note, this
				 * routine checks for the 4-byte maximum size, but does not check for
				 * 0x10ffff maximum value.
				 *
				 * Return value: true if @utf is valid, false otherwise (a NULL @utf
				 * is reported as not valid).
				 **/
				int
				xmlCheckUTF8(const unsigned char *utf)
				{
				    int ix;
				    unsigned char c;

				    if (utf == NULL)
				        return(0);

				    for (ix = 0; (c = utf[ix]) != 0;) {
				        if ((c & 0x80) == 0) {
				            /* 1-byte code */
				            ix++;
				            continue;
				        }
				        /* a continuation byte (10xxxxxx) cannot start a sequence */
				        if ((c & 0xc0) == 0x80)
				            return(0);
				        if ((utf[ix + 1] & 0xc0) != 0x80)
				            return(0);
				        if ((c & 0xe0) != 0xe0) {
				            /* 2-byte code */
				            ix += 2;
				            continue;
				        }
				        if ((utf[ix + 2] & 0xc0) != 0x80)
				            return(0);
				        if ((c & 0xf0) != 0xf0) {
				            /* 3-byte code */
				            ix += 3;
				            continue;
				        }
				        if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
				            return(0);
				        /* 4-byte code */
				        ix += 4;
				    }
				    return(1);
				}
				786
				787	/**
				788	* xmlUTF8Strsize:
				789	* @utf: a sequence of UTF-8 encoded bytes
				790	* @len: the number of characters in the array
				791	*
				792	* storage size of an UTF8 string
				793	*
				794	* Returns the storage size of
				795	* the first 'len' characters of ARRAY
				796	*
				797	*/
				798
				799	int
				800	xmlUTF8Strsize(const xmlChar *utf, int len) {
				801	const xmlChar *ptr=utf;
				802	xmlChar ch;
				803
				804	if (len <= 0)
				805	return(0);
				806
				807	while ( len-- > 0) {
				808	if ( !*ptr )
				809	break;
				810	if ( (ch = *ptr++) & 0x80)
				811	while ( (ch<<=1) & 0x80 )
				812	ptr++;
				813	}
				814	return (ptr - utf);
				815	}
				816
				817
				818	/**
				819	* xmlUTF8Strndup:
				820	* @utf: the input UTF8 *
				821	* @len: the len of @utf (in chars)
				822	*
				823	* a strndup for array of UTF8's
				824	*
				825	* Returns a new UTF8 * or NULL
				826	*/
				827	xmlChar *
				828	xmlUTF8Strndup(const xmlChar *utf, int len) {
				829	xmlChar *ret;
				830	int i;
				831
				832	if ((utf == NULL) \|\| (len < 0)) return(NULL);
				833	i = xmlUTF8Strsize(utf, len);
				834	ret = (xmlChar ) xmlMallocAtomic((i + 1) sizeof(xmlChar));
				835	if (ret == NULL) {
				836	xmlGenericError(xmlGenericErrorContext,
				837	"malloc of %ld byte failed\n",
				838	(len + 1) * (long)sizeof(xmlChar));
				839	return(NULL);
				840	}
				841	memcpy(ret, utf, i * sizeof(xmlChar));
				842	ret[i] = 0;
				843	return(ret);
				844	}
				845
				846	/**
				847	* xmlUTF8Strpos:
				848	* @utf: the input UTF8 *
				849	* @pos: the position of the desired UTF8 char (in chars)
				850	*
				851	* a function to provide the equivalent of fetching a
				852	* character from a string array
				853	*
				854	* Returns a pointer to the UTF8 character or NULL
				855	*/
				856	xmlChar *
				857	xmlUTF8Strpos(const xmlChar *utf, int pos) {
				858	xmlChar ch;
				859
				860	if (utf == NULL) return(NULL);
				861	if ( (pos < 0) \|\| (pos >= xmlUTF8Strlen(utf)) )
				862	return(NULL);
				863	while (pos--) {
				864	if ((ch=*utf++) == 0) return(NULL);
				865	if ( ch & 0x80 ) {
				866	/* if not simple ascii, verify proper format */
				867	if ( (ch & 0xc0) != 0xc0 )
				868	return(NULL);
				869	/* then skip over remaining bytes for this char */
				870	while ( (ch <<= 1) & 0x80 )
				871	if ( (*utf++ & 0xc0) != 0x80 )
				872	return(NULL);
				873	}
				874	}
				875	return((xmlChar *)utf);
				876	}
				877
				878	/**
				879	* xmlUTF8Strloc:
				880	* @utf: the input UTF8 *
				881	* @utfchar: the UTF8 character to be found
				882	*
				883	* a function to provide the relative location of a UTF8 char
				884	*
				885	* Returns the relative character position of the desired char
				886	* or -1 if not found
				887	*/
				888	int
				889	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
				890	int i, size;
				891	xmlChar ch;
				892
				893	if (utf==NULL \|\| utfchar==NULL) return -1;
				894	size = xmlUTF8Strsize(utfchar, 1);
				895	for(i=0; (ch=*utf) != 0; i++) {
				896	if (xmlStrncmp(utf, utfchar, size)==0)
				897	return(i);
				898	utf++;
				899	if ( ch & 0x80 ) {
				900	/* if not simple ascii, verify proper format */
				901	if ( (ch & 0xc0) != 0xc0 )
				902	return(-1);
				903	/* then skip over remaining bytes for this char */
				904	while ( (ch <<= 1) & 0x80 )
				905	if ( (*utf++ & 0xc0) != 0x80 )
				906	return(-1);
				907	}
				908	}
				909
				910	return(-1);
				911	}
				912	/**
				913	* xmlUTF8Strsub:
				914	* @utf: a sequence of UTF-8 encoded bytes
				915	* @start: relative pos of first char
				916	* @len: total number to copy
				917	*
				918	* Create a substring from a given UTF-8 string
				919	* Note: positions are given in units of UTF-8 chars
				920	*
				921	* Returns a pointer to a newly created string
				922	* or NULL if any problem
				923	*/
				924
				925	xmlChar *
				926	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
				927	int i;
				928	xmlChar ch;
				929
				930	if (utf == NULL) return(NULL);
				931	if (start < 0) return(NULL);
				932	if (len < 0) return(NULL);
				933
				934	/*
				935	* Skip over any leading chars
				936	*/
				937	for (i = 0;i < start;i++) {
				938	if ((ch=*utf++) == 0) return(NULL);
				939	if ( ch & 0x80 ) {
				940	/* if not simple ascii, verify proper format */
				941	if ( (ch & 0xc0) != 0xc0 )
				942	return(NULL);
				943	/* then skip over remaining bytes for this char */
				944	while ( (ch <<= 1) & 0x80 )
				945	if ( (*utf++ & 0xc0) != 0x80 )
				946	return(NULL);
				947	}
				948	}
				949
				950	return(xmlUTF8Strndup(utf, len));
				951	}