Blame - xmlstring.c - platform/external/libxml2

blob: cc85777eadc71889f5ab25ba8ef54c4724f5bc20 [file] [log] [blame]

William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	1	/*
				2	* string.c : an XML string utilities module
				3	*
				4	* This module provides various utility functions for manipulating
				5	* the xmlChar* type. All functions named xmlStr* have been moved here
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	6	* from the parser.c file (their original home).
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	7	*
				8	* See Copyright for the status of this software.
				9	*
				10	* UTF8 string routines from:
				11	* William Brack <wbrack@mmm.com.hk>
				12	*
				13	* daniel@veillard.com
				14	*/
				15
				16	#define IN_LIBXML
				17	#include "libxml.h"
				18
				19	#include <stdlib.h>
				20	#include <string.h>
				21	#include <libxml/xmlmemory.h>
				22	#include <libxml/parserInternals.h>
				23	#include <libxml/xmlstring.h>
				24
				25	/************************************************************************
				26	* *
				27	* Commodity functions to handle xmlChars *
				28	* *
				29	************************************************************************/
				30
				31	/**
				32	* xmlStrndup:
				33	* @cur: the input xmlChar *
				34	* @len: the len of @cur
				35	*
				36	* a strndup for array of xmlChar's
				37	*
				38	* Returns a new xmlChar * or NULL
				39	*/
				40	xmlChar *
				41	xmlStrndup(const xmlChar *cur, int len) {
				42	xmlChar *ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	43
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	44	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				45	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				46	if (ret == NULL) {
				47	xmlErrMemory(NULL, NULL);
				48	return(NULL);
				49	}
				50	memcpy(ret, cur, len * sizeof(xmlChar));
				51	ret[len] = 0;
				52	return(ret);
				53	}
				54
				55	/**
				56	* xmlStrdup:
				57	* @cur: the input xmlChar *
				58	*
				59	* a strdup for array of xmlChar's. Since they are supposed to be
				60	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				61	* a termination mark of '0'.
				62	*
				63	* Returns a new xmlChar * or NULL
				64	*/
				65	xmlChar *
				66	xmlStrdup(const xmlChar *cur) {
				67	const xmlChar *p = cur;
				68
				69	if (cur == NULL) return(NULL);
				70	while (p != 0) p++; / non input consuming */
				71	return(xmlStrndup(cur, p - cur));
				72	}
				73
				74	/**
				75	* xmlCharStrndup:
				76	* @cur: the input char *
				77	* @len: the len of @cur
				78	*
				79	* a strndup for char's to xmlChar's
				80	*
				81	* Returns a new xmlChar * or NULL
				82	*/
				83
				84	xmlChar *
				85	xmlCharStrndup(const char *cur, int len) {
				86	int i;
				87	xmlChar *ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	88
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	89	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				90	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				91	if (ret == NULL) {
				92	xmlErrMemory(NULL, NULL);
				93	return(NULL);
				94	}
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	95	for (i = 0;i < len;i++) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	96	ret[i] = (xmlChar) cur[i];
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	97	if (ret[i] == 0) return(ret);
				98	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	99	ret[len] = 0;
				100	return(ret);
				101	}
				102
				103	/**
				104	* xmlCharStrdup:
				105	* @cur: the input char *
				106	*
				107	* a strdup for char's to xmlChar's
				108	*
				109	* Returns a new xmlChar * or NULL
				110	*/
				111
				112	xmlChar *
				113	xmlCharStrdup(const char *cur) {
				114	const char *p = cur;
				115
				116	if (cur == NULL) return(NULL);
				117	while (p != '\0') p++; / non input consuming */
				118	return(xmlCharStrndup(cur, p - cur));
				119	}
				120
				121	/**
				122	* xmlStrcmp:
				123	* @str1: the first xmlChar *
				124	* @str2: the second xmlChar *
				125	*
				126	* a strcmp for xmlChar's
				127	*
				128	* Returns the integer result of the comparison
				129	*/
				130
				131	int
				132	xmlStrcmp(const xmlChar str1, const xmlChar str2) {
				133	register int tmp;
				134
				135	if (str1 == str2) return(0);
				136	if (str1 == NULL) return(-1);
				137	if (str2 == NULL) return(1);
				138	do {
				139	tmp = str1++ - str2;
				140	if (tmp != 0) return(tmp);
				141	} while (*str2++ != 0);
				142	return 0;
				143	}
				144
				145	/**
				146	* xmlStrEqual:
				147	* @str1: the first xmlChar *
				148	* @str2: the second xmlChar *
				149	*
Daniel Veillard	d95ecf0	2005-12-22 14:58:32 +0000	[diff] [blame]	150	* Check if both strings are equal of have same content.
Daniel Veillard	6a0baa0	2005-12-10 11:11:12 +0000	[diff] [blame]	151	* Should be a bit more readable and faster than xmlStrcmp()
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	152	*
				153	* Returns 1 if they are equal, 0 if they are different
				154	*/
				155
				156	int
				157	xmlStrEqual(const xmlChar str1, const xmlChar str2) {
				158	if (str1 == str2) return(1);
				159	if (str1 == NULL) return(0);
				160	if (str2 == NULL) return(0);
				161	do {
				162	if (str1++ != str2) return(0);
				163	} while (*str2++);
				164	return(1);
				165	}
				166
				167	/**
				168	* xmlStrQEqual:
				169	* @pref: the prefix of the QName
				170	* @name: the localname of the QName
				171	* @str: the second xmlChar *
				172	*
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	173	* Check if a QName is Equal to a given string
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	174	*
				175	* Returns 1 if they are equal, 0 if they are different
				176	*/
				177
				178	int
				179	xmlStrQEqual(const xmlChar pref, const xmlChar name, const xmlChar *str) {
				180	if (pref == NULL) return(xmlStrEqual(name, str));
				181	if (name == NULL) return(0);
				182	if (str == NULL) return(0);
				183
				184	do {
				185	if (pref++ != str) return(0);
				186	} while ((str++) && (pref));
				187	if (*str++ != ':') return(0);
				188	do {
				189	if (name++ != str) return(0);
				190	} while (*str++);
				191	return(1);
				192	}
				193
				194	/**
				195	* xmlStrncmp:
				196	* @str1: the first xmlChar *
				197	* @str2: the second xmlChar *
				198	* @len: the max comparison length
				199	*
				200	* a strncmp for xmlChar's
				201	*
				202	* Returns the integer result of the comparison
				203	*/
				204
				205	int
				206	xmlStrncmp(const xmlChar str1, const xmlChar str2, int len) {
				207	register int tmp;
				208
				209	if (len <= 0) return(0);
				210	if (str1 == str2) return(0);
				211	if (str1 == NULL) return(-1);
				212	if (str2 == NULL) return(1);
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	213	#ifdef __GNUC__
William M. Brack	b7b54de	2004-10-06 16:38:01 +0000	[diff] [blame]	214	tmp = strncmp((const char )str1, (const char )str2, len);
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	215	return tmp;
				216	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	217	do {
				218	tmp = str1++ - str2;
				219	if (tmp != 0 \|\| --len == 0) return(tmp);
				220	} while (*str2++ != 0);
				221	return 0;
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	222	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	223	}
				224
				225	static const xmlChar casemap[256] = {
				226	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
				227	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
				228	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
				229	0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
				230	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
				231	0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
				232	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
				233	0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
				234	0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				235	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				236	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				237	0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
				238	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				239	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				240	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				241	0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
				242	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
				243	0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
				244	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
				245	0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
				246	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
				247	0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
				248	0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
				249	0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
				250	0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
				251	0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
				252	0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
				253	0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
				254	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
				255	0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
				256	0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
				257	0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
				258	};
				259
				260	/**
				261	* xmlStrcasecmp:
				262	* @str1: the first xmlChar *
				263	* @str2: the second xmlChar *
				264	*
				265	* a strcasecmp for xmlChar's
				266	*
				267	* Returns the integer result of the comparison
				268	*/
				269
				270	int
				271	xmlStrcasecmp(const xmlChar str1, const xmlChar str2) {
				272	register int tmp;
				273
				274	if (str1 == str2) return(0);
				275	if (str1 == NULL) return(-1);
				276	if (str2 == NULL) return(1);
				277	do {
				278	tmp = casemap[str1++] - casemap[str2];
				279	if (tmp != 0) return(tmp);
				280	} while (*str2++ != 0);
				281	return 0;
				282	}
				283
				284	/**
				285	* xmlStrncasecmp:
				286	* @str1: the first xmlChar *
				287	* @str2: the second xmlChar *
				288	* @len: the max comparison length
				289	*
				290	* a strncasecmp for xmlChar's
				291	*
				292	* Returns the integer result of the comparison
				293	*/
				294
				295	int
				296	xmlStrncasecmp(const xmlChar str1, const xmlChar str2, int len) {
				297	register int tmp;
				298
				299	if (len <= 0) return(0);
				300	if (str1 == str2) return(0);
				301	if (str1 == NULL) return(-1);
				302	if (str2 == NULL) return(1);
				303	do {
				304	tmp = casemap[str1++] - casemap[str2];
				305	if (tmp != 0 \|\| --len == 0) return(tmp);
				306	} while (*str2++ != 0);
				307	return 0;
				308	}
				309
				310	/**
				311	* xmlStrchr:
				312	* @str: the xmlChar * array
				313	* @val: the xmlChar to search
				314	*
				315	* a strchr for xmlChar's
				316	*
				317	* Returns the xmlChar * for the first occurrence or NULL.
				318	*/
				319
				320	const xmlChar *
				321	xmlStrchr(const xmlChar *str, xmlChar val) {
				322	if (str == NULL) return(NULL);
				323	while (str != 0) { / non input consuming */
				324	if (str == val) return((xmlChar ) str);
				325	str++;
				326	}
				327	return(NULL);
				328	}
				329
				330	/**
				331	* xmlStrstr:
				332	* @str: the xmlChar * array (haystack)
				333	* @val: the xmlChar to search (needle)
				334	*
				335	* a strstr for xmlChar's
				336	*
				337	* Returns the xmlChar * for the first occurrence or NULL.
				338	*/
				339
				340	const xmlChar *
				341	xmlStrstr(const xmlChar str, const xmlChar val) {
				342	int n;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	343
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	344	if (str == NULL) return(NULL);
				345	if (val == NULL) return(NULL);
				346	n = xmlStrlen(val);
				347
				348	if (n == 0) return(str);
				349	while (str != 0) { / non input consuming */
				350	if (str == val) {
				351	if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
				352	}
				353	str++;
				354	}
				355	return(NULL);
				356	}
				357
				358	/**
				359	* xmlStrcasestr:
				360	* @str: the xmlChar * array (haystack)
				361	* @val: the xmlChar to search (needle)
				362	*
				363	* a case-ignoring strstr for xmlChar's
				364	*
				365	* Returns the xmlChar * for the first occurrence or NULL.
				366	*/
				367
				368	const xmlChar *
Daniel Veillard	fcf2457	2009-08-12 23:02:08 +0200	[diff] [blame]	369	xmlStrcasestr(const xmlChar str, const xmlChar val) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	370	int n;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	371
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	372	if (str == NULL) return(NULL);
				373	if (val == NULL) return(NULL);
				374	n = xmlStrlen(val);
				375
				376	if (n == 0) return(str);
				377	while (str != 0) { / non input consuming */
				378	if (casemap[str] == casemap[val])
				379	if (!xmlStrncasecmp(str, val, n)) return(str);
				380	str++;
				381	}
				382	return(NULL);
				383	}
				384
				385	/**
				386	* xmlStrsub:
				387	* @str: the xmlChar * array (haystack)
				388	* @start: the index of the first char (zero based)
				389	* @len: the length of the substring
				390	*
				391	* Extract a substring of a given string
				392	*
				393	* Returns the xmlChar * for the first occurrence or NULL.
				394	*/
				395
				396	xmlChar *
				397	xmlStrsub(const xmlChar *str, int start, int len) {
				398	int i;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	399
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	400	if (str == NULL) return(NULL);
				401	if (start < 0) return(NULL);
				402	if (len < 0) return(NULL);
				403
				404	for (i = 0;i < start;i++) {
				405	if (*str == 0) return(NULL);
				406	str++;
				407	}
				408	if (*str == 0) return(NULL);
				409	return(xmlStrndup(str, len));
				410	}
				411
				412	/**
				413	* xmlStrlen:
				414	* @str: the xmlChar * array
				415	*
				416	* length of a xmlChar's string
				417	*
				418	* Returns the number of xmlChar contained in the ARRAY.
				419	*/
				420
				421	int
				422	xmlStrlen(const xmlChar *str) {
				423	int len = 0;
				424
				425	if (str == NULL) return(0);
				426	while (str != 0) { / non input consuming */
				427	str++;
				428	len++;
				429	}
				430	return(len);
				431	}
				432
				433	/**
				434	* xmlStrncat:
				435	* @cur: the original xmlChar * array
				436	* @add: the xmlChar * array added
				437	* @len: the length of @add
				438	*
				439	* a strncat for array of xmlChar's, it will extend @cur with the len
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	440	* first bytes of @add. Note that if @len < 0 then this is an API error
				441	* and NULL will be returned.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	442	*
				443	* Returns a new xmlChar *, the original @cur is reallocated if needed
				444	* and should not be freed
				445	*/
				446
				447	xmlChar *
				448	xmlStrncat(xmlChar cur, const xmlChar add, int len) {
				449	int size;
				450	xmlChar *ret;
				451
				452	if ((add == NULL) \|\| (len == 0))
				453	return(cur);
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	454	if (len < 0)
				455	return(NULL);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	456	if (cur == NULL)
				457	return(xmlStrndup(add, len));
				458
				459	size = xmlStrlen(cur);
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	460	if (size < 0)
				461	return(NULL);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	462	ret = (xmlChar ) xmlRealloc(cur, (size + len + 1) sizeof(xmlChar));
				463	if (ret == NULL) {
				464	xmlErrMemory(NULL, NULL);
				465	return(cur);
				466	}
				467	memcpy(&ret[size], add, len * sizeof(xmlChar));
				468	ret[size + len] = 0;
				469	return(ret);
				470	}
				471
				472	/**
				473	* xmlStrncatNew:
				474	* @str1: first xmlChar string
				475	* @str2: second xmlChar string
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	476	* @len: the len of @str2 or < 0
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	477	*
				478	* same as xmlStrncat, but creates a new string. The original
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	479	* two strings are not freed. If @len is < 0 then the length
				480	* will be calculated automatically.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	481	*
				482	* Returns a new xmlChar * or NULL
				483	*/
				484	xmlChar *
				485	xmlStrncatNew(const xmlChar str1, const xmlChar str2, int len) {
				486	int size;
				487	xmlChar *ret;
				488
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	489	if (len < 0) {
Daniel Veillard	8a32fe4	2004-11-02 22:10:16 +0000	[diff] [blame]	490	len = xmlStrlen(str2);
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	491	if (len < 0)
				492	return(NULL);
				493	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	494	if ((str2 == NULL) \|\| (len == 0))
				495	return(xmlStrdup(str1));
				496	if (str1 == NULL)
				497	return(xmlStrndup(str2, len));
				498
				499	size = xmlStrlen(str1);
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	500	if (size < 0)
				501	return(NULL);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	502	ret = (xmlChar ) xmlMalloc((size + len + 1) sizeof(xmlChar));
				503	if (ret == NULL) {
				504	xmlErrMemory(NULL, NULL);
				505	return(xmlStrndup(str1, size));
				506	}
				507	memcpy(ret, str1, size * sizeof(xmlChar));
				508	memcpy(&ret[size], str2, len * sizeof(xmlChar));
				509	ret[size + len] = 0;
				510	return(ret);
				511	}
				512
				513	/**
				514	* xmlStrcat:
				515	* @cur: the original xmlChar * array
				516	* @add: the xmlChar * array added
				517	*
				518	* a strcat for array of xmlChar's. Since they are supposed to be
				519	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				520	* a termination mark of '0'.
				521	*
				522	* Returns a new xmlChar * containing the concatenated string.
				523	*/
				524	xmlChar *
				525	xmlStrcat(xmlChar cur, const xmlChar add) {
				526	const xmlChar *p = add;
				527
				528	if (add == NULL) return(cur);
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	529	if (cur == NULL)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	530	return(xmlStrdup(add));
				531
				532	while (p != 0) p++; / non input consuming */
				533	return(xmlStrncat(cur, add, p - add));
				534	}
				535
				536	/**
				537	* xmlStrPrintf:
				538	* @buf: the result buffer.
				539	* @len: the result buffer length.
				540	* @msg: the message with printf formatting.
				541	* @...: extra parameters for the message.
				542	*
				543	* Formats @msg and places result into @buf.
				544	*
				545	* Returns the number of characters written to @buf or -1 if an error occurs.
				546	*/
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	547	int XMLCDECL
David Kilzer	4472c3a	2016-05-13 15:13:17 +0800	[diff] [blame]	548	xmlStrPrintf(xmlChar buf, int len, const char msg, ...) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	549	va_list args;
				550	int ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	551
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	552	if((buf == NULL) \|\| (msg == NULL)) {
				553	return(-1);
				554	}
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	555
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	556	va_start(args, msg);
				557	ret = vsnprintf((char ) buf, len, (const char ) msg, args);
				558	va_end(args);
				559	buf[len - 1] = 0; /* be safe ! */
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	560
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	561	return(ret);
				562	}
				563
				564	/**
				565	* xmlStrVPrintf:
				566	* @buf: the result buffer.
				567	* @len: the result buffer length.
				568	* @msg: the message with printf formatting.
				569	* @ap: extra parameters for the message.
				570	*
				571	* Formats @msg and places result into @buf.
				572	*
				573	* Returns the number of characters written to @buf or -1 if an error occurs.
				574	*/
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	575	int
David Kilzer	4472c3a	2016-05-13 15:13:17 +0800	[diff] [blame]	576	xmlStrVPrintf(xmlChar buf, int len, const char msg, va_list ap) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	577	int ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	578
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	579	if((buf == NULL) \|\| (msg == NULL)) {
				580	return(-1);
				581	}
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	582
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	583	ret = vsnprintf((char ) buf, len, (const char ) msg, ap);
				584	buf[len - 1] = 0; /* be safe ! */
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	585
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	586	return(ret);
				587	}
				588
				589	/************************************************************************
				590	* *
				591	* Generic UTF8 handling routines *
				592	* *
				593	* From rfc2044: encoding of the Unicode values on UTF-8: *
				594	* *
				595	* UCS-4 range (hex.) UTF-8 octet sequence (binary) *
				596	* 0000 0000-0000 007F 0xxxxxxx *
				597	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
				598	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
				599	* *
				600	* I hope we won't use values > 0xFFFF anytime soon ! *
				601	* *
				602	************************************************************************/
				603
				604
				605	/**
				606	* xmlUTF8Size:
				607	* @utf: pointer to the UTF8 character
				608	*
				609	* calculates the internal size of a UTF8 character
				610	*
				611	* returns the numbers of bytes in the character, -1 on format error
				612	*/
				613	int
				614	xmlUTF8Size(const xmlChar *utf) {
				615	xmlChar mask;
				616	int len;
				617
				618	if (utf == NULL)
				619	return -1;
				620	if (*utf < 0x80)
				621	return 1;
				622	/* check valid UTF8 character */
				623	if (!(*utf & 0x40))
				624	return -1;
				625	/* determine number of bytes in char */
				626	len = 2;
				627	for (mask=0x20; mask != 0; mask>>=1) {
				628	if (!(*utf & mask))
				629	return len;
				630	len++;
				631	}
				632	return -1;
				633	}
				634
				635	/**
				636	* xmlUTF8Charcmp:
				637	* @utf1: pointer to first UTF8 char
				638	* @utf2: pointer to second UTF8 char
				639	*
				640	* compares the two UCS4 values
				641	*
				642	* returns result of the compare as with xmlStrncmp
				643	*/
				644	int
				645	xmlUTF8Charcmp(const xmlChar utf1, const xmlChar utf2) {
				646
				647	if (utf1 == NULL ) {
				648	if (utf2 == NULL)
				649	return 0;
				650	return -1;
				651	}
				652	return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
				653	}
				654
				655	/**
				656	* xmlUTF8Strlen:
				657	* @utf: a sequence of UTF-8 encoded bytes
				658	*
				659	* compute the length of an UTF8 string, it doesn't do a full UTF8
				660	* checking of the content of the string.
				661	*
				662	* Returns the number of characters in the string or -1 in case of error
				663	*/
				664	int
				665	xmlUTF8Strlen(const xmlChar *utf) {
				666	int ret = 0;
				667
				668	if (utf == NULL)
				669	return(-1);
				670
				671	while (*utf != 0) {
				672	if (utf[0] & 0x80) {
				673	if ((utf[1] & 0xc0) != 0x80)
				674	return(-1);
				675	if ((utf[0] & 0xe0) == 0xe0) {
				676	if ((utf[2] & 0xc0) != 0x80)
				677	return(-1);
				678	if ((utf[0] & 0xf0) == 0xf0) {
				679	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				680	return(-1);
				681	utf += 4;
				682	} else {
				683	utf += 3;
				684	}
				685	} else {
				686	utf += 2;
				687	}
				688	} else {
				689	utf++;
				690	}
				691	ret++;
				692	}
				693	return(ret);
				694	}
				695
				696	/**
				697	* xmlGetUTF8Char:
				698	* @utf: a sequence of UTF-8 encoded bytes
William M. Brack	3e53016	2004-09-03 17:10:08 +0000	[diff] [blame]	699	* @len: a pointer to the minimum number of bytes present in
				700	* the sequence. This is used to assure the next character
				701	* is completely contained within the sequence.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	702	*
William M. Brack	3e53016	2004-09-03 17:10:08 +0000	[diff] [blame]	703	* Read the first UTF8 character from @utf
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	704	*
William M. Brack	3e53016	2004-09-03 17:10:08 +0000	[diff] [blame]	705	* Returns the char value or -1 in case of error, and sets *len to
				706	* the actual number of bytes consumed (0 in case of error)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	707	*/
				708	int
				709	xmlGetUTF8Char(const unsigned char utf, int len) {
				710	unsigned int c;
				711
				712	if (utf == NULL)
				713	goto error;
				714	if (len == NULL)
				715	goto error;
				716	if (*len < 1)
				717	goto error;
				718
				719	c = utf[0];
				720	if (c & 0x80) {
				721	if (*len < 2)
				722	goto error;
				723	if ((utf[1] & 0xc0) != 0x80)
				724	goto error;
				725	if ((c & 0xe0) == 0xe0) {
				726	if (*len < 3)
				727	goto error;
				728	if ((utf[2] & 0xc0) != 0x80)
				729	goto error;
				730	if ((c & 0xf0) == 0xf0) {
				731	if (*len < 4)
				732	goto error;
				733	if ((c & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				734	goto error;
				735	*len = 4;
				736	/* 4-byte code */
				737	c = (utf[0] & 0x7) << 18;
				738	c \|= (utf[1] & 0x3f) << 12;
				739	c \|= (utf[2] & 0x3f) << 6;
				740	c \|= utf[3] & 0x3f;
				741	} else {
				742	/* 3-byte code */
				743	*len = 3;
				744	c = (utf[0] & 0xf) << 12;
				745	c \|= (utf[1] & 0x3f) << 6;
				746	c \|= utf[2] & 0x3f;
				747	}
				748	} else {
				749	/* 2-byte code */
				750	*len = 2;
				751	c = (utf[0] & 0x1f) << 6;
				752	c \|= utf[1] & 0x3f;
				753	}
				754	} else {
				755	/* 1-byte code */
				756	*len = 1;
				757	}
				758	return(c);
				759
				760	error:
Daniel Veillard	ce682bc	2004-11-05 17:22:25 +0000	[diff] [blame]	761	if (len != NULL)
				762	*len = 0;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	763	return(-1);
				764	}
				765
				766	/**
				767	* xmlCheckUTF8:
				768	* @utf: Pointer to putative UTF-8 encoded string.
				769	*
				770	* Checks @utf for being valid UTF-8. @utf is assumed to be
				771	* null-terminated. This function is not super-strict, as it will
				772	* allow longer UTF-8 sequences than necessary. Note that Java is
				773	* capable of producing these sequences if provoked. Also note, this
				774	* routine checks for the 4-byte maximum size, but does not check for
				775	* 0x10ffff maximum value.
				776	*
				777	* Return value: true if @utf is valid.
				778	**/
				779	int
				780	xmlCheckUTF8(const unsigned char *utf)
				781	{
				782	int ix;
				783	unsigned char c;
				784
Daniel Veillard	ce682bc	2004-11-05 17:22:25 +0000	[diff] [blame]	785	if (utf == NULL)
				786	return(0);
William M. Brack	3ffe90e	2004-08-28 01:33:30 +0000	[diff] [blame]	787	/*
				788	* utf is a string of 1, 2, 3 or 4 bytes. The valid strings
				789	* are as follows (in "bit format"):
				790	* 0xxxxxxx valid 1-byte
				791	* 110xxxxx 10xxxxxx valid 2-byte
				792	* 1110xxxx 10xxxxxx 10xxxxxx valid 3-byte
				793	* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx valid 4-byte
				794	*/
				795	for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */
William M. Brack	f409515	2004-08-31 16:49:26 +0000	[diff] [blame]	796	if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	797	ix++;
William M. Brack	bf5cf21	2004-08-31 06:47:17 +0000	[diff] [blame]	798	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
				799	if ((utf[ix+1] & 0xc0 ) != 0x80)
				800	return 0;
				801	ix += 2;
				802	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
				803	if (((utf[ix+1] & 0xc0) != 0x80) \|\|
				804	((utf[ix+2] & 0xc0) != 0x80))
				805	return 0;
				806	ix += 3;
				807	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
				808	if (((utf[ix+1] & 0xc0) != 0x80) \|\|
				809	((utf[ix+2] & 0xc0) != 0x80) \|\|
				810	((utf[ix+3] & 0xc0) != 0x80))
				811	return 0;
				812	ix += 4;
				813	} else /* unknown encoding */
				814	return 0;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	815	}
				816	return(1);
				817	}
				818
				819	/**
				820	* xmlUTF8Strsize:
				821	* @utf: a sequence of UTF-8 encoded bytes
				822	* @len: the number of characters in the array
				823	*
				824	* storage size of an UTF8 string
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	825	* the behaviour is not garanteed if the input string is not UTF-8
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	826	*
				827	* Returns the storage size of
				828	* the first 'len' characters of ARRAY
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	829	*/
				830
				831	int
				832	xmlUTF8Strsize(const xmlChar *utf, int len) {
				833	const xmlChar *ptr=utf;
				834	xmlChar ch;
				835
Daniel Veillard	36e5cd5	2004-11-02 14:52:23 +0000	[diff] [blame]	836	if (utf == NULL)
				837	return(0);
				838
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	839	if (len <= 0)
				840	return(0);
				841
				842	while ( len-- > 0) {
				843	if ( !*ptr )
				844	break;
				845	if ( (ch = *ptr++) & 0x80)
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	846	while ((ch<<=1) & 0x80 ) {
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	847	if (*ptr == 0) break;
Nick Wellnhofer	96a5c17	2016-04-21 19:03:47 +0200	[diff] [blame]	848	ptr++;
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	849	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	850	}
				851	return (ptr - utf);
				852	}
				853
				854
				855	/**
				856	* xmlUTF8Strndup:
				857	* @utf: the input UTF8 *
				858	* @len: the len of @utf (in chars)
				859	*
				860	* a strndup for array of UTF8's
				861	*
				862	* Returns a new UTF8 * or NULL
				863	*/
				864	xmlChar *
				865	xmlUTF8Strndup(const xmlChar *utf, int len) {
				866	xmlChar *ret;
				867	int i;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	868
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	869	if ((utf == NULL) \|\| (len < 0)) return(NULL);
				870	i = xmlUTF8Strsize(utf, len);
				871	ret = (xmlChar ) xmlMallocAtomic((i + 1) sizeof(xmlChar));
				872	if (ret == NULL) {
				873	xmlGenericError(xmlGenericErrorContext,
				874	"malloc of %ld byte failed\n",
				875	(len + 1) * (long)sizeof(xmlChar));
				876	return(NULL);
				877	}
				878	memcpy(ret, utf, i * sizeof(xmlChar));
				879	ret[i] = 0;
				880	return(ret);
				881	}
				882
				883	/**
				884	* xmlUTF8Strpos:
				885	* @utf: the input UTF8 *
				886	* @pos: the position of the desired UTF8 char (in chars)
				887	*
				888	* a function to provide the equivalent of fetching a
				889	* character from a string array
				890	*
				891	* Returns a pointer to the UTF8 character or NULL
				892	*/
Daniel Veillard	8a32fe4	2004-11-02 22:10:16 +0000	[diff] [blame]	893	const xmlChar *
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	894	xmlUTF8Strpos(const xmlChar *utf, int pos) {
				895	xmlChar ch;
				896
				897	if (utf == NULL) return(NULL);
William M. Brack	230c550	2004-12-20 16:18:49 +0000	[diff] [blame]	898	if (pos < 0)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	899	return(NULL);
				900	while (pos--) {
				901	if ((ch=*utf++) == 0) return(NULL);
				902	if ( ch & 0x80 ) {
				903	/* if not simple ascii, verify proper format */
				904	if ( (ch & 0xc0) != 0xc0 )
				905	return(NULL);
				906	/* then skip over remaining bytes for this char */
				907	while ( (ch <<= 1) & 0x80 )
				908	if ( (*utf++ & 0xc0) != 0x80 )
				909	return(NULL);
				910	}
				911	}
				912	return((xmlChar *)utf);
				913	}
				914
				915	/**
				916	* xmlUTF8Strloc:
				917	* @utf: the input UTF8 *
				918	* @utfchar: the UTF8 character to be found
				919	*
				920	* a function to provide the relative location of a UTF8 char
				921	*
				922	* Returns the relative character position of the desired char
				923	* or -1 if not found
				924	*/
				925	int
				926	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
				927	int i, size;
				928	xmlChar ch;
				929
				930	if (utf==NULL \|\| utfchar==NULL) return -1;
				931	size = xmlUTF8Strsize(utfchar, 1);
				932	for(i=0; (ch=*utf) != 0; i++) {
				933	if (xmlStrncmp(utf, utfchar, size)==0)
				934	return(i);
				935	utf++;
				936	if ( ch & 0x80 ) {
				937	/* if not simple ascii, verify proper format */
				938	if ( (ch & 0xc0) != 0xc0 )
				939	return(-1);
				940	/* then skip over remaining bytes for this char */
				941	while ( (ch <<= 1) & 0x80 )
				942	if ( (*utf++ & 0xc0) != 0x80 )
				943	return(-1);
				944	}
				945	}
				946
				947	return(-1);
				948	}
				949	/**
				950	* xmlUTF8Strsub:
				951	* @utf: a sequence of UTF-8 encoded bytes
				952	* @start: relative pos of first char
				953	* @len: total number to copy
				954	*
				955	* Create a substring from a given UTF-8 string
				956	* Note: positions are given in units of UTF-8 chars
				957	*
				958	* Returns a pointer to a newly created string
				959	* or NULL if any problem
				960	*/
				961
				962	xmlChar *
				963	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
				964	int i;
				965	xmlChar ch;
				966
				967	if (utf == NULL) return(NULL);
				968	if (start < 0) return(NULL);
				969	if (len < 0) return(NULL);
				970
				971	/*
				972	* Skip over any leading chars
				973	*/
				974	for (i = 0;i < start;i++) {
				975	if ((ch=*utf++) == 0) return(NULL);
				976	if ( ch & 0x80 ) {
				977	/* if not simple ascii, verify proper format */
				978	if ( (ch & 0xc0) != 0xc0 )
				979	return(NULL);
				980	/* then skip over remaining bytes for this char */
				981	while ( (ch <<= 1) & 0x80 )
				982	if ( (*utf++ & 0xc0) != 0x80 )
				983	return(NULL);
				984	}
				985	}
				986
				987	return(xmlUTF8Strndup(utf, len));
				988	}
Daniel Veillard	5d4644e	2005-04-01 13:11:58 +0000	[diff] [blame]	989
David Kilzer	502f6a6	2016-05-23 14:58:41 +0800	[diff] [blame]	990	/**
				991	* xmlEscapeFormatString:
				992	* @msg: a pointer to the string in which to escape '%' characters.
				993	* Must be a heap-allocated buffer created by libxml2 that may be
				994	* returned, or that may be freed and replaced.
				995	*
				996	* Replaces the string pointed to by 'msg' with an escaped string.
				997	* Returns the same string with all '%' characters escaped.
				998	*/
				999	xmlChar *
				1000	xmlEscapeFormatString(xmlChar **msg)
				1001	{
				1002	xmlChar *msgPtr = NULL;
				1003	xmlChar *result = NULL;
				1004	xmlChar *resultPtr = NULL;
				1005	size_t count = 0;
				1006	size_t msgLen = 0;
				1007	size_t resultLen = 0;
				1008
				1009	if (!msg \|\| !*msg)
				1010	return(NULL);
				1011
				1012	for (msgPtr = msg; msgPtr != '\0'; ++msgPtr) {
				1013	++msgLen;
				1014	if (*msgPtr == '%')
				1015	++count;
				1016	}
				1017
				1018	if (count == 0)
				1019	return(*msg);
				1020
				1021	resultLen = msgLen + count + 1;
				1022	result = (xmlChar ) xmlMallocAtomic(resultLen sizeof(xmlChar));
				1023	if (result == NULL) {
				1024	/* Clear *msg to prevent format string vulnerabilities in
				1025	out-of-memory situations. */
				1026	xmlFree(*msg);
				1027	*msg = NULL;
				1028	xmlErrMemory(NULL, NULL);
				1029	return(NULL);
				1030	}
				1031
				1032	for (msgPtr = msg, resultPtr = result; msgPtr != '\0'; ++msgPtr, ++resultPtr) {
				1033	resultPtr = msgPtr;
				1034	if (*msgPtr == '%')
				1035	*(++resultPtr) = '%';
				1036	}
				1037	result[resultLen - 1] = '\0';
				1038
				1039	xmlFree(*msg);
				1040	*msg = result;
				1041
				1042	return *msg;
				1043	}
				1044
Daniel Veillard	5d4644e	2005-04-01 13:11:58 +0000	[diff] [blame]	1045	#define bottom_xmlstring
				1046	#include "elfgcchack.h"