Blame - xmlstring.c - platform/external/libxml2

blob: a85685d7e138eaec59e1d193b1ce9bb6835e5efb [file] [log] [blame]

William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	1	/*
				2	* string.c : an XML string utilities module
				3	*
				4	* This module provides various utility functions for manipulating
				5	* the xmlChar* type. All functions named xmlStr* have been moved here
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	6	* from the parser.c file (their original home).
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	7	*
				8	* See Copyright for the status of this software.
				9	*
				10	* UTF8 string routines from:
				11	* William Brack <wbrack@mmm.com.hk>
				12	*
				13	* daniel@veillard.com
				14	*/
				15
				16	#define IN_LIBXML
				17	#include "libxml.h"
				18
				19	#include <stdlib.h>
				20	#include <string.h>
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	21	#include <limits.h>
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	22	#include <libxml/xmlmemory.h>
				23	#include <libxml/parserInternals.h>
				24	#include <libxml/xmlstring.h>
				25
				26	/************************************************************************
				27	* *
				28	* Commodity functions to handle xmlChars *
				29	* *
				30	************************************************************************/
				31
				32	/**
				33	* xmlStrndup:
				34	* @cur: the input xmlChar *
				35	* @len: the len of @cur
				36	*
				37	* a strndup for array of xmlChar's
				38	*
				39	* Returns a new xmlChar * or NULL
				40	*/
				41	xmlChar *
				42	xmlStrndup(const xmlChar *cur, int len) {
				43	xmlChar *ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	44
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	45	if ((cur == NULL) \|\| (len < 0)) return(NULL);
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	46	ret = (xmlChar ) xmlMallocAtomic(((size_t) len + 1) sizeof(xmlChar));
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	47	if (ret == NULL) {
				48	xmlErrMemory(NULL, NULL);
				49	return(NULL);
				50	}
				51	memcpy(ret, cur, len * sizeof(xmlChar));
				52	ret[len] = 0;
				53	return(ret);
				54	}
				55
				56	/**
				57	* xmlStrdup:
				58	* @cur: the input xmlChar *
				59	*
				60	* a strdup for array of xmlChar's. Since they are supposed to be
				61	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				62	* a termination mark of '0'.
				63	*
				64	* Returns a new xmlChar * or NULL
				65	*/
				66	xmlChar *
				67	xmlStrdup(const xmlChar *cur) {
				68	const xmlChar *p = cur;
				69
				70	if (cur == NULL) return(NULL);
				71	while (p != 0) p++; / non input consuming */
				72	return(xmlStrndup(cur, p - cur));
				73	}
				74
				75	/**
				76	* xmlCharStrndup:
				77	* @cur: the input char *
				78	* @len: the len of @cur
				79	*
				80	* a strndup for char's to xmlChar's
				81	*
				82	* Returns a new xmlChar * or NULL
				83	*/
				84
				85	xmlChar *
				86	xmlCharStrndup(const char *cur, int len) {
				87	int i;
				88	xmlChar *ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	89
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	90	if ((cur == NULL) \|\| (len < 0)) return(NULL);
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	91	ret = (xmlChar ) xmlMallocAtomic(((size_t) len + 1) sizeof(xmlChar));
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	92	if (ret == NULL) {
				93	xmlErrMemory(NULL, NULL);
				94	return(NULL);
				95	}
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	96	for (i = 0;i < len;i++) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	97	ret[i] = (xmlChar) cur[i];
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	98	if (ret[i] == 0) return(ret);
				99	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	100	ret[len] = 0;
				101	return(ret);
				102	}
				103
				104	/**
				105	* xmlCharStrdup:
				106	* @cur: the input char *
				107	*
				108	* a strdup for char's to xmlChar's
				109	*
				110	* Returns a new xmlChar * or NULL
				111	*/
				112
				113	xmlChar *
				114	xmlCharStrdup(const char *cur) {
				115	const char *p = cur;
				116
				117	if (cur == NULL) return(NULL);
				118	while (p != '\0') p++; / non input consuming */
				119	return(xmlCharStrndup(cur, p - cur));
				120	}
				121
				122	/**
				123	* xmlStrcmp:
				124	* @str1: the first xmlChar *
				125	* @str2: the second xmlChar *
				126	*
				127	* a strcmp for xmlChar's
				128	*
				129	* Returns the integer result of the comparison
				130	*/
				131
				132	int
				133	xmlStrcmp(const xmlChar str1, const xmlChar str2) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	134	if (str1 == str2) return(0);
				135	if (str1 == NULL) return(-1);
				136	if (str2 == NULL) return(1);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	137	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				138	return(strcmp((const char )str1, (const char )str2));
				139	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	140	do {
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	141	int tmp = str1++ - str2;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	142	if (tmp != 0) return(tmp);
				143	} while (*str2++ != 0);
				144	return 0;
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	145	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	146	}
				147
				148	/**
				149	* xmlStrEqual:
				150	* @str1: the first xmlChar *
				151	* @str2: the second xmlChar *
				152	*
Daniel Veillard	d95ecf0	2005-12-22 14:58:32 +0000	[diff] [blame]	153	* Check if both strings are equal of have same content.
Daniel Veillard	6a0baa0	2005-12-10 11:11:12 +0000	[diff] [blame]	154	* Should be a bit more readable and faster than xmlStrcmp()
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	155	*
				156	* Returns 1 if they are equal, 0 if they are different
				157	*/
				158
				159	int
				160	xmlStrEqual(const xmlChar str1, const xmlChar str2) {
				161	if (str1 == str2) return(1);
				162	if (str1 == NULL) return(0);
				163	if (str2 == NULL) return(0);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	164	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				165	return(strcmp((const char )str1, (const char )str2) == 0);
				166	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	167	do {
				168	if (str1++ != str2) return(0);
				169	} while (*str2++);
				170	return(1);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	171	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	172	}
				173
				174	/**
				175	* xmlStrQEqual:
				176	* @pref: the prefix of the QName
				177	* @name: the localname of the QName
				178	* @str: the second xmlChar *
				179	*
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	180	* Check if a QName is Equal to a given string
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	181	*
				182	* Returns 1 if they are equal, 0 if they are different
				183	*/
				184
				185	int
				186	xmlStrQEqual(const xmlChar pref, const xmlChar name, const xmlChar *str) {
				187	if (pref == NULL) return(xmlStrEqual(name, str));
				188	if (name == NULL) return(0);
				189	if (str == NULL) return(0);
				190
				191	do {
				192	if (pref++ != str) return(0);
				193	} while ((str++) && (pref));
				194	if (*str++ != ':') return(0);
				195	do {
				196	if (name++ != str) return(0);
				197	} while (*str++);
				198	return(1);
				199	}
				200
				201	/**
				202	* xmlStrncmp:
				203	* @str1: the first xmlChar *
				204	* @str2: the second xmlChar *
				205	* @len: the max comparison length
				206	*
				207	* a strncmp for xmlChar's
				208	*
				209	* Returns the integer result of the comparison
				210	*/
				211
				212	int
				213	xmlStrncmp(const xmlChar str1, const xmlChar str2, int len) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	214	if (len <= 0) return(0);
				215	if (str1 == str2) return(0);
				216	if (str1 == NULL) return(-1);
				217	if (str2 == NULL) return(1);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	218	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				219	return(strncmp((const char )str1, (const char )str2, len));
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	220	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	221	do {
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	222	int tmp = str1++ - str2;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	223	if (tmp != 0 \|\| --len == 0) return(tmp);
				224	} while (*str2++ != 0);
				225	return 0;
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	226	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	227	}
				228
				229	static const xmlChar casemap[256] = {
				230	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
				231	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
				232	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
				233	0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
				234	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
				235	0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
				236	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
				237	0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
				238	0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				239	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				240	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				241	0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
				242	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				243	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				244	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				245	0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
				246	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
				247	0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
				248	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
				249	0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
				250	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
				251	0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
				252	0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
				253	0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
				254	0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
				255	0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
				256	0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
				257	0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
				258	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
				259	0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
				260	0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
				261	0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
				262	};
				263
				264	/**
				265	* xmlStrcasecmp:
				266	* @str1: the first xmlChar *
				267	* @str2: the second xmlChar *
				268	*
				269	* a strcasecmp for xmlChar's
				270	*
				271	* Returns the integer result of the comparison
				272	*/
				273
				274	int
				275	xmlStrcasecmp(const xmlChar str1, const xmlChar str2) {
				276	register int tmp;
				277
				278	if (str1 == str2) return(0);
				279	if (str1 == NULL) return(-1);
				280	if (str2 == NULL) return(1);
				281	do {
				282	tmp = casemap[str1++] - casemap[str2];
				283	if (tmp != 0) return(tmp);
				284	} while (*str2++ != 0);
				285	return 0;
				286	}
				287
				288	/**
				289	* xmlStrncasecmp:
				290	* @str1: the first xmlChar *
				291	* @str2: the second xmlChar *
				292	* @len: the max comparison length
				293	*
				294	* a strncasecmp for xmlChar's
				295	*
				296	* Returns the integer result of the comparison
				297	*/
				298
				299	int
				300	xmlStrncasecmp(const xmlChar str1, const xmlChar str2, int len) {
				301	register int tmp;
				302
				303	if (len <= 0) return(0);
				304	if (str1 == str2) return(0);
				305	if (str1 == NULL) return(-1);
				306	if (str2 == NULL) return(1);
				307	do {
				308	tmp = casemap[str1++] - casemap[str2];
				309	if (tmp != 0 \|\| --len == 0) return(tmp);
				310	} while (*str2++ != 0);
				311	return 0;
				312	}
				313
				314	/**
				315	* xmlStrchr:
				316	* @str: the xmlChar * array
				317	* @val: the xmlChar to search
				318	*
				319	* a strchr for xmlChar's
				320	*
				321	* Returns the xmlChar * for the first occurrence or NULL.
				322	*/
				323
				324	const xmlChar *
				325	xmlStrchr(const xmlChar *str, xmlChar val) {
				326	if (str == NULL) return(NULL);
				327	while (str != 0) { / non input consuming */
				328	if (str == val) return((xmlChar ) str);
				329	str++;
				330	}
				331	return(NULL);
				332	}
				333
				334	/**
				335	* xmlStrstr:
				336	* @str: the xmlChar * array (haystack)
				337	* @val: the xmlChar to search (needle)
				338	*
				339	* a strstr for xmlChar's
				340	*
				341	* Returns the xmlChar * for the first occurrence or NULL.
				342	*/
				343
				344	const xmlChar *
				345	xmlStrstr(const xmlChar str, const xmlChar val) {
				346	int n;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	347
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	348	if (str == NULL) return(NULL);
				349	if (val == NULL) return(NULL);
				350	n = xmlStrlen(val);
				351
				352	if (n == 0) return(str);
				353	while (str != 0) { / non input consuming */
				354	if (str == val) {
				355	if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
				356	}
				357	str++;
				358	}
				359	return(NULL);
				360	}
				361
				362	/**
				363	* xmlStrcasestr:
				364	* @str: the xmlChar * array (haystack)
				365	* @val: the xmlChar to search (needle)
				366	*
				367	* a case-ignoring strstr for xmlChar's
				368	*
				369	* Returns the xmlChar * for the first occurrence or NULL.
				370	*/
				371
				372	const xmlChar *
Daniel Veillard	fcf2457	2009-08-12 23:02:08 +0200	[diff] [blame]	373	xmlStrcasestr(const xmlChar str, const xmlChar val) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	374	int n;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	375
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	376	if (str == NULL) return(NULL);
				377	if (val == NULL) return(NULL);
				378	n = xmlStrlen(val);
				379
				380	if (n == 0) return(str);
				381	while (str != 0) { / non input consuming */
				382	if (casemap[str] == casemap[val])
				383	if (!xmlStrncasecmp(str, val, n)) return(str);
				384	str++;
				385	}
				386	return(NULL);
				387	}
				388
				389	/**
				390	* xmlStrsub:
				391	* @str: the xmlChar * array (haystack)
				392	* @start: the index of the first char (zero based)
				393	* @len: the length of the substring
				394	*
				395	* Extract a substring of a given string
				396	*
				397	* Returns the xmlChar * for the first occurrence or NULL.
				398	*/
				399
				400	xmlChar *
				401	xmlStrsub(const xmlChar *str, int start, int len) {
				402	int i;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	403
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	404	if (str == NULL) return(NULL);
				405	if (start < 0) return(NULL);
				406	if (len < 0) return(NULL);
				407
				408	for (i = 0;i < start;i++) {
				409	if (*str == 0) return(NULL);
				410	str++;
				411	}
				412	if (*str == 0) return(NULL);
				413	return(xmlStrndup(str, len));
				414	}
				415
				416	/**
				417	* xmlStrlen:
				418	* @str: the xmlChar * array
				419	*
				420	* length of a xmlChar's string
				421	*
				422	* Returns the number of xmlChar contained in the ARRAY.
				423	*/
				424
				425	int
				426	xmlStrlen(const xmlChar *str) {
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	427	size_t len = 0;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	428
				429	if (str == NULL) return(0);
				430	while (str != 0) { / non input consuming */
				431	str++;
				432	len++;
				433	}
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	434	return(len > INT_MAX ? 0 : len);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	435	}
				436
				437	/**
				438	* xmlStrncat:
				439	* @cur: the original xmlChar * array
				440	* @add: the xmlChar * array added
				441	* @len: the length of @add
				442	*
				443	* a strncat for array of xmlChar's, it will extend @cur with the len
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	444	* first bytes of @add. Note that if @len < 0 then this is an API error
				445	* and NULL will be returned.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	446	*
Nick Wellnhofer	5a0ae66	2017-06-17 23:20:38 +0200	[diff] [blame]	447	* Returns a new xmlChar *, the original @cur is reallocated and should
				448	* not be freed.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	449	*/
				450
				451	xmlChar *
				452	xmlStrncat(xmlChar cur, const xmlChar add, int len) {
				453	int size;
				454	xmlChar *ret;
				455
				456	if ((add == NULL) \|\| (len == 0))
				457	return(cur);
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	458	if (len < 0)
				459	return(NULL);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	460	if (cur == NULL)
				461	return(xmlStrndup(add, len));
				462
				463	size = xmlStrlen(cur);
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	464	if ((size < 0) \|\| (size > INT_MAX - len))
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	465	return(NULL);
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	466	ret = (xmlChar ) xmlRealloc(cur, ((size_t) size + len + 1) sizeof(xmlChar));
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	467	if (ret == NULL) {
				468	xmlErrMemory(NULL, NULL);
				469	return(cur);
				470	}
				471	memcpy(&ret[size], add, len * sizeof(xmlChar));
				472	ret[size + len] = 0;
				473	return(ret);
				474	}
				475
				476	/**
				477	* xmlStrncatNew:
				478	* @str1: first xmlChar string
				479	* @str2: second xmlChar string
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	480	* @len: the len of @str2 or < 0
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	481	*
				482	* same as xmlStrncat, but creates a new string. The original
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	483	* two strings are not freed. If @len is < 0 then the length
				484	* will be calculated automatically.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	485	*
				486	* Returns a new xmlChar * or NULL
				487	*/
				488	xmlChar *
				489	xmlStrncatNew(const xmlChar str1, const xmlChar str2, int len) {
				490	int size;
				491	xmlChar *ret;
				492
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	493	if (len < 0) {
Daniel Veillard	8a32fe4	2004-11-02 22:10:16 +0000	[diff] [blame]	494	len = xmlStrlen(str2);
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	495	if (len < 0)
				496	return(NULL);
				497	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	498	if ((str2 == NULL) \|\| (len == 0))
				499	return(xmlStrdup(str1));
				500	if (str1 == NULL)
				501	return(xmlStrndup(str2, len));
				502
				503	size = xmlStrlen(str1);
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	504	if ((size < 0) \|\| (size > INT_MAX - len))
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	505	return(NULL);
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	506	ret = (xmlChar ) xmlMalloc(((size_t) size + len + 1) sizeof(xmlChar));
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	507	if (ret == NULL) {
				508	xmlErrMemory(NULL, NULL);
				509	return(xmlStrndup(str1, size));
				510	}
				511	memcpy(ret, str1, size * sizeof(xmlChar));
				512	memcpy(&ret[size], str2, len * sizeof(xmlChar));
				513	ret[size + len] = 0;
				514	return(ret);
				515	}
				516
				517	/**
				518	* xmlStrcat:
				519	* @cur: the original xmlChar * array
				520	* @add: the xmlChar * array added
				521	*
				522	* a strcat for array of xmlChar's. Since they are supposed to be
				523	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				524	* a termination mark of '0'.
				525	*
Nick Wellnhofer	5a0ae66	2017-06-17 23:20:38 +0200	[diff] [blame]	526	* Returns a new xmlChar * containing the concatenated string. The original
				527	* @cur is reallocated and should not be freed.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	528	*/
				529	xmlChar *
				530	xmlStrcat(xmlChar cur, const xmlChar add) {
				531	const xmlChar *p = add;
				532
				533	if (add == NULL) return(cur);
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	534	if (cur == NULL)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	535	return(xmlStrdup(add));
				536
				537	while (p != 0) p++; / non input consuming */
				538	return(xmlStrncat(cur, add, p - add));
				539	}
				540
				541	/**
				542	* xmlStrPrintf:
				543	* @buf: the result buffer.
				544	* @len: the result buffer length.
				545	* @msg: the message with printf formatting.
				546	* @...: extra parameters for the message.
				547	*
				548	* Formats @msg and places result into @buf.
				549	*
				550	* Returns the number of characters written to @buf or -1 if an error occurs.
				551	*/
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	552	int XMLCDECL
David Kilzer	4472c3a	2016-05-13 15:13:17 +0800	[diff] [blame]	553	xmlStrPrintf(xmlChar buf, int len, const char msg, ...) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	554	va_list args;
				555	int ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	556
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	557	if((buf == NULL) \|\| (msg == NULL)) {
				558	return(-1);
				559	}
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	560
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	561	va_start(args, msg);
				562	ret = vsnprintf((char ) buf, len, (const char ) msg, args);
				563	va_end(args);
				564	buf[len - 1] = 0; /* be safe ! */
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	565
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	566	return(ret);
				567	}
				568
				569	/**
				570	* xmlStrVPrintf:
				571	* @buf: the result buffer.
				572	* @len: the result buffer length.
				573	* @msg: the message with printf formatting.
				574	* @ap: extra parameters for the message.
				575	*
				576	* Formats @msg and places result into @buf.
				577	*
				578	* Returns the number of characters written to @buf or -1 if an error occurs.
				579	*/
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	580	int
David Kilzer	4472c3a	2016-05-13 15:13:17 +0800	[diff] [blame]	581	xmlStrVPrintf(xmlChar buf, int len, const char msg, va_list ap) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	582	int ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	583
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	584	if((buf == NULL) \|\| (msg == NULL)) {
				585	return(-1);
				586	}
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	587
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	588	ret = vsnprintf((char ) buf, len, (const char ) msg, ap);
				589	buf[len - 1] = 0; /* be safe ! */
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	590
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	591	return(ret);
				592	}
				593
				594	/************************************************************************
				595	* *
				596	* Generic UTF8 handling routines *
				597	* *
				598	* From rfc2044: encoding of the Unicode values on UTF-8: *
				599	* *
				600	* UCS-4 range (hex.) UTF-8 octet sequence (binary) *
				601	* 0000 0000-0000 007F 0xxxxxxx *
				602	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
				603	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
				604	* *
				605	* I hope we won't use values > 0xFFFF anytime soon ! *
				606	* *
				607	************************************************************************/
				608
				609
				610	/**
				611	* xmlUTF8Size:
				612	* @utf: pointer to the UTF8 character
				613	*
				614	* calculates the internal size of a UTF8 character
				615	*
				616	* returns the numbers of bytes in the character, -1 on format error
				617	*/
				618	int
				619	xmlUTF8Size(const xmlChar *utf) {
				620	xmlChar mask;
				621	int len;
				622
				623	if (utf == NULL)
				624	return -1;
				625	if (*utf < 0x80)
				626	return 1;
				627	/* check valid UTF8 character */
				628	if (!(*utf & 0x40))
				629	return -1;
				630	/* determine number of bytes in char */
				631	len = 2;
				632	for (mask=0x20; mask != 0; mask>>=1) {
				633	if (!(*utf & mask))
				634	return len;
				635	len++;
				636	}
				637	return -1;
				638	}
				639
				640	/**
				641	* xmlUTF8Charcmp:
				642	* @utf1: pointer to first UTF8 char
				643	* @utf2: pointer to second UTF8 char
				644	*
				645	* compares the two UCS4 values
				646	*
				647	* returns result of the compare as with xmlStrncmp
				648	*/
				649	int
				650	xmlUTF8Charcmp(const xmlChar utf1, const xmlChar utf2) {
				651
				652	if (utf1 == NULL ) {
				653	if (utf2 == NULL)
				654	return 0;
				655	return -1;
				656	}
				657	return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
				658	}
				659
				660	/**
				661	* xmlUTF8Strlen:
				662	* @utf: a sequence of UTF-8 encoded bytes
				663	*
				664	* compute the length of an UTF8 string, it doesn't do a full UTF8
				665	* checking of the content of the string.
				666	*
				667	* Returns the number of characters in the string or -1 in case of error
				668	*/
				669	int
				670	xmlUTF8Strlen(const xmlChar *utf) {
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	671	size_t ret = 0;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	672
				673	if (utf == NULL)
				674	return(-1);
				675
				676	while (*utf != 0) {
				677	if (utf[0] & 0x80) {
				678	if ((utf[1] & 0xc0) != 0x80)
				679	return(-1);
				680	if ((utf[0] & 0xe0) == 0xe0) {
				681	if ((utf[2] & 0xc0) != 0x80)
				682	return(-1);
				683	if ((utf[0] & 0xf0) == 0xf0) {
				684	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				685	return(-1);
				686	utf += 4;
				687	} else {
				688	utf += 3;
				689	}
				690	} else {
				691	utf += 2;
				692	}
				693	} else {
				694	utf++;
				695	}
				696	ret++;
				697	}
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	698	return(ret > INT_MAX ? 0 : ret);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	699	}
				700
				/**
				 * xmlGetUTF8Char:
				 * @utf:  a sequence of UTF-8 encoded bytes
				 * @len:  a pointer to the minimum number of bytes present in
				 *        the sequence.  This is used to assure the next character
				 *        is completely contained within the sequence.
				 *
				 * Read the first UTF8 character from @utf
				 *
				 * Returns the char value or -1 in case of error, and sets *len to
				 *        the actual number of bytes consumed (0 in case of error)
				 */
				int
				xmlGetUTF8Char(const unsigned char *utf, int *len) {
				    unsigned int c;

				    if (utf == NULL)
				        goto error;
				    if (len == NULL)
				        goto error;
				    if (*len < 1)
				        goto error;

				    c = utf[0];
				    if (c & 0x80) {
				        /* multi-byte: check availability before touching each byte */
				        if (*len < 2)
				            goto error;
				        if ((utf[1] & 0xc0) != 0x80)
				            goto error;
				        if ((c & 0xe0) == 0xe0) {
				            if (*len < 3)
				                goto error;
				            if ((utf[2] & 0xc0) != 0x80)
				                goto error;
				            if ((c & 0xf0) == 0xf0) {
				                if (*len < 4)
				                    goto error;
				                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
				                    goto error;
				                *len = 4;
				                /* 4-byte code */
				                c = (utf[0] & 0x7) << 18;
				                c |= (utf[1] & 0x3f) << 12;
				                c |= (utf[2] & 0x3f) << 6;
				                c |= utf[3] & 0x3f;
				            } else {
				                /* 3-byte code */
				                *len = 3;
				                c = (utf[0] & 0xf) << 12;
				                c |= (utf[1] & 0x3f) << 6;
				                c |= utf[2] & 0x3f;
				            }
				        } else {
				            /* 2-byte code */
				            *len = 2;
				            c = (utf[0] & 0x1f) << 6;
				            c |= utf[1] & 0x3f;
				        }
				    } else {
				        /* 1-byte code */
				        *len = 1;
				    }
				    return(c);

				error:
				    if (len != NULL)
				        *len = 0;
				    return(-1);
				}
				770
				/**
				 * xmlCheckUTF8:
				 * @utf: Pointer to putative UTF-8 encoded string.
				 *
				 * Checks @utf for being valid UTF-8. @utf is assumed to be
				 * null-terminated. This function is not super-strict, as it will
				 * allow longer UTF-8 sequences than necessary. Note that Java is
				 * capable of producing these sequences if provoked. Also note, this
				 * routine checks for the 4-byte maximum size, but does not check for
				 * 0x10ffff maximum value.
				 *
				 * Return value: true if @utf is valid.
				 **/
				int
				xmlCheckUTF8(const unsigned char *utf)
				{
				    int ix;
				    unsigned char c;

				    if (utf == NULL)
				        return(0);
				    /*
				     * utf is a string of 1, 2, 3 or 4 bytes.  The valid strings
				     * are as follows (in "bit format"):
				     *    0xxxxxxx                                      valid 1-byte
				     *    110xxxxx 10xxxxxx                             valid 2-byte
				     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
				     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
				     */
				    while ((c = utf[0])) {      /* string is 0-terminated */
				        ix = 0;
				        if ((c & 0x80) == 0x00) {       /* 1-byte code, starts with 0 */
				            ix = 1;
				        } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
				            if ((utf[1] & 0xc0 ) != 0x80)
				                return 0;
				            ix = 2;
				        } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
				            /* || short-circuits, so no byte past a terminator is read */
				            if (((utf[1] & 0xc0) != 0x80) ||
				                ((utf[2] & 0xc0) != 0x80))
				                    return 0;
				            ix = 3;
				        } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
				            if (((utf[1] & 0xc0) != 0x80) ||
				                ((utf[2] & 0xc0) != 0x80) ||
				                ((utf[3] & 0xc0) != 0x80))
				                    return 0;
				            ix = 4;
				        } else                          /* unknown encoding */
				            return 0;
				        utf += ix;
				    }
				    return(1);
				}
				825
				826	/**
				827	* xmlUTF8Strsize:
				828	* @utf: a sequence of UTF-8 encoded bytes
				829	* @len: the number of characters in the array
				830	*
				831	* storage size of an UTF8 string
Nick Wellnhofer	8bbe450	2017-06-17 16:15:09 +0200	[diff] [blame]	832	* the behaviour is not guaranteed if the input string is not UTF-8
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	833	*
				834	* Returns the storage size of
				835	* the first 'len' characters of ARRAY
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	836	*/
				837
				838	int
				839	xmlUTF8Strsize(const xmlChar *utf, int len) {
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	840	const xmlChar *ptr=utf;
				841	int ch;
				842	size_t ret;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	843
Daniel Veillard	36e5cd5	2004-11-02 14:52:23 +0000	[diff] [blame]	844	if (utf == NULL)
				845	return(0);
				846
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	847	if (len <= 0)
				848	return(0);
				849
				850	while ( len-- > 0) {
				851	if ( !*ptr )
				852	break;
				853	if ( (ch = *ptr++) & 0x80)
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	854	while ((ch<<=1) & 0x80 ) {
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	855	if (*ptr == 0) break;
Nick Wellnhofer	96a5c17	2016-04-21 19:03:47 +0200	[diff] [blame]	856	ptr++;
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	857	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	858	}
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	859	ret = ptr - utf;
				860	return (ret > INT_MAX ? 0 : ret);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	861	}
				862
				863
				864	/**
				865	* xmlUTF8Strndup:
				866	* @utf: the input UTF8 *
				867	* @len: the len of @utf (in chars)
				868	*
				869	* a strndup for array of UTF8's
				870	*
				871	* Returns a new UTF8 * or NULL
				872	*/
				873	xmlChar *
				874	xmlUTF8Strndup(const xmlChar *utf, int len) {
				875	xmlChar *ret;
				876	int i;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	877
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	878	if ((utf == NULL) \|\| (len < 0)) return(NULL);
				879	i = xmlUTF8Strsize(utf, len);
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	880	ret = (xmlChar ) xmlMallocAtomic(((size_t) i + 1) sizeof(xmlChar));
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	881	if (ret == NULL) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	882	return(NULL);
				883	}
				884	memcpy(ret, utf, i * sizeof(xmlChar));
				885	ret[i] = 0;
				886	return(ret);
				887	}
				888
				889	/**
				890	* xmlUTF8Strpos:
				891	* @utf: the input UTF8 *
				892	* @pos: the position of the desired UTF8 char (in chars)
				893	*
				894	* a function to provide the equivalent of fetching a
				895	* character from a string array
				896	*
				897	* Returns a pointer to the UTF8 character or NULL
				898	*/
Daniel Veillard	8a32fe4	2004-11-02 22:10:16 +0000	[diff] [blame]	899	const xmlChar *
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	900	xmlUTF8Strpos(const xmlChar *utf, int pos) {
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	901	int ch;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	902
				903	if (utf == NULL) return(NULL);
William M. Brack	230c550	2004-12-20 16:18:49 +0000	[diff] [blame]	904	if (pos < 0)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	905	return(NULL);
				906	while (pos--) {
				907	if ((ch=*utf++) == 0) return(NULL);
				908	if ( ch & 0x80 ) {
				909	/* if not simple ascii, verify proper format */
				910	if ( (ch & 0xc0) != 0xc0 )
				911	return(NULL);
				912	/* then skip over remaining bytes for this char */
				913	while ( (ch <<= 1) & 0x80 )
				914	if ( (*utf++ & 0xc0) != 0x80 )
				915	return(NULL);
				916	}
				917	}
				918	return((xmlChar *)utf);
				919	}
				920
				921	/**
				922	* xmlUTF8Strloc:
				923	* @utf: the input UTF8 *
				924	* @utfchar: the UTF8 character to be found
				925	*
				926	* a function to provide the relative location of a UTF8 char
				927	*
				928	* Returns the relative character position of the desired char
				929	* or -1 if not found
				930	*/
				931	int
				932	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	933	size_t i;
				934	int size;
				935	int ch;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	936
				937	if (utf==NULL \|\| utfchar==NULL) return -1;
				938	size = xmlUTF8Strsize(utfchar, 1);
				939	for(i=0; (ch=*utf) != 0; i++) {
				940	if (xmlStrncmp(utf, utfchar, size)==0)
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	941	return(i > INT_MAX ? 0 : i);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	942	utf++;
				943	if ( ch & 0x80 ) {
				944	/* if not simple ascii, verify proper format */
				945	if ( (ch & 0xc0) != 0xc0 )
				946	return(-1);
				947	/* then skip over remaining bytes for this char */
				948	while ( (ch <<= 1) & 0x80 )
				949	if ( (*utf++ & 0xc0) != 0x80 )
				950	return(-1);
				951	}
				952	}
				953
				954	return(-1);
				955	}
				956	/**
				957	* xmlUTF8Strsub:
				958	* @utf: a sequence of UTF-8 encoded bytes
				959	* @start: relative pos of first char
				960	* @len: total number to copy
				961	*
				962	* Create a substring from a given UTF-8 string
				963	* Note: positions are given in units of UTF-8 chars
				964	*
				965	* Returns a pointer to a newly created string
				966	* or NULL if any problem
				967	*/
				968
				969	xmlChar *
				970	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	971	int i;
				972	int ch;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	973
				974	if (utf == NULL) return(NULL);
				975	if (start < 0) return(NULL);
				976	if (len < 0) return(NULL);
				977
				978	/*
				979	* Skip over any leading chars
				980	*/
				981	for (i = 0;i < start;i++) {
				982	if ((ch=*utf++) == 0) return(NULL);
				983	if ( ch & 0x80 ) {
				984	/* if not simple ascii, verify proper format */
				985	if ( (ch & 0xc0) != 0xc0 )
				986	return(NULL);
				987	/* then skip over remaining bytes for this char */
				988	while ( (ch <<= 1) & 0x80 )
				989	if ( (*utf++ & 0xc0) != 0x80 )
				990	return(NULL);
				991	}
				992	}
				993
				994	return(xmlUTF8Strndup(utf, len));
				995	}
Daniel Veillard	5d4644e	2005-04-01 13:11:58 +0000	[diff] [blame]	996
David Kilzer	502f6a6	2016-05-23 14:58:41 +0800	[diff] [blame]	997	/**
				998	* xmlEscapeFormatString:
				999	* @msg: a pointer to the string in which to escape '%' characters.
				1000	* Must be a heap-allocated buffer created by libxml2 that may be
				1001	* returned, or that may be freed and replaced.
				1002	*
				1003	* Replaces the string pointed to by 'msg' with an escaped string.
				1004	* Returns the same string with all '%' characters escaped.
				1005	*/
				1006	xmlChar *
				1007	xmlEscapeFormatString(xmlChar **msg)
				1008	{
				1009	xmlChar *msgPtr = NULL;
				1010	xmlChar *result = NULL;
				1011	xmlChar *resultPtr = NULL;
				1012	size_t count = 0;
				1013	size_t msgLen = 0;
				1014	size_t resultLen = 0;
				1015
				1016	if (!msg \|\| !*msg)
				1017	return(NULL);
				1018
				1019	for (msgPtr = msg; msgPtr != '\0'; ++msgPtr) {
				1020	++msgLen;
				1021	if (*msgPtr == '%')
				1022	++count;
				1023	}
				1024
				1025	if (count == 0)
				1026	return(*msg);
				1027
Elliott Hughes	ecdab2a	2022-02-23 14:33:50 -0800	[diff] [blame]	1028	if ((count > INT_MAX) \|\| (msgLen > INT_MAX - count))
				1029	return(NULL);
David Kilzer	502f6a6	2016-05-23 14:58:41 +0800	[diff] [blame]	1030	resultLen = msgLen + count + 1;
				1031	result = (xmlChar ) xmlMallocAtomic(resultLen sizeof(xmlChar));
				1032	if (result == NULL) {
				1033	/* Clear *msg to prevent format string vulnerabilities in
				1034	out-of-memory situations. */
				1035	xmlFree(*msg);
				1036	*msg = NULL;
				1037	xmlErrMemory(NULL, NULL);
				1038	return(NULL);
				1039	}
				1040
				1041	for (msgPtr = msg, resultPtr = result; msgPtr != '\0'; ++msgPtr, ++resultPtr) {
				1042	resultPtr = msgPtr;
				1043	if (*msgPtr == '%')
				1044	*(++resultPtr) = '%';
				1045	}
				1046	result[resultLen - 1] = '\0';
				1047
				1048	xmlFree(*msg);
				1049	*msg = result;
				1050
				1051	return *msg;
				1052	}
				1053