Blame - xmlstring.c - platform/external/libxml2

blob: e8a1e45d1b3e890238f0ccb9ada60ccccf887074 [file] [log] [blame]

William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	1	/*
				2	* string.c : an XML string utilities module
				3	*
				4	* This module provides various utility functions for manipulating
				5	* the xmlChar* type. All functions named xmlStr* have been moved here
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	6	* from the parser.c file (their original home).
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	7	*
				8	* See Copyright for the status of this software.
				9	*
				10	* UTF8 string routines from:
				11	* William Brack <wbrack@mmm.com.hk>
				12	*
				13	* daniel@veillard.com
				14	*/
				15
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/xmlmemory.h>
#include <libxml/parserInternals.h>
#include <libxml/xmlstring.h>
				24
				25	/************************************************************************
				26	* *
				27	* Commodity functions to handle xmlChars *
				28	* *
				29	************************************************************************/
				30
				31	/**
				32	* xmlStrndup:
				33	* @cur: the input xmlChar *
				34	* @len: the len of @cur
				35	*
				36	* a strndup for array of xmlChar's
				37	*
				38	* Returns a new xmlChar * or NULL
				39	*/
				40	xmlChar *
				41	xmlStrndup(const xmlChar *cur, int len) {
				42	xmlChar *ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	43
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	44	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				45	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				46	if (ret == NULL) {
				47	xmlErrMemory(NULL, NULL);
				48	return(NULL);
				49	}
				50	memcpy(ret, cur, len * sizeof(xmlChar));
				51	ret[len] = 0;
				52	return(ret);
				53	}
				54
				55	/**
				56	* xmlStrdup:
				57	* @cur: the input xmlChar *
				58	*
				59	* a strdup for array of xmlChar's. Since they are supposed to be
				60	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				61	* a termination mark of '0'.
				62	*
				63	* Returns a new xmlChar * or NULL
				64	*/
				65	xmlChar *
				66	xmlStrdup(const xmlChar *cur) {
				67	const xmlChar *p = cur;
				68
				69	if (cur == NULL) return(NULL);
				70	while (p != 0) p++; / non input consuming */
				71	return(xmlStrndup(cur, p - cur));
				72	}
				73
				74	/**
				75	* xmlCharStrndup:
				76	* @cur: the input char *
				77	* @len: the len of @cur
				78	*
				79	* a strndup for char's to xmlChar's
				80	*
				81	* Returns a new xmlChar * or NULL
				82	*/
				83
				84	xmlChar *
				85	xmlCharStrndup(const char *cur, int len) {
				86	int i;
				87	xmlChar *ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	88
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	89	if ((cur == NULL) \|\| (len < 0)) return(NULL);
				90	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
				91	if (ret == NULL) {
				92	xmlErrMemory(NULL, NULL);
				93	return(NULL);
				94	}
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	95	for (i = 0;i < len;i++) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	96	ret[i] = (xmlChar) cur[i];
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	97	if (ret[i] == 0) return(ret);
				98	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	99	ret[len] = 0;
				100	return(ret);
				101	}
				102
				103	/**
				104	* xmlCharStrdup:
				105	* @cur: the input char *
				106	*
				107	* a strdup for char's to xmlChar's
				108	*
				109	* Returns a new xmlChar * or NULL
				110	*/
				111
				112	xmlChar *
				113	xmlCharStrdup(const char *cur) {
				114	const char *p = cur;
				115
				116	if (cur == NULL) return(NULL);
				117	while (p != '\0') p++; / non input consuming */
				118	return(xmlCharStrndup(cur, p - cur));
				119	}
				120
				121	/**
				122	* xmlStrcmp:
				123	* @str1: the first xmlChar *
				124	* @str2: the second xmlChar *
				125	*
				126	* a strcmp for xmlChar's
				127	*
				128	* Returns the integer result of the comparison
				129	*/
				130
				131	int
				132	xmlStrcmp(const xmlChar str1, const xmlChar str2) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	133	if (str1 == str2) return(0);
				134	if (str1 == NULL) return(-1);
				135	if (str2 == NULL) return(1);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	136	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				137	return(strcmp((const char )str1, (const char )str2));
				138	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	139	do {
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	140	int tmp = str1++ - str2;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	141	if (tmp != 0) return(tmp);
				142	} while (*str2++ != 0);
				143	return 0;
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	144	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	145	}
				146
				147	/**
				148	* xmlStrEqual:
				149	* @str1: the first xmlChar *
				150	* @str2: the second xmlChar *
				151	*
Daniel Veillard	d95ecf0	2005-12-22 14:58:32 +0000	[diff] [blame]	152	* Check if both strings are equal of have same content.
Daniel Veillard	6a0baa0	2005-12-10 11:11:12 +0000	[diff] [blame]	153	* Should be a bit more readable and faster than xmlStrcmp()
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	154	*
				155	* Returns 1 if they are equal, 0 if they are different
				156	*/
				157
				158	int
				159	xmlStrEqual(const xmlChar str1, const xmlChar str2) {
				160	if (str1 == str2) return(1);
				161	if (str1 == NULL) return(0);
				162	if (str2 == NULL) return(0);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	163	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				164	return(strcmp((const char )str1, (const char )str2) == 0);
				165	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	166	do {
				167	if (str1++ != str2) return(0);
				168	} while (*str2++);
				169	return(1);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	170	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	171	}
				172
				173	/**
				174	* xmlStrQEqual:
				175	* @pref: the prefix of the QName
				176	* @name: the localname of the QName
				177	* @str: the second xmlChar *
				178	*
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	179	* Check if a QName is Equal to a given string
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	180	*
				181	* Returns 1 if they are equal, 0 if they are different
				182	*/
				183
				184	int
				185	xmlStrQEqual(const xmlChar pref, const xmlChar name, const xmlChar *str) {
				186	if (pref == NULL) return(xmlStrEqual(name, str));
				187	if (name == NULL) return(0);
				188	if (str == NULL) return(0);
				189
				190	do {
				191	if (pref++ != str) return(0);
				192	} while ((str++) && (pref));
				193	if (*str++ != ':') return(0);
				194	do {
				195	if (name++ != str) return(0);
				196	} while (*str++);
				197	return(1);
				198	}
				199
				200	/**
				201	* xmlStrncmp:
				202	* @str1: the first xmlChar *
				203	* @str2: the second xmlChar *
				204	* @len: the max comparison length
				205	*
				206	* a strncmp for xmlChar's
				207	*
				208	* Returns the integer result of the comparison
				209	*/
				210
				211	int
				212	xmlStrncmp(const xmlChar str1, const xmlChar str2, int len) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	213	if (len <= 0) return(0);
				214	if (str1 == str2) return(0);
				215	if (str1 == NULL) return(-1);
				216	if (str2 == NULL) return(1);
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	217	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				218	return(strncmp((const char )str1, (const char )str2, len));
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	219	#else
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	220	do {
Haibo Huang	f0a546b	2020-09-01 20:28:19 -0700	[diff] [blame]	221	int tmp = str1++ - str2;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	222	if (tmp != 0 \|\| --len == 0) return(tmp);
				223	} while (*str2++ != 0);
				224	return 0;
Daniel Veillard	c82c57e	2004-01-12 16:24:34 +0000	[diff] [blame]	225	#endif
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	226	}
				227
				228	static const xmlChar casemap[256] = {
				229	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
				230	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
				231	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
				232	0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
				233	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
				234	0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
				235	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
				236	0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
				237	0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				238	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				239	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				240	0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
				241	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
				242	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
				243	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
				244	0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
				245	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
				246	0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
				247	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
				248	0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
				249	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
				250	0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
				251	0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
				252	0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
				253	0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
				254	0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
				255	0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
				256	0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
				257	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
				258	0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
				259	0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
				260	0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
				261	};
				262
				263	/**
				264	* xmlStrcasecmp:
				265	* @str1: the first xmlChar *
				266	* @str2: the second xmlChar *
				267	*
				268	* a strcasecmp for xmlChar's
				269	*
				270	* Returns the integer result of the comparison
				271	*/
				272
				273	int
				274	xmlStrcasecmp(const xmlChar str1, const xmlChar str2) {
				275	register int tmp;
				276
				277	if (str1 == str2) return(0);
				278	if (str1 == NULL) return(-1);
				279	if (str2 == NULL) return(1);
				280	do {
				281	tmp = casemap[str1++] - casemap[str2];
				282	if (tmp != 0) return(tmp);
				283	} while (*str2++ != 0);
				284	return 0;
				285	}
				286
				287	/**
				288	* xmlStrncasecmp:
				289	* @str1: the first xmlChar *
				290	* @str2: the second xmlChar *
				291	* @len: the max comparison length
				292	*
				293	* a strncasecmp for xmlChar's
				294	*
				295	* Returns the integer result of the comparison
				296	*/
				297
				298	int
				299	xmlStrncasecmp(const xmlChar str1, const xmlChar str2, int len) {
				300	register int tmp;
				301
				302	if (len <= 0) return(0);
				303	if (str1 == str2) return(0);
				304	if (str1 == NULL) return(-1);
				305	if (str2 == NULL) return(1);
				306	do {
				307	tmp = casemap[str1++] - casemap[str2];
				308	if (tmp != 0 \|\| --len == 0) return(tmp);
				309	} while (*str2++ != 0);
				310	return 0;
				311	}
				312
				313	/**
				314	* xmlStrchr:
				315	* @str: the xmlChar * array
				316	* @val: the xmlChar to search
				317	*
				318	* a strchr for xmlChar's
				319	*
				320	* Returns the xmlChar * for the first occurrence or NULL.
				321	*/
				322
				323	const xmlChar *
				324	xmlStrchr(const xmlChar *str, xmlChar val) {
				325	if (str == NULL) return(NULL);
				326	while (str != 0) { / non input consuming */
				327	if (str == val) return((xmlChar ) str);
				328	str++;
				329	}
				330	return(NULL);
				331	}
				332
				333	/**
				334	* xmlStrstr:
				335	* @str: the xmlChar * array (haystack)
				336	* @val: the xmlChar to search (needle)
				337	*
				338	* a strstr for xmlChar's
				339	*
				340	* Returns the xmlChar * for the first occurrence or NULL.
				341	*/
				342
				343	const xmlChar *
				344	xmlStrstr(const xmlChar str, const xmlChar val) {
				345	int n;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	346
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	347	if (str == NULL) return(NULL);
				348	if (val == NULL) return(NULL);
				349	n = xmlStrlen(val);
				350
				351	if (n == 0) return(str);
				352	while (str != 0) { / non input consuming */
				353	if (str == val) {
				354	if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
				355	}
				356	str++;
				357	}
				358	return(NULL);
				359	}
				360
				361	/**
				362	* xmlStrcasestr:
				363	* @str: the xmlChar * array (haystack)
				364	* @val: the xmlChar to search (needle)
				365	*
				366	* a case-ignoring strstr for xmlChar's
				367	*
				368	* Returns the xmlChar * for the first occurrence or NULL.
				369	*/
				370
				371	const xmlChar *
Daniel Veillard	fcf2457	2009-08-12 23:02:08 +0200	[diff] [blame]	372	xmlStrcasestr(const xmlChar str, const xmlChar val) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	373	int n;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	374
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	375	if (str == NULL) return(NULL);
				376	if (val == NULL) return(NULL);
				377	n = xmlStrlen(val);
				378
				379	if (n == 0) return(str);
				380	while (str != 0) { / non input consuming */
				381	if (casemap[str] == casemap[val])
				382	if (!xmlStrncasecmp(str, val, n)) return(str);
				383	str++;
				384	}
				385	return(NULL);
				386	}
				387
				388	/**
				389	* xmlStrsub:
				390	* @str: the xmlChar * array (haystack)
				391	* @start: the index of the first char (zero based)
				392	* @len: the length of the substring
				393	*
				394	* Extract a substring of a given string
				395	*
				396	* Returns the xmlChar * for the first occurrence or NULL.
				397	*/
				398
				399	xmlChar *
				400	xmlStrsub(const xmlChar *str, int start, int len) {
				401	int i;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	402
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	403	if (str == NULL) return(NULL);
				404	if (start < 0) return(NULL);
				405	if (len < 0) return(NULL);
				406
				407	for (i = 0;i < start;i++) {
				408	if (*str == 0) return(NULL);
				409	str++;
				410	}
				411	if (*str == 0) return(NULL);
				412	return(xmlStrndup(str, len));
				413	}
				414
				415	/**
				416	* xmlStrlen:
				417	* @str: the xmlChar * array
				418	*
				419	* length of a xmlChar's string
				420	*
				421	* Returns the number of xmlChar contained in the ARRAY.
				422	*/
				423
				424	int
				425	xmlStrlen(const xmlChar *str) {
				426	int len = 0;
				427
				428	if (str == NULL) return(0);
				429	while (str != 0) { / non input consuming */
				430	str++;
				431	len++;
				432	}
				433	return(len);
				434	}
				435
				436	/**
				437	* xmlStrncat:
				438	* @cur: the original xmlChar * array
				439	* @add: the xmlChar * array added
				440	* @len: the length of @add
				441	*
				442	* a strncat for array of xmlChar's, it will extend @cur with the len
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	443	* first bytes of @add. Note that if @len < 0 then this is an API error
				444	* and NULL will be returned.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	445	*
Nick Wellnhofer	5a0ae66	2017-06-17 23:20:38 +0200	[diff] [blame]	446	* Returns a new xmlChar *, the original @cur is reallocated and should
				447	* not be freed.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	448	*/
				449
				450	xmlChar *
				451	xmlStrncat(xmlChar cur, const xmlChar add, int len) {
				452	int size;
				453	xmlChar *ret;
				454
				455	if ((add == NULL) \|\| (len == 0))
				456	return(cur);
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	457	if (len < 0)
				458	return(NULL);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	459	if (cur == NULL)
				460	return(xmlStrndup(add, len));
				461
				462	size = xmlStrlen(cur);
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	463	if (size < 0)
				464	return(NULL);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	465	ret = (xmlChar ) xmlRealloc(cur, (size + len + 1) sizeof(xmlChar));
				466	if (ret == NULL) {
				467	xmlErrMemory(NULL, NULL);
				468	return(cur);
				469	}
				470	memcpy(&ret[size], add, len * sizeof(xmlChar));
				471	ret[size + len] = 0;
				472	return(ret);
				473	}
				474
				475	/**
				476	* xmlStrncatNew:
				477	* @str1: first xmlChar string
				478	* @str2: second xmlChar string
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	479	* @len: the len of @str2 or < 0
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	480	*
				481	* same as xmlStrncat, but creates a new string. The original
Kasimier T. Buchcik	5bb0c08	2005-12-20 10:48:33 +0000	[diff] [blame]	482	* two strings are not freed. If @len is < 0 then the length
				483	* will be calculated automatically.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	484	*
				485	* Returns a new xmlChar * or NULL
				486	*/
				487	xmlChar *
				488	xmlStrncatNew(const xmlChar str1, const xmlChar str2, int len) {
				489	int size;
				490	xmlChar *ret;
				491
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	492	if (len < 0) {
Daniel Veillard	8a32fe4	2004-11-02 22:10:16 +0000	[diff] [blame]	493	len = xmlStrlen(str2);
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	494	if (len < 0)
				495	return(NULL);
				496	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	497	if ((str2 == NULL) \|\| (len == 0))
				498	return(xmlStrdup(str1));
				499	if (str1 == NULL)
				500	return(xmlStrndup(str2, len));
				501
				502	size = xmlStrlen(str1);
Pranjal Jumde	8fbbf55	2016-03-08 17:29:00 -0800	[diff] [blame]	503	if (size < 0)
				504	return(NULL);
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	505	ret = (xmlChar ) xmlMalloc((size + len + 1) sizeof(xmlChar));
				506	if (ret == NULL) {
				507	xmlErrMemory(NULL, NULL);
				508	return(xmlStrndup(str1, size));
				509	}
				510	memcpy(ret, str1, size * sizeof(xmlChar));
				511	memcpy(&ret[size], str2, len * sizeof(xmlChar));
				512	ret[size + len] = 0;
				513	return(ret);
				514	}
				515
				516	/**
				517	* xmlStrcat:
				518	* @cur: the original xmlChar * array
				519	* @add: the xmlChar * array added
				520	*
				521	* a strcat for array of xmlChar's. Since they are supposed to be
				522	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
				523	* a termination mark of '0'.
				524	*
Nick Wellnhofer	5a0ae66	2017-06-17 23:20:38 +0200	[diff] [blame]	525	* Returns a new xmlChar * containing the concatenated string. The original
				526	* @cur is reallocated and should not be freed.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	527	*/
				528	xmlChar *
				529	xmlStrcat(xmlChar cur, const xmlChar add) {
				530	const xmlChar *p = add;
				531
				532	if (add == NULL) return(cur);
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	533	if (cur == NULL)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	534	return(xmlStrdup(add));
				535
				536	while (p != 0) p++; / non input consuming */
				537	return(xmlStrncat(cur, add, p - add));
				538	}
				539
				540	/**
				541	* xmlStrPrintf:
				542	* @buf: the result buffer.
				543	* @len: the result buffer length.
				544	* @msg: the message with printf formatting.
				545	* @...: extra parameters for the message.
				546	*
				547	* Formats @msg and places result into @buf.
				548	*
				549	* Returns the number of characters written to @buf or -1 if an error occurs.
				550	*/
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	551	int XMLCDECL
David Kilzer	4472c3a	2016-05-13 15:13:17 +0800	[diff] [blame]	552	xmlStrPrintf(xmlChar buf, int len, const char msg, ...) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	553	va_list args;
				554	int ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	555
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	556	if((buf == NULL) \|\| (msg == NULL)) {
				557	return(-1);
				558	}
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	559
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	560	va_start(args, msg);
				561	ret = vsnprintf((char ) buf, len, (const char ) msg, args);
				562	va_end(args);
				563	buf[len - 1] = 0; /* be safe ! */
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	564
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	565	return(ret);
				566	}
				567
				568	/**
				569	* xmlStrVPrintf:
				570	* @buf: the result buffer.
				571	* @len: the result buffer length.
				572	* @msg: the message with printf formatting.
				573	* @ap: extra parameters for the message.
				574	*
				575	* Formats @msg and places result into @buf.
				576	*
				577	* Returns the number of characters written to @buf or -1 if an error occurs.
				578	*/
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	579	int
David Kilzer	4472c3a	2016-05-13 15:13:17 +0800	[diff] [blame]	580	xmlStrVPrintf(xmlChar buf, int len, const char msg, va_list ap) {
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	581	int ret;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	582
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	583	if((buf == NULL) \|\| (msg == NULL)) {
				584	return(-1);
				585	}
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	586
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	587	ret = vsnprintf((char ) buf, len, (const char ) msg, ap);
				588	buf[len - 1] = 0; /* be safe ! */
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	589
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	590	return(ret);
				591	}
				592
				593	/************************************************************************
				594	* *
				595	* Generic UTF8 handling routines *
				596	* *
				597	* From rfc2044: encoding of the Unicode values on UTF-8: *
				598	* *
				599	* UCS-4 range (hex.) UTF-8 octet sequence (binary) *
				600	* 0000 0000-0000 007F 0xxxxxxx *
				601	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
				602	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
				603	* *
				604	* I hope we won't use values > 0xFFFF anytime soon ! *
				605	* *
				606	************************************************************************/
				607
				608
				609	/**
				610	* xmlUTF8Size:
				611	* @utf: pointer to the UTF8 character
				612	*
				613	* calculates the internal size of a UTF8 character
				614	*
				615	* returns the numbers of bytes in the character, -1 on format error
				616	*/
				617	int
				618	xmlUTF8Size(const xmlChar *utf) {
				619	xmlChar mask;
				620	int len;
				621
				622	if (utf == NULL)
				623	return -1;
				624	if (*utf < 0x80)
				625	return 1;
				626	/* check valid UTF8 character */
				627	if (!(*utf & 0x40))
				628	return -1;
				629	/* determine number of bytes in char */
				630	len = 2;
				631	for (mask=0x20; mask != 0; mask>>=1) {
				632	if (!(*utf & mask))
				633	return len;
				634	len++;
				635	}
				636	return -1;
				637	}
				638
				639	/**
				640	* xmlUTF8Charcmp:
				641	* @utf1: pointer to first UTF8 char
				642	* @utf2: pointer to second UTF8 char
				643	*
				644	* compares the two UCS4 values
				645	*
				646	* returns result of the compare as with xmlStrncmp
				647	*/
				648	int
				649	xmlUTF8Charcmp(const xmlChar utf1, const xmlChar utf2) {
				650
				651	if (utf1 == NULL ) {
				652	if (utf2 == NULL)
				653	return 0;
				654	return -1;
				655	}
				656	return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
				657	}
				658
				659	/**
				660	* xmlUTF8Strlen:
				661	* @utf: a sequence of UTF-8 encoded bytes
				662	*
				663	* compute the length of an UTF8 string, it doesn't do a full UTF8
				664	* checking of the content of the string.
				665	*
				666	* Returns the number of characters in the string or -1 in case of error
				667	*/
				668	int
				669	xmlUTF8Strlen(const xmlChar *utf) {
				670	int ret = 0;
				671
				672	if (utf == NULL)
				673	return(-1);
				674
				675	while (*utf != 0) {
				676	if (utf[0] & 0x80) {
				677	if ((utf[1] & 0xc0) != 0x80)
				678	return(-1);
				679	if ((utf[0] & 0xe0) == 0xe0) {
				680	if ((utf[2] & 0xc0) != 0x80)
				681	return(-1);
				682	if ((utf[0] & 0xf0) == 0xf0) {
				683	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				684	return(-1);
				685	utf += 4;
				686	} else {
				687	utf += 3;
				688	}
				689	} else {
				690	utf += 2;
				691	}
				692	} else {
				693	utf++;
				694	}
				695	ret++;
				696	}
				697	return(ret);
				698	}
				699
				700	/**
				701	* xmlGetUTF8Char:
				702	* @utf: a sequence of UTF-8 encoded bytes
William M. Brack	3e53016	2004-09-03 17:10:08 +0000	[diff] [blame]	703	* @len: a pointer to the minimum number of bytes present in
				704	* the sequence. This is used to assure the next character
				705	* is completely contained within the sequence.
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	706	*
William M. Brack	3e53016	2004-09-03 17:10:08 +0000	[diff] [blame]	707	* Read the first UTF8 character from @utf
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	708	*
William M. Brack	3e53016	2004-09-03 17:10:08 +0000	[diff] [blame]	709	* Returns the char value or -1 in case of error, and sets *len to
				710	* the actual number of bytes consumed (0 in case of error)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	711	*/
				712	int
				713	xmlGetUTF8Char(const unsigned char utf, int len) {
				714	unsigned int c;
				715
				716	if (utf == NULL)
				717	goto error;
				718	if (len == NULL)
				719	goto error;
				720	if (*len < 1)
				721	goto error;
				722
				723	c = utf[0];
				724	if (c & 0x80) {
				725	if (*len < 2)
				726	goto error;
				727	if ((utf[1] & 0xc0) != 0x80)
				728	goto error;
				729	if ((c & 0xe0) == 0xe0) {
				730	if (*len < 3)
				731	goto error;
				732	if ((utf[2] & 0xc0) != 0x80)
				733	goto error;
				734	if ((c & 0xf0) == 0xf0) {
				735	if (*len < 4)
				736	goto error;
				737	if ((c & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				738	goto error;
				739	*len = 4;
				740	/* 4-byte code */
				741	c = (utf[0] & 0x7) << 18;
				742	c \|= (utf[1] & 0x3f) << 12;
				743	c \|= (utf[2] & 0x3f) << 6;
				744	c \|= utf[3] & 0x3f;
				745	} else {
				746	/* 3-byte code */
				747	*len = 3;
				748	c = (utf[0] & 0xf) << 12;
				749	c \|= (utf[1] & 0x3f) << 6;
				750	c \|= utf[2] & 0x3f;
				751	}
				752	} else {
				753	/* 2-byte code */
				754	*len = 2;
				755	c = (utf[0] & 0x1f) << 6;
				756	c \|= utf[1] & 0x3f;
				757	}
				758	} else {
				759	/* 1-byte code */
				760	*len = 1;
				761	}
				762	return(c);
				763
				764	error:
Daniel Veillard	ce682bc	2004-11-05 17:22:25 +0000	[diff] [blame]	765	if (len != NULL)
				766	*len = 0;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	767	return(-1);
				768	}
				769
				770	/**
				771	* xmlCheckUTF8:
				772	* @utf: Pointer to putative UTF-8 encoded string.
				773	*
				774	* Checks @utf for being valid UTF-8. @utf is assumed to be
				775	* null-terminated. This function is not super-strict, as it will
				776	* allow longer UTF-8 sequences than necessary. Note that Java is
				777	* capable of producing these sequences if provoked. Also note, this
				778	* routine checks for the 4-byte maximum size, but does not check for
				779	* 0x10ffff maximum value.
				780	*
				781	* Return value: true if @utf is valid.
				782	**/
				783	int
				784	xmlCheckUTF8(const unsigned char *utf)
				785	{
				786	int ix;
				787	unsigned char c;
				788
Daniel Veillard	ce682bc	2004-11-05 17:22:25 +0000	[diff] [blame]	789	if (utf == NULL)
				790	return(0);
William M. Brack	3ffe90e	2004-08-28 01:33:30 +0000	[diff] [blame]	791	/*
				792	* utf is a string of 1, 2, 3 or 4 bytes. The valid strings
				793	* are as follows (in "bit format"):
				794	* 0xxxxxxx valid 1-byte
				795	* 110xxxxx 10xxxxxx valid 2-byte
				796	* 1110xxxx 10xxxxxx 10xxxxxx valid 3-byte
				797	* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx valid 4-byte
				798	*/
				799	for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */
William M. Brack	f409515	2004-08-31 16:49:26 +0000	[diff] [blame]	800	if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	801	ix++;
William M. Brack	bf5cf21	2004-08-31 06:47:17 +0000	[diff] [blame]	802	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
				803	if ((utf[ix+1] & 0xc0 ) != 0x80)
				804	return 0;
				805	ix += 2;
				806	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
				807	if (((utf[ix+1] & 0xc0) != 0x80) \|\|
				808	((utf[ix+2] & 0xc0) != 0x80))
				809	return 0;
				810	ix += 3;
				811	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
				812	if (((utf[ix+1] & 0xc0) != 0x80) \|\|
				813	((utf[ix+2] & 0xc0) != 0x80) \|\|
				814	((utf[ix+3] & 0xc0) != 0x80))
				815	return 0;
				816	ix += 4;
				817	} else /* unknown encoding */
				818	return 0;
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	819	}
				820	return(1);
				821	}
				822
				823	/**
				824	* xmlUTF8Strsize:
				825	* @utf: a sequence of UTF-8 encoded bytes
				826	* @len: the number of characters in the array
				827	*
				828	* storage size of an UTF8 string
Nick Wellnhofer	8bbe450	2017-06-17 16:15:09 +0200	[diff] [blame]	829	* the behaviour is not guaranteed if the input string is not UTF-8
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	830	*
				831	* Returns the storage size of
				832	* the first 'len' characters of ARRAY
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	833	*/
				834
				835	int
				836	xmlUTF8Strsize(const xmlChar *utf, int len) {
				837	const xmlChar *ptr=utf;
				838	xmlChar ch;
				839
Daniel Veillard	36e5cd5	2004-11-02 14:52:23 +0000	[diff] [blame]	840	if (utf == NULL)
				841	return(0);
				842
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	843	if (len <= 0)
				844	return(0);
				845
				846	while ( len-- > 0) {
				847	if ( !*ptr )
				848	break;
				849	if ( (ch = *ptr++) & 0x80)
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	850	while ((ch<<=1) & 0x80 ) {
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	851	if (*ptr == 0) break;
Nick Wellnhofer	96a5c17	2016-04-21 19:03:47 +0200	[diff] [blame]	852	ptr++;
Daniel Veillard	5ea30d7	2004-11-08 11:54:28 +0000	[diff] [blame]	853	}
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	854	}
				855	return (ptr - utf);
				856	}
				857
				858
				859	/**
				860	* xmlUTF8Strndup:
				861	* @utf: the input UTF8 *
				862	* @len: the len of @utf (in chars)
				863	*
				864	* a strndup for array of UTF8's
				865	*
				866	* Returns a new UTF8 * or NULL
				867	*/
				868	xmlChar *
				869	xmlUTF8Strndup(const xmlChar *utf, int len) {
				870	xmlChar *ret;
				871	int i;
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	872
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	873	if ((utf == NULL) \|\| (len < 0)) return(NULL);
				874	i = xmlUTF8Strsize(utf, len);
				875	ret = (xmlChar ) xmlMallocAtomic((i + 1) sizeof(xmlChar));
				876	if (ret == NULL) {
				877	xmlGenericError(xmlGenericErrorContext,
				878	"malloc of %ld byte failed\n",
				879	(len + 1) * (long)sizeof(xmlChar));
				880	return(NULL);
				881	}
				882	memcpy(ret, utf, i * sizeof(xmlChar));
				883	ret[i] = 0;
				884	return(ret);
				885	}
				886
				887	/**
				888	* xmlUTF8Strpos:
				889	* @utf: the input UTF8 *
				890	* @pos: the position of the desired UTF8 char (in chars)
				891	*
				892	* a function to provide the equivalent of fetching a
				893	* character from a string array
				894	*
				895	* Returns a pointer to the UTF8 character or NULL
				896	*/
Daniel Veillard	8a32fe4	2004-11-02 22:10:16 +0000	[diff] [blame]	897	const xmlChar *
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	898	xmlUTF8Strpos(const xmlChar *utf, int pos) {
				899	xmlChar ch;
				900
				901	if (utf == NULL) return(NULL);
William M. Brack	230c550	2004-12-20 16:18:49 +0000	[diff] [blame]	902	if (pos < 0)
William M. Brack	a2e844a	2004-01-06 11:52:13 +0000	[diff] [blame]	903	return(NULL);
				904	while (pos--) {
				905	if ((ch=*utf++) == 0) return(NULL);
				906	if ( ch & 0x80 ) {
				907	/* if not simple ascii, verify proper format */
				908	if ( (ch & 0xc0) != 0xc0 )
				909	return(NULL);
				910	/* then skip over remaining bytes for this char */
				911	while ( (ch <<= 1) & 0x80 )
				912	if ( (*utf++ & 0xc0) != 0x80 )
				913	return(NULL);
				914	}
				915	}
				916	return((xmlChar *)utf);
				917	}
				918
				919	/**
				920	* xmlUTF8Strloc:
				921	* @utf: the input UTF8 *
				922	* @utfchar: the UTF8 character to be found
				923	*
				924	* a function to provide the relative location of a UTF8 char
				925	*
				926	* Returns the relative character position of the desired char
				927	* or -1 if not found
				928	*/
				929	int
				930	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
				931	int i, size;
				932	xmlChar ch;
				933
				934	if (utf==NULL \|\| utfchar==NULL) return -1;
				935	size = xmlUTF8Strsize(utfchar, 1);
				936	for(i=0; (ch=*utf) != 0; i++) {
				937	if (xmlStrncmp(utf, utfchar, size)==0)
				938	return(i);
				939	utf++;
				940	if ( ch & 0x80 ) {
				941	/* if not simple ascii, verify proper format */
				942	if ( (ch & 0xc0) != 0xc0 )
				943	return(-1);
				944	/* then skip over remaining bytes for this char */
				945	while ( (ch <<= 1) & 0x80 )
				946	if ( (*utf++ & 0xc0) != 0x80 )
				947	return(-1);
				948	}
				949	}
				950
				951	return(-1);
				952	}
				953	/**
				954	* xmlUTF8Strsub:
				955	* @utf: a sequence of UTF-8 encoded bytes
				956	* @start: relative pos of first char
				957	* @len: total number to copy
				958	*
				959	* Create a substring from a given UTF-8 string
				960	* Note: positions are given in units of UTF-8 chars
				961	*
				962	* Returns a pointer to a newly created string
				963	* or NULL if any problem
				964	*/
				965
				966	xmlChar *
				967	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
				968	int i;
				969	xmlChar ch;
				970
				971	if (utf == NULL) return(NULL);
				972	if (start < 0) return(NULL);
				973	if (len < 0) return(NULL);
				974
				975	/*
				976	* Skip over any leading chars
				977	*/
				978	for (i = 0;i < start;i++) {
				979	if ((ch=*utf++) == 0) return(NULL);
				980	if ( ch & 0x80 ) {
				981	/* if not simple ascii, verify proper format */
				982	if ( (ch & 0xc0) != 0xc0 )
				983	return(NULL);
				984	/* then skip over remaining bytes for this char */
				985	while ( (ch <<= 1) & 0x80 )
				986	if ( (*utf++ & 0xc0) != 0x80 )
				987	return(NULL);
				988	}
				989	}
				990
				991	return(xmlUTF8Strndup(utf, len));
				992	}
Daniel Veillard	5d4644e	2005-04-01 13:11:58 +0000	[diff] [blame]	993
David Kilzer	502f6a6	2016-05-23 14:58:41 +0800	[diff] [blame]	994	/**
				995	* xmlEscapeFormatString:
				996	* @msg: a pointer to the string in which to escape '%' characters.
				997	* Must be a heap-allocated buffer created by libxml2 that may be
				998	* returned, or that may be freed and replaced.
				999	*
				1000	* Replaces the string pointed to by 'msg' with an escaped string.
				1001	* Returns the same string with all '%' characters escaped.
				1002	*/
				1003	xmlChar *
				1004	xmlEscapeFormatString(xmlChar **msg)
				1005	{
				1006	xmlChar *msgPtr = NULL;
				1007	xmlChar *result = NULL;
				1008	xmlChar *resultPtr = NULL;
				1009	size_t count = 0;
				1010	size_t msgLen = 0;
				1011	size_t resultLen = 0;
				1012
				1013	if (!msg \|\| !*msg)
				1014	return(NULL);
				1015
				1016	for (msgPtr = msg; msgPtr != '\0'; ++msgPtr) {
				1017	++msgLen;
				1018	if (*msgPtr == '%')
				1019	++count;
				1020	}
				1021
				1022	if (count == 0)
				1023	return(*msg);
				1024
				1025	resultLen = msgLen + count + 1;
				1026	result = (xmlChar ) xmlMallocAtomic(resultLen sizeof(xmlChar));
				1027	if (result == NULL) {
				1028	/* Clear *msg to prevent format string vulnerabilities in
				1029	out-of-memory situations. */
				1030	xmlFree(*msg);
				1031	*msg = NULL;
				1032	xmlErrMemory(NULL, NULL);
				1033	return(NULL);
				1034	}
				1035
				1036	for (msgPtr = msg, resultPtr = result; msgPtr != '\0'; ++msgPtr, ++resultPtr) {
				1037	resultPtr = msgPtr;
				1038	if (*msgPtr == '%')
				1039	*(++resultPtr) = '%';
				1040	}
				1041	result[resultLen - 1] = '\0';
				1042
				1043	xmlFree(*msg);
				1044	*msg = result;
				1045
				1046	return *msg;
				1047	}
				1048
Daniel Veillard	5d4644e	2005-04-01 13:11:58 +0000	[diff] [blame]	1049	#define bottom_xmlstring
				1050	#include "elfgcchack.h"