Blame - tinyxmlparser.cpp - platform/external/tinyxml

blob: cfe4eb230f9cde7613ea97bc03b1c175a163567f [file] [log] [blame]

Upstream	bc0ee9a	1970-01-12 13:46:40 +0000	[diff] [blame]	1	/*
				2	www.sourceforge.net/projects/tinyxml
				3	Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
				4
				5	This software is provided 'as-is', without any express or implied
				6	warranty. In no event will the authors be held liable for any
				7	damages arising from the use of this software.
				8
				9	Permission is granted to anyone to use this software for any
				10	purpose, including commercial applications, and to alter it and
				11	redistribute it freely, subject to the following restrictions:
				12
				13	1. The origin of this software must not be misrepresented; you must
				14	not claim that you wrote the original software. If you use this
				15	software in a product, an acknowledgment in the product documentation
				16	would be appreciated but is not required.
				17
				18	2. Altered source versions must be plainly marked as such, and
				19	must not be misrepresented as being the original software.
				20
				21	3. This notice may not be removed or altered from any source
				22	distribution.
				23	*/
				24
				25	#include "tinyxml.h"
				26	#include <ctype.h>
				27	#include <stddef.h>
				28
				29	//#define DEBUG_PARSER
Karsten Tausche	d143821	2022-10-06 15:20:50 +0200	[diff] [blame]	30	#if defined( DEBUG_PARSER )
				31	# if defined( DEBUG ) && defined( _MSC_VER )
				32	# include <windows.h>
				33	# define TIXML_LOG OutputDebugString
				34	# else
				35	# define TIXML_LOG printf
				36	# endif
				37	#endif
Upstream	bc0ee9a	1970-01-12 13:46:40 +0000	[diff] [blame]	38
				39	// Note tha "PutString" hardcodes the same list. This
				40	// is less flexible than it appears. Changing the entries
				41	// or order will break putstring.
				42	TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
				43	{
				44	{ "&", 5, '&' },
				45	{ "<", 4, '<' },
				46	{ ">", 4, '>' },
				47	{ """, 6, '\"' },
				48	{ "'", 6, '\'' }
				49	};
				50
				// Bunch of unicode info at:
				//		http://www.unicode.org/faq/utf_bom.html
				// Including the basics of the table below, which determines the number of
				// bytes in a sequence from its lead byte. 1 is placed for invalid sequences --
				// although the result will be junk, pass it through as much as possible.
				// Beware of the non-characters in UTF-8:
				//		ef bb bf (Microsoft "lead bytes")
				//		ef bf be
				//		ef bf bf

				// The three bytes of the Microsoft UTF-8 lead sequence (ef bb bf), in order.
				const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
				const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
				const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
				64
				65	const int TiXmlBase::utf8ByteTable[256] =
				66	{
				67	// 0 1 2 3 4 5 6 7 8 9 a b c d e f
				68	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
				69	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
				70	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
				71	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
				72	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
				73	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
				74	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
				75	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
				76	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
				77	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
				78	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
				79	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
				80	1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
				81	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
				82	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
				83	4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
				84	};
				85
				86
				87	void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
				88	{
				89	const unsigned long BYTE_MASK = 0xBF;
				90	const unsigned long BYTE_MARK = 0x80;
				91	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
				92
				93	if (input < 0x80)
				94	*length = 1;
				95	else if ( input < 0x800 )
				96	*length = 2;
				97	else if ( input < 0x10000 )
				98	*length = 3;
				99	else if ( input < 0x200000 )
				100	*length = 4;
				101	else
				102	{ *length = 0; return; } // This code won't covert this correctly anyway.
				103
				104	output += *length;
				105
				106	// Scary scary fall throughs.
				107	switch (*length)
				108	{
				109	case 4:
				110	--output;
				111	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
				112	input >>= 6;
				113	case 3:
				114	--output;
				115	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
				116	input >>= 6;
				117	case 2:
				118	--output;
				119	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
				120	input >>= 6;
				121	case 1:
				122	--output;
				123	output = (char)(input \| FIRST_BYTE_MARK[length]);
				124	}
				125	}
				126
				127
				128	/static/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /encoding/ )
				129	{
				130	// This will only work for low-ascii, everything else is assumed to be a valid
				131	// letter. I'm not sure this is the best approach, but it is quite tricky trying
				132	// to figure out alhabetical vs. not across encoding. So take a very
				133	// conservative approach.
				134
				135	// if ( encoding == TIXML_ENCODING_UTF8 )
				136	// {
				137	if ( anyByte < 127 )
				138	return isalpha( anyByte );
				139	else
				140	return 1; // What else to do? The unicode set is huge...get the english ones right.
				141	// }
				142	// else
				143	// {
				144	// return isalpha( anyByte );
				145	// }
				146	}
				147
				148
				149	/static/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /encoding/ )
				150	{
				151	// This will only work for low-ascii, everything else is assumed to be a valid
				152	// letter. I'm not sure this is the best approach, but it is quite tricky trying
				153	// to figure out alhabetical vs. not across encoding. So take a very
				154	// conservative approach.
				155
				156	// if ( encoding == TIXML_ENCODING_UTF8 )
				157	// {
				158	if ( anyByte < 127 )
				159	return isalnum( anyByte );
				160	else
				161	return 1; // What else to do? The unicode set is huge...get the english ones right.
				162	// }
				163	// else
				164	// {
				165	// return isalnum( anyByte );
				166	// }
				167	}
				168
				169
				170	class TiXmlParsingData
				171	{
				172	friend class TiXmlDocument;
				173	public:
				174	void Stamp( const char* now, TiXmlEncoding encoding );
				175
				176	const TiXmlCursor& Cursor() { return cursor; }
				177
				178	private:
				179	// Only used by the document!
				180	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
				181	{
				182	assert( start );
				183	stamp = start;
				184	tabsize = _tabsize;
				185	cursor.row = row;
				186	cursor.col = col;
				187	}
				188
				189	TiXmlCursor cursor;
				190	const char* stamp;
				191	int tabsize;
				192	};
				193
				194
				195	void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
				196	{
				197	assert( now );
				198
				199	// Do nothing if the tabsize is 0.
				200	if ( tabsize < 1 )
				201	{
				202	return;
				203	}
				204
				205	// Get the current row, column.
				206	int row = cursor.row;
				207	int col = cursor.col;
				208	const char* p = stamp;
				209	assert( p );
				210
				211	while ( p < now )
				212	{
				213	// Treat p as unsigned, so we have a happy compiler.
				214	const unsigned char* pU = (const unsigned char*)p;
				215
				216	// Code contributed by Fletcher Dunn: (modified by lee)
				217	switch (*pU) {
				218	case 0:
				219	// We should never get here, but in case we do, don't
				220	// advance past the terminating null character, ever
				221	return;
				222
				223	case '\r':
				224	// bump down to the next line
				225	++row;
				226	col = 0;
				227	// Eat the character
				228	++p;
				229
				230	// Check for \r\n sequence, and treat this as a single character
				231	if (*p == '\n') {
				232	++p;
				233	}
				234	break;
				235
				236	case '\n':
				237	// bump down to the next line
				238	++row;
				239	col = 0;
				240
				241	// Eat the character
				242	++p;
				243
				244	// Check for \n\r sequence, and treat this as a single
				245	// character. (Yes, this bizarre thing does occur still
				246	// on some arcane platforms...)
				247	if (*p == '\r') {
				248	++p;
				249	}
				250	break;
				251
				252	case '\t':
				253	// Eat the character
				254	++p;
				255
				256	// Skip to next tab stop
				257	col = (col / tabsize + 1) * tabsize;
				258	break;
				259
				260	case TIXML_UTF_LEAD_0:
				261	if ( encoding == TIXML_ENCODING_UTF8 )
				262	{
				263	if ( (p+1) && (p+2) )
				264	{
				265	// In these cases, don't advance the column. These are
				266	// 0-width spaces.
				267	if ( (pU+1)==TIXML_UTF_LEAD_1 && (pU+2)==TIXML_UTF_LEAD_2 )
				268	p += 3;
				269	else if ( (pU+1)==0xbfU && (pU+2)==0xbeU )
				270	p += 3;
				271	else if ( (pU+1)==0xbfU && (pU+2)==0xbfU )
				272	p += 3;
				273	else
				274	{ p +=3; ++col; } // A normal character.
				275	}
Christian Voegl	a9fb107	2021-10-27 11:25:18 +0200	[diff] [blame]	276	else
				277	{
				278	// TIXML_UTF_LEAD_0 (239) is the start character of a 3 byte sequence, so
				279	// there is something wrong here. Just advance the pointer to evade infinite loops
				280	++p;
				281	}
Upstream	bc0ee9a	1970-01-12 13:46:40 +0000	[diff] [blame]	282	}
				283	else
				284	{
				285	++p;
				286	++col;
				287	}
				288	break;
				289
				290	default:
				291	if ( encoding == TIXML_ENCODING_UTF8 )
				292	{
				293	// Eat the 1 to 4 byte utf8 character.
				294	int step = TiXmlBase::utf8ByteTable[((unsigned char)p)];
				295	if ( step == 0 )
				296	step = 1; // Error case from bad encoding, but handle gracefully.
				297	p += step;
				298
				299	// Just advance one column, of course.
				300	++col;
				301	}
				302	else
				303	{
				304	++p;
				305	++col;
				306	}
				307	break;
				308	}
				309	}
				310	cursor.row = row;
				311	cursor.col = col;
				312	assert( cursor.row >= -1 );
				313	assert( cursor.col >= -1 );
				314	stamp = p;
				315	assert( stamp );
				316	}
				317
				318
				319	const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
				320	{
				321	if ( !p \|\| !*p )
				322	{
				323	return 0;
				324	}
				325	if ( encoding == TIXML_ENCODING_UTF8 )
				326	{
				327	while ( *p )
				328	{
				329	const unsigned char* pU = (const unsigned char*)p;
				330
				331	// Skip the stupid Microsoft UTF-8 Byte order marks
				332	if ( *(pU+0)==TIXML_UTF_LEAD_0
				333	&& *(pU+1)==TIXML_UTF_LEAD_1
				334	&& *(pU+2)==TIXML_UTF_LEAD_2 )
				335	{
				336	p += 3;
				337	continue;
				338	}
				339	else if(*(pU+0)==TIXML_UTF_LEAD_0
				340	&& *(pU+1)==0xbfU
				341	&& *(pU+2)==0xbeU )
				342	{
				343	p += 3;
				344	continue;
				345	}
				346	else if(*(pU+0)==TIXML_UTF_LEAD_0
				347	&& *(pU+1)==0xbfU
				348	&& *(pU+2)==0xbfU )
				349	{
				350	p += 3;
				351	continue;
				352	}
				353
				354	if ( IsWhiteSpace( p ) \|\| p == '\n' \|\| *p =='\r' ) // Still using old rules for white space.
				355	++p;
				356	else
				357	break;
				358	}
				359	}
				360	else
				361	{
				362	while ( p && IsWhiteSpace( p ) \|\| p == '\n' \|\| p =='\r' )
				363	++p;
				364	}
				365
				366	return p;
				367	}
				368
				369	#ifdef TIXML_USE_STL
				370	/static/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
				371	{
				372	for( ;; )
				373	{
				374	if ( !in->good() ) return false;
				375
				376	int c = in->peek();
				377	// At this scope, we can't get to a document. So fail silently.
				378	if ( !IsWhiteSpace( c ) \|\| c <= 0 )
				379	return true;
				380
				381	*tag += (char) in->get();
				382	}
				383	}
				384
				385	/static/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
				386	{
				387	//assert( character > 0 && character < 128 ); // else it won't work in utf-8
				388	while ( in->good() )
				389	{
				390	int c = in->peek();
				391	if ( c == character )
				392	return true;
				393	if ( c <= 0 ) // Silent failure: can't get document at this scope
				394	return false;
				395
				396	in->get();
				397	*tag += (char) c;
				398	}
				399	return false;
				400	}
				401	#endif
				402
				403	const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
				404	{
				405	*name = "";
				406	assert( p );
				407
				408	// Names start with letters or underscores.
				409	// Of course, in unicode, tinyxml has no idea what a letter is. The
				410	// algorithm is generous.
				411	//
				412	// After that, they can be letters, underscores, numbers,
				413	// hyphens, or colons. (Colons are valid ony for namespaces,
				414	// but tinyxml can't tell namespaces from names.)
				415	if ( p && *p
				416	&& ( IsAlpha( (unsigned char) p, encoding ) \|\| p == '_' ) )
				417	{
				418	while( p && *p
				419	&& ( IsAlphaNum( (unsigned char ) *p, encoding )
				420	\|\| *p == '_'
				421	\|\| *p == '-'
				422	\|\| *p == '.'
				423	\|\| *p == ':' ) )
				424	{
				425	(name) += p;
				426	++p;
				427	}
				428	return p;
				429	}
				430	return 0;
				431	}
				432
				433	const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
				434	{
				435	// Presume an entity, and pull it out.
				436	TIXML_STRING ent;
				437	int i;
				438	*length = 0;
				439
				440	if ( (p+1) && (p+1) == '#' && *(p+2) )
				441	{
				442	unsigned long ucs = 0;
				443	ptrdiff_t delta = 0;
				444	unsigned mult = 1;
				445
				446	if ( *(p+2) == 'x' )
				447	{
				448	// Hexadecimal.
				449	if ( !*(p+3) ) return 0;
				450
				451	const char* q = p+3;
				452	q = strchr( q, ';' );
				453
				454	if ( !q \|\| !*q ) return 0;
				455
				456	delta = q-p;
				457	--q;
				458
				459	while ( *q != 'x' )
				460	{
				461	if ( q >= '0' && q <= '9' )
				462	ucs += mult * (*q - '0');
				463	else if ( q >= 'a' && q <= 'f' )
				464	ucs += mult * (*q - 'a' + 10);
				465	else if ( q >= 'A' && q <= 'F' )
				466	ucs += mult * (*q - 'A' + 10 );
				467	else
				468	return 0;
				469	mult *= 16;
				470	--q;
				471	}
				472	}
				473	else
				474	{
				475	// Decimal.
				476	if ( !*(p+2) ) return 0;
				477
				478	const char* q = p+2;
				479	q = strchr( q, ';' );
				480
				481	if ( !q \|\| !*q ) return 0;
				482
				483	delta = q-p;
				484	--q;
				485
				486	while ( *q != '#' )
				487	{
				488	if ( q >= '0' && q <= '9' )
				489	ucs += mult * (*q - '0');
				490	else
				491	return 0;
				492	mult *= 10;
				493	--q;
				494	}
				495	}
				496	if ( encoding == TIXML_ENCODING_UTF8 )
				497	{
				498	// convert the UCS to UTF-8
				499	ConvertUTF32ToUTF8( ucs, value, length );
				500	}
				501	else
				502	{
				503	*value = (char)ucs;
				504	*length = 1;
				505	}
				506	return p + delta + 1;
				507	}
				508
				509	// Now try to match it.
				510	for( i=0; i<NUM_ENTITY; ++i )
				511	{
				512	if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
				513	{
				514	assert( strlen( entity[i].str ) == entity[i].strLength );
				515	*value = entity[i].chr;
				516	*length = 1;
				517	return ( p + entity[i].strLength );
				518	}
				519	}
				520
				521	// So it wasn't an entity, its unrecognized, or something like that.
				522	value = p; // Don't put back the last one, since we return it!
				523	return p+1;
				524	}
				525
				526
				527	bool TiXmlBase::StringEqual( const char* p,
				528	const char* tag,
				529	bool ignoreCase,
				530	TiXmlEncoding encoding )
				531	{
				532	assert( p );
				533	assert( tag );
				534	if ( !p \|\| !*p )
				535	{
				536	assert( 0 );
				537	return false;
				538	}
				539
				540	const char* q = p;
				541
				542	if ( ignoreCase )
				543	{
				544	while ( q && tag && ToLower( q, encoding ) == ToLower( tag, encoding ) )
				545	{
				546	++q;
				547	++tag;
				548	}
				549
				550	if ( *tag == 0 )
				551	return true;
				552	}
				553	else
				554	{
				555	while ( q && tag && q == tag )
				556	{
				557	++q;
				558	++tag;
				559	}
				560
				561	if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
				562	return true;
				563	}
				564	return false;
				565	}
				566
				567	const char* TiXmlBase::ReadText( const char* p,
				568	TIXML_STRING * text,
				569	bool trimWhiteSpace,
				570	const char* endTag,
				571	bool caseInsensitive,
				572	TiXmlEncoding encoding )
				573	{
				574	*text = "";
				575	if ( !trimWhiteSpace // certain tags always keep whitespace
				576	\|\| !condenseWhiteSpace ) // if true, whitespace is always kept
				577	{
				578	// Keep all the white space.
				579	while ( p && *p
				580	&& !StringEqual( p, endTag, caseInsensitive, encoding )
				581	)
				582	{
				583	int len;
				584	char cArr[4] = { 0, 0, 0, 0 };
				585	p = GetChar( p, cArr, &len, encoding );
				586	text->append( cArr, len );
				587	}
				588	}
				589	else
				590	{
				591	bool whitespace = false;
				592
				593	// Remove leading white space:
				594	p = SkipWhiteSpace( p, encoding );
				595	while ( p && *p
				596	&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
				597	{
				598	if ( p == '\r' \|\| p == '\n' )
				599	{
				600	whitespace = true;
				601	++p;
				602	}
				603	else if ( IsWhiteSpace( *p ) )
				604	{
				605	whitespace = true;
				606	++p;
				607	}
				608	else
				609	{
				610	// If we've found whitespace, add it before the
				611	// new character. Any whitespace just becomes a space.
				612	if ( whitespace )
				613	{
				614	(*text) += ' ';
				615	whitespace = false;
				616	}
				617	int len;
				618	char cArr[4] = { 0, 0, 0, 0 };
				619	p = GetChar( p, cArr, &len, encoding );
				620	if ( len == 1 )
				621	(*text) += cArr[0]; // more efficient
				622	else
				623	text->append( cArr, len );
				624	}
				625	}
				626	}
				627	return p + strlen( endTag );
				628	}
				629
				630	#ifdef TIXML_USE_STL
				631
				632	void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
				633	{
				634	// The basic issue with a document is that we don't know what we're
				635	// streaming. Read something presumed to be a tag (and hope), then
				636	// identify it, and call the appropriate stream method on the tag.
				637	//
				638	// This "pre-streaming" will never read the closing ">" so the
				639	// sub-tag can orient itself.
				640
				641	if ( !StreamTo( in, '<', tag ) )
				642	{
				643	SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
				644	return;
				645	}
				646
				647	while ( in->good() )
				648	{
				649	int tagIndex = (int) tag->length();
				650	while ( in->good() && in->peek() != '>' )
				651	{
				652	int c = in->get();
				653	if ( c <= 0 )
				654	{
				655	SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				656	break;
				657	}
				658	(*tag) += (char) c;
				659	}
				660
				661	if ( in->good() )
				662	{
				663	// We now have something we presume to be a node of
				664	// some sort. Identify it, and call the node to
				665	// continue streaming.
				666	TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
				667
				668	if ( node )
				669	{
				670	node->StreamIn( in, tag );
				671	bool isElement = node->ToElement() != 0;
				672	delete node;
				673	node = 0;
				674
				675	// If this is the root element, we're done. Parsing will be
				676	// done by the >> operator.
				677	if ( isElement )
				678	{
				679	return;
				680	}
				681	}
				682	else
				683	{
				684	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
				685	return;
				686	}
				687	}
				688	}
				689	// We should have returned sooner.
				690	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
				691	}
				692
				693	#endif
				694
				695	const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
				696	{
				697	ClearError();
				698
				699	// Parse away, at the document level. Since a document
				700	// contains nothing but other tags, most of what happens
				701	// here is skipping white space.
				702	if ( !p \|\| !*p )
				703	{
				704	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
				705	return 0;
				706	}
				707
				708	// Note that, for a document, this needs to come
				709	// before the while space skip, so that parsing
				710	// starts from the pointer we are given.
				711	location.Clear();
				712	if ( prevData )
				713	{
				714	location.row = prevData->cursor.row;
				715	location.col = prevData->cursor.col;
				716	}
				717	else
				718	{
				719	location.row = 0;
				720	location.col = 0;
				721	}
				722	TiXmlParsingData data( p, TabSize(), location.row, location.col );
				723	location = data.Cursor();
				724
				725	if ( encoding == TIXML_ENCODING_UNKNOWN )
				726	{
				727	// Check for the Microsoft UTF-8 lead bytes.
				728	const unsigned char* pU = (const unsigned char*)p;
				729	if ( (pU+0) && (pU+0) == TIXML_UTF_LEAD_0
				730	&& (pU+1) && (pU+1) == TIXML_UTF_LEAD_1
				731	&& (pU+2) && (pU+2) == TIXML_UTF_LEAD_2 )
				732	{
				733	encoding = TIXML_ENCODING_UTF8;
				734	useMicrosoftBOM = true;
				735	}
				736	}
				737
				738	p = SkipWhiteSpace( p, encoding );
				739	if ( !p )
				740	{
				741	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
				742	return 0;
				743	}
				744
				745	while ( p && *p )
				746	{
				747	TiXmlNode* node = Identify( p, encoding );
				748	if ( node )
				749	{
				750	p = node->Parse( p, &data, encoding );
				751	LinkEndChild( node );
				752	}
				753	else
				754	{
				755	break;
				756	}
				757
				758	// Did we get encoding info?
				759	if ( encoding == TIXML_ENCODING_UNKNOWN
				760	&& node->ToDeclaration() )
				761	{
				762	TiXmlDeclaration* dec = node->ToDeclaration();
				763	const char* enc = dec->Encoding();
				764	assert( enc );
				765
				766	if ( *enc == 0 )
				767	encoding = TIXML_ENCODING_UTF8;
				768	else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
				769	encoding = TIXML_ENCODING_UTF8;
				770	else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
				771	encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
				772	else
				773	encoding = TIXML_ENCODING_LEGACY;
				774	}
				775
				776	p = SkipWhiteSpace( p, encoding );
				777	}
				778
				779	// Was this empty?
				780	if ( !firstChild ) {
				781	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
				782	return 0;
				783	}
				784
				785	// All is well.
				786	return p;
				787	}
				788
				789	void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
				790	{
				791	// The first error in a chain is more accurate - don't set again!
				792	if ( error )
				793	return;
				794
				795	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
				796	error = true;
				797	errorId = err;
				798	errorDesc = errorString[ errorId ];
				799
				800	errorLocation.Clear();
				801	if ( pError && data )
				802	{
				803	data->Stamp( pError, encoding );
				804	errorLocation = data->Cursor();
				805	}
				806	}
				807
				808
				809	TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
				810	{
				811	TiXmlNode* returnNode = 0;
				812
				813	p = SkipWhiteSpace( p, encoding );
				814	if( !p \|\| !p \|\| p != '<' )
				815	{
				816	return 0;
				817	}
				818
				819	TiXmlDocument* doc = GetDocument();
				820	p = SkipWhiteSpace( p, encoding );
				821
				822	if ( !p \|\| !*p )
				823	{
				824	return 0;
				825	}
				826
				827	// What is this thing?
				828	// - Elements start with a letter or underscore, but xml is reserved.
				829	// - Comments: <!--
				830	// - Decleration: <?xml
				831	// - Everthing else is unknown to tinyxml.
				832	//
				833
				834	const char* xmlHeader = { "<?xml" };
				835	const char* commentHeader = { "<!--" };
				836	const char* dtdHeader = { "<!" };
				837	const char* cdataHeader = { "<![CDATA[" };
				838
				839	if ( StringEqual( p, xmlHeader, true, encoding ) )
				840	{
				841	#ifdef DEBUG_PARSER
				842	TIXML_LOG( "XML parsing Declaration\n" );
				843	#endif
				844	returnNode = new TiXmlDeclaration();
				845	}
				846	else if ( StringEqual( p, commentHeader, false, encoding ) )
				847	{
				848	#ifdef DEBUG_PARSER
				849	TIXML_LOG( "XML parsing Comment\n" );
				850	#endif
				851	returnNode = new TiXmlComment();
				852	}
				853	else if ( StringEqual( p, cdataHeader, false, encoding ) )
				854	{
				855	#ifdef DEBUG_PARSER
				856	TIXML_LOG( "XML parsing CDATA\n" );
				857	#endif
				858	TiXmlText* text = new TiXmlText( "" );
				859	text->SetCDATA( true );
				860	returnNode = text;
				861	}
				862	else if ( StringEqual( p, dtdHeader, false, encoding ) )
				863	{
				864	#ifdef DEBUG_PARSER
				865	TIXML_LOG( "XML parsing Unknown(1)\n" );
				866	#endif
				867	returnNode = new TiXmlUnknown();
				868	}
				869	else if ( IsAlpha( *(p+1), encoding )
				870	\|\| *(p+1) == '_' )
				871	{
				872	#ifdef DEBUG_PARSER
				873	TIXML_LOG( "XML parsing Element\n" );
				874	#endif
				875	returnNode = new TiXmlElement( "" );
				876	}
				877	else
				878	{
				879	#ifdef DEBUG_PARSER
				880	TIXML_LOG( "XML parsing Unknown(2)\n" );
				881	#endif
				882	returnNode = new TiXmlUnknown();
				883	}
				884
				885	if ( returnNode )
				886	{
				887	// Set the parent, so it can report errors
				888	returnNode->parent = this;
				889	}
				890	else
				891	{
				892	if ( doc )
				893	doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
				894	}
				895	return returnNode;
				896	}
				897
				898	#ifdef TIXML_USE_STL
				899
				900	void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
				901	{
				902	// We're called with some amount of pre-parsing. That is, some of "this"
				903	// element is in "tag". Go ahead and stream to the closing ">"
				904	while( in->good() )
				905	{
				906	int c = in->get();
				907	if ( c <= 0 )
				908	{
				909	TiXmlDocument* document = GetDocument();
				910	if ( document )
				911	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				912	return;
				913	}
				914	(*tag) += (char) c ;
				915
				916	if ( c == '>' )
				917	break;
				918	}
				919
				920	if ( tag->length() < 3 ) return;
				921
				922	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
				923	// If not, identify and stream.
				924
				925	if ( tag->at( tag->length() - 1 ) == '>'
				926	&& tag->at( tag->length() - 2 ) == '/' )
				927	{
				928	// All good!
				929	return;
				930	}
				931	else if ( tag->at( tag->length() - 1 ) == '>' )
				932	{
				933	// There is more. Could be:
				934	// text
				935	// closing tag
				936	// another node.
				937	for ( ;; )
				938	{
				939	StreamWhiteSpace( in, tag );
				940
				941	// Do we have text?
				942	if ( in->good() && in->peek() != '<' )
				943	{
				944	// Yep, text.
				945	TiXmlText text( "" );
				946	text.StreamIn( in, tag );
				947
				948	// What follows text is a closing tag or another node.
				949	// Go around again and figure it out.
				950	continue;
				951	}
				952
				953	// We now have either a closing tag...or another node.
				954	// We should be at a "<", regardless.
				955	if ( !in->good() ) return;
				956	assert( in->peek() == '<' );
				957	int tagIndex = (int) tag->length();
				958
				959	bool closingTag = false;
				960	bool firstCharFound = false;
				961
				962	for( ;; )
				963	{
				964	if ( !in->good() )
				965	return;
				966
				967	int c = in->peek();
				968	if ( c <= 0 )
				969	{
				970	TiXmlDocument* document = GetDocument();
				971	if ( document )
				972	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				973	return;
				974	}
				975
				976	if ( c == '>' )
				977	break;
				978
				979	*tag += (char) c;
				980	in->get();
				981
				982	if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
				983	{
				984	firstCharFound = true;
				985	if ( c == '/' )
				986	closingTag = true;
				987	}
				988	}
				989	// If it was a closing tag, then read in the closing '>' to clean up the input stream.
				990	// If it was not, the streaming will be done by the tag.
				991	if ( closingTag )
				992	{
				993	if ( !in->good() )
				994	return;
				995
				996	int c = in->get();
				997	if ( c <= 0 )
				998	{
				999	TiXmlDocument* document = GetDocument();
				1000	if ( document )
				1001	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				1002	return;
				1003	}
				1004	assert( c == '>' );
				1005	*tag += (char) c;
				1006
				1007	// We are done, once we've found our closing tag.
				1008	return;
				1009	}
				1010	else
				1011	{
				1012	// If not a closing tag, id it, and stream.
				1013	const char* tagloc = tag->c_str() + tagIndex;
				1014	TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
				1015	if ( !node )
				1016	return;
				1017	node->StreamIn( in, tag );
				1018	delete node;
				1019	node = 0;
				1020
				1021	// No return: go around from the beginning: text, closing tag, or node.
				1022	}
				1023	}
				1024	}
				1025	}
				1026	#endif
				1027
				1028	const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
				1029	{
				1030	p = SkipWhiteSpace( p, encoding );
				1031	TiXmlDocument* document = GetDocument();
				1032
				1033	if ( !p \|\| !*p )
				1034	{
				1035	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
				1036	return 0;
				1037	}
				1038
				1039	if ( data )
				1040	{
				1041	data->Stamp( p, encoding );
				1042	location = data->Cursor();
				1043	}
				1044
				1045	if ( *p != '<' )
				1046	{
				1047	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
				1048	return 0;
				1049	}
				1050
				1051	p = SkipWhiteSpace( p+1, encoding );
				1052
				1053	// Read the name.
				1054	const char* pErr = p;
				1055
				1056	p = ReadName( p, &value, encoding );
				1057	if ( !p \|\| !*p )
				1058	{
				1059	if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
				1060	return 0;
				1061	}
				1062
				1063	TIXML_STRING endTag ("</");
				1064	endTag += value;
				1065	endTag += ">";
				1066
				1067	// Check for and read attributes. Also look for an empty
				1068	// tag or an end tag.
				1069	while ( p && *p )
				1070	{
				1071	pErr = p;
				1072	p = SkipWhiteSpace( p, encoding );
				1073	if ( !p \|\| !*p )
				1074	{
				1075	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
				1076	return 0;
				1077	}
				1078	if ( *p == '/' )
				1079	{
				1080	++p;
				1081	// Empty tag.
				1082	if ( *p != '>' )
				1083	{
				1084	if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
				1085	return 0;
				1086	}
				1087	return (p+1);
				1088	}
				1089	else if ( *p == '>' )
				1090	{
				1091	// Done with attributes (if there were any.)
				1092	// Read the value -- which can include other
				1093	// elements -- read the end tag, and return.
				1094	++p;
				1095	p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
				1096	if ( !p \|\| !*p )
				1097	return 0;
				1098
				1099	// We should find the end tag now
				1100	if ( StringEqual( p, endTag.c_str(), false, encoding ) )
				1101	{
				1102	p += endTag.length();
				1103	return p;
				1104	}
				1105	else
				1106	{
				1107	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
				1108	return 0;
				1109	}
				1110	}
				1111	else
				1112	{
				1113	// Try to read an attribute:
				1114	TiXmlAttribute* attrib = new TiXmlAttribute();
				1115	if ( !attrib )
				1116	{
				1117	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
				1118	return 0;
				1119	}
				1120
				1121	attrib->SetDocument( document );
				1122	const char* pErr = p;
				1123	p = attrib->Parse( p, data, encoding );
				1124
				1125	if ( !p \|\| !*p )
				1126	{
				1127	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
				1128	delete attrib;
				1129	return 0;
				1130	}
				1131
				1132	// Handle the strange case of double attributes:
Karsten Tausche	d143821	2022-10-06 15:20:50 +0200	[diff] [blame]	1133	TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
Upstream	bc0ee9a	1970-01-12 13:46:40 +0000	[diff] [blame]	1134	if ( node )
				1135	{
				1136	node->SetValue( attrib->Value() );
				1137	delete attrib;
				1138	return 0;
				1139	}
				1140
				1141	attributeSet.Add( attrib );
				1142	}
				1143	}
				1144	return p;
				1145	}
				1146
				1147
				1148	const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
				1149	{
				1150	TiXmlDocument* document = GetDocument();
				1151
				1152	// Read in text and elements in any order.
				1153	const char* pWithWhiteSpace = p;
				1154	p = SkipWhiteSpace( p, encoding );
				1155
				1156	while ( p && *p )
				1157	{
				1158	if ( *p != '<' )
				1159	{
				1160	// Take what we have, make a text element.
				1161	TiXmlText* textNode = new TiXmlText( "" );
				1162
				1163	if ( !textNode )
				1164	{
				1165	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
				1166	return 0;
				1167	}
				1168
				1169	if ( TiXmlBase::IsWhiteSpaceCondensed() )
				1170	{
				1171	p = textNode->Parse( p, data, encoding );
				1172	}
				1173	else
				1174	{
				1175	// Special case: we want to keep the white space
				1176	// so that leading spaces aren't removed.
				1177	p = textNode->Parse( pWithWhiteSpace, data, encoding );
				1178	}
				1179
				1180	if ( !textNode->Blank() )
				1181	LinkEndChild( textNode );
				1182	else
				1183	delete textNode;
				1184	}
				1185	else
				1186	{
				1187	// We hit a '<'
				1188	// Have we hit a new element or an end tag? This could also be
				1189	// a TiXmlText in the "CDATA" style.
				1190	if ( StringEqual( p, "</", false, encoding ) )
				1191	{
				1192	return p;
				1193	}
				1194	else
				1195	{
				1196	TiXmlNode* node = Identify( p, encoding );
				1197	if ( node )
				1198	{
				1199	p = node->Parse( p, data, encoding );
				1200	LinkEndChild( node );
				1201	}
				1202	else
				1203	{
				1204	return 0;
				1205	}
				1206	}
				1207	}
				1208	pWithWhiteSpace = p;
				1209	p = SkipWhiteSpace( p, encoding );
				1210	}
				1211
				1212	if ( !p )
				1213	{
				1214	if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
				1215	}
				1216	return p;
				1217	}
				1218
				1219
				1220	#ifdef TIXML_USE_STL
				1221	void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
				1222	{
				1223	while ( in->good() )
				1224	{
				1225	int c = in->get();
				1226	if ( c <= 0 )
				1227	{
				1228	TiXmlDocument* document = GetDocument();
				1229	if ( document )
				1230	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				1231	return;
				1232	}
				1233	(*tag) += (char) c;
				1234
				1235	if ( c == '>' )
				1236	{
				1237	// All is well.
				1238	return;
				1239	}
				1240	}
				1241	}
				1242	#endif
				1243
				1244
				1245	const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
				1246	{
				1247	TiXmlDocument* document = GetDocument();
				1248	p = SkipWhiteSpace( p, encoding );
				1249
				1250	if ( data )
				1251	{
				1252	data->Stamp( p, encoding );
				1253	location = data->Cursor();
				1254	}
				1255	if ( !p \|\| !p \|\| p != '<' )
				1256	{
				1257	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
				1258	return 0;
				1259	}
				1260	++p;
				1261	value = "";
				1262
				1263	while ( p && p && p != '>' )
				1264	{
				1265	value += *p;
				1266	++p;
				1267	}
				1268
				1269	if ( !p )
				1270	{
				1271	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
				1272	}
				1273	if ( *p == '>' )
				1274	return p+1;
				1275	return p;
				1276	}
				1277
				1278	#ifdef TIXML_USE_STL
				1279	void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
				1280	{
				1281	while ( in->good() )
				1282	{
				1283	int c = in->get();
				1284	if ( c <= 0 )
				1285	{
				1286	TiXmlDocument* document = GetDocument();
				1287	if ( document )
				1288	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				1289	return;
				1290	}
				1291
				1292	(*tag) += (char) c;
				1293
				1294	if ( c == '>'
				1295	&& tag->at( tag->length() - 2 ) == '-'
				1296	&& tag->at( tag->length() - 3 ) == '-' )
				1297	{
				1298	// All is well.
				1299	return;
				1300	}
				1301	}
				1302	}
				1303	#endif
				1304
				1305
				1306	const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
				1307	{
				1308	TiXmlDocument* document = GetDocument();
				1309	value = "";
				1310
				1311	p = SkipWhiteSpace( p, encoding );
				1312
				1313	if ( data )
				1314	{
				1315	data->Stamp( p, encoding );
				1316	location = data->Cursor();
				1317	}
				1318	const char* startTag = "<!--";
				1319	const char* endTag = "-->";
				1320
				1321	if ( !StringEqual( p, startTag, false, encoding ) )
				1322	{
				1323	document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
				1324	return 0;
				1325	}
				1326	p += strlen( startTag );
				1327	p = ReadText( p, &value, false, endTag, false, encoding );
				1328	return p;
				1329	}
				1330
				1331
				1332	const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
				1333	{
				1334	p = SkipWhiteSpace( p, encoding );
				1335	if ( !p \|\| !*p ) return 0;
				1336
				1337	int tabsize = 4;
				1338	if ( document )
				1339	tabsize = document->TabSize();
				1340
				1341	if ( data )
				1342	{
				1343	data->Stamp( p, encoding );
				1344	location = data->Cursor();
				1345	}
				1346	// Read the name, the '=' and the value.
				1347	const char* pErr = p;
				1348	p = ReadName( p, &name, encoding );
				1349	if ( !p \|\| !*p )
				1350	{
				1351	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
				1352	return 0;
				1353	}
				1354	p = SkipWhiteSpace( p, encoding );
				1355	if ( !p \|\| !p \|\| p != '=' )
				1356	{
				1357	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
				1358	return 0;
				1359	}
				1360
				1361	++p; // skip '='
				1362	p = SkipWhiteSpace( p, encoding );
				1363	if ( !p \|\| !*p )
				1364	{
				1365	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
				1366	return 0;
				1367	}
				1368
				1369	const char* end;
				1370
				1371	if ( *p == '\'' )
				1372	{
				1373	++p;
				1374	end = "\'";
				1375	p = ReadText( p, &value, false, end, false, encoding );
				1376	}
				1377	else if ( *p == '"' )
				1378	{
				1379	++p;
				1380	end = "\"";
				1381	p = ReadText( p, &value, false, end, false, encoding );
				1382	}
				1383	else
				1384	{
				1385	// All attribute values should be in single or double quotes.
				1386	// But this is such a common error that the parser will try
				1387	// its best, even without them.
				1388	value = "";
				1389	while ( p && *p // existence
				1390	&& !IsWhiteSpace( p ) && p != '\n' && *p != '\r' // whitespace
				1391	&& p != '/' && p != '>' ) // tag end
				1392	{
				1393	value += *p;
				1394	++p;
				1395	}
				1396	}
				1397	return p;
				1398	}
				1399
				1400	#ifdef TIXML_USE_STL
				1401	void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
				1402	{
				1403	if ( cdata )
				1404	{
				1405	int c = in->get();
				1406	if ( c <= 0 )
				1407	{
				1408	TiXmlDocument* document = GetDocument();
				1409	if ( document )
				1410	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				1411	return;
				1412	}
				1413
				1414	(*tag) += (char) c;
				1415
				1416	if ( c == '>'
				1417	&& tag->at( tag->length() - 2 ) == ']'
				1418	&& tag->at( tag->length() - 3 ) == ']' )
				1419	{
				1420	// All is well.
				1421	return;
				1422	}
				1423	}
				1424	else
				1425	{
				1426	while ( in->good() )
				1427	{
				1428	int c = in->peek();
				1429	if ( c == '<' )
				1430	return;
				1431	if ( c <= 0 )
				1432	{
				1433	TiXmlDocument* document = GetDocument();
				1434	if ( document )
				1435	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				1436	return;
				1437	}
				1438
				1439	(*tag) += (char) c;
				1440	in->get();
				1441	}
				1442	}
				1443	}
				1444	#endif
				1445
				1446	const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
				1447	{
				1448	value = "";
				1449	TiXmlDocument* document = GetDocument();
				1450
				1451	if ( data )
				1452	{
				1453	data->Stamp( p, encoding );
				1454	location = data->Cursor();
				1455	}
				1456
				1457	const char* const startTag = "<![CDATA[";
				1458	const char* const endTag = "]]>";
				1459
				1460	if ( cdata \|\| StringEqual( p, startTag, false, encoding ) )
				1461	{
				1462	cdata = true;
				1463
				1464	if ( !StringEqual( p, startTag, false, encoding ) )
				1465	{
				1466	document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
				1467	return 0;
				1468	}
				1469	p += strlen( startTag );
				1470
				1471	// Keep all the white space, ignore the encoding, etc.
				1472	while ( p && *p
				1473	&& !StringEqual( p, endTag, false, encoding )
				1474	)
				1475	{
				1476	value += *p;
				1477	++p;
				1478	}
				1479
				1480	TIXML_STRING dummy;
				1481	p = ReadText( p, &dummy, false, endTag, false, encoding );
				1482	return p;
				1483	}
				1484	else
				1485	{
				1486	bool ignoreWhite = true;
				1487
				1488	const char* end = "<";
				1489	p = ReadText( p, &value, ignoreWhite, end, false, encoding );
				1490	if ( p )
				1491	return p-1; // don't truncate the '<'
				1492	return 0;
				1493	}
				1494	}
				1495
				1496	#ifdef TIXML_USE_STL
				1497	void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
				1498	{
				1499	while ( in->good() )
				1500	{
				1501	int c = in->get();
				1502	if ( c <= 0 )
				1503	{
				1504	TiXmlDocument* document = GetDocument();
				1505	if ( document )
				1506	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				1507	return;
				1508	}
				1509	(*tag) += (char) c;
				1510
				1511	if ( c == '>' )
				1512	{
				1513	// All is well.
				1514	return;
				1515	}
				1516	}
				1517	}
				1518	#endif
				1519
				1520	const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
				1521	{
				1522	p = SkipWhiteSpace( p, _encoding );
				1523	// Find the beginning, find the end, and look for
				1524	// the stuff in-between.
				1525	TiXmlDocument* document = GetDocument();
				1526	if ( !p \|\| !*p \|\| !StringEqual( p, "<?xml", true, _encoding ) )
				1527	{
				1528	if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
				1529	return 0;
				1530	}
				1531	if ( data )
				1532	{
				1533	data->Stamp( p, _encoding );
				1534	location = data->Cursor();
				1535	}
				1536	p += 5;
				1537
				1538	version = "";
				1539	encoding = "";
				1540	standalone = "";
				1541
				1542	while ( p && *p )
				1543	{
				1544	if ( *p == '>' )
				1545	{
				1546	++p;
				1547	return p;
				1548	}
				1549
				1550	p = SkipWhiteSpace( p, _encoding );
				1551	if ( StringEqual( p, "version", true, _encoding ) )
				1552	{
				1553	TiXmlAttribute attrib;
				1554	p = attrib.Parse( p, data, _encoding );
				1555	version = attrib.Value();
				1556	}
				1557	else if ( StringEqual( p, "encoding", true, _encoding ) )
				1558	{
				1559	TiXmlAttribute attrib;
				1560	p = attrib.Parse( p, data, _encoding );
				1561	encoding = attrib.Value();
				1562	}
				1563	else if ( StringEqual( p, "standalone", true, _encoding ) )
				1564	{
				1565	TiXmlAttribute attrib;
				1566	p = attrib.Parse( p, data, _encoding );
				1567	standalone = attrib.Value();
				1568	}
				1569	else
				1570	{
				1571	// Read over whatever it is.
				1572	while( p && p && p != '>' && !IsWhiteSpace( *p ) )
				1573	++p;
				1574	}
				1575	}
				1576	return 0;
				1577	}
				1578
				1579	bool TiXmlText::Blank() const
				1580	{
				1581	for ( unsigned i=0; i<value.length(); i++ )
				1582	if ( !IsWhiteSpace( value[i] ) )
				1583	return false;
				1584	return true;
				1585	}
				1586