new element loop

commit: 46a14cfec746e22b9dd540cad2e06f0e82ab054d [log] [tgz]
author: Lee Thomason (grinliz) <leethomason@gmail.com> Thu Feb 23 22:27:28 2012 -0800
committer: Lee Thomason (grinliz) <leethomason@gmail.com> Thu Feb 23 22:27:28 2012 -0800
tree: 316fde0716dbab658c769ab6110e111a0c1abc0f
parent: d627776dd387cd1d578b64ceb458472eb57a144a [diff] [blame]
diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index 1d68cb0..51cf795 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp

@@ -18,17 +18,29 @@
 static const char SINGLE_QUOTE			= '\'';

 static const char DOUBLE_QUOTE			= '\"';

 

-// Bunch of unicode info at:
-//		http://www.unicode.org/faq/utf_bom.html
-//	ef bb bf (Microsoft "lead bytes") - designates UTF-8
-
-static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
-static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
-static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
+// Bunch of unicode info at:

+//		http://www.unicode.org/faq/utf_bom.html

+//	ef bb bf (Microsoft "lead bytes") - designates UTF-8

+

+static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;

+static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;

+static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;

 

 

-#define DELETE_NODE( node ) { MemPool* pool = node->memPool; node->~XMLNode(); pool->Free( node ); }

-#define DELETE_ATTRIBUTE( attrib ) { MemPool* pool = attrib->memPool; attrib->~XMLAttribute(); pool->Free( attrib ); }

+#define DELETE_NODE( node )	{			\

+	if ( node ) {						\

+		MemPool* pool = node->memPool;	\

+		node->~XMLNode();				\

+		pool->Free( node );				\

+	}									\

+}

+#define DELETE_ATTRIBUTE( attrib ) {		\

+	if ( attrib ) {							\

+		MemPool* pool = attrib->memPool;	\

+		attrib->~XMLAttribute();			\

+		pool->Free( attrib );				\

+	}										\

+}

 

 struct Entity {

 	const char* pattern;

@@ -229,114 +241,114 @@
 }

 

 

-void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
-{
-	const unsigned long BYTE_MASK = 0xBF;
-	const unsigned long BYTE_MARK = 0x80;
-	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-	if (input < 0x80) 
-		*length = 1;
-	else if ( input < 0x800 )
-		*length = 2;
-	else if ( input < 0x10000 )
-		*length = 3;
-	else if ( input < 0x200000 )
-		*length = 4;
-	else
-		{ *length = 0; return; }	// This code won't covert this correctly anyway.
-
-	output += *length;
-
-	// Scary scary fall throughs.
-	switch (*length) 
-	{
-		case 4:
-			--output; 
-			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
-			input >>= 6;
-		case 3:
-			--output; 
-			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
-			input >>= 6;
-		case 2:
-			--output; 
-			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
-			input >>= 6;
-		case 1:
-			--output; 
-			*output = (char)(input | FIRST_BYTE_MARK[*length]);
-	}
-}
-
-
-const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
-{
-	// Presume an entity, and pull it out.
-	*length = 0;
-
-	if ( *(p+1) == '#' && *(p+2) )
-	{
-		unsigned long ucs = 0;
-		ptrdiff_t delta = 0;
-		unsigned mult = 1;
-
-		if ( *(p+2) == 'x' )
-		{
-			// Hexadecimal.
-			if ( !*(p+3) ) return 0;
-
-			const char* q = p+3;
-			q = strchr( q, ';' );
-
-			if ( !q || !*q ) return 0;
-
-			delta = q-p;
-			--q;
-
-			while ( *q != 'x' )
-			{
-				if ( *q >= '0' && *q <= '9' )
-					ucs += mult * (*q - '0');
-				else if ( *q >= 'a' && *q <= 'f' )
-					ucs += mult * (*q - 'a' + 10);
-				else if ( *q >= 'A' && *q <= 'F' )
-					ucs += mult * (*q - 'A' + 10 );
-				else 
-					return 0;
-				mult *= 16;
-				--q;
-			}
-		}
-		else
-		{
-			// Decimal.
-			if ( !*(p+2) ) return 0;
-
-			const char* q = p+2;
-			q = strchr( q, ';' );
-
-			if ( !q || !*q ) return 0;
-
-			delta = q-p;
-			--q;
-
-			while ( *q != '#' )
-			{
-				if ( *q >= '0' && *q <= '9' )
-					ucs += mult * (*q - '0');
-				else 
-					return 0;
-				mult *= 10;
-				--q;
-			}
-		}
-		// convert the UCS to UTF-8
-		ConvertUTF32ToUTF8( ucs, value, length );
-		return p + delta + 1;
-	}
-	return p+1;
-}
+void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )

+{

+	const unsigned long BYTE_MASK = 0xBF;

+	const unsigned long BYTE_MARK = 0x80;

+	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

+

+	if (input < 0x80) 

+		*length = 1;

+	else if ( input < 0x800 )

+		*length = 2;

+	else if ( input < 0x10000 )

+		*length = 3;

+	else if ( input < 0x200000 )

+		*length = 4;

+	else

+		{ *length = 0; return; }	// This code won't convert this correctly anyway.

+

+	output += *length;

+

+	// Scary scary fall throughs.

+	switch (*length) 

+	{

+		case 4:

+			--output; 

+			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 

+			input >>= 6;

+		case 3:

+			--output; 

+			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 

+			input >>= 6;

+		case 2:

+			--output; 

+			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 

+			input >>= 6;

+		case 1:

+			--output; 

+			*output = (char)(input | FIRST_BYTE_MARK[*length]);

+	}

+}

+

+

+const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )

+{

+	// Presume an entity, and pull it out.

+	*length = 0;

+

+	if ( *(p+1) == '#' && *(p+2) )

+	{

+		unsigned long ucs = 0;

+		ptrdiff_t delta = 0;

+		unsigned mult = 1;

+

+		if ( *(p+2) == 'x' )

+		{

+			// Hexadecimal.

+			if ( !*(p+3) ) return 0;

+

+			const char* q = p+3;

+			q = strchr( q, ';' );

+

+			if ( !q || !*q ) return 0;

+

+			delta = q-p;

+			--q;

+

+			while ( *q != 'x' )

+			{

+				if ( *q >= '0' && *q <= '9' )

+					ucs += mult * (*q - '0');

+				else if ( *q >= 'a' && *q <= 'f' )

+					ucs += mult * (*q - 'a' + 10);

+				else if ( *q >= 'A' && *q <= 'F' )

+					ucs += mult * (*q - 'A' + 10 );

+				else 

+					return 0;

+				mult *= 16;

+				--q;

+			}

+		}

+		else

+		{

+			// Decimal.

+			if ( !*(p+2) ) return 0;

+

+			const char* q = p+2;

+			q = strchr( q, ';' );

+

+			if ( !q || !*q ) return 0;

+

+			delta = q-p;

+			--q;

+

+			while ( *q != '#' )

+			{

+				if ( *q >= '0' && *q <= '9' )

+					ucs += mult * (*q - '0');

+				else 

+					return 0;

+				mult *= 10;

+				--q;

+			}

+		}

+		// convert the UCS to UTF-8

+		ConvertUTF32ToUTF8( ucs, value, length );

+		return p + delta + 1;

+	}

+	return p+1;

+}

 

 

 char* XMLDocument::Identify( char* p, XMLNode** node ) 

@@ -397,6 +409,11 @@
 		returnNode = new (elementPool.Alloc()) XMLElement( this );

 		returnNode->memPool = &elementPool;

 		p += elementHeaderLen;

+

+		p = XMLUtil::SkipWhiteSpace( p );

+		if ( p && *p == '/' ) {

+			((XMLElement*)returnNode)->closingType = XMLElement::CLOSING;

+		}

 	}

 	else {

 		returnNode = new (textPool.Alloc()) XMLText( this );

@@ -587,20 +604,75 @@
 

 char* XMLNode::ParseDeep( char* p )

 {

+	// This is a recursive method, but thinking about it "at the current level"

+	// it is a pretty simple flat list:

+	//		<foo/>

+	//		<!-- comment -->

+	//

+	// With a special case:

+	//		<foo>

+	//		</foo>

+	//		<!-- comment -->

+	//		

+	// Where the closing element (/foo) *must* be the next thing after the opening

+	// element, and the names must match. BUT the tricky bit is that the closing

+	// element will be read by the child.

+

 	while( p && *p ) {

 		XMLNode* node = 0;

-		p = document->Identify( p, &node );

-		if ( p && node ) {

-			p = node->ParseDeep( p );

+		char* mark = p;

 

-			if ( node->IsClosingElement() ) {

-				if ( !XMLUtil::StringEqual( Value(), node->Value() )) {

-					document->SetError( ERROR_MISMATCHED_ELEMENT, Value(), 0 );

-				}

+		p = document->Identify( p, &node );

+		if ( p == 0 ) {

+			break;

+		}

+

+		// We read the end tag. Back up and return.

+		if ( node && node->ToElement() && node->ToElement()->ClosingType() == XMLElement::CLOSING ) {

+			DELETE_NODE( node );

+			return mark;

+		}

+

+		if ( node ) {

+			p = node->ParseDeep( p );

+			if ( !p ) {

 				DELETE_NODE( node );

-				return p;

+				node = 0;

+				break;

 			}

-			this->InsertEndChild( node );

+	

+			XMLElement* ele = node->ToElement();

+			if ( ele && ele->ClosingType() == XMLElement::OPEN ) {

+				XMLNode* closingNode = 0;

+				p = document->Identify( p, &closingNode );

+				XMLElement* closingEle = closingNode ? closingNode->ToElement() : 0;

+

+				if ( closingEle == 0 ) {

+					document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );

+					p = 0;

+				}

+				else if ( closingEle->ClosingType() != XMLElement::CLOSING ) {

+					document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );

+					p = 0;

+				}

+				else 

+				{

+					p = closingEle->ParseDeep( p );

+					if ( !XMLUtil::StringEqual( closingEle->Value(), node->Value() )) { 

+						document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );

+						p = 0;

+					}

+				}

+				// Match or mismatch, the closing node is only a marker and never joins the DOM: throw it away.

+				DELETE_NODE( closingNode );

+				if ( p == 0 ) {

+					DELETE_NODE( node );

+					node = 0;

+				}

+			}

+			if ( node ) {

+				this->InsertEndChild( node );

+			}

 		}

 	}

 	return 0;

@@ -736,7 +808,7 @@
 	char endTag[2] = { *p, 0 };

 	++p;

 	p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE );

-	if ( value.Empty() ) return 0;

+	//if ( value.Empty() ) return 0;

 	return p;

 }

 

@@ -842,9 +914,8 @@
 

 // --------- XMLElement ---------- //

 XMLElement::XMLElement( XMLDocument* doc ) : XMLNode( doc ),

-	closing( false ),

+	closingType( 0 ),

 	rootAttribute( 0 )

-	//lastAttribute( 0 )

 {

 }

 

@@ -937,10 +1008,9 @@
 }

 

 

-char* XMLElement::ParseAttributes( char* p, bool* closedElement )

+char* XMLElement::ParseAttributes( char* p )

 {

 	const char* start = p;

-	*closedElement = false;

 

 	// Read the attributes.

 	while( p ) {

@@ -965,11 +1035,7 @@
 		}

 		// end of the tag

 		else if ( *p == '/' && *(p+1) == '>' ) {

-			if ( closing ) {

-				document->SetError( ERROR_PARSING_ELEMENT, start, p );

-				return 0;

-			}

-			*closedElement = true;

+			closingType = CLOSED;

 			return p+2;	// done; sealed element.

 		}

 		// end of the tag

@@ -1001,7 +1067,7 @@
 	// parsed just like a regular element then deleted from

 	// the DOM.

 	if ( *p == '/' ) {

-		closing = true;

+		closingType = CLOSING;

 		++p;

 	}

 

@@ -1009,8 +1075,8 @@
 	if ( value.Empty() ) return 0;

 

 	bool elementClosed=false;

-	p = ParseAttributes( p, &elementClosed );

-	if ( !p || !*p || elementClosed || closing ) 

+	p = ParseAttributes( p );

+	if ( !p || !*p || closingType ) 

 		return p;

 

 	p = XMLNode::ParseDeep( p );
commit	46a14cfec746e22b9dd540cad2e06f0e82ab054d	[log] [tgz]
author	Lee Thomason (grinliz) <leethomason@gmail.com>	Thu Feb 23 22:27:28 2012 -0800
committer	Lee Thomason (grinliz) <leethomason@gmail.com>	Thu Feb 23 22:27:28 2012 -0800
tree	316fde0716dbab658c769ab6110e111a0c1abc0f
parent	d627776dd387cd1d578b64ceb458472eb57a144a [diff] [blame]