new element loop
diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index 1d68cb0..51cf795 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -18,17 +18,29 @@
static const char SINGLE_QUOTE = '\'';
static const char DOUBLE_QUOTE = '\"';
-// Bunch of unicode info at:
-// http://www.unicode.org/faq/utf_bom.html
-// ef bb bf (Microsoft "lead bytes") - designates UTF-8
-
-static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
-static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
-static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
+// Bunch of unicode info at:
+// http://www.unicode.org/faq/utf_bom.html
+// ef bb bf (Microsoft "lead bytes") - designates UTF-8
+
+static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
+static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
+static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
-#define DELETE_NODE( node ) { MemPool* pool = node->memPool; node->~XMLNode(); pool->Free( node ); }
-#define DELETE_ATTRIBUTE( attrib ) { MemPool* pool = attrib->memPool; attrib->~XMLAttribute(); pool->Free( attrib ); }
+#define DELETE_NODE( node ) { \
+ if ( node ) { \
+ MemPool* pool = node->memPool; \
+ node->~XMLNode(); \
+ pool->Free( node ); \
+ } \
+}
+#define DELETE_ATTRIBUTE( attrib ) { \
+ if ( attrib ) { \
+ MemPool* pool = attrib->memPool; \
+ attrib->~XMLAttribute(); \
+ pool->Free( attrib ); \
+ } \
+}
struct Entity {
const char* pattern;
@@ -229,114 +241,114 @@
}
-void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
-{
- const unsigned long BYTE_MASK = 0xBF;
- const unsigned long BYTE_MARK = 0x80;
- const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
- if (input < 0x80)
- *length = 1;
- else if ( input < 0x800 )
- *length = 2;
- else if ( input < 0x10000 )
- *length = 3;
- else if ( input < 0x200000 )
- *length = 4;
- else
- { *length = 0; return; } // This code won't covert this correctly anyway.
-
- output += *length;
-
- // Scary scary fall throughs.
- switch (*length)
- {
- case 4:
- --output;
- *output = (char)((input | BYTE_MARK) & BYTE_MASK);
- input >>= 6;
- case 3:
- --output;
- *output = (char)((input | BYTE_MARK) & BYTE_MASK);
- input >>= 6;
- case 2:
- --output;
- *output = (char)((input | BYTE_MARK) & BYTE_MASK);
- input >>= 6;
- case 1:
- --output;
- *output = (char)(input | FIRST_BYTE_MARK[*length]);
- }
-}
-
-
-const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
-{
- // Presume an entity, and pull it out.
- *length = 0;
-
- if ( *(p+1) == '#' && *(p+2) )
- {
- unsigned long ucs = 0;
- ptrdiff_t delta = 0;
- unsigned mult = 1;
-
- if ( *(p+2) == 'x' )
- {
- // Hexadecimal.
- if ( !*(p+3) ) return 0;
-
- const char* q = p+3;
- q = strchr( q, ';' );
-
- if ( !q || !*q ) return 0;
-
- delta = q-p;
- --q;
-
- while ( *q != 'x' )
- {
- if ( *q >= '0' && *q <= '9' )
- ucs += mult * (*q - '0');
- else if ( *q >= 'a' && *q <= 'f' )
- ucs += mult * (*q - 'a' + 10);
- else if ( *q >= 'A' && *q <= 'F' )
- ucs += mult * (*q - 'A' + 10 );
- else
- return 0;
- mult *= 16;
- --q;
- }
- }
- else
- {
- // Decimal.
- if ( !*(p+2) ) return 0;
-
- const char* q = p+2;
- q = strchr( q, ';' );
-
- if ( !q || !*q ) return 0;
-
- delta = q-p;
- --q;
-
- while ( *q != '#' )
- {
- if ( *q >= '0' && *q <= '9' )
- ucs += mult * (*q - '0');
- else
- return 0;
- mult *= 10;
- --q;
- }
- }
- // convert the UCS to UTF-8
- ConvertUTF32ToUTF8( ucs, value, length );
- return p + delta + 1;
- }
- return p+1;
-}
+void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
+{
+ const unsigned long BYTE_MASK = 0xBF;
+ const unsigned long BYTE_MARK = 0x80;
+ const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+ if (input < 0x80)
+ *length = 1;
+ else if ( input < 0x800 )
+ *length = 2;
+ else if ( input < 0x10000 )
+ *length = 3;
+ else if ( input < 0x200000 )
+ *length = 4;
+ else
+ { *length = 0; return; } // This code won't convert this correctly anyway.
+
+ output += *length;
+
+ // Scary scary fall throughs.
+ switch (*length)
+ {
+ case 4:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 3:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 2:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 1:
+ --output;
+ *output = (char)(input | FIRST_BYTE_MARK[*length]);
+ }
+}
+
+
+const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
+{
+ // Presume an entity, and pull it out.
+ *length = 0;
+
+ if ( *(p+1) == '#' && *(p+2) )
+ {
+ unsigned long ucs = 0;
+ ptrdiff_t delta = 0;
+ unsigned mult = 1;
+
+ if ( *(p+2) == 'x' )
+ {
+ // Hexadecimal.
+ if ( !*(p+3) ) return 0;
+
+ const char* q = p+3;
+ q = strchr( q, ';' );
+
+ if ( !q || !*q ) return 0;
+
+ delta = q-p;
+ --q;
+
+ while ( *q != 'x' )
+ {
+ if ( *q >= '0' && *q <= '9' )
+ ucs += mult * (*q - '0');
+ else if ( *q >= 'a' && *q <= 'f' )
+ ucs += mult * (*q - 'a' + 10);
+ else if ( *q >= 'A' && *q <= 'F' )
+ ucs += mult * (*q - 'A' + 10 );
+ else
+ return 0;
+ mult *= 16;
+ --q;
+ }
+ }
+ else
+ {
+ // Decimal.
+ if ( !*(p+2) ) return 0;
+
+ const char* q = p+2;
+ q = strchr( q, ';' );
+
+ if ( !q || !*q ) return 0;
+
+ delta = q-p;
+ --q;
+
+ while ( *q != '#' )
+ {
+ if ( *q >= '0' && *q <= '9' )
+ ucs += mult * (*q - '0');
+ else
+ return 0;
+ mult *= 10;
+ --q;
+ }
+ }
+ // convert the UCS to UTF-8
+ ConvertUTF32ToUTF8( ucs, value, length );
+ return p + delta + 1;
+ }
+ return p+1;
+}
char* XMLDocument::Identify( char* p, XMLNode** node )
@@ -397,6 +409,11 @@
returnNode = new (elementPool.Alloc()) XMLElement( this );
returnNode->memPool = &elementPool;
p += elementHeaderLen;
+
+ p = XMLUtil::SkipWhiteSpace( p );
+ if ( p && *p == '/' ) {
+ ((XMLElement*)returnNode)->closingType = XMLElement::CLOSING;
+ }
}
else {
returnNode = new (textPool.Alloc()) XMLText( this );
@@ -587,20 +604,75 @@
char* XMLNode::ParseDeep( char* p )
{
+ // This is a recursive method, but thinking about it "at the current level"
+ // it is a pretty simple flat list:
+ // <foo/>
+ // <!-- comment -->
+ //
+ // With a special case:
+ // <foo>
+ // </foo>
+ // <!-- comment -->
+ //
+ // Where the closing element (/foo) *must* be the next thing after the opening
+ // element, and the names must match. BUT the tricky bit is that the closing
+ // element will be read by the child.
+
while( p && *p ) {
XMLNode* node = 0;
- p = document->Identify( p, &node );
- if ( p && node ) {
- p = node->ParseDeep( p );
+ char* mark = p;
- if ( node->IsClosingElement() ) {
- if ( !XMLUtil::StringEqual( Value(), node->Value() )) {
- document->SetError( ERROR_MISMATCHED_ELEMENT, Value(), 0 );
- }
+ p = document->Identify( p, &node );
+ if ( p == 0 ) {
+ break;
+ }
+
+ // We read the end tag. Back up and return.
+ if ( node && node->ToElement() && node->ToElement()->ClosingType() == XMLElement::CLOSING ) {
+ DELETE_NODE( node );
+ return mark;
+ }
+
+ if ( node ) {
+ p = node->ParseDeep( p );
+ if ( !p ) {
DELETE_NODE( node );
- return p;
+ node = 0;
+ break;
}
- this->InsertEndChild( node );
+
+ XMLElement* ele = node->ToElement();
+ if ( ele && ele->ClosingType() == XMLElement::OPEN ) {
+ XMLNode* closingNode = 0;
+ p = document->Identify( p, &closingNode );
+ XMLElement* closingEle = closingNode ? closingNode->ToElement() : 0;
+
+ if ( closingEle == 0 ) {
+ document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+ p = 0;
+ }
+ else if ( closingEle->ClosingType() != XMLElement::CLOSING ) {
+ document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+ p = 0;
+ }
+ else
+ {
+ p = closingEle->ParseDeep( p );
+ if ( !XMLUtil::StringEqual( closingEle->Value(), node->Value() )) {
+ document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+ p = 0;
+ }
+ }
+ // The closing node is never added to the DOM; discard it whether or not it matched.
+ DELETE_NODE( closingNode );
+ if ( p == 0 ) {
+ DELETE_NODE( node );
+ node = 0;
+ }
+ }
+ if ( node ) {
+ this->InsertEndChild( node );
+ }
}
}
return 0;
@@ -736,7 +808,7 @@
char endTag[2] = { *p, 0 };
++p;
p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE );
- if ( value.Empty() ) return 0;
+ //if ( value.Empty() ) return 0;
return p;
}
@@ -842,9 +914,8 @@
// --------- XMLElement ---------- //
XMLElement::XMLElement( XMLDocument* doc ) : XMLNode( doc ),
- closing( false ),
+ closingType( 0 ),
rootAttribute( 0 )
- //lastAttribute( 0 )
{
}
@@ -937,10 +1008,9 @@
}
-char* XMLElement::ParseAttributes( char* p, bool* closedElement )
+char* XMLElement::ParseAttributes( char* p )
{
const char* start = p;
- *closedElement = false;
// Read the attributes.
while( p ) {
@@ -965,11 +1035,7 @@
}
// end of the tag
else if ( *p == '/' && *(p+1) == '>' ) {
- if ( closing ) {
- document->SetError( ERROR_PARSING_ELEMENT, start, p );
- return 0;
- }
- *closedElement = true;
+ closingType = CLOSED;
return p+2; // done; sealed element.
}
// end of the tag
@@ -1001,7 +1067,7 @@
// parsed just like a regular element then deleted from
// the DOM.
if ( *p == '/' ) {
- closing = true;
+ closingType = CLOSING;
++p;
}
@@ -1009,8 +1075,8 @@
if ( value.Empty() ) return 0;
bool elementClosed=false;
- p = ParseAttributes( p, &elementClosed );
- if ( !p || !*p || elementClosed || closing )
+ p = ParseAttributes( p );
+ if ( !p || !*p || closingType )
return p;
p = XMLNode::ParseDeep( p );