Added performance test and option to leave entities

commit: 6f381b773923ab75eb9b30c3028e60a218fb9b96 [log] [tgz]
author: Lee Thomason <leethomason@gmail.com> Fri Mar 02 12:59:39 2012 -0800
committer: Lee Thomason <leethomason@gmail.com> Fri Mar 02 12:59:39 2012 -0800
tree: 0263debbc5a26cce57ed48176b88024bb5db396b
parent: 2705731775e79446507432219dc06ff1625ae6e2 [diff]
diff --git a/readme.txt b/readme.txt
index 62a1972..200805c 100755
--- a/readme.txt
+++ b/readme.txt

@@ -55,7 +55,7 @@
 Which should you use? TinyXML-2 uses a similar API to TinyXML-1 and the same
 rich test cases. But the implementation of the parser is completely re-written
 to make it more appropriate for use in a game. It uses less memory, is faster,
-and user far few memory allocations.
+and uses far fewer memory allocations.
 
 TinyXML-2 has no requirement for STL, but has also dropped all STL support. All
 strings are query and set as 'const char*'. This allows the use of internal 

diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index bb491de..c545180 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp

@@ -740,7 +740,7 @@
 		return p;

 	}

 	else {

-		p = value.ParseText( p, "<", StrPair::TEXT_ELEMENT );

+		p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );

 		if ( !p ) {

 			document->SetError( ERROR_PARSING_TEXT, start, 0 );

 		}

@@ -916,14 +916,14 @@
 }

 

 // --------- XMLAttribute ---------- //

-char* XMLAttribute::ParseDeep( char* p )

+char* XMLAttribute::ParseDeep( char* p, bool processEntities )

 {

 	p = name.ParseText( p, "=", StrPair::ATTRIBUTE_NAME );

 	if ( !p || !*p ) return 0;

 

 	char endTag[2] = { *p, 0 };

 	++p;

-	p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE );

+	p = value.ParseText( p, endTag, processEntities ? StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES );

 	//if ( value.Empty() ) return 0;

 	return p;

 }

@@ -1141,7 +1141,7 @@
 			XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute();

 			attrib->memPool = &document->attributePool;

 

-			p = attrib->ParseDeep( p );

+			p = attrib->ParseDeep( p, document->ProcessEntities() );

 			if ( !p || Attribute( attrib->Name() ) ) {

 				DELETE_ATTRIBUTE( attrib );

 				document->SetError( ERROR_PARSING_ATTRIBUTE, start, p );

@@ -1250,9 +1250,13 @@
 

 

 // --------- XMLDocument ----------- //

-XMLDocument::XMLDocument() :

+XMLDocument::XMLDocument( bool _processEntities ) :

 	XMLNode( 0 ),

 	writeBOM( false ),

+	processEntities( _processEntities ),

+	errorID( 0 ),

+	errorStr1( 0 ),

+	errorStr2( 0 ),

 	charBuffer( 0 )

 {

 	document = this;	// avoid warning about 'this' in initializer list

@@ -1474,7 +1478,8 @@
 	firstElement( true ),

 	fp( file ), 

 	depth( 0 ), 

-	textDepth( -1 )

+	textDepth( -1 ),

+	processEntities( true )

 {

 	for( int i=0; i<ENTITY_RANGE; ++i ) {

 		entityFlag[i] = false;

@@ -1540,31 +1545,33 @@
 	const char* q = p;

 	const bool* flag = restricted ? restrictedEntityFlag : entityFlag;

 

-	while ( *q ) {

-		// Remember, char is sometimes signed. (How many times has that bitten me?)

-		if ( *q > 0 && *q < ENTITY_RANGE ) {

-			// Check for entities. If one is found, flush

-			// the stream up until the entity, write the 

-			// entity, and keep looking.

-			if ( flag[*q] ) {

-				while ( p < q ) {

-					Print( "%c", *p );

+	if ( processEntities ) {

+		while ( *q ) {

+			// Remember, char is sometimes signed. (How many times has that bitten me?)

+			if ( *q > 0 && *q < ENTITY_RANGE ) {

+				// Check for entities. If one is found, flush

+				// the stream up until the entity, write the 

+				// entity, and keep looking.

+				if ( flag[*q] ) {

+					while ( p < q ) {

+						Print( "%c", *p );

+						++p;

+					}

+					for( int i=0; i<NUM_ENTITIES; ++i ) {

+						if ( entities[i].value == *q ) {

+							Print( "&%s;", entities[i].pattern );

+							break;

+						}

+					}

 					++p;

 				}

-				for( int i=0; i<NUM_ENTITIES; ++i ) {

-					if ( entities[i].value == *q ) {

-						Print( "&%s;", entities[i].pattern );

-						break;

-					}

-				}

-				++p;

 			}

+			++q;

 		}

-		++q;

 	}

 	// Flush the remaining string. This will be the entire

 	// string if an entity wasn't found.

-	if ( q-p > 0 ) {

+	if ( !processEntities || (q-p > 0) ) {

 		Print( "%s", p );

 	}

 }

@@ -1735,6 +1742,7 @@
 

 bool XMLPrinter::VisitEnter( const XMLDocument& doc )

 {

+	processEntities = doc.ProcessEntities();

 	if ( doc.HasBOM() ) {

 		PushHeader( true, false );

 	}

@@ -1785,5 +1793,3 @@
 	PushUnknown( unknown.Value() );

 	return true;

 }

-

-


diff --git a/tinyxml2.h b/tinyxml2.h
index d1ae0d7..368916c 100644
--- a/tinyxml2.h
+++ b/tinyxml2.h

@@ -115,8 +115,10 @@
 		NEEDS_NEWLINE_NORMALIZATION		= 0x02,

 

 		TEXT_ELEMENT		= NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,

+		TEXT_ELEMENT_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,

 		ATTRIBUTE_NAME		= 0,

 		ATTRIBUTE_VALUE		= NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,

+		ATTRIBUTE_VALUE_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,

 		COMMENT				= NEEDS_NEWLINE_NORMALIZATION,

 	};

 

@@ -804,7 +806,7 @@
 	void operator=( const XMLAttribute& );	// not supported

 	void SetName( const char* name );

 

-	char* ParseDeep( char* p );

+	char* ParseDeep( char* p, bool processEntities );

 

 	mutable StrPair name;

 	mutable StrPair value;

@@ -962,7 +964,7 @@
 	friend class XMLElement;

 public:

 	/// constructor

-	XMLDocument(); 

+	XMLDocument( bool processEntities = true ); 

 	~XMLDocument();

 

 	virtual XMLDocument* ToDocument()				{ return this; }

@@ -993,6 +995,11 @@
 	*/

 	void SaveFile( const char* filename );

 

+	bool ProcessEntities() const						{ return processEntities; }

+

+	/**

+		Returns true if this document has a leading Byte Order Mark of UTF8.

+	*/

 	bool HasBOM() const { return writeBOM; }

 

 	/** Return the root element of DOM. Equivalent to FirstChildElement().

@@ -1071,8 +1078,8 @@
 	// internal

 	char* Identify( char* p, XMLNode** node );

 

-	virtual XMLNode* ShallowClone( XMLDocument* document ) const	{ return 0; }

-	virtual bool ShallowEqual( const XMLNode* compare ) const	{ return false; }

+	virtual XMLNode* ShallowClone( XMLDocument* /*document*/ ) const	{ return 0; }

+	virtual bool ShallowEqual( const XMLNode* /*compare*/ ) const	{ return false; }

 

 private:

 	XMLDocument( const XMLDocument& );	// not supported

@@ -1080,6 +1087,7 @@
 	void InitDocument();

 

 	bool writeBOM;

+	bool processEntities;

 	int errorID;

 	const char* errorStr1;

 	const char* errorStr2;

@@ -1196,6 +1204,7 @@
 	FILE* fp;

 	int depth;

 	int textDepth;

+	bool processEntities;

 

 	enum {

 		ENTITY_RANGE = 64,


diff --git a/tinyxml2/tinyxml2.vcxproj b/tinyxml2/tinyxml2.vcxproj
index 89eef04..f9bd173 100644
--- a/tinyxml2/tinyxml2.vcxproj
+++ b/tinyxml2/tinyxml2.vcxproj

@@ -80,6 +80,9 @@
   <ItemGroup>

     <ClInclude Include="..\tinyxml2.h" />

   </ItemGroup>

+  <ItemGroup>

+    <None Include="..\readme.txt" />

+  </ItemGroup>

   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

   <ImportGroup Label="ExtensionTargets">

   </ImportGroup>


diff --git a/tinyxml2/tinyxml2.vcxproj.filters b/tinyxml2/tinyxml2.vcxproj.filters
index 37eabf3..20cfa20 100644
--- a/tinyxml2/tinyxml2.vcxproj.filters
+++ b/tinyxml2/tinyxml2.vcxproj.filters

@@ -19,4 +19,9 @@
       <Filter>Source Files</Filter>

     </ClInclude>

   </ItemGroup>

+  <ItemGroup>

+    <None Include="..\readme.txt">

+      <Filter>Source Files</Filter>

+    </None>

+  </ItemGroup>

 </Project>
\ No newline at end of file

diff --git a/xmltest.cpp b/xmltest.cpp
index b3ffc2f..532c1be 100644
--- a/xmltest.cpp
+++ b/xmltest.cpp

@@ -3,9 +3,12 @@
 #include <stdio.h>

 #include <stdlib.h>

 #include <string.h>

+#include <time.h>

 

 #if defined( _MSC_VER )

 	#include <crtdbg.h>

+	#define WIN32_LEAN_AND_MEAN

+	#include <windows.h>

 	_CrtMemState startMemState;

 	_CrtMemState endMemState;

 #endif

@@ -211,6 +214,8 @@
 

 		//gNewTotal = gNew - newStart;

 	}

+

+

 	{

 		const char* error =	"<?xml version=\"1.0\" standalone=\"no\" ?>\n"

 							"<passages count=\"006\" formatversion=\"20020620\">\n"

@@ -459,6 +464,24 @@
 	}

 

 	{

+		// Suppress entities.

+		const char* passages =

+			"<?xml version=\"1.0\" standalone=\"no\" ?>"

+			"<passages count=\"006\" formatversion=\"20020620\">"

+				"<psg context=\"Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;.\">Crazy &ttk;</psg>"

+			"</passages>";

+		

+		XMLDocument doc( false );

+		doc.Parse( passages );

+

+		XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->Attribute( "context" ), 

+				 "Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;." );

+		XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->FirstChild()->Value(),

+				 "Crazy &ttk;" );

+		doc.Print();

+	}

+

+	{

         const char* test = "<?xml version='1.0'?><a.elem xmi.version='2.0'/>";

 

 		XMLDocument doc;

@@ -653,6 +676,62 @@
 		XMLTest( "Clone and Equal", 4, count );

 	}

 

+	// ----------- Performance tracking --------------

+	{

+#if defined( _MSC_VER )

+		__int64 start, end, freq;

+		QueryPerformanceFrequency( (LARGE_INTEGER*) &freq );

+#endif

+

+#if defined(_MSC_VER)

+#pragma warning ( push )

+#pragma warning ( disable : 4996 )		// Fail to see a compelling reason why this should be deprecated.

+#endif

+		FILE* fp  = fopen( "dream.xml", "r" );

+#if defined(_MSC_VER)

+#pragma warning ( pop )

+#endif

+		fseek( fp, 0, SEEK_END );

+		long size = ftell( fp );

+		fseek( fp, 0, SEEK_SET );

+

+		char* mem = new char[size+1];

+		fread( mem, size, 1, fp );

+		fclose( fp );

+		mem[size] = 0;

+

+#if defined( _MSC_VER )

+		QueryPerformanceCounter( (LARGE_INTEGER*) &start );

+#else

+		clock_t cstart = clock();

+#endif

+		static const int COUNT = 10;

+		for( int i=0; i<COUNT; ++i ) {

+			XMLDocument doc;

+			doc.Parse( mem );

+		}

+#if defined( _MSC_VER )

+		QueryPerformanceCounter( (LARGE_INTEGER*) &end );

+#else

+		clock_t cend = clock();

+#endif

+

+		delete [] mem;

+

+		static const char* note = 

+#ifdef DEBUG

+			"DEBUG";

+#else

+			"Release";

+#endif

+

+#if defined( _MSC_VER )

+		printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, 1000.0 * (double)(end-start) / ( (double)freq * (double)COUNT) );

+#else

+		printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, (double)(cend - cstart)/(double)COUNT );

+#endif

+	}

+

 	#if defined( _MSC_VER )

 		_CrtMemCheckpoint( &endMemState );  

 		//_CrtMemDumpStatistics( &endMemState );
commit	6f381b773923ab75eb9b30c3028e60a218fb9b96	[log] [tgz]
author	Lee Thomason <leethomason@gmail.com>	Fri Mar 02 12:59:39 2012 -0800
committer	Lee Thomason <leethomason@gmail.com>	Fri Mar 02 12:59:39 2012 -0800
tree	0263debbc5a26cce57ed48176b88024bb5db396b
parent	2705731775e79446507432219dc06ff1625ae6e2 [diff]