Added performance test and option to leave entities
diff --git a/readme.txt b/readme.txt
index 62a1972..200805c 100755
--- a/readme.txt
+++ b/readme.txt
@@ -55,7 +55,7 @@
Which should you use? TinyXML-2 uses a similar API to TinyXML-1 and the same
rich test cases. But the implementation of the parser is completely re-written
to make it more appropriate for use in a game. It uses less memory, is faster,
-and user far few memory allocations.
+and uses far fewer memory allocations.
TinyXML-2 has no requirement for STL, but has also dropped all STL support. All
strings are query and set as 'const char*'. This allows the use of internal
diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index bb491de..c545180 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -740,7 +740,7 @@
return p;
}
else {
- p = value.ParseText( p, "<", StrPair::TEXT_ELEMENT );
+ p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );
if ( !p ) {
document->SetError( ERROR_PARSING_TEXT, start, 0 );
}
@@ -916,14 +916,14 @@
}
// --------- XMLAttribute ---------- //
+// Parses a single attribute starting at p: the name is read up to "=", then
+// the character that follows is taken as the terminator of the value
+// (presumably the opening quote -- confirm against callers).
+// processEntities selects StrPair::ATTRIBUTE_VALUE (true) or
+// StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES (false) for the value parse.
+// Returns 0 if the name parse fails or the input ends right after it;
+// otherwise returns whatever the value parse returns.
-char* XMLAttribute::ParseDeep( char* p )
+char* XMLAttribute::ParseDeep( char* p, bool processEntities )
{
p = name.ParseText( p, "=", StrPair::ATTRIBUTE_NAME );
if ( !p || !*p ) return 0;
char endTag[2] = { *p, 0 };
++p;
- p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE );
+ p = value.ParseText( p, endTag, processEntities ? StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES );
//if ( value.Empty() ) return 0;
return p;
}
@@ -1141,7 +1141,7 @@
XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute();
attrib->memPool = &document->attributePool;
- p = attrib->ParseDeep( p );
+ p = attrib->ParseDeep( p, document->ProcessEntities() );
if ( !p || Attribute( attrib->Name() ) ) {
DELETE_ATTRIBUTE( attrib );
document->SetError( ERROR_PARSING_ATTRIBUTE, start, p );
@@ -1250,9 +1250,13 @@
// --------- XMLDocument ----------- //
-XMLDocument::XMLDocument() :
+XMLDocument::XMLDocument( bool _processEntities ) :
XMLNode( 0 ),
writeBOM( false ),
+ processEntities( _processEntities ),
+ errorID( 0 ),
+ errorStr1( 0 ),
+ errorStr2( 0 ),
charBuffer( 0 )
{
document = this; // avoid warning about 'this' in initializer list
@@ -1474,7 +1478,8 @@
firstElement( true ),
fp( file ),
depth( 0 ),
- textDepth( -1 )
+ textDepth( -1 ),
+ processEntities( true )
{
for( int i=0; i<ENTITY_RANGE; ++i ) {
entityFlag[i] = false;
@@ -1540,31 +1545,33 @@
const char* q = p;
const bool* flag = restricted ? restrictedEntityFlag : entityFlag;
- while ( *q ) {
- // Remember, char is sometimes signed. (How many times has that bitten me?)
- if ( *q > 0 && *q < ENTITY_RANGE ) {
- // Check for entities. If one is found, flush
- // the stream up until the entity, write the
- // entity, and keep looking.
- if ( flag[*q] ) {
- while ( p < q ) {
- Print( "%c", *p );
+ if ( processEntities ) {
+ while ( *q ) {
+ // Remember, char is sometimes signed. (How many times has that bitten me?)
+ if ( *q > 0 && *q < ENTITY_RANGE ) {
+ // Check for entities. If one is found, flush
+ // the stream up until the entity, write the
+ // entity, and keep looking.
+ if ( flag[*q] ) {
+ while ( p < q ) {
+ Print( "%c", *p );
+ ++p;
+ }
+ for( int i=0; i<NUM_ENTITIES; ++i ) {
+ if ( entities[i].value == *q ) {
+ Print( "&%s;", entities[i].pattern );
+ break;
+ }
+ }
++p;
}
- for( int i=0; i<NUM_ENTITIES; ++i ) {
- if ( entities[i].value == *q ) {
- Print( "&%s;", entities[i].pattern );
- break;
- }
- }
- ++p;
}
+ ++q;
}
- ++q;
}
// Flush the remaining string. This will be the entire
// string if an entity wasn't found.
- if ( q-p > 0 ) {
+ if ( !processEntities || (q-p > 0) ) {
Print( "%s", p );
}
}
@@ -1735,6 +1742,7 @@
bool XMLPrinter::VisitEnter( const XMLDocument& doc )
{
+ processEntities = doc.ProcessEntities();
if ( doc.HasBOM() ) {
PushHeader( true, false );
}
@@ -1785,5 +1793,3 @@
PushUnknown( unknown.Value() );
return true;
}
-
-
diff --git a/tinyxml2.h b/tinyxml2.h
index d1ae0d7..368916c 100644
--- a/tinyxml2.h
+++ b/tinyxml2.h
@@ -115,8 +115,10 @@
NEEDS_NEWLINE_NORMALIZATION = 0x02,
TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
+ TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
ATTRIBUTE_NAME = 0,
ATTRIBUTE_VALUE = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
+ ATTRIBUTE_VALUE_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
COMMENT = NEEDS_NEWLINE_NORMALIZATION,
};
@@ -804,7 +806,7 @@
void operator=( const XMLAttribute& ); // not supported
void SetName( const char* name );
- char* ParseDeep( char* p );
+ char* ParseDeep( char* p, bool processEntities );
mutable StrPair name;
mutable StrPair value;
@@ -962,7 +964,7 @@
friend class XMLElement;
public:
/// constructor
- XMLDocument();
+ XMLDocument( bool processEntities = true );
~XMLDocument();
virtual XMLDocument* ToDocument() { return this; }
@@ -993,6 +995,11 @@
*/
void SaveFile( const char* filename );
+ bool ProcessEntities() const { return processEntities; }
+
+ /**
+ Returns true if this document has a leading Byte Order Mark of UTF8.
+ */
bool HasBOM() const { return writeBOM; }
/** Return the root element of DOM. Equivalent to FirstChildElement().
@@ -1071,8 +1078,8 @@
// internal
char* Identify( char* p, XMLNode** node );
- virtual XMLNode* ShallowClone( XMLDocument* document ) const { return 0; }
- virtual bool ShallowEqual( const XMLNode* compare ) const { return false; }
+ virtual XMLNode* ShallowClone( XMLDocument* /*document*/ ) const { return 0; }
+ virtual bool ShallowEqual( const XMLNode* /*compare*/ ) const { return false; }
private:
XMLDocument( const XMLDocument& ); // not supported
@@ -1080,6 +1087,7 @@
void InitDocument();
bool writeBOM;
+ bool processEntities;
int errorID;
const char* errorStr1;
const char* errorStr2;
@@ -1196,6 +1204,7 @@
FILE* fp;
int depth;
int textDepth;
+ bool processEntities;
enum {
ENTITY_RANGE = 64,
diff --git a/tinyxml2/tinyxml2.vcxproj b/tinyxml2/tinyxml2.vcxproj
index 89eef04..f9bd173 100644
--- a/tinyxml2/tinyxml2.vcxproj
+++ b/tinyxml2/tinyxml2.vcxproj
@@ -80,6 +80,9 @@
<ItemGroup>
<ClInclude Include="..\tinyxml2.h" />
</ItemGroup>
+ <ItemGroup>
+ <None Include="..\readme.txt" />
+ </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
diff --git a/tinyxml2/tinyxml2.vcxproj.filters b/tinyxml2/tinyxml2.vcxproj.filters
index 37eabf3..20cfa20 100644
--- a/tinyxml2/tinyxml2.vcxproj.filters
+++ b/tinyxml2/tinyxml2.vcxproj.filters
@@ -19,4 +19,9 @@
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
+ <ItemGroup>
+ <None Include="..\readme.txt">
+ <Filter>Source Files</Filter>
+ </None>
+ </ItemGroup>
</Project>
\ No newline at end of file
diff --git a/xmltest.cpp b/xmltest.cpp
index b3ffc2f..532c1be 100644
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -3,9 +3,12 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <time.h>
#if defined( _MSC_VER )
#include <crtdbg.h>
+ #define WIN32_LEAN_AND_MEAN
+ #include <windows.h>
_CrtMemState startMemState;
_CrtMemState endMemState;
#endif
@@ -211,6 +214,8 @@
//gNewTotal = gNew - newStart;
}
+
+
{
const char* error = "<?xml version=\"1.0\" standalone=\"no\" ?>\n"
"<passages count=\"006\" formatversion=\"20020620\">\n"
@@ -459,6 +464,24 @@
}
{
+ // Suppress entities: with entity processing switched off, both the
+ // predefined references (&quot; / &apos;) and the unknown &ttk; are
+ // expected to come back exactly as written in the input.
+ const char* passages =
+ "<?xml version=\"1.0\" standalone=\"no\" ?>"
+ "<passages count=\"006\" formatversion=\"20020620\">"
+ "<psg context=\"Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;.\">Crazy &ttk;</psg>"
+ "</passages>";
+
+ XMLDocument doc( false );
+ doc.Parse( passages );
+
+ XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->Attribute( "context" ),
+ "Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;." );
+ XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->FirstChild()->Value(),
+ "Crazy &ttk;" );
+ doc.Print();
+ }
+
+ {
const char* test = "<?xml version='1.0'?><a.elem xmi.version='2.0'/>";
XMLDocument doc;
@@ -653,6 +676,62 @@
XMLTest( "Clone and Equal", 4, count );
}
+ // ----------- Performance tracking --------------
+ // Parses dream.xml COUNT times in a row and prints the mean time per parse
+ // in milliseconds. The run is skipped with a message if the file cannot be read.
+ {
+#if defined( _MSC_VER )
+ __int64 start, end, freq;
+ QueryPerformanceFrequency( (LARGE_INTEGER*) &freq );
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning ( push )
+#pragma warning ( disable : 4996 ) // Fail to see a compelling reason why this should be deprecated.
+#endif
+ FILE* fp = fopen( "dream.xml", "r" );
+#if defined(_MSC_VER)
+#pragma warning ( pop )
+#endif
+ // Load the whole file into a null-terminated buffer; mem stays 0 on any failure.
+ char* mem = 0;
+ if ( fp ) {
+     long size = -1;
+     if ( fseek( fp, 0, SEEK_END ) == 0 ) {
+         size = ftell( fp );
+     }
+     if ( size >= 0 && fseek( fp, 0, SEEK_SET ) == 0 ) {
+         mem = new char[size+1];
+         // A text-mode read may deliver fewer bytes than ftell() reported
+         // (e.g. newline translation), so terminate at the count actually read.
+         size_t nRead = fread( mem, 1, (size_t)size, fp );
+         mem[nRead] = 0;
+     }
+     fclose( fp );
+ }
+
+ if ( !mem ) {
+     printf( "\nPerformance test skipped: unable to read dream.xml\n" );
+ }
+ else {
+#if defined( _MSC_VER )
+     QueryPerformanceCounter( (LARGE_INTEGER*) &start );
+#else
+     clock_t cstart = clock();
+#endif
+     static const int COUNT = 10;
+     for( int i=0; i<COUNT; ++i ) {
+         XMLDocument doc;
+         doc.Parse( mem );
+     }
+#if defined( _MSC_VER )
+     QueryPerformanceCounter( (LARGE_INTEGER*) &end );
+#else
+     clock_t cend = clock();
+#endif
+
+     delete [] mem;
+
+     static const char* note =
+#ifdef DEBUG
+         "DEBUG";
+#else
+         "Release";
+#endif
+
+#if defined( _MSC_VER )
+     printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, 1000.0 * (double)(end-start) / ( (double)freq * (double)COUNT) );
+#else
+     // clock() counts ticks; scale by CLOCKS_PER_SEC to report milliseconds.
+     printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, 1000.0 * (double)(cend - cstart) / ( (double)CLOCKS_PER_SEC * (double)COUNT ) );
+#endif
+ }
+ }
+
#if defined( _MSC_VER )
_CrtMemCheckpoint( &endMemState );
//_CrtMemDumpStatistics( &endMemState );