Added whitespace=collapse support (optional COLLAPSE_WHITESPACE mode for XMLDocument). The tests pass; the code still needs review.
diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index 8069d04..b20d9f0 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -23,10 +23,12 @@
#include "tinyxml2.h"
-#include <cstdio>
-#include <cstdlib>
-#include <new>
-#include <cstddef>
+#include <new> // yes, this one new style header, is in the Android SDK.
+#ifdef ANDROID_NDK
+ #include <stddef.h>
+#else
+ #include <cstddef>
+#endif
using namespace tinyxml2;
@@ -156,6 +158,31 @@
}
+// Collapses whitespace of the string at 'start' in place: trims both ends and turns each interior run into one ' '.
+void StrPair::CollapseWhitespace()
+{
+    if ( !start ) return;    // checked first: SkipWhiteSpace() dereferences its argument unconditionally.
+    // Trim leading space. NOTE(review): this advances 'start'; if the buffer can be owned (NEEDS_DELETE), confirm it is never freed through 'start' afterwards.
+    start = XMLUtil::SkipWhiteSpace( start );
+    if ( *start ) {
+        char* p = start;    // the read pointer
+        char* q = start;    // the write pointer; never ahead of p, so unread input is not overwritten
+        while( *p ) {
+            if ( XMLUtil::IsWhiteSpace( *p )) {
+                p = XMLUtil::SkipWhiteSpace( p );
+                if ( *p == 0 )
+                    break;    // don't write to q; this trims the trailing space.
+                *q = ' ';    // a whole run of whitespace is emitted as a single space
+                ++q;
+            }
+            *q = *p;
+            ++q;
+            ++p;
+        }
+        *q = 0;    // re-terminate at the (possibly shorter) collapsed length
+    }
+}
+
const char* StrPair::GetStr()
{
@@ -232,6 +259,11 @@
}
*q = 0;
}
+ // The loop below has plenty going on, and this
+ // is a less useful mode. Break it out.
+ if ( flags & COLLAPSE_WHITESPACE ) {
+ CollapseWhitespace();
+ }
flags = (flags & NEEDS_DELETE);
}
return start;
@@ -815,7 +847,11 @@
return p;
}
else {
- p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );
+ int flags = document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES;
+ if ( document->WhitespaceMode() == COLLAPSE_WHITESPACE )
+ flags |= StrPair::COLLAPSE_WHITESPACE;
+
+ p = value.ParseText( p, "<", flags );
if ( !p ) {
document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );
}
@@ -1416,11 +1452,12 @@
// --------- XMLDocument ----------- //
-XMLDocument::XMLDocument( bool _processEntities ) :
+XMLDocument::XMLDocument( bool _processEntities, Whitespace _whitespace ) :
XMLNode( 0 ),
writeBOM( false ),
processEntities( _processEntities ),
errorID( 0 ),
+ whitespace( _whitespace ),
errorStr1( 0 ),
errorStr2( 0 ),
charBuffer( 0 )
diff --git a/tinyxml2.h b/tinyxml2.h
index e1c22c5..b766d35 100644
--- a/tinyxml2.h
+++ b/tinyxml2.h
@@ -24,11 +24,21 @@
#ifndef TINYXML2_INCLUDED
#define TINYXML2_INCLUDED
-#include <cctype>
-#include <climits>
-#include <cstdio>
-#include <cstring>
-#include <cstdarg>
+#ifdef ANDROID_NDK
+ #include <ctype.h>
+ #include <limits.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdarg.h>
+#else
+ #include <cctype>
+ #include <climits>
+ #include <cstdio>
+ #include <cstdlib>
+ #include <cstring>
+ #include <cstdarg>
+#endif
/*
TODO: intern strings instead of allocation.
@@ -112,6 +122,7 @@
enum {
NEEDS_ENTITY_PROCESSING = 0x01,
NEEDS_NEWLINE_NORMALIZATION = 0x02,
+ COLLAPSE_WHITESPACE = 0x04,
TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
@@ -140,6 +151,7 @@
private:
void Reset();
+ void CollapseWhitespace();
enum {
NEEDS_FLUSH = 0x100,
@@ -365,6 +377,7 @@
// correct, but simple, and usually works.
static const char* SkipWhiteSpace( const char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<const unsigned char*>(p) ) ) { ++p; } return p; }
static char* SkipWhiteSpace( char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<unsigned char*>(p) ) ) { ++p; } return p; }
+ static bool IsWhiteSpace( char p ) { if ( IsUTF8Continuation( p ) ) { return false; } return isspace( static_cast<unsigned char>( p ) ) != 0; }	// single-character form of the test SkipWhiteSpace() loops on
inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX ) {
int n = 0;
@@ -1031,6 +1044,12 @@
};
+enum Whitespace {	// Whitespace-handling mode given to the XMLDocument constructor and read back through WhitespaceMode().
+ PRESERVE_WHITESPACE,	// The constructor's default.
+ COLLAPSE_WHITESPACE	// Text is parsed with the StrPair::COLLAPSE_WHITESPACE flag; note that flag (0x04) is a different constant from this enumerator.
+};
+
+
/** A Document binds together all the functionality.
It can be saved, loaded, and printed to the screen.
All Nodes are connected and allocated to a Document.
@@ -1041,7 +1060,7 @@
friend class XMLElement;
public:
/// constructor
- XMLDocument( bool processEntities = true );
+ XMLDocument( bool processEntities = true, Whitespace = PRESERVE_WHITESPACE );
~XMLDocument();
virtual XMLDocument* ToDocument() { return this; }
@@ -1086,7 +1105,8 @@
*/
int SaveFile( FILE* );
- bool ProcessEntities() const { return processEntities; }
+ bool ProcessEntities() const { return processEntities; }
+ Whitespace WhitespaceMode() const { return whitespace; }
/**
Returns true if this document has a leading Byte Order Mark of UTF8.
@@ -1189,6 +1209,7 @@
bool writeBOM;
bool processEntities;
int errorID;
+ Whitespace whitespace;
const char* errorStr1;
const char* errorStr2;
char* charBuffer;
diff --git a/xmltest.cpp b/xmltest.cpp
index 81018aa..f48fce8 100644
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -938,6 +938,25 @@
XMLTest( "QueryBoolText", boolValue, true, false );
}
+ // ----------- Whitespace ------------
+ {
+ const char* xml = "<element>"
+ "<a> This \nis ' text ' </a>"
+ "<b> This is ' text ' \n</b>"
+ "<c>This is ' \n\n text '</c>"
+ "</element>";
+ XMLDocument doc( true, COLLAPSE_WHITESPACE );
+ doc.Parse( xml );
+
+ const XMLElement* element = doc.FirstChildElement();
+ for( const XMLElement* parent = element->FirstChildElement();
+ parent;
+ parent = parent->NextSiblingElement() )
+ {
+ XMLTest( "Whitespace collapse", "This is ' text '", parent->GetText() );
+ }
+ }
+
// ----------- Performance tracking --------------
{