UTF-8 tests passing.
diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index 740f1f5..8a4fa95 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -18,6 +18,15 @@
static const char SINGLE_QUOTE = '\'';
static const char DOUBLE_QUOTE = '\"';
+// Bunch of unicode info at:
+// http://www.unicode.org/faq/utf_bom.html
+// ef bb bf (Microsoft "lead bytes") - designates UTF-8
+
+static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
+static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
+static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
+
+
#define DELETE_NODE( node ) { MemPool* pool = node->memPool; node->~XMLNode(); pool->Free( node ); }
#define DELETE_ATTRIBUTE( attrib ) { MemPool* pool = attrib->memPool; attrib->~XMLAttribute(); pool->Free( attrib ); }
@@ -117,6 +126,7 @@
}
+
const char* StrPair::GetStr()
{
if ( flags & NEEDS_FLUSH ) {
@@ -124,8 +134,8 @@
flags ^= NEEDS_FLUSH;
if ( flags ) {
- char* p = start;
- char* q = start;
+ char* p = start; // the read pointer
+ char* q = start; // the write pointer
while( p < end ) {
if ( (flags & NEEDS_NEWLINE_NORMALIZATION) && *p == CR ) {
@@ -151,21 +161,38 @@
}
else if ( (flags & NEEDS_ENTITY_PROCESSING) && *p == '&' ) {
int i=0;
- for( i=0; i<NUM_ENTITIES; ++i ) {
- if ( strncmp( p+1, entities[i].pattern, entities[i].length ) == 0
- && *(p+entities[i].length+1) == ';' )
- {
- // Found an entity convert;
- *q = entities[i].value;
- ++q;
- p += entities[i].length + 2;
- break;
+
+ // Entities handled by tinyXML2:
+ // - special entities in the entity table [in/out]
+ // - numeric character reference [in]
+ // 中 or 中
+
+ if ( *(p+1) == '#' ) {
+ char buf[10] = { 0 };
+ int len;
+ p = const_cast<char*>( XMLUtil::GetCharacterRef( p, buf, &len ) );
+ for( int i=0; i<len; ++i ) {
+ *q++ = buf[i];
}
+ TIXMLASSERT( q <= p );
}
- if ( i == NUM_ENTITIES ) {
- // fixme: treat as error?
- ++p;
- ++q;
+ else {
+ for( i=0; i<NUM_ENTITIES; ++i ) {
+ if ( strncmp( p+1, entities[i].pattern, entities[i].length ) == 0
+ && *(p+entities[i].length+1) == ';' )
+ {
+ // Found an entity convert;
+ *q = entities[i].value;
+ ++q;
+ p += entities[i].length + 2;
+ break;
+ }
+ }
+ if ( i == NUM_ENTITIES ) {
+ // fixme: treat as error?
+ ++p;
+ ++q;
+ }
}
}
else {
@@ -183,8 +210,135 @@
+
// --------- XMLUtil ----------- //
+const char* XMLUtil::ReadBOM( const char* p, bool* bom )
+{
+ *bom = false;
+ const unsigned char* pu = reinterpret_cast<const unsigned char*>(p);
+ // Check for BOM:
+ if ( *(pu+0) == TIXML_UTF_LEAD_0
+ && *(pu+1) == TIXML_UTF_LEAD_1
+ && *(pu+2) == TIXML_UTF_LEAD_2 )
+ {
+ *bom = true;
+ p += 3;
+ }
+ return p;
+}
+
+
+void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
+{
+ const unsigned long BYTE_MASK = 0xBF;
+ const unsigned long BYTE_MARK = 0x80;
+ const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+ if (input < 0x80)
+ *length = 1;
+ else if ( input < 0x800 )
+ *length = 2;
+ else if ( input < 0x10000 )
+ *length = 3;
+ else if ( input < 0x200000 )
+ *length = 4;
+ else
+ { *length = 0; return; } // This code won't covert this correctly anyway.
+
+ output += *length;
+
+ // Scary scary fall throughs.
+ switch (*length)
+ {
+ case 4:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 3:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 2:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 1:
+ --output;
+ *output = (char)(input | FIRST_BYTE_MARK[*length]);
+ }
+}
+
+
+const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
+{
+ // Presume an entity, and pull it out.
+ *length = 0;
+
+ if ( *(p+1) == '#' && *(p+2) )
+ {
+ unsigned long ucs = 0;
+ ptrdiff_t delta = 0;
+ unsigned mult = 1;
+
+ if ( *(p+2) == 'x' )
+ {
+ // Hexadecimal.
+ if ( !*(p+3) ) return 0;
+
+ const char* q = p+3;
+ q = strchr( q, ';' );
+
+ if ( !q || !*q ) return 0;
+
+ delta = q-p;
+ --q;
+
+ while ( *q != 'x' )
+ {
+ if ( *q >= '0' && *q <= '9' )
+ ucs += mult * (*q - '0');
+ else if ( *q >= 'a' && *q <= 'f' )
+ ucs += mult * (*q - 'a' + 10);
+ else if ( *q >= 'A' && *q <= 'F' )
+ ucs += mult * (*q - 'A' + 10 );
+ else
+ return 0;
+ mult *= 16;
+ --q;
+ }
+ }
+ else
+ {
+ // Decimal.
+ if ( !*(p+2) ) return 0;
+
+ const char* q = p+2;
+ q = strchr( q, ';' );
+
+ if ( !q || !*q ) return 0;
+
+ delta = q-p;
+ --q;
+
+ while ( *q != '#' )
+ {
+ if ( *q >= '0' && *q <= '9' )
+ ucs += mult * (*q - '0');
+ else
+ return 0;
+ mult *= 10;
+ --q;
+ }
+ }
+ // convert the UCS to UTF-8
+ ConvertUTF32ToUTF8( ucs, value, length );
+ return p + delta + 1;
+ }
+ return p+1;
+}
+
+
char* XMLDocument::Identify( char* p, XMLNode** node )
{
XMLNode* returnNode = 0;
@@ -456,14 +610,14 @@
if ( this->CData() ) {
p = value.ParseText( p, "]]>", StrPair::NEEDS_NEWLINE_NORMALIZATION );
if ( !p ) {
- document->SetError( XMLDocument::ERROR_PARSING_CDATA, start, 0 );
+ document->SetError( ERROR_PARSING_CDATA, start, 0 );
}
return p;
}
else {
p = value.ParseText( p, "<", StrPair::TEXT_ELEMENT );
if ( !p ) {
- document->SetError( XMLDocument::ERROR_PARSING_TEXT, start, 0 );
+ document->SetError( ERROR_PARSING_TEXT, start, 0 );
}
if ( p && *p ) {
return p-1;
@@ -498,7 +652,7 @@
const char* start = p;
p = value.ParseText( p, "-->", StrPair::COMMENT );
if ( p == 0 ) {
- document->SetError( XMLDocument::ERROR_PARSING_COMMENT, start, 0 );
+ document->SetError( ERROR_PARSING_COMMENT, start, 0 );
}
return p;
}
@@ -529,7 +683,7 @@
const char* start = p;
p = value.ParseText( p, "?>", StrPair::NEEDS_NEWLINE_NORMALIZATION );
if ( p == 0 ) {
- document->SetError( XMLDocument::ERROR_PARSING_DECLARATION, start, 0 );
+ document->SetError( ERROR_PARSING_DECLARATION, start, 0 );
}
return p;
}
@@ -559,7 +713,7 @@
p = value.ParseText( p, ">", StrPair::NEEDS_NEWLINE_NORMALIZATION );
if ( !p ) {
- document->SetError( XMLDocument::ERROR_PARSING_UNKNOWN, start, 0 );
+ document->SetError( ERROR_PARSING_UNKNOWN, start, 0 );
}
return p;
}
@@ -593,7 +747,7 @@
int XMLAttribute::QueryIntAttribute( int* value ) const
{
if ( TIXML_SSCANF( Value(), "%d", value ) == 1 )
- return ATTRIBUTE_SUCCESS;
+ return XML_NO_ERROR;
return WRONG_ATTRIBUTE_TYPE;
}
@@ -601,7 +755,7 @@
int XMLAttribute::QueryUnsignedAttribute( unsigned int* value ) const
{
if ( TIXML_SSCANF( Value(), "%u", value ) == 1 )
- return ATTRIBUTE_SUCCESS;
+ return XML_NO_ERROR;
return WRONG_ATTRIBUTE_TYPE;
}
@@ -613,11 +767,11 @@
if ( ival > 0 || XMLUtil::StringEqual( Value(), "true" ) ) {
*value = true;
- return ATTRIBUTE_SUCCESS;
+ return XML_NO_ERROR;
}
else if ( ival == 0 || XMLUtil::StringEqual( Value(), "false" ) ) {
*value = false;
- return ATTRIBUTE_SUCCESS;
+ return XML_NO_ERROR;
}
return WRONG_ATTRIBUTE_TYPE;
}
@@ -626,7 +780,7 @@
int XMLAttribute::QueryDoubleAttribute( double* value ) const
{
if ( TIXML_SSCANF( Value(), "%lf", value ) == 1 )
- return ATTRIBUTE_SUCCESS;
+ return XML_NO_ERROR;
return WRONG_ATTRIBUTE_TYPE;
}
@@ -634,7 +788,7 @@
int XMLAttribute::QueryFloatAttribute( float* value ) const
{
if ( TIXML_SSCANF( Value(), "%f", value ) == 1 )
- return ATTRIBUTE_SUCCESS;
+ return XML_NO_ERROR;
return WRONG_ATTRIBUTE_TYPE;
}
@@ -789,7 +943,7 @@
while( p ) {
p = XMLUtil::SkipWhiteSpace( p );
if ( !p || !(*p) ) {
- document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, Name() );
+ document->SetError( ERROR_PARSING_ELEMENT, start, Name() );
return 0;
}
@@ -801,7 +955,7 @@
p = attrib->ParseDeep( p );
if ( !p ) {
DELETE_ATTRIBUTE( attrib );
- document->SetError( XMLDocument::ERROR_PARSING_ATTRIBUTE, start, p );
+ document->SetError( ERROR_PARSING_ATTRIBUTE, start, p );
return 0;
}
LinkAttribute( attrib );
@@ -809,7 +963,7 @@
// end of the tag
else if ( *p == '/' && *(p+1) == '>' ) {
if ( closing ) {
- document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, p );
+ document->SetError( ERROR_PARSING_ELEMENT, start, p );
return 0;
}
*closedElement = true;
@@ -821,7 +975,7 @@
break;
}
else {
- document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, p );
+ document->SetError( ERROR_PARSING_ELEMENT, start, p );
return 0;
}
}
@@ -875,10 +1029,10 @@
}
-
// --------- XMLDocument ----------- //
XMLDocument::XMLDocument() :
XMLNode( 0 ),
+ writeBOM( false ),
charBuffer( 0 )
{
document = this; // avoid warning about 'this' in initializer list
@@ -906,7 +1060,7 @@
void XMLDocument::InitDocument()
{
- errorID = NO_ERROR;
+ errorID = XML_NO_ERROR;
errorStr1 = 0;
errorStr2 = 0;
@@ -943,7 +1097,7 @@
}
-int XMLDocument::Load( const char* filename )
+int XMLDocument::LoadFile( const char* filename )
{
ClearChildren();
InitDocument();
@@ -953,13 +1107,13 @@
SetError( ERROR_FILE_NOT_FOUND, filename, 0 );
return errorID;
}
- Load( fp );
+ LoadFile( fp );
fclose( fp );
return errorID;
}
-int XMLDocument::Load( FILE* fp )
+int XMLDocument::LoadFile( FILE* fp )
{
ClearChildren();
InitDocument();
@@ -968,16 +1122,27 @@
unsigned size = ftell( fp );
fseek( fp, 0, SEEK_SET );
+ if ( size == 0 ) {
+ return errorID;
+ }
+
charBuffer = new char[size+1];
fread( charBuffer, size, 1, fp );
charBuffer[size] = 0;
- ParseDeep( charBuffer );
+ const char* p = charBuffer;
+ p = XMLUtil::SkipWhiteSpace( p );
+ p = XMLUtil::ReadBOM( p, &writeBOM );
+ if ( !p || !*p ) {
+ return 0; // correctly parse an empty string?
+ }
+
+ ParseDeep( charBuffer + (p-charBuffer) );
return errorID;
}
-void XMLDocument::Save( const char* filename )
+void XMLDocument::SaveFile( const char* filename )
{
FILE* fp = fopen( filename, "w" );
XMLStreamer stream( fp );
@@ -994,10 +1159,17 @@
if ( !p || !*p ) {
return true; // correctly parse an empty string?
}
+ p = XMLUtil::SkipWhiteSpace( p );
+ p = XMLUtil::ReadBOM( p, &writeBOM );
+ if ( !p || !*p ) {
+ return true; // correctly parse an empty string?
+ }
+
size_t len = strlen( p );
charBuffer = new char[ len+1 ];
memcpy( charBuffer, p, len+1 );
+
ParseDeep( charBuffer );
return errorID;
}
@@ -1063,6 +1235,7 @@
}
restrictedEntityFlag['&'] = true;
restrictedEntityFlag['<'] = true;
+ restrictedEntityFlag['>'] = true; // not required, but consistency is nice
buffer.Push( 0 );
}
@@ -1115,7 +1288,8 @@
const bool* flag = restricted ? restrictedEntityFlag : entityFlag;
while ( *q ) {
- if ( *q < ENTITY_RANGE ) {
+ // Remember, char is sometimes signed. (How many times has that bitten me?)
+ if ( *q > 0 && *q < ENTITY_RANGE ) {
// Check for entities. If one is found, flush
// the stream up until the entity, write the
// entity, and keep looking.
@@ -1143,6 +1317,18 @@
}
+void XMLStreamer::PushHeader( bool writeBOM, bool writeDec )
+{
+ static const unsigned char bom[] = { TIXML_UTF_LEAD_0, TIXML_UTF_LEAD_1, TIXML_UTF_LEAD_2, 0 };
+ if ( writeBOM ) {
+ Print( "%s", bom );
+ }
+ if ( writeDec ) {
+ PushDeclaration( "xml version=\"1.0\"" );
+ }
+}
+
+
void XMLStreamer::OpenElement( const char* name )
{
if ( elementJustOpened ) {
@@ -1262,6 +1448,15 @@
}
+bool XMLStreamer::VisitEnter( const XMLDocument& doc )
+{
+ if ( doc.HasBOM() ) {
+ PushHeader( true, false );
+ }
+ return true;
+}
+
+
bool XMLStreamer::VisitEnter( const XMLElement& element, const XMLAttribute* attribute )
{
OpenElement( element.Name() );
diff --git a/tinyxml2.h b/tinyxml2.h
index 4792b09..a1f7603 100644
--- a/tinyxml2.h
+++ b/tinyxml2.h
@@ -371,6 +371,12 @@
inline static int IsUTF8Continuation( unsigned char p ) { return p & 0x80; }
inline static int IsAlphaNum( unsigned char anyByte ) { return ( anyByte < 128 ) ? isalnum( anyByte ) : 1; }
inline static int IsAlpha( unsigned char anyByte ) { return ( anyByte < 128 ) ? isalpha( anyByte ) : 1; }
+
+ static const char* ReadBOM( const char* p, bool* hasBOM );
+ // p is the starting location,
+ // the UTF-8 value of the entity will be placed in value, and length filled in.
+ static const char* GetCharacterRef( const char* p, char* value, int* length );
+ static void ConvertUTF32ToUTF8( unsigned long input, char* output, int* length );
};
@@ -567,9 +573,21 @@
enum {
- ATTRIBUTE_SUCCESS,
+ XML_NO_ERROR = 0,
+
NO_ATTRIBUTE,
- WRONG_ATTRIBUTE_TYPE
+ WRONG_ATTRIBUTE_TYPE,
+
+ ERROR_FILE_NOT_FOUND,
+ ERROR_ELEMENT_MISMATCH,
+ ERROR_PARSING_ELEMENT,
+ ERROR_PARSING_ATTRIBUTE,
+ ERROR_IDENTIFYING_TAG,
+ ERROR_PARSING_TEXT,
+ ERROR_PARSING_CDATA,
+ ERROR_PARSING_COMMENT,
+ ERROR_PARSING_DECLARATION,
+ ERROR_PARSING_UNKNOWN
};
@@ -691,9 +709,11 @@
virtual const XMLDocument* ToDocument() const { return this; }
int Parse( const char* xml );
- int Load( const char* filename );
- int Load( FILE* );
- void Save( const char* filename );
+ int LoadFile( const char* filename );
+ int LoadFile( FILE* );
+ void SaveFile( const char* filename );
+
+ bool HasBOM() const { return writeBOM; }
void Print( XMLStreamer* streamer=0 );
virtual bool Accept( XMLVisitor* visitor ) const;
@@ -716,24 +736,10 @@
*/
void DeleteNode( XMLNode* node ) { node->parent->DeleteChild( node ); }
- enum {
- NO_ERROR = 0,
- ERROR_FILE_NOT_FOUND,
- ERROR_ELEMENT_MISMATCH,
- ERROR_PARSING_ELEMENT,
- ERROR_PARSING_ATTRIBUTE,
- ERROR_IDENTIFYING_TAG,
- ERROR_PARSING_TEXT,
- ERROR_PARSING_CDATA,
- ERROR_PARSING_COMMENT,
- ERROR_PARSING_DECLARATION,
- ERROR_PARSING_UNKNOWN
-
- };
void SetError( int error, const char* str1, const char* str2 );
- bool Error() const { return errorID != NO_ERROR; }
- int GetErrorID() const { return errorID; }
+ bool Error() const { return errorID != XML_NO_ERROR; }
+ int ErrorID() const { return errorID; }
const char* GetErrorStr1() const { return errorStr1; }
const char* GetErrorStr2() const { return errorStr2; }
void PrintError() const;
@@ -745,6 +751,7 @@
void operator=( const XMLDocument& ); // not supported
void InitDocument();
+ bool writeBOM;
int errorID;
const char* errorStr1;
const char* errorStr2;
@@ -763,6 +770,7 @@
XMLStreamer( FILE* file=0 );
~XMLStreamer() {}
+ void PushHeader( bool writeBOM, bool writeDeclaration );
void OpenElement( const char* name );
void PushAttribute( const char* name, const char* value );
void CloseElement();
@@ -772,7 +780,7 @@
void PushDeclaration( const char* value );
void PushUnknown( const char* value );
- virtual bool VisitEnter( const XMLDocument& /*doc*/ ) { return true; }
+ virtual bool VisitEnter( const XMLDocument& /*doc*/ );
virtual bool VisitExit( const XMLDocument& /*doc*/ ) { return true; }
virtual bool VisitEnter( const XMLElement& element, const XMLAttribute* attribute );
diff --git a/xmltest.cpp b/xmltest.cpp
index c025841..95f9738 100644
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -86,6 +86,18 @@
}
+void NullLineEndings( char* p )
+{
+ while( p && *p ) {
+ if ( *p == '\n' || *p == '\r' ) {
+ *p = 0;
+ return;
+ }
+ ++p;
+ }
+}
+
+
int main( int argc, const char* argv )
{
#if defined( WIN32 )
@@ -150,6 +162,7 @@
}
#endif
{
+#if 0
// Test: Programmatic DOM
// Build:
// <element>
@@ -198,17 +211,19 @@
printf( "%s", streamer.CStr() );
delete doc;
+#endif
}
#endif
{
+#if 0
// Test: Dream
// XML1 : 1,187,569 bytes in 31,209 allocations
// XML2 : 469,073 bytes in 323 allocations
//int newStart = gNew;
XMLDocument doc;
- doc.Load( "dream.xml" );
+ doc.LoadFile( "dream.xml" );
- doc.Save( "dreamout.xml" );
+ doc.SaveFile( "dreamout.xml" );
doc.PrintError();
XMLTest( "Dream", "xml version=\"1.0\"",
@@ -222,7 +237,7 @@
doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText() );
XMLDocument doc2;
- doc2.Load( "dreamout.xml" );
+ doc2.LoadFile( "dreamout.xml" );
XMLTest( "Dream-out", "xml version=\"1.0\"",
doc2.FirstChild()->ToDeclaration()->Value() );
XMLTest( "Dream-out", true, doc2.FirstChild()->NextSibling()->ToUnknown() ? true : false );
@@ -231,10 +246,132 @@
XMLTest( "Dream-out", "And Robin shall restore amends.",
doc2.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText() );
+#endif
//gNewTotal = gNew - newStart;
}
- #if defined( WIN32 )
+#if 0
+ {
+ const char* error = "<?xml version=\"1.0\" standalone=\"no\" ?>\n"
+ "<passages count=\"006\" formatversion=\"20020620\">\n"
+ " <wrong error>\n"
+ "</passages>";
+
+ XMLDocument doc;
+ doc.Parse( error );
+ XMLTest( "Bad XML", doc.ErrorID(), ERROR_PARSING_ATTRIBUTE );
+ }
+
+ {
+ const char* str = "<doc attr0='1' attr1='2.0' attr2='foo' />";
+
+ XMLDocument doc;
+ doc.Parse( str );
+
+ XMLElement* ele = doc.FirstChildElement();
+
+ int iVal, result;
+ double dVal;
+
+ result = ele->QueryDoubleAttribute( "attr0", &dVal );
+ XMLTest( "Query attribute: int as double", result, XML_NO_ERROR );
+ XMLTest( "Query attribute: int as double", (int)dVal, 1 );
+ result = ele->QueryDoubleAttribute( "attr1", &dVal );
+ XMLTest( "Query attribute: double as double", (int)dVal, 2 );
+ result = ele->QueryIntAttribute( "attr1", &iVal );
+ XMLTest( "Query attribute: double as int", result, XML_NO_ERROR );
+ XMLTest( "Query attribute: double as int", iVal, 2 );
+ result = ele->QueryIntAttribute( "attr2", &iVal );
+ XMLTest( "Query attribute: not a number", result, WRONG_ATTRIBUTE_TYPE );
+ result = ele->QueryIntAttribute( "bar", &iVal );
+ XMLTest( "Query attribute: does not exist", result, NO_ATTRIBUTE );
+ }
+
+ {
+ const char* str = "<doc/>";
+
+ XMLDocument doc;
+ doc.Parse( str );
+
+ XMLElement* ele = doc.FirstChildElement();
+
+ int iVal;
+ double dVal;
+
+ ele->SetAttribute( "str", "strValue" );
+ ele->SetAttribute( "int", 1 );
+ ele->SetAttribute( "double", -1.0 );
+
+ const char* cStr = ele->Attribute( "str" );
+ ele->QueryIntAttribute( "int", &iVal );
+ ele->QueryDoubleAttribute( "double", &dVal );
+
+ XMLTest( "Attribute round trip. c-string.", "strValue", cStr );
+ XMLTest( "Attribute round trip. int.", 1, iVal );
+ XMLTest( "Attribute round trip. double.", -1, (int)dVal );
+ }
+
+#endif
+ {
+ XMLDocument doc;
+ doc.LoadFile( "utf8test.xml" );
+
+ // Get the attribute "value" from the "Russian" element and check it.
+ XMLElement* element = doc.FirstChildElement( "document" )->FirstChildElement( "Russian" );
+ const unsigned char correctValue[] = { 0xd1U, 0x86U, 0xd0U, 0xb5U, 0xd0U, 0xbdU, 0xd0U, 0xbdU,
+ 0xd0U, 0xbeU, 0xd1U, 0x81U, 0xd1U, 0x82U, 0xd1U, 0x8cU, 0 };
+
+ XMLTest( "UTF-8: Russian value.", (const char*)correctValue, element->Attribute( "value" ) );
+
+ const unsigned char russianElementName[] = { 0xd0U, 0xa0U, 0xd1U, 0x83U,
+ 0xd1U, 0x81U, 0xd1U, 0x81U,
+ 0xd0U, 0xbaU, 0xd0U, 0xb8U,
+ 0xd0U, 0xb9U, 0 };
+ const char russianText[] = "<\xD0\xB8\xD0\xBC\xD0\xB5\xD0\xB5\xD1\x82>";
+
+ XMLText* text = doc.FirstChildElement( "document" )->FirstChildElement( (const char*) russianElementName )->FirstChild()->ToText();
+ XMLTest( "UTF-8: Browsing russian element name.",
+ russianText,
+ text->Value() );
+
+ // Now try for a round trip.
+ doc.SaveFile( "utf8testout.xml" );
+
+ // Check the round trip.
+ char savedBuf[256];
+ char verifyBuf[256];
+ int okay = 0;
+
+ FILE* saved = fopen( "utf8testout.xml", "r" );
+ FILE* verify = fopen( "utf8testverify.xml", "r" );
+
+ if ( saved && verify )
+ {
+ okay = 1;
+ while ( fgets( verifyBuf, 256, verify ) )
+ {
+ fgets( savedBuf, 256, saved );
+ NullLineEndings( verifyBuf );
+ NullLineEndings( savedBuf );
+
+ if ( strcmp( verifyBuf, savedBuf ) )
+ {
+ printf( "verify:%s<\n", verifyBuf );
+ printf( "saved :%s<\n", savedBuf );
+ okay = 0;
+ break;
+ }
+ }
+ }
+ if ( saved )
+ fclose( saved );
+ if ( verify )
+ fclose( verify );
+ XMLTest( "UTF-8: Verified multi-language round trip.", 1, okay );
+ }
+
+
+#if defined( WIN32 )
_CrtMemCheckpoint( &endMemState );
//_CrtMemDumpStatistics( &endMemState );