/*
www.sourceforge.net/projects/tinyxml
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.

Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.

2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.

3. This notice may not be removed or altered from any source
distribution.
*/
24
Dan Albert0238a202014-08-22 00:52:41 +000025#include "tinyxml.h"
The Android Open Source Project562be062009-03-03 19:30:48 -080026#include <ctype.h>
27#include <stddef.h>
28
29//#define DEBUG_PARSER
30
// Note that "PutString" hardcodes the same list. This
// is less flexible than it appears. Changing the entries
// or order will break PutString.
//
// Each entry is: the entity text as it appears in the XML, the length of
// that text, and the single character it expands to. GetEntity() matches
// input against the text with strncmp() using the stored length.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
{
    { "&amp;", 5, '&' },
    { "&lt;", 4, '<' },
    { "&gt;", 4, '>' },
    { "&quot;", 6, '\"' },
    { "&apos;", 6, '\'' }
};
42
// Bunch of unicode info at:
//      http://www.unicode.org/faq/utf_bom.html
// Including the basis of this table, which determines the number of bytes
// in the sequence from the lead byte. 1 is placed for invalid sequences --
// although the result will be junk, pass it through as much as possible.
// Beware of the non-characters in UTF-8:
//      ef bb bf (Microsoft "lead bytes")
//      ef bf be
//      ef bf bf

// The three bytes of the Microsoft UTF-8 "lead bytes" marker (ef bb bf).
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;

// Indexed by the first byte of a sequence; yields the sequence length in bytes.
const int TiXmlBase::utf8ByteTable[256] =
{
    //  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x00
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x10
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x20
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x30
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x40
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x50
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x60
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x70 End of ASCII range
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x80 0x80 to 0xc1 invalid
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x90
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xa0
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xb0
        1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xc0 0xc2 to 0xdf 2 byte
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xd0
        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  // 0xe0 0xe0 to 0xef 3 byte
        4,  4,  4,  4,  4,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1   // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
77
78
79void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
80{
81 const unsigned long BYTE_MASK = 0xBF;
82 const unsigned long BYTE_MARK = 0x80;
83 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
84
85 if (input < 0x80)
86 *length = 1;
87 else if ( input < 0x800 )
88 *length = 2;
89 else if ( input < 0x10000 )
90 *length = 3;
91 else if ( input < 0x200000 )
92 *length = 4;
93 else
94 { *length = 0; return; } // This code won't covert this correctly anyway.
95
96 output += *length;
97
98 // Scary scary fall throughs.
99 switch (*length)
100 {
101 case 4:
102 --output;
103 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
104 input >>= 6;
105 case 3:
106 --output;
107 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
108 input >>= 6;
109 case 2:
110 --output;
111 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
112 input >>= 6;
113 case 1:
114 --output;
115 *output = (char)(input | FIRST_BYTE_MARK[*length]);
116 }
117}
118
119
120/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
121{
122 // This will only work for low-ascii, everything else is assumed to be a valid
123 // letter. I'm not sure this is the best approach, but it is quite tricky trying
124 // to figure out alhabetical vs. not across encoding. So take a very
125 // conservative approach.
126
127// if ( encoding == TIXML_ENCODING_UTF8 )
128// {
129 if ( anyByte < 127 )
130 return isalpha( anyByte );
131 else
132 return 1; // What else to do? The unicode set is huge...get the english ones right.
133// }
134// else
135// {
136// return isalpha( anyByte );
137// }
138}
139
140
141/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
142{
143 // This will only work for low-ascii, everything else is assumed to be a valid
144 // letter. I'm not sure this is the best approach, but it is quite tricky trying
145 // to figure out alhabetical vs. not across encoding. So take a very
146 // conservative approach.
147
148// if ( encoding == TIXML_ENCODING_UTF8 )
149// {
150 if ( anyByte < 127 )
151 return isalnum( anyByte );
152 else
153 return 1; // What else to do? The unicode set is huge...get the english ones right.
154// }
155// else
156// {
157// return isalnum( anyByte );
158// }
159}
160
161
162class TiXmlParsingData
163{
164 friend class TiXmlDocument;
165 public:
166 void Stamp( const char* now, TiXmlEncoding encoding );
167
Dan Albert0238a202014-08-22 00:52:41 +0000168 const TiXmlCursor& Cursor() { return cursor; }
The Android Open Source Project562be062009-03-03 19:30:48 -0800169
170 private:
171 // Only used by the document!
172 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
173 {
174 assert( start );
175 stamp = start;
176 tabsize = _tabsize;
177 cursor.row = row;
178 cursor.col = col;
179 }
180
181 TiXmlCursor cursor;
182 const char* stamp;
183 int tabsize;
184};
185
186
187void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
188{
189 assert( now );
190
191 // Do nothing if the tabsize is 0.
192 if ( tabsize < 1 )
193 {
194 return;
195 }
196
197 // Get the current row, column.
198 int row = cursor.row;
199 int col = cursor.col;
200 const char* p = stamp;
201 assert( p );
202
203 while ( p < now )
204 {
205 // Treat p as unsigned, so we have a happy compiler.
206 const unsigned char* pU = (const unsigned char*)p;
207
208 // Code contributed by Fletcher Dunn: (modified by lee)
209 switch (*pU) {
210 case 0:
211 // We *should* never get here, but in case we do, don't
212 // advance past the terminating null character, ever
213 return;
214
215 case '\r':
216 // bump down to the next line
217 ++row;
218 col = 0;
219 // Eat the character
220 ++p;
221
222 // Check for \r\n sequence, and treat this as a single character
223 if (*p == '\n') {
224 ++p;
225 }
226 break;
227
228 case '\n':
229 // bump down to the next line
230 ++row;
231 col = 0;
232
233 // Eat the character
234 ++p;
235
236 // Check for \n\r sequence, and treat this as a single
237 // character. (Yes, this bizarre thing does occur still
238 // on some arcane platforms...)
239 if (*p == '\r') {
240 ++p;
241 }
242 break;
243
244 case '\t':
245 // Eat the character
246 ++p;
247
248 // Skip to next tab stop
249 col = (col / tabsize + 1) * tabsize;
250 break;
251
252 case TIXML_UTF_LEAD_0:
253 if ( encoding == TIXML_ENCODING_UTF8 )
254 {
255 if ( *(p+1) && *(p+2) )
256 {
257 // In these cases, don't advance the column. These are
258 // 0-width spaces.
259 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
260 p += 3;
261 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
262 p += 3;
263 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
264 p += 3;
265 else
266 { p +=3; ++col; } // A normal character.
267 }
268 }
269 else
270 {
271 ++p;
272 ++col;
273 }
274 break;
275
276 default:
277 if ( encoding == TIXML_ENCODING_UTF8 )
278 {
279 // Eat the 1 to 4 byte utf8 character.
Dan Albert0238a202014-08-22 00:52:41 +0000280 int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
The Android Open Source Project562be062009-03-03 19:30:48 -0800281 if ( step == 0 )
282 step = 1; // Error case from bad encoding, but handle gracefully.
283 p += step;
284
285 // Just advance one column, of course.
286 ++col;
287 }
288 else
289 {
290 ++p;
291 ++col;
292 }
293 break;
294 }
295 }
296 cursor.row = row;
297 cursor.col = col;
298 assert( cursor.row >= -1 );
299 assert( cursor.col >= -1 );
300 stamp = p;
301 assert( stamp );
302}
303
304
305const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
306{
307 if ( !p || !*p )
308 {
309 return 0;
310 }
311 if ( encoding == TIXML_ENCODING_UTF8 )
312 {
313 while ( *p )
314 {
315 const unsigned char* pU = (const unsigned char*)p;
316
317 // Skip the stupid Microsoft UTF-8 Byte order marks
318 if ( *(pU+0)==TIXML_UTF_LEAD_0
319 && *(pU+1)==TIXML_UTF_LEAD_1
320 && *(pU+2)==TIXML_UTF_LEAD_2 )
321 {
322 p += 3;
323 continue;
324 }
325 else if(*(pU+0)==TIXML_UTF_LEAD_0
326 && *(pU+1)==0xbfU
327 && *(pU+2)==0xbeU )
328 {
329 p += 3;
330 continue;
331 }
332 else if(*(pU+0)==TIXML_UTF_LEAD_0
333 && *(pU+1)==0xbfU
334 && *(pU+2)==0xbfU )
335 {
336 p += 3;
337 continue;
338 }
339
Dan Albert0238a202014-08-22 00:52:41 +0000340 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space.
The Android Open Source Project562be062009-03-03 19:30:48 -0800341 ++p;
342 else
343 break;
344 }
345 }
346 else
347 {
Dan Albert0238a202014-08-22 00:52:41 +0000348 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
The Android Open Source Project562be062009-03-03 19:30:48 -0800349 ++p;
350 }
351
352 return p;
353}
354
355#ifdef TIXML_USE_STL
Dan Albert0238a202014-08-22 00:52:41 +0000356/*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
The Android Open Source Project562be062009-03-03 19:30:48 -0800357{
358 for( ;; )
359 {
360 if ( !in->good() ) return false;
361
362 int c = in->peek();
363 // At this scope, we can't get to a document. So fail silently.
364 if ( !IsWhiteSpace( c ) || c <= 0 )
365 return true;
366
367 *tag += (char) in->get();
368 }
369}
370
Dan Albert0238a202014-08-22 00:52:41 +0000371/*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
The Android Open Source Project562be062009-03-03 19:30:48 -0800372{
373 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
374 while ( in->good() )
375 {
376 int c = in->peek();
377 if ( c == character )
378 return true;
379 if ( c <= 0 ) // Silent failure: can't get document at this scope
380 return false;
381
382 in->get();
383 *tag += (char) c;
384 }
385 return false;
386}
387#endif
388
389const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
390{
391 *name = "";
392 assert( p );
393
394 // Names start with letters or underscores.
395 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
396 // algorithm is generous.
397 //
398 // After that, they can be letters, underscores, numbers,
399 // hyphens, or colons. (Colons are valid ony for namespaces,
400 // but tinyxml can't tell namespaces from names.)
401 if ( p && *p
402 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
403 {
404 while( p && *p
405 && ( IsAlphaNum( (unsigned char ) *p, encoding )
406 || *p == '_'
407 || *p == '-'
408 || *p == '.'
409 || *p == ':' ) )
410 {
Dan Albert0238a202014-08-22 00:52:41 +0000411 (*name) += *p;
The Android Open Source Project562be062009-03-03 19:30:48 -0800412 ++p;
413 }
414 return p;
415 }
416 return 0;
417}
418
419const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
420{
421 // Presume an entity, and pull it out.
422 TIXML_STRING ent;
423 int i;
424 *length = 0;
425
426 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
427 {
428 unsigned long ucs = 0;
429 ptrdiff_t delta = 0;
430 unsigned mult = 1;
431
432 if ( *(p+2) == 'x' )
433 {
434 // Hexadecimal.
435 if ( !*(p+3) ) return 0;
436
437 const char* q = p+3;
438 q = strchr( q, ';' );
439
440 if ( !q || !*q ) return 0;
441
442 delta = q-p;
443 --q;
444
445 while ( *q != 'x' )
446 {
447 if ( *q >= '0' && *q <= '9' )
448 ucs += mult * (*q - '0');
449 else if ( *q >= 'a' && *q <= 'f' )
450 ucs += mult * (*q - 'a' + 10);
451 else if ( *q >= 'A' && *q <= 'F' )
452 ucs += mult * (*q - 'A' + 10 );
453 else
454 return 0;
455 mult *= 16;
456 --q;
457 }
458 }
459 else
460 {
461 // Decimal.
462 if ( !*(p+2) ) return 0;
463
464 const char* q = p+2;
465 q = strchr( q, ';' );
466
467 if ( !q || !*q ) return 0;
468
469 delta = q-p;
470 --q;
471
472 while ( *q != '#' )
473 {
474 if ( *q >= '0' && *q <= '9' )
475 ucs += mult * (*q - '0');
476 else
477 return 0;
478 mult *= 10;
479 --q;
480 }
481 }
482 if ( encoding == TIXML_ENCODING_UTF8 )
483 {
484 // convert the UCS to UTF-8
485 ConvertUTF32ToUTF8( ucs, value, length );
486 }
487 else
488 {
489 *value = (char)ucs;
490 *length = 1;
491 }
492 return p + delta + 1;
493 }
494
495 // Now try to match it.
496 for( i=0; i<NUM_ENTITY; ++i )
497 {
498 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
499 {
500 assert( strlen( entity[i].str ) == entity[i].strLength );
501 *value = entity[i].chr;
502 *length = 1;
503 return ( p + entity[i].strLength );
504 }
505 }
506
507 // So it wasn't an entity, its unrecognized, or something like that.
508 *value = *p; // Don't put back the last one, since we return it!
509 return p+1;
510}
511
512
513bool TiXmlBase::StringEqual( const char* p,
514 const char* tag,
515 bool ignoreCase,
516 TiXmlEncoding encoding )
517{
518 assert( p );
519 assert( tag );
520 if ( !p || !*p )
521 {
522 assert( 0 );
523 return false;
524 }
525
526 const char* q = p;
527
528 if ( ignoreCase )
529 {
530 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
531 {
532 ++q;
533 ++tag;
534 }
535
536 if ( *tag == 0 )
537 return true;
538 }
539 else
540 {
541 while ( *q && *tag && *q == *tag )
542 {
543 ++q;
544 ++tag;
545 }
546
547 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
548 return true;
549 }
550 return false;
551}
552
553const char* TiXmlBase::ReadText( const char* p,
554 TIXML_STRING * text,
555 bool trimWhiteSpace,
556 const char* endTag,
557 bool caseInsensitive,
558 TiXmlEncoding encoding )
559{
560 *text = "";
561 if ( !trimWhiteSpace // certain tags always keep whitespace
562 || !condenseWhiteSpace ) // if true, whitespace is always kept
563 {
564 // Keep all the white space.
565 while ( p && *p
566 && !StringEqual( p, endTag, caseInsensitive, encoding )
567 )
568 {
569 int len;
570 char cArr[4] = { 0, 0, 0, 0 };
571 p = GetChar( p, cArr, &len, encoding );
572 text->append( cArr, len );
573 }
574 }
575 else
576 {
577 bool whitespace = false;
578
579 // Remove leading white space:
580 p = SkipWhiteSpace( p, encoding );
581 while ( p && *p
582 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
583 {
584 if ( *p == '\r' || *p == '\n' )
585 {
586 whitespace = true;
587 ++p;
588 }
589 else if ( IsWhiteSpace( *p ) )
590 {
591 whitespace = true;
592 ++p;
593 }
594 else
595 {
596 // If we've found whitespace, add it before the
597 // new character. Any whitespace just becomes a space.
598 if ( whitespace )
599 {
600 (*text) += ' ';
601 whitespace = false;
602 }
603 int len;
604 char cArr[4] = { 0, 0, 0, 0 };
605 p = GetChar( p, cArr, &len, encoding );
606 if ( len == 1 )
607 (*text) += cArr[0]; // more efficient
608 else
609 text->append( cArr, len );
610 }
611 }
612 }
Dan Albert0238a202014-08-22 00:52:41 +0000613 return p + strlen( endTag );
The Android Open Source Project562be062009-03-03 19:30:48 -0800614}
615
616#ifdef TIXML_USE_STL
617
Dan Albert0238a202014-08-22 00:52:41 +0000618void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
The Android Open Source Project562be062009-03-03 19:30:48 -0800619{
620 // The basic issue with a document is that we don't know what we're
621 // streaming. Read something presumed to be a tag (and hope), then
622 // identify it, and call the appropriate stream method on the tag.
623 //
624 // This "pre-streaming" will never read the closing ">" so the
625 // sub-tag can orient itself.
626
627 if ( !StreamTo( in, '<', tag ) )
628 {
629 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
630 return;
631 }
632
633 while ( in->good() )
634 {
635 int tagIndex = (int) tag->length();
636 while ( in->good() && in->peek() != '>' )
637 {
638 int c = in->get();
639 if ( c <= 0 )
640 {
641 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
642 break;
643 }
644 (*tag) += (char) c;
645 }
646
647 if ( in->good() )
648 {
649 // We now have something we presume to be a node of
650 // some sort. Identify it, and call the node to
651 // continue streaming.
652 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
653
654 if ( node )
655 {
656 node->StreamIn( in, tag );
657 bool isElement = node->ToElement() != 0;
658 delete node;
659 node = 0;
660
661 // If this is the root element, we're done. Parsing will be
662 // done by the >> operator.
663 if ( isElement )
664 {
665 return;
666 }
667 }
668 else
669 {
670 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
671 return;
672 }
673 }
674 }
675 // We should have returned sooner.
676 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
677}
678
679#endif
680
681const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
682{
683 ClearError();
684
685 // Parse away, at the document level. Since a document
686 // contains nothing but other tags, most of what happens
687 // here is skipping white space.
688 if ( !p || !*p )
689 {
690 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
691 return 0;
692 }
693
694 // Note that, for a document, this needs to come
695 // before the while space skip, so that parsing
696 // starts from the pointer we are given.
697 location.Clear();
698 if ( prevData )
699 {
700 location.row = prevData->cursor.row;
701 location.col = prevData->cursor.col;
702 }
703 else
704 {
705 location.row = 0;
706 location.col = 0;
707 }
708 TiXmlParsingData data( p, TabSize(), location.row, location.col );
709 location = data.Cursor();
710
711 if ( encoding == TIXML_ENCODING_UNKNOWN )
712 {
713 // Check for the Microsoft UTF-8 lead bytes.
714 const unsigned char* pU = (const unsigned char*)p;
715 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
716 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
717 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
718 {
719 encoding = TIXML_ENCODING_UTF8;
720 useMicrosoftBOM = true;
721 }
722 }
723
724 p = SkipWhiteSpace( p, encoding );
725 if ( !p )
726 {
727 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
728 return 0;
729 }
730
731 while ( p && *p )
732 {
733 TiXmlNode* node = Identify( p, encoding );
734 if ( node )
735 {
736 p = node->Parse( p, &data, encoding );
737 LinkEndChild( node );
738 }
739 else
740 {
741 break;
742 }
743
744 // Did we get encoding info?
745 if ( encoding == TIXML_ENCODING_UNKNOWN
746 && node->ToDeclaration() )
747 {
748 TiXmlDeclaration* dec = node->ToDeclaration();
749 const char* enc = dec->Encoding();
750 assert( enc );
751
752 if ( *enc == 0 )
753 encoding = TIXML_ENCODING_UTF8;
754 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
755 encoding = TIXML_ENCODING_UTF8;
756 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
757 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
758 else
759 encoding = TIXML_ENCODING_LEGACY;
760 }
761
762 p = SkipWhiteSpace( p, encoding );
763 }
764
765 // Was this empty?
766 if ( !firstChild ) {
767 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
768 return 0;
769 }
770
771 // All is well.
772 return p;
773}
774
775void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
776{
777 // The first error in a chain is more accurate - don't set again!
778 if ( error )
779 return;
780
781 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
782 error = true;
783 errorId = err;
784 errorDesc = errorString[ errorId ];
785
786 errorLocation.Clear();
787 if ( pError && data )
788 {
789 data->Stamp( pError, encoding );
790 errorLocation = data->Cursor();
791 }
792}
793
794
795TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
796{
797 TiXmlNode* returnNode = 0;
798
799 p = SkipWhiteSpace( p, encoding );
800 if( !p || !*p || *p != '<' )
801 {
802 return 0;
803 }
804
Dan Albert0238a202014-08-22 00:52:41 +0000805 TiXmlDocument* doc = GetDocument();
The Android Open Source Project562be062009-03-03 19:30:48 -0800806 p = SkipWhiteSpace( p, encoding );
807
808 if ( !p || !*p )
809 {
810 return 0;
811 }
812
813 // What is this thing?
814 // - Elements start with a letter or underscore, but xml is reserved.
815 // - Comments: <!--
816 // - Decleration: <?xml
817 // - Everthing else is unknown to tinyxml.
818 //
819
820 const char* xmlHeader = { "<?xml" };
821 const char* commentHeader = { "<!--" };
822 const char* dtdHeader = { "<!" };
823 const char* cdataHeader = { "<![CDATA[" };
824
825 if ( StringEqual( p, xmlHeader, true, encoding ) )
826 {
827 #ifdef DEBUG_PARSER
828 TIXML_LOG( "XML parsing Declaration\n" );
829 #endif
830 returnNode = new TiXmlDeclaration();
831 }
832 else if ( StringEqual( p, commentHeader, false, encoding ) )
833 {
834 #ifdef DEBUG_PARSER
835 TIXML_LOG( "XML parsing Comment\n" );
836 #endif
837 returnNode = new TiXmlComment();
838 }
839 else if ( StringEqual( p, cdataHeader, false, encoding ) )
840 {
841 #ifdef DEBUG_PARSER
842 TIXML_LOG( "XML parsing CDATA\n" );
843 #endif
844 TiXmlText* text = new TiXmlText( "" );
845 text->SetCDATA( true );
846 returnNode = text;
847 }
848 else if ( StringEqual( p, dtdHeader, false, encoding ) )
849 {
850 #ifdef DEBUG_PARSER
851 TIXML_LOG( "XML parsing Unknown(1)\n" );
852 #endif
853 returnNode = new TiXmlUnknown();
854 }
855 else if ( IsAlpha( *(p+1), encoding )
856 || *(p+1) == '_' )
857 {
858 #ifdef DEBUG_PARSER
859 TIXML_LOG( "XML parsing Element\n" );
860 #endif
861 returnNode = new TiXmlElement( "" );
862 }
863 else
864 {
865 #ifdef DEBUG_PARSER
866 TIXML_LOG( "XML parsing Unknown(2)\n" );
867 #endif
868 returnNode = new TiXmlUnknown();
869 }
870
871 if ( returnNode )
872 {
873 // Set the parent, so it can report errors
874 returnNode->parent = this;
875 }
Dan Albert0238a202014-08-22 00:52:41 +0000876 else
877 {
878 if ( doc )
879 doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
880 }
The Android Open Source Project562be062009-03-03 19:30:48 -0800881 return returnNode;
882}
883
884#ifdef TIXML_USE_STL
885
// Streams the rest of this element from `in`, appending the raw text to `tag`.
//
// We're called with some amount of pre-parsing. That is, some of "this"
// element is in "tag". The function first reads up to the closing ">" of the
// start tag; for a "/>" tag that is all. Otherwise it keeps reading text,
// child nodes and white space until this element's closing tag has been
// consumed. It returns early if the stream stops being good, and reports
// TIXML_ERROR_EMBEDDED_NULL to the document when a read yields a value <= 0.
void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
{
    // Go ahead and stream to the closing ">" of the start tag.
    while( in->good() )
    {
        int c = in->get();
        if ( c <= 0 )
        {
            TiXmlDocument* document = GetDocument();
            if ( document )
                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
            return;
        }
        (*tag) += (char) c ;

        if ( c == '>' )
            break;
    }

    // Fewer than 3 characters accumulated: return before the at() calls
    // below index from the end of the tag.
    if ( tag->length() < 3 ) return;

    // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
    // If not, identify and stream.

    if (    tag->at( tag->length() - 1 ) == '>'
         && tag->at( tag->length() - 2 ) == '/' )
    {
        // All good!
        return;
    }
    else if ( tag->at( tag->length() - 1 ) == '>' )
    {
        // There is more. Could be:
        //      text
        //      closing tag
        //      another node.
        for ( ;; )
        {
            StreamWhiteSpace( in, tag );

            // Do we have text?
            if ( in->good() && in->peek() != '<' )
            {
                // Yep, text. The temporary node is used only to pull the
                // text into `tag`; it is not linked into the tree.
                TiXmlText text( "" );
                text.StreamIn( in, tag );

                // What follows text is a closing tag or another node.
                // Go around again and figure it out.
                continue;
            }

            // We now have either a closing tag...or another node.
            // We should be at a "<", regardless.
            if ( !in->good() ) return;
            assert( in->peek() == '<' );
            // Offset in `tag` where the upcoming node's text begins.
            int tagIndex = (int) tag->length();

            bool closingTag = false;
            bool firstCharFound = false;

            // Read up to, but not including, the next '>'. The first
            // character that is neither '<' nor white space decides whether
            // this is a closing tag.
            for( ;; )
            {
                if ( !in->good() )
                    return;

                int c = in->peek();
                if ( c <= 0 )
                {
                    TiXmlDocument* document = GetDocument();
                    if ( document )
                        document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                    return;
                }

                if ( c == '>' )
                    break;

                *tag += (char) c;
                in->get();

                if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
                {
                    firstCharFound = true;
                    if ( c == '/' )
                        closingTag = true;
                }
            }
            // If it was a closing tag, then read in the closing '>' to clean up the input stream.
            // If it was not, the streaming will be done by the tag.
            if ( closingTag )
            {
                if ( !in->good() )
                    return;

                int c = in->get();
                if ( c <= 0 )
                {
                    TiXmlDocument* document = GetDocument();
                    if ( document )
                        document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                    return;
                }
                assert( c == '>' );
                *tag += (char) c;

                // We are done, once we've found our closing tag.
                return;
            }
            else
            {
                // If not a closing tag, id it, and stream.
                const char* tagloc = tag->c_str() + tagIndex;
                TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
                if ( !node )
                    return;
                node->StreamIn( in, tag );
                // The node was only needed to drive the streaming.
                delete node;
                node = 0;

                // No return: go around from the beginning: text, closing tag, or node.
            }
        }
    }
}
1012#endif
1013
1014const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1015{
1016 p = SkipWhiteSpace( p, encoding );
1017 TiXmlDocument* document = GetDocument();
1018
1019 if ( !p || !*p )
1020 {
1021 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1022 return 0;
1023 }
1024
1025 if ( data )
1026 {
1027 data->Stamp( p, encoding );
1028 location = data->Cursor();
1029 }
1030
1031 if ( *p != '<' )
1032 {
1033 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1034 return 0;
1035 }
1036
1037 p = SkipWhiteSpace( p+1, encoding );
1038
1039 // Read the name.
1040 const char* pErr = p;
1041
1042 p = ReadName( p, &value, encoding );
1043 if ( !p || !*p )
1044 {
1045 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1046 return 0;
1047 }
1048
1049 TIXML_STRING endTag ("</");
1050 endTag += value;
Dan Albert0238a202014-08-22 00:52:41 +00001051 endTag += ">";
The Android Open Source Project562be062009-03-03 19:30:48 -08001052
1053 // Check for and read attributes. Also look for an empty
1054 // tag or an end tag.
1055 while ( p && *p )
1056 {
1057 pErr = p;
1058 p = SkipWhiteSpace( p, encoding );
1059 if ( !p || !*p )
1060 {
1061 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1062 return 0;
1063 }
1064 if ( *p == '/' )
1065 {
1066 ++p;
1067 // Empty tag.
1068 if ( *p != '>' )
1069 {
1070 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1071 return 0;
1072 }
1073 return (p+1);
1074 }
1075 else if ( *p == '>' )
1076 {
1077 // Done with attributes (if there were any.)
1078 // Read the value -- which can include other
1079 // elements -- read the end tag, and return.
1080 ++p;
1081 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
Dan Albert0238a202014-08-22 00:52:41 +00001082 if ( !p || !*p )
The Android Open Source Project562be062009-03-03 19:30:48 -08001083 return 0;
1084
1085 // We should find the end tag now
1086 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1087 {
1088 p += endTag.length();
Dan Albert0238a202014-08-22 00:52:41 +00001089 return p;
The Android Open Source Project562be062009-03-03 19:30:48 -08001090 }
1091 else
1092 {
1093 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1094 return 0;
1095 }
1096 }
1097 else
1098 {
1099 // Try to read an attribute:
1100 TiXmlAttribute* attrib = new TiXmlAttribute();
1101 if ( !attrib )
1102 {
Dan Albert0238a202014-08-22 00:52:41 +00001103 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
The Android Open Source Project562be062009-03-03 19:30:48 -08001104 return 0;
1105 }
1106
1107 attrib->SetDocument( document );
Dan Albert0238a202014-08-22 00:52:41 +00001108 const char* pErr = p;
The Android Open Source Project562be062009-03-03 19:30:48 -08001109 p = attrib->Parse( p, data, encoding );
1110
1111 if ( !p || !*p )
1112 {
1113 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1114 delete attrib;
1115 return 0;
1116 }
1117
1118 // Handle the strange case of double attributes:
1119 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1120 if ( node )
1121 {
Dan Albert0238a202014-08-22 00:52:41 +00001122 node->SetValue( attrib->Value() );
The Android Open Source Project562be062009-03-03 19:30:48 -08001123 delete attrib;
1124 return 0;
1125 }
1126
1127 attributeSet.Add( attrib );
1128 }
1129 }
1130 return p;
1131}
1132
1133
1134const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1135{
1136 TiXmlDocument* document = GetDocument();
1137
1138 // Read in text and elements in any order.
1139 const char* pWithWhiteSpace = p;
1140 p = SkipWhiteSpace( p, encoding );
1141
1142 while ( p && *p )
1143 {
1144 if ( *p != '<' )
1145 {
1146 // Take what we have, make a text element.
1147 TiXmlText* textNode = new TiXmlText( "" );
1148
1149 if ( !textNode )
1150 {
Dan Albert0238a202014-08-22 00:52:41 +00001151 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1152 return 0;
The Android Open Source Project562be062009-03-03 19:30:48 -08001153 }
1154
1155 if ( TiXmlBase::IsWhiteSpaceCondensed() )
1156 {
1157 p = textNode->Parse( p, data, encoding );
1158 }
1159 else
1160 {
1161 // Special case: we want to keep the white space
1162 // so that leading spaces aren't removed.
1163 p = textNode->Parse( pWithWhiteSpace, data, encoding );
1164 }
1165
1166 if ( !textNode->Blank() )
1167 LinkEndChild( textNode );
1168 else
1169 delete textNode;
1170 }
1171 else
1172 {
1173 // We hit a '<'
1174 // Have we hit a new element or an end tag? This could also be
1175 // a TiXmlText in the "CDATA" style.
1176 if ( StringEqual( p, "</", false, encoding ) )
1177 {
1178 return p;
1179 }
1180 else
1181 {
1182 TiXmlNode* node = Identify( p, encoding );
1183 if ( node )
1184 {
1185 p = node->Parse( p, data, encoding );
1186 LinkEndChild( node );
1187 }
1188 else
1189 {
1190 return 0;
1191 }
1192 }
1193 }
1194 pWithWhiteSpace = p;
1195 p = SkipWhiteSpace( p, encoding );
1196 }
1197
1198 if ( !p )
1199 {
1200 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1201 }
1202 return p;
1203}
1204
1205
#ifdef TIXML_USE_STL
// Appends characters from the stream to *tag, up to and including the first
// '>'. A character value <= 0 records TIXML_ERROR_EMBEDDED_NULL on the owning
// document (if any) and stops reading.
void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	for ( ; in->good(); )
	{
		const int ch = in->get();
		if ( ch > 0 )
		{
			(*tag) += static_cast<char>( ch );
			if ( ch == '>' )
				return;		// Reached the end of the tag.
			continue;
		}

		TiXmlDocument* owner = GetDocument();
		if ( owner )
			owner->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
		return;
	}
}
#endif
1229
1230
1231const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1232{
1233 TiXmlDocument* document = GetDocument();
1234 p = SkipWhiteSpace( p, encoding );
1235
1236 if ( data )
1237 {
1238 data->Stamp( p, encoding );
1239 location = data->Cursor();
1240 }
1241 if ( !p || !*p || *p != '<' )
1242 {
1243 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1244 return 0;
1245 }
1246 ++p;
1247 value = "";
1248
1249 while ( p && *p && *p != '>' )
1250 {
1251 value += *p;
1252 ++p;
1253 }
1254
1255 if ( !p )
1256 {
Dan Albert0238a202014-08-22 00:52:41 +00001257 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
The Android Open Source Project562be062009-03-03 19:30:48 -08001258 }
Dan Albert0238a202014-08-22 00:52:41 +00001259 if ( *p == '>' )
The Android Open Source Project562be062009-03-03 19:30:48 -08001260 return p+1;
1261 return p;
1262}
1263
#ifdef TIXML_USE_STL
// Appends characters from the stream to *tag until the accumulated text ends
// with "-->". A character value <= 0 records TIXML_ERROR_EMBEDDED_NULL on the
// owning document (if any) and stops reading.
void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += (char) c;

		// Only look back for "--" once at least three characters exist;
		// otherwise length()-2 / length()-3 wrap around and at() throws.
		const size_t len = tag->length();
		if ( c == '>'
			 && len >= 3
			 && tag->at( len - 2 ) == '-'
			 && tag->at( len - 3 ) == '-' )
		{
			// All is well.
			return;
		}
	}
}
#endif
1290
1291
1292const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1293{
1294 TiXmlDocument* document = GetDocument();
1295 value = "";
1296
1297 p = SkipWhiteSpace( p, encoding );
1298
1299 if ( data )
1300 {
1301 data->Stamp( p, encoding );
1302 location = data->Cursor();
1303 }
1304 const char* startTag = "<!--";
1305 const char* endTag = "-->";
1306
1307 if ( !StringEqual( p, startTag, false, encoding ) )
1308 {
Dan Albert0238a202014-08-22 00:52:41 +00001309 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
The Android Open Source Project562be062009-03-03 19:30:48 -08001310 return 0;
1311 }
1312 p += strlen( startTag );
Dan Albert0238a202014-08-22 00:52:41 +00001313 p = ReadText( p, &value, false, endTag, false, encoding );
The Android Open Source Project562be062009-03-03 19:30:48 -08001314 return p;
1315}
1316
1317
1318const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1319{
1320 p = SkipWhiteSpace( p, encoding );
1321 if ( !p || !*p ) return 0;
1322
Dan Albert0238a202014-08-22 00:52:41 +00001323 int tabsize = 4;
1324 if ( document )
1325 tabsize = document->TabSize();
1326
The Android Open Source Project562be062009-03-03 19:30:48 -08001327 if ( data )
1328 {
1329 data->Stamp( p, encoding );
1330 location = data->Cursor();
1331 }
1332 // Read the name, the '=' and the value.
1333 const char* pErr = p;
1334 p = ReadName( p, &name, encoding );
1335 if ( !p || !*p )
1336 {
1337 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1338 return 0;
1339 }
1340 p = SkipWhiteSpace( p, encoding );
1341 if ( !p || !*p || *p != '=' )
1342 {
1343 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1344 return 0;
1345 }
1346
1347 ++p; // skip '='
1348 p = SkipWhiteSpace( p, encoding );
1349 if ( !p || !*p )
1350 {
1351 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1352 return 0;
1353 }
1354
1355 const char* end;
1356
Dan Albert0238a202014-08-22 00:52:41 +00001357 if ( *p == '\'' )
The Android Open Source Project562be062009-03-03 19:30:48 -08001358 {
1359 ++p;
Dan Albert0238a202014-08-22 00:52:41 +00001360 end = "\'";
The Android Open Source Project562be062009-03-03 19:30:48 -08001361 p = ReadText( p, &value, false, end, false, encoding );
1362 }
Dan Albert0238a202014-08-22 00:52:41 +00001363 else if ( *p == '"' )
The Android Open Source Project562be062009-03-03 19:30:48 -08001364 {
1365 ++p;
Dan Albert0238a202014-08-22 00:52:41 +00001366 end = "\"";
The Android Open Source Project562be062009-03-03 19:30:48 -08001367 p = ReadText( p, &value, false, end, false, encoding );
1368 }
1369 else
1370 {
1371 // All attribute values should be in single or double quotes.
1372 // But this is such a common error that the parser will try
1373 // its best, even without them.
1374 value = "";
Dan Albert0238a202014-08-22 00:52:41 +00001375 while ( p && *p // existence
1376 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace
1377 && *p != '/' && *p != '>' ) // tag end
The Android Open Source Project562be062009-03-03 19:30:48 -08001378 {
1379 value += *p;
1380 ++p;
1381 }
1382 }
1383 return p;
1384}
1385
#ifdef TIXML_USE_STL
// Appends this text node's characters from the stream to *tag.
//
// For CDATA text, characters are consumed until the accumulated text ends
// with "]]>". For ordinary text, characters are consumed up to (but not
// including) the next '<', which is left in the stream.
// A character value <= 0 records TIXML_ERROR_EMBEDDED_NULL on the owning
// document (if any) and stops reading.
void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	if ( cdata )
	{
		// Keep reading until the terminator is seen; a single get() would
		// return after one character and leave the section unread. get()
		// yields a value <= 0 once the stream is exhausted or bad, so the
		// loop always terminates.
		for ( ;; )
		{
			int c = in->get();
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;

			// Only look back for "]]" once at least three characters exist;
			// otherwise length()-2 / length()-3 wrap around and at() throws.
			const size_t len = tag->length();
			if ( c == '>'
				 && len >= 3
				 && tag->at( len - 2 ) == ']'
				 && tag->at( len - 3 ) == ']' )
			{
				// All is well.
				return;
			}
		}
	}
	else
	{
		while ( in->good() )
		{
			int c = in->peek();
			if ( c == '<' )
				return;
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;
			in->get();	// commit the character that was peeked
		}
	}
}
#endif
1431
1432const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1433{
1434 value = "";
1435 TiXmlDocument* document = GetDocument();
1436
1437 if ( data )
1438 {
1439 data->Stamp( p, encoding );
1440 location = data->Cursor();
1441 }
1442
1443 const char* const startTag = "<![CDATA[";
1444 const char* const endTag = "]]>";
1445
1446 if ( cdata || StringEqual( p, startTag, false, encoding ) )
1447 {
1448 cdata = true;
1449
1450 if ( !StringEqual( p, startTag, false, encoding ) )
1451 {
Dan Albert0238a202014-08-22 00:52:41 +00001452 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
The Android Open Source Project562be062009-03-03 19:30:48 -08001453 return 0;
1454 }
1455 p += strlen( startTag );
1456
1457 // Keep all the white space, ignore the encoding, etc.
1458 while ( p && *p
1459 && !StringEqual( p, endTag, false, encoding )
1460 )
1461 {
1462 value += *p;
1463 ++p;
1464 }
1465
1466 TIXML_STRING dummy;
1467 p = ReadText( p, &dummy, false, endTag, false, encoding );
1468 return p;
1469 }
1470 else
1471 {
1472 bool ignoreWhite = true;
1473
1474 const char* end = "<";
1475 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
Dan Albert0238a202014-08-22 00:52:41 +00001476 if ( p )
The Android Open Source Project562be062009-03-03 19:30:48 -08001477 return p-1; // don't truncate the '<'
1478 return 0;
1479 }
1480}
1481
#ifdef TIXML_USE_STL
// Copies characters from the stream onto the end of *tag, stopping once a
// '>' has been copied. If a character value <= 0 is read, the owning
// document (when present) gets TIXML_ERROR_EMBEDDED_NULL and reading stops.
void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	bool closed = false;
	while ( !closed && in->good() )
	{
		const int next = in->get();
		if ( next <= 0 )
		{
			TiXmlDocument* doc = GetDocument();
			if ( doc )
				doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += static_cast<char>( next );

		// The declaration is complete once its closing '>' is stored.
		closed = ( next == '>' );
	}
}
#endif
1505
1506const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1507{
1508 p = SkipWhiteSpace( p, _encoding );
1509 // Find the beginning, find the end, and look for
1510 // the stuff in-between.
1511 TiXmlDocument* document = GetDocument();
1512 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1513 {
1514 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1515 return 0;
1516 }
1517 if ( data )
1518 {
1519 data->Stamp( p, _encoding );
1520 location = data->Cursor();
1521 }
1522 p += 5;
1523
1524 version = "";
1525 encoding = "";
1526 standalone = "";
1527
1528 while ( p && *p )
1529 {
1530 if ( *p == '>' )
1531 {
1532 ++p;
1533 return p;
1534 }
1535
1536 p = SkipWhiteSpace( p, _encoding );
1537 if ( StringEqual( p, "version", true, _encoding ) )
1538 {
1539 TiXmlAttribute attrib;
1540 p = attrib.Parse( p, data, _encoding );
1541 version = attrib.Value();
1542 }
1543 else if ( StringEqual( p, "encoding", true, _encoding ) )
1544 {
1545 TiXmlAttribute attrib;
1546 p = attrib.Parse( p, data, _encoding );
1547 encoding = attrib.Value();
1548 }
1549 else if ( StringEqual( p, "standalone", true, _encoding ) )
1550 {
1551 TiXmlAttribute attrib;
1552 p = attrib.Parse( p, data, _encoding );
1553 standalone = attrib.Value();
1554 }
1555 else
1556 {
1557 // Read over whatever it is.
1558 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1559 ++p;
1560 }
1561 }
1562 return 0;
1563}
1564
1565bool TiXmlText::Blank() const
1566{
1567 for ( unsigned i=0; i<value.length(); i++ )
1568 if ( !IsWhiteSpace( value[i] ) )
1569 return false;
1570 return true;
1571}
1572