// Copyright 2007-2011 Baptiste Lepilleur
// Distributed under MIT license, or public domain if desired and
// recognized in your jurisdiction.
// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
5
6#if !defined(JSON_IS_AMALGAMATION)
7# include <json/assertions.h>
8# include <json/reader.h>
9# include <json/value.h>
10# include "json_tool.h"
11#endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstdio>
#include <cstring>
#include <istream>
#include <iterator>
#include <stdexcept>
#include <utility>
17
18#if _MSC_VER >= 1400 // VC++ 8.0
19#pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
20#endif
21
22namespace Json {
23
24// Implementation of class Features
25// ////////////////////////////////
26
27Features::Features()
28 : allowComments_( true )
29 , strictRoot_( false )
30{
31}
32
33
34Features
35Features::all()
36{
37 return Features();
38}
39
40
41Features
42Features::strictMode()
43{
44 Features features;
45 features.allowComments_ = false;
46 features.strictRoot_ = true;
47 return features;
48}
49
50// Implementation of class Reader
51// ////////////////////////////////
52
53
54static inline bool
55in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
56{
57 return c == c1 || c == c2 || c == c3 || c == c4;
58}
59
60static inline bool
61in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
62{
63 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
64}
65
66
67static bool
68containsNewLine( Reader::Location begin,
69 Reader::Location end )
70{
71 for ( ;begin < end; ++begin )
72 if ( *begin == '\n' || *begin == '\r' )
73 return true;
74 return false;
75}
76
77
78// Class Reader
79// //////////////////////////////////////////////////////////////////
80
81Reader::Reader()
82 : errors_(),
83 document_(),
84 begin_(),
85 end_(),
86 current_(),
87 lastValueEnd_(),
88 lastValue_(),
89 commentsBefore_(),
90 features_( Features::all() ),
91 collectComments_()
92{
93}
94
95
96Reader::Reader( const Features &features )
97 : errors_(),
98 document_(),
99 begin_(),
100 end_(),
101 current_(),
102 lastValueEnd_(),
103 lastValue_(),
104 commentsBefore_(),
105 features_( features ),
106 collectComments_()
107{
108}
109
110
111bool
112Reader::parse( const std::string &document,
113 Value &root,
114 bool collectComments )
115{
116 document_ = document;
117 const char *begin = document_.c_str();
118 const char *end = begin + document_.length();
119 return parse( begin, end, root, collectComments );
120}
121
122
123bool
124Reader::parse( std::istream& sin,
125 Value &root,
126 bool collectComments )
127{
128 //std::istream_iterator<char> begin(sin);
129 //std::istream_iterator<char> end;
130 // Those would allow streamed input from a file, if parse() were a
131 // template function.
132
133 // Since std::string is reference-counted, this at least does not
134 // create an extra copy.
135 std::string doc;
136 std::getline(sin, doc, (char)EOF);
137 return parse( doc, root, collectComments );
138}
139
140bool
141Reader::parse( const char *beginDoc, const char *endDoc,
142 Value &root,
143 bool collectComments )
144{
145 if ( !features_.allowComments_ )
146 {
147 collectComments = false;
148 }
149
150 begin_ = beginDoc;
151 end_ = endDoc;
152 collectComments_ = collectComments;
153 current_ = begin_;
154 lastValueEnd_ = 0;
155 lastValue_ = 0;
156 commentsBefore_ = "";
157 errors_.clear();
158 while ( !nodes_.empty() )
159 nodes_.pop();
160 nodes_.push( &root );
161
162 bool successful = readValue();
163 Token token;
164 skipCommentTokens( token );
165 if ( collectComments_ && !commentsBefore_.empty() )
166 root.setComment( commentsBefore_, commentAfter );
167 if ( features_.strictRoot_ )
168 {
169 if ( !root.isArray() && !root.isObject() )
170 {
171 // Set error location to start of doc, ideally should be first token found in doc
172 token.type_ = tokenError;
173 token.start_ = beginDoc;
174 token.end_ = endDoc;
175 addError( "A valid JSON document must be either an array or an object value.",
176 token );
177 return false;
178 }
179 }
180 return successful;
181}
182
183
184bool
185Reader::readValue()
186{
187 Token token;
188 skipCommentTokens( token );
189 bool successful = true;
190
191 if ( collectComments_ && !commentsBefore_.empty() )
192 {
193 currentValue().setComment( commentsBefore_, commentBefore );
194 commentsBefore_ = "";
195 }
196
197
198 switch ( token.type_ )
199 {
200 case tokenObjectBegin:
201 successful = readObject( token );
202 break;
203 case tokenArrayBegin:
204 successful = readArray( token );
205 break;
206 case tokenNumber:
207 successful = decodeNumber( token );
208 break;
209 case tokenString:
210 successful = decodeString( token );
211 break;
212 case tokenTrue:
213 currentValue() = true;
214 break;
215 case tokenFalse:
216 currentValue() = false;
217 break;
218 case tokenNull:
219 currentValue() = Value();
220 break;
221 default:
222 return addError( "Syntax error: value, object or array expected.", token );
223 }
224
225 if ( collectComments_ )
226 {
227 lastValueEnd_ = current_;
228 lastValue_ = &currentValue();
229 }
230
231 return successful;
232}
233
234
235void
236Reader::skipCommentTokens( Token &token )
237{
238 if ( features_.allowComments_ )
239 {
240 do
241 {
242 readToken( token );
243 }
244 while ( token.type_ == tokenComment );
245 }
246 else
247 {
248 readToken( token );
249 }
250}
251
252
253bool
254Reader::expectToken( TokenType type, Token &token, const char *message )
255{
256 readToken( token );
257 if ( token.type_ != type )
258 return addError( message, token );
259 return true;
260}
261
262
263bool
264Reader::readToken( Token &token )
265{
266 skipSpaces();
267 token.start_ = current_;
268 Char c = getNextChar();
269 bool ok = true;
270 switch ( c )
271 {
272 case '{':
273 token.type_ = tokenObjectBegin;
274 break;
275 case '}':
276 token.type_ = tokenObjectEnd;
277 break;
278 case '[':
279 token.type_ = tokenArrayBegin;
280 break;
281 case ']':
282 token.type_ = tokenArrayEnd;
283 break;
284 case '"':
285 token.type_ = tokenString;
286 ok = readString();
287 break;
288 case '/':
289 token.type_ = tokenComment;
290 ok = readComment();
291 break;
292 case '0':
293 case '1':
294 case '2':
295 case '3':
296 case '4':
297 case '5':
298 case '6':
299 case '7':
300 case '8':
301 case '9':
302 case '-':
303 token.type_ = tokenNumber;
304 readNumber();
305 break;
306 case 't':
307 token.type_ = tokenTrue;
308 ok = match( "rue", 3 );
309 break;
310 case 'f':
311 token.type_ = tokenFalse;
312 ok = match( "alse", 4 );
313 break;
314 case 'n':
315 token.type_ = tokenNull;
316 ok = match( "ull", 3 );
317 break;
318 case ',':
319 token.type_ = tokenArraySeparator;
320 break;
321 case ':':
322 token.type_ = tokenMemberSeparator;
323 break;
324 case 0:
325 token.type_ = tokenEndOfStream;
326 break;
327 default:
328 ok = false;
329 break;
330 }
331 if ( !ok )
332 token.type_ = tokenError;
333 token.end_ = current_;
334 return true;
335}
336
337
338void
339Reader::skipSpaces()
340{
341 while ( current_ != end_ )
342 {
343 Char c = *current_;
344 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
345 ++current_;
346 else
347 break;
348 }
349}
350
351
352bool
353Reader::match( Location pattern,
354 int patternLength )
355{
356 if ( end_ - current_ < patternLength )
357 return false;
358 int index = patternLength;
359 while ( index-- )
360 if ( current_[index] != pattern[index] )
361 return false;
362 current_ += patternLength;
363 return true;
364}
365
366
367bool
368Reader::readComment()
369{
370 Location commentBegin = current_ - 1;
371 Char c = getNextChar();
372 bool successful = false;
373 if ( c == '*' )
374 successful = readCStyleComment();
375 else if ( c == '/' )
376 successful = readCppStyleComment();
377 if ( !successful )
378 return false;
379
380 if ( collectComments_ )
381 {
382 CommentPlacement placement = commentBefore;
383 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
384 {
385 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
386 placement = commentAfterOnSameLine;
387 }
388
389 addComment( commentBegin, current_, placement );
390 }
391 return true;
392}
393
394
395void
396Reader::addComment( Location begin,
397 Location end,
398 CommentPlacement placement )
399{
400 assert( collectComments_ );
401 if ( placement == commentAfterOnSameLine )
402 {
403 assert( lastValue_ != 0 );
404 lastValue_->setComment( std::string( begin, end ), placement );
405 }
406 else
407 {
408 if ( !commentsBefore_.empty() )
409 commentsBefore_ += "\n";
410 commentsBefore_ += std::string( begin, end );
411 }
412}
413
414
415bool
416Reader::readCStyleComment()
417{
418 while ( current_ != end_ )
419 {
420 Char c = getNextChar();
421 if ( c == '*' && *current_ == '/' )
422 break;
423 }
424 return getNextChar() == '/';
425}
426
427
428bool
429Reader::readCppStyleComment()
430{
431 while ( current_ != end_ )
432 {
433 Char c = getNextChar();
434 if ( c == '\r' || c == '\n' )
435 break;
436 }
437 return true;
438}
439
440
441void
442Reader::readNumber()
443{
444 while ( current_ != end_ )
445 {
446 if ( !(*current_ >= '0' && *current_ <= '9') &&
447 !in( *current_, '.', 'e', 'E', '+', '-' ) )
448 break;
449 ++current_;
450 }
451}
452
453bool
454Reader::readString()
455{
456 Char c = 0;
457 while ( current_ != end_ )
458 {
459 c = getNextChar();
460 if ( c == '\\' )
461 getNextChar();
462 else if ( c == '"' )
463 break;
464 }
465 return c == '"';
466}
467
468
469bool
470Reader::readObject( Token &/*tokenStart*/ )
471{
472 Token tokenName;
473 std::string name;
474 currentValue() = Value( objectValue );
475 while ( readToken( tokenName ) )
476 {
477 bool initialTokenOk = true;
478 while ( tokenName.type_ == tokenComment && initialTokenOk )
479 initialTokenOk = readToken( tokenName );
480 if ( !initialTokenOk )
481 break;
482 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
483 return true;
484 if ( tokenName.type_ != tokenString )
485 break;
486
487 name = "";
488 if ( !decodeString( tokenName, name ) )
489 return recoverFromError( tokenObjectEnd );
490
491 Token colon;
492 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
493 {
494 return addErrorAndRecover( "Missing ':' after object member name",
495 colon,
496 tokenObjectEnd );
497 }
498 Value &value = currentValue()[ name ];
499 nodes_.push( &value );
500 bool ok = readValue();
501 nodes_.pop();
502 if ( !ok ) // error already set
503 return recoverFromError( tokenObjectEnd );
504
505 Token comma;
506 if ( !readToken( comma )
507 || ( comma.type_ != tokenObjectEnd &&
508 comma.type_ != tokenArraySeparator &&
509 comma.type_ != tokenComment ) )
510 {
511 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
512 comma,
513 tokenObjectEnd );
514 }
515 bool finalizeTokenOk = true;
516 while ( comma.type_ == tokenComment &&
517 finalizeTokenOk )
518 finalizeTokenOk = readToken( comma );
519 if ( comma.type_ == tokenObjectEnd )
520 return true;
521 }
522 return addErrorAndRecover( "Missing '}' or object member name",
523 tokenName,
524 tokenObjectEnd );
525}
526
527
528bool
529Reader::readArray( Token &/*tokenStart*/ )
530{
531 currentValue() = Value( arrayValue );
532 skipSpaces();
533 if ( *current_ == ']' ) // empty array
534 {
535 Token endArray;
536 readToken( endArray );
537 return true;
538 }
539 int index = 0;
540 for (;;)
541 {
542 Value &value = currentValue()[ index++ ];
543 nodes_.push( &value );
544 bool ok = readValue();
545 nodes_.pop();
546 if ( !ok ) // error already set
547 return recoverFromError( tokenArrayEnd );
548
549 Token token;
550 // Accept Comment after last item in the array.
551 ok = readToken( token );
552 while ( token.type_ == tokenComment && ok )
553 {
554 ok = readToken( token );
555 }
556 bool badTokenType = ( token.type_ != tokenArraySeparator &&
557 token.type_ != tokenArrayEnd );
558 if ( !ok || badTokenType )
559 {
560 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
561 token,
562 tokenArrayEnd );
563 }
564 if ( token.type_ == tokenArrayEnd )
565 break;
566 }
567 return true;
568}
569
570
571bool
572Reader::decodeNumber( Token &token )
573{
574 bool isDouble = false;
575 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
576 {
577 isDouble = isDouble
578 || in( *inspect, '.', 'e', 'E', '+' )
579 || ( *inspect == '-' && inspect != token.start_ );
580 }
581 if ( isDouble )
582 return decodeDouble( token );
583 // Attempts to parse the number as an integer. If the number is
584 // larger than the maximum supported value of an integer then
585 // we decode the number as a double.
586 Location current = token.start_;
587 bool isNegative = *current == '-';
588 if ( isNegative )
589 ++current;
590 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt)
591 : Value::maxLargestUInt;
592 Value::LargestUInt threshold = maxIntegerValue / 10;
593 Value::LargestUInt value = 0;
594 while ( current < token.end_ )
595 {
596 Char c = *current++;
597 if ( c < '0' || c > '9' )
598 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
599 Value::UInt digit(c - '0');
600 if ( value >= threshold )
601 {
602 // We've hit or exceeded the max value divided by 10 (rounded down). If
603 // a) we've only just touched the limit, b) this is the last digit, and
604 // c) it's small enough to fit in that rounding delta, we're okay.
605 // Otherwise treat this number as a double to avoid overflow.
606 if (value > threshold ||
607 current != token.end_ ||
608 digit > maxIntegerValue % 10)
609 {
610 return decodeDouble( token );
611 }
612 }
613 value = value * 10 + digit;
614 }
615 if ( isNegative )
616 currentValue() = -Value::LargestInt( value );
617 else if ( value <= Value::LargestUInt(Value::maxInt) )
618 currentValue() = Value::LargestInt( value );
619 else
620 currentValue() = value;
621 return true;
622}
623
624
625bool
626Reader::decodeDouble( Token &token )
627{
628 double value = 0;
629 const int bufferSize = 32;
630 int count;
631 int length = int(token.end_ - token.start_);
632
633 // Sanity check to avoid buffer overflow exploits.
634 if (length < 0) {
635 return addError( "Unable to parse token length", token );
636 }
637
638 // Avoid using a string constant for the format control string given to
639 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
640 // info:
641 //
642 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
643 char format[] = "%lf";
644
645 if ( length <= bufferSize )
646 {
647 Char buffer[bufferSize+1];
648 memcpy( buffer, token.start_, length );
649 buffer[length] = 0;
650 count = sscanf( buffer, format, &value );
651 }
652 else
653 {
654 std::string buffer( token.start_, token.end_ );
655 count = sscanf( buffer.c_str(), format, &value );
656 }
657
658 if ( count != 1 )
659 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
660 currentValue() = value;
661 return true;
662}
663
664
665bool
666Reader::decodeString( Token &token )
667{
668 std::string decoded;
669 if ( !decodeString( token, decoded ) )
670 return false;
671 currentValue() = decoded;
672 return true;
673}
674
675
676bool
677Reader::decodeString( Token &token, std::string &decoded )
678{
679 decoded.reserve( token.end_ - token.start_ - 2 );
680 Location current = token.start_ + 1; // skip '"'
681 Location end = token.end_ - 1; // do not include '"'
682 while ( current != end )
683 {
684 Char c = *current++;
685 if ( c == '"' )
686 break;
687 else if ( c == '\\' )
688 {
689 if ( current == end )
690 return addError( "Empty escape sequence in string", token, current );
691 Char escape = *current++;
692 switch ( escape )
693 {
694 case '"': decoded += '"'; break;
695 case '/': decoded += '/'; break;
696 case '\\': decoded += '\\'; break;
697 case 'b': decoded += '\b'; break;
698 case 'f': decoded += '\f'; break;
699 case 'n': decoded += '\n'; break;
700 case 'r': decoded += '\r'; break;
701 case 't': decoded += '\t'; break;
702 case 'u':
703 {
704 unsigned int unicode;
705 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
706 return false;
707 decoded += codePointToUTF8(unicode);
708 }
709 break;
710 default:
711 return addError( "Bad escape sequence in string", token, current );
712 }
713 }
714 else
715 {
716 decoded += c;
717 }
718 }
719 return true;
720}
721
722bool
723Reader::decodeUnicodeCodePoint( Token &token,
724 Location &current,
725 Location end,
726 unsigned int &unicode )
727{
728
729 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
730 return false;
731 if (unicode >= 0xD800 && unicode <= 0xDBFF)
732 {
733 // surrogate pairs
734 if (end - current < 6)
735 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
736 unsigned int surrogatePair;
737 if (*(current++) == '\\' && *(current++)== 'u')
738 {
739 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
740 {
741 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
742 }
743 else
744 return false;
745 }
746 else
747 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
748 }
749 return true;
750}
751
752bool
753Reader::decodeUnicodeEscapeSequence( Token &token,
754 Location &current,
755 Location end,
756 unsigned int &unicode )
757{
758 if ( end - current < 4 )
759 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
760 unicode = 0;
761 for ( int index =0; index < 4; ++index )
762 {
763 Char c = *current++;
764 unicode *= 16;
765 if ( c >= '0' && c <= '9' )
766 unicode += c - '0';
767 else if ( c >= 'a' && c <= 'f' )
768 unicode += c - 'a' + 10;
769 else if ( c >= 'A' && c <= 'F' )
770 unicode += c - 'A' + 10;
771 else
772 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
773 }
774 return true;
775}
776
777
778bool
779Reader::addError( const std::string &message,
780 Token &token,
781 Location extra )
782{
783 ErrorInfo info;
784 info.token_ = token;
785 info.message_ = message;
786 info.extra_ = extra;
787 errors_.push_back( info );
788 return false;
789}
790
791
792bool
793Reader::recoverFromError( TokenType skipUntilToken )
794{
795 int errorCount = int(errors_.size());
796 Token skip;
797 for (;;)
798 {
799 if ( !readToken(skip) )
800 errors_.resize( errorCount ); // discard errors caused by recovery
801 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
802 break;
803 }
804 errors_.resize( errorCount );
805 return false;
806}
807
808
809bool
810Reader::addErrorAndRecover( const std::string &message,
811 Token &token,
812 TokenType skipUntilToken )
813{
814 addError( message, token );
815 return recoverFromError( skipUntilToken );
816}
817
818
819Value &
820Reader::currentValue()
821{
822 return *(nodes_.top());
823}
824
825
826Reader::Char
827Reader::getNextChar()
828{
829 if ( current_ == end_ )
830 return 0;
831 return *current_++;
832}
833
834
835void
836Reader::getLocationLineAndColumn( Location location,
837 int &line,
838 int &column ) const
839{
840 Location current = begin_;
841 Location lastLineStart = current;
842 line = 0;
843 while ( current < location && current != end_ )
844 {
845 Char c = *current++;
846 if ( c == '\r' )
847 {
848 if ( *current == '\n' )
849 ++current;
850 lastLineStart = current;
851 ++line;
852 }
853 else if ( c == '\n' )
854 {
855 lastLineStart = current;
856 ++line;
857 }
858 }
859 // column & line start at 1
860 column = int(location - lastLineStart) + 1;
861 ++line;
862}
863
864
865std::string
866Reader::getLocationLineAndColumn( Location location ) const
867{
868 int line, column;
869 getLocationLineAndColumn( location, line, column );
870 char buffer[18+16+16+1];
871 sprintf( buffer, "Line %d, Column %d", line, column );
872 return buffer;
873}
874
875
876// Deprecated. Preserved for backward compatibility
877std::string
878Reader::getFormatedErrorMessages() const
879{
880 return getFormattedErrorMessages();
881}
882
883
884std::string
885Reader::getFormattedErrorMessages() const
886{
887 std::string formattedMessage;
888 for ( Errors::const_iterator itError = errors_.begin();
889 itError != errors_.end();
890 ++itError )
891 {
892 const ErrorInfo &error = *itError;
893 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
894 formattedMessage += " " + error.message_ + "\n";
895 if ( error.extra_ )
896 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
897 }
898 return formattedMessage;
899}
900
901
902std::istream& operator>>( std::istream &sin, Value &root )
903{
904 Json::Reader reader;
905 bool ok = reader.parse(sin, root, true);
906 if (!ok) {
907 fprintf(
908 stderr,
909 "Error from reader: %s",
910 reader.getFormattedErrorMessages().c_str());
911
912 JSON_FAIL_MESSAGE("reader error");
913 }
914 return sin;
915}
916
917
918} // namespace Json