// Copyright 2007-2011 Baptiste Lepilleur
// Distributed under MIT license, or public domain if desired and
// recognized in your jurisdiction.
// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
| 5 | |
#if !defined(JSON_IS_AMALGAMATION)
# include <json/assertions.h>
# include <json/reader.h>
# include <json/value.h>
# include "json_tool.h"
#endif // if !defined(JSON_IS_AMALGAMATION)
#include <utility>
#include <cstdio>
#include <cassert>
#include <cstring>
#include <iterator>
#include <stdexcept>
| 17 | |
| 18 | #if _MSC_VER >= 1400 // VC++ 8.0 |
| 19 | #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated. |
| 20 | #endif |
| 21 | |
| 22 | namespace Json { |
| 23 | |
| 24 | // Implementation of class Features |
| 25 | // //////////////////////////////// |
| 26 | |
| 27 | Features::Features() |
| 28 | : allowComments_( true ) |
| 29 | , strictRoot_( false ) |
| 30 | { |
| 31 | } |
| 32 | |
| 33 | |
| 34 | Features |
| 35 | Features::all() |
| 36 | { |
| 37 | return Features(); |
| 38 | } |
| 39 | |
| 40 | |
| 41 | Features |
| 42 | Features::strictMode() |
| 43 | { |
| 44 | Features features; |
| 45 | features.allowComments_ = false; |
| 46 | features.strictRoot_ = true; |
| 47 | return features; |
| 48 | } |
| 49 | |
| 50 | // Implementation of class Reader |
| 51 | // //////////////////////////////// |
| 52 | |
| 53 | |
| 54 | static inline bool |
| 55 | in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 ) |
| 56 | { |
| 57 | return c == c1 || c == c2 || c == c3 || c == c4; |
| 58 | } |
| 59 | |
| 60 | static inline bool |
| 61 | in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 ) |
| 62 | { |
| 63 | return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; |
| 64 | } |
| 65 | |
| 66 | |
| 67 | static bool |
| 68 | containsNewLine( Reader::Location begin, |
| 69 | Reader::Location end ) |
| 70 | { |
| 71 | for ( ;begin < end; ++begin ) |
| 72 | if ( *begin == '\n' || *begin == '\r' ) |
| 73 | return true; |
| 74 | return false; |
| 75 | } |
| 76 | |
| 77 | |
| 78 | // Class Reader |
| 79 | // ////////////////////////////////////////////////////////////////// |
| 80 | |
| 81 | Reader::Reader() |
| 82 | : errors_(), |
| 83 | document_(), |
| 84 | begin_(), |
| 85 | end_(), |
| 86 | current_(), |
| 87 | lastValueEnd_(), |
| 88 | lastValue_(), |
| 89 | commentsBefore_(), |
| 90 | features_( Features::all() ), |
| 91 | collectComments_() |
| 92 | { |
| 93 | } |
| 94 | |
| 95 | |
| 96 | Reader::Reader( const Features &features ) |
| 97 | : errors_(), |
| 98 | document_(), |
| 99 | begin_(), |
| 100 | end_(), |
| 101 | current_(), |
| 102 | lastValueEnd_(), |
| 103 | lastValue_(), |
| 104 | commentsBefore_(), |
| 105 | features_( features ), |
| 106 | collectComments_() |
| 107 | { |
| 108 | } |
| 109 | |
| 110 | |
| 111 | bool |
| 112 | Reader::parse( const std::string &document, |
| 113 | Value &root, |
| 114 | bool collectComments ) |
| 115 | { |
| 116 | document_ = document; |
| 117 | const char *begin = document_.c_str(); |
| 118 | const char *end = begin + document_.length(); |
| 119 | return parse( begin, end, root, collectComments ); |
| 120 | } |
| 121 | |
| 122 | |
| 123 | bool |
| 124 | Reader::parse( std::istream& sin, |
| 125 | Value &root, |
| 126 | bool collectComments ) |
| 127 | { |
| 128 | //std::istream_iterator<char> begin(sin); |
| 129 | //std::istream_iterator<char> end; |
| 130 | // Those would allow streamed input from a file, if parse() were a |
| 131 | // template function. |
| 132 | |
| 133 | // Since std::string is reference-counted, this at least does not |
| 134 | // create an extra copy. |
| 135 | std::string doc; |
| 136 | std::getline(sin, doc, (char)EOF); |
| 137 | return parse( doc, root, collectComments ); |
| 138 | } |
| 139 | |
| 140 | bool |
| 141 | Reader::parse( const char *beginDoc, const char *endDoc, |
| 142 | Value &root, |
| 143 | bool collectComments ) |
| 144 | { |
| 145 | if ( !features_.allowComments_ ) |
| 146 | { |
| 147 | collectComments = false; |
| 148 | } |
| 149 | |
| 150 | begin_ = beginDoc; |
| 151 | end_ = endDoc; |
| 152 | collectComments_ = collectComments; |
| 153 | current_ = begin_; |
| 154 | lastValueEnd_ = 0; |
| 155 | lastValue_ = 0; |
| 156 | commentsBefore_ = ""; |
| 157 | errors_.clear(); |
| 158 | while ( !nodes_.empty() ) |
| 159 | nodes_.pop(); |
| 160 | nodes_.push( &root ); |
| 161 | |
| 162 | bool successful = readValue(); |
| 163 | Token token; |
| 164 | skipCommentTokens( token ); |
| 165 | if ( collectComments_ && !commentsBefore_.empty() ) |
| 166 | root.setComment( commentsBefore_, commentAfter ); |
| 167 | if ( features_.strictRoot_ ) |
| 168 | { |
| 169 | if ( !root.isArray() && !root.isObject() ) |
| 170 | { |
| 171 | // Set error location to start of doc, ideally should be first token found in doc |
| 172 | token.type_ = tokenError; |
| 173 | token.start_ = beginDoc; |
| 174 | token.end_ = endDoc; |
| 175 | addError( "A valid JSON document must be either an array or an object value.", |
| 176 | token ); |
| 177 | return false; |
| 178 | } |
| 179 | } |
| 180 | return successful; |
| 181 | } |
| 182 | |
| 183 | |
| 184 | bool |
| 185 | Reader::readValue() |
| 186 | { |
| 187 | Token token; |
| 188 | skipCommentTokens( token ); |
| 189 | bool successful = true; |
| 190 | |
| 191 | if ( collectComments_ && !commentsBefore_.empty() ) |
| 192 | { |
| 193 | currentValue().setComment( commentsBefore_, commentBefore ); |
| 194 | commentsBefore_ = ""; |
| 195 | } |
| 196 | |
| 197 | |
| 198 | switch ( token.type_ ) |
| 199 | { |
| 200 | case tokenObjectBegin: |
| 201 | successful = readObject( token ); |
| 202 | break; |
| 203 | case tokenArrayBegin: |
| 204 | successful = readArray( token ); |
| 205 | break; |
| 206 | case tokenNumber: |
| 207 | successful = decodeNumber( token ); |
| 208 | break; |
| 209 | case tokenString: |
| 210 | successful = decodeString( token ); |
| 211 | break; |
| 212 | case tokenTrue: |
| 213 | currentValue() = true; |
| 214 | break; |
| 215 | case tokenFalse: |
| 216 | currentValue() = false; |
| 217 | break; |
| 218 | case tokenNull: |
| 219 | currentValue() = Value(); |
| 220 | break; |
| 221 | default: |
| 222 | return addError( "Syntax error: value, object or array expected.", token ); |
| 223 | } |
| 224 | |
| 225 | if ( collectComments_ ) |
| 226 | { |
| 227 | lastValueEnd_ = current_; |
| 228 | lastValue_ = ¤tValue(); |
| 229 | } |
| 230 | |
| 231 | return successful; |
| 232 | } |
| 233 | |
| 234 | |
| 235 | void |
| 236 | Reader::skipCommentTokens( Token &token ) |
| 237 | { |
| 238 | if ( features_.allowComments_ ) |
| 239 | { |
| 240 | do |
| 241 | { |
| 242 | readToken( token ); |
| 243 | } |
| 244 | while ( token.type_ == tokenComment ); |
| 245 | } |
| 246 | else |
| 247 | { |
| 248 | readToken( token ); |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | |
| 253 | bool |
| 254 | Reader::expectToken( TokenType type, Token &token, const char *message ) |
| 255 | { |
| 256 | readToken( token ); |
| 257 | if ( token.type_ != type ) |
| 258 | return addError( message, token ); |
| 259 | return true; |
| 260 | } |
| 261 | |
| 262 | |
| 263 | bool |
| 264 | Reader::readToken( Token &token ) |
| 265 | { |
| 266 | skipSpaces(); |
| 267 | token.start_ = current_; |
| 268 | Char c = getNextChar(); |
| 269 | bool ok = true; |
| 270 | switch ( c ) |
| 271 | { |
| 272 | case '{': |
| 273 | token.type_ = tokenObjectBegin; |
| 274 | break; |
| 275 | case '}': |
| 276 | token.type_ = tokenObjectEnd; |
| 277 | break; |
| 278 | case '[': |
| 279 | token.type_ = tokenArrayBegin; |
| 280 | break; |
| 281 | case ']': |
| 282 | token.type_ = tokenArrayEnd; |
| 283 | break; |
| 284 | case '"': |
| 285 | token.type_ = tokenString; |
| 286 | ok = readString(); |
| 287 | break; |
| 288 | case '/': |
| 289 | token.type_ = tokenComment; |
| 290 | ok = readComment(); |
| 291 | break; |
| 292 | case '0': |
| 293 | case '1': |
| 294 | case '2': |
| 295 | case '3': |
| 296 | case '4': |
| 297 | case '5': |
| 298 | case '6': |
| 299 | case '7': |
| 300 | case '8': |
| 301 | case '9': |
| 302 | case '-': |
| 303 | token.type_ = tokenNumber; |
| 304 | readNumber(); |
| 305 | break; |
| 306 | case 't': |
| 307 | token.type_ = tokenTrue; |
| 308 | ok = match( "rue", 3 ); |
| 309 | break; |
| 310 | case 'f': |
| 311 | token.type_ = tokenFalse; |
| 312 | ok = match( "alse", 4 ); |
| 313 | break; |
| 314 | case 'n': |
| 315 | token.type_ = tokenNull; |
| 316 | ok = match( "ull", 3 ); |
| 317 | break; |
| 318 | case ',': |
| 319 | token.type_ = tokenArraySeparator; |
| 320 | break; |
| 321 | case ':': |
| 322 | token.type_ = tokenMemberSeparator; |
| 323 | break; |
| 324 | case 0: |
| 325 | token.type_ = tokenEndOfStream; |
| 326 | break; |
| 327 | default: |
| 328 | ok = false; |
| 329 | break; |
| 330 | } |
| 331 | if ( !ok ) |
| 332 | token.type_ = tokenError; |
| 333 | token.end_ = current_; |
| 334 | return true; |
| 335 | } |
| 336 | |
| 337 | |
| 338 | void |
| 339 | Reader::skipSpaces() |
| 340 | { |
| 341 | while ( current_ != end_ ) |
| 342 | { |
| 343 | Char c = *current_; |
| 344 | if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) |
| 345 | ++current_; |
| 346 | else |
| 347 | break; |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | |
| 352 | bool |
| 353 | Reader::match( Location pattern, |
| 354 | int patternLength ) |
| 355 | { |
| 356 | if ( end_ - current_ < patternLength ) |
| 357 | return false; |
| 358 | int index = patternLength; |
| 359 | while ( index-- ) |
| 360 | if ( current_[index] != pattern[index] ) |
| 361 | return false; |
| 362 | current_ += patternLength; |
| 363 | return true; |
| 364 | } |
| 365 | |
| 366 | |
| 367 | bool |
| 368 | Reader::readComment() |
| 369 | { |
| 370 | Location commentBegin = current_ - 1; |
| 371 | Char c = getNextChar(); |
| 372 | bool successful = false; |
| 373 | if ( c == '*' ) |
| 374 | successful = readCStyleComment(); |
| 375 | else if ( c == '/' ) |
| 376 | successful = readCppStyleComment(); |
| 377 | if ( !successful ) |
| 378 | return false; |
| 379 | |
| 380 | if ( collectComments_ ) |
| 381 | { |
| 382 | CommentPlacement placement = commentBefore; |
| 383 | if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) |
| 384 | { |
| 385 | if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) |
| 386 | placement = commentAfterOnSameLine; |
| 387 | } |
| 388 | |
| 389 | addComment( commentBegin, current_, placement ); |
| 390 | } |
| 391 | return true; |
| 392 | } |
| 393 | |
| 394 | |
| 395 | void |
| 396 | Reader::addComment( Location begin, |
| 397 | Location end, |
| 398 | CommentPlacement placement ) |
| 399 | { |
| 400 | assert( collectComments_ ); |
| 401 | if ( placement == commentAfterOnSameLine ) |
| 402 | { |
| 403 | assert( lastValue_ != 0 ); |
| 404 | lastValue_->setComment( std::string( begin, end ), placement ); |
| 405 | } |
| 406 | else |
| 407 | { |
| 408 | if ( !commentsBefore_.empty() ) |
| 409 | commentsBefore_ += "\n"; |
| 410 | commentsBefore_ += std::string( begin, end ); |
| 411 | } |
| 412 | } |
| 413 | |
| 414 | |
| 415 | bool |
| 416 | Reader::readCStyleComment() |
| 417 | { |
| 418 | while ( current_ != end_ ) |
| 419 | { |
| 420 | Char c = getNextChar(); |
| 421 | if ( c == '*' && *current_ == '/' ) |
| 422 | break; |
| 423 | } |
| 424 | return getNextChar() == '/'; |
| 425 | } |
| 426 | |
| 427 | |
| 428 | bool |
| 429 | Reader::readCppStyleComment() |
| 430 | { |
| 431 | while ( current_ != end_ ) |
| 432 | { |
| 433 | Char c = getNextChar(); |
| 434 | if ( c == '\r' || c == '\n' ) |
| 435 | break; |
| 436 | } |
| 437 | return true; |
| 438 | } |
| 439 | |
| 440 | |
| 441 | void |
| 442 | Reader::readNumber() |
| 443 | { |
| 444 | while ( current_ != end_ ) |
| 445 | { |
| 446 | if ( !(*current_ >= '0' && *current_ <= '9') && |
| 447 | !in( *current_, '.', 'e', 'E', '+', '-' ) ) |
| 448 | break; |
| 449 | ++current_; |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | bool |
| 454 | Reader::readString() |
| 455 | { |
| 456 | Char c = 0; |
| 457 | while ( current_ != end_ ) |
| 458 | { |
| 459 | c = getNextChar(); |
| 460 | if ( c == '\\' ) |
| 461 | getNextChar(); |
| 462 | else if ( c == '"' ) |
| 463 | break; |
| 464 | } |
| 465 | return c == '"'; |
| 466 | } |
| 467 | |
| 468 | |
| 469 | bool |
| 470 | Reader::readObject( Token &/*tokenStart*/ ) |
| 471 | { |
| 472 | Token tokenName; |
| 473 | std::string name; |
| 474 | currentValue() = Value( objectValue ); |
| 475 | while ( readToken( tokenName ) ) |
| 476 | { |
| 477 | bool initialTokenOk = true; |
| 478 | while ( tokenName.type_ == tokenComment && initialTokenOk ) |
| 479 | initialTokenOk = readToken( tokenName ); |
| 480 | if ( !initialTokenOk ) |
| 481 | break; |
| 482 | if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object |
| 483 | return true; |
| 484 | if ( tokenName.type_ != tokenString ) |
| 485 | break; |
| 486 | |
| 487 | name = ""; |
| 488 | if ( !decodeString( tokenName, name ) ) |
| 489 | return recoverFromError( tokenObjectEnd ); |
| 490 | |
| 491 | Token colon; |
| 492 | if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) |
| 493 | { |
| 494 | return addErrorAndRecover( "Missing ':' after object member name", |
| 495 | colon, |
| 496 | tokenObjectEnd ); |
| 497 | } |
| 498 | Value &value = currentValue()[ name ]; |
| 499 | nodes_.push( &value ); |
| 500 | bool ok = readValue(); |
| 501 | nodes_.pop(); |
| 502 | if ( !ok ) // error already set |
| 503 | return recoverFromError( tokenObjectEnd ); |
| 504 | |
| 505 | Token comma; |
| 506 | if ( !readToken( comma ) |
| 507 | || ( comma.type_ != tokenObjectEnd && |
| 508 | comma.type_ != tokenArraySeparator && |
| 509 | comma.type_ != tokenComment ) ) |
| 510 | { |
| 511 | return addErrorAndRecover( "Missing ',' or '}' in object declaration", |
| 512 | comma, |
| 513 | tokenObjectEnd ); |
| 514 | } |
| 515 | bool finalizeTokenOk = true; |
| 516 | while ( comma.type_ == tokenComment && |
| 517 | finalizeTokenOk ) |
| 518 | finalizeTokenOk = readToken( comma ); |
| 519 | if ( comma.type_ == tokenObjectEnd ) |
| 520 | return true; |
| 521 | } |
| 522 | return addErrorAndRecover( "Missing '}' or object member name", |
| 523 | tokenName, |
| 524 | tokenObjectEnd ); |
| 525 | } |
| 526 | |
| 527 | |
| 528 | bool |
| 529 | Reader::readArray( Token &/*tokenStart*/ ) |
| 530 | { |
| 531 | currentValue() = Value( arrayValue ); |
| 532 | skipSpaces(); |
| 533 | if ( *current_ == ']' ) // empty array |
| 534 | { |
| 535 | Token endArray; |
| 536 | readToken( endArray ); |
| 537 | return true; |
| 538 | } |
| 539 | int index = 0; |
| 540 | for (;;) |
| 541 | { |
| 542 | Value &value = currentValue()[ index++ ]; |
| 543 | nodes_.push( &value ); |
| 544 | bool ok = readValue(); |
| 545 | nodes_.pop(); |
| 546 | if ( !ok ) // error already set |
| 547 | return recoverFromError( tokenArrayEnd ); |
| 548 | |
| 549 | Token token; |
| 550 | // Accept Comment after last item in the array. |
| 551 | ok = readToken( token ); |
| 552 | while ( token.type_ == tokenComment && ok ) |
| 553 | { |
| 554 | ok = readToken( token ); |
| 555 | } |
| 556 | bool badTokenType = ( token.type_ != tokenArraySeparator && |
| 557 | token.type_ != tokenArrayEnd ); |
| 558 | if ( !ok || badTokenType ) |
| 559 | { |
| 560 | return addErrorAndRecover( "Missing ',' or ']' in array declaration", |
| 561 | token, |
| 562 | tokenArrayEnd ); |
| 563 | } |
| 564 | if ( token.type_ == tokenArrayEnd ) |
| 565 | break; |
| 566 | } |
| 567 | return true; |
| 568 | } |
| 569 | |
| 570 | |
| 571 | bool |
| 572 | Reader::decodeNumber( Token &token ) |
| 573 | { |
| 574 | bool isDouble = false; |
| 575 | for ( Location inspect = token.start_; inspect != token.end_; ++inspect ) |
| 576 | { |
| 577 | isDouble = isDouble |
| 578 | || in( *inspect, '.', 'e', 'E', '+' ) |
| 579 | || ( *inspect == '-' && inspect != token.start_ ); |
| 580 | } |
| 581 | if ( isDouble ) |
| 582 | return decodeDouble( token ); |
| 583 | // Attempts to parse the number as an integer. If the number is |
| 584 | // larger than the maximum supported value of an integer then |
| 585 | // we decode the number as a double. |
| 586 | Location current = token.start_; |
| 587 | bool isNegative = *current == '-'; |
| 588 | if ( isNegative ) |
| 589 | ++current; |
| 590 | Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) |
| 591 | : Value::maxLargestUInt; |
| 592 | Value::LargestUInt threshold = maxIntegerValue / 10; |
| 593 | Value::LargestUInt value = 0; |
| 594 | while ( current < token.end_ ) |
| 595 | { |
| 596 | Char c = *current++; |
| 597 | if ( c < '0' || c > '9' ) |
| 598 | return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); |
| 599 | Value::UInt digit(c - '0'); |
| 600 | if ( value >= threshold ) |
| 601 | { |
| 602 | // We've hit or exceeded the max value divided by 10 (rounded down). If |
| 603 | // a) we've only just touched the limit, b) this is the last digit, and |
| 604 | // c) it's small enough to fit in that rounding delta, we're okay. |
| 605 | // Otherwise treat this number as a double to avoid overflow. |
| 606 | if (value > threshold || |
| 607 | current != token.end_ || |
| 608 | digit > maxIntegerValue % 10) |
| 609 | { |
| 610 | return decodeDouble( token ); |
| 611 | } |
| 612 | } |
| 613 | value = value * 10 + digit; |
| 614 | } |
| 615 | if ( isNegative ) |
| 616 | currentValue() = -Value::LargestInt( value ); |
| 617 | else if ( value <= Value::LargestUInt(Value::maxInt) ) |
| 618 | currentValue() = Value::LargestInt( value ); |
| 619 | else |
| 620 | currentValue() = value; |
| 621 | return true; |
| 622 | } |
| 623 | |
| 624 | |
| 625 | bool |
| 626 | Reader::decodeDouble( Token &token ) |
| 627 | { |
| 628 | double value = 0; |
| 629 | const int bufferSize = 32; |
| 630 | int count; |
| 631 | int length = int(token.end_ - token.start_); |
| 632 | |
| 633 | // Sanity check to avoid buffer overflow exploits. |
| 634 | if (length < 0) { |
| 635 | return addError( "Unable to parse token length", token ); |
| 636 | } |
| 637 | |
| 638 | // Avoid using a string constant for the format control string given to |
| 639 | // sscanf, as this can cause hard to debug crashes on OS X. See here for more |
| 640 | // info: |
| 641 | // |
| 642 | // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html |
| 643 | char format[] = "%lf"; |
| 644 | |
| 645 | if ( length <= bufferSize ) |
| 646 | { |
| 647 | Char buffer[bufferSize+1]; |
| 648 | memcpy( buffer, token.start_, length ); |
| 649 | buffer[length] = 0; |
| 650 | count = sscanf( buffer, format, &value ); |
| 651 | } |
| 652 | else |
| 653 | { |
| 654 | std::string buffer( token.start_, token.end_ ); |
| 655 | count = sscanf( buffer.c_str(), format, &value ); |
| 656 | } |
| 657 | |
| 658 | if ( count != 1 ) |
| 659 | return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); |
| 660 | currentValue() = value; |
| 661 | return true; |
| 662 | } |
| 663 | |
| 664 | |
| 665 | bool |
| 666 | Reader::decodeString( Token &token ) |
| 667 | { |
| 668 | std::string decoded; |
| 669 | if ( !decodeString( token, decoded ) ) |
| 670 | return false; |
| 671 | currentValue() = decoded; |
| 672 | return true; |
| 673 | } |
| 674 | |
| 675 | |
| 676 | bool |
| 677 | Reader::decodeString( Token &token, std::string &decoded ) |
| 678 | { |
| 679 | decoded.reserve( token.end_ - token.start_ - 2 ); |
| 680 | Location current = token.start_ + 1; // skip '"' |
| 681 | Location end = token.end_ - 1; // do not include '"' |
| 682 | while ( current != end ) |
| 683 | { |
| 684 | Char c = *current++; |
| 685 | if ( c == '"' ) |
| 686 | break; |
| 687 | else if ( c == '\\' ) |
| 688 | { |
| 689 | if ( current == end ) |
| 690 | return addError( "Empty escape sequence in string", token, current ); |
| 691 | Char escape = *current++; |
| 692 | switch ( escape ) |
| 693 | { |
| 694 | case '"': decoded += '"'; break; |
| 695 | case '/': decoded += '/'; break; |
| 696 | case '\\': decoded += '\\'; break; |
| 697 | case 'b': decoded += '\b'; break; |
| 698 | case 'f': decoded += '\f'; break; |
| 699 | case 'n': decoded += '\n'; break; |
| 700 | case 'r': decoded += '\r'; break; |
| 701 | case 't': decoded += '\t'; break; |
| 702 | case 'u': |
| 703 | { |
| 704 | unsigned int unicode; |
| 705 | if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) |
| 706 | return false; |
| 707 | decoded += codePointToUTF8(unicode); |
| 708 | } |
| 709 | break; |
| 710 | default: |
| 711 | return addError( "Bad escape sequence in string", token, current ); |
| 712 | } |
| 713 | } |
| 714 | else |
| 715 | { |
| 716 | decoded += c; |
| 717 | } |
| 718 | } |
| 719 | return true; |
| 720 | } |
| 721 | |
| 722 | bool |
| 723 | Reader::decodeUnicodeCodePoint( Token &token, |
| 724 | Location ¤t, |
| 725 | Location end, |
| 726 | unsigned int &unicode ) |
| 727 | { |
| 728 | |
| 729 | if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) |
| 730 | return false; |
| 731 | if (unicode >= 0xD800 && unicode <= 0xDBFF) |
| 732 | { |
| 733 | // surrogate pairs |
| 734 | if (end - current < 6) |
| 735 | return addError( "additional six characters expected to parse unicode surrogate pair.", token, current ); |
| 736 | unsigned int surrogatePair; |
| 737 | if (*(current++) == '\\' && *(current++)== 'u') |
| 738 | { |
| 739 | if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) |
| 740 | { |
| 741 | unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); |
| 742 | } |
| 743 | else |
| 744 | return false; |
| 745 | } |
| 746 | else |
| 747 | return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current ); |
| 748 | } |
| 749 | return true; |
| 750 | } |
| 751 | |
| 752 | bool |
| 753 | Reader::decodeUnicodeEscapeSequence( Token &token, |
| 754 | Location ¤t, |
| 755 | Location end, |
| 756 | unsigned int &unicode ) |
| 757 | { |
| 758 | if ( end - current < 4 ) |
| 759 | return addError( "Bad unicode escape sequence in string: four digits expected.", token, current ); |
| 760 | unicode = 0; |
| 761 | for ( int index =0; index < 4; ++index ) |
| 762 | { |
| 763 | Char c = *current++; |
| 764 | unicode *= 16; |
| 765 | if ( c >= '0' && c <= '9' ) |
| 766 | unicode += c - '0'; |
| 767 | else if ( c >= 'a' && c <= 'f' ) |
| 768 | unicode += c - 'a' + 10; |
| 769 | else if ( c >= 'A' && c <= 'F' ) |
| 770 | unicode += c - 'A' + 10; |
| 771 | else |
| 772 | return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current ); |
| 773 | } |
| 774 | return true; |
| 775 | } |
| 776 | |
| 777 | |
| 778 | bool |
| 779 | Reader::addError( const std::string &message, |
| 780 | Token &token, |
| 781 | Location extra ) |
| 782 | { |
| 783 | ErrorInfo info; |
| 784 | info.token_ = token; |
| 785 | info.message_ = message; |
| 786 | info.extra_ = extra; |
| 787 | errors_.push_back( info ); |
| 788 | return false; |
| 789 | } |
| 790 | |
| 791 | |
| 792 | bool |
| 793 | Reader::recoverFromError( TokenType skipUntilToken ) |
| 794 | { |
| 795 | int errorCount = int(errors_.size()); |
| 796 | Token skip; |
| 797 | for (;;) |
| 798 | { |
| 799 | if ( !readToken(skip) ) |
| 800 | errors_.resize( errorCount ); // discard errors caused by recovery |
| 801 | if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) |
| 802 | break; |
| 803 | } |
| 804 | errors_.resize( errorCount ); |
| 805 | return false; |
| 806 | } |
| 807 | |
| 808 | |
| 809 | bool |
| 810 | Reader::addErrorAndRecover( const std::string &message, |
| 811 | Token &token, |
| 812 | TokenType skipUntilToken ) |
| 813 | { |
| 814 | addError( message, token ); |
| 815 | return recoverFromError( skipUntilToken ); |
| 816 | } |
| 817 | |
| 818 | |
| 819 | Value & |
| 820 | Reader::currentValue() |
| 821 | { |
| 822 | return *(nodes_.top()); |
| 823 | } |
| 824 | |
| 825 | |
| 826 | Reader::Char |
| 827 | Reader::getNextChar() |
| 828 | { |
| 829 | if ( current_ == end_ ) |
| 830 | return 0; |
| 831 | return *current_++; |
| 832 | } |
| 833 | |
| 834 | |
| 835 | void |
| 836 | Reader::getLocationLineAndColumn( Location location, |
| 837 | int &line, |
| 838 | int &column ) const |
| 839 | { |
| 840 | Location current = begin_; |
| 841 | Location lastLineStart = current; |
| 842 | line = 0; |
| 843 | while ( current < location && current != end_ ) |
| 844 | { |
| 845 | Char c = *current++; |
| 846 | if ( c == '\r' ) |
| 847 | { |
| 848 | if ( *current == '\n' ) |
| 849 | ++current; |
| 850 | lastLineStart = current; |
| 851 | ++line; |
| 852 | } |
| 853 | else if ( c == '\n' ) |
| 854 | { |
| 855 | lastLineStart = current; |
| 856 | ++line; |
| 857 | } |
| 858 | } |
| 859 | // column & line start at 1 |
| 860 | column = int(location - lastLineStart) + 1; |
| 861 | ++line; |
| 862 | } |
| 863 | |
| 864 | |
| 865 | std::string |
| 866 | Reader::getLocationLineAndColumn( Location location ) const |
| 867 | { |
| 868 | int line, column; |
| 869 | getLocationLineAndColumn( location, line, column ); |
| 870 | char buffer[18+16+16+1]; |
| 871 | sprintf( buffer, "Line %d, Column %d", line, column ); |
| 872 | return buffer; |
| 873 | } |
| 874 | |
| 875 | |
| 876 | // Deprecated. Preserved for backward compatibility |
| 877 | std::string |
| 878 | Reader::getFormatedErrorMessages() const |
| 879 | { |
| 880 | return getFormattedErrorMessages(); |
| 881 | } |
| 882 | |
| 883 | |
| 884 | std::string |
| 885 | Reader::getFormattedErrorMessages() const |
| 886 | { |
| 887 | std::string formattedMessage; |
| 888 | for ( Errors::const_iterator itError = errors_.begin(); |
| 889 | itError != errors_.end(); |
| 890 | ++itError ) |
| 891 | { |
| 892 | const ErrorInfo &error = *itError; |
| 893 | formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n"; |
| 894 | formattedMessage += " " + error.message_ + "\n"; |
| 895 | if ( error.extra_ ) |
| 896 | formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n"; |
| 897 | } |
| 898 | return formattedMessage; |
| 899 | } |
| 900 | |
| 901 | |
| 902 | std::istream& operator>>( std::istream &sin, Value &root ) |
| 903 | { |
| 904 | Json::Reader reader; |
| 905 | bool ok = reader.parse(sin, root, true); |
| 906 | if (!ok) { |
| 907 | fprintf( |
| 908 | stderr, |
| 909 | "Error from reader: %s", |
| 910 | reader.getFormattedErrorMessages().c_str()); |
| 911 | |
| 912 | JSON_FAIL_MESSAGE("reader error"); |
| 913 | } |
| 914 | return sin; |
| 915 | } |
| 916 | |
| 917 | |
| 918 | } // namespace Json |