license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 1 | // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 4 | // |
| 5 | // A JSON parser. Converts strings of JSON into a Value object (see |
| 6 | // base/values.h). |
| 7 | // http://www.ietf.org/rfc/rfc4627.txt?number=4627 |
| 8 | // |
| 9 | // Known limitations/deviations from the RFC: |
| 10 | // - Only knows how to parse ints within the range of a signed 32 bit int and |
| 11 | // decimal numbers within a double. |
| 12 | // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 |
| 13 | // (BE or LE) and UTF-32 (BE or LE) as well. |
| 14 | // - We limit nesting to 100 levels to prevent stack overflow (this is allowed |
| 15 | // by the RFC). |
| 16 | // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") says that a data |
| 17 | // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input |
| 18 | // UTF-8 string for the JSONReader::JsonToValue() function may start with a |
| 19 | // UTF-8 BOM (0xEF, 0xBB, 0xBF). |
| 20 | // To keep the function from mistaking a UTF-8 BOM for an invalid |
| 21 | // character, the function skips a Unicode BOM at the beginning of the |
| 22 | // Unicode string (converted from the input UTF-8 string) before parsing it. |
| 23 | // |
tc@google.com | ce6a78d | 2008-07-29 09:01:31 +0900 | [diff] [blame] | 24 | // TODO(tc): It would be nice to give back an error string when we fail to |
| 25 | // parse JSON. |
| 26 | // TODO(tc): Add a parsing option to relax object keys being wrapped in |
| 27 | // double quotes |
| 28 | // TODO(tc): Add an option to disable comment stripping |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 29 | |
| 30 | #ifndef CHROME_COMMON_JSON_READER_H__ |
| 31 | #define CHROME_COMMON_JSON_READER_H__ |
| 32 | |
| 33 | #include <string> |
| 34 | |
| 35 | #include "base/basictypes.h" |
| 36 | #include "testing/gtest/include/gtest/gtest_prod.h" |
| 37 | |
| 38 | class Value; |
| 39 | |
| 40 | class JSONReader { |
| 41 | public: |
// Describes one lexical token of the JSON input: its kind plus the span of
// characters it covers. The token only stores a pointer into the input; it
// neither copies nor frees the characters it refers to.
class Token {
 public:
  enum Type {
    OBJECT_BEGIN,           // {
    OBJECT_END,             // }
    ARRAY_BEGIN,            // [
    ARRAY_END,              // ]
    STRING,
    NUMBER,
    BOOL_TRUE,              // true
    BOOL_FALSE,             // false
    NULL_TOKEN,             // null
    LIST_SEPARATOR,         // ,
    OBJECT_PAIR_SEPARATOR,  // :
    END_OF_INPUT,
    INVALID_TOKEN,
  };

  // |t| is the kind of token, |b| points at its first character and |len| is
  // the number of characters it spans.
  Token(Type t, const wchar_t* b, int len)
      : type(t), begin(b), length(len) {}

  Type type;

  // A pointer into the JSON input (see JSONReader::json_pos_) that's the
  // beginning of this token.
  const wchar_t* begin;

  // Number of characters in the token, so |begin + length| is one char past
  // the end of the token.
  int length;

  // Get the character that's one past the end of this token. This reads
  // |begin[length]|, so that position must be readable (e.g. the terminating
  // NUL of the input string). Declared const so it can be called on tokens
  // that are passed around by const reference.
  wchar_t NextChar() const {
    return begin[length];
  }
};
| 76 | |
tc@google.com | ce6a78d | 2008-07-29 09:01:31 +0900 | [diff] [blame] | 77 | // Reads and parses |json| and populates |root|. If |json| is not a properly |
| 78 | // formed JSON string, returns false and leaves |root| unaltered. If |
| 79 | // |allow_trailing_comma| is true, we will ignore trailing commas in objects |
| 80 | // and arrays even though this goes against the RFC. |
| 81 | static bool Read(const std::string& json, |
| 82 | Value** root, |
| 83 | bool allow_trailing_comma); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 84 | |
| 85 | private: |
tc@google.com | ce6a78d | 2008-07-29 09:01:31 +0900 | [diff] [blame] | 86 | JSONReader(const wchar_t* json_start_pos, bool allow_trailing_comma); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 87 | DISALLOW_EVIL_CONSTRUCTORS(JSONReader); |
| 88 | |
| 89 | FRIEND_TEST(JSONReaderTest, Reading); |
| 90 | |
| 91 | // Pass through method from JSONReader::Read. We have this so unittests can |
| 92 | // disable the root check. |
| 93 | static bool JsonToValue(const std::string& json, Value** root, |
tc@google.com | ce6a78d | 2008-07-29 09:01:31 +0900 | [diff] [blame] | 94 | bool check_root, |
| 95 | bool allow_trailing_comma); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 96 | |
| 97 | // Recursively build Value. Returns false if we don't have a valid JSON |
| 98 | // string. If |is_root| is true, we verify that the root element is either |
| 99 | // an object or an array. |
| 100 | bool BuildValue(Value** root, bool is_root); |
| 101 | |
| 102 | // Parses a sequence of characters into a Token::NUMBER. If the sequence of |
| 103 | // characters is not a valid number, returns a Token::INVALID_TOKEN. Note |
| 104 | // that DecodeNumber is used to actually convert from a string to an |
| 105 | // int/double. |
| 106 | Token ParseNumberToken(); |
| 107 | |
| 108 | // Try and convert the substring that token holds into an int or a double. If |
| 109 | // we can (i.e., no overflow), return true and create the appropriate value |
| 110 | // for |node|. Return false if we can't do the conversion. |
| 111 | bool DecodeNumber(const Token& token, Value** node); |
| 112 | |
| 113 | // Parses a sequence of characters into a Token::STRING. If the sequence of |
| 114 | // characters is not a valid string, returns a Token::INVALID_TOKEN. Note |
| 115 | // that DecodeString is used to actually decode the escaped string into an |
| 116 | // actual wstring. |
| 117 | Token ParseStringToken(); |
| 118 | |
| 119 | // Convert the substring into a value string. This should always succeed |
| 120 | // (otherwise ParseStringToken would have failed), but returns a success bool |
| 121 | // just in case. |
| 122 | bool DecodeString(const Token& token, Value** node); |
| 123 | |
| 124 | // Grabs the next token in the JSON stream. This does not increment the |
| 125 | // stream so it can be used to look ahead at the next token. |
| 126 | Token ParseToken(); |
| 127 | |
| 128 | // Increments json_pos_ past leading whitespace and comments. |
| 129 | void EatWhitespaceAndComments(); |
| 130 | |
| 131 | // If json_pos_ is at the start of a comment, eats it; otherwise, returns |
| 132 | // false. |
| 133 | bool EatComment(); |
| 134 | |
| 135 | // Checks if json_pos_ matches str. |
| 136 | bool NextStringMatch(const std::wstring& str); |
| 137 | |
| 138 | // Pointer to the current position in the input string. |
| 139 | const wchar_t* json_pos_; |
| 140 | |
| 141 | // Used to keep track of how many nested lists/dicts there are. |
| 142 | int stack_depth_; |
tc@google.com | ce6a78d | 2008-07-29 09:01:31 +0900 | [diff] [blame] | 143 | |
| 144 | // A parser flag that allows trailing commas in objects and arrays. |
| 145 | bool allow_trailing_comma_; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 146 | }; |
| 147 | |
| 148 | #endif // CHROME_COMMON_JSON_READER_H__ |
license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame^] | 149 | |