blob: 5d3c5213554ee353a968bfaee0386c88f8891378 [file] [log] [blame]
license.botf003cfe2008-08-24 09:55:55 +09001// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commit3f4a7322008-07-27 06:49:38 +09004//
5// A JSON parser. Converts strings of JSON into a Value object (see
6// base/values.h).
7// http://www.ietf.org/rfc/rfc4627.txt?number=4627
8//
9// Known limitations/deviations from the RFC:
10// - Only knows how to parse ints within the range of a signed 32 bit int and
11// decimal numbers within a double.
12// - Assumes input is encoded as UTF8. The spec says we should allow UTF-16
13// (BE or LE) and UTF-32 (BE or LE) as well.
14// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
15// by the RFC).
// - The Unicode FAQ (http://unicode.org/faq/utf_bom.html) notes that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To keep the function from mistaking a UTF-8 BOM for an invalid
//   character, it skips a Unicode BOM at the beginning of the Unicode string
//   (converted from the input UTF-8 string) before parsing it.
23//
tc@google.comce6a78d2008-07-29 09:01:31 +090024// TODO(tc): It would be nice to give back an error string when we fail to
25// parse JSON.
// TODO(tc): Add a parsing option to relax the requirement that object keys
// be wrapped in double quotes.
28// TODO(tc): Add an option to disable comment stripping
initial.commit3f4a7322008-07-27 06:49:38 +090029
30#ifndef CHROME_COMMON_JSON_READER_H__
31#define CHROME_COMMON_JSON_READER_H__
32
33#include <string>
34
35#include "base/basictypes.h"
36#include "testing/gtest/include/gtest/gtest_prod.h"
37
38class Value;
39
40class JSONReader {
41 public:
42 // A struct to hold a JS token.
43 class Token {
44 public:
45 enum Type {
46 OBJECT_BEGIN, // {
47 OBJECT_END, // }
48 ARRAY_BEGIN, // [
49 ARRAY_END, // ]
50 STRING,
51 NUMBER,
52 BOOL_TRUE, // true
53 BOOL_FALSE, // false
54 NULL_TOKEN, // null
55 LIST_SEPARATOR, // ,
56 OBJECT_PAIR_SEPARATOR, // :
57 END_OF_INPUT,
58 INVALID_TOKEN,
59 };
60 Token(Type t, const wchar_t* b, int len)
61 : type(t), begin(b), length(len) {}
62
63 Type type;
64
65 // A pointer into JSONReader::json_pos_ that's the beginning of this token.
66 const wchar_t* begin;
67
68 // End should be one char past the end of the token.
69 int length;
70
71 // Get the character that's one past the end of this token.
72 wchar_t NextChar() {
73 return *(begin + length);
74 }
75 };
76
tc@google.comce6a78d2008-07-29 09:01:31 +090077 // Reads and parses |json| and populates |root|. If |json| is not a properly
78 // formed JSON string, returns false and leaves root unaltered. If
79 // allow_trailing_comma is true, we will ignore trailing commas in objects
80 // and arrays even though this goes against the RFC.
81 static bool Read(const std::string& json,
82 Value** root,
83 bool allow_trailing_comma);
initial.commit3f4a7322008-07-27 06:49:38 +090084
85 private:
tc@google.comce6a78d2008-07-29 09:01:31 +090086 JSONReader(const wchar_t* json_start_pos, bool allow_trailing_comma);
initial.commit3f4a7322008-07-27 06:49:38 +090087 DISALLOW_EVIL_CONSTRUCTORS(JSONReader);
88
89 FRIEND_TEST(JSONReaderTest, Reading);
90
91 // Pass through method from JSONReader::Read. We have this so unittests can
92 // disable the root check.
93 static bool JsonToValue(const std::string& json, Value** root,
tc@google.comce6a78d2008-07-29 09:01:31 +090094 bool check_root,
95 bool allow_trailing_comma);
initial.commit3f4a7322008-07-27 06:49:38 +090096
97 // Recursively build Value. Returns false if we don't have a valid JSON
98 // string. If |is_root| is true, we verify that the root element is either
99 // an object or an array.
100 bool BuildValue(Value** root, bool is_root);
101
102 // Parses a sequence of characters into a Token::NUMBER. If the sequence of
103 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
104 // that DecodeNumber is used to actually convert from a string to an
105 // int/double.
106 Token ParseNumberToken();
107
108 // Try and convert the substring that token holds into an int or a double. If
109 // we can (ie., no overflow), return true and create the appropriate value
110 // for |node|. Return false if we can't do the conversion.
111 bool DecodeNumber(const Token& token, Value** node);
112
113 // Parses a sequence of characters into a Token::STRING. If the sequence of
114 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
115 // that DecodeString is used to actually decode the escaped string into an
116 // actual wstring.
117 Token ParseStringToken();
118
119 // Convert the substring into a value string. This should always succeed
120 // (otherwise ParseStringToken would have failed), but returns a success bool
121 // just in case.
122 bool DecodeString(const Token& token, Value** node);
123
124 // Grabs the next token in the JSON stream. This does not increment the
125 // stream so it can be used to look ahead at the next token.
126 Token ParseToken();
127
128 // Increments json_pos_ past leading whitespace and comments.
129 void EatWhitespaceAndComments();
130
131 // If json_pos_ is at the start of a comment, eat it, otherwise, returns
132 // false.
133 bool EatComment();
134
135 // Checks if json_pos_ matches str.
136 bool NextStringMatch(const std::wstring& str);
137
138 // Pointer to the current position in the input string.
139 const wchar_t* json_pos_;
140
141 // Used to keep track of how many nested lists/dicts there are.
142 int stack_depth_;
tc@google.comce6a78d2008-07-29 09:01:31 +0900143
144 // A parser flag that allows trailing commas in objects and arrays.
145 bool allow_trailing_comma_;
initial.commit3f4a7322008-07-27 06:49:38 +0900146};
147
148#endif // CHROME_COMMON_JSON_READER_H__
license.botf003cfe2008-08-24 09:55:55 +0900149