blob: 66d8f68a246083c830ebf6bfae2a1d943bce6bb3 [file] [log] [blame]
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "base/json_reader.h"
#include "base/float_util.h"
#include "base/logging.h"
#include "base/string_util.h"
#include "base/values.h"
static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN,
0, 0);
static const int kStackLimit = 100;
namespace {
inline int HexToInt(wchar_t c) {
if ('0' <= c && c <= '9') {
return c - '0';
} else if ('A' <= c && c <= 'F') {
return c - 'A' + 10;
} else if ('a' <= c && c <= 'f') {
return c - 'a' + 10;
}
NOTREACHED();
return 0;
}
// A helper method for ParseNumberToken. It reads an int from the end of
// token. The method returns false if there is no valid integer at the end of
// the token.
bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) {
wchar_t first = token.NextChar();
int len = 0;
// Read in more digits
wchar_t c = first;
while ('\0' != c && '0' <= c && c <= '9') {
++token.length;
++len;
c = token.NextChar();
}
// We need at least 1 digit.
if (len == 0)
return false;
if (!can_have_leading_zeros && len > 1 && '0' == first)
return false;
return true;
}
// A helper method for ParseStringToken. It reads |digits| hex digits from the
// token. If the sequence if digits is not valid (contains other characters),
// the method returns false.
bool ReadHexDigits(JSONReader::Token& token, int digits) {
for (int i = 1; i <= digits; ++i) {
wchar_t c = *(token.begin + token.length + i);
if ('\0' == c)
return false;
if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
('A' <= c && c <= 'F'))) {
return false;
}
}
token.length += digits;
return true;
}
} // anonymous namespace
/* static */
bool JSONReader::Read(const std::string& json,
Value** root,
bool allow_trailing_comma) {
return JsonToValue(json, root, true, allow_trailing_comma);
}
/* static */
bool JSONReader::JsonToValue(const std::string& json,
Value** root,
bool check_root,
bool allow_trailing_comma) {
// Assume input is UTF8. The conversion from UTF8 to wstring removes null
// bytes for us (a good thing).
std::wstring json_wide(UTF8ToWide(json));
const wchar_t* json_cstr = json_wide.c_str();
// When the input JSON string starts with a UTF-8 Byte-Order-Mark
// (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode
// BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from
// mis-treating a Unicode BOM as an invalid character and returning false,
// skip a converted Unicode BOM if it exists.
if (!json_wide.empty() && json_cstr[0] == 0xFEFF) {
++json_cstr;
}
JSONReader reader(json_cstr, allow_trailing_comma);
Value* temp_root = NULL;
bool success = reader.BuildValue(&temp_root, check_root);
// Only modify root_ if we have valid JSON and nothing else.
if (success && reader.ParseToken().type == Token::END_OF_INPUT) {
*root = temp_root;
return true;
}
if (temp_root)
delete temp_root;
return false;
}
JSONReader::JSONReader(const wchar_t* json_start_pos,
bool allow_trailing_comma)
: json_pos_(json_start_pos),
stack_depth_(0),
allow_trailing_comma_(allow_trailing_comma) {}
bool JSONReader::BuildValue(Value** node, bool is_root) {
++stack_depth_;
if (stack_depth_ > kStackLimit)
return false;
Token token = ParseToken();
// The root token must be an array or an object.
if (is_root && token.type != Token::OBJECT_BEGIN &&
token.type != Token::ARRAY_BEGIN) {
return false;
}
switch (token.type) {
case Token::END_OF_INPUT:
case Token::INVALID_TOKEN:
return false;
case Token::NULL_TOKEN:
*node = Value::CreateNullValue();
break;
case Token::BOOL_TRUE:
*node = Value::CreateBooleanValue(true);
break;
case Token::BOOL_FALSE:
*node = Value::CreateBooleanValue(false);
break;
case Token::NUMBER:
if (!DecodeNumber(token, node))
return false;
break;
case Token::STRING:
if (!DecodeString(token, node))
return false;
break;
case Token::ARRAY_BEGIN:
{
json_pos_ += token.length;
token = ParseToken();
ListValue* array = new ListValue;
while (token.type != Token::ARRAY_END) {
Value* array_node = NULL;
if (!BuildValue(&array_node, false)) {
delete array;
return false;
}
array->Append(array_node);
// After a list value, we expect a comma or the end of the list.
token = ParseToken();
if (token.type == Token::LIST_SEPARATOR) {
json_pos_ += token.length;
token = ParseToken();
// Trailing commas are invalid according to the JSON RFC, but some
// consumers need the parsing leniency, so handle accordingly.
if (token.type == Token::ARRAY_END) {
if (!allow_trailing_comma_) {
delete array;
return false;
}
// Trailing comma OK, stop parsing the Array.
break;
}
} else if (token.type != Token::ARRAY_END) {
// Unexpected value after list value. Bail out.
delete array;
return false;
}
}
if (token.type != Token::ARRAY_END) {
delete array;
return false;
}
*node = array;
break;
}
case Token::OBJECT_BEGIN:
{
json_pos_ += token.length;
token = ParseToken();
DictionaryValue* dict = new DictionaryValue;
while (token.type != Token::OBJECT_END) {
if (token.type != Token::STRING) {
delete dict;
return false;
}
Value* dict_key_value = NULL;
if (!DecodeString(token, &dict_key_value)) {
delete dict;
return false;
}
// Convert the key into a wstring.
std::wstring dict_key;
bool success = dict_key_value->GetAsString(&dict_key);
DCHECK(success);
delete dict_key_value;
json_pos_ += token.length;
token = ParseToken();
if (token.type != Token::OBJECT_PAIR_SEPARATOR) {
delete dict;
return false;
}
json_pos_ += token.length;
token = ParseToken();
Value* dict_value = NULL;
if (!BuildValue(&dict_value, false)) {
delete dict;
return false;
}
dict->Set(dict_key, dict_value);
// After a key/value pair, we expect a comma or the end of the
// object.
token = ParseToken();
if (token.type == Token::LIST_SEPARATOR) {
json_pos_ += token.length;
token = ParseToken();
// Trailing commas are invalid according to the JSON RFC, but some
// consumers need the parsing leniency, so handle accordingly.
if (token.type == Token::OBJECT_END) {
if (!allow_trailing_comma_) {
delete dict;
return false;
}
// Trailing comma OK, stop parsing the Object.
break;
}
} else if (token.type != Token::OBJECT_END) {
// Unexpected value after last object value. Bail out.
delete dict;
return false;
}
}
if (token.type != Token::OBJECT_END) {
delete dict;
return false;
}
*node = dict;
break;
}
default:
// We got a token that's not a value.
return false;
}
json_pos_ += token.length;
--stack_depth_;
return true;
}
JSONReader::Token JSONReader::ParseNumberToken() {
// We just grab the number here. We validate the size in DecodeNumber.
// According to RFC4627, a valid number is: [minus] int [frac] [exp]
Token token(Token::NUMBER, json_pos_, 0);
wchar_t c = *json_pos_;
if ('-' == c) {
++token.length;
c = token.NextChar();
}
if (!ReadInt(token, false))
return kInvalidToken;
// Optional fraction part
c = token.NextChar();
if ('.' == c) {
++token.length;
if (!ReadInt(token, true))
return kInvalidToken;
c = token.NextChar();
}
// Optional exponent part
if ('e' == c || 'E' == c) {
++token.length;
c = token.NextChar();
if ('-' == c || '+' == c) {
++token.length;
c = token.NextChar();
}
if (!ReadInt(token, true))
return kInvalidToken;
}
return token;
}
bool JSONReader::DecodeNumber(const Token& token, Value** node) {
const std::wstring num_string(token.begin, token.length);
int num_int;
if (StringToInt(num_string, &num_int)) {
*node = Value::CreateIntegerValue(num_int);
return true;
}
double num_double;
if (StringToDouble(num_string, &num_double) && base::IsFinite(num_double)) {
*node = Value::CreateRealValue(num_double);
return true;
}
return false;
}
JSONReader::Token JSONReader::ParseStringToken() {
Token token(Token::STRING, json_pos_, 1);
wchar_t c = token.NextChar();
while ('\0' != c) {
if ('\\' == c) {
++token.length;
c = token.NextChar();
// Make sure the escaped char is valid.
switch (c) {
case 'x':
if (!ReadHexDigits(token, 2))
return kInvalidToken;
break;
case 'u':
if (!ReadHexDigits(token, 4))
return kInvalidToken;
break;
case '\\':
case '/':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case '"':
break;
default:
return kInvalidToken;
}
} else if ('"' == c) {
++token.length;
return token;
}
++token.length;
c = token.NextChar();
}
return kInvalidToken;
}
bool JSONReader::DecodeString(const Token& token, Value** node) {
std::wstring decoded_str;
decoded_str.reserve(token.length - 2);
for (int i = 1; i < token.length - 1; ++i) {
wchar_t c = *(token.begin + i);
if ('\\' == c) {
++i;
c = *(token.begin + i);
switch (c) {
case '"':
case '/':
case '\\':
decoded_str.push_back(c);
break;
case 'b':
decoded_str.push_back('\b');
break;
case 'f':
decoded_str.push_back('\f');
break;
case 'n':
decoded_str.push_back('\n');
break;
case 'r':
decoded_str.push_back('\r');
break;
case 't':
decoded_str.push_back('\t');
break;
case 'x':
decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 4) +
HexToInt(*(token.begin + i + 2)));
i += 2;
break;
case 'u':
decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 12 ) +
(HexToInt(*(token.begin + i + 2)) << 8) +
(HexToInt(*(token.begin + i + 3)) << 4) +
HexToInt(*(token.begin + i + 4)));
i += 4;
break;
default:
// We should only have valid strings at this point. If not,
// ParseStringToken didn't do it's job.
NOTREACHED();
return false;
}
} else {
// Not escaped
decoded_str.push_back(c);
}
}
*node = Value::CreateStringValue(decoded_str);
return true;
}
JSONReader::Token JSONReader::ParseToken() {
static const std::wstring kNullString(L"null");
static const std::wstring kTrueString(L"true");
static const std::wstring kFalseString(L"false");
EatWhitespaceAndComments();
Token token(Token::INVALID_TOKEN, 0, 0);
switch (*json_pos_) {
case '\0':
token.type = Token::END_OF_INPUT;
break;
case 'n':
if (NextStringMatch(kNullString))
token = Token(Token::NULL_TOKEN, json_pos_, 4);
break;
case 't':
if (NextStringMatch(kTrueString))
token = Token(Token::BOOL_TRUE, json_pos_, 4);
break;
case 'f':
if (NextStringMatch(kFalseString))
token = Token(Token::BOOL_FALSE, json_pos_, 5);
break;
case '[':
token = Token(Token::ARRAY_BEGIN, json_pos_, 1);
break;
case ']':
token = Token(Token::ARRAY_END, json_pos_, 1);
break;
case ',':
token = Token(Token::LIST_SEPARATOR, json_pos_, 1);
break;
case '{':
token = Token(Token::OBJECT_BEGIN, json_pos_, 1);
break;
case '}':
token = Token(Token::OBJECT_END, json_pos_, 1);
break;
case ':':
token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '-':
token = ParseNumberToken();
break;
case '"':
token = ParseStringToken();
break;
}
return token;
}
bool JSONReader::NextStringMatch(const std::wstring& str) {
for (size_t i = 0; i < str.length(); ++i) {
if ('\0' == *json_pos_)
return false;
if (*(json_pos_ + i) != str[i])
return false;
}
return true;
}
void JSONReader::EatWhitespaceAndComments() {
while ('\0' != *json_pos_) {
switch (*json_pos_) {
case ' ':
case '\n':
case '\r':
case '\t':
++json_pos_;
break;
case '/':
// TODO(tc): This isn't in the RFC so it should be a parser flag.
if (!EatComment())
return;
break;
default:
// Not a whitespace char, just exit.
return;
}
}
}
bool JSONReader::EatComment() {
if ('/' != *json_pos_)
return false;
wchar_t next_char = *(json_pos_ + 1);
if ('/' == next_char) {
// Line comment, read until \n or \r
json_pos_ += 2;
while ('\0' != *json_pos_) {
switch (*json_pos_) {
case '\n':
case '\r':
++json_pos_;
return true;
default:
++json_pos_;
}
}
} else if ('*' == next_char) {
// Block comment, read until */
json_pos_ += 2;
while ('\0' != *json_pos_) {
switch (*json_pos_) {
case '*':
if ('/' == *(json_pos_ + 1)) {
json_pos_ += 2;
return true;
}
default:
++json_pos_;
}
}
} else {
return false;
}
return true;
}