Blame - src/json-parser.cc - fp2-dev/platform/external/v8

blob: b7f57c2c37c5a47d66ccbb7b6cba5062fa700e08 [file] [log] [blame]

Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	1	// Copyright 2011 the V8 project authors. All rights reserved.
				2	// Redistribution and use in source and binary forms, with or without
				3	// modification, are permitted provided that the following conditions are
				4	// met:
				5	//
				6	// * Redistributions of source code must retain the above copyright
				7	// notice, this list of conditions and the following disclaimer.
				8	// * Redistributions in binary form must reproduce the above
				9	// copyright notice, this list of conditions and the following
				10	// disclaimer in the documentation and/or other materials provided
				11	// with the distribution.
				12	// * Neither the name of Google Inc. nor the names of its
				13	// contributors may be used to endorse or promote products derived
				14	// from this software without specific prior written permission.
				15	//
				16	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				20	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				21	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				22	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				23	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				24	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				25	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				26	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27
				28	#include "v8.h"
				29
				30	#include "char-predicates-inl.h"
				31	#include "conversions.h"
				32	#include "json-parser.h"
				33	#include "messages.h"
				34	#include "spaces.h"
				35
				36	namespace v8 {
				37	namespace internal {
				38
				39
				40	Handle<Object> JsonParser::ParseJson(Handle<String> source) {
				41	isolate_ = source->map()->isolate();
				42	source_ = Handle<String>(source->TryFlattenGetString());
				43	source_length_ = source_->length() - 1;
				44
				45	// Optimized fast case where we only have ascii characters.
				46	if (source_->IsSeqAsciiString()) {
				47	is_sequential_ascii_ = true;
				48	seq_source_ = Handle<SeqAsciiString>::cast(source_);
				49	} else {
				50	is_sequential_ascii_ = false;
				51	}
				52
				53	// Set initial position right before the string.
				54	position_ = -1;
				55	// Advance to the first character (posibly EOS)
				56	Advance();
				57	Next();
				58	Handle<Object> result = ParseJsonValue();
				59	if (result.is_null() \|\| Next() != Token::EOS) {
				60	// Parse failed. Scanner's current token is the unexpected token.
				61	Token::Value token = current_.token;
				62
				63	const char* message;
				64	const char* name_opt = NULL;
				65
				66	switch (token) {
				67	case Token::EOS:
				68	message = "unexpected_eos";
				69	break;
				70	case Token::NUMBER:
				71	message = "unexpected_token_number";
				72	break;
				73	case Token::STRING:
				74	message = "unexpected_token_string";
				75	break;
				76	case Token::IDENTIFIER:
				77	case Token::FUTURE_RESERVED_WORD:
				78	message = "unexpected_token_identifier";
				79	break;
				80	default:
				81	message = "unexpected_token";
				82	name_opt = Token::String(token);
				83	ASSERT(name_opt != NULL);
				84	break;
				85	}
				86
				87	Factory* factory = isolate()->factory();
				88	MessageLocation location(factory->NewScript(source),
				89	current_.beg_pos,
				90	current_.end_pos);
				91	Handle<JSArray> array;
				92	if (name_opt == NULL) {
				93	array = factory->NewJSArray(0);
				94	} else {
				95	Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt));
				96	Handle<FixedArray> element = factory->NewFixedArray(1);
				97	element->set(0, *name);
				98	array = factory->NewJSArrayWithElements(element);
				99	}
				100	Handle<Object> result = factory->NewSyntaxError(message, array);
				101	isolate()->Throw(*result, &location);
				102	return Handle<Object>::null();
				103	}
				104	return result;
				105	}
				106
				107
				108	// Parse any JSON value.
				109	Handle<Object> JsonParser::ParseJsonValue() {
				110	Token::Value token = Next();
				111	switch (token) {
				112	case Token::STRING:
				113	return GetString(false);
				114	case Token::NUMBER:
				115	return isolate()->factory()->NewNumber(number_);
				116	case Token::FALSE_LITERAL:
				117	return isolate()->factory()->false_value();
				118	case Token::TRUE_LITERAL:
				119	return isolate()->factory()->true_value();
				120	case Token::NULL_LITERAL:
				121	return isolate()->factory()->null_value();
				122	case Token::LBRACE:
				123	return ParseJsonObject();
				124	case Token::LBRACK:
				125	return ParseJsonArray();
				126	default:
				127	return ReportUnexpectedToken();
				128	}
				129	}
				130
				131
				132	// Parse a JSON object. Scanner must be right after '{' token.
				133	Handle<Object> JsonParser::ParseJsonObject() {
				134	Handle<JSFunction> object_constructor(
				135	isolate()->global_context()->object_function());
				136	Handle<JSObject> json_object =
				137	isolate()->factory()->NewJSObject(object_constructor);
				138
				139	if (Peek() == Token::RBRACE) {
				140	Next();
				141	} else {
				142	do {
				143	if (Next() != Token::STRING) {
				144	return ReportUnexpectedToken();
				145	}
				146	Handle<String> key = GetString(true);
				147	if (Next() != Token::COLON) {
				148	return ReportUnexpectedToken();
				149	}
				150
				151	Handle<Object> value = ParseJsonValue();
				152	if (value.is_null()) return Handle<Object>::null();
				153
				154	uint32_t index;
				155	if (key->AsArrayIndex(&index)) {
				156	SetOwnElement(json_object, index, value, kNonStrictMode);
				157	} else if (key->Equals(isolate()->heap()->Proto_symbol())) {
				158	SetPrototype(json_object, value);
				159	} else {
				160	SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);
				161	}
				162	} while (Next() == Token::COMMA);
				163	if (current_.token != Token::RBRACE) {
				164	return ReportUnexpectedToken();
				165	}
				166	}
				167	return json_object;
				168	}
				169
				170	// Parse a JSON array. Scanner must be right after '[' token.
				171	Handle<Object> JsonParser::ParseJsonArray() {
				172	ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
				173	ZoneList<Handle<Object> > elements(4);
				174
				175	Token::Value token = Peek();
				176	if (token == Token::RBRACK) {
				177	Next();
				178	} else {
				179	do {
				180	Handle<Object> element = ParseJsonValue();
				181	if (element.is_null()) return Handle<Object>::null();
				182	elements.Add(element);
				183	token = Next();
				184	} while (token == Token::COMMA);
				185	if (token != Token::RBRACK) {
				186	return ReportUnexpectedToken();
				187	}
				188	}
				189
				190	// Allocate a fixed array with all the elements.
				191	Handle<FixedArray> fast_elements =
				192	isolate()->factory()->NewFixedArray(elements.length());
				193
				194	for (int i = 0, n = elements.length(); i < n; i++) {
				195	fast_elements->set(i, *elements[i]);
				196	}
				197
				198	return isolate()->factory()->NewJSArrayWithElements(fast_elements);
				199	}
				200
				201
				202	Token::Value JsonParser::Next() {
				203	current_ = next_;
				204	ScanJson();
				205	return current_.token;
				206	}
				207
				208	void JsonParser::ScanJson() {
				209	if (source_->IsSeqAsciiString()) {
				210	is_sequential_ascii_ = true;
				211	} else {
				212	is_sequential_ascii_ = false;
				213	}
				214
				215	Token::Value token;
				216	do {
				217	// Remember the position of the next token
				218	next_.beg_pos = position_;
				219	switch (c0_) {
				220	case '\t':
				221	case '\r':
				222	case '\n':
				223	case ' ':
				224	Advance();
				225	token = Token::WHITESPACE;
				226	break;
				227	case '{':
				228	Advance();
				229	token = Token::LBRACE;
				230	break;
				231	case '}':
				232	Advance();
				233	token = Token::RBRACE;
				234	break;
				235	case '[':
				236	Advance();
				237	token = Token::LBRACK;
				238	break;
				239	case ']':
				240	Advance();
				241	token = Token::RBRACK;
				242	break;
				243	case ':':
				244	Advance();
				245	token = Token::COLON;
				246	break;
				247	case ',':
				248	Advance();
				249	token = Token::COMMA;
				250	break;
				251	case '"':
				252	token = ScanJsonString();
				253	break;
				254	case '-':
				255	case '0':
				256	case '1':
				257	case '2':
				258	case '3':
				259	case '4':
				260	case '5':
				261	case '6':
				262	case '7':
				263	case '8':
				264	case '9':
				265	token = ScanJsonNumber();
				266	break;
				267	case 't':
				268	token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
				269	break;
				270	case 'f':
				271	token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
				272	break;
				273	case 'n':
				274	token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
				275	break;
				276	default:
				277	if (c0_ < 0) {
				278	Advance();
				279	token = Token::EOS;
				280	} else {
				281	Advance();
				282	token = Token::ILLEGAL;
				283	}
				284	}
				285	} while (token == Token::WHITESPACE);
				286
				287	next_.end_pos = position_;
				288	next_.token = token;
				289	}
				290
				291
				292	Token::Value JsonParser::ScanJsonIdentifier(const char* text,
				293	Token::Value token) {
				294	while (*text != '\0') {
				295	if (c0_ != *text) return Token::ILLEGAL;
				296	Advance();
				297	text++;
				298	}
				299	return token;
				300	}
				301
				302
				303	Token::Value JsonParser::ScanJsonNumber() {
				304	bool negative = false;
				305
				306	if (c0_ == '-') {
				307	Advance();
				308	negative = true;
				309	}
				310	if (c0_ == '0') {
				311	Advance();
				312	// Prefix zero is only allowed if it's the only digit before
				313	// a decimal point or exponent.
				314	if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
				315	} else {
				316	int i = 0;
				317	int digits = 0;
				318	if (c0_ < '1' \|\| c0_ > '9') return Token::ILLEGAL;
				319	do {
				320	i = i * 10 + c0_ - '0';
				321	digits++;
				322	Advance();
				323	} while (c0_ >= '0' && c0_ <= '9');
				324	if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
				325	number_ = (negative ? -i : i);
				326	return Token::NUMBER;
				327	}
				328	}
				329	if (c0_ == '.') {
				330	Advance();
				331	if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;
				332	do {
				333	Advance();
				334	} while (c0_ >= '0' && c0_ <= '9');
				335	}
				336	if (AsciiAlphaToLower(c0_) == 'e') {
				337	Advance();
				338	if (c0_ == '-' \|\| c0_ == '+') Advance();
				339	if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;
				340	do {
				341	Advance();
				342	} while (c0_ >= '0' && c0_ <= '9');
				343	}
				344	if (is_sequential_ascii_) {
				345	Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos,
				346	position_ - next_.beg_pos);
				347	number_ = StringToDouble(isolate()->unicode_cache(),
				348	chars,
				349	NO_FLAGS, // Hex, octal or trailing junk.
				350	OS::nan_value());
				351	} else {
				352	Vector<char> buffer = Vector<char>::New(position_ - next_.beg_pos);
				353	String::WriteToFlat(*source_, buffer.start(), next_.beg_pos, position_);
				354	Vector<const char> result =
				355	Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
				356	position_ - next_.beg_pos);
				357	number_ = StringToDouble(isolate()->unicode_cache(),
				358	result,
				359	NO_FLAGS, // Hex, octal or trailing junk.
				360	0.0);
				361	buffer.Dispose();
				362	}
				363	return Token::NUMBER;
				364	}
				365
				366	Token::Value JsonParser::SlowScanJsonString() {
				367	// The currently scanned ascii characters.
				368	Handle<String> ascii(isolate()->factory()->NewSubString(source_,
				369	next_.beg_pos + 1,
				370	position_));
				371	Handle<String> two_byte =
				372	isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
				373	NOT_TENURED);
				374	Handle<SeqTwoByteString> seq_two_byte =
				375	Handle<SeqTwoByteString>::cast(two_byte);
				376
				377	int allocation_count = 1;
				378	int count = 0;
				379
				380	while (c0_ != '"') {
				381	// Create new seq string
				382	if (count >= kInitialSpecialStringSize * allocation_count) {
				383	allocation_count = allocation_count * 2;
				384	int new_size = allocation_count * kInitialSpecialStringSize;
				385	Handle<String> new_two_byte =
				386	isolate()->factory()->NewRawTwoByteString(new_size,
				387	NOT_TENURED);
				388	uc16* char_start =
				389	Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
				390	String::WriteToFlat(*seq_two_byte, char_start, 0, count);
				391	seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
				392	}
				393
				394	// Check for control character (0x00-0x1f) or unterminated string (<0).
				395	if (c0_ < 0x20) return Token::ILLEGAL;
				396	if (c0_ != '\\') {
				397	seq_two_byte->SeqTwoByteStringSet(count++, c0_);
				398	Advance();
				399	} else {
				400	Advance();
				401	switch (c0_) {
				402	case '"':
				403	case '\\':
				404	case '/':
				405	seq_two_byte->SeqTwoByteStringSet(count++, c0_);
				406	break;
				407	case 'b':
				408	seq_two_byte->SeqTwoByteStringSet(count++, '\x08');
				409	break;
				410	case 'f':
				411	seq_two_byte->SeqTwoByteStringSet(count++, '\x0c');
				412	break;
				413	case 'n':
				414	seq_two_byte->SeqTwoByteStringSet(count++, '\x0a');
				415	break;
				416	case 'r':
				417	seq_two_byte->SeqTwoByteStringSet(count++, '\x0d');
				418	break;
				419	case 't':
				420	seq_two_byte->SeqTwoByteStringSet(count++, '\x09');
				421	break;
				422	case 'u': {
				423	uc32 value = 0;
				424	for (int i = 0; i < 4; i++) {
				425	Advance();
				426	int digit = HexValue(c0_);
				427	if (digit < 0) {
				428	return Token::ILLEGAL;
				429	}
				430	value = value * 16 + digit;
				431	}
				432	seq_two_byte->SeqTwoByteStringSet(count++, value);
				433	break;
				434	}
				435	default:
				436	return Token::ILLEGAL;
				437	}
				438	Advance();
				439	}
				440	}
				441	// Advance past the last '"'.
				442	ASSERT_EQ('"', c0_);
				443	Advance();
				444
				445	// Shrink the the string to our length.
				446	if (isolate()->heap()->InNewSpace(*seq_two_byte)) {
				447	isolate()->heap()->new_space()->
				448	ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte,
				449	count);
				450	} else {
				451	int string_size = SeqTwoByteString::SizeFor(count);
				452	int allocated_string_size =
				453	SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);
				454	int delta = allocated_string_size - string_size;
				455	Address start_filler_object = seq_two_byte->address() + string_size;
				456	seq_two_byte->set_length(count);
				457	isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
				458	}
				459	string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte);
				460	return Token::STRING;
				461	}
				462
				463
				464	Token::Value JsonParser::ScanJsonString() {
				465	ASSERT_EQ('"', c0_);
				466	// Set string_val to null. If string_val is not set we assume an
				467	// ascii string begining at next_.beg_pos + 1 to next_.end_pos - 1.
				468	string_val_ = Handle<String>::null();
				469	Advance();
				470	// Fast case for ascii only without escape characters.
				471	while (c0_ != '"') {
				472	// Check for control character (0x00-0x1f) or unterminated string (<0).
				473	if (c0_ < 0x20) return Token::ILLEGAL;
				474	if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) {
				475	Advance();
				476	} else {
				477	return SlowScanJsonString();
				478	}
				479	}
				480	ASSERT_EQ('"', c0_);
				481	// Advance past the last '"'.
				482	Advance();
				483	return Token::STRING;
				484	}
				485
				486	Handle<String> JsonParser::GetString() {
				487	return GetString(false);
				488	}
				489
				490	Handle<String> JsonParser::GetSymbol() {
				491	Handle<String> result = GetString(true);
				492	if (result->IsSymbol()) return result;
				493	return isolate()->factory()->LookupSymbol(result);
				494	}
				495
				496	Handle<String> JsonParser::GetString(bool hint_symbol) {
				497	// We have a non ascii string, return that.
				498	if (!string_val_.is_null()) return string_val_;
				499
				500	if (is_sequential_ascii_ && hint_symbol) {
				501	Handle<SeqAsciiString> seq = Handle<SeqAsciiString>::cast(source_);
				502	// The current token includes the '"' in both ends.
				503	int length = current_.end_pos - current_.beg_pos - 2;
				504	return isolate()->factory()->LookupAsciiSymbol(seq_source_,
				505	current_.beg_pos + 1,
				506	length);
				507	}
				508	// The current token includes the '"' in both ends.
				509	return isolate()->factory()->NewSubString(
				510	source_, current_.beg_pos + 1, current_.end_pos - 1);
				511	}
				512
				513	} } // namespace v8::internal