Blame - src/scanner.cc - fp2-dev/platform/external/v8

blob: 15b1d44203e5c897c7b33591f8dd9f6576464a07 [file] [log] [blame]

Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1	// Copyright 2006-2008 the V8 project authors. All rights reserved.
				2	// Redistribution and use in source and binary forms, with or without
				3	// modification, are permitted provided that the following conditions are
				4	// met:
				5	//
				6	// * Redistributions of source code must retain the above copyright
				7	// notice, this list of conditions and the following disclaimer.
				8	// * Redistributions in binary form must reproduce the above
				9	// copyright notice, this list of conditions and the following
				10	// disclaimer in the documentation and/or other materials provided
				11	// with the distribution.
				12	// * Neither the name of Google Inc. nor the names of its
				13	// contributors may be used to endorse or promote products derived
				14	// from this software without specific prior written permission.
				15	//
				16	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				20	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				21	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				22	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				23	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				24	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				25	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				26	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27
				28	#include "v8.h"
				29
				30	#include "ast.h"
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	31	#include "handles.h"
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	32	#include "scanner.h"
				33
				34	namespace v8 {
				35	namespace internal {
				36
				37	// ----------------------------------------------------------------------------
				38	// Character predicates
				39
				40
				41	unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
				42	unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
				43	unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
				44	unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
				45
				46
				47	StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
				48
				49
				50	// ----------------------------------------------------------------------------
				51	// UTF8Buffer
				52
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	53	UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	54
				55
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	56	UTF8Buffer::~UTF8Buffer() {}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	57
				58
				59	void UTF8Buffer::AddCharSlow(uc32 c) {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	60	ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
				61	int length = unibrow::Utf8::Length(c);
				62	Vector<char> block = buffer_.AddBlock(length, '\0');
				63	#ifdef DEBUG
				64	int written_length = unibrow::Utf8::Encode(block.start(), c);
				65	CHECK_EQ(length, written_length);
				66	#else
				67	unibrow::Utf8::Encode(block.start(), c);
				68	#endif
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	69	}
				70
				71
				72	// ----------------------------------------------------------------------------
				73	// UTF16Buffer
				74
				75
				76	UTF16Buffer::UTF16Buffer()
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	77	: pos_(0), end_(Scanner::kNoEndPosition) { }
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	78
				79
				80	// CharacterStreamUTF16Buffer
				81	CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
				82	: pushback_buffer_(0), last_(0), stream_(NULL) { }
				83
				84
				85	void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	86	unibrow::CharacterStream* input,
				87	int start_position,
				88	int end_position) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	89	stream_ = input;
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	90	if (start_position > 0) {
				91	SeekForward(start_position);
				92	}
				93	end_ = end_position != Scanner::kNoEndPosition ? end_position : kMaxInt;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	94	}
				95
				96
				97	void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
				98	pushback_buffer()->Add(last_);
				99	last_ = ch;
				100	pos_--;
				101	}
				102
				103
				104	uc32 CharacterStreamUTF16Buffer::Advance() {
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	105	ASSERT(end_ != Scanner::kNoEndPosition);
				106	ASSERT(end_ >= 0);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	107	// NOTE: It is of importance to Persian / Farsi resources that we do
				108	// not strip format control characters in the scanner; see
				109	//
				110	// https://bugzilla.mozilla.org/show_bug.cgi?id=274152
				111	//
				112	// So, even though ECMA-262, section 7.1, page 11, dictates that we
				113	// must remove Unicode format-control characters, we do not. This is
				114	// in line with how IE and SpiderMonkey handles it.
				115	if (!pushback_buffer()->is_empty()) {
				116	pos_++;
				117	return last_ = pushback_buffer()->RemoveLast();
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	118	} else if (stream_->has_more() && pos_ < end_) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	119	pos_++;
				120	uc32 next = stream_->GetNext();
				121	return last_ = next;
				122	} else {
				123	// Note: currently the following increment is necessary to avoid a
				124	// test-parser problem!
				125	pos_++;
				126	return last_ = static_cast<uc32>(-1);
				127	}
				128	}
				129
				130
				131	void CharacterStreamUTF16Buffer::SeekForward(int pos) {
				132	pos_ = pos;
				133	ASSERT(pushback_buffer()->is_empty());
				134	stream_->Seek(pos);
				135	}
				136
				137
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	138	// ExternalStringUTF16Buffer
				139	template <typename StringType, typename CharType>
				140	ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	141	: raw_data_(NULL) { }
				142
				143
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	144	template <typename StringType, typename CharType>
				145	void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
				146	Handle<StringType> data,
				147	int start_position,
				148	int end_position) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	149	ASSERT(!data.is_null());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	150	raw_data_ = data->resource()->data();
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	151
				152	ASSERT(end_position <= data->length());
				153	if (start_position > 0) {
				154	SeekForward(start_position);
				155	}
				156	end_ =
				157	end_position != Scanner::kNoEndPosition ? end_position : data->length();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	158	}
				159
				160
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	161	template <typename StringType, typename CharType>
				162	uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
				163	if (pos_ < end_) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	164	return raw_data_[pos_++];
				165	} else {
				166	// note: currently the following increment is necessary to avoid a
				167	// test-parser problem!
				168	pos_++;
				169	return static_cast<uc32>(-1);
				170	}
				171	}
				172
				173
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	174	template <typename StringType, typename CharType>
				175	void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	176	pos_--;
				177	ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
				178	ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
				179	}
				180
				181
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	182	template <typename StringType, typename CharType>
				183	void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	184	pos_ = pos;
				185	}
				186
				187
				188	// ----------------------------------------------------------------------------
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	189	// Keyword Matcher
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	190
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	191	KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
				192	{ "break", KEYWORD_PREFIX, Token::BREAK },
				193	{ NULL, C, Token::ILLEGAL },
				194	{ NULL, D, Token::ILLEGAL },
				195	{ "else", KEYWORD_PREFIX, Token::ELSE },
				196	{ NULL, F, Token::ILLEGAL },
				197	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				198	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				199	{ NULL, I, Token::ILLEGAL },
				200	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				201	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				202	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				203	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				204	{ NULL, N, Token::ILLEGAL },
				205	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				206	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				207	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				208	{ "return", KEYWORD_PREFIX, Token::RETURN },
				209	{ "switch", KEYWORD_PREFIX, Token::SWITCH },
				210	{ NULL, T, Token::ILLEGAL },
				211	{ NULL, UNMATCHABLE, Token::ILLEGAL },
				212	{ NULL, V, Token::ILLEGAL },
				213	{ NULL, W, Token::ILLEGAL }
				214	};
				215
				216
				217	void KeywordMatcher::Step(uc32 input) {
				218	switch (state_) {
				219	case INITIAL: {
				220	// matching the first character is the only state with significant fanout.
				221	// Match only lower-case letters in range 'b'..'w'.
				222	unsigned int offset = input - kFirstCharRangeMin;
				223	if (offset < kFirstCharRangeLength) {
				224	state_ = first_states_[offset].state;
				225	if (state_ == KEYWORD_PREFIX) {
				226	keyword_ = first_states_[offset].keyword;
				227	counter_ = 1;
				228	keyword_token_ = first_states_[offset].token;
				229	}
				230	return;
				231	}
				232	break;
				233	}
				234	case KEYWORD_PREFIX:
				235	if (keyword_[counter_] == input) {
				236	ASSERT_NE(input, '\0');
				237	counter_++;
				238	if (keyword_[counter_] == '\0') {
				239	state_ = KEYWORD_MATCHED;
				240	token_ = keyword_token_;
				241	}
				242	return;
				243	}
				244	break;
				245	case KEYWORD_MATCHED:
				246	token_ = Token::IDENTIFIER;
				247	break;
				248	case C:
				249	if (MatchState(input, 'a', CA)) return;
				250	if (MatchState(input, 'o', CO)) return;
				251	break;
				252	case CA:
				253	if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
				254	if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
				255	break;
				256	case CO:
				257	if (MatchState(input, 'n', CON)) return;
				258	break;
				259	case CON:
				260	if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
				261	if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
				262	break;
				263	case D:
				264	if (MatchState(input, 'e', DE)) return;
				265	if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
				266	break;
				267	case DE:
				268	if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
				269	if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
				270	if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
				271	break;
				272	case F:
				273	if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
				274	if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
				275	if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
				276	if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
				277	break;
				278	case I:
				279	if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
				280	if (MatchKeyword(input, 'n', IN, Token::IN)) return;
				281	break;
				282	case IN:
				283	token_ = Token::IDENTIFIER;
				284	if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
				285	return;
				286	}
				287	break;
				288	case N:
				289	if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
				290	if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
				291	if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
				292	break;
				293	case T:
				294	if (MatchState(input, 'h', TH)) return;
				295	if (MatchState(input, 'r', TR)) return;
				296	if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
				297	break;
				298	case TH:
				299	if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
				300	if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
				301	break;
				302	case TR:
				303	if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
				304	if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
				305	break;
				306	case V:
				307	if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
				308	if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
				309	break;
				310	case W:
				311	if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
				312	if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
				313	break;
				314	default:
				315	UNREACHABLE();
				316	}
				317	// On fallthrough, it's a failure.
				318	state_ = UNMATCHABLE;
				319	}
				320
				321
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	322
				323	// ----------------------------------------------------------------------------
				324	// Scanner::LiteralScope
				325
				326	Scanner::LiteralScope::LiteralScope(Scanner* self)
				327	: scanner_(self), complete_(false) {
				328	self->StartLiteral();
				329	}
				330
				331
				332	Scanner::LiteralScope::~LiteralScope() {
				333	if (!complete_) scanner_->DropLiteral();
				334	}
				335
				336
				337	void Scanner::LiteralScope::Complete() {
				338	scanner_->TerminateLiteral();
				339	complete_ = true;
				340	}
				341
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	342	// ----------------------------------------------------------------------------
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	343	// Scanner
				344
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	345	Scanner::Scanner(ParserMode pre)
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	346	: is_pre_parsing_(pre == PREPARSE), stack_overflow_(false) { }
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	347
				348
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	349	void Scanner::Initialize(Handle<String> source,
				350	ParserLanguage language) {
Ben Murdoch	3bec4d2	2010-07-22 14:51:16 +0100	[diff] [blame]	351	Init(source, NULL, 0, source->length(), language);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	352	}
				353
				354
				355	void Scanner::Initialize(Handle<String> source,
				356	unibrow::CharacterStream* stream,
				357	ParserLanguage language) {
				358	Init(source, stream, 0, kNoEndPosition, language);
				359	}
				360
				361
				362	void Scanner::Initialize(Handle<String> source,
				363	int start_position,
				364	int end_position,
				365	ParserLanguage language) {
Ben Murdoch	3bec4d2	2010-07-22 14:51:16 +0100	[diff] [blame]	366	Init(source, NULL, start_position, end_position, language);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	367	}
				368
				369
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	370	void Scanner::Init(Handle<String> source,
				371	unibrow::CharacterStream* stream,
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	372	int start_position,
				373	int end_position,
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	374	ParserLanguage language) {
Ben Murdoch	3bec4d2	2010-07-22 14:51:16 +0100	[diff] [blame]	375	// Either initialize the scanner from a character stream or from a
				376	// string.
				377	ASSERT(source.is_null() \|\| stream == NULL);
				378
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	379	// Initialize the source buffer.
				380	if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
				381	two_byte_string_buffer_.Initialize(
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	382	Handle<ExternalTwoByteString>::cast(source),
				383	start_position,
				384	end_position);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	385	source_ = &two_byte_string_buffer_;
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	386	} else if (!source.is_null() && StringShape(*source).IsExternalAscii()) {
				387	ascii_string_buffer_.Initialize(
				388	Handle<ExternalAsciiString>::cast(source),
				389	start_position,
				390	end_position);
				391	source_ = &ascii_string_buffer_;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	392	} else {
Ben Murdoch	3bec4d2	2010-07-22 14:51:16 +0100	[diff] [blame]	393	if (!source.is_null()) {
				394	safe_string_input_buffer_.Reset(source.location());
				395	stream = &safe_string_input_buffer_;
				396	}
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	397	char_stream_buffer_.Initialize(source,
				398	stream,
				399	start_position,
				400	end_position);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	401	source_ = &char_stream_buffer_;
				402	}
				403
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	404	is_parsing_json_ = (language == JSON);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	405
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	406	// Set c0_ (one character ahead)
				407	ASSERT(kCharacterLookaheadBufferSize == 1);
				408	Advance();
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	409	// Initialize current_ to not refer to a literal.
				410	current_.literal_chars = Vector<const char>();
				411	// Reset literal buffer.
				412	literal_buffer_.Reset();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	413
				414	// Skip initial whitespace allowing HTML comment ends just like
				415	// after a newline and scan first token.
				416	has_line_terminator_before_next_ = true;
				417	SkipWhiteSpace();
				418	Scan();
				419	}
				420
				421
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	422	Token::Value Scanner::Next() {
				423	// BUG 1215673: Find a thread safe way to set a stack limit in
				424	// pre-parse mode. Otherwise, we cannot safely pre-parse from other
				425	// threads.
				426	current_ = next_;
				427	// Check for stack-overflow before returning any tokens.
				428	StackLimitCheck check;
				429	if (check.HasOverflowed()) {
				430	stack_overflow_ = true;
				431	next_.token = Token::ILLEGAL;
				432	} else {
Iain Merrick	9ac36c9	2010-09-13 15:29:50 +0100	[diff] [blame^]	433	has_line_terminator_before_next_ = false;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	434	Scan();
				435	}
				436	return current_.token;
				437	}
				438
				439
				440	void Scanner::StartLiteral() {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	441	literal_buffer_.StartLiteral();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	442	}
				443
				444
				445	void Scanner::AddChar(uc32 c) {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	446	literal_buffer_.AddChar(c);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	447	}
				448
				449
				450	void Scanner::TerminateLiteral() {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	451	next_.literal_chars = literal_buffer_.EndLiteral();
				452	}
				453
				454
				455	void Scanner::DropLiteral() {
				456	literal_buffer_.DropLiteral();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	457	}
				458
				459
				460	void Scanner::AddCharAdvance() {
				461	AddChar(c0_);
				462	Advance();
				463	}
				464
				465
				466	static inline bool IsByteOrderMark(uc32 c) {
				467	// The Unicode value U+FFFE is guaranteed never to be assigned as a
				468	// Unicode character; this implies that in a Unicode context the
				469	// 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
				470	// character expressed in little-endian byte order (since it could
				471	// not be a U+FFFE character expressed in big-endian byte
				472	// order). Nevertheless, we check for it to be compatible with
				473	// Spidermonkey.
				474	return c == 0xFEFF \|\| c == 0xFFFE;
				475	}
				476
				477
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	478	bool Scanner::SkipJsonWhiteSpace() {
				479	int start_position = source_pos();
				480	// JSON WhiteSpace is tab, carrige-return, newline and space.
				481	while (c0_ == ' ' \|\| c0_ == '\n' \|\| c0_ == '\r' \|\| c0_ == '\t') {
				482	Advance();
				483	}
				484	return source_pos() != start_position;
				485	}
				486
				487
				488	bool Scanner::SkipJavaScriptWhiteSpace() {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	489	int start_position = source_pos();
				490
				491	while (true) {
				492	// We treat byte-order marks (BOMs) as whitespace for better
				493	// compatibility with Spidermonkey and other JavaScript engines.
				494	while (kIsWhiteSpace.get(c0_) \|\| IsByteOrderMark(c0_)) {
				495	// IsWhiteSpace() includes line terminators!
				496	if (kIsLineTerminator.get(c0_)) {
				497	// Ignore line terminators, but remember them. This is necessary
				498	// for automatic semicolon insertion.
				499	has_line_terminator_before_next_ = true;
				500	}
				501	Advance();
				502	}
				503
				504	// If there is an HTML comment end '-->' at the beginning of a
				505	// line (with only whitespace in front of it), we treat the rest
				506	// of the line as a comment. This is in line with the way
				507	// SpiderMonkey handles it.
				508	if (c0_ == '-' && has_line_terminator_before_next_) {
				509	Advance();
				510	if (c0_ == '-') {
				511	Advance();
				512	if (c0_ == '>') {
				513	// Treat the rest of the line as a comment.
				514	SkipSingleLineComment();
				515	// Continue skipping white space after the comment.
				516	continue;
				517	}
				518	PushBack('-'); // undo Advance()
				519	}
				520	PushBack('-'); // undo Advance()
				521	}
				522	// Return whether or not we skipped any characters.
				523	return source_pos() != start_position;
				524	}
				525	}
				526
				527
				528	Token::Value Scanner::SkipSingleLineComment() {
				529	Advance();
				530
				531	// The line terminator at the end of the line is not considered
				532	// to be part of the single-line comment; it is recognized
				533	// separately by the lexical grammar and becomes part of the
				534	// stream of input elements for the syntactic grammar (see
				535	// ECMA-262, section 7.4, page 12).
				536	while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
				537	Advance();
				538	}
				539
				540	return Token::WHITESPACE;
				541	}
				542
				543
				544	Token::Value Scanner::SkipMultiLineComment() {
				545	ASSERT(c0_ == '*');
				546	Advance();
				547
				548	while (c0_ >= 0) {
				549	char ch = c0_;
				550	Advance();
				551	// If we have reached the end of the multi-line comment, we
				552	// consume the '/' and insert a whitespace. This way all
				553	// multi-line comments are treated as whitespace - even the ones
				554	// containing line terminators. This contradicts ECMA-262, section
				555	// 7.4, page 12, that says that multi-line comments containing
				556	// line terminators should be treated as a line terminator, but it
				557	// matches the behaviour of SpiderMonkey and KJS.
				558	if (ch == '*' && c0_ == '/') {
				559	c0_ = ' ';
				560	return Token::WHITESPACE;
				561	}
				562	}
				563
				564	// Unterminated multi-line comment.
				565	return Token::ILLEGAL;
				566	}
				567
				568
				569	Token::Value Scanner::ScanHtmlComment() {
				570	// Check for <!-- comments.
				571	ASSERT(c0_ == '!');
				572	Advance();
				573	if (c0_ == '-') {
				574	Advance();
				575	if (c0_ == '-') return SkipSingleLineComment();
				576	PushBack('-'); // undo Advance()
				577	}
				578	PushBack('!'); // undo Advance()
				579	ASSERT(c0_ == '!');
				580	return Token::LT;
				581	}
				582
				583
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	584
				585	void Scanner::ScanJson() {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	586	next_.literal_chars = Vector<const char>();
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	587	Token::Value token;
				588	has_line_terminator_before_next_ = false;
				589	do {
				590	// Remember the position of the next token
				591	next_.location.beg_pos = source_pos();
				592	switch (c0_) {
				593	case '\t':
				594	case '\r':
				595	case '\n':
				596	case ' ':
				597	Advance();
				598	token = Token::WHITESPACE;
				599	break;
				600	case '{':
				601	Advance();
				602	token = Token::LBRACE;
				603	break;
				604	case '}':
				605	Advance();
				606	token = Token::RBRACE;
				607	break;
				608	case '[':
				609	Advance();
				610	token = Token::LBRACK;
				611	break;
				612	case ']':
				613	Advance();
				614	token = Token::RBRACK;
				615	break;
				616	case ':':
				617	Advance();
				618	token = Token::COLON;
				619	break;
				620	case ',':
				621	Advance();
				622	token = Token::COMMA;
				623	break;
				624	case '"':
				625	token = ScanJsonString();
				626	break;
				627	case '-':
				628	case '0':
				629	case '1':
				630	case '2':
				631	case '3':
				632	case '4':
				633	case '5':
				634	case '6':
				635	case '7':
				636	case '8':
				637	case '9':
				638	token = ScanJsonNumber();
				639	break;
				640	case 't':
				641	token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
				642	break;
				643	case 'f':
				644	token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
				645	break;
				646	case 'n':
				647	token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
				648	break;
				649	default:
				650	if (c0_ < 0) {
				651	Advance();
				652	token = Token::EOS;
				653	} else {
				654	Advance();
				655	token = Select(Token::ILLEGAL);
				656	}
				657	}
				658	} while (token == Token::WHITESPACE);
				659
				660	next_.location.end_pos = source_pos();
				661	next_.token = token;
				662	}
				663
				664
				665	Token::Value Scanner::ScanJsonString() {
				666	ASSERT_EQ('"', c0_);
				667	Advance();
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	668	LiteralScope literal(this);
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	669	while (c0_ != '"' && c0_ > 0) {
				670	// Check for control character (0x00-0x1f) or unterminated string (<0).
				671	if (c0_ < 0x20) return Token::ILLEGAL;
				672	if (c0_ != '\\') {
				673	AddCharAdvance();
				674	} else {
				675	Advance();
				676	switch (c0_) {
				677	case '"':
				678	case '\\':
				679	case '/':
				680	AddChar(c0_);
				681	break;
				682	case 'b':
				683	AddChar('\x08');
				684	break;
				685	case 'f':
				686	AddChar('\x0c');
				687	break;
				688	case 'n':
				689	AddChar('\x0a');
				690	break;
				691	case 'r':
				692	AddChar('\x0d');
				693	break;
				694	case 't':
				695	AddChar('\x09');
				696	break;
				697	case 'u': {
				698	uc32 value = 0;
				699	for (int i = 0; i < 4; i++) {
				700	Advance();
				701	int digit = HexValue(c0_);
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	702	if (digit < 0) {
				703	return Token::ILLEGAL;
				704	}
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	705	value = value * 16 + digit;
				706	}
				707	AddChar(value);
				708	break;
				709	}
				710	default:
				711	return Token::ILLEGAL;
				712	}
				713	Advance();
				714	}
				715	}
				716	if (c0_ != '"') {
				717	return Token::ILLEGAL;
				718	}
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	719	literal.Complete();
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	720	Advance();
				721	return Token::STRING;
				722	}
				723
				724
				725	Token::Value Scanner::ScanJsonNumber() {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	726	LiteralScope literal(this);
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	727	if (c0_ == '-') AddCharAdvance();
				728	if (c0_ == '0') {
				729	AddCharAdvance();
				730	// Prefix zero is only allowed if it's the only digit before
				731	// a decimal point or exponent.
				732	if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
				733	} else {
				734	if (c0_ < '1' \|\| c0_ > '9') return Token::ILLEGAL;
				735	do {
				736	AddCharAdvance();
				737	} while (c0_ >= '0' && c0_ <= '9');
				738	}
				739	if (c0_ == '.') {
				740	AddCharAdvance();
				741	if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;
				742	do {
				743	AddCharAdvance();
				744	} while (c0_ >= '0' && c0_ <= '9');
				745	}
Iain Merrick	9ac36c9	2010-09-13 15:29:50 +0100	[diff] [blame^]	746	if (AsciiAlphaToLower(c0_) == 'e') {
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	747	AddCharAdvance();
				748	if (c0_ == '-' \|\| c0_ == '+') AddCharAdvance();
				749	if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;
				750	do {
				751	AddCharAdvance();
				752	} while (c0_ >= '0' && c0_ <= '9');
				753	}
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	754	literal.Complete();
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	755	return Token::NUMBER;
				756	}
				757
				758
				759	Token::Value Scanner::ScanJsonIdentifier(const char* text,
				760	Token::Value token) {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	761	LiteralScope literal(this);
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	762	while (*text != '\0') {
				763	if (c0_ != *text) return Token::ILLEGAL;
				764	Advance();
				765	text++;
				766	}
				767	if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	768	literal.Complete();
Leon Clarke	4515c47	2010-02-03 11:58:03 +0000	[diff] [blame]	769	return token;
				770	}
				771
				772
				773	void Scanner::ScanJavaScript() {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	774	next_.literal_chars = Vector<const char>();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	775	Token::Value token;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	776	do {
				777	// Remember the position of the next token
				778	next_.location.beg_pos = source_pos();
				779
				780	switch (c0_) {
				781	case ' ':
				782	case '\t':
				783	Advance();
				784	token = Token::WHITESPACE;
				785	break;
				786
				787	case '\n':
				788	Advance();
				789	has_line_terminator_before_next_ = true;
				790	token = Token::WHITESPACE;
				791	break;
				792
				793	case '"': case '\'':
				794	token = ScanString();
				795	break;
				796
				797	case '<':
				798	// < <= << <<= <!--
				799	Advance();
				800	if (c0_ == '=') {
				801	token = Select(Token::LTE);
				802	} else if (c0_ == '<') {
				803	token = Select('=', Token::ASSIGN_SHL, Token::SHL);
				804	} else if (c0_ == '!') {
				805	token = ScanHtmlComment();
				806	} else {
				807	token = Token::LT;
				808	}
				809	break;
				810
				811	case '>':
				812	// > >= >> >>= >>> >>>=
				813	Advance();
				814	if (c0_ == '=') {
				815	token = Select(Token::GTE);
				816	} else if (c0_ == '>') {
				817	// >> >>= >>> >>>=
				818	Advance();
				819	if (c0_ == '=') {
				820	token = Select(Token::ASSIGN_SAR);
				821	} else if (c0_ == '>') {
				822	token = Select('=', Token::ASSIGN_SHR, Token::SHR);
				823	} else {
				824	token = Token::SAR;
				825	}
				826	} else {
				827	token = Token::GT;
				828	}
				829	break;
				830
				831	case '=':
				832	// = == ===
				833	Advance();
				834	if (c0_ == '=') {
				835	token = Select('=', Token::EQ_STRICT, Token::EQ);
				836	} else {
				837	token = Token::ASSIGN;
				838	}
				839	break;
				840
				841	case '!':
				842	// ! != !==
				843	Advance();
				844	if (c0_ == '=') {
				845	token = Select('=', Token::NE_STRICT, Token::NE);
				846	} else {
				847	token = Token::NOT;
				848	}
				849	break;
				850
				851	case '+':
				852	// + ++ +=
				853	Advance();
				854	if (c0_ == '+') {
				855	token = Select(Token::INC);
				856	} else if (c0_ == '=') {
				857	token = Select(Token::ASSIGN_ADD);
				858	} else {
				859	token = Token::ADD;
				860	}
				861	break;
				862
				863	case '-':
				864	// - -- --> -=
				865	Advance();
				866	if (c0_ == '-') {
				867	Advance();
				868	if (c0_ == '>' && has_line_terminator_before_next_) {
				869	// For compatibility with SpiderMonkey, we skip lines that
				870	// start with an HTML comment end '-->'.
				871	token = SkipSingleLineComment();
				872	} else {
				873	token = Token::DEC;
				874	}
				875	} else if (c0_ == '=') {
				876	token = Select(Token::ASSIGN_SUB);
				877	} else {
				878	token = Token::SUB;
				879	}
				880	break;
				881
				882	case '*':
				883	// * *=
				884	token = Select('=', Token::ASSIGN_MUL, Token::MUL);
				885	break;
				886
				887	case '%':
				888	// % %=
				889	token = Select('=', Token::ASSIGN_MOD, Token::MOD);
				890	break;
				891
				892	case '/':
				893	// / // /* /=
				894	Advance();
				895	if (c0_ == '/') {
				896	token = SkipSingleLineComment();
				897	} else if (c0_ == '*') {
				898	token = SkipMultiLineComment();
				899	} else if (c0_ == '=') {
				900	token = Select(Token::ASSIGN_DIV);
				901	} else {
				902	token = Token::DIV;
				903	}
				904	break;
				905
				906	case '&':
				907	// & && &=
				908	Advance();
				909	if (c0_ == '&') {
				910	token = Select(Token::AND);
				911	} else if (c0_ == '=') {
				912	token = Select(Token::ASSIGN_BIT_AND);
				913	} else {
				914	token = Token::BIT_AND;
				915	}
				916	break;
				917
				918	case '\|':
				919	// \| \|\| \|=
				920	Advance();
				921	if (c0_ == '\|') {
				922	token = Select(Token::OR);
				923	} else if (c0_ == '=') {
				924	token = Select(Token::ASSIGN_BIT_OR);
				925	} else {
				926	token = Token::BIT_OR;
				927	}
				928	break;
				929
				930	case '^':
				931	// ^ ^=
				932	token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
				933	break;
				934
				935	case '.':
				936	// . Number
				937	Advance();
				938	if (IsDecimalDigit(c0_)) {
				939	token = ScanNumber(true);
				940	} else {
				941	token = Token::PERIOD;
				942	}
				943	break;
				944
				945	case ':':
				946	token = Select(Token::COLON);
				947	break;
				948
				949	case ';':
				950	token = Select(Token::SEMICOLON);
				951	break;
				952
				953	case ',':
				954	token = Select(Token::COMMA);
				955	break;
				956
				957	case '(':
				958	token = Select(Token::LPAREN);
				959	break;
				960
				961	case ')':
				962	token = Select(Token::RPAREN);
				963	break;
				964
				965	case '[':
				966	token = Select(Token::LBRACK);
				967	break;
				968
				969	case ']':
				970	token = Select(Token::RBRACK);
				971	break;
				972
				973	case '{':
				974	token = Select(Token::LBRACE);
				975	break;
				976
				977	case '}':
				978	token = Select(Token::RBRACE);
				979	break;
				980
				981	case '?':
				982	token = Select(Token::CONDITIONAL);
				983	break;
				984
				985	case '~':
				986	token = Select(Token::BIT_NOT);
				987	break;
				988
				989	default:
				990	if (kIsIdentifierStart.get(c0_)) {
				991	token = ScanIdentifier();
				992	} else if (IsDecimalDigit(c0_)) {
				993	token = ScanNumber(false);
				994	} else if (SkipWhiteSpace()) {
				995	token = Token::WHITESPACE;
				996	} else if (c0_ < 0) {
				997	token = Token::EOS;
				998	} else {
				999	token = Select(Token::ILLEGAL);
				1000	}
				1001	break;
				1002	}
				1003
				1004	// Continue scanning for tokens as long as we're just skipping
				1005	// whitespace.
				1006	} while (token == Token::WHITESPACE);
				1007
				1008	next_.location.end_pos = source_pos();
				1009	next_.token = token;
				1010	}
				1011
				1012
				1013	void Scanner::SeekForward(int pos) {
				1014	source_->SeekForward(pos - 1);
				1015	Advance();
Iain Merrick	9ac36c9	2010-09-13 15:29:50 +0100	[diff] [blame^]	1016	// This function is only called to seek to the location
				1017	// of the end of a function (at the "}" token). It doesn't matter
				1018	// whether there was a line terminator in the part we skip.
				1019	has_line_terminator_before_next_ = false;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1020	Scan();
				1021	}
				1022
				1023
				1024	uc32 Scanner::ScanHexEscape(uc32 c, int length) {
				1025	ASSERT(length <= 4); // prevent overflow
				1026
				1027	uc32 digits[4];
				1028	uc32 x = 0;
				1029	for (int i = 0; i < length; i++) {
				1030	digits[i] = c0_;
				1031	int d = HexValue(c0_);
				1032	if (d < 0) {
				1033	// According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
				1034	// should be illegal, but other JS VMs just return the
				1035	// non-escaped version of the original character.
				1036
				1037	// Push back digits read, except the last one (in c0_).
				1038	for (int j = i-1; j >= 0; j--) {
				1039	PushBack(digits[j]);
				1040	}
				1041	// Notice: No handling of error - treat it as "\u"->"u".
				1042	return c;
				1043	}
				1044	x = x * 16 + d;
				1045	Advance();
				1046	}
				1047
				1048	return x;
				1049	}
				1050
				1051
				1052	// Octal escapes of the forms '\0xx' and '\xxx' are not a part of
				1053	// ECMA-262. Other JS VMs support them.
				1054	uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
				1055	uc32 x = c - '0';
				1056	for (int i = 0; i < length; i++) {
				1057	int d = c0_ - '0';
				1058	if (d < 0 \|\| d > 7) break;
				1059	int nx = x * 8 + d;
				1060	if (nx >= 256) break;
				1061	x = nx;
				1062	Advance();
				1063	}
				1064	return x;
				1065	}
				1066
				1067
				1068	void Scanner::ScanEscape() {
				1069	uc32 c = c0_;
				1070	Advance();
				1071
				1072	// Skip escaped newlines.
				1073	if (kIsLineTerminator.get(c)) {
				1074	// Allow CR+LF newlines in multiline string literals.
				1075	if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
				1076	// Allow LF+CR newlines in multiline string literals.
				1077	if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
				1078	return;
				1079	}
				1080
				1081	switch (c) {
				1082	case '\'': // fall through
				1083	case '"' : // fall through
				1084	case '\\': break;
				1085	case 'b' : c = '\b'; break;
				1086	case 'f' : c = '\f'; break;
				1087	case 'n' : c = '\n'; break;
				1088	case 'r' : c = '\r'; break;
				1089	case 't' : c = '\t'; break;
				1090	case 'u' : c = ScanHexEscape(c, 4); break;
				1091	case 'v' : c = '\v'; break;
				1092	case 'x' : c = ScanHexEscape(c, 2); break;
				1093	case '0' : // fall through
				1094	case '1' : // fall through
				1095	case '2' : // fall through
				1096	case '3' : // fall through
				1097	case '4' : // fall through
				1098	case '5' : // fall through
				1099	case '6' : // fall through
				1100	case '7' : c = ScanOctalEscape(c, 2); break;
				1101	}
				1102
				1103	// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
				1104	// should be illegal, but they are commonly handled
				1105	// as non-escaped characters by JS VMs.
				1106	AddChar(c);
				1107	}
				1108
				1109
				1110	Token::Value Scanner::ScanString() {
				1111	uc32 quote = c0_;
				1112	Advance(); // consume quote
				1113
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1114	LiteralScope literal(this);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1115	while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
				1116	uc32 c = c0_;
				1117	Advance();
				1118	if (c == '\\') {
				1119	if (c0_ < 0) return Token::ILLEGAL;
				1120	ScanEscape();
				1121	} else {
				1122	AddChar(c);
				1123	}
				1124	}
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1125	if (c0_ != quote) return Token::ILLEGAL;
				1126	literal.Complete();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1127
				1128	Advance(); // consume quote
				1129	return Token::STRING;
				1130	}
				1131
				1132
				1133	Token::Value Scanner::Select(Token::Value tok) {
				1134	Advance();
				1135	return tok;
				1136	}
				1137
				1138
				1139	Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {
				1140	Advance();
				1141	if (c0_ == next) {
				1142	Advance();
				1143	return then;
				1144	} else {
				1145	return else_;
				1146	}
				1147	}
				1148
				1149
				1150	// Returns true if any decimal digits were scanned, returns false otherwise.
				1151	void Scanner::ScanDecimalDigits() {
				1152	while (IsDecimalDigit(c0_))
				1153	AddCharAdvance();
				1154	}
				1155
				1156
				1157	Token::Value Scanner::ScanNumber(bool seen_period) {
				1158	ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
				1159
				1160	enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
				1161
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1162	LiteralScope literal(this);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1163	if (seen_period) {
				1164	// we have already seen a decimal point of the float
				1165	AddChar('.');
				1166	ScanDecimalDigits(); // we know we have at least one digit
				1167
				1168	} else {
				1169	// if the first character is '0' we must check for octals and hex
				1170	if (c0_ == '0') {
				1171	AddCharAdvance();
				1172
				1173	// either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
				1174	if (c0_ == 'x' \|\| c0_ == 'X') {
				1175	// hex number
				1176	kind = HEX;
				1177	AddCharAdvance();
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1178	if (!IsHexDigit(c0_)) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1179	// we must have at least one hex digit after 'x'/'X'
				1180	return Token::ILLEGAL;
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1181	}
				1182	while (IsHexDigit(c0_)) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1183	AddCharAdvance();
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1184	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1185	} else if ('0' <= c0_ && c0_ <= '7') {
				1186	// (possible) octal number
				1187	kind = OCTAL;
				1188	while (true) {
				1189	if (c0_ == '8' \|\| c0_ == '9') {
				1190	kind = DECIMAL;
				1191	break;
				1192	}
				1193	if (c0_ < '0' \|\| '7' < c0_) break;
				1194	AddCharAdvance();
				1195	}
				1196	}
				1197	}
				1198
				1199	// Parse decimal digits and allow trailing fractional part.
				1200	if (kind == DECIMAL) {
				1201	ScanDecimalDigits(); // optional
				1202	if (c0_ == '.') {
				1203	AddCharAdvance();
				1204	ScanDecimalDigits(); // optional
				1205	}
				1206	}
				1207	}
				1208
				1209	// scan exponent, if any
				1210	if (c0_ == 'e' \|\| c0_ == 'E') {
				1211	ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
				1212	if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
				1213	// scan exponent
				1214	AddCharAdvance();
				1215	if (c0_ == '+' \|\| c0_ == '-')
				1216	AddCharAdvance();
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1217	if (!IsDecimalDigit(c0_)) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1218	// we must have at least one decimal digit after 'e'/'E'
				1219	return Token::ILLEGAL;
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1220	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1221	ScanDecimalDigits();
				1222	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1223
				1224	// The source character immediately following a numeric literal must
				1225	// not be an identifier start or a decimal digit; see ECMA-262
				1226	// section 7.8.3, page 17 (note that we read only one decimal digit
				1227	// if the value is 0).
				1228	if (IsDecimalDigit(c0_) \|\| kIsIdentifierStart.get(c0_))
				1229	return Token::ILLEGAL;
				1230
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1231	literal.Complete();
				1232
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1233	return Token::NUMBER;
				1234	}
				1235
				1236
				1237	uc32 Scanner::ScanIdentifierUnicodeEscape() {
				1238	Advance();
				1239	if (c0_ != 'u') return unibrow::Utf8::kBadChar;
				1240	Advance();
				1241	uc32 c = ScanHexEscape('u', 4);
				1242	// We do not allow a unicode escape sequence to start another
				1243	// unicode escape sequence.
				1244	if (c == '\\') return unibrow::Utf8::kBadChar;
				1245	return c;
				1246	}
				1247
				1248
				1249	Token::Value Scanner::ScanIdentifier() {
				1250	ASSERT(kIsIdentifierStart.get(c0_));
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1251
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1252	LiteralScope literal(this);
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	1253	KeywordMatcher keyword_match;
				1254
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1255	// Scan identifier start character.
				1256	if (c0_ == '\\') {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1257	uc32 c = ScanIdentifierUnicodeEscape();
				1258	// Only allow legal identifier start characters.
				1259	if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
				1260	AddChar(c);
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	1261	keyword_match.Fail();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1262	} else {
				1263	AddChar(c0_);
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	1264	keyword_match.AddChar(c0_);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1265	Advance();
				1266	}
				1267
				1268	// Scan the rest of the identifier characters.
				1269	while (kIsIdentifierPart.get(c0_)) {
				1270	if (c0_ == '\\') {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1271	uc32 c = ScanIdentifierUnicodeEscape();
				1272	// Only allow legal identifier part characters.
				1273	if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
				1274	AddChar(c);
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	1275	keyword_match.Fail();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1276	} else {
				1277	AddChar(c0_);
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	1278	keyword_match.AddChar(c0_);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1279	Advance();
				1280	}
				1281	}
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1282	literal.Complete();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1283
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	1284	return keyword_match.token();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1285	}
				1286
				1287
				1288
				1289	bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
				1290	// Checks whether the buffer contains an identifier (no escape).
				1291	if (!buffer->has_more()) return false;
				1292	if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
				1293	while (buffer->has_more()) {
				1294	if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
				1295	}
				1296	return true;
				1297	}
				1298
				1299
				1300	bool Scanner::ScanRegExpPattern(bool seen_equal) {
				1301	// Scan: ('/' \| '/=') RegularExpressionBody '/' RegularExpressionFlags
				1302	bool in_character_class = false;
				1303
				1304	// Previous token is either '/' or '/=', in the second case, the
				1305	// pattern starts at =.
				1306	next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
				1307	next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
				1308
				1309	// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
				1310	// the scanner should pass uninterpreted bodies to the RegExp
				1311	// constructor.
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1312	LiteralScope literal(this);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1313	if (seen_equal)
				1314	AddChar('=');
				1315
				1316	while (c0_ != '/' \|\| in_character_class) {
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1317	if (kIsLineTerminator.get(c0_) \|\| c0_ < 0) return false;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1318	if (c0_ == '\\') { // escaped character
				1319	AddCharAdvance();
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1320	if (kIsLineTerminator.get(c0_) \|\| c0_ < 0) return false;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1321	AddCharAdvance();
				1322	} else { // unescaped character
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1323	if (c0_ == '[') in_character_class = true;
				1324	if (c0_ == ']') in_character_class = false;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1325	AddCharAdvance();
				1326	}
				1327	}
				1328	Advance(); // consume '/'
				1329
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1330	literal.Complete();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1331
				1332	return true;
				1333	}
				1334
				1335	bool Scanner::ScanRegExpFlags() {
				1336	// Scan regular expression flags.
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1337	LiteralScope literal(this);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1338	while (kIsIdentifierPart.get(c0_)) {
				1339	if (c0_ == '\\') {
				1340	uc32 c = ScanIdentifierUnicodeEscape();
				1341	if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
				1342	// We allow any escaped character, unlike the restriction on
				1343	// IdentifierPart when it is used to build an IdentifierName.
				1344	AddChar(c);
				1345	continue;
				1346	}
				1347	}
				1348	AddCharAdvance();
				1349	}
Kristian Monsen	80d68ea	2010-09-08 11:05:35 +0100	[diff] [blame]	1350	literal.Complete();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1351
				1352	next_.location.end_pos = source_pos() - 1;
				1353	return true;
				1354	}
				1355
				1356	} } // namespace v8::internal