Blame - src/parsing/scanner.cc - fp2-dev/platform/external/v8

blob: 5fc848f58ca843ce242a0163335683939ff8768d [file] [log] [blame]

Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1	// Copyright 2011 the V8 project authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	// Features shared by parsing and pre-parsing scanners.
				6
				7	#include "src/parsing/scanner.h"
				8
				9	#include <stdint.h>
				10
				11	#include <cmath>
				12
				13	#include "src/ast/ast-value-factory.h"
				14	#include "src/char-predicates-inl.h"
				15	#include "src/conversions-inl.h"
				16	#include "src/list-inl.h"
				17	#include "src/parsing/parser.h"
				18
				19	namespace v8 {
				20	namespace internal {
				21
				22
				23	Handle<String> LiteralBuffer::Internalize(Isolate* isolate) const {
				24	if (is_one_byte()) {
				25	return isolate->factory()->InternalizeOneByteString(one_byte_literal());
				26	}
				27	return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
				28	}
				29
				30
				31	// Default implementation for streams that do not support bookmarks.
				32	bool Utf16CharacterStream::SetBookmark() { return false; }
				33	void Utf16CharacterStream::ResetToBookmark() { UNREACHABLE(); }
				34
				35
				36	// ----------------------------------------------------------------------------
				37	// Scanner
				38
				39	Scanner::Scanner(UnicodeCache* unicode_cache)
				40	: unicode_cache_(unicode_cache),
				41	bookmark_c0_(kNoBookmark),
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	42	octal_pos_(Location::invalid()),
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	43	decimal_with_leading_zero_pos_(Location::invalid()),
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	44	found_html_comment_(false),
				45	allow_harmony_exponentiation_operator_(false) {
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	46	bookmark_current_.literal_chars = &bookmark_current_literal_;
				47	bookmark_current_.raw_literal_chars = &bookmark_current_raw_literal_;
				48	bookmark_next_.literal_chars = &bookmark_next_literal_;
				49	bookmark_next_.raw_literal_chars = &bookmark_next_raw_literal_;
				50	}
				51
				52
				53	void Scanner::Initialize(Utf16CharacterStream* source) {
				54	source_ = source;
				55	// Need to capture identifiers in order to recognize "get" and "set"
				56	// in object literals.
				57	Init();
				58	// Skip initial whitespace allowing HTML comment ends just like
				59	// after a newline and scan first token.
				60	has_line_terminator_before_next_ = true;
				61	SkipWhiteSpace();
				62	Scan();
				63	}
				64
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	65	template <bool capture_raw, bool unicode>
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	66	uc32 Scanner::ScanHexNumber(int expected_length) {
				67	DCHECK(expected_length <= 4); // prevent overflow
				68
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	69	int begin = source_pos() - 2;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	70	uc32 x = 0;
				71	for (int i = 0; i < expected_length; i++) {
				72	int d = HexValue(c0_);
				73	if (d < 0) {
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	74	ReportScannerError(Location(begin, begin + expected_length + 2),
				75	unicode
				76	? MessageTemplate::kInvalidUnicodeEscapeSequence
				77	: MessageTemplate::kInvalidHexEscapeSequence);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	78	return -1;
				79	}
				80	x = x * 16 + d;
				81	Advance<capture_raw>();
				82	}
				83
				84	return x;
				85	}
				86
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	87	template <bool capture_raw>
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	88	uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	89	uc32 x = 0;
				90	int d = HexValue(c0_);
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	91	if (d < 0) return -1;
				92
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	93	while (d >= 0) {
				94	x = x * 16 + d;
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	95	if (x > max_value) {
				96	ReportScannerError(Location(beg_pos, source_pos() + 1),
				97	MessageTemplate::kUndefinedUnicodeCodePoint);
				98	return -1;
				99	}
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	100	Advance<capture_raw>();
				101	d = HexValue(c0_);
				102	}
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	103
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	104	return x;
				105	}
				106
				107
				108	// Ensure that tokens can be stored in a byte.
				109	STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
				110
				111	// Table of one-character tokens, by character (0x00..0x7f only).
				112	static const byte one_char_tokens[] = {
				113	Token::ILLEGAL,
				114	Token::ILLEGAL,
				115	Token::ILLEGAL,
				116	Token::ILLEGAL,
				117	Token::ILLEGAL,
				118	Token::ILLEGAL,
				119	Token::ILLEGAL,
				120	Token::ILLEGAL,
				121	Token::ILLEGAL,
				122	Token::ILLEGAL,
				123	Token::ILLEGAL,
				124	Token::ILLEGAL,
				125	Token::ILLEGAL,
				126	Token::ILLEGAL,
				127	Token::ILLEGAL,
				128	Token::ILLEGAL,
				129	Token::ILLEGAL,
				130	Token::ILLEGAL,
				131	Token::ILLEGAL,
				132	Token::ILLEGAL,
				133	Token::ILLEGAL,
				134	Token::ILLEGAL,
				135	Token::ILLEGAL,
				136	Token::ILLEGAL,
				137	Token::ILLEGAL,
				138	Token::ILLEGAL,
				139	Token::ILLEGAL,
				140	Token::ILLEGAL,
				141	Token::ILLEGAL,
				142	Token::ILLEGAL,
				143	Token::ILLEGAL,
				144	Token::ILLEGAL,
				145	Token::ILLEGAL,
				146	Token::ILLEGAL,
				147	Token::ILLEGAL,
				148	Token::ILLEGAL,
				149	Token::ILLEGAL,
				150	Token::ILLEGAL,
				151	Token::ILLEGAL,
				152	Token::ILLEGAL,
				153	Token::LPAREN, // 0x28
				154	Token::RPAREN, // 0x29
				155	Token::ILLEGAL,
				156	Token::ILLEGAL,
				157	Token::COMMA, // 0x2c
				158	Token::ILLEGAL,
				159	Token::ILLEGAL,
				160	Token::ILLEGAL,
				161	Token::ILLEGAL,
				162	Token::ILLEGAL,
				163	Token::ILLEGAL,
				164	Token::ILLEGAL,
				165	Token::ILLEGAL,
				166	Token::ILLEGAL,
				167	Token::ILLEGAL,
				168	Token::ILLEGAL,
				169	Token::ILLEGAL,
				170	Token::ILLEGAL,
				171	Token::COLON, // 0x3a
				172	Token::SEMICOLON, // 0x3b
				173	Token::ILLEGAL,
				174	Token::ILLEGAL,
				175	Token::ILLEGAL,
				176	Token::CONDITIONAL, // 0x3f
				177	Token::ILLEGAL,
				178	Token::ILLEGAL,
				179	Token::ILLEGAL,
				180	Token::ILLEGAL,
				181	Token::ILLEGAL,
				182	Token::ILLEGAL,
				183	Token::ILLEGAL,
				184	Token::ILLEGAL,
				185	Token::ILLEGAL,
				186	Token::ILLEGAL,
				187	Token::ILLEGAL,
				188	Token::ILLEGAL,
				189	Token::ILLEGAL,
				190	Token::ILLEGAL,
				191	Token::ILLEGAL,
				192	Token::ILLEGAL,
				193	Token::ILLEGAL,
				194	Token::ILLEGAL,
				195	Token::ILLEGAL,
				196	Token::ILLEGAL,
				197	Token::ILLEGAL,
				198	Token::ILLEGAL,
				199	Token::ILLEGAL,
				200	Token::ILLEGAL,
				201	Token::ILLEGAL,
				202	Token::ILLEGAL,
				203	Token::ILLEGAL,
				204	Token::LBRACK, // 0x5b
				205	Token::ILLEGAL,
				206	Token::RBRACK, // 0x5d
				207	Token::ILLEGAL,
				208	Token::ILLEGAL,
				209	Token::ILLEGAL,
				210	Token::ILLEGAL,
				211	Token::ILLEGAL,
				212	Token::ILLEGAL,
				213	Token::ILLEGAL,
				214	Token::ILLEGAL,
				215	Token::ILLEGAL,
				216	Token::ILLEGAL,
				217	Token::ILLEGAL,
				218	Token::ILLEGAL,
				219	Token::ILLEGAL,
				220	Token::ILLEGAL,
				221	Token::ILLEGAL,
				222	Token::ILLEGAL,
				223	Token::ILLEGAL,
				224	Token::ILLEGAL,
				225	Token::ILLEGAL,
				226	Token::ILLEGAL,
				227	Token::ILLEGAL,
				228	Token::ILLEGAL,
				229	Token::ILLEGAL,
				230	Token::ILLEGAL,
				231	Token::ILLEGAL,
				232	Token::ILLEGAL,
				233	Token::ILLEGAL,
				234	Token::ILLEGAL,
				235	Token::ILLEGAL,
				236	Token::LBRACE, // 0x7b
				237	Token::ILLEGAL,
				238	Token::RBRACE, // 0x7d
				239	Token::BIT_NOT, // 0x7e
				240	Token::ILLEGAL
				241	};
				242
				243
				244	Token::Value Scanner::Next() {
				245	if (next_.token == Token::EOS) {
				246	next_.location.beg_pos = current_.location.beg_pos;
				247	next_.location.end_pos = current_.location.end_pos;
				248	}
				249	current_ = next_;
				250	if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {
				251	next_ = next_next_;
				252	next_next_.token = Token::UNINITIALIZED;
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	253	has_line_terminator_before_next_ = has_line_terminator_after_next_;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	254	return current_.token;
				255	}
				256	has_line_terminator_before_next_ = false;
				257	has_multiline_comment_before_next_ = false;
				258	if (static_cast<unsigned>(c0_) <= 0x7f) {
				259	Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
				260	if (token != Token::ILLEGAL) {
				261	int pos = source_pos();
				262	next_.token = token;
				263	next_.location.beg_pos = pos;
				264	next_.location.end_pos = pos + 1;
				265	Advance();
				266	return current_.token;
				267	}
				268	}
				269	Scan();
				270	return current_.token;
				271	}
				272
				273
				274	Token::Value Scanner::PeekAhead() {
				275	if (next_next_.token != Token::UNINITIALIZED) {
				276	return next_next_.token;
				277	}
				278	TokenDesc prev = current_;
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	279	bool has_line_terminator_before_next =
				280	has_line_terminator_before_next_ \|\| has_multiline_comment_before_next_;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	281	Next();
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	282	has_line_terminator_after_next_ =
				283	has_line_terminator_before_next_ \|\| has_multiline_comment_before_next_;
				284	has_line_terminator_before_next_ = has_line_terminator_before_next;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	285	Token::Value ret = next_.token;
				286	next_next_ = next_;
				287	next_ = current_;
				288	current_ = prev;
				289	return ret;
				290	}
				291
				292
				293	// TODO(yangguo): check whether this is actually necessary.
				294	static inline bool IsLittleEndianByteOrderMark(uc32 c) {
				295	// The Unicode value U+FFFE is guaranteed never to be assigned as a
				296	// Unicode character; this implies that in a Unicode context the
				297	// 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
				298	// character expressed in little-endian byte order (since it could
				299	// not be a U+FFFE character expressed in big-endian byte
				300	// order). Nevertheless, we check for it to be compatible with
				301	// Spidermonkey.
				302	return c == 0xFFFE;
				303	}
				304
				305
				306	bool Scanner::SkipWhiteSpace() {
				307	int start_position = source_pos();
				308
				309	while (true) {
				310	while (true) {
				311	// The unicode cache accepts unsigned inputs.
				312	if (c0_ < 0) break;
				313	// Advance as long as character is a WhiteSpace or LineTerminator.
				314	// Remember if the latter is the case.
				315	if (unicode_cache_->IsLineTerminator(c0_)) {
				316	has_line_terminator_before_next_ = true;
				317	} else if (!unicode_cache_->IsWhiteSpace(c0_) &&
				318	!IsLittleEndianByteOrderMark(c0_)) {
				319	break;
				320	}
				321	Advance();
				322	}
				323
				324	// If there is an HTML comment end '-->' at the beginning of a
				325	// line (with only whitespace in front of it), we treat the rest
				326	// of the line as a comment. This is in line with the way
				327	// SpiderMonkey handles it.
				328	if (c0_ == '-' && has_line_terminator_before_next_) {
				329	Advance();
				330	if (c0_ == '-') {
				331	Advance();
				332	if (c0_ == '>') {
				333	// Treat the rest of the line as a comment.
				334	SkipSingleLineComment();
				335	// Continue skipping white space after the comment.
				336	continue;
				337	}
				338	PushBack('-'); // undo Advance()
				339	}
				340	PushBack('-'); // undo Advance()
				341	}
				342	// Return whether or not we skipped any characters.
				343	return source_pos() != start_position;
				344	}
				345	}
				346
				347
				348	Token::Value Scanner::SkipSingleLineComment() {
				349	Advance();
				350
				351	// The line terminator at the end of the line is not considered
				352	// to be part of the single-line comment; it is recognized
				353	// separately by the lexical grammar and becomes part of the
				354	// stream of input elements for the syntactic grammar (see
				355	// ECMA-262, section 7.4).
				356	while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
				357	Advance();
				358	}
				359
				360	return Token::WHITESPACE;
				361	}
				362
				363
				364	Token::Value Scanner::SkipSourceURLComment() {
				365	TryToParseSourceURLComment();
				366	while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
				367	Advance();
				368	}
				369
				370	return Token::WHITESPACE;
				371	}
				372
				373
				374	void Scanner::TryToParseSourceURLComment() {
				375	// Magic comments are of the form: //[#@]\s<name>=\s<value>\s.* and this
				376	// function will just return if it cannot parse a magic comment.
				377	if (c0_ < 0 \|\| !unicode_cache_->IsWhiteSpace(c0_)) return;
				378	Advance();
				379	LiteralBuffer name;
				380	while (c0_ >= 0 && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) &&
				381	c0_ != '=') {
				382	name.AddChar(c0_);
				383	Advance();
				384	}
				385	if (!name.is_one_byte()) return;
				386	Vector<const uint8_t> name_literal = name.one_byte_literal();
				387	LiteralBuffer* value;
				388	if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
				389	value = &source_url_;
				390	} else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
				391	value = &source_mapping_url_;
				392	} else {
				393	return;
				394	}
				395	if (c0_ != '=')
				396	return;
				397	Advance();
				398	value->Reset();
				399	while (c0_ >= 0 && unicode_cache_->IsWhiteSpace(c0_)) {
				400	Advance();
				401	}
				402	while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
				403	// Disallowed characters.
				404	if (c0_ == '"' \|\| c0_ == '\'') {
				405	value->Reset();
				406	return;
				407	}
				408	if (unicode_cache_->IsWhiteSpace(c0_)) {
				409	break;
				410	}
				411	value->AddChar(c0_);
				412	Advance();
				413	}
				414	// Allow whitespace at the end.
				415	while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
				416	if (!unicode_cache_->IsWhiteSpace(c0_)) {
				417	value->Reset();
				418	break;
				419	}
				420	Advance();
				421	}
				422	}
				423
				424
				425	Token::Value Scanner::SkipMultiLineComment() {
				426	DCHECK(c0_ == '*');
				427	Advance();
				428
				429	while (c0_ >= 0) {
				430	uc32 ch = c0_;
				431	Advance();
				432	if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {
				433	// Following ECMA-262, section 7.4, a comment containing
				434	// a newline will make the comment count as a line-terminator.
				435	has_multiline_comment_before_next_ = true;
				436	}
				437	// If we have reached the end of the multi-line comment, we
				438	// consume the '/' and insert a whitespace. This way all
				439	// multi-line comments are treated as whitespace.
				440	if (ch == '*' && c0_ == '/') {
				441	c0_ = ' ';
				442	return Token::WHITESPACE;
				443	}
				444	}
				445
				446	// Unterminated multi-line comment.
				447	return Token::ILLEGAL;
				448	}
				449
				450
				451	Token::Value Scanner::ScanHtmlComment() {
				452	// Check for <!-- comments.
				453	DCHECK(c0_ == '!');
				454	Advance();
				455	if (c0_ == '-') {
				456	Advance();
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	457	if (c0_ == '-') {
				458	found_html_comment_ = true;
				459	return SkipSingleLineComment();
				460	}
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	461	PushBack('-'); // undo Advance()
				462	}
				463	PushBack('!'); // undo Advance()
				464	DCHECK(c0_ == '!');
				465	return Token::LT;
				466	}
				467
				468
				469	void Scanner::Scan() {
				470	next_.literal_chars = NULL;
				471	next_.raw_literal_chars = NULL;
				472	Token::Value token;
				473	do {
				474	// Remember the position of the next token
				475	next_.location.beg_pos = source_pos();
				476
				477	switch (c0_) {
				478	case ' ':
				479	case '\t':
				480	Advance();
				481	token = Token::WHITESPACE;
				482	break;
				483
				484	case '\n':
				485	Advance();
				486	has_line_terminator_before_next_ = true;
				487	token = Token::WHITESPACE;
				488	break;
				489
				490	case '"': case '\'':
				491	token = ScanString();
				492	break;
				493
				494	case '<':
				495	// < <= << <<= <!--
				496	Advance();
				497	if (c0_ == '=') {
				498	token = Select(Token::LTE);
				499	} else if (c0_ == '<') {
				500	token = Select('=', Token::ASSIGN_SHL, Token::SHL);
				501	} else if (c0_ == '!') {
				502	token = ScanHtmlComment();
				503	} else {
				504	token = Token::LT;
				505	}
				506	break;
				507
				508	case '>':
				509	// > >= >> >>= >>> >>>=
				510	Advance();
				511	if (c0_ == '=') {
				512	token = Select(Token::GTE);
				513	} else if (c0_ == '>') {
				514	// >> >>= >>> >>>=
				515	Advance();
				516	if (c0_ == '=') {
				517	token = Select(Token::ASSIGN_SAR);
				518	} else if (c0_ == '>') {
				519	token = Select('=', Token::ASSIGN_SHR, Token::SHR);
				520	} else {
				521	token = Token::SAR;
				522	}
				523	} else {
				524	token = Token::GT;
				525	}
				526	break;
				527
				528	case '=':
				529	// = == === =>
				530	Advance();
				531	if (c0_ == '=') {
				532	token = Select('=', Token::EQ_STRICT, Token::EQ);
				533	} else if (c0_ == '>') {
				534	token = Select(Token::ARROW);
				535	} else {
				536	token = Token::ASSIGN;
				537	}
				538	break;
				539
				540	case '!':
				541	// ! != !==
				542	Advance();
				543	if (c0_ == '=') {
				544	token = Select('=', Token::NE_STRICT, Token::NE);
				545	} else {
				546	token = Token::NOT;
				547	}
				548	break;
				549
				550	case '+':
				551	// + ++ +=
				552	Advance();
				553	if (c0_ == '+') {
				554	token = Select(Token::INC);
				555	} else if (c0_ == '=') {
				556	token = Select(Token::ASSIGN_ADD);
				557	} else {
				558	token = Token::ADD;
				559	}
				560	break;
				561
				562	case '-':
				563	// - -- --> -=
				564	Advance();
				565	if (c0_ == '-') {
				566	Advance();
				567	if (c0_ == '>' && has_line_terminator_before_next_) {
				568	// For compatibility with SpiderMonkey, we skip lines that
				569	// start with an HTML comment end '-->'.
				570	token = SkipSingleLineComment();
				571	} else {
				572	token = Token::DEC;
				573	}
				574	} else if (c0_ == '=') {
				575	token = Select(Token::ASSIGN_SUB);
				576	} else {
				577	token = Token::SUB;
				578	}
				579	break;
				580
				581	case '*':
				582	// * *=
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	583	Advance();
				584	if (c0_ == '*' && allow_harmony_exponentiation_operator()) {
				585	token = Select('=', Token::ASSIGN_EXP, Token::EXP);
				586	} else if (c0_ == '=') {
				587	token = Select(Token::ASSIGN_MUL);
				588	} else {
				589	token = Token::MUL;
				590	}
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	591	break;
				592
				593	case '%':
				594	// % %=
				595	token = Select('=', Token::ASSIGN_MOD, Token::MOD);
				596	break;
				597
				598	case '/':
				599	// / // /* /=
				600	Advance();
				601	if (c0_ == '/') {
				602	Advance();
				603	if (c0_ == '#' \|\| c0_ == '@') {
				604	Advance();
				605	token = SkipSourceURLComment();
				606	} else {
				607	PushBack(c0_);
				608	token = SkipSingleLineComment();
				609	}
				610	} else if (c0_ == '*') {
				611	token = SkipMultiLineComment();
				612	} else if (c0_ == '=') {
				613	token = Select(Token::ASSIGN_DIV);
				614	} else {
				615	token = Token::DIV;
				616	}
				617	break;
				618
				619	case '&':
				620	// & && &=
				621	Advance();
				622	if (c0_ == '&') {
				623	token = Select(Token::AND);
				624	} else if (c0_ == '=') {
				625	token = Select(Token::ASSIGN_BIT_AND);
				626	} else {
				627	token = Token::BIT_AND;
				628	}
				629	break;
				630
				631	case '\|':
				632	// \| \|\| \|=
				633	Advance();
				634	if (c0_ == '\|') {
				635	token = Select(Token::OR);
				636	} else if (c0_ == '=') {
				637	token = Select(Token::ASSIGN_BIT_OR);
				638	} else {
				639	token = Token::BIT_OR;
				640	}
				641	break;
				642
				643	case '^':
				644	// ^ ^=
				645	token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
				646	break;
				647
				648	case '.':
				649	// . Number
				650	Advance();
				651	if (IsDecimalDigit(c0_)) {
				652	token = ScanNumber(true);
				653	} else {
				654	token = Token::PERIOD;
				655	if (c0_ == '.') {
				656	Advance();
				657	if (c0_ == '.') {
				658	Advance();
				659	token = Token::ELLIPSIS;
				660	} else {
				661	PushBack('.');
				662	}
				663	}
				664	}
				665	break;
				666
				667	case ':':
				668	token = Select(Token::COLON);
				669	break;
				670
				671	case ';':
				672	token = Select(Token::SEMICOLON);
				673	break;
				674
				675	case ',':
				676	token = Select(Token::COMMA);
				677	break;
				678
				679	case '(':
				680	token = Select(Token::LPAREN);
				681	break;
				682
				683	case ')':
				684	token = Select(Token::RPAREN);
				685	break;
				686
				687	case '[':
				688	token = Select(Token::LBRACK);
				689	break;
				690
				691	case ']':
				692	token = Select(Token::RBRACK);
				693	break;
				694
				695	case '{':
				696	token = Select(Token::LBRACE);
				697	break;
				698
				699	case '}':
				700	token = Select(Token::RBRACE);
				701	break;
				702
				703	case '?':
				704	token = Select(Token::CONDITIONAL);
				705	break;
				706
				707	case '~':
				708	token = Select(Token::BIT_NOT);
				709	break;
				710
				711	case '`':
				712	token = ScanTemplateStart();
				713	break;
				714
				715	default:
				716	if (c0_ < 0) {
				717	token = Token::EOS;
				718	} else if (unicode_cache_->IsIdentifierStart(c0_)) {
				719	token = ScanIdentifierOrKeyword();
				720	} else if (IsDecimalDigit(c0_)) {
				721	token = ScanNumber(false);
				722	} else if (SkipWhiteSpace()) {
				723	token = Token::WHITESPACE;
				724	} else {
				725	token = Select(Token::ILLEGAL);
				726	}
				727	break;
				728	}
				729
				730	// Continue scanning for tokens as long as we're just skipping
				731	// whitespace.
				732	} while (token == Token::WHITESPACE);
				733
				734	next_.location.end_pos = source_pos();
				735	next_.token = token;
				736	}
				737
				738
				739	void Scanner::SeekForward(int pos) {
				740	// After this call, we will have the token at the given position as
				741	// the "next" token. The "current" token will be invalid.
				742	if (pos == next_.location.beg_pos) return;
				743	int current_pos = source_pos();
				744	DCHECK_EQ(next_.location.end_pos, current_pos);
				745	// Positions inside the lookahead token aren't supported.
				746	DCHECK(pos >= current_pos);
				747	if (pos != current_pos) {
				748	source_->SeekForward(pos - source_->pos());
				749	Advance();
				750	// This function is only called to seek to the location
				751	// of the end of a function (at the "}" token). It doesn't matter
				752	// whether there was a line terminator in the part we skip.
				753	has_line_terminator_before_next_ = false;
				754	has_multiline_comment_before_next_ = false;
				755	}
				756	Scan();
				757	}
				758
				759
				760	template <bool capture_raw, bool in_template_literal>
				761	bool Scanner::ScanEscape() {
				762	uc32 c = c0_;
				763	Advance<capture_raw>();
				764
				765	// Skip escaped newlines.
				766	if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
				767	// Allow CR+LF newlines in multiline string literals.
				768	if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
				769	// Allow LF+CR newlines in multiline string literals.
				770	if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();
				771	return true;
				772	}
				773
				774	switch (c) {
				775	case '\'': // fall through
				776	case '"' : // fall through
				777	case '\\': break;
				778	case 'b' : c = '\b'; break;
				779	case 'f' : c = '\f'; break;
				780	case 'n' : c = '\n'; break;
				781	case 'r' : c = '\r'; break;
				782	case 't' : c = '\t'; break;
				783	case 'u' : {
				784	c = ScanUnicodeEscape<capture_raw>();
				785	if (c < 0) return false;
				786	break;
				787	}
				788	case 'v':
				789	c = '\v';
				790	break;
				791	case 'x': {
				792	c = ScanHexNumber<capture_raw>(2);
				793	if (c < 0) return false;
				794	break;
				795	}
				796	case '0': // Fall through.
				797	case '1': // fall through
				798	case '2': // fall through
				799	case '3': // fall through
				800	case '4': // fall through
				801	case '5': // fall through
				802	case '6': // fall through
				803	case '7':
				804	c = ScanOctalEscape<capture_raw>(c, 2);
				805	break;
				806	}
				807
				808	// According to ECMA-262, section 7.8.4, characters not covered by the
				809	// above cases should be illegal, but they are commonly handled as
				810	// non-escaped characters by JS VMs.
				811	AddLiteralChar(c);
				812	return true;
				813	}
				814
				815
				816	// Octal escapes of the forms '\0xx' and '\xxx' are not a part of
				817	// ECMA-262. Other JS VMs support them.
				818	template <bool capture_raw>
				819	uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
				820	uc32 x = c - '0';
				821	int i = 0;
				822	for (; i < length; i++) {
				823	int d = c0_ - '0';
				824	if (d < 0 \|\| d > 7) break;
				825	int nx = x * 8 + d;
				826	if (nx >= 256) break;
				827	x = nx;
				828	Advance<capture_raw>();
				829	}
				830	// Anything except '\0' is an octal escape sequence, illegal in strict mode.
				831	// Remember the position of octal escape sequences so that an error
				832	// can be reported later (in strict mode).
				833	// We don't report the error immediately, because the octal escape can
				834	// occur before the "use strict" directive.
				835	if (c != '0' \|\| i > 0) {
				836	octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
				837	}
				838	return x;
				839	}
				840
				841
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	842	Token::Value Scanner::ScanString() {
				843	uc32 quote = c0_;
				844	Advance<false, false>(); // consume quote
				845
				846	LiteralScope literal(this);
				847	while (true) {
				848	if (c0_ > kMaxAscii) {
				849	HandleLeadSurrogate();
				850	break;
				851	}
				852	if (c0_ < 0 \|\| c0_ == '\n' \|\| c0_ == '\r') return Token::ILLEGAL;
				853	if (c0_ == quote) {
				854	literal.Complete();
				855	Advance<false, false>();
				856	return Token::STRING;
				857	}
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	858	char c = static_cast<char>(c0_);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	859	if (c == '\\') break;
				860	Advance<false, false>();
				861	AddLiteralChar(c);
				862	}
				863
				864	while (c0_ != quote && c0_ >= 0
				865	&& !unicode_cache_->IsLineTerminator(c0_)) {
				866	uc32 c = c0_;
				867	Advance();
				868	if (c == '\\') {
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	869	if (c0_ < 0 \|\| !ScanEscape<false, false>()) {
				870	return Token::ILLEGAL;
				871	}
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	872	} else {
				873	AddLiteralChar(c);
				874	}
				875	}
				876	if (c0_ != quote) return Token::ILLEGAL;
				877	literal.Complete();
				878
				879	Advance(); // consume quote
				880	return Token::STRING;
				881	}
				882
				883
				884	Token::Value Scanner::ScanTemplateSpan() {
				885	// When scanning a TemplateSpan, we are looking for the following construct:
				886	// TEMPLATE_SPAN ::
				887	// ` LiteralChars* ${
				888	// \| } LiteralChars* ${
				889	//
				890	// TEMPLATE_TAIL ::
				891	// ` LiteralChars* `
				892	// \| } LiteralChar* `
				893	//
				894	// A TEMPLATE_SPAN should always be followed by an Expression, while a
				895	// TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
				896	// followed by an Expression.
				897
				898	Token::Value result = Token::TEMPLATE_SPAN;
				899	LiteralScope literal(this);
				900	StartRawLiteral();
				901	const bool capture_raw = true;
				902	const bool in_template_literal = true;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	903	while (true) {
				904	uc32 c = c0_;
				905	Advance<capture_raw>();
				906	if (c == '`') {
				907	result = Token::TEMPLATE_TAIL;
				908	ReduceRawLiteralLength(1);
				909	break;
				910	} else if (c == '$' && c0_ == '{') {
				911	Advance<capture_raw>(); // Consume '{'
				912	ReduceRawLiteralLength(2);
				913	break;
				914	} else if (c == '\\') {
				915	if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) {
				916	// The TV of LineContinuation :: \ LineTerminatorSequence is the empty
				917	// code unit sequence.
				918	uc32 lastChar = c0_;
				919	Advance<capture_raw>();
				920	if (lastChar == '\r') {
				921	ReduceRawLiteralLength(1); // Remove \r
				922	if (c0_ == '\n') {
				923	Advance<capture_raw>(); // Adds \n
				924	} else {
				925	AddRawLiteralChar('\n');
				926	}
				927	}
				928	} else if (!ScanEscape<capture_raw, in_template_literal>()) {
				929	return Token::ILLEGAL;
				930	}
				931	} else if (c < 0) {
				932	// Unterminated template literal
				933	PushBack(c);
				934	break;
				935	} else {
				936	// The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
				937	// The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
				938	// consisting of the CV 0x000A.
				939	if (c == '\r') {
				940	ReduceRawLiteralLength(1); // Remove \r
				941	if (c0_ == '\n') {
				942	Advance<capture_raw>(); // Adds \n
				943	} else {
				944	AddRawLiteralChar('\n');
				945	}
				946	c = '\n';
				947	}
				948	AddLiteralChar(c);
				949	}
				950	}
				951	literal.Complete();
				952	next_.location.end_pos = source_pos();
				953	next_.token = result;
				954	return result;
				955	}
				956
				957
				958	Token::Value Scanner::ScanTemplateStart() {
				959	DCHECK(c0_ == '`');
				960	next_.location.beg_pos = source_pos();
				961	Advance(); // Consume `
				962	return ScanTemplateSpan();
				963	}
				964
				965
				966	Token::Value Scanner::ScanTemplateContinuation() {
				967	DCHECK_EQ(next_.token, Token::RBRACE);
				968	next_.location.beg_pos = source_pos() - 1; // We already consumed }
				969	return ScanTemplateSpan();
				970	}
				971
				972
				973	void Scanner::ScanDecimalDigits() {
				974	while (IsDecimalDigit(c0_))
				975	AddLiteralCharAdvance();
				976	}
				977
				978
				979	Token::Value Scanner::ScanNumber(bool seen_period) {
				980	DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
				981
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	982	enum {
				983	DECIMAL,
				984	DECIMAL_WITH_LEADING_ZERO,
				985	HEX,
				986	OCTAL,
				987	IMPLICIT_OCTAL,
				988	BINARY
				989	} kind = DECIMAL;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	990
				991	LiteralScope literal(this);
				992	bool at_start = !seen_period;
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	993	int start_pos = source_pos(); // For reporting octal positions.
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	994	if (seen_period) {
				995	// we have already seen a decimal point of the float
				996	AddLiteralChar('.');
				997	ScanDecimalDigits(); // we know we have at least one digit
				998
				999	} else {
				1000	// if the first character is '0' we must check for octals and hex
				1001	if (c0_ == '0') {
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1002	AddLiteralCharAdvance();
				1003
				1004	// either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
				1005	// an octal number.
				1006	if (c0_ == 'x' \|\| c0_ == 'X') {
				1007	// hex number
				1008	kind = HEX;
				1009	AddLiteralCharAdvance();
				1010	if (!IsHexDigit(c0_)) {
				1011	// we must have at least one hex digit after 'x'/'X'
				1012	return Token::ILLEGAL;
				1013	}
				1014	while (IsHexDigit(c0_)) {
				1015	AddLiteralCharAdvance();
				1016	}
				1017	} else if (c0_ == 'o' \|\| c0_ == 'O') {
				1018	kind = OCTAL;
				1019	AddLiteralCharAdvance();
				1020	if (!IsOctalDigit(c0_)) {
				1021	// we must have at least one octal digit after 'o'/'O'
				1022	return Token::ILLEGAL;
				1023	}
				1024	while (IsOctalDigit(c0_)) {
				1025	AddLiteralCharAdvance();
				1026	}
				1027	} else if (c0_ == 'b' \|\| c0_ == 'B') {
				1028	kind = BINARY;
				1029	AddLiteralCharAdvance();
				1030	if (!IsBinaryDigit(c0_)) {
				1031	// we must have at least one binary digit after 'b'/'B'
				1032	return Token::ILLEGAL;
				1033	}
				1034	while (IsBinaryDigit(c0_)) {
				1035	AddLiteralCharAdvance();
				1036	}
				1037	} else if ('0' <= c0_ && c0_ <= '7') {
				1038	// (possible) octal number
				1039	kind = IMPLICIT_OCTAL;
				1040	while (true) {
				1041	if (c0_ == '8' \|\| c0_ == '9') {
				1042	at_start = false;
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1043	kind = DECIMAL_WITH_LEADING_ZERO;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1044	break;
				1045	}
				1046	if (c0_ < '0' \|\| '7' < c0_) {
				1047	// Octal literal finished.
				1048	octal_pos_ = Location(start_pos, source_pos());
				1049	break;
				1050	}
				1051	AddLiteralCharAdvance();
				1052	}
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1053	} else if (c0_ == '8' \|\| c0_ == '9') {
				1054	kind = DECIMAL_WITH_LEADING_ZERO;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1055	}
				1056	}
				1057
				1058	// Parse decimal digits and allow trailing fractional part.
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1059	if (kind == DECIMAL \|\| kind == DECIMAL_WITH_LEADING_ZERO) {
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1060	if (at_start) {
				1061	uint64_t value = 0;
				1062	while (IsDecimalDigit(c0_)) {
				1063	value = 10 * value + (c0_ - '0');
				1064
				1065	uc32 first_char = c0_;
				1066	Advance<false, false>();
				1067	AddLiteralChar(first_char);
				1068	}
				1069
				1070	if (next_.literal_chars->one_byte_literal().length() <= 10 &&
				1071	value <= Smi::kMaxValue && c0_ != '.' && c0_ != 'e' && c0_ != 'E') {
				1072	next_.smi_value_ = static_cast<int>(value);
				1073	literal.Complete();
				1074	HandleLeadSurrogate();
				1075
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1076	if (kind == DECIMAL_WITH_LEADING_ZERO)
				1077	decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1078	return Token::SMI;
				1079	}
				1080	HandleLeadSurrogate();
				1081	}
				1082
				1083	ScanDecimalDigits(); // optional
				1084	if (c0_ == '.') {
				1085	AddLiteralCharAdvance();
				1086	ScanDecimalDigits(); // optional
				1087	}
				1088	}
				1089	}
				1090
				1091	// scan exponent, if any
				1092	if (c0_ == 'e' \|\| c0_ == 'E') {
				1093	DCHECK(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1094	if (!(kind == DECIMAL \|\| kind == DECIMAL_WITH_LEADING_ZERO))
				1095	return Token::ILLEGAL;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1096	// scan exponent
				1097	AddLiteralCharAdvance();
				1098	if (c0_ == '+' \|\| c0_ == '-')
				1099	AddLiteralCharAdvance();
				1100	if (!IsDecimalDigit(c0_)) {
				1101	// we must have at least one decimal digit after 'e'/'E'
				1102	return Token::ILLEGAL;
				1103	}
				1104	ScanDecimalDigits();
				1105	}
				1106
				1107	// The source character immediately following a numeric literal must
				1108	// not be an identifier start or a decimal digit; see ECMA-262
				1109	// section 7.8.3, page 17 (note that we read only one decimal digit
				1110	// if the value is 0).
				1111	if (IsDecimalDigit(c0_) \|\|
				1112	(c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_)))
				1113	return Token::ILLEGAL;
				1114
				1115	literal.Complete();
				1116
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1117	if (kind == DECIMAL_WITH_LEADING_ZERO)
				1118	decimal_with_leading_zero_pos_ = Location(start_pos, source_pos());
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1119	return Token::NUMBER;
				1120	}
				1121
				1122
				1123	uc32 Scanner::ScanIdentifierUnicodeEscape() {
				1124	Advance();
				1125	if (c0_ != 'u') return -1;
				1126	Advance();
				1127	return ScanUnicodeEscape<false>();
				1128	}
				1129
				1130
				1131	template <bool capture_raw>
				1132	uc32 Scanner::ScanUnicodeEscape() {
				1133	// Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
				1134	// hex digits between { } is arbitrary. \ and u have already been read.
				1135	if (c0_ == '{') {
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	1136	int begin = source_pos() - 2;
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1137	Advance<capture_raw>();
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	1138	uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, begin);
				1139	if (cp < 0 \|\| c0_ != '}') {
				1140	ReportScannerError(source_pos(),
				1141	MessageTemplate::kInvalidUnicodeEscapeSequence);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1142	return -1;
				1143	}
				1144	Advance<capture_raw>();
				1145	return cp;
				1146	}
Ben Murdoch	da12d29	2016-06-02 14:46:10 +0100	[diff] [blame]	1147	const bool unicode = true;
				1148	return ScanHexNumber<capture_raw, unicode>(4);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1149	}
				1150
				1151
				1152	// ----------------------------------------------------------------------------
				1153	// Keyword Matcher
				1154
				1155	#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1156	KEYWORD_GROUP('a') \
				1157	KEYWORD("async", Token::ASYNC) \
				1158	KEYWORD("await", Token::AWAIT) \
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1159	KEYWORD_GROUP('b') \
				1160	KEYWORD("break", Token::BREAK) \
				1161	KEYWORD_GROUP('c') \
				1162	KEYWORD("case", Token::CASE) \
				1163	KEYWORD("catch", Token::CATCH) \
				1164	KEYWORD("class", Token::CLASS) \
				1165	KEYWORD("const", Token::CONST) \
				1166	KEYWORD("continue", Token::CONTINUE) \
				1167	KEYWORD_GROUP('d') \
				1168	KEYWORD("debugger", Token::DEBUGGER) \
				1169	KEYWORD("default", Token::DEFAULT) \
				1170	KEYWORD("delete", Token::DELETE) \
				1171	KEYWORD("do", Token::DO) \
				1172	KEYWORD_GROUP('e') \
				1173	KEYWORD("else", Token::ELSE) \
Ben Murdoch	c561043	2016-08-08 18:44:38 +0100	[diff] [blame]	1174	KEYWORD("enum", Token::ENUM) \
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1175	KEYWORD("export", Token::EXPORT) \
				1176	KEYWORD("extends", Token::EXTENDS) \
				1177	KEYWORD_GROUP('f') \
				1178	KEYWORD("false", Token::FALSE_LITERAL) \
				1179	KEYWORD("finally", Token::FINALLY) \
				1180	KEYWORD("for", Token::FOR) \
				1181	KEYWORD("function", Token::FUNCTION) \
				1182	KEYWORD_GROUP('i') \
				1183	KEYWORD("if", Token::IF) \
				1184	KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
				1185	KEYWORD("import", Token::IMPORT) \
				1186	KEYWORD("in", Token::IN) \
				1187	KEYWORD("instanceof", Token::INSTANCEOF) \
				1188	KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
				1189	KEYWORD_GROUP('l') \
				1190	KEYWORD("let", Token::LET) \
				1191	KEYWORD_GROUP('n') \
				1192	KEYWORD("new", Token::NEW) \
				1193	KEYWORD("null", Token::NULL_LITERAL) \
				1194	KEYWORD_GROUP('p') \
				1195	KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
				1196	KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
				1197	KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
				1198	KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
				1199	KEYWORD_GROUP('r') \
				1200	KEYWORD("return", Token::RETURN) \
				1201	KEYWORD_GROUP('s') \
				1202	KEYWORD("static", Token::STATIC) \
				1203	KEYWORD("super", Token::SUPER) \
				1204	KEYWORD("switch", Token::SWITCH) \
				1205	KEYWORD_GROUP('t') \
				1206	KEYWORD("this", Token::THIS) \
				1207	KEYWORD("throw", Token::THROW) \
				1208	KEYWORD("true", Token::TRUE_LITERAL) \
				1209	KEYWORD("try", Token::TRY) \
				1210	KEYWORD("typeof", Token::TYPEOF) \
				1211	KEYWORD_GROUP('v') \
				1212	KEYWORD("var", Token::VAR) \
				1213	KEYWORD("void", Token::VOID) \
				1214	KEYWORD_GROUP('w') \
				1215	KEYWORD("while", Token::WHILE) \
				1216	KEYWORD("with", Token::WITH) \
				1217	KEYWORD_GROUP('y') \
				1218	KEYWORD("yield", Token::YIELD)
				1219
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1220	static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
				1221	int input_length, bool escaped) {
				1222	DCHECK(input_length >= 1);
				1223	const int kMinLength = 2;
				1224	const int kMaxLength = 10;
				1225	if (input_length < kMinLength \|\| input_length > kMaxLength) {
				1226	return Token::IDENTIFIER;
				1227	}
				1228	switch (input[0]) {
				1229	default:
				1230	#define KEYWORD_GROUP_CASE(ch) \
				1231	break; \
				1232	case ch:
				1233	#define KEYWORD(keyword, token) \
				1234	{ \
				1235	/* 'keyword' is a char array, so sizeof(keyword) is */ \
				1236	/* strlen(keyword) plus 1 for the NUL char. */ \
				1237	const int keyword_length = sizeof(keyword) - 1; \
				1238	STATIC_ASSERT(keyword_length >= kMinLength); \
				1239	STATIC_ASSERT(keyword_length <= kMaxLength); \
				1240	if (input_length == keyword_length && input[1] == keyword[1] && \
				1241	(keyword_length <= 2 \|\| input[2] == keyword[2]) && \
				1242	(keyword_length <= 3 \|\| input[3] == keyword[3]) && \
				1243	(keyword_length <= 4 \|\| input[4] == keyword[4]) && \
				1244	(keyword_length <= 5 \|\| input[5] == keyword[5]) && \
				1245	(keyword_length <= 6 \|\| input[6] == keyword[6]) && \
				1246	(keyword_length <= 7 \|\| input[7] == keyword[7]) && \
				1247	(keyword_length <= 8 \|\| input[8] == keyword[8]) && \
				1248	(keyword_length <= 9 \|\| input[9] == keyword[9])) { \
				1249	if (escaped) { \
Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1250	/* TODO(adamk): YIELD should be handled specially. */ \
				1251	return (token == Token::FUTURE_STRICT_RESERVED_WORD \|\| \
				1252	token == Token::LET \|\| token == Token::STATIC) \
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1253	? Token::ESCAPED_STRICT_RESERVED_WORD \
				1254	: Token::ESCAPED_KEYWORD; \
				1255	} \
				1256	return token; \
				1257	} \
				1258	}
				1259	KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
				1260	}
				1261	return Token::IDENTIFIER;
				1262	}
				1263
				1264
				1265	bool Scanner::IdentifierIsFutureStrictReserved(
				1266	const AstRawString* string) const {
				1267	// Keywords are always 1-byte strings.
				1268	if (!string->is_one_byte()) return false;
				1269	if (string->IsOneByteEqualTo("let") \|\| string->IsOneByteEqualTo("static") \|\|
				1270	string->IsOneByteEqualTo("yield")) {
				1271	return true;
				1272	}
				1273	return Token::FUTURE_STRICT_RESERVED_WORD ==
				1274	KeywordOrIdentifierToken(string->raw_data(), string->length(), false);
				1275	}
				1276
				1277
				1278	Token::Value Scanner::ScanIdentifierOrKeyword() {
				1279	DCHECK(unicode_cache_->IsIdentifierStart(c0_));
				1280	LiteralScope literal(this);
				1281	if (IsInRange(c0_, 'a', 'z')) {
				1282	do {
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1283	char first_char = static_cast<char>(c0_);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1284	Advance<false, false>();
				1285	AddLiteralChar(first_char);
				1286	} while (IsInRange(c0_, 'a', 'z'));
				1287
				1288	if (IsDecimalDigit(c0_) \|\| IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\|
				1289	c0_ == '$') {
				1290	// Identifier starting with lowercase.
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1291	char first_char = static_cast<char>(c0_);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1292	Advance<false, false>();
				1293	AddLiteralChar(first_char);
				1294	while (IsAsciiIdentifier(c0_)) {
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1295	char first_char = static_cast<char>(c0_);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1296	Advance<false, false>();
				1297	AddLiteralChar(first_char);
				1298	}
				1299	if (c0_ <= kMaxAscii && c0_ != '\\') {
				1300	literal.Complete();
				1301	return Token::IDENTIFIER;
				1302	}
				1303	} else if (c0_ <= kMaxAscii && c0_ != '\\') {
				1304	// Only a-z+: could be a keyword or identifier.
				1305	literal.Complete();
				1306	Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
				1307	return KeywordOrIdentifierToken(chars.start(), chars.length(), false);
				1308	}
				1309
				1310	HandleLeadSurrogate();
				1311	} else if (IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\| c0_ == '$') {
				1312	do {
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1313	char first_char = static_cast<char>(c0_);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1314	Advance<false, false>();
				1315	AddLiteralChar(first_char);
				1316	} while (IsAsciiIdentifier(c0_));
				1317
				1318	if (c0_ <= kMaxAscii && c0_ != '\\') {
				1319	literal.Complete();
				1320	return Token::IDENTIFIER;
				1321	}
				1322
				1323	HandleLeadSurrogate();
				1324	} else if (c0_ == '\\') {
				1325	// Scan identifier start character.
				1326	uc32 c = ScanIdentifierUnicodeEscape();
				1327	// Only allow legal identifier start characters.
				1328	if (c < 0 \|\|
				1329	c == '\\' \|\| // No recursive escapes.
				1330	!unicode_cache_->IsIdentifierStart(c)) {
				1331	return Token::ILLEGAL;
				1332	}
				1333	AddLiteralChar(c);
				1334	return ScanIdentifierSuffix(&literal, true);
				1335	} else {
				1336	uc32 first_char = c0_;
				1337	Advance();
				1338	AddLiteralChar(first_char);
				1339	}
				1340
				1341	// Scan the rest of the identifier characters.
				1342	while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
				1343	if (c0_ != '\\') {
				1344	uc32 next_char = c0_;
				1345	Advance();
				1346	AddLiteralChar(next_char);
				1347	continue;
				1348	}
				1349	// Fallthrough if no longer able to complete keyword.
				1350	return ScanIdentifierSuffix(&literal, false);
				1351	}
				1352
				1353	literal.Complete();
				1354
				1355	if (next_.literal_chars->is_one_byte()) {
				1356	Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
				1357	return KeywordOrIdentifierToken(chars.start(), chars.length(), false);
				1358	}
				1359	return Token::IDENTIFIER;
				1360	}
				1361
				1362
				1363	Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
				1364	bool escaped) {
				1365	// Scan the rest of the identifier characters.
				1366	while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
				1367	if (c0_ == '\\') {
				1368	uc32 c = ScanIdentifierUnicodeEscape();
				1369	escaped = true;
				1370	// Only allow legal identifier part characters.
				1371	if (c < 0 \|\|
				1372	c == '\\' \|\|
				1373	!unicode_cache_->IsIdentifierPart(c)) {
				1374	return Token::ILLEGAL;
				1375	}
				1376	AddLiteralChar(c);
				1377	} else {
				1378	AddLiteralChar(c0_);
				1379	Advance();
				1380	}
				1381	}
				1382	literal->Complete();
				1383
				1384	if (escaped && next_.literal_chars->is_one_byte()) {
				1385	Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
				1386	return KeywordOrIdentifierToken(chars.start(), chars.length(), true);
				1387	}
				1388	return Token::IDENTIFIER;
				1389	}
				1390
				1391
				1392	bool Scanner::ScanRegExpPattern(bool seen_equal) {
				1393	// Scan: ('/' \| '/=') RegularExpressionBody '/' RegularExpressionFlags
				1394	bool in_character_class = false;
				1395
				1396	// Previous token is either '/' or '/=', in the second case, the
				1397	// pattern starts at =.
				1398	next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
				1399	next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
				1400
				1401	// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
				1402	// the scanner should pass uninterpreted bodies to the RegExp
				1403	// constructor.
				1404	LiteralScope literal(this);
				1405	if (seen_equal) {
				1406	AddLiteralChar('=');
				1407	}
				1408
				1409	while (c0_ != '/' \|\| in_character_class) {
				1410	if (c0_ < 0 \|\| unicode_cache_->IsLineTerminator(c0_)) return false;
				1411	if (c0_ == '\\') { // Escape sequence.
				1412	AddLiteralCharAdvance();
				1413	if (c0_ < 0 \|\| unicode_cache_->IsLineTerminator(c0_)) return false;
				1414	AddLiteralCharAdvance();
				1415	// If the escape allows more characters, i.e., \x??, \u????, or \c?,
				1416	// only "safe" characters are allowed (letters, digits, underscore),
				1417	// otherwise the escape isn't valid and the invalid character has
				1418	// its normal meaning. I.e., we can just continue scanning without
				1419	// worrying whether the following characters are part of the escape
				1420	// or not, since any '/', '\\' or '[' is guaranteed to not be part
				1421	// of the escape sequence.
				1422
				1423	// TODO(896): At some point, parse RegExps more throughly to capture
				1424	// octal esacpes in strict mode.
				1425	} else { // Unescaped character.
				1426	if (c0_ == '[') in_character_class = true;
				1427	if (c0_ == ']') in_character_class = false;
				1428	AddLiteralCharAdvance();
				1429	}
				1430	}
				1431	Advance(); // consume '/'
				1432
				1433	literal.Complete();
				1434
				1435	return true;
				1436	}
				1437
				1438
				1439	Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
				1440	// Scan regular expression flags.
				1441	LiteralScope literal(this);
				1442	int flags = 0;
				1443	while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
				1444	RegExp::Flags flag = RegExp::kNone;
				1445	switch (c0_) {
				1446	case 'g':
				1447	flag = RegExp::kGlobal;
				1448	break;
				1449	case 'i':
				1450	flag = RegExp::kIgnoreCase;
				1451	break;
				1452	case 'm':
				1453	flag = RegExp::kMultiline;
				1454	break;
				1455	case 'u':
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1456	flag = RegExp::kUnicode;
				1457	break;
				1458	case 'y':
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1459	flag = RegExp::kSticky;
				1460	break;
				1461	default:
				1462	return Nothing<RegExp::Flags>();
				1463	}
				1464	if (flags & flag) return Nothing<RegExp::Flags>();
				1465	AddLiteralCharAdvance();
				1466	flags \|= flag;
				1467	}
				1468	literal.Complete();
				1469
				1470	next_.location.end_pos = source_pos();
				1471	return Just(RegExp::Flags(flags));
				1472	}
				1473
				1474
				1475	const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {
				1476	if (is_literal_one_byte()) {
				1477	return ast_value_factory->GetOneByteString(literal_one_byte_string());
				1478	}
				1479	return ast_value_factory->GetTwoByteString(literal_two_byte_string());
				1480	}
				1481
				1482
				1483	const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
				1484	if (is_next_literal_one_byte()) {
				1485	return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
				1486	}
				1487	return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
				1488	}
				1489
				1490
				1491	const AstRawString* Scanner::CurrentRawSymbol(
				1492	AstValueFactory* ast_value_factory) {
				1493	if (is_raw_literal_one_byte()) {
				1494	return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
				1495	}
				1496	return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
				1497	}
				1498
				1499
				1500	double Scanner::DoubleValue() {
				1501	DCHECK(is_literal_one_byte());
				1502	return StringToDouble(
				1503	unicode_cache_,
				1504	literal_one_byte_string(),
				1505	ALLOW_HEX \| ALLOW_OCTAL \| ALLOW_IMPLICIT_OCTAL \| ALLOW_BINARY);
				1506	}
				1507
				1508
				1509	bool Scanner::ContainsDot() {
				1510	DCHECK(is_literal_one_byte());
				1511	Vector<const uint8_t> str = literal_one_byte_string();
				1512	return std::find(str.begin(), str.end(), '.') != str.end();
				1513	}
				1514
				1515
				1516	int Scanner::FindSymbol(DuplicateFinder* finder, int value) {
				1517	if (is_literal_one_byte()) {
				1518	return finder->AddOneByteSymbol(literal_one_byte_string(), value);
				1519	}
				1520	return finder->AddTwoByteSymbol(literal_two_byte_string(), value);
				1521	}
				1522
				1523
				1524	bool Scanner::SetBookmark() {
				1525	if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark &&
				1526	next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) {
				1527	bookmark_c0_ = c0_;
				1528	CopyTokenDesc(&bookmark_current_, &current_);
				1529	CopyTokenDesc(&bookmark_next_, &next_);
				1530	return true;
				1531	}
				1532	return false;
				1533	}
				1534
				1535
				1536	void Scanner::ResetToBookmark() {
				1537	DCHECK(BookmarkHasBeenSet()); // Caller hasn't called SetBookmark.
				1538
				1539	source_->ResetToBookmark();
				1540	c0_ = bookmark_c0_;
				1541	StartLiteral();
				1542	StartRawLiteral();
				1543	CopyTokenDesc(&next_, &bookmark_current_);
				1544	current_ = next_;
				1545	StartLiteral();
				1546	StartRawLiteral();
				1547	CopyTokenDesc(&next_, &bookmark_next_);
				1548
				1549	bookmark_c0_ = kBookmarkWasApplied;
				1550	}
				1551
				1552
				1553	bool Scanner::BookmarkHasBeenSet() { return bookmark_c0_ >= 0; }
				1554
				1555
				1556	bool Scanner::BookmarkHasBeenReset() {
				1557	return bookmark_c0_ == kBookmarkWasApplied;
				1558	}
				1559
				1560
				1561	void Scanner::DropBookmark() { bookmark_c0_ = kNoBookmark; }
				1562
				1563
				1564	void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {
				1565	DCHECK_NOT_NULL(to);
				1566	DCHECK_NOT_NULL(from);
				1567	to->token = from->token;
				1568	to->location = from->location;
				1569	to->literal_chars->CopyFrom(from->literal_chars);
				1570	to->raw_literal_chars->CopyFrom(from->raw_literal_chars);
				1571	}
				1572
				1573
				1574	int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
				1575	return AddSymbol(key, true, value);
				1576	}
				1577
				1578
				1579	int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
				1580	return AddSymbol(Vector<const uint8_t>::cast(key), false, value);
				1581	}
				1582
				1583
				1584	int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,
				1585	bool is_one_byte,
				1586	int value) {
				1587	uint32_t hash = Hash(key, is_one_byte);
				1588	byte* encoding = BackupKey(key, is_one_byte);
Ben Murdoch	61f157c	2016-09-16 13:49:30 +0100	[diff] [blame]	1589	base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);
Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame]	1590	int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
				1591	entry->value =
				1592	reinterpret_cast<void*>(static_cast<intptr_t>(value \| old_value));
				1593	return old_value;
				1594	}
				1595
				1596
				1597	int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
				1598	DCHECK(key.length() > 0);
				1599	// Quick check for already being in canonical form.
				1600	if (IsNumberCanonical(key)) {
				1601	return AddOneByteSymbol(key, value);
				1602	}
				1603
				1604	int flags = ALLOW_HEX \| ALLOW_OCTAL \| ALLOW_IMPLICIT_OCTAL \| ALLOW_BINARY;
				1605	double double_value = StringToDouble(
				1606	unicode_constants_, key, flags, 0.0);
				1607	int length;
				1608	const char* string;
				1609	if (!std::isfinite(double_value)) {
				1610	string = "Infinity";
				1611	length = 8; // strlen("Infinity");
				1612	} else {
				1613	string = DoubleToCString(double_value,
				1614	Vector<char>(number_buffer_, kBufferSize));
				1615	length = StrLength(string);
				1616	}
				1617	return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string),
				1618	length), true, value);
				1619	}
				1620
				1621
				1622	bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
				1623	// Test for a safe approximation of number literals that are already
				1624	// in canonical form: max 15 digits, no leading zeroes, except an
				1625	// integer part that is a single zero, and no trailing zeros below
				1626	// the decimal point.
				1627	int pos = 0;
				1628	int length = number.length();
				1629	if (number.length() > 15) return false;
				1630	if (number[pos] == '0') {
				1631	pos++;
				1632	} else {
				1633	while (pos < length &&
				1634	static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;
				1635	}
				1636	if (length == pos) return true;
				1637	if (number[pos] != '.') return false;
				1638	pos++;
				1639	bool invalid_last_digit = true;
				1640	while (pos < length) {
				1641	uint8_t digit = number[pos] - '0';
				1642	if (digit > '9' - '0') return false;
				1643	invalid_last_digit = (digit == 0);
				1644	pos++;
				1645	}
				1646	return !invalid_last_digit;
				1647	}
				1648
				1649
				1650	uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
				1651	// Primitive hash function, almost identical to the one used
				1652	// for strings (except that it's seeded by the length and representation).
				1653	int length = key.length();
				1654	uint32_t hash = (length << 1) \| (is_one_byte ? 1 : 0);
				1655	for (int i = 0; i < length; i++) {
				1656	uint32_t c = key[i];
				1657	hash = (hash + c) * 1025;
				1658	hash ^= (hash >> 6);
				1659	}
				1660	return hash;
				1661	}
				1662
				1663
				1664	bool DuplicateFinder::Match(void* first, void* second) {
				1665	// Decode lengths.
				1666	// Length + representation is encoded as base 128, most significant heptet
				1667	// first, with a 8th bit being non-zero while there are more heptets.
				1668	// The value encodes the number of bytes following, and whether the original
				1669	// was Latin1.
				1670	byte* s1 = reinterpret_cast<byte*>(first);
				1671	byte* s2 = reinterpret_cast<byte*>(second);
				1672	uint32_t length_one_byte_field = 0;
				1673	byte c1;
				1674	do {
				1675	c1 = *s1;
				1676	if (c1 != *s2) return false;
				1677	length_one_byte_field = (length_one_byte_field << 7) \| (c1 & 0x7f);
				1678	s1++;
				1679	s2++;
				1680	} while ((c1 & 0x80) != 0);
				1681	int length = static_cast<int>(length_one_byte_field >> 1);
				1682	return memcmp(s1, s2, length) == 0;
				1683	}
				1684
				1685
				1686	byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
				1687	bool is_one_byte) {
				1688	uint32_t one_byte_length = (bytes.length() << 1) \| (is_one_byte ? 1 : 0);
				1689	backing_store_.StartSequence();
				1690	// Emit one_byte_length as base-128 encoded number, with the 7th bit set
				1691	// on the byte of every heptet except the last, least significant, one.
				1692	if (one_byte_length >= (1 << 7)) {
				1693	if (one_byte_length >= (1 << 14)) {
				1694	if (one_byte_length >= (1 << 21)) {
				1695	if (one_byte_length >= (1 << 28)) {
				1696	backing_store_.Add(
				1697	static_cast<uint8_t>((one_byte_length >> 28) \| 0x80));
				1698	}
				1699	backing_store_.Add(
				1700	static_cast<uint8_t>((one_byte_length >> 21) \| 0x80u));
				1701	}
				1702	backing_store_.Add(
				1703	static_cast<uint8_t>((one_byte_length >> 14) \| 0x80u));
				1704	}
				1705	backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));
				1706	}
				1707	backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
				1708
				1709	backing_store_.AddBlock(bytes);
				1710	return backing_store_.EndSequence().start();
				1711	}
				1712
				1713	} // namespace internal
				1714	} // namespace v8