blob: cd0a530e8d0bba8b368510601e30ff566a00cb49 [file] [log] [blame]
Ben Murdoch257744e2011-11-30 15:57:28 +00001// Copyright 2011 the V8 project authors. All rights reserved.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -08002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_PREPARSER_H
29#define V8_PREPARSER_H
30
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080031namespace v8 {
32namespace preparser {
33
34// Preparsing checks a JavaScript program and emits preparse-data that helps
35// a later parsing to be faster.
Ben Murdoch257744e2011-11-30 15:57:28 +000036// See preparse-data-format.h for the data format.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080037
38// The PreParser checks that the syntax follows the grammar for JavaScript,
39// and collects some information about the program along the way.
40// The grammar check is only performed in order to understand the program
41// sufficiently to deduce some information about it, that can be used
42// to speed up later parsing. Finding errors is not the goal of pre-parsing,
43// rather it is to speed up properly written and correct programs.
44// That means that contextual checks (like a label being declared where
45// it is used) are generally omitted.
46
47namespace i = v8::internal;
48
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080049class PreParser {
50 public:
// Outcome of a pre-parse run, as returned by PreParseProgram().
enum PreParseResult {
  kPreParseStackOverflow = 0,  // The stack limit was hit while parsing.
  kPreParseSuccess = 1         // The run completed (errors may be logged).
};
55
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080056 ~PreParser() { }
57
58 // Pre-parse the program from the character stream; returns true on
59 // success (even if parsing failed, the pre-parse data successfully
60 // captured the syntax error), and false if a stack-overflow happened
61 // during parsing.
Ben Murdochb0fe1622011-05-05 13:52:32 +010062 static PreParseResult PreParseProgram(i::JavaScriptScanner* scanner,
63 i::ParserRecorder* log,
64 bool allow_lazy,
65 uintptr_t stack_limit) {
66 return PreParser(scanner, log, stack_limit, allow_lazy).PreParse();
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080067 }
68
69 private:
// These types form an algebra over syntactic categories that is just
// rich enough to let us recognize and propagate the constructs that
// are either being counted in the preparser data, or are important
// to throw the correct syntax error exceptions.
74
// Kind of scope tracked by a Scope object.
enum ScopeType {
  kTopLevelScope = 0,
  kFunctionScope = 1
};

// Syntactic position in which a variable declaration is being parsed.
enum VariableDeclarationContext {
  kSourceElement = 0,
  kStatement = 1,
  kForStatement = 2
};
85
Ben Murdoch257744e2011-11-30 15:57:28 +000086 class Expression;
Ben Murdochb0fe1622011-05-05 13:52:32 +010087
// Coarse classification of an identifier. Only the category is kept,
// not the name: the predicates below are all the preparser asks of it.
class Identifier {
 public:
  // Factories, one per category.
  static Identifier Default() { return Identifier(kUnknownIdentifier); }
  static Identifier Eval() { return Identifier(kEvalIdentifier); }
  static Identifier Arguments() { return Identifier(kArgumentsIdentifier); }
  static Identifier FutureReserved() {
    return Identifier(kFutureReservedIdentifier);
  }
  static Identifier FutureStrictReserved() {
    return Identifier(kFutureStrictReservedIdentifier);
  }

  // Category predicates.
  bool IsEval() { return kEvalIdentifier == type_; }
  bool IsArguments() { return kArgumentsIdentifier == type_; }
  // Relies on kEvalIdentifier and kArgumentsIdentifier being the last
  // two enumerators of Type.
  bool IsEvalOrArguments() { return type_ >= kEvalIdentifier; }
  bool IsFutureReserved() { return kFutureReservedIdentifier == type_; }
  bool IsFutureStrictReserved() {
    return kFutureStrictReservedIdentifier == type_;
  }
  // True only for identifiers that fall in none of the special categories.
  bool IsValidStrictVariable() { return kUnknownIdentifier == type_; }

 private:
  // The order matters: see IsEvalOrArguments().
  enum Type {
    kUnknownIdentifier,
    kFutureReservedIdentifier,
    kFutureStrictReservedIdentifier,
    kEvalIdentifier,
    kArgumentsIdentifier
  };

  explicit Identifier(Type type) : type_(type) {}

  Type type_;

  // Expression packs and unpacks type_ directly.
  friend class Expression;
};
127
Ben Murdoch257744e2011-11-30 15:57:28 +0000128 // Bits 0 and 1 are used to identify the type of expression:
129 // If bit 0 is set, it's an identifier.
130 // if bit 1 is set, it's a string literal.
131 // If neither is set, it's no particular type, and both set isn't
132 // use yet.
133 // Bit 2 is used to mark the expression as being parenthesized,
134 // so "(foo)" isn't recognized as a pure identifier (and possible label).
135 class Expression {
136 public:
137 static Expression Default() {
138 return Expression(kUnknownExpression);
139 }
140
141 static Expression FromIdentifier(Identifier id) {
142 return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift));
143 }
144
145 static Expression StringLiteral() {
146 return Expression(kUnknownStringLiteral);
147 }
148
149 static Expression UseStrictStringLiteral() {
150 return Expression(kUseStrictString);
151 }
152
153 static Expression This() {
154 return Expression(kThisExpression);
155 }
156
157 static Expression ThisProperty() {
158 return Expression(kThisPropertyExpression);
159 }
160
161 static Expression StrictFunction() {
162 return Expression(kStrictFunctionExpression);
163 }
164
165 bool IsIdentifier() {
166 return (code_ & kIdentifierFlag) != 0;
167 }
168
169 // Only works corretly if it is actually an identifier expression.
170 PreParser::Identifier AsIdentifier() {
171 return PreParser::Identifier(
172 static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift));
173 }
174
175 bool IsParenthesized() {
176 // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized.
177 return (code_ & 7) > 4;
178 }
179
180 bool IsRawIdentifier() {
181 return !IsParenthesized() && IsIdentifier();
182 }
183
184 bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; }
185
186 bool IsRawStringLiteral() {
187 return !IsParenthesized() && IsStringLiteral();
188 }
189
190 bool IsUseStrictLiteral() {
191 return (code_ & kStringLiteralMask) == kUseStrictString;
192 }
193
194 bool IsThis() {
195 return code_ == kThisExpression;
196 }
197
198 bool IsThisProperty() {
199 return code_ == kThisPropertyExpression;
200 }
201
202 bool IsStrictFunction() {
203 return code_ == kStrictFunctionExpression;
204 }
205
206 Expression Parenthesize() {
207 int type = code_ & 3;
208 if (type != 0) {
209 // Identifiers and string literals can be parenthesized.
210 // They no longer work as labels or directive prologues,
211 // but are still recognized in other contexts.
212 return Expression(code_ | kParentesizedExpressionFlag);
213 }
214 // For other types of expressions, it's not important to remember
215 // the parentheses.
216 return *this;
217 }
218
219 private:
220 // First two/three bits are used as flags.
221 // Bit 0 and 1 represent identifiers or strings literals, and are
222 // mutually exclusive, but can both be absent.
223 // If bit 0 or 1 are set, bit 2 marks that the expression has
224 // been wrapped in parentheses (a string literal can no longer
225 // be a directive prologue, and an identifier can no longer be
226 // a label.
227 enum {
228 kUnknownExpression = 0,
229 // Identifiers
230 kIdentifierFlag = 1, // Used to detect labels.
231 kIdentifierShift = 3,
232
233 kStringLiteralFlag = 2, // Used to detect directive prologue.
234 kUnknownStringLiteral = kStringLiteralFlag,
235 kUseStrictString = kStringLiteralFlag | 8,
236 kStringLiteralMask = kUseStrictString,
237
238 kParentesizedExpressionFlag = 4, // Only if identifier or string literal.
239
240 // Below here applies if neither identifier nor string literal.
241 kThisExpression = 4,
242 kThisPropertyExpression = 8,
243 kStrictFunctionExpression = 12
244 };
245
246 explicit Expression(int expression_code) : code_(expression_code) { }
247
248 int code_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100249 };
250
Ben Murdoch257744e2011-11-30 15:57:28 +0000251 class Statement {
252 public:
253 static Statement Default() {
254 return Statement(kUnknownStatement);
255 }
256
257 static Statement FunctionDeclaration() {
258 return Statement(kFunctionDeclaration);
259 }
260
261 // Creates expression statement from expression.
262 // Preserves being an unparenthesized string literal, possibly
263 // "use strict".
264 static Statement ExpressionStatement(Expression expression) {
265 if (!expression.IsParenthesized()) {
266 if (expression.IsUseStrictLiteral()) {
267 return Statement(kUseStrictExpressionStatement);
268 }
269 if (expression.IsStringLiteral()) {
270 return Statement(kStringLiteralExpressionStatement);
271 }
272 }
273 return Default();
274 }
275
276 bool IsStringLiteral() {
277 return code_ != kUnknownStatement;
278 }
279
280 bool IsUseStrictLiteral() {
281 return code_ == kUseStrictExpressionStatement;
282 }
283
284 bool IsFunctionDeclaration() {
285 return code_ == kFunctionDeclaration;
286 }
287
288 private:
289 enum Type {
290 kUnknownStatement,
291 kStringLiteralExpressionStatement,
292 kUseStrictExpressionStatement,
293 kFunctionDeclaration
294 };
295
296 explicit Statement(Type code) : code_(code) {}
297 Type code_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100298 };
299
// Result type of ParseSourceElements(); it has a single value.
enum SourceElements {
  kUnknownSourceElements = 0
};

// Result type of ParseArguments().
typedef int Arguments;
305
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800306 class Scope {
307 public:
308 Scope(Scope** variable, ScopeType type)
309 : variable_(variable),
310 prev_(*variable),
311 type_(type),
312 materialized_literal_count_(0),
313 expected_properties_(0),
Ben Murdoch257744e2011-11-30 15:57:28 +0000314 with_nesting_count_(0),
315 strict_((prev_ != NULL) && prev_->is_strict()) {
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800316 *variable = this;
317 }
318 ~Scope() { *variable_ = prev_; }
319 void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
320 void AddProperty() { expected_properties_++; }
321 ScopeType type() { return type_; }
322 int expected_properties() { return expected_properties_; }
323 int materialized_literal_count() { return materialized_literal_count_; }
324 bool IsInsideWith() { return with_nesting_count_ != 0; }
Ben Murdoch257744e2011-11-30 15:57:28 +0000325 bool is_strict() { return strict_; }
326 void set_strict() { strict_ = true; }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800327 void EnterWith() { with_nesting_count_++; }
328 void LeaveWith() { with_nesting_count_--; }
329
330 private:
331 Scope** const variable_;
332 Scope* const prev_;
333 const ScopeType type_;
334 int materialized_literal_count_;
335 int expected_properties_;
336 int with_nesting_count_;
Ben Murdoch257744e2011-11-30 15:57:28 +0000337 bool strict_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800338 };
339
Ben Murdochb0fe1622011-05-05 13:52:32 +0100340 // Private constructor only used in PreParseProgram.
341 PreParser(i::JavaScriptScanner* scanner,
342 i::ParserRecorder* log,
343 uintptr_t stack_limit,
344 bool allow_lazy)
345 : scanner_(scanner),
346 log_(log),
347 scope_(NULL),
348 stack_limit_(stack_limit),
Ben Murdoch257744e2011-11-30 15:57:28 +0000349 strict_mode_violation_location_(i::Scanner::Location::invalid()),
350 strict_mode_violation_type_(NULL),
Ben Murdochb0fe1622011-05-05 13:52:32 +0100351 stack_overflow_(false),
Ben Murdochb8e0da22011-05-16 14:20:40 +0100352 allow_lazy_(true),
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000353 parenthesized_function_(false),
354 harmony_block_scoping_(scanner->HarmonyBlockScoping()) { }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100355
356 // Preparse the program. Only called in PreParseProgram after creating
357 // the instance.
358 PreParseResult PreParse() {
359 Scope top_scope(&scope_, kTopLevelScope);
360 bool ok = true;
Ben Murdoch257744e2011-11-30 15:57:28 +0000361 int start_position = scanner_->peek_location().beg_pos;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100362 ParseSourceElements(i::Token::EOS, &ok);
363 if (stack_overflow_) return kPreParseStackOverflow;
364 if (!ok) {
365 ReportUnexpectedToken(scanner_->current_token());
Ben Murdoch257744e2011-11-30 15:57:28 +0000366 } else if (scope_->is_strict()) {
367 CheckOctalLiteral(start_position, scanner_->location().end_pos, &ok);
Ben Murdochb0fe1622011-05-05 13:52:32 +0100368 }
369 return kPreParseSuccess;
370 }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800371
372 // Report syntax error
373 void ReportUnexpectedToken(i::Token::Value token);
374 void ReportMessageAt(int start_pos,
375 int end_pos,
376 const char* type,
377 const char* name_opt) {
378 log_->LogMessage(start_pos, end_pos, type, name_opt);
379 }
380
Ben Murdoch257744e2011-11-30 15:57:28 +0000381 void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok);
382
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800383 // All ParseXXX functions take as the last argument an *ok parameter
384 // which is set to false if parsing failed; it is unchanged otherwise.
385 // By making the 'exception handling' explicit, we are forced to check
386 // for failure at the call sites.
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000387 Statement ParseSourceElement(bool* ok);
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800388 SourceElements ParseSourceElements(int end_token, bool* ok);
389 Statement ParseStatement(bool* ok);
390 Statement ParseFunctionDeclaration(bool* ok);
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800391 Statement ParseBlock(bool* ok);
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000392 Statement ParseVariableStatement(VariableDeclarationContext var_context,
393 bool* ok);
394 Statement ParseVariableDeclarations(VariableDeclarationContext var_context,
395 int* num_decl,
396 bool* ok);
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800397 Statement ParseExpressionOrLabelledStatement(bool* ok);
398 Statement ParseIfStatement(bool* ok);
399 Statement ParseContinueStatement(bool* ok);
400 Statement ParseBreakStatement(bool* ok);
401 Statement ParseReturnStatement(bool* ok);
402 Statement ParseWithStatement(bool* ok);
403 Statement ParseSwitchStatement(bool* ok);
404 Statement ParseDoWhileStatement(bool* ok);
405 Statement ParseWhileStatement(bool* ok);
406 Statement ParseForStatement(bool* ok);
407 Statement ParseThrowStatement(bool* ok);
408 Statement ParseTryStatement(bool* ok);
409 Statement ParseDebuggerStatement(bool* ok);
410
411 Expression ParseExpression(bool accept_IN, bool* ok);
412 Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
413 Expression ParseConditionalExpression(bool accept_IN, bool* ok);
414 Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
415 Expression ParseUnaryExpression(bool* ok);
416 Expression ParsePostfixExpression(bool* ok);
417 Expression ParseLeftHandSideExpression(bool* ok);
418 Expression ParseNewExpression(bool* ok);
419 Expression ParseMemberExpression(bool* ok);
420 Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok);
421 Expression ParsePrimaryExpression(bool* ok);
422 Expression ParseArrayLiteral(bool* ok);
423 Expression ParseObjectLiteral(bool* ok);
424 Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
425 Expression ParseV8Intrinsic(bool* ok);
426
427 Arguments ParseArguments(bool* ok);
428 Expression ParseFunctionLiteral(bool* ok);
429
430 Identifier ParseIdentifier(bool* ok);
431 Identifier ParseIdentifierName(bool* ok);
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000432 Identifier ParseIdentifierNameOrGetOrSet(bool* is_get,
433 bool* is_set,
434 bool* ok);
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800435
Steve Block9fac8402011-05-12 15:51:54 +0100436 // Logs the currently parsed literal as a symbol in the preparser data.
437 void LogSymbol();
438 // Log the currently parsed identifier.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800439 Identifier GetIdentifierSymbol();
Steve Block9fac8402011-05-12 15:51:54 +0100440 // Log the currently parsed string literal.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800441 Expression GetStringSymbol();
442
Ben Murdochb0fe1622011-05-05 13:52:32 +0100443 i::Token::Value peek() {
444 if (stack_overflow_) return i::Token::ILLEGAL;
445 return scanner_->peek();
446 }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800447
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800448 i::Token::Value Next() {
Ben Murdochb0fe1622011-05-05 13:52:32 +0100449 if (stack_overflow_) return i::Token::ILLEGAL;
450 {
451 int marker;
452 if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) {
453 // Further calls to peek/Next will return illegal token.
454 // The current one will still be returned. It might already
455 // have been seen using peek.
456 stack_overflow_ = true;
457 }
458 }
459 return scanner_->Next();
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800460 }
461
Steve Block1e0659c2011-05-24 12:43:12 +0100462 bool peek_any_identifier();
463
Ben Murdoch257744e2011-11-30 15:57:28 +0000464 void set_strict_mode() {
465 scope_->set_strict();
466 }
467
468 bool strict_mode() { return scope_->is_strict(); }
469
Ben Murdochb0fe1622011-05-05 13:52:32 +0100470 void Consume(i::Token::Value token) { Next(); }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800471
472 void Expect(i::Token::Value token, bool* ok) {
473 if (Next() != token) {
474 *ok = false;
475 }
476 }
477
478 bool Check(i::Token::Value token) {
479 i::Token::Value next = peek();
480 if (next == token) {
481 Consume(next);
482 return true;
483 }
484 return false;
485 }
486 void ExpectSemicolon(bool* ok);
487
488 static int Precedence(i::Token::Value tok, bool accept_IN);
489
Ben Murdoch257744e2011-11-30 15:57:28 +0000490 void SetStrictModeViolation(i::Scanner::Location,
491 const char* type,
492 bool *ok);
493
494 void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok);
495
496 void StrictModeIdentifierViolation(i::Scanner::Location,
497 const char* eval_args_type,
498 Identifier identifier,
499 bool* ok);
500
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800501 i::JavaScriptScanner* scanner_;
502 i::ParserRecorder* log_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800503 Scope* scope_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100504 uintptr_t stack_limit_;
Ben Murdoch257744e2011-11-30 15:57:28 +0000505 i::Scanner::Location strict_mode_violation_location_;
506 const char* strict_mode_violation_type_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100507 bool stack_overflow_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800508 bool allow_lazy_;
Ben Murdochb8e0da22011-05-16 14:20:40 +0100509 bool parenthesized_function_;
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000510 bool harmony_block_scoping_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800511};
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800512} } // v8::preparser
513
514#endif // V8_PREPARSER_H