blob: 285cf0304e09a7e3764d414cee24e145b9cbcbce [file] [log] [blame]
ager@chromium.orgea91cc52011-05-23 06:06:11 +00001// Copyright 2011 the V8 project authors. All rights reserved.
lrn@chromium.orgfa943b72010-11-03 08:14:36 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_PREPARSER_H
29#define V8_PREPARSER_H
30
lrn@chromium.orgfa943b72010-11-03 08:14:36 +000031namespace v8 {
lrn@chromium.orgfa943b72010-11-03 08:14:36 +000032namespace preparser {
33
// Preparsing checks a JavaScript program and emits preparse-data that helps
// a later parsing to be faster.
// See preparse-data-format.h for the data format.
lrn@chromium.orgfa943b72010-11-03 08:14:36 +000037
// The PreParser checks that the syntax follows the grammar for JavaScript,
// and collects some information about the program along the way.
// The grammar check is only performed in order to understand the program
// sufficiently to deduce some information about it, that can be used
// to speed up later parsing. Finding errors is not the goal of pre-parsing,
// rather it is to speed up properly written and correct programs.
// That means that contextual checks (like a label being declared where
// it is used) are generally omitted.
46
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +000047namespace i = v8::internal;
48
lrn@chromium.orgfa943b72010-11-03 08:14:36 +000049class PreParser {
50 public:
// Outcome of a pre-parse run, as returned by PreParseProgram().
//   kPreParseStackOverflow: parsing was abandoned because the stack limit
//                           was reached.
//   kPreParseSuccess:       the run completed. This includes programs with
//                           a syntax error, which is reported through the
//                           recorder rather than through this value.
enum PreParseResult {
  kPreParseStackOverflow,
  kPreParseSuccess
};
55
lrn@chromium.orgfa943b72010-11-03 08:14:36 +000056 ~PreParser() { }
57
58 // Pre-parse the program from the character stream; returns true on
59 // success (even if parsing failed, the pre-parse data successfully
60 // captured the syntax error), and false if a stack-overflow happened
61 // during parsing.
kasperl@chromium.orga5551262010-12-07 12:49:48 +000062 static PreParseResult PreParseProgram(i::JavaScriptScanner* scanner,
63 i::ParserRecorder* log,
64 bool allow_lazy,
65 uintptr_t stack_limit) {
66 return PreParser(scanner, log, stack_limit, allow_lazy).PreParse();
lrn@chromium.orgfa943b72010-11-03 08:14:36 +000067 }
68
69 private:
// These types form an algebra over syntactic categories that is just
// rich enough to let us recognize and propagate the constructs that
// are either being counted in the preparser data, or are important
// to throw the correct syntax error exceptions.
74
// Kind of scope a Scope object stands for. PreParse() opens the single
// kTopLevelScope; kFunctionScope is used by code outside this header.
enum ScopeType {
  kTopLevelScope,
  kFunctionScope
};
79
ager@chromium.orgea91cc52011-05-23 06:06:11 +000080 class Expression;
kasperl@chromium.orga5551262010-12-07 12:49:48 +000081
// Compact classification of an identifier. Only a category tag is stored,
// never the identifier's characters. Instances are created through the
// static factories; Expression is a friend so that it can pack the tag
// into, and unpack it from, its own code.
class Identifier {
 public:
  // Factories, one per category.
  static Identifier Default() { return Identifier(kUnknownIdentifier); }
  static Identifier Eval() { return Identifier(kEvalIdentifier); }
  static Identifier Arguments() { return Identifier(kArgumentsIdentifier); }
  static Identifier FutureReserved() {
    return Identifier(kFutureReservedIdentifier);
  }

  // Category predicates.
  bool IsEval() { return kEvalIdentifier == type_; }
  bool IsArguments() { return kArgumentsIdentifier == type_; }
  // Relies on kEvalIdentifier and kArgumentsIdentifier being the last two
  // enumerators of Type.
  bool IsEvalOrArguments() { return !(type_ < kEvalIdentifier); }
  bool IsFutureReserved() { return kFutureReservedIdentifier == type_; }
  // True only for the default (unknown) category.
  bool IsValidStrictVariable() { return kUnknownIdentifier == type_; }

 private:
  // The order matters: see IsEvalOrArguments().
  enum Type {
    kUnknownIdentifier,
    kFutureReservedIdentifier,
    kEvalIdentifier,
    kArgumentsIdentifier
  };

  explicit Identifier(Type type) : type_(type) {}

  Type type_;

  friend class Expression;
};
114
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000115 // Bits 0 and 1 are used to identify the type of expression:
116 // If bit 0 is set, it's an identifier.
117 // if bit 1 is set, it's a string literal.
118 // If neither is set, it's no particular type, and both set isn't
119 // use yet.
120 // Bit 2 is used to mark the expression as being parenthesized,
121 // so "(foo)" isn't recognized as a pure identifier (and possible label).
122 class Expression {
123 public:
124 static Expression Default() {
125 return Expression(kUnknownExpression);
126 }
127
128 static Expression FromIdentifier(Identifier id) {
129 return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift));
130 }
131
132 static Expression StringLiteral() {
133 return Expression(kUnknownStringLiteral);
134 }
135
136 static Expression UseStrictStringLiteral() {
137 return Expression(kUseStrictString);
138 }
139
140 static Expression This() {
141 return Expression(kThisExpression);
142 }
143
144 static Expression ThisProperty() {
145 return Expression(kThisPropertyExpression);
146 }
147
148 static Expression StrictFunction() {
149 return Expression(kStrictFunctionExpression);
150 }
151
152 bool IsIdentifier() {
153 return (code_ & kIdentifierFlag) != 0;
154 }
155
156 // Only works corretly if it is actually an identifier expression.
157 PreParser::Identifier AsIdentifier() {
158 return PreParser::Identifier(
159 static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift));
160 }
161
162 bool IsParenthesized() {
163 // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized.
164 return (code_ & 7) > 4;
165 }
166
167 bool IsRawIdentifier() {
168 return !IsParenthesized() && IsIdentifier();
169 }
170
171 bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; }
172
173 bool IsRawStringLiteral() {
174 return !IsParenthesized() && IsStringLiteral();
175 }
176
177 bool IsUseStrictLiteral() {
178 return (code_ & kStringLiteralMask) == kUseStrictString;
179 }
180
181 bool IsThis() {
182 return code_ == kThisExpression;
183 }
184
185 bool IsThisProperty() {
186 return code_ == kThisPropertyExpression;
187 }
188
189 bool IsStrictFunction() {
190 return code_ == kStrictFunctionExpression;
191 }
192
193 Expression Parenthesize() {
194 int type = code_ & 3;
195 if (type != 0) {
196 // Identifiers and string literals can be parenthesized.
197 // They no longer work as labels or directive prologues,
198 // but are still recognized in other contexts.
199 return Expression(code_ | kParentesizedExpressionFlag);
200 }
201 // For other types of expressions, it's not important to remember
202 // the parentheses.
203 return *this;
204 }
205
206 private:
207 // First two/three bits are used as flags.
208 // Bit 0 and 1 represent identifiers or strings literals, and are
209 // mutually exclusive, but can both be absent.
210 // If bit 0 or 1 are set, bit 2 marks that the expression has
211 // been wrapped in parentheses (a string literal can no longer
212 // be a directive prologue, and an identifier can no longer be
213 // a label.
214 enum {
215 kUnknownExpression = 0,
216 // Identifiers
217 kIdentifierFlag = 1, // Used to detect labels.
218 kIdentifierShift = 3,
219
220 kStringLiteralFlag = 2, // Used to detect directive prologue.
221 kUnknownStringLiteral = kStringLiteralFlag,
222 kUseStrictString = kStringLiteralFlag | 8,
223 kStringLiteralMask = kUseStrictString,
224
225 kParentesizedExpressionFlag = 4, // Only if identifier or string literal.
226
227 // Below here applies if neither identifier nor string literal.
228 kThisExpression = 4,
229 kThisPropertyExpression = 8,
230 kStrictFunctionExpression = 12
231 };
232
233 explicit Expression(int expression_code) : code_(expression_code) { }
234
235 int code_;
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000236 };
237
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000238 class Statement {
239 public:
240 static Statement Default() {
241 return Statement(kUnknownStatement);
242 }
243
danno@chromium.org40cb8782011-05-25 07:58:50 +0000244 static Statement FunctionDeclaration() {
245 return Statement(kFunctionDeclaration);
246 }
247
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000248 // Creates expression statement from expression.
249 // Preserves being an unparenthesized string literal, possibly
250 // "use strict".
251 static Statement ExpressionStatement(Expression expression) {
252 if (!expression.IsParenthesized()) {
253 if (expression.IsUseStrictLiteral()) {
254 return Statement(kUseStrictExpressionStatement);
255 }
256 if (expression.IsStringLiteral()) {
257 return Statement(kStringLiteralExpressionStatement);
258 }
259 }
260 return Default();
261 }
262
263 bool IsStringLiteral() {
264 return code_ != kUnknownStatement;
265 }
266
267 bool IsUseStrictLiteral() {
268 return code_ == kUseStrictExpressionStatement;
269 }
270
danno@chromium.org40cb8782011-05-25 07:58:50 +0000271 bool IsFunctionDeclaration() {
272 return code_ == kFunctionDeclaration;
273 }
274
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000275 private:
276 enum Type {
277 kUnknownStatement,
278 kStringLiteralExpressionStatement,
danno@chromium.org40cb8782011-05-25 07:58:50 +0000279 kUseStrictExpressionStatement,
280 kFunctionDeclaration
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000281 };
282
283 explicit Statement(Type code) : code_(code) {}
284 Type code_;
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000285 };
286
// Result type of ParseSourceElements(). It has a single value, so it
// carries no information.
enum SourceElements {
  kUnknownSourceElements
};
290
// Result type of ParseArguments(); a plain int.
typedef int Arguments;
292
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000293 class Scope {
294 public:
295 Scope(Scope** variable, ScopeType type)
296 : variable_(variable),
297 prev_(*variable),
298 type_(type),
299 materialized_literal_count_(0),
300 expected_properties_(0),
lrn@chromium.org1c092762011-05-09 09:42:16 +0000301 with_nesting_count_(0),
302 strict_((prev_ != NULL) && prev_->is_strict()) {
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000303 *variable = this;
304 }
305 ~Scope() { *variable_ = prev_; }
306 void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
307 void AddProperty() { expected_properties_++; }
308 ScopeType type() { return type_; }
309 int expected_properties() { return expected_properties_; }
310 int materialized_literal_count() { return materialized_literal_count_; }
311 bool IsInsideWith() { return with_nesting_count_ != 0; }
lrn@chromium.org1c092762011-05-09 09:42:16 +0000312 bool is_strict() { return strict_; }
313 void set_strict() { strict_ = true; }
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000314 void EnterWith() { with_nesting_count_++; }
315 void LeaveWith() { with_nesting_count_--; }
316
317 private:
318 Scope** const variable_;
319 Scope* const prev_;
320 const ScopeType type_;
321 int materialized_literal_count_;
322 int expected_properties_;
323 int with_nesting_count_;
lrn@chromium.org1c092762011-05-09 09:42:16 +0000324 bool strict_;
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000325 };
326
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000327 // Private constructor only used in PreParseProgram.
328 PreParser(i::JavaScriptScanner* scanner,
329 i::ParserRecorder* log,
330 uintptr_t stack_limit,
331 bool allow_lazy)
332 : scanner_(scanner),
333 log_(log),
334 scope_(NULL),
335 stack_limit_(stack_limit),
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000336 strict_mode_violation_location_(i::Scanner::Location::invalid()),
337 strict_mode_violation_type_(NULL),
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000338 stack_overflow_(false),
sgjesse@chromium.orgc6c57182011-01-17 12:24:25 +0000339 allow_lazy_(true),
340 parenthesized_function_(false) { }
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000341
342 // Preparse the program. Only called in PreParseProgram after creating
343 // the instance.
344 PreParseResult PreParse() {
345 Scope top_scope(&scope_, kTopLevelScope);
346 bool ok = true;
lrn@chromium.org1c092762011-05-09 09:42:16 +0000347 int start_position = scanner_->peek_location().beg_pos;
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000348 ParseSourceElements(i::Token::EOS, &ok);
349 if (stack_overflow_) return kPreParseStackOverflow;
350 if (!ok) {
351 ReportUnexpectedToken(scanner_->current_token());
lrn@chromium.org1c092762011-05-09 09:42:16 +0000352 } else if (scope_->is_strict()) {
353 CheckOctalLiteral(start_position, scanner_->location().end_pos, &ok);
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000354 }
355 return kPreParseSuccess;
356 }
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000357
358 // Report syntax error
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +0000359 void ReportUnexpectedToken(i::Token::Value token);
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000360 void ReportMessageAt(int start_pos,
361 int end_pos,
362 const char* type,
363 const char* name_opt) {
364 log_->LogMessage(start_pos, end_pos, type, name_opt);
365 }
366
// Octal-literal check over the positions |beg_pos| to |end_pos|; defined
// out of line. PreParse() runs it over the whole parsed range when the
// top-level scope is strict.
void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok);
368
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000369 // All ParseXXX functions take as the last argument an *ok parameter
370 // which is set to false if parsing failed; it is unchanged otherwise.
371 // By making the 'exception handling' explicit, we are forced to check
372 // for failure at the call sites.
373 SourceElements ParseSourceElements(int end_token, bool* ok);
374 Statement ParseStatement(bool* ok);
375 Statement ParseFunctionDeclaration(bool* ok);
376 Statement ParseNativeDeclaration(bool* ok);
377 Statement ParseBlock(bool* ok);
378 Statement ParseVariableStatement(bool* ok);
379 Statement ParseVariableDeclarations(bool accept_IN, int* num_decl, bool* ok);
380 Statement ParseExpressionOrLabelledStatement(bool* ok);
381 Statement ParseIfStatement(bool* ok);
382 Statement ParseContinueStatement(bool* ok);
383 Statement ParseBreakStatement(bool* ok);
384 Statement ParseReturnStatement(bool* ok);
385 Statement ParseWithStatement(bool* ok);
386 Statement ParseSwitchStatement(bool* ok);
387 Statement ParseDoWhileStatement(bool* ok);
388 Statement ParseWhileStatement(bool* ok);
389 Statement ParseForStatement(bool* ok);
390 Statement ParseThrowStatement(bool* ok);
391 Statement ParseTryStatement(bool* ok);
392 Statement ParseDebuggerStatement(bool* ok);
393
394 Expression ParseExpression(bool accept_IN, bool* ok);
395 Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
396 Expression ParseConditionalExpression(bool accept_IN, bool* ok);
397 Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
398 Expression ParseUnaryExpression(bool* ok);
399 Expression ParsePostfixExpression(bool* ok);
400 Expression ParseLeftHandSideExpression(bool* ok);
401 Expression ParseNewExpression(bool* ok);
402 Expression ParseMemberExpression(bool* ok);
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +0000403 Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok);
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000404 Expression ParsePrimaryExpression(bool* ok);
405 Expression ParseArrayLiteral(bool* ok);
406 Expression ParseObjectLiteral(bool* ok);
407 Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
408 Expression ParseV8Intrinsic(bool* ok);
409
410 Arguments ParseArguments(bool* ok);
411 Expression ParseFunctionLiteral(bool* ok);
412
413 Identifier ParseIdentifier(bool* ok);
414 Identifier ParseIdentifierName(bool* ok);
415 Identifier ParseIdentifierOrGetOrSet(bool* is_get, bool* is_set, bool* ok);
416
lrn@chromium.org5d00b602011-01-05 09:51:43 +0000417 // Logs the currently parsed literal as a symbol in the preparser data.
418 void LogSymbol();
419 // Log the currently parsed identifier.
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000420 Identifier GetIdentifierSymbol();
lrn@chromium.org5d00b602011-01-05 09:51:43 +0000421 // Log the currently parsed string literal.
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000422 Expression GetStringSymbol();
423
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000424 i::Token::Value peek() {
425 if (stack_overflow_) return i::Token::ILLEGAL;
426 return scanner_->peek();
427 }
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000428
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +0000429 i::Token::Value Next() {
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000430 if (stack_overflow_) return i::Token::ILLEGAL;
431 {
432 int marker;
433 if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) {
434 // Further calls to peek/Next will return illegal token.
435 // The current one will still be returned. It might already
436 // have been seen using peek.
437 stack_overflow_ = true;
438 }
439 }
440 return scanner_->Next();
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000441 }
442
// Defined out of line. NOTE(review): by its name this tests whether the
// upcoming token is some kind of identifier token - confirm against the
// definition.
bool peek_any_identifier();
444
lrn@chromium.org1c092762011-05-09 09:42:16 +0000445 void set_strict_mode() {
446 scope_->set_strict();
447 }
448
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000449 bool strict_mode() { return scope_->is_strict(); }
lrn@chromium.org1c092762011-05-09 09:42:16 +0000450
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000451 void Consume(i::Token::Value token) { Next(); }
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000452
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +0000453 void Expect(i::Token::Value token, bool* ok) {
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000454 if (Next() != token) {
455 *ok = false;
456 }
457 }
458
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +0000459 bool Check(i::Token::Value token) {
460 i::Token::Value next = peek();
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000461 if (next == token) {
462 Consume(next);
463 return true;
464 }
465 return false;
466 }
// Defined out of line; reports failure through |ok| like the ParseXXX
// functions. NOTE(review): presumably also accepts an automatically
// inserted semicolon - confirm against the definition.
void ExpectSemicolon(bool* ok);
468
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +0000469 static int Precedence(i::Token::Value tok, bool accept_IN);
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000470
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000471 void SetStrictModeViolation(i::Scanner::Location,
472 const char* type,
473 bool *ok);
474
475 void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok);
476
477 void StrictModeIdentifierViolation(i::Scanner::Location,
478 const char* eval_args_type,
479 Identifier identifier,
480 bool* ok);
481
ager@chromium.orgbeb25712010-11-29 08:02:25 +0000482 i::JavaScriptScanner* scanner_;
483 i::ParserRecorder* log_;
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000484 Scope* scope_;
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000485 uintptr_t stack_limit_;
ager@chromium.orgea91cc52011-05-23 06:06:11 +0000486 i::Scanner::Location strict_mode_violation_location_;
487 const char* strict_mode_violation_type_;
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000488 bool stack_overflow_;
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000489 bool allow_lazy_;
sgjesse@chromium.orgc6c57182011-01-17 12:24:25 +0000490 bool parenthesized_function_;
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000491};
whesse@chromium.orgf0ac72d2010-11-08 12:47:26 +0000492} } // v8::preparser
lrn@chromium.orgfa943b72010-11-03 08:14:36 +0000493
494#endif // V8_PREPARSER_H