blob: 2efd53ef87e1f81e8ab48f2dedc8c094d8ffd037 [file] [log] [blame]
Ben Murdoch257744e2011-11-30 15:57:28 +00001// Copyright 2011 the V8 project authors. All rights reserved.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -08002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_PREPARSER_H
29#define V8_PREPARSER_H
30
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080031namespace v8 {
32namespace preparser {
33
34// Preparsing checks a JavaScript program and emits preparse-data that helps
35// a later parsing to be faster.
Ben Murdoch257744e2011-11-30 15:57:28 +000036// See preparse-data-format.h for the data format.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080037
38// The PreParser checks that the syntax follows the grammar for JavaScript,
39// and collects some information about the program along the way.
40// The grammar check is only performed in order to understand the program
41// sufficiently to deduce some information about it, that can be used
42// to speed up later parsing. Finding errors is not the goal of pre-parsing,
43// rather it is to speed up properly written and correct programs.
44// That means that contextual checks (like a label being declared where
45// it is used) are generally omitted.
46
47namespace i = v8::internal;
48
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080049class PreParser {
50 public:
// Outcome of a pre-parse run, as returned by PreParseProgram.
enum PreParseResult {
  kPreParseStackOverflow = 0,  // The stack-overflow flag was raised.
  kPreParseSuccess = 1         // Pre-parsing finished without overflow.
};
55
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080056 ~PreParser() { }
57
58 // Pre-parse the program from the character stream; returns true on
59 // success (even if parsing failed, the pre-parse data successfully
60 // captured the syntax error), and false if a stack-overflow happened
61 // during parsing.
Ben Murdochb0fe1622011-05-05 13:52:32 +010062 static PreParseResult PreParseProgram(i::JavaScriptScanner* scanner,
63 i::ParserRecorder* log,
64 bool allow_lazy,
65 uintptr_t stack_limit) {
66 return PreParser(scanner, log, stack_limit, allow_lazy).PreParse();
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080067 }
68
69 private:
// These types form an algebra over syntactic categories that is just
// rich enough to let us recognize and propagate the constructs that
// are either being counted in the preparser data, or are important
// to throw the correct syntax error exceptions.
74
// Kind of scope recorded by a Scope object.
enum ScopeType {
  kTopLevelScope = 0,
  kFunctionScope = 1
};
79
class Expression;

// Coarse classification of an identifier. Only four categories are
// distinguished: eval, arguments, future-reserved, and unknown (anything
// else). Instances are created through the static factory functions.
class Identifier {
 public:
  static Identifier Default() { return Identifier(kUnknownIdentifier); }
  static Identifier Eval() { return Identifier(kEvalIdentifier); }
  static Identifier Arguments() { return Identifier(kArgumentsIdentifier); }
  static Identifier FutureReserved() {
    return Identifier(kFutureReservedIdentifier);
  }

  bool IsEval() {
    return type_ == kEvalIdentifier;
  }
  bool IsArguments() {
    return type_ == kArgumentsIdentifier;
  }
  bool IsEvalOrArguments() {
    return IsEval() || IsArguments();
  }
  bool IsFutureReserved() {
    return type_ == kFutureReservedIdentifier;
  }
  // Only an identifier of the unknown category qualifies.
  bool IsValidStrictVariable() {
    return type_ == kUnknownIdentifier;
  }

 private:
  enum Type {
    kUnknownIdentifier,
    kFutureReservedIdentifier,
    kEvalIdentifier,
    kArgumentsIdentifier
  };

  explicit Identifier(Type type) : type_(type) { }

  Type type_;

  // Expression reads type_ and rebuilds Identifier values from its code.
  friend class Expression;
};
113
Ben Murdoch257744e2011-11-30 15:57:28 +0000114 // Bits 0 and 1 are used to identify the type of expression:
115 // If bit 0 is set, it's an identifier.
116 // if bit 1 is set, it's a string literal.
117 // If neither is set, it's no particular type, and both set isn't
118 // use yet.
119 // Bit 2 is used to mark the expression as being parenthesized,
120 // so "(foo)" isn't recognized as a pure identifier (and possible label).
121 class Expression {
122 public:
123 static Expression Default() {
124 return Expression(kUnknownExpression);
125 }
126
127 static Expression FromIdentifier(Identifier id) {
128 return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift));
129 }
130
131 static Expression StringLiteral() {
132 return Expression(kUnknownStringLiteral);
133 }
134
135 static Expression UseStrictStringLiteral() {
136 return Expression(kUseStrictString);
137 }
138
139 static Expression This() {
140 return Expression(kThisExpression);
141 }
142
143 static Expression ThisProperty() {
144 return Expression(kThisPropertyExpression);
145 }
146
147 static Expression StrictFunction() {
148 return Expression(kStrictFunctionExpression);
149 }
150
151 bool IsIdentifier() {
152 return (code_ & kIdentifierFlag) != 0;
153 }
154
155 // Only works corretly if it is actually an identifier expression.
156 PreParser::Identifier AsIdentifier() {
157 return PreParser::Identifier(
158 static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift));
159 }
160
161 bool IsParenthesized() {
162 // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized.
163 return (code_ & 7) > 4;
164 }
165
166 bool IsRawIdentifier() {
167 return !IsParenthesized() && IsIdentifier();
168 }
169
170 bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; }
171
172 bool IsRawStringLiteral() {
173 return !IsParenthesized() && IsStringLiteral();
174 }
175
176 bool IsUseStrictLiteral() {
177 return (code_ & kStringLiteralMask) == kUseStrictString;
178 }
179
180 bool IsThis() {
181 return code_ == kThisExpression;
182 }
183
184 bool IsThisProperty() {
185 return code_ == kThisPropertyExpression;
186 }
187
188 bool IsStrictFunction() {
189 return code_ == kStrictFunctionExpression;
190 }
191
192 Expression Parenthesize() {
193 int type = code_ & 3;
194 if (type != 0) {
195 // Identifiers and string literals can be parenthesized.
196 // They no longer work as labels or directive prologues,
197 // but are still recognized in other contexts.
198 return Expression(code_ | kParentesizedExpressionFlag);
199 }
200 // For other types of expressions, it's not important to remember
201 // the parentheses.
202 return *this;
203 }
204
205 private:
206 // First two/three bits are used as flags.
207 // Bit 0 and 1 represent identifiers or strings literals, and are
208 // mutually exclusive, but can both be absent.
209 // If bit 0 or 1 are set, bit 2 marks that the expression has
210 // been wrapped in parentheses (a string literal can no longer
211 // be a directive prologue, and an identifier can no longer be
212 // a label.
213 enum {
214 kUnknownExpression = 0,
215 // Identifiers
216 kIdentifierFlag = 1, // Used to detect labels.
217 kIdentifierShift = 3,
218
219 kStringLiteralFlag = 2, // Used to detect directive prologue.
220 kUnknownStringLiteral = kStringLiteralFlag,
221 kUseStrictString = kStringLiteralFlag | 8,
222 kStringLiteralMask = kUseStrictString,
223
224 kParentesizedExpressionFlag = 4, // Only if identifier or string literal.
225
226 // Below here applies if neither identifier nor string literal.
227 kThisExpression = 4,
228 kThisPropertyExpression = 8,
229 kStrictFunctionExpression = 12
230 };
231
232 explicit Expression(int expression_code) : code_(expression_code) { }
233
234 int code_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100235 };
236
Ben Murdoch257744e2011-11-30 15:57:28 +0000237 class Statement {
238 public:
239 static Statement Default() {
240 return Statement(kUnknownStatement);
241 }
242
243 static Statement FunctionDeclaration() {
244 return Statement(kFunctionDeclaration);
245 }
246
247 // Creates expression statement from expression.
248 // Preserves being an unparenthesized string literal, possibly
249 // "use strict".
250 static Statement ExpressionStatement(Expression expression) {
251 if (!expression.IsParenthesized()) {
252 if (expression.IsUseStrictLiteral()) {
253 return Statement(kUseStrictExpressionStatement);
254 }
255 if (expression.IsStringLiteral()) {
256 return Statement(kStringLiteralExpressionStatement);
257 }
258 }
259 return Default();
260 }
261
262 bool IsStringLiteral() {
263 return code_ != kUnknownStatement;
264 }
265
266 bool IsUseStrictLiteral() {
267 return code_ == kUseStrictExpressionStatement;
268 }
269
270 bool IsFunctionDeclaration() {
271 return code_ == kFunctionDeclaration;
272 }
273
274 private:
275 enum Type {
276 kUnknownStatement,
277 kStringLiteralExpressionStatement,
278 kUseStrictExpressionStatement,
279 kFunctionDeclaration
280 };
281
282 explicit Statement(Type code) : code_(code) {}
283 Type code_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100284 };
285
// Return type of ParseSourceElements; it has a single value.
enum SourceElements {
  kUnknownSourceElements = 0
};

// Return type of ParseArguments.
typedef int Arguments;
291
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800292 class Scope {
293 public:
294 Scope(Scope** variable, ScopeType type)
295 : variable_(variable),
296 prev_(*variable),
297 type_(type),
298 materialized_literal_count_(0),
299 expected_properties_(0),
Ben Murdoch257744e2011-11-30 15:57:28 +0000300 with_nesting_count_(0),
301 strict_((prev_ != NULL) && prev_->is_strict()) {
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800302 *variable = this;
303 }
304 ~Scope() { *variable_ = prev_; }
305 void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
306 void AddProperty() { expected_properties_++; }
307 ScopeType type() { return type_; }
308 int expected_properties() { return expected_properties_; }
309 int materialized_literal_count() { return materialized_literal_count_; }
310 bool IsInsideWith() { return with_nesting_count_ != 0; }
Ben Murdoch257744e2011-11-30 15:57:28 +0000311 bool is_strict() { return strict_; }
312 void set_strict() { strict_ = true; }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800313 void EnterWith() { with_nesting_count_++; }
314 void LeaveWith() { with_nesting_count_--; }
315
316 private:
317 Scope** const variable_;
318 Scope* const prev_;
319 const ScopeType type_;
320 int materialized_literal_count_;
321 int expected_properties_;
322 int with_nesting_count_;
Ben Murdoch257744e2011-11-30 15:57:28 +0000323 bool strict_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800324 };
325
Ben Murdochb0fe1622011-05-05 13:52:32 +0100326 // Private constructor only used in PreParseProgram.
327 PreParser(i::JavaScriptScanner* scanner,
328 i::ParserRecorder* log,
329 uintptr_t stack_limit,
330 bool allow_lazy)
331 : scanner_(scanner),
332 log_(log),
333 scope_(NULL),
334 stack_limit_(stack_limit),
Ben Murdoch257744e2011-11-30 15:57:28 +0000335 strict_mode_violation_location_(i::Scanner::Location::invalid()),
336 strict_mode_violation_type_(NULL),
Ben Murdochb0fe1622011-05-05 13:52:32 +0100337 stack_overflow_(false),
Ben Murdochb8e0da22011-05-16 14:20:40 +0100338 allow_lazy_(true),
339 parenthesized_function_(false) { }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100340
341 // Preparse the program. Only called in PreParseProgram after creating
342 // the instance.
343 PreParseResult PreParse() {
344 Scope top_scope(&scope_, kTopLevelScope);
345 bool ok = true;
Ben Murdoch257744e2011-11-30 15:57:28 +0000346 int start_position = scanner_->peek_location().beg_pos;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100347 ParseSourceElements(i::Token::EOS, &ok);
348 if (stack_overflow_) return kPreParseStackOverflow;
349 if (!ok) {
350 ReportUnexpectedToken(scanner_->current_token());
Ben Murdoch257744e2011-11-30 15:57:28 +0000351 } else if (scope_->is_strict()) {
352 CheckOctalLiteral(start_position, scanner_->location().end_pos, &ok);
Ben Murdochb0fe1622011-05-05 13:52:32 +0100353 }
354 return kPreParseSuccess;
355 }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800356
357 // Report syntax error
358 void ReportUnexpectedToken(i::Token::Value token);
359 void ReportMessageAt(int start_pos,
360 int end_pos,
361 const char* type,
362 const char* name_opt) {
363 log_->LogMessage(start_pos, end_pos, type, name_opt);
364 }
365
Ben Murdoch257744e2011-11-30 15:57:28 +0000366 void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok);
367
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800368 // All ParseXXX functions take as the last argument an *ok parameter
369 // which is set to false if parsing failed; it is unchanged otherwise.
370 // By making the 'exception handling' explicit, we are forced to check
371 // for failure at the call sites.
372 SourceElements ParseSourceElements(int end_token, bool* ok);
373 Statement ParseStatement(bool* ok);
374 Statement ParseFunctionDeclaration(bool* ok);
375 Statement ParseNativeDeclaration(bool* ok);
376 Statement ParseBlock(bool* ok);
377 Statement ParseVariableStatement(bool* ok);
378 Statement ParseVariableDeclarations(bool accept_IN, int* num_decl, bool* ok);
379 Statement ParseExpressionOrLabelledStatement(bool* ok);
380 Statement ParseIfStatement(bool* ok);
381 Statement ParseContinueStatement(bool* ok);
382 Statement ParseBreakStatement(bool* ok);
383 Statement ParseReturnStatement(bool* ok);
384 Statement ParseWithStatement(bool* ok);
385 Statement ParseSwitchStatement(bool* ok);
386 Statement ParseDoWhileStatement(bool* ok);
387 Statement ParseWhileStatement(bool* ok);
388 Statement ParseForStatement(bool* ok);
389 Statement ParseThrowStatement(bool* ok);
390 Statement ParseTryStatement(bool* ok);
391 Statement ParseDebuggerStatement(bool* ok);
392
393 Expression ParseExpression(bool accept_IN, bool* ok);
394 Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
395 Expression ParseConditionalExpression(bool accept_IN, bool* ok);
396 Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
397 Expression ParseUnaryExpression(bool* ok);
398 Expression ParsePostfixExpression(bool* ok);
399 Expression ParseLeftHandSideExpression(bool* ok);
400 Expression ParseNewExpression(bool* ok);
401 Expression ParseMemberExpression(bool* ok);
402 Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok);
403 Expression ParsePrimaryExpression(bool* ok);
404 Expression ParseArrayLiteral(bool* ok);
405 Expression ParseObjectLiteral(bool* ok);
406 Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
407 Expression ParseV8Intrinsic(bool* ok);
408
409 Arguments ParseArguments(bool* ok);
410 Expression ParseFunctionLiteral(bool* ok);
411
412 Identifier ParseIdentifier(bool* ok);
413 Identifier ParseIdentifierName(bool* ok);
414 Identifier ParseIdentifierOrGetOrSet(bool* is_get, bool* is_set, bool* ok);
415
Steve Block9fac8402011-05-12 15:51:54 +0100416 // Logs the currently parsed literal as a symbol in the preparser data.
417 void LogSymbol();
418 // Log the currently parsed identifier.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800419 Identifier GetIdentifierSymbol();
Steve Block9fac8402011-05-12 15:51:54 +0100420 // Log the currently parsed string literal.
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800421 Expression GetStringSymbol();
422
Ben Murdochb0fe1622011-05-05 13:52:32 +0100423 i::Token::Value peek() {
424 if (stack_overflow_) return i::Token::ILLEGAL;
425 return scanner_->peek();
426 }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800427
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800428 i::Token::Value Next() {
Ben Murdochb0fe1622011-05-05 13:52:32 +0100429 if (stack_overflow_) return i::Token::ILLEGAL;
430 {
431 int marker;
432 if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) {
433 // Further calls to peek/Next will return illegal token.
434 // The current one will still be returned. It might already
435 // have been seen using peek.
436 stack_overflow_ = true;
437 }
438 }
439 return scanner_->Next();
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800440 }
441
Steve Block1e0659c2011-05-24 12:43:12 +0100442 bool peek_any_identifier();
443
Ben Murdoch257744e2011-11-30 15:57:28 +0000444 void set_strict_mode() {
445 scope_->set_strict();
446 }
447
448 bool strict_mode() { return scope_->is_strict(); }
449
Ben Murdochb0fe1622011-05-05 13:52:32 +0100450 void Consume(i::Token::Value token) { Next(); }
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800451
452 void Expect(i::Token::Value token, bool* ok) {
453 if (Next() != token) {
454 *ok = false;
455 }
456 }
457
458 bool Check(i::Token::Value token) {
459 i::Token::Value next = peek();
460 if (next == token) {
461 Consume(next);
462 return true;
463 }
464 return false;
465 }
466 void ExpectSemicolon(bool* ok);
467
468 static int Precedence(i::Token::Value tok, bool accept_IN);
469
Ben Murdoch257744e2011-11-30 15:57:28 +0000470 void SetStrictModeViolation(i::Scanner::Location,
471 const char* type,
472 bool *ok);
473
474 void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok);
475
476 void StrictModeIdentifierViolation(i::Scanner::Location,
477 const char* eval_args_type,
478 Identifier identifier,
479 bool* ok);
480
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800481 i::JavaScriptScanner* scanner_;
482 i::ParserRecorder* log_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800483 Scope* scope_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100484 uintptr_t stack_limit_;
Ben Murdoch257744e2011-11-30 15:57:28 +0000485 i::Scanner::Location strict_mode_violation_location_;
486 const char* strict_mode_violation_type_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100487 bool stack_overflow_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800488 bool allow_lazy_;
Ben Murdochb8e0da22011-05-16 14:20:40 +0100489 bool parenthesized_function_;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800490};
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800491} } // v8::preparser
492
493#endif // V8_PREPARSER_H