// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_PARSER_H_
29#define V8_PARSER_H_
30
#include <stddef.h>

#include "allocation.h"
#include "ast.h"
#include "scanner.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000034
35namespace v8 {
36namespace internal {
37
// Forward declarations. Within this header these types are only named
// through pointers or friend declarations, so their full definitions are
// not required here.
class CompilationInfo;
class FuncNameInferrer;
class ParserFactory;
class ParserLog;
class PositionStack;
class Target;
class TemporaryScope;

template <typename T>
class ZoneListWrapper;
47
Steve Blocka7e24c12009-10-30 11:49:00 +000048
49class ParserMessage : public Malloced {
50 public:
51 ParserMessage(Scanner::Location loc, const char* message,
52 Vector<const char*> args)
53 : loc_(loc),
54 message_(message),
55 args_(args) { }
56 ~ParserMessage();
57 Scanner::Location location() { return loc_; }
58 const char* message() { return message_; }
59 Vector<const char*> args() { return args_; }
60 private:
61 Scanner::Location loc_;
62 const char* message_;
63 Vector<const char*> args_;
64};
65
66
67class FunctionEntry BASE_EMBEDDED {
68 public:
69 explicit FunctionEntry(Vector<unsigned> backing) : backing_(backing) { }
70 FunctionEntry() : backing_(Vector<unsigned>::empty()) { }
71
72 int start_pos() { return backing_[kStartPosOffset]; }
73 void set_start_pos(int value) { backing_[kStartPosOffset] = value; }
74
75 int end_pos() { return backing_[kEndPosOffset]; }
76 void set_end_pos(int value) { backing_[kEndPosOffset] = value; }
77
78 int literal_count() { return backing_[kLiteralCountOffset]; }
79 void set_literal_count(int value) { backing_[kLiteralCountOffset] = value; }
80
81 int property_count() { return backing_[kPropertyCountOffset]; }
Kristian Monsen80d68ea2010-09-08 11:05:35 +010082 void set_property_count(int value) {
83 backing_[kPropertyCountOffset] = value;
84 }
85
Steve Blocka7e24c12009-10-30 11:49:00 +000086 bool is_valid() { return backing_.length() > 0; }
87
Ben Murdochf87a2032010-10-22 12:50:53 +010088 static const int kSize = 4;
Steve Blocka7e24c12009-10-30 11:49:00 +000089
90 private:
91 Vector<unsigned> backing_;
92 static const int kStartPosOffset = 0;
93 static const int kEndPosOffset = 1;
94 static const int kLiteralCountOffset = 2;
95 static const int kPropertyCountOffset = 3;
Steve Blocka7e24c12009-10-30 11:49:00 +000096};
97
98
99class ScriptDataImpl : public ScriptData {
100 public:
101 explicit ScriptDataImpl(Vector<unsigned> store)
102 : store_(store),
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100103 owns_store_(true) { }
Iain Merrick9ac36c92010-09-13 15:29:50 +0100104
105 // Create an empty ScriptDataImpl that is guaranteed to not satisfy
106 // a SanityCheck.
107 ScriptDataImpl() : store_(Vector<unsigned>()), owns_store_(false) { }
108
Steve Blocka7e24c12009-10-30 11:49:00 +0000109 virtual ~ScriptDataImpl();
110 virtual int Length();
Leon Clarkef7060e22010-06-03 12:02:55 +0100111 virtual const char* Data();
Leon Clarkee46be812010-01-19 14:06:41 +0000112 virtual bool HasError();
Iain Merrick9ac36c92010-09-13 15:29:50 +0100113
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100114 void Initialize();
115 void ReadNextSymbolPosition();
116
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100117 FunctionEntry GetFunctionEntry(int start);
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100118 int GetSymbolIdentifier();
Steve Blocka7e24c12009-10-30 11:49:00 +0000119 bool SanityCheck();
120
121 Scanner::Location MessageLocation();
122 const char* BuildMessage();
123 Vector<const char*> BuildArgs();
124
Iain Merrick9ac36c92010-09-13 15:29:50 +0100125 int symbol_count() {
126 return (store_.length() > kHeaderSize) ? store_[kSymbolCountOffset] : 0;
127 }
128 // The following functions should only be called if SanityCheck has
129 // returned true.
Steve Blocka7e24c12009-10-30 11:49:00 +0000130 bool has_error() { return store_[kHasErrorOffset]; }
131 unsigned magic() { return store_[kMagicOffset]; }
132 unsigned version() { return store_[kVersionOffset]; }
Iain Merrick9ac36c92010-09-13 15:29:50 +0100133
Steve Blocka7e24c12009-10-30 11:49:00 +0000134 static const unsigned kMagicNumber = 0xBadDead;
Ben Murdochf87a2032010-10-22 12:50:53 +0100135 static const unsigned kCurrentVersion = 4;
Steve Blocka7e24c12009-10-30 11:49:00 +0000136
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100137 static const int kMagicOffset = 0;
138 static const int kVersionOffset = 1;
139 static const int kHasErrorOffset = 2;
Iain Merrick9ac36c92010-09-13 15:29:50 +0100140 static const int kFunctionsSizeOffset = 3;
141 static const int kSymbolCountOffset = 4;
142 static const int kSizeOffset = 5;
143 static const int kHeaderSize = 6;
144
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100145 // If encoding a message, the following positions are fixed.
Iain Merrick9ac36c92010-09-13 15:29:50 +0100146 static const int kMessageStartPos = 0;
147 static const int kMessageEndPos = 1;
148 static const int kMessageArgCountPos = 2;
149 static const int kMessageTextPos = 3;
Steve Blocka7e24c12009-10-30 11:49:00 +0000150
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100151 static const byte kNumberTerminator = 0x80u;
152
Steve Blocka7e24c12009-10-30 11:49:00 +0000153 private:
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100154 Vector<unsigned> store_;
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100155 unsigned char* symbol_data_;
156 unsigned char* symbol_data_end_;
Iain Merrick9ac36c92010-09-13 15:29:50 +0100157 int function_index_;
Iain Merrick9ac36c92010-09-13 15:29:50 +0100158 bool owns_store_;
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100159
Steve Blocka7e24c12009-10-30 11:49:00 +0000160 unsigned Read(int position);
161 unsigned* ReadAddress(int position);
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100162 // Reads a number from the current symbols
163 int ReadNumber(byte** source);
Steve Blocka7e24c12009-10-30 11:49:00 +0000164
Iain Merrick9ac36c92010-09-13 15:29:50 +0100165 ScriptDataImpl(const char* backing_store, int length)
166 : store_(reinterpret_cast<unsigned*>(const_cast<char*>(backing_store)),
Ben Murdochf87a2032010-10-22 12:50:53 +0100167 length / static_cast<int>(sizeof(unsigned))),
Iain Merrick9ac36c92010-09-13 15:29:50 +0100168 owns_store_(false) {
Ben Murdochf87a2032010-10-22 12:50:53 +0100169 ASSERT_EQ(0, static_cast<int>(
170 reinterpret_cast<intptr_t>(backing_store) % sizeof(unsigned)));
Iain Merrick9ac36c92010-09-13 15:29:50 +0100171 }
172
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100173 // Read strings written by ParserRecorder::WriteString.
174 static const char* ReadString(unsigned* start, int* chars);
Iain Merrick9ac36c92010-09-13 15:29:50 +0100175
176 friend class ScriptData;
Steve Blocka7e24c12009-10-30 11:49:00 +0000177};
178
179
Ben Murdochf87a2032010-10-22 12:50:53 +0100180class Parser {
181 public:
182 Parser(Handle<Script> script, bool allow_natives_syntax,
183 v8::Extension* extension, ParserMode is_pre_parsing,
184 ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data);
185 virtual ~Parser() { }
Steve Blocka7e24c12009-10-30 11:49:00 +0000186
Ben Murdochf87a2032010-10-22 12:50:53 +0100187 // Parses the source code represented by the compilation info and sets its
188 // function literal. Returns false (and deallocates any allocated AST
189 // nodes) if parsing failed.
190 static bool Parse(CompilationInfo* info);
Steve Blocka7e24c12009-10-30 11:49:00 +0000191
Ben Murdochf87a2032010-10-22 12:50:53 +0100192 // Generic preparser generating full preparse data.
193 static ScriptDataImpl* PreParse(Handle<String> source,
194 unibrow::CharacterStream* stream,
195 v8::Extension* extension);
196
197 // Preparser that only does preprocessing that makes sense if only used
198 // immediately after.
199 static ScriptDataImpl* PartialPreParse(Handle<String> source,
200 unibrow::CharacterStream* stream,
201 v8::Extension* extension);
202
203 static bool ParseRegExp(FlatStringReader* input,
204 bool multiline,
205 RegExpCompileData* result);
206
207 // Pre-parse the program from the character stream; returns true on
208 // success, false if a stack-overflow happened during parsing.
209 bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream);
210
211 void ReportMessage(const char* message, Vector<const char*> args);
212 virtual void ReportMessageAt(Scanner::Location loc,
213 const char* message,
214 Vector<const char*> args) = 0;
Steve Block59151502010-09-22 15:07:15 +0100215
Steve Blocka7e24c12009-10-30 11:49:00 +0000216
Ben Murdochf87a2032010-10-22 12:50:53 +0100217 // Returns NULL if parsing failed.
218 FunctionLiteral* ParseProgram(Handle<String> source,
219 bool in_global_context);
220 FunctionLiteral* ParseLazy(Handle<SharedFunctionInfo> info);
221 FunctionLiteral* ParseJson(Handle<String> source);
222
223 // The minimum number of contiguous assignment that will
224 // be treated as an initialization block. Benchmarks show that
225 // the overhead exceeds the savings below this limit.
226 static const int kMinInitializationBlock = 3;
227
228 protected:
229
230 enum Mode {
231 PARSE_LAZILY,
232 PARSE_EAGERLY
233 };
234
235 // Report syntax error
236 void ReportUnexpectedToken(Token::Value token);
237 void ReportInvalidPreparseData(Handle<String> name, bool* ok);
238
239 Handle<Script> script_;
240 Scanner scanner_;
241
242 Scope* top_scope_;
243 int with_nesting_level_;
244
245 TemporaryScope* temp_scope_;
246 Mode mode_;
247
248 Target* target_stack_; // for break, continue statements
249 bool allow_natives_syntax_;
250 v8::Extension* extension_;
251 ParserFactory* factory_;
252 ParserLog* log_;
253 bool is_pre_parsing_;
254 ScriptDataImpl* pre_data_;
255 FuncNameInferrer* fni_;
256
257 bool inside_with() const { return with_nesting_level_ > 0; }
258 ParserFactory* factory() const { return factory_; }
259 ParserLog* log() const { return log_; }
260 Scanner& scanner() { return scanner_; }
261 Mode mode() const { return mode_; }
262 ScriptDataImpl* pre_data() const { return pre_data_; }
263
264 // All ParseXXX functions take as the last argument an *ok parameter
265 // which is set to false if parsing failed; it is unchanged otherwise.
266 // By making the 'exception handling' explicit, we are forced to check
267 // for failure at the call sites.
268 void* ParseSourceElements(ZoneListWrapper<Statement>* processor,
269 int end_token, bool* ok);
270 Statement* ParseStatement(ZoneStringList* labels, bool* ok);
271 Statement* ParseFunctionDeclaration(bool* ok);
272 Statement* ParseNativeDeclaration(bool* ok);
273 Block* ParseBlock(ZoneStringList* labels, bool* ok);
274 Block* ParseVariableStatement(bool* ok);
275 Block* ParseVariableDeclarations(bool accept_IN, Expression** var, bool* ok);
276 Statement* ParseExpressionOrLabelledStatement(ZoneStringList* labels,
277 bool* ok);
278 IfStatement* ParseIfStatement(ZoneStringList* labels, bool* ok);
279 Statement* ParseContinueStatement(bool* ok);
280 Statement* ParseBreakStatement(ZoneStringList* labels, bool* ok);
281 Statement* ParseReturnStatement(bool* ok);
282 Block* WithHelper(Expression* obj,
283 ZoneStringList* labels,
284 bool is_catch_block,
285 bool* ok);
286 Statement* ParseWithStatement(ZoneStringList* labels, bool* ok);
287 CaseClause* ParseCaseClause(bool* default_seen_ptr, bool* ok);
288 SwitchStatement* ParseSwitchStatement(ZoneStringList* labels, bool* ok);
289 DoWhileStatement* ParseDoWhileStatement(ZoneStringList* labels, bool* ok);
290 WhileStatement* ParseWhileStatement(ZoneStringList* labels, bool* ok);
291 Statement* ParseForStatement(ZoneStringList* labels, bool* ok);
292 Statement* ParseThrowStatement(bool* ok);
293 Expression* MakeCatchContext(Handle<String> id, VariableProxy* value);
294 TryStatement* ParseTryStatement(bool* ok);
295 DebuggerStatement* ParseDebuggerStatement(bool* ok);
296
297 Expression* ParseExpression(bool accept_IN, bool* ok);
298 Expression* ParseAssignmentExpression(bool accept_IN, bool* ok);
299 Expression* ParseConditionalExpression(bool accept_IN, bool* ok);
300 Expression* ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
301 Expression* ParseUnaryExpression(bool* ok);
302 Expression* ParsePostfixExpression(bool* ok);
303 Expression* ParseLeftHandSideExpression(bool* ok);
304 Expression* ParseNewExpression(bool* ok);
305 Expression* ParseMemberExpression(bool* ok);
306 Expression* ParseNewPrefix(PositionStack* stack, bool* ok);
307 Expression* ParseMemberWithNewPrefixesExpression(PositionStack* stack,
308 bool* ok);
309 Expression* ParsePrimaryExpression(bool* ok);
310 Expression* ParseArrayLiteral(bool* ok);
311 Expression* ParseObjectLiteral(bool* ok);
312 ObjectLiteral::Property* ParseObjectLiteralGetSet(bool is_getter, bool* ok);
313 Expression* ParseRegExpLiteral(bool seen_equal, bool* ok);
314
315 Expression* NewCompareNode(Token::Value op,
316 Expression* x,
317 Expression* y,
318 int position);
319
320 // Populate the constant properties fixed array for a materialized object
321 // literal.
322 void BuildObjectLiteralConstantProperties(
323 ZoneList<ObjectLiteral::Property*>* properties,
324 Handle<FixedArray> constants,
325 bool* is_simple,
326 bool* fast_elements,
327 int* depth);
328
329 // Populate the literals fixed array for a materialized array literal.
330 void BuildArrayLiteralBoilerplateLiterals(ZoneList<Expression*>* properties,
331 Handle<FixedArray> constants,
332 bool* is_simple,
333 int* depth);
334
335 // Decide if a property should be in the object boilerplate.
336 bool IsBoilerplateProperty(ObjectLiteral::Property* property);
337 // If the expression is a literal, return the literal value;
338 // if the expression is a materialized literal and is simple return a
339 // compile time value as encoded by CompileTimeValue::GetValue().
340 // Otherwise, return undefined literal as the placeholder
341 // in the object literal boilerplate.
342 Handle<Object> GetBoilerplateValue(Expression* expression);
343
344 enum FunctionLiteralType {
345 EXPRESSION,
346 DECLARATION,
347 NESTED
348 };
349
350 ZoneList<Expression*>* ParseArguments(bool* ok);
351 FunctionLiteral* ParseFunctionLiteral(Handle<String> var_name,
352 int function_token_position,
353 FunctionLiteralType type,
354 bool* ok);
Steve Blocka7e24c12009-10-30 11:49:00 +0000355
356
Ben Murdochf87a2032010-10-22 12:50:53 +0100357 // Magical syntax support.
358 Expression* ParseV8Intrinsic(bool* ok);
359
360 INLINE(Token::Value peek()) { return scanner_.peek(); }
361 INLINE(Token::Value Next()) { return scanner_.Next(); }
362 INLINE(void Consume(Token::Value token));
363 void Expect(Token::Value token, bool* ok);
364 bool Check(Token::Value token);
365 void ExpectSemicolon(bool* ok);
366
367 Handle<String> GetSymbol(bool* ok);
368
369 // Get odd-ball literals.
370 Literal* GetLiteralUndefined();
371 Literal* GetLiteralTheHole();
372 Literal* GetLiteralNumber(double value);
373
374 Handle<String> ParseIdentifier(bool* ok);
375 Handle<String> ParseIdentifierName(bool* ok);
376 Handle<String> ParseIdentifierOrGetOrSet(bool* is_get,
377 bool* is_set,
378 bool* ok);
379
380 // Parser support
381 virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode,
382 FunctionLiteral* fun,
383 bool resolve,
384 bool* ok) = 0;
385
386 bool TargetStackContainsLabel(Handle<String> label);
387 BreakableStatement* LookupBreakTarget(Handle<String> label, bool* ok);
388 IterationStatement* LookupContinueTarget(Handle<String> label, bool* ok);
389
390 void RegisterTargetUse(BreakTarget* target, Target* stop);
391
392 // Create a number literal.
393 Literal* NewNumberLiteral(double value);
394
395 // Generate AST node that throw a ReferenceError with the given type.
396 Expression* NewThrowReferenceError(Handle<String> type);
397
398 // Generate AST node that throw a SyntaxError with the given
399 // type. The first argument may be null (in the handle sense) in
400 // which case no arguments are passed to the constructor.
401 Expression* NewThrowSyntaxError(Handle<String> type, Handle<Object> first);
402
403 // Generate AST node that throw a TypeError with the given
404 // type. Both arguments must be non-null (in the handle sense).
405 Expression* NewThrowTypeError(Handle<String> type,
406 Handle<Object> first,
407 Handle<Object> second);
408
409 // Generic AST generator for throwing errors from compiled code.
410 Expression* NewThrowError(Handle<String> constructor,
411 Handle<String> type,
412 Vector< Handle<Object> > arguments);
413
414 // JSON is a subset of JavaScript, as specified in, e.g., the ECMAScript 5
415 // specification section 15.12.1 (and appendix A.8).
416 // The grammar is given section 15.12.1.2 (and appendix A.8.2).
417
418 // Parse JSON input as a single JSON value.
419 Expression* ParseJson(bool* ok);
420
421 // Parse a single JSON value from input (grammar production JSONValue).
422 // A JSON value is either a (double-quoted) string literal, a number literal,
423 // one of "true", "false", or "null", or an object or array literal.
424 Expression* ParseJsonValue(bool* ok);
425 // Parse a JSON object literal (grammar production JSONObject).
426 // An object literal is a squiggly-braced and comma separated sequence
427 // (possibly empty) of key/value pairs, where the key is a JSON string
428 // literal, the value is a JSON value, and the two are spearated by a colon.
429 // A JavaScript object also allows numbers and identifiers as keys.
430 Expression* ParseJsonObject(bool* ok);
431 // Parses a JSON array literal (grammar production JSONArray). An array
432 // literal is a square-bracketed and comma separated sequence (possibly empty)
433 // of JSON values.
434 // A JavaScript array allows leaving out values from the sequence.
435 Expression* ParseJsonArray(bool* ok);
436
437 friend class Target;
438 friend class TargetScope;
439 friend class LexicalScope;
440 friend class TemporaryScope;
441};
Steve Blocka7e24c12009-10-30 11:49:00 +0000442
443
444// Support for handling complex values (array and object literals) that
445// can be fully handled at compile time.
446class CompileTimeValue: public AllStatic {
447 public:
448 enum Type {
Steve Block6ded16b2010-05-10 14:33:55 +0100449 OBJECT_LITERAL_FAST_ELEMENTS,
450 OBJECT_LITERAL_SLOW_ELEMENTS,
Steve Blocka7e24c12009-10-30 11:49:00 +0000451 ARRAY_LITERAL
452 };
453
454 static bool IsCompileTimeValue(Expression* expression);
455
Iain Merrick75681382010-08-19 15:07:18 +0100456 static bool ArrayLiteralElementNeedsInitialization(Expression* value);
457
Steve Blocka7e24c12009-10-30 11:49:00 +0000458 // Get the value as a compile time value.
459 static Handle<FixedArray> GetValue(Expression* expression);
460
461 // Get the type of a compile time value returned by GetValue().
462 static Type GetType(Handle<FixedArray> value);
463
464 // Get the elements array of a compile time value returned by GetValue().
465 static Handle<FixedArray> GetElements(Handle<FixedArray> value);
466
467 private:
468 static const int kTypeSlot = 0;
469 static const int kElementsSlot = 1;
470
471 DISALLOW_IMPLICIT_CONSTRUCTORS(CompileTimeValue);
472};
473
474
475} } // namespace v8::internal
476
477#endif // V8_PARSER_H_