blob: 356c8e4a549b0e41b753ab95db4891f164e0bc3d [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Features shared by parsing and pre-parsing scanners.
6
Steve Blocka7e24c12009-10-30 11:49:00 +00007#ifndef V8_SCANNER_H_
8#define V8_SCANNER_H_
9
Ben Murdochb8a8cc12014-11-26 15:28:44 +000010#include "src/allocation.h"
11#include "src/base/logging.h"
12#include "src/char-predicates.h"
13#include "src/globals.h"
14#include "src/hashmap.h"
15#include "src/list.h"
16#include "src/token.h"
17#include "src/unicode-inl.h"
18#include "src/utils.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000019
20namespace v8 {
21namespace internal {
22
Ben Murdoch3ef787d2012-04-12 10:51:47 +010023
// Forward declarations; this header only names these classes and does not
// define them.
class AstRawString;
class AstValueFactory;
class ParserRecorder;
Ben Murdoch3ef787d2012-04-12 10:51:47 +010027
28
Ben Murdoch589d6972011-11-30 16:04:58 +000029// Returns the value (0 .. 15) of a hexadecimal character c.
30// If c is not a legal hexadecimal character, returns a value < 0.
31inline int HexValue(uc32 c) {
32 c -= '0';
33 if (static_cast<unsigned>(c) <= 9) return c;
34 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
35 if (static_cast<unsigned>(c) <= 5) return c + 10;
36 return -1;
37}
Steve Blocka7e24c12009-10-30 11:49:00 +000038
Ben Murdoch589d6972011-11-30 16:04:58 +000039
40// ---------------------------------------------------------------------
Ben Murdoch3ef787d2012-04-12 10:51:47 +010041// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
42// A code unit is a 16 bit value representing either a 16 bit code point
43// or one part of a surrogate pair that make a single 21 bit code point.
Ben Murdoch589d6972011-11-30 16:04:58 +000044
Ben Murdoch3ef787d2012-04-12 10:51:47 +010045class Utf16CharacterStream {
Ben Murdoch589d6972011-11-30 16:04:58 +000046 public:
Ben Murdoch3ef787d2012-04-12 10:51:47 +010047 Utf16CharacterStream() : pos_(0) { }
48 virtual ~Utf16CharacterStream() { }
Ben Murdoch589d6972011-11-30 16:04:58 +000049
Ben Murdoch3ef787d2012-04-12 10:51:47 +010050 // Returns and advances past the next UTF-16 code unit in the input
51 // stream. If there are no more code units, it returns a negative
Ben Murdoch589d6972011-11-30 16:04:58 +000052 // value.
53 inline uc32 Advance() {
54 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
55 pos_++;
56 return static_cast<uc32>(*(buffer_cursor_++));
57 }
58 // Note: currently the following increment is necessary to avoid a
59 // parser problem! The scanner treats the final kEndOfInput as
Ben Murdoch3ef787d2012-04-12 10:51:47 +010060 // a code unit with a position, and does math relative to that
Ben Murdoch589d6972011-11-30 16:04:58 +000061 // position.
62 pos_++;
63
64 return kEndOfInput;
65 }
66
Ben Murdoch3ef787d2012-04-12 10:51:47 +010067 // Return the current position in the code unit stream.
Ben Murdoch589d6972011-11-30 16:04:58 +000068 // Starts at zero.
69 inline unsigned pos() const { return pos_; }
70
Ben Murdoch3ef787d2012-04-12 10:51:47 +010071 // Skips forward past the next code_unit_count UTF-16 code units
Ben Murdoch589d6972011-11-30 16:04:58 +000072 // in the input, or until the end of input if that comes sooner.
Ben Murdoch3ef787d2012-04-12 10:51:47 +010073 // Returns the number of code units actually skipped. If less
74 // than code_unit_count,
75 inline unsigned SeekForward(unsigned code_unit_count) {
Ben Murdoch589d6972011-11-30 16:04:58 +000076 unsigned buffered_chars =
77 static_cast<unsigned>(buffer_end_ - buffer_cursor_);
Ben Murdoch3ef787d2012-04-12 10:51:47 +010078 if (code_unit_count <= buffered_chars) {
79 buffer_cursor_ += code_unit_count;
80 pos_ += code_unit_count;
81 return code_unit_count;
Ben Murdoch589d6972011-11-30 16:04:58 +000082 }
Ben Murdoch3ef787d2012-04-12 10:51:47 +010083 return SlowSeekForward(code_unit_count);
Ben Murdoch589d6972011-11-30 16:04:58 +000084 }
85
Ben Murdoch3ef787d2012-04-12 10:51:47 +010086 // Pushes back the most recently read UTF-16 code unit (or negative
Ben Murdoch589d6972011-11-30 16:04:58 +000087 // value if at end of input), i.e., the value returned by the most recent
88 // call to Advance.
89 // Must not be used right after calling SeekForward.
Ben Murdoch3ef787d2012-04-12 10:51:47 +010090 virtual void PushBack(int32_t code_unit) = 0;
Steve Blocka7e24c12009-10-30 11:49:00 +000091
Ben Murdochb0fe1622011-05-05 13:52:32 +010092 protected:
Ben Murdoch589d6972011-11-30 16:04:58 +000093 static const uc32 kEndOfInput = -1;
Ben Murdochb0fe1622011-05-05 13:52:32 +010094
Ben Murdoch3ef787d2012-04-12 10:51:47 +010095 // Ensures that the buffer_cursor_ points to the code_unit at
Ben Murdoch589d6972011-11-30 16:04:58 +000096 // position pos_ of the input, if possible. If the position
97 // is at or after the end of the input, return false. If there
Ben Murdoch3ef787d2012-04-12 10:51:47 +010098 // are more code_units available, return true.
Ben Murdoch589d6972011-11-30 16:04:58 +000099 virtual bool ReadBlock() = 0;
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100100 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100101
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000102 const uint16_t* buffer_cursor_;
103 const uint16_t* buffer_end_;
Ben Murdoch589d6972011-11-30 16:04:58 +0000104 unsigned pos_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100105};
106
107
Ben Murdoch589d6972011-11-30 16:04:58 +0000108// ---------------------------------------------------------------------
109// Caching predicates used by scanners.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000110
111class UnicodeCache {
Ben Murdochb0fe1622011-05-05 13:52:32 +0100112 public:
Ben Murdoch589d6972011-11-30 16:04:58 +0000113 UnicodeCache() {}
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000114 typedef unibrow::Utf8Decoder<512> Utf8Decoder;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100115
Ben Murdoch589d6972011-11-30 16:04:58 +0000116 StaticResource<Utf8Decoder>* utf8_decoder() {
117 return &utf8_decoder_;
118 }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100119
Ben Murdoch589d6972011-11-30 16:04:58 +0000120 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); }
121 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
122 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
123 bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000124 bool IsWhiteSpaceOrLineTerminator(unibrow::uchar c) {
125 return kIsWhiteSpaceOrLineTerminator.get(c);
126 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000127
128 private:
129 unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
130 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
131 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000132 unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
133 unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
134 kIsWhiteSpaceOrLineTerminator;
Ben Murdoch589d6972011-11-30 16:04:58 +0000135 StaticResource<Utf8Decoder> utf8_decoder_;
136
137 DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
Ben Murdochb0fe1622011-05-05 13:52:32 +0100138};
139
140
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000141// ---------------------------------------------------------------------
142// DuplicateFinder discovers duplicate symbols.
143
144class DuplicateFinder {
145 public:
146 explicit DuplicateFinder(UnicodeCache* constants)
147 : unicode_constants_(constants),
148 backing_store_(16),
149 map_(&Match) { }
150
151 int AddOneByteSymbol(Vector<const uint8_t> key, int value);
152 int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
153 // Add a a number literal by converting it (if necessary)
154 // to the string that ToString(ToNumber(literal)) would generate.
155 // and then adding that string with AddOneByteSymbol.
156 // This string is the actual value used as key in an object literal,
157 // and the one that must be different from the other keys.
158 int AddNumber(Vector<const uint8_t> key, int value);
159
160 private:
161 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
162 // Backs up the key and its length in the backing store.
163 // The backup is stored with a base 127 encoding of the
164 // length (plus a bit saying whether the string is one byte),
165 // followed by the bytes of the key.
166 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
167
168 // Compare two encoded keys (both pointing into the backing store)
169 // for having the same base-127 encoded lengths and representation.
170 // and then having the same 'length' bytes following.
171 static bool Match(void* first, void* second);
172 // Creates a hash from a sequence of bytes.
173 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
174 // Checks whether a string containing a JS number is its canonical
175 // form.
176 static bool IsNumberCanonical(Vector<const uint8_t> key);
177
178 // Size of buffer. Sufficient for using it to call DoubleToCString in
179 // from conversions.h.
180 static const int kBufferSize = 100;
181
182 UnicodeCache* unicode_constants_;
183 // Backing store used to store strings used as hashmap keys.
184 SequenceCollector<unsigned char> backing_store_;
185 HashMap map_;
186 // Buffer used for string->number->canonical string conversions.
187 char number_buffer_[kBufferSize];
188};
189
190
Ben Murdoch589d6972011-11-30 16:04:58 +0000191// ----------------------------------------------------------------------------
192// LiteralBuffer - Collector of chars of literals.
193
194class LiteralBuffer {
Ben Murdochb0fe1622011-05-05 13:52:32 +0100195 public:
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000196 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100197
Ben Murdoch589d6972011-11-30 16:04:58 +0000198 ~LiteralBuffer() {
199 if (backing_store_.length() > 0) {
200 backing_store_.Dispose();
201 }
202 }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100203
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100204 INLINE(void AddChar(uint32_t code_unit)) {
Ben Murdoch589d6972011-11-30 16:04:58 +0000205 if (position_ >= backing_store_.length()) ExpandBuffer();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000206 if (is_one_byte_) {
207 if (code_unit <= unibrow::Latin1::kMaxChar) {
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100208 backing_store_[position_] = static_cast<byte>(code_unit);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000209 position_ += kOneByteSize;
Ben Murdoch589d6972011-11-30 16:04:58 +0000210 return;
211 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000212 ConvertToTwoByte();
Ben Murdoch589d6972011-11-30 16:04:58 +0000213 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000214 DCHECK(code_unit < 0x10000u);
215 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
Ben Murdoch589d6972011-11-30 16:04:58 +0000216 position_ += kUC16Size;
217 }
218
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000219 bool is_one_byte() const { return is_one_byte_; }
Ben Murdoch589d6972011-11-30 16:04:58 +0000220
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000221 bool is_contextual_keyword(Vector<const char> keyword) const {
222 return is_one_byte() && keyword.length() == position_ &&
223 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
224 }
225
226 Vector<const uint16_t> two_byte_literal() const {
227 DCHECK(!is_one_byte_);
228 DCHECK((position_ & 0x1) == 0);
229 return Vector<const uint16_t>(
230 reinterpret_cast<const uint16_t*>(backing_store_.start()),
Ben Murdoch589d6972011-11-30 16:04:58 +0000231 position_ >> 1);
232 }
233
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000234 Vector<const uint8_t> one_byte_literal() const {
235 DCHECK(is_one_byte_);
236 return Vector<const uint8_t>(
237 reinterpret_cast<const uint8_t*>(backing_store_.start()),
Ben Murdoch589d6972011-11-30 16:04:58 +0000238 position_);
239 }
240
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000241 int length() const {
242 return is_one_byte_ ? position_ : (position_ >> 1);
Ben Murdoch589d6972011-11-30 16:04:58 +0000243 }
244
245 void Reset() {
246 position_ = 0;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000247 is_one_byte_ = true;
Ben Murdoch589d6972011-11-30 16:04:58 +0000248 }
249
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000250 Handle<String> Internalize(Isolate* isolate) const;
251
Ben Murdoch589d6972011-11-30 16:04:58 +0000252 private:
253 static const int kInitialCapacity = 16;
254 static const int kGrowthFactory = 4;
255 static const int kMinConversionSlack = 256;
256 static const int kMaxGrowth = 1 * MB;
257 inline int NewCapacity(int min_capacity) {
258 int capacity = Max(min_capacity, backing_store_.length());
259 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
260 return new_capacity;
261 }
262
263 void ExpandBuffer() {
264 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000265 MemCopy(new_store.start(), backing_store_.start(), position_);
Ben Murdoch589d6972011-11-30 16:04:58 +0000266 backing_store_.Dispose();
267 backing_store_ = new_store;
268 }
269
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000270 void ConvertToTwoByte() {
271 DCHECK(is_one_byte_);
Ben Murdoch589d6972011-11-30 16:04:58 +0000272 Vector<byte> new_store;
273 int new_content_size = position_ * kUC16Size;
274 if (new_content_size >= backing_store_.length()) {
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100275 // Ensure room for all currently read code units as UC16 as well
276 // as the code unit about to be stored.
Ben Murdoch589d6972011-11-30 16:04:58 +0000277 new_store = Vector<byte>::New(NewCapacity(new_content_size));
278 } else {
279 new_store = backing_store_;
280 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000281 uint8_t* src = backing_store_.start();
282 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
Ben Murdoch589d6972011-11-30 16:04:58 +0000283 for (int i = position_ - 1; i >= 0; i--) {
284 dst[i] = src[i];
285 }
286 if (new_store.start() != backing_store_.start()) {
287 backing_store_.Dispose();
288 backing_store_ = new_store;
289 }
290 position_ = new_content_size;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000291 is_one_byte_ = false;
Ben Murdoch589d6972011-11-30 16:04:58 +0000292 }
293
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000294 bool is_one_byte_;
Ben Murdoch589d6972011-11-30 16:04:58 +0000295 int position_;
296 Vector<byte> backing_store_;
297
298 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
Steve Blocka7e24c12009-10-30 11:49:00 +0000299};
300
301
Ben Murdoch589d6972011-11-30 16:04:58 +0000302// ----------------------------------------------------------------------------
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100303// JavaScript Scanner.
Steve Blocka7e24c12009-10-30 11:49:00 +0000304
Ben Murdoch589d6972011-11-30 16:04:58 +0000305class Scanner {
306 public:
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100307 // Scoped helper for literal recording. Automatically drops the literal
308 // if aborting the scanning before it's complete.
Ben Murdoch589d6972011-11-30 16:04:58 +0000309 class LiteralScope {
310 public:
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100311 explicit LiteralScope(Scanner* self)
312 : scanner_(self), complete_(false) {
313 scanner_->StartLiteral();
314 }
315 ~LiteralScope() {
316 if (!complete_) scanner_->DropLiteral();
317 }
318 void Complete() {
319 scanner_->TerminateLiteral();
320 complete_ = true;
321 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000322
323 private:
324 Scanner* scanner_;
325 bool complete_;
326 };
327
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100328 // Representation of an interval of source positions.
Ben Murdoch589d6972011-11-30 16:04:58 +0000329 struct Location {
330 Location(int b, int e) : beg_pos(b), end_pos(e) { }
331 Location() : beg_pos(0), end_pos(0) { }
332
333 bool IsValid() const {
334 return beg_pos >= 0 && end_pos >= beg_pos;
335 }
336
337 static Location invalid() { return Location(-1, -1); }
338
339 int beg_pos;
340 int end_pos;
341 };
342
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100343 // -1 is outside of the range of any real source code.
344 static const int kNoOctalLocation = -1;
Ben Murdoch85b71792012-04-11 18:30:58 +0100345
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100346 explicit Scanner(UnicodeCache* scanner_contants);
347
348 void Initialize(Utf16CharacterStream* source);
349
350 // Returns the next token and advances input.
351 Token::Value Next();
352 // Returns the current token again.
353 Token::Value current_token() { return current_.token; }
354 // Returns the location information for the current token
355 // (the token last returned by Next()).
356 Location location() const { return current_.location; }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000357
358 // Similar functions for the upcoming token.
359
360 // One token look-ahead (past the token returned by Next()).
361 Token::Value peek() const { return next_.token; }
362
363 Location peek_location() const { return next_.location; }
Ben Murdoch589d6972011-11-30 16:04:58 +0000364
365 bool literal_contains_escapes() const {
366 Location location = current_.location;
367 int source_length = (location.end_pos - location.beg_pos);
368 if (current_.token == Token::STRING) {
369 // Subtract delimiters.
370 source_length -= 2;
371 }
372 return current_.literal_chars->length() != source_length;
373 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000374 bool is_literal_contextual_keyword(Vector<const char> keyword) {
375 DCHECK_NOT_NULL(current_.literal_chars);
376 return current_.literal_chars->is_contextual_keyword(keyword);
Ben Murdoch589d6972011-11-30 16:04:58 +0000377 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000378 bool is_next_contextual_keyword(Vector<const char> keyword) {
379 DCHECK_NOT_NULL(next_.literal_chars);
380 return next_.literal_chars->is_contextual_keyword(keyword);
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100381 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000382
383 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
384 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
385
386 double DoubleValue();
387 bool UnescapedLiteralMatches(const char* data, int length) {
388 if (is_literal_one_byte() &&
389 literal_length() == length &&
390 !literal_contains_escapes()) {
391 const char* token =
392 reinterpret_cast<const char*>(literal_one_byte_string().start());
393 return !strncmp(token, data, length);
394 }
395 return false;
Ben Murdoch592a9fc2012-03-05 11:04:45 +0000396 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000397 void IsGetOrSet(bool* is_get, bool* is_set) {
398 if (is_literal_one_byte() &&
399 literal_length() == 3 &&
400 !literal_contains_escapes()) {
401 const char* token =
402 reinterpret_cast<const char*>(literal_one_byte_string().start());
403 *is_get = strncmp(token, "get", 3) == 0;
404 *is_set = !*is_get && strncmp(token, "set", 3) == 0;
405 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000406 }
407
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000408 int FindNumber(DuplicateFinder* finder, int value);
409 int FindSymbol(DuplicateFinder* finder, int value);
410
Ben Murdoch589d6972011-11-30 16:04:58 +0000411 UnicodeCache* unicode_cache() { return unicode_cache_; }
412
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100413 // Returns the location of the last seen octal literal.
414 Location octal_position() const { return octal_pos_; }
415 void clear_octal_position() { octal_pos_ = Location::invalid(); }
416
417 // Seek forward to the given position. This operation does not
418 // work in general, for instance when there are pushed back
419 // characters, but works for seeking forward until simple delimiter
420 // tokens, which is what it is used for.
421 void SeekForward(int pos);
422
423 bool HarmonyScoping() const {
424 return harmony_scoping_;
425 }
426 void SetHarmonyScoping(bool scoping) {
427 harmony_scoping_ = scoping;
428 }
429 bool HarmonyModules() const {
430 return harmony_modules_;
431 }
432 void SetHarmonyModules(bool modules) {
433 harmony_modules_ = modules;
434 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000435 bool HarmonyNumericLiterals() const {
436 return harmony_numeric_literals_;
437 }
438 void SetHarmonyNumericLiterals(bool numeric_literals) {
439 harmony_numeric_literals_ = numeric_literals;
440 }
441 bool HarmonyClasses() const {
442 return harmony_classes_;
443 }
444 void SetHarmonyClasses(bool classes) {
445 harmony_classes_ = classes;
446 }
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100447
448 // Returns true if there was a line terminator before the peek'ed token,
449 // possibly inside a multi-line comment.
450 bool HasAnyLineTerminatorBeforeNext() const {
451 return has_line_terminator_before_next_ ||
452 has_multiline_comment_before_next_;
453 }
454
455 // Scans the input as a regular expression pattern, previous
456 // character(s) must be /(=). Returns true if a pattern is scanned.
457 bool ScanRegExpPattern(bool seen_equal);
458 // Returns true if regexp flags are scanned (always since flags can
459 // be empty).
460 bool ScanRegExpFlags();
461
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000462 const LiteralBuffer* source_url() const { return &source_url_; }
463 const LiteralBuffer* source_mapping_url() const {
464 return &source_mapping_url_;
465 }
466
467 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100468
469 private:
Ben Murdoch589d6972011-11-30 16:04:58 +0000470 // The current and look-ahead token.
471 struct TokenDesc {
472 Token::Value token;
473 Location location;
474 LiteralBuffer* literal_chars;
475 };
476
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000477 static const int kCharacterLookaheadBufferSize = 1;
478
479 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
480 uc32 ScanOctalEscape(uc32 c, int length);
481
Ben Murdoch589d6972011-11-30 16:04:58 +0000482 // Call this after setting source_ to the input.
483 void Init() {
484 // Set c0_ (one character ahead)
485 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
486 Advance();
487 // Initialize current_ to not refer to a literal.
488 current_.literal_chars = NULL;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100489 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000490
491 // Literal buffer support
492 inline void StartLiteral() {
493 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
494 &literal_buffer2_ : &literal_buffer1_;
495 free_buffer->Reset();
496 next_.literal_chars = free_buffer;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100497 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000498
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100499 INLINE(void AddLiteralChar(uc32 c)) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000500 DCHECK_NOT_NULL(next_.literal_chars);
Ben Murdoch589d6972011-11-30 16:04:58 +0000501 next_.literal_chars->AddChar(c);
502 }
503
504 // Complete scanning of a literal.
505 inline void TerminateLiteral() {
506 // Does nothing in the current implementation.
507 }
508
509 // Stops scanning of a literal and drop the collected characters,
510 // e.g., due to an encountered error.
511 inline void DropLiteral() {
512 next_.literal_chars = NULL;
513 }
514
515 inline void AddLiteralCharAdvance() {
516 AddLiteralChar(c0_);
517 Advance();
518 }
519
520 // Low-level scanning support.
521 void Advance() { c0_ = source_->Advance(); }
522 void PushBack(uc32 ch) {
523 source_->PushBack(c0_);
524 c0_ = ch;
525 }
526
527 inline Token::Value Select(Token::Value tok) {
528 Advance();
529 return tok;
530 }
531
532 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
533 Advance();
534 if (c0_ == next) {
535 Advance();
536 return then;
537 } else {
538 return else_;
539 }
540 }
541
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000542 // Returns the literal string, if any, for the current token (the
543 // token last returned by Next()). The string is 0-terminated.
544 // Literal strings are collected for identifiers, strings, and
545 // numbers.
546 // These functions only give the correct result if the literal
547 // was scanned between calls to StartLiteral() and TerminateLiteral().
548 Vector<const uint8_t> literal_one_byte_string() {
549 DCHECK_NOT_NULL(current_.literal_chars);
550 return current_.literal_chars->one_byte_literal();
551 }
552 Vector<const uint16_t> literal_two_byte_string() {
553 DCHECK_NOT_NULL(current_.literal_chars);
554 return current_.literal_chars->two_byte_literal();
555 }
556 bool is_literal_one_byte() {
557 DCHECK_NOT_NULL(current_.literal_chars);
558 return current_.literal_chars->is_one_byte();
559 }
560 int literal_length() const {
561 DCHECK_NOT_NULL(current_.literal_chars);
562 return current_.literal_chars->length();
563 }
564 // Returns the literal string for the next token (the token that
565 // would be returned if Next() were called).
566 Vector<const uint8_t> next_literal_one_byte_string() {
567 DCHECK_NOT_NULL(next_.literal_chars);
568 return next_.literal_chars->one_byte_literal();
569 }
570 Vector<const uint16_t> next_literal_two_byte_string() {
571 DCHECK_NOT_NULL(next_.literal_chars);
572 return next_.literal_chars->two_byte_literal();
573 }
574 bool is_next_literal_one_byte() {
575 DCHECK_NOT_NULL(next_.literal_chars);
576 return next_.literal_chars->is_one_byte();
577 }
578 int next_literal_length() const {
579 DCHECK_NOT_NULL(next_.literal_chars);
580 return next_.literal_chars->length();
581 }
582
Ben Murdoch589d6972011-11-30 16:04:58 +0000583 uc32 ScanHexNumber(int expected_length);
584
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100585 // Scans a single JavaScript token.
586 void Scan();
587
588 bool SkipWhiteSpace();
589 Token::Value SkipSingleLineComment();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000590 Token::Value SkipSourceURLComment();
591 void TryToParseSourceURLComment();
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100592 Token::Value SkipMultiLineComment();
593 // Scans a possible HTML comment -- begins with '<!'.
594 Token::Value ScanHtmlComment();
595
596 void ScanDecimalDigits();
597 Token::Value ScanNumber(bool seen_period);
598 Token::Value ScanIdentifierOrKeyword();
599 Token::Value ScanIdentifierSuffix(LiteralScope* literal);
600
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100601 Token::Value ScanString();
602
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000603 // Scans an escape-sequence which is part of a string and adds the
604 // decoded character to the current literal. Returns true if a pattern
605 // is scanned.
606 bool ScanEscape();
607 // Decodes a Unicode escape-sequence which is part of an identifier.
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100608 // If the escape sequence cannot be decoded the result is kBadChar.
609 uc32 ScanIdentifierUnicodeEscape();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000610 // Scans a Unicode escape-sequence and adds its characters,
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100611 // uninterpreted, to the current literal. Used for parsing RegExp
612 // flags.
613 bool ScanLiteralUnicodeEscape();
614
Ben Murdoch589d6972011-11-30 16:04:58 +0000615 // Return the current source position.
616 int source_pos() {
617 return source_->pos() - kCharacterLookaheadBufferSize;
618 }
619
620 UnicodeCache* unicode_cache_;
621
622 // Buffers collecting literal strings, numbers, etc.
623 LiteralBuffer literal_buffer1_;
624 LiteralBuffer literal_buffer2_;
625
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000626 // Values parsed from magic comments.
627 LiteralBuffer source_url_;
628 LiteralBuffer source_mapping_url_;
629
Ben Murdoch589d6972011-11-30 16:04:58 +0000630 TokenDesc current_; // desc for current token (as returned by Next())
631 TokenDesc next_; // desc for next token (one token look-ahead)
632
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100633 // Input stream. Must be initialized to an Utf16CharacterStream.
634 Utf16CharacterStream* source_;
Ben Murdoch589d6972011-11-30 16:04:58 +0000635
Ben Murdoch85b71792012-04-11 18:30:58 +0100636
637 // Start position of the octal literal last scanned.
638 Location octal_pos_;
Ben Murdoch592a9fc2012-03-05 11:04:45 +0000639
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100640 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
641 uc32 c0_;
642
Ben Murdoch589d6972011-11-30 16:04:58 +0000643 // Whether there is a line terminator whitespace character after
644 // the current token, and before the next. Does not count newlines
645 // inside multiline comments.
646 bool has_line_terminator_before_next_;
647 // Whether there is a multi-line comment that contains a
648 // line-terminator after the current token, and before the next.
649 bool has_multiline_comment_before_next_;
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100650 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings.
651 bool harmony_scoping_;
652 // Whether we scan 'module', 'import', 'export' as keywords.
653 bool harmony_modules_;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000654 // Whether we scan 0o777 and 0b111 as numbers.
655 bool harmony_numeric_literals_;
656 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords.
657 bool harmony_classes_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000658};
659
Steve Blocka7e24c12009-10-30 11:49:00 +0000660} } // namespace v8::internal
661
662#endif // V8_SCANNER_H_