// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Features shared by parsing and pre-parsing scanners.

Steve Blocka7e24c12009-10-30 11:49:00 +00007#ifndef V8_SCANNER_H_
8#define V8_SCANNER_H_
9
Ben Murdochb8a8cc12014-11-26 15:28:44 +000010#include "src/allocation.h"
11#include "src/base/logging.h"
12#include "src/char-predicates.h"
13#include "src/globals.h"
14#include "src/hashmap.h"
15#include "src/list.h"
16#include "src/token.h"
17#include "src/unicode-inl.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040018#include "src/unicode-decoder.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000019#include "src/utils.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000020
21namespace v8 {
22namespace internal {
23
Ben Murdoch3ef787d2012-04-12 10:51:47 +010024
Ben Murdochb8a8cc12014-11-26 15:28:44 +000025class AstRawString;
26class AstValueFactory;
27class ParserRecorder;
Ben Murdoch3ef787d2012-04-12 10:51:47 +010028
29
Ben Murdoch589d6972011-11-30 16:04:58 +000030// Returns the value (0 .. 15) of a hexadecimal character c.
31// If c is not a legal hexadecimal character, returns a value < 0.
32inline int HexValue(uc32 c) {
33 c -= '0';
34 if (static_cast<unsigned>(c) <= 9) return c;
35 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
36 if (static_cast<unsigned>(c) <= 5) return c + 10;
37 return -1;
38}
Steve Blocka7e24c12009-10-30 11:49:00 +000039
Ben Murdoch589d6972011-11-30 16:04:58 +000040
41// ---------------------------------------------------------------------
Ben Murdoch3ef787d2012-04-12 10:51:47 +010042// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
43// A code unit is a 16 bit value representing either a 16 bit code point
44// or one part of a surrogate pair that make a single 21 bit code point.
Ben Murdoch589d6972011-11-30 16:04:58 +000045
Ben Murdoch3ef787d2012-04-12 10:51:47 +010046class Utf16CharacterStream {
Ben Murdoch589d6972011-11-30 16:04:58 +000047 public:
Ben Murdoch3ef787d2012-04-12 10:51:47 +010048 Utf16CharacterStream() : pos_(0) { }
49 virtual ~Utf16CharacterStream() { }
Ben Murdoch589d6972011-11-30 16:04:58 +000050
Ben Murdoch3ef787d2012-04-12 10:51:47 +010051 // Returns and advances past the next UTF-16 code unit in the input
52 // stream. If there are no more code units, it returns a negative
Ben Murdoch589d6972011-11-30 16:04:58 +000053 // value.
54 inline uc32 Advance() {
55 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
56 pos_++;
57 return static_cast<uc32>(*(buffer_cursor_++));
58 }
59 // Note: currently the following increment is necessary to avoid a
60 // parser problem! The scanner treats the final kEndOfInput as
Ben Murdoch3ef787d2012-04-12 10:51:47 +010061 // a code unit with a position, and does math relative to that
Ben Murdoch589d6972011-11-30 16:04:58 +000062 // position.
63 pos_++;
64
65 return kEndOfInput;
66 }
67
Ben Murdoch3ef787d2012-04-12 10:51:47 +010068 // Return the current position in the code unit stream.
Ben Murdoch589d6972011-11-30 16:04:58 +000069 // Starts at zero.
70 inline unsigned pos() const { return pos_; }
71
Ben Murdoch3ef787d2012-04-12 10:51:47 +010072 // Skips forward past the next code_unit_count UTF-16 code units
Ben Murdoch589d6972011-11-30 16:04:58 +000073 // in the input, or until the end of input if that comes sooner.
Ben Murdoch3ef787d2012-04-12 10:51:47 +010074 // Returns the number of code units actually skipped. If less
75 // than code_unit_count,
76 inline unsigned SeekForward(unsigned code_unit_count) {
Ben Murdoch589d6972011-11-30 16:04:58 +000077 unsigned buffered_chars =
78 static_cast<unsigned>(buffer_end_ - buffer_cursor_);
Ben Murdoch3ef787d2012-04-12 10:51:47 +010079 if (code_unit_count <= buffered_chars) {
80 buffer_cursor_ += code_unit_count;
81 pos_ += code_unit_count;
82 return code_unit_count;
Ben Murdoch589d6972011-11-30 16:04:58 +000083 }
Ben Murdoch3ef787d2012-04-12 10:51:47 +010084 return SlowSeekForward(code_unit_count);
Ben Murdoch589d6972011-11-30 16:04:58 +000085 }
86
Ben Murdoch3ef787d2012-04-12 10:51:47 +010087 // Pushes back the most recently read UTF-16 code unit (or negative
Ben Murdoch589d6972011-11-30 16:04:58 +000088 // value if at end of input), i.e., the value returned by the most recent
89 // call to Advance.
90 // Must not be used right after calling SeekForward.
Ben Murdoch3ef787d2012-04-12 10:51:47 +010091 virtual void PushBack(int32_t code_unit) = 0;
Steve Blocka7e24c12009-10-30 11:49:00 +000092
Ben Murdochb0fe1622011-05-05 13:52:32 +010093 protected:
Ben Murdoch589d6972011-11-30 16:04:58 +000094 static const uc32 kEndOfInput = -1;
Ben Murdochb0fe1622011-05-05 13:52:32 +010095
Ben Murdoch3ef787d2012-04-12 10:51:47 +010096 // Ensures that the buffer_cursor_ points to the code_unit at
Ben Murdoch589d6972011-11-30 16:04:58 +000097 // position pos_ of the input, if possible. If the position
98 // is at or after the end of the input, return false. If there
Ben Murdoch3ef787d2012-04-12 10:51:47 +010099 // are more code_units available, return true.
Ben Murdoch589d6972011-11-30 16:04:58 +0000100 virtual bool ReadBlock() = 0;
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100101 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100102
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000103 const uint16_t* buffer_cursor_;
104 const uint16_t* buffer_end_;
Ben Murdoch589d6972011-11-30 16:04:58 +0000105 unsigned pos_;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100106};
107
108
Ben Murdoch589d6972011-11-30 16:04:58 +0000109// ---------------------------------------------------------------------
110// Caching predicates used by scanners.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000111
112class UnicodeCache {
Ben Murdochb0fe1622011-05-05 13:52:32 +0100113 public:
Ben Murdoch589d6972011-11-30 16:04:58 +0000114 UnicodeCache() {}
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000115 typedef unibrow::Utf8Decoder<512> Utf8Decoder;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100116
Ben Murdoch589d6972011-11-30 16:04:58 +0000117 StaticResource<Utf8Decoder>* utf8_decoder() {
118 return &utf8_decoder_;
119 }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100120
Ben Murdoch589d6972011-11-30 16:04:58 +0000121 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); }
122 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
123 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
124 bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000125 bool IsWhiteSpaceOrLineTerminator(unibrow::uchar c) {
126 return kIsWhiteSpaceOrLineTerminator.get(c);
127 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000128
129 private:
130 unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
131 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
132 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000133 unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
134 unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
135 kIsWhiteSpaceOrLineTerminator;
Ben Murdoch589d6972011-11-30 16:04:58 +0000136 StaticResource<Utf8Decoder> utf8_decoder_;
137
138 DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
Ben Murdochb0fe1622011-05-05 13:52:32 +0100139};
140
141
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000142// ---------------------------------------------------------------------
143// DuplicateFinder discovers duplicate symbols.
144
145class DuplicateFinder {
146 public:
147 explicit DuplicateFinder(UnicodeCache* constants)
148 : unicode_constants_(constants),
149 backing_store_(16),
150 map_(&Match) { }
151
152 int AddOneByteSymbol(Vector<const uint8_t> key, int value);
153 int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
154 // Add a a number literal by converting it (if necessary)
155 // to the string that ToString(ToNumber(literal)) would generate.
156 // and then adding that string with AddOneByteSymbol.
157 // This string is the actual value used as key in an object literal,
158 // and the one that must be different from the other keys.
159 int AddNumber(Vector<const uint8_t> key, int value);
160
161 private:
162 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
163 // Backs up the key and its length in the backing store.
164 // The backup is stored with a base 127 encoding of the
165 // length (plus a bit saying whether the string is one byte),
166 // followed by the bytes of the key.
167 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
168
169 // Compare two encoded keys (both pointing into the backing store)
170 // for having the same base-127 encoded lengths and representation.
171 // and then having the same 'length' bytes following.
172 static bool Match(void* first, void* second);
173 // Creates a hash from a sequence of bytes.
174 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
175 // Checks whether a string containing a JS number is its canonical
176 // form.
177 static bool IsNumberCanonical(Vector<const uint8_t> key);
178
179 // Size of buffer. Sufficient for using it to call DoubleToCString in
180 // from conversions.h.
181 static const int kBufferSize = 100;
182
183 UnicodeCache* unicode_constants_;
184 // Backing store used to store strings used as hashmap keys.
185 SequenceCollector<unsigned char> backing_store_;
186 HashMap map_;
187 // Buffer used for string->number->canonical string conversions.
188 char number_buffer_[kBufferSize];
189};
190
191
Ben Murdoch589d6972011-11-30 16:04:58 +0000192// ----------------------------------------------------------------------------
193// LiteralBuffer - Collector of chars of literals.
194
195class LiteralBuffer {
Ben Murdochb0fe1622011-05-05 13:52:32 +0100196 public:
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000197 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100198
Ben Murdoch589d6972011-11-30 16:04:58 +0000199 ~LiteralBuffer() {
200 if (backing_store_.length() > 0) {
201 backing_store_.Dispose();
202 }
203 }
Ben Murdochb0fe1622011-05-05 13:52:32 +0100204
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100205 INLINE(void AddChar(uint32_t code_unit)) {
Ben Murdoch589d6972011-11-30 16:04:58 +0000206 if (position_ >= backing_store_.length()) ExpandBuffer();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000207 if (is_one_byte_) {
208 if (code_unit <= unibrow::Latin1::kMaxChar) {
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100209 backing_store_[position_] = static_cast<byte>(code_unit);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000210 position_ += kOneByteSize;
Ben Murdoch589d6972011-11-30 16:04:58 +0000211 return;
212 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000213 ConvertToTwoByte();
Ben Murdoch589d6972011-11-30 16:04:58 +0000214 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400215 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
216 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
217 position_ += kUC16Size;
218 } else {
219 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
220 unibrow::Utf16::LeadSurrogate(code_unit);
221 position_ += kUC16Size;
222 if (position_ >= backing_store_.length()) ExpandBuffer();
223 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
224 unibrow::Utf16::TrailSurrogate(code_unit);
225 position_ += kUC16Size;
226 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000227 }
228
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000229 bool is_one_byte() const { return is_one_byte_; }
Ben Murdoch589d6972011-11-30 16:04:58 +0000230
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000231 bool is_contextual_keyword(Vector<const char> keyword) const {
232 return is_one_byte() && keyword.length() == position_ &&
233 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
234 }
235
236 Vector<const uint16_t> two_byte_literal() const {
237 DCHECK(!is_one_byte_);
238 DCHECK((position_ & 0x1) == 0);
239 return Vector<const uint16_t>(
240 reinterpret_cast<const uint16_t*>(backing_store_.start()),
Ben Murdoch589d6972011-11-30 16:04:58 +0000241 position_ >> 1);
242 }
243
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000244 Vector<const uint8_t> one_byte_literal() const {
245 DCHECK(is_one_byte_);
246 return Vector<const uint8_t>(
247 reinterpret_cast<const uint8_t*>(backing_store_.start()),
Ben Murdoch589d6972011-11-30 16:04:58 +0000248 position_);
249 }
250
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000251 int length() const {
252 return is_one_byte_ ? position_ : (position_ >> 1);
Ben Murdoch589d6972011-11-30 16:04:58 +0000253 }
254
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400255 void ReduceLength(int delta) {
256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
257 }
258
Ben Murdoch589d6972011-11-30 16:04:58 +0000259 void Reset() {
260 position_ = 0;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000261 is_one_byte_ = true;
Ben Murdoch589d6972011-11-30 16:04:58 +0000262 }
263
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000264 Handle<String> Internalize(Isolate* isolate) const;
265
Ben Murdoch589d6972011-11-30 16:04:58 +0000266 private:
267 static const int kInitialCapacity = 16;
268 static const int kGrowthFactory = 4;
269 static const int kMinConversionSlack = 256;
270 static const int kMaxGrowth = 1 * MB;
271 inline int NewCapacity(int min_capacity) {
272 int capacity = Max(min_capacity, backing_store_.length());
273 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
274 return new_capacity;
275 }
276
277 void ExpandBuffer() {
278 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000279 MemCopy(new_store.start(), backing_store_.start(), position_);
Ben Murdoch589d6972011-11-30 16:04:58 +0000280 backing_store_.Dispose();
281 backing_store_ = new_store;
282 }
283
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000284 void ConvertToTwoByte() {
285 DCHECK(is_one_byte_);
Ben Murdoch589d6972011-11-30 16:04:58 +0000286 Vector<byte> new_store;
287 int new_content_size = position_ * kUC16Size;
288 if (new_content_size >= backing_store_.length()) {
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100289 // Ensure room for all currently read code units as UC16 as well
290 // as the code unit about to be stored.
Ben Murdoch589d6972011-11-30 16:04:58 +0000291 new_store = Vector<byte>::New(NewCapacity(new_content_size));
292 } else {
293 new_store = backing_store_;
294 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000295 uint8_t* src = backing_store_.start();
296 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
Ben Murdoch589d6972011-11-30 16:04:58 +0000297 for (int i = position_ - 1; i >= 0; i--) {
298 dst[i] = src[i];
299 }
300 if (new_store.start() != backing_store_.start()) {
301 backing_store_.Dispose();
302 backing_store_ = new_store;
303 }
304 position_ = new_content_size;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000305 is_one_byte_ = false;
Ben Murdoch589d6972011-11-30 16:04:58 +0000306 }
307
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000308 bool is_one_byte_;
Ben Murdoch589d6972011-11-30 16:04:58 +0000309 int position_;
310 Vector<byte> backing_store_;
311
312 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
Steve Blocka7e24c12009-10-30 11:49:00 +0000313};
314
315
Ben Murdoch589d6972011-11-30 16:04:58 +0000316// ----------------------------------------------------------------------------
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100317// JavaScript Scanner.
Steve Blocka7e24c12009-10-30 11:49:00 +0000318
Ben Murdoch589d6972011-11-30 16:04:58 +0000319class Scanner {
320 public:
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100321 // Scoped helper for literal recording. Automatically drops the literal
322 // if aborting the scanning before it's complete.
Ben Murdoch589d6972011-11-30 16:04:58 +0000323 class LiteralScope {
324 public:
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400325 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100326 scanner_->StartLiteral();
327 }
328 ~LiteralScope() {
329 if (!complete_) scanner_->DropLiteral();
330 }
331 void Complete() {
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100332 complete_ = true;
333 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000334
335 private:
336 Scanner* scanner_;
337 bool complete_;
338 };
339
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100340 // Representation of an interval of source positions.
Ben Murdoch589d6972011-11-30 16:04:58 +0000341 struct Location {
342 Location(int b, int e) : beg_pos(b), end_pos(e) { }
343 Location() : beg_pos(0), end_pos(0) { }
344
345 bool IsValid() const {
346 return beg_pos >= 0 && end_pos >= beg_pos;
347 }
348
349 static Location invalid() { return Location(-1, -1); }
350
351 int beg_pos;
352 int end_pos;
353 };
354
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100355 // -1 is outside of the range of any real source code.
356 static const int kNoOctalLocation = -1;
Ben Murdoch85b71792012-04-11 18:30:58 +0100357
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100358 explicit Scanner(UnicodeCache* scanner_contants);
359
360 void Initialize(Utf16CharacterStream* source);
361
362 // Returns the next token and advances input.
363 Token::Value Next();
364 // Returns the current token again.
365 Token::Value current_token() { return current_.token; }
366 // Returns the location information for the current token
367 // (the token last returned by Next()).
368 Location location() const { return current_.location; }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000369
370 // Similar functions for the upcoming token.
371
372 // One token look-ahead (past the token returned by Next()).
373 Token::Value peek() const { return next_.token; }
374
375 Location peek_location() const { return next_.location; }
Ben Murdoch589d6972011-11-30 16:04:58 +0000376
377 bool literal_contains_escapes() const {
378 Location location = current_.location;
379 int source_length = (location.end_pos - location.beg_pos);
380 if (current_.token == Token::STRING) {
381 // Subtract delimiters.
382 source_length -= 2;
383 }
384 return current_.literal_chars->length() != source_length;
385 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000386 bool is_literal_contextual_keyword(Vector<const char> keyword) {
387 DCHECK_NOT_NULL(current_.literal_chars);
388 return current_.literal_chars->is_contextual_keyword(keyword);
Ben Murdoch589d6972011-11-30 16:04:58 +0000389 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000390 bool is_next_contextual_keyword(Vector<const char> keyword) {
391 DCHECK_NOT_NULL(next_.literal_chars);
392 return next_.literal_chars->is_contextual_keyword(keyword);
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100393 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000394
395 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
396 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400397 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000398
399 double DoubleValue();
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400400 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000401 if (is_literal_one_byte() &&
402 literal_length() == length &&
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400403 (allow_escapes || !literal_contains_escapes())) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000404 const char* token =
405 reinterpret_cast<const char*>(literal_one_byte_string().start());
406 return !strncmp(token, data, length);
407 }
408 return false;
Ben Murdoch592a9fc2012-03-05 11:04:45 +0000409 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400410 inline bool UnescapedLiteralMatches(const char* data, int length) {
411 return LiteralMatches(data, length, false);
412 }
413
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000414 void IsGetOrSet(bool* is_get, bool* is_set) {
415 if (is_literal_one_byte() &&
416 literal_length() == 3 &&
417 !literal_contains_escapes()) {
418 const char* token =
419 reinterpret_cast<const char*>(literal_one_byte_string().start());
420 *is_get = strncmp(token, "get", 3) == 0;
421 *is_set = !*is_get && strncmp(token, "set", 3) == 0;
422 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000423 }
424
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000425 int FindNumber(DuplicateFinder* finder, int value);
426 int FindSymbol(DuplicateFinder* finder, int value);
427
Ben Murdoch589d6972011-11-30 16:04:58 +0000428 UnicodeCache* unicode_cache() { return unicode_cache_; }
429
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100430 // Returns the location of the last seen octal literal.
431 Location octal_position() const { return octal_pos_; }
432 void clear_octal_position() { octal_pos_ = Location::invalid(); }
433
434 // Seek forward to the given position. This operation does not
435 // work in general, for instance when there are pushed back
436 // characters, but works for seeking forward until simple delimiter
437 // tokens, which is what it is used for.
438 void SeekForward(int pos);
439
440 bool HarmonyScoping() const {
441 return harmony_scoping_;
442 }
443 void SetHarmonyScoping(bool scoping) {
444 harmony_scoping_ = scoping;
445 }
446 bool HarmonyModules() const {
447 return harmony_modules_;
448 }
449 void SetHarmonyModules(bool modules) {
450 harmony_modules_ = modules;
451 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000452 bool HarmonyNumericLiterals() const {
453 return harmony_numeric_literals_;
454 }
455 void SetHarmonyNumericLiterals(bool numeric_literals) {
456 harmony_numeric_literals_ = numeric_literals;
457 }
458 bool HarmonyClasses() const {
459 return harmony_classes_;
460 }
461 void SetHarmonyClasses(bool classes) {
462 harmony_classes_ = classes;
463 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400464 bool HarmonyTemplates() const { return harmony_templates_; }
465 void SetHarmonyTemplates(bool templates) { harmony_templates_ = templates; }
466 bool HarmonyUnicode() const { return harmony_unicode_; }
467 void SetHarmonyUnicode(bool unicode) { harmony_unicode_ = unicode; }
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100468
469 // Returns true if there was a line terminator before the peek'ed token,
470 // possibly inside a multi-line comment.
471 bool HasAnyLineTerminatorBeforeNext() const {
472 return has_line_terminator_before_next_ ||
473 has_multiline_comment_before_next_;
474 }
475
476 // Scans the input as a regular expression pattern, previous
477 // character(s) must be /(=). Returns true if a pattern is scanned.
478 bool ScanRegExpPattern(bool seen_equal);
479 // Returns true if regexp flags are scanned (always since flags can
480 // be empty).
481 bool ScanRegExpFlags();
482
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400483 // Scans the input as a template literal
484 Token::Value ScanTemplateStart();
485 Token::Value ScanTemplateContinuation();
486
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000487 const LiteralBuffer* source_url() const { return &source_url_; }
488 const LiteralBuffer* source_mapping_url() const {
489 return &source_mapping_url_;
490 }
491
492 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100493
494 private:
Ben Murdoch589d6972011-11-30 16:04:58 +0000495 // The current and look-ahead token.
496 struct TokenDesc {
497 Token::Value token;
498 Location location;
499 LiteralBuffer* literal_chars;
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400500 LiteralBuffer* raw_literal_chars;
Ben Murdoch589d6972011-11-30 16:04:58 +0000501 };
502
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000503 static const int kCharacterLookaheadBufferSize = 1;
504
505 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400506 template <bool capture_raw>
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000507 uc32 ScanOctalEscape(uc32 c, int length);
508
Ben Murdoch589d6972011-11-30 16:04:58 +0000509 // Call this after setting source_ to the input.
510 void Init() {
511 // Set c0_ (one character ahead)
512 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
513 Advance();
514 // Initialize current_ to not refer to a literal.
515 current_.literal_chars = NULL;
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400516 current_.raw_literal_chars = NULL;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100517 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000518
519 // Literal buffer support
520 inline void StartLiteral() {
521 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
522 &literal_buffer2_ : &literal_buffer1_;
523 free_buffer->Reset();
524 next_.literal_chars = free_buffer;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100525 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000526
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400527 inline void StartRawLiteral() {
528 raw_literal_buffer_.Reset();
529 next_.raw_literal_chars = &raw_literal_buffer_;
530 }
531
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100532 INLINE(void AddLiteralChar(uc32 c)) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000533 DCHECK_NOT_NULL(next_.literal_chars);
Ben Murdoch589d6972011-11-30 16:04:58 +0000534 next_.literal_chars->AddChar(c);
535 }
536
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400537 INLINE(void AddRawLiteralChar(uc32 c)) {
538 DCHECK_NOT_NULL(next_.raw_literal_chars);
539 next_.raw_literal_chars->AddChar(c);
540 }
541
542 INLINE(void ReduceRawLiteralLength(int delta)) {
543 DCHECK_NOT_NULL(next_.raw_literal_chars);
544 next_.raw_literal_chars->ReduceLength(delta);
Ben Murdoch589d6972011-11-30 16:04:58 +0000545 }
546
547 // Stops scanning of a literal and drop the collected characters,
548 // e.g., due to an encountered error.
549 inline void DropLiteral() {
550 next_.literal_chars = NULL;
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400551 next_.raw_literal_chars = NULL;
Ben Murdoch589d6972011-11-30 16:04:58 +0000552 }
553
554 inline void AddLiteralCharAdvance() {
555 AddLiteralChar(c0_);
556 Advance();
557 }
558
559 // Low-level scanning support.
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400560 template <bool capture_raw = false>
561 void Advance() {
562 if (capture_raw) {
563 AddRawLiteralChar(c0_);
564 }
565 c0_ = source_->Advance();
566 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
567 uc32 c1 = source_->Advance();
568 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
569 source_->PushBack(c1);
570 } else {
571 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
572 }
573 }
574 }
575
Ben Murdoch589d6972011-11-30 16:04:58 +0000576 void PushBack(uc32 ch) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400577 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
578 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
579 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
580 } else {
581 source_->PushBack(c0_);
582 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000583 c0_ = ch;
584 }
585
586 inline Token::Value Select(Token::Value tok) {
587 Advance();
588 return tok;
589 }
590
591 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
592 Advance();
593 if (c0_ == next) {
594 Advance();
595 return then;
596 } else {
597 return else_;
598 }
599 }
600
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000601 // Returns the literal string, if any, for the current token (the
602 // token last returned by Next()). The string is 0-terminated.
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400603 // Literal strings are collected for identifiers, strings, numbers as well
604 // as for template literals. For template literals we also collect the raw
605 // form.
606 // These functions only give the correct result if the literal was scanned
607 // when a LiteralScope object is alive.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000608 Vector<const uint8_t> literal_one_byte_string() {
609 DCHECK_NOT_NULL(current_.literal_chars);
610 return current_.literal_chars->one_byte_literal();
611 }
612 Vector<const uint16_t> literal_two_byte_string() {
613 DCHECK_NOT_NULL(current_.literal_chars);
614 return current_.literal_chars->two_byte_literal();
615 }
616 bool is_literal_one_byte() {
617 DCHECK_NOT_NULL(current_.literal_chars);
618 return current_.literal_chars->is_one_byte();
619 }
620 int literal_length() const {
621 DCHECK_NOT_NULL(current_.literal_chars);
622 return current_.literal_chars->length();
623 }
624 // Returns the literal string for the next token (the token that
625 // would be returned if Next() were called).
626 Vector<const uint8_t> next_literal_one_byte_string() {
627 DCHECK_NOT_NULL(next_.literal_chars);
628 return next_.literal_chars->one_byte_literal();
629 }
630 Vector<const uint16_t> next_literal_two_byte_string() {
631 DCHECK_NOT_NULL(next_.literal_chars);
632 return next_.literal_chars->two_byte_literal();
633 }
634 bool is_next_literal_one_byte() {
635 DCHECK_NOT_NULL(next_.literal_chars);
636 return next_.literal_chars->is_one_byte();
637 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400638 Vector<const uint8_t> raw_literal_one_byte_string() {
639 DCHECK_NOT_NULL(current_.raw_literal_chars);
640 return current_.raw_literal_chars->one_byte_literal();
641 }
642 Vector<const uint16_t> raw_literal_two_byte_string() {
643 DCHECK_NOT_NULL(current_.raw_literal_chars);
644 return current_.raw_literal_chars->two_byte_literal();
645 }
646 bool is_raw_literal_one_byte() {
647 DCHECK_NOT_NULL(current_.raw_literal_chars);
648 return current_.raw_literal_chars->is_one_byte();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000649 }
650
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400651 template <bool capture_raw>
Ben Murdoch589d6972011-11-30 16:04:58 +0000652 uc32 ScanHexNumber(int expected_length);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400653 // Scan a number of any length but not bigger than max_value. For example, the
654 // number can be 000000001, so it's very long in characters but its value is
655 // small.
656 template <bool capture_raw>
657 uc32 ScanUnlimitedLengthHexNumber(int max_value);
Ben Murdoch589d6972011-11-30 16:04:58 +0000658
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100659 // Scans a single JavaScript token.
660 void Scan();
661
662 bool SkipWhiteSpace();
663 Token::Value SkipSingleLineComment();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000664 Token::Value SkipSourceURLComment();
665 void TryToParseSourceURLComment();
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100666 Token::Value SkipMultiLineComment();
667 // Scans a possible HTML comment -- begins with '<!'.
668 Token::Value ScanHtmlComment();
669
670 void ScanDecimalDigits();
671 Token::Value ScanNumber(bool seen_period);
672 Token::Value ScanIdentifierOrKeyword();
673 Token::Value ScanIdentifierSuffix(LiteralScope* literal);
674
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100675 Token::Value ScanString();
676
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000677 // Scans an escape-sequence which is part of a string and adds the
678 // decoded character to the current literal. Returns true if a pattern
679 // is scanned.
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400680 template <bool capture_raw, bool in_template_literal>
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000681 bool ScanEscape();
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400682
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000683 // Decodes a Unicode escape-sequence which is part of an identifier.
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100684 // If the escape sequence cannot be decoded the result is kBadChar.
685 uc32 ScanIdentifierUnicodeEscape();
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400686 // Helper for the above functions.
687 template <bool capture_raw>
688 uc32 ScanUnicodeEscape();
689
690 Token::Value ScanTemplateSpan();
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100691
Ben Murdoch589d6972011-11-30 16:04:58 +0000692 // Return the current source position.
693 int source_pos() {
694 return source_->pos() - kCharacterLookaheadBufferSize;
695 }
696
697 UnicodeCache* unicode_cache_;
698
699 // Buffers collecting literal strings, numbers, etc.
700 LiteralBuffer literal_buffer1_;
701 LiteralBuffer literal_buffer2_;
702
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000703 // Values parsed from magic comments.
704 LiteralBuffer source_url_;
705 LiteralBuffer source_mapping_url_;
706
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400707 // Buffer to store raw string values
708 LiteralBuffer raw_literal_buffer_;
709
Ben Murdoch589d6972011-11-30 16:04:58 +0000710 TokenDesc current_; // desc for current token (as returned by Next())
711 TokenDesc next_; // desc for next token (one token look-ahead)
712
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100713 // Input stream. Must be initialized to an Utf16CharacterStream.
714 Utf16CharacterStream* source_;
Ben Murdoch589d6972011-11-30 16:04:58 +0000715
Ben Murdoch85b71792012-04-11 18:30:58 +0100716
717 // Start position of the octal literal last scanned.
718 Location octal_pos_;
Ben Murdoch592a9fc2012-03-05 11:04:45 +0000719
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100720 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
721 uc32 c0_;
722
Ben Murdoch589d6972011-11-30 16:04:58 +0000723 // Whether there is a line terminator whitespace character after
724 // the current token, and before the next. Does not count newlines
725 // inside multiline comments.
726 bool has_line_terminator_before_next_;
727 // Whether there is a multi-line comment that contains a
728 // line-terminator after the current token, and before the next.
729 bool has_multiline_comment_before_next_;
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100730 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings.
731 bool harmony_scoping_;
732 // Whether we scan 'module', 'import', 'export' as keywords.
733 bool harmony_modules_;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000734 // Whether we scan 0o777 and 0b111 as numbers.
735 bool harmony_numeric_literals_;
736 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords.
737 bool harmony_classes_;
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400738 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL
739 bool harmony_templates_;
740 // Whether we allow \u{xxxxx}.
741 bool harmony_unicode_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000742};
743
Steve Blocka7e24c12009-10-30 11:49:00 +0000744} } // namespace v8::internal
745
746#endif // V8_SCANNER_H_