blob: 3f9bbb54a4bc698c9c3539dac7f7c77ae5e46ba0 [file] [log] [blame]
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Features shared by parsing and pre-parsing scanners.
6
7#ifndef V8_PARSING_SCANNER_H_
8#define V8_PARSING_SCANNER_H_
9
10#include "src/allocation.h"
11#include "src/base/logging.h"
12#include "src/char-predicates.h"
13#include "src/globals.h"
14#include "src/hashmap.h"
15#include "src/list.h"
16#include "src/parsing/token.h"
17#include "src/unicode.h"
18#include "src/unicode-decoder.h"
19#include "src/utils.h"
20
21namespace v8 {
22namespace internal {
23
24
25class AstRawString;
26class AstValueFactory;
27class ParserRecorder;
28class UnicodeCache;
29
30
31// ---------------------------------------------------------------------
// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
// A code unit is a 16 bit value representing either a 16 bit code point
// or one half of a surrogate pair; the two halves of a pair together encode
// a single 21 bit code point.
35
36class Utf16CharacterStream {
37 public:
38 Utf16CharacterStream() : pos_(0) { }
39 virtual ~Utf16CharacterStream() { }
40
41 // Returns and advances past the next UTF-16 code unit in the input
42 // stream. If there are no more code units, it returns a negative
43 // value.
44 inline uc32 Advance() {
45 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
46 pos_++;
47 return static_cast<uc32>(*(buffer_cursor_++));
48 }
49 // Note: currently the following increment is necessary to avoid a
50 // parser problem! The scanner treats the final kEndOfInput as
51 // a code unit with a position, and does math relative to that
52 // position.
53 pos_++;
54
55 return kEndOfInput;
56 }
57
58 // Return the current position in the code unit stream.
59 // Starts at zero.
60 inline size_t pos() const { return pos_; }
61
62 // Skips forward past the next code_unit_count UTF-16 code units
63 // in the input, or until the end of input if that comes sooner.
64 // Returns the number of code units actually skipped. If less
65 // than code_unit_count,
66 inline size_t SeekForward(size_t code_unit_count) {
67 size_t buffered_chars = buffer_end_ - buffer_cursor_;
68 if (code_unit_count <= buffered_chars) {
69 buffer_cursor_ += code_unit_count;
70 pos_ += code_unit_count;
71 return code_unit_count;
72 }
73 return SlowSeekForward(code_unit_count);
74 }
75
76 // Pushes back the most recently read UTF-16 code unit (or negative
77 // value if at end of input), i.e., the value returned by the most recent
78 // call to Advance.
79 // Must not be used right after calling SeekForward.
80 virtual void PushBack(int32_t code_unit) = 0;
81
82 virtual bool SetBookmark();
83 virtual void ResetToBookmark();
84
85 protected:
86 static const uc32 kEndOfInput = -1;
87
88 // Ensures that the buffer_cursor_ points to the code_unit at
89 // position pos_ of the input, if possible. If the position
90 // is at or after the end of the input, return false. If there
91 // are more code_units available, return true.
92 virtual bool ReadBlock() = 0;
93 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
94
95 const uint16_t* buffer_cursor_;
96 const uint16_t* buffer_end_;
97 size_t pos_;
98};
99
100
101// ---------------------------------------------------------------------
102// DuplicateFinder discovers duplicate symbols.
103
104class DuplicateFinder {
105 public:
106 explicit DuplicateFinder(UnicodeCache* constants)
107 : unicode_constants_(constants),
108 backing_store_(16),
109 map_(&Match) { }
110
111 int AddOneByteSymbol(Vector<const uint8_t> key, int value);
112 int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
113 // Add a a number literal by converting it (if necessary)
114 // to the string that ToString(ToNumber(literal)) would generate.
115 // and then adding that string with AddOneByteSymbol.
116 // This string is the actual value used as key in an object literal,
117 // and the one that must be different from the other keys.
118 int AddNumber(Vector<const uint8_t> key, int value);
119
120 private:
121 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
122 // Backs up the key and its length in the backing store.
123 // The backup is stored with a base 127 encoding of the
124 // length (plus a bit saying whether the string is one byte),
125 // followed by the bytes of the key.
126 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
127
128 // Compare two encoded keys (both pointing into the backing store)
129 // for having the same base-127 encoded lengths and representation.
130 // and then having the same 'length' bytes following.
131 static bool Match(void* first, void* second);
132 // Creates a hash from a sequence of bytes.
133 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
134 // Checks whether a string containing a JS number is its canonical
135 // form.
136 static bool IsNumberCanonical(Vector<const uint8_t> key);
137
138 // Size of buffer. Sufficient for using it to call DoubleToCString in
139 // from conversions.h.
140 static const int kBufferSize = 100;
141
142 UnicodeCache* unicode_constants_;
143 // Backing store used to store strings used as hashmap keys.
144 SequenceCollector<unsigned char> backing_store_;
145 HashMap map_;
146 // Buffer used for string->number->canonical string conversions.
147 char number_buffer_[kBufferSize];
148};
149
150
151// ----------------------------------------------------------------------------
152// LiteralBuffer - Collector of chars of literals.
153
154class LiteralBuffer {
155 public:
156 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
157
158 ~LiteralBuffer() { backing_store_.Dispose(); }
159
160 INLINE(void AddChar(uint32_t code_unit)) {
161 if (position_ >= backing_store_.length()) ExpandBuffer();
162 if (is_one_byte_) {
163 if (code_unit <= unibrow::Latin1::kMaxChar) {
164 backing_store_[position_] = static_cast<byte>(code_unit);
165 position_ += kOneByteSize;
166 return;
167 }
168 ConvertToTwoByte();
169 }
170 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
171 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
172 position_ += kUC16Size;
173 } else {
174 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
175 unibrow::Utf16::LeadSurrogate(code_unit);
176 position_ += kUC16Size;
177 if (position_ >= backing_store_.length()) ExpandBuffer();
178 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
179 unibrow::Utf16::TrailSurrogate(code_unit);
180 position_ += kUC16Size;
181 }
182 }
183
184 bool is_one_byte() const { return is_one_byte_; }
185
186 bool is_contextual_keyword(Vector<const char> keyword) const {
187 return is_one_byte() && keyword.length() == position_ &&
188 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
189 }
190
191 Vector<const uint16_t> two_byte_literal() const {
192 DCHECK(!is_one_byte_);
193 DCHECK((position_ & 0x1) == 0);
194 return Vector<const uint16_t>(
195 reinterpret_cast<const uint16_t*>(backing_store_.start()),
196 position_ >> 1);
197 }
198
199 Vector<const uint8_t> one_byte_literal() const {
200 DCHECK(is_one_byte_);
201 return Vector<const uint8_t>(
202 reinterpret_cast<const uint8_t*>(backing_store_.start()),
203 position_);
204 }
205
206 int length() const {
207 return is_one_byte_ ? position_ : (position_ >> 1);
208 }
209
210 void ReduceLength(int delta) {
211 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
212 }
213
214 void Reset() {
215 position_ = 0;
216 is_one_byte_ = true;
217 }
218
219 Handle<String> Internalize(Isolate* isolate) const;
220
221 void CopyFrom(const LiteralBuffer* other) {
222 if (other == nullptr) {
223 Reset();
224 } else {
225 is_one_byte_ = other->is_one_byte_;
226 position_ = other->position_;
227 backing_store_.Dispose();
228 backing_store_ = other->backing_store_.Clone();
229 }
230 }
231
232 private:
233 static const int kInitialCapacity = 16;
234 static const int kGrowthFactory = 4;
235 static const int kMinConversionSlack = 256;
236 static const int kMaxGrowth = 1 * MB;
237 inline int NewCapacity(int min_capacity) {
238 int capacity = Max(min_capacity, backing_store_.length());
239 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
240 return new_capacity;
241 }
242
243 void ExpandBuffer() {
244 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
245 MemCopy(new_store.start(), backing_store_.start(), position_);
246 backing_store_.Dispose();
247 backing_store_ = new_store;
248 }
249
250 void ConvertToTwoByte() {
251 DCHECK(is_one_byte_);
252 Vector<byte> new_store;
253 int new_content_size = position_ * kUC16Size;
254 if (new_content_size >= backing_store_.length()) {
255 // Ensure room for all currently read code units as UC16 as well
256 // as the code unit about to be stored.
257 new_store = Vector<byte>::New(NewCapacity(new_content_size));
258 } else {
259 new_store = backing_store_;
260 }
261 uint8_t* src = backing_store_.start();
262 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
263 for (int i = position_ - 1; i >= 0; i--) {
264 dst[i] = src[i];
265 }
266 if (new_store.start() != backing_store_.start()) {
267 backing_store_.Dispose();
268 backing_store_ = new_store;
269 }
270 position_ = new_content_size;
271 is_one_byte_ = false;
272 }
273
274 bool is_one_byte_;
275 int position_;
276 Vector<byte> backing_store_;
277
278 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
279};
280
281
282// ----------------------------------------------------------------------------
283// JavaScript Scanner.
284
285class Scanner {
286 public:
287 // Scoped helper for literal recording. Automatically drops the literal
288 // if aborting the scanning before it's complete.
289 class LiteralScope {
290 public:
291 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
292 scanner_->StartLiteral();
293 }
294 ~LiteralScope() {
295 if (!complete_) scanner_->DropLiteral();
296 }
297 void Complete() {
298 complete_ = true;
299 }
300
301 private:
302 Scanner* scanner_;
303 bool complete_;
304 };
305
306 // Scoped helper for a re-settable bookmark.
307 class BookmarkScope {
308 public:
309 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) {
310 DCHECK_NOT_NULL(scanner_);
311 }
312 ~BookmarkScope() { scanner_->DropBookmark(); }
313
314 bool Set() { return scanner_->SetBookmark(); }
315 void Reset() { scanner_->ResetToBookmark(); }
316 bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); }
317 bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); }
318
319 private:
320 Scanner* scanner_;
321
322 DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
323 };
324
325 // Representation of an interval of source positions.
326 struct Location {
327 Location(int b, int e) : beg_pos(b), end_pos(e) { }
328 Location() : beg_pos(0), end_pos(0) { }
329
330 bool IsValid() const {
331 return beg_pos >= 0 && end_pos >= beg_pos;
332 }
333
334 static Location invalid() { return Location(-1, -1); }
335
336 int beg_pos;
337 int end_pos;
338 };
339
340 // -1 is outside of the range of any real source code.
341 static const int kNoOctalLocation = -1;
342
343 explicit Scanner(UnicodeCache* scanner_contants);
344
345 void Initialize(Utf16CharacterStream* source);
346
347 // Returns the next token and advances input.
348 Token::Value Next();
349 // Returns the token following peek()
350 Token::Value PeekAhead();
351 // Returns the current token again.
352 Token::Value current_token() { return current_.token; }
353 // Returns the location information for the current token
354 // (the token last returned by Next()).
355 Location location() const { return current_.location; }
356
357 // Similar functions for the upcoming token.
358
359 // One token look-ahead (past the token returned by Next()).
360 Token::Value peek() const { return next_.token; }
361
362 Location peek_location() const { return next_.location; }
363
364 bool literal_contains_escapes() const {
365 return LiteralContainsEscapes(current_);
366 }
367 bool next_literal_contains_escapes() const {
368 return LiteralContainsEscapes(next_);
369 }
370 bool is_literal_contextual_keyword(Vector<const char> keyword) {
371 DCHECK_NOT_NULL(current_.literal_chars);
372 return current_.literal_chars->is_contextual_keyword(keyword);
373 }
374 bool is_next_contextual_keyword(Vector<const char> keyword) {
375 DCHECK_NOT_NULL(next_.literal_chars);
376 return next_.literal_chars->is_contextual_keyword(keyword);
377 }
378
379 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
380 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
381 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
382
383 double DoubleValue();
384 bool ContainsDot();
385 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
386 if (is_literal_one_byte() &&
387 literal_length() == length &&
388 (allow_escapes || !literal_contains_escapes())) {
389 const char* token =
390 reinterpret_cast<const char*>(literal_one_byte_string().start());
391 return !strncmp(token, data, length);
392 }
393 return false;
394 }
395 inline bool UnescapedLiteralMatches(const char* data, int length) {
396 return LiteralMatches(data, length, false);
397 }
398
399 void IsGetOrSet(bool* is_get, bool* is_set) {
400 if (is_literal_one_byte() &&
401 literal_length() == 3 &&
402 !literal_contains_escapes()) {
403 const char* token =
404 reinterpret_cast<const char*>(literal_one_byte_string().start());
405 *is_get = strncmp(token, "get", 3) == 0;
406 *is_set = !*is_get && strncmp(token, "set", 3) == 0;
407 }
408 }
409
410 int FindSymbol(DuplicateFinder* finder, int value);
411
412 UnicodeCache* unicode_cache() { return unicode_cache_; }
413
414 // Returns the location of the last seen octal literal.
415 Location octal_position() const { return octal_pos_; }
416 void clear_octal_position() { octal_pos_ = Location::invalid(); }
417
418 // Returns the value of the last smi that was scanned.
419 int smi_value() const { return current_.smi_value_; }
420
421 // Seek forward to the given position. This operation does not
422 // work in general, for instance when there are pushed back
423 // characters, but works for seeking forward until simple delimiter
424 // tokens, which is what it is used for.
425 void SeekForward(int pos);
426
427 // Returns true if there was a line terminator before the peek'ed token,
428 // possibly inside a multi-line comment.
429 bool HasAnyLineTerminatorBeforeNext() const {
430 return has_line_terminator_before_next_ ||
431 has_multiline_comment_before_next_;
432 }
433
434 // Scans the input as a regular expression pattern, previous
435 // character(s) must be /(=). Returns true if a pattern is scanned.
436 bool ScanRegExpPattern(bool seen_equal);
437 // Scans the input as regular expression flags. Returns the flags on success.
438 Maybe<RegExp::Flags> ScanRegExpFlags();
439
440 // Scans the input as a template literal
441 Token::Value ScanTemplateStart();
442 Token::Value ScanTemplateContinuation();
443
444 const LiteralBuffer* source_url() const { return &source_url_; }
445 const LiteralBuffer* source_mapping_url() const {
446 return &source_mapping_url_;
447 }
448
449 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
450
Ben Murdoch097c5b22016-05-18 11:27:45 +0100451 bool FoundHtmlComment() const { return found_html_comment_; }
452
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000453 private:
454 // The current and look-ahead token.
455 struct TokenDesc {
456 Token::Value token;
457 Location location;
458 LiteralBuffer* literal_chars;
459 LiteralBuffer* raw_literal_chars;
460 int smi_value_;
461 };
462
463 static const int kCharacterLookaheadBufferSize = 1;
464
465 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
466 template <bool capture_raw>
467 uc32 ScanOctalEscape(uc32 c, int length);
468
469 // Call this after setting source_ to the input.
470 void Init() {
471 // Set c0_ (one character ahead)
472 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
473 Advance();
474 // Initialize current_ to not refer to a literal.
475 current_.literal_chars = NULL;
476 current_.raw_literal_chars = NULL;
477 next_next_.token = Token::UNINITIALIZED;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100478 found_html_comment_ = false;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000479 }
480
481 // Support BookmarkScope functionality.
482 bool SetBookmark();
483 void ResetToBookmark();
484 bool BookmarkHasBeenSet();
485 bool BookmarkHasBeenReset();
486 void DropBookmark();
487 static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);
488
489 // Literal buffer support
490 inline void StartLiteral() {
491 LiteralBuffer* free_buffer =
492 (current_.literal_chars == &literal_buffer0_)
493 ? &literal_buffer1_
494 : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
495 : &literal_buffer0_;
496 free_buffer->Reset();
497 next_.literal_chars = free_buffer;
498 }
499
500 inline void StartRawLiteral() {
501 LiteralBuffer* free_buffer =
502 (current_.raw_literal_chars == &raw_literal_buffer0_)
503 ? &raw_literal_buffer1_
504 : (current_.raw_literal_chars == &raw_literal_buffer1_)
505 ? &raw_literal_buffer2_
506 : &raw_literal_buffer0_;
507 free_buffer->Reset();
508 next_.raw_literal_chars = free_buffer;
509 }
510
511 INLINE(void AddLiteralChar(uc32 c)) {
512 DCHECK_NOT_NULL(next_.literal_chars);
513 next_.literal_chars->AddChar(c);
514 }
515
516 INLINE(void AddRawLiteralChar(uc32 c)) {
517 DCHECK_NOT_NULL(next_.raw_literal_chars);
518 next_.raw_literal_chars->AddChar(c);
519 }
520
521 INLINE(void ReduceRawLiteralLength(int delta)) {
522 DCHECK_NOT_NULL(next_.raw_literal_chars);
523 next_.raw_literal_chars->ReduceLength(delta);
524 }
525
526 // Stops scanning of a literal and drop the collected characters,
527 // e.g., due to an encountered error.
528 inline void DropLiteral() {
529 next_.literal_chars = NULL;
530 next_.raw_literal_chars = NULL;
531 }
532
533 inline void AddLiteralCharAdvance() {
534 AddLiteralChar(c0_);
535 Advance();
536 }
537
538 // Low-level scanning support.
539 template <bool capture_raw = false, bool check_surrogate = true>
540 void Advance() {
541 if (capture_raw) {
542 AddRawLiteralChar(c0_);
543 }
544 c0_ = source_->Advance();
545 if (check_surrogate) HandleLeadSurrogate();
546 }
547
548 void HandleLeadSurrogate() {
549 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
550 uc32 c1 = source_->Advance();
551 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
552 source_->PushBack(c1);
553 } else {
554 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
555 }
556 }
557 }
558
559 void PushBack(uc32 ch) {
560 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
561 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
562 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
563 } else {
564 source_->PushBack(c0_);
565 }
566 c0_ = ch;
567 }
568
569 inline Token::Value Select(Token::Value tok) {
570 Advance();
571 return tok;
572 }
573
574 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
575 Advance();
576 if (c0_ == next) {
577 Advance();
578 return then;
579 } else {
580 return else_;
581 }
582 }
583
584 // Returns the literal string, if any, for the current token (the
585 // token last returned by Next()). The string is 0-terminated.
586 // Literal strings are collected for identifiers, strings, numbers as well
587 // as for template literals. For template literals we also collect the raw
588 // form.
589 // These functions only give the correct result if the literal was scanned
590 // when a LiteralScope object is alive.
591 Vector<const uint8_t> literal_one_byte_string() {
592 DCHECK_NOT_NULL(current_.literal_chars);
593 return current_.literal_chars->one_byte_literal();
594 }
595 Vector<const uint16_t> literal_two_byte_string() {
596 DCHECK_NOT_NULL(current_.literal_chars);
597 return current_.literal_chars->two_byte_literal();
598 }
599 bool is_literal_one_byte() {
600 DCHECK_NOT_NULL(current_.literal_chars);
601 return current_.literal_chars->is_one_byte();
602 }
603 int literal_length() const {
604 DCHECK_NOT_NULL(current_.literal_chars);
605 return current_.literal_chars->length();
606 }
607 // Returns the literal string for the next token (the token that
608 // would be returned if Next() were called).
609 Vector<const uint8_t> next_literal_one_byte_string() {
610 DCHECK_NOT_NULL(next_.literal_chars);
611 return next_.literal_chars->one_byte_literal();
612 }
613 Vector<const uint16_t> next_literal_two_byte_string() {
614 DCHECK_NOT_NULL(next_.literal_chars);
615 return next_.literal_chars->two_byte_literal();
616 }
617 bool is_next_literal_one_byte() {
618 DCHECK_NOT_NULL(next_.literal_chars);
619 return next_.literal_chars->is_one_byte();
620 }
621 Vector<const uint8_t> raw_literal_one_byte_string() {
622 DCHECK_NOT_NULL(current_.raw_literal_chars);
623 return current_.raw_literal_chars->one_byte_literal();
624 }
625 Vector<const uint16_t> raw_literal_two_byte_string() {
626 DCHECK_NOT_NULL(current_.raw_literal_chars);
627 return current_.raw_literal_chars->two_byte_literal();
628 }
629 bool is_raw_literal_one_byte() {
630 DCHECK_NOT_NULL(current_.raw_literal_chars);
631 return current_.raw_literal_chars->is_one_byte();
632 }
633
634 template <bool capture_raw>
635 uc32 ScanHexNumber(int expected_length);
636 // Scan a number of any length but not bigger than max_value. For example, the
637 // number can be 000000001, so it's very long in characters but its value is
638 // small.
639 template <bool capture_raw>
640 uc32 ScanUnlimitedLengthHexNumber(int max_value);
641
642 // Scans a single JavaScript token.
643 void Scan();
644
645 bool SkipWhiteSpace();
646 Token::Value SkipSingleLineComment();
647 Token::Value SkipSourceURLComment();
648 void TryToParseSourceURLComment();
649 Token::Value SkipMultiLineComment();
650 // Scans a possible HTML comment -- begins with '<!'.
651 Token::Value ScanHtmlComment();
652
653 void ScanDecimalDigits();
654 Token::Value ScanNumber(bool seen_period);
655 Token::Value ScanIdentifierOrKeyword();
656 Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);
657
658 Token::Value ScanString();
659
660 // Scans an escape-sequence which is part of a string and adds the
661 // decoded character to the current literal. Returns true if a pattern
662 // is scanned.
663 template <bool capture_raw, bool in_template_literal>
664 bool ScanEscape();
665
666 // Decodes a Unicode escape-sequence which is part of an identifier.
667 // If the escape sequence cannot be decoded the result is kBadChar.
668 uc32 ScanIdentifierUnicodeEscape();
669 // Helper for the above functions.
670 template <bool capture_raw>
671 uc32 ScanUnicodeEscape();
672
673 Token::Value ScanTemplateSpan();
674
675 // Return the current source position.
676 int source_pos() {
677 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
678 }
679
680 static bool LiteralContainsEscapes(const TokenDesc& token) {
681 Location location = token.location;
682 int source_length = (location.end_pos - location.beg_pos);
683 if (token.token == Token::STRING) {
684 // Subtract delimiters.
685 source_length -= 2;
686 }
687 return token.literal_chars->length() != source_length;
688 }
689
690 UnicodeCache* unicode_cache_;
691
692 // Buffers collecting literal strings, numbers, etc.
693 LiteralBuffer literal_buffer0_;
694 LiteralBuffer literal_buffer1_;
695 LiteralBuffer literal_buffer2_;
696
697 // Values parsed from magic comments.
698 LiteralBuffer source_url_;
699 LiteralBuffer source_mapping_url_;
700
701 // Buffer to store raw string values
702 LiteralBuffer raw_literal_buffer0_;
703 LiteralBuffer raw_literal_buffer1_;
704 LiteralBuffer raw_literal_buffer2_;
705
706 TokenDesc current_; // desc for current token (as returned by Next())
707 TokenDesc next_; // desc for next token (one token look-ahead)
708 TokenDesc next_next_; // desc for the token after next (after PeakAhead())
709
710 // Variables for Scanner::BookmarkScope and the *Bookmark implementation.
711 // These variables contain the scanner state when a bookmark is set.
712 //
713 // We will use bookmark_c0_ as a 'control' variable, where:
714 // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_.
715 // - bookmark_c0_ == -1: No bookmark has been set.
716 // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark).
717 //
718 // Which state is being bookmarked? The parser state is distributed over
719 // several variables, roughly like this:
720 // ... 1234 + 5678 ..... [character stream]
721 // [current_] [next_] c0_ | [scanner state]
722 // So when the scanner is logically at the beginning of an expression
723 // like "1234 + 4567", then:
724 // - current_ contains "1234"
725 // - next_ contains "+"
726 // - c0_ contains ' ' (the space between "+" and "5678",
727 // - the source_ character stream points to the beginning of "5678".
728 // To be able to restore this state, we will keep copies of current_, next_,
729 // and c0_; we'll ask the stream to bookmark itself, and we'll copy the
730 // contents of current_'s and next_'s literal buffers to bookmark_*_literal_.
731 static const uc32 kNoBookmark = -1;
732 static const uc32 kBookmarkWasApplied = -2;
733 uc32 bookmark_c0_;
734 TokenDesc bookmark_current_;
735 TokenDesc bookmark_next_;
736 LiteralBuffer bookmark_current_literal_;
737 LiteralBuffer bookmark_current_raw_literal_;
738 LiteralBuffer bookmark_next_literal_;
739 LiteralBuffer bookmark_next_raw_literal_;
740
741 // Input stream. Must be initialized to an Utf16CharacterStream.
742 Utf16CharacterStream* source_;
743
744
745 // Start position of the octal literal last scanned.
746 Location octal_pos_;
747
748 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
749 uc32 c0_;
750
751 // Whether there is a line terminator whitespace character after
752 // the current token, and before the next. Does not count newlines
753 // inside multiline comments.
754 bool has_line_terminator_before_next_;
755 // Whether there is a multi-line comment that contains a
756 // line-terminator after the current token, and before the next.
757 bool has_multiline_comment_before_next_;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100758
759 // Whether this scanner encountered an HTML comment.
760 bool found_html_comment_;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000761};
762
763} // namespace internal
764} // namespace v8
765
766#endif // V8_PARSING_SCANNER_H_