// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
// Features shared by parsing and pre-parsing scanners.
29
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000030#ifndef V8_SCANNER_H_
31#define V8_SCANNER_H_
32
ricow@chromium.org55ee8072011-09-08 16:33:10 +000033#include "allocation.h"
34#include "char-predicates.h"
35#include "checks.h"
36#include "globals.h"
37#include "token.h"
38#include "unicode-inl.h"
39#include "utils.h"
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000040
kasperl@chromium.org71affb52009-05-26 05:44:31 +000041namespace v8 {
42namespace internal {
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000043
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +000044
mstarzinger@chromium.org1b3afd12011-11-29 14:28:56 +000045// General collection of (multi-)bit-flags that can be passed to scanners and
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +000046// parsers to signify their (initial) mode of operation.
47enum ParsingFlags {
48 kNoParsingFlags = 0,
mstarzinger@chromium.org1b3afd12011-11-29 14:28:56 +000049 // Embed LanguageMode values in parsing flags, i.e., equivalent to:
50 // CLASSIC_MODE = 0,
51 // STRICT_MODE,
52 // EXTENDED_MODE,
53 kLanguageModeMask = 0x03,
yangguo@chromium.org78d1ad42012-02-09 13:53:47 +000054 kAllowLazy = 0x04,
55 kAllowNativesSyntax = 0x08,
56 kAllowModules = 0x10
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +000057};
58
mstarzinger@chromium.org1b3afd12011-11-29 14:28:56 +000059STATIC_ASSERT((kLanguageModeMask & CLASSIC_MODE) == CLASSIC_MODE);
60STATIC_ASSERT((kLanguageModeMask & STRICT_MODE) == STRICT_MODE);
61STATIC_ASSERT((kLanguageModeMask & EXTENDED_MODE) == EXTENDED_MODE);
62
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +000063
ricow@chromium.org55ee8072011-09-08 16:33:10 +000064// Returns the value (0 .. 15) of a hexadecimal character c.
65// If c is not a legal hexadecimal character, returns a value < 0.
66inline int HexValue(uc32 c) {
67 c -= '0';
68 if (static_cast<unsigned>(c) <= 9) return c;
69 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
70 if (static_cast<unsigned>(c) <= 5) return c + 10;
71 return -1;
72}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000073
ricow@chromium.org55ee8072011-09-08 16:33:10 +000074
75// ---------------------------------------------------------------------
yangguo@chromium.org154ff992012-03-13 08:09:54 +000076// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
77// A code unit is a 16 bit value representing either a 16 bit code point
78// or one part of a surrogate pair that make a single 21 bit code point.
ricow@chromium.org55ee8072011-09-08 16:33:10 +000079
yangguo@chromium.org154ff992012-03-13 08:09:54 +000080class Utf16CharacterStream {
ricow@chromium.org55ee8072011-09-08 16:33:10 +000081 public:
yangguo@chromium.org154ff992012-03-13 08:09:54 +000082 Utf16CharacterStream() : pos_(0) { }
83 virtual ~Utf16CharacterStream() { }
ricow@chromium.org55ee8072011-09-08 16:33:10 +000084
yangguo@chromium.org154ff992012-03-13 08:09:54 +000085 // Returns and advances past the next UTF-16 code unit in the input
86 // stream. If there are no more code units, it returns a negative
ricow@chromium.org55ee8072011-09-08 16:33:10 +000087 // value.
88 inline uc32 Advance() {
89 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
90 pos_++;
91 return static_cast<uc32>(*(buffer_cursor_++));
92 }
93 // Note: currently the following increment is necessary to avoid a
94 // parser problem! The scanner treats the final kEndOfInput as
yangguo@chromium.org154ff992012-03-13 08:09:54 +000095 // a code unit with a position, and does math relative to that
ricow@chromium.org55ee8072011-09-08 16:33:10 +000096 // position.
97 pos_++;
98
99 return kEndOfInput;
100 }
101
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000102 // Return the current position in the code unit stream.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000103 // Starts at zero.
104 inline unsigned pos() const { return pos_; }
105
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000106 // Skips forward past the next code_unit_count UTF-16 code units
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000107 // in the input, or until the end of input if that comes sooner.
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000108 // Returns the number of code units actually skipped. If less
109 // than code_unit_count,
110 inline unsigned SeekForward(unsigned code_unit_count) {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000111 unsigned buffered_chars =
112 static_cast<unsigned>(buffer_end_ - buffer_cursor_);
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000113 if (code_unit_count <= buffered_chars) {
114 buffer_cursor_ += code_unit_count;
115 pos_ += code_unit_count;
116 return code_unit_count;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000117 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000118 return SlowSeekForward(code_unit_count);
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000119 }
120
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000121 // Pushes back the most recently read UTF-16 code unit (or negative
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000122 // value if at end of input), i.e., the value returned by the most recent
123 // call to Advance.
124 // Must not be used right after calling SeekForward.
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000125 virtual void PushBack(int32_t code_unit) = 0;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000126
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000127 protected:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000128 static const uc32 kEndOfInput = -1;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000129
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000130 // Ensures that the buffer_cursor_ points to the code_unit at
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000131 // position pos_ of the input, if possible. If the position
132 // is at or after the end of the input, return false. If there
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000133 // are more code_units available, return true.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000134 virtual bool ReadBlock() = 0;
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000135 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000136
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000137 const uc16* buffer_cursor_;
138 const uc16* buffer_end_;
139 unsigned pos_;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000140};
141
142
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000143class UnicodeCache {
144// ---------------------------------------------------------------------
145// Caching predicates used by scanners.
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000146 public:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000147 UnicodeCache() {}
148 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000149
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000150 StaticResource<Utf8Decoder>* utf8_decoder() {
151 return &utf8_decoder_;
152 }
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000153
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000154 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); }
155 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
156 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
157 bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
158
159 private:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000160 unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
161 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
162 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
163 unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
164 StaticResource<Utf8Decoder> utf8_decoder_;
165
166 DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000167};
168
169
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000170// ----------------------------------------------------------------------------
171// LiteralBuffer - Collector of chars of literals.
172
173class LiteralBuffer {
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000174 public:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000175 LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000176
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000177 ~LiteralBuffer() {
178 if (backing_store_.length() > 0) {
179 backing_store_.Dispose();
180 }
181 }
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000182
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000183 INLINE(void AddChar(uint32_t code_unit)) {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000184 if (position_ >= backing_store_.length()) ExpandBuffer();
185 if (is_ascii_) {
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000186 if (code_unit < kMaxAsciiCharCodeU) {
187 backing_store_[position_] = static_cast<byte>(code_unit);
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000188 position_ += kASCIISize;
189 return;
190 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000191 ConvertToUtf16();
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000192 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000193 ASSERT(code_unit < 0x10000u);
194 *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000195 position_ += kUC16Size;
196 }
197
198 bool is_ascii() { return is_ascii_; }
199
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000200 Vector<const uc16> utf16_literal() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000201 ASSERT(!is_ascii_);
202 ASSERT((position_ & 0x1) == 0);
203 return Vector<const uc16>(
204 reinterpret_cast<const uc16*>(backing_store_.start()),
205 position_ >> 1);
206 }
207
208 Vector<const char> ascii_literal() {
209 ASSERT(is_ascii_);
210 return Vector<const char>(
211 reinterpret_cast<const char*>(backing_store_.start()),
212 position_);
213 }
214
215 int length() {
216 return is_ascii_ ? position_ : (position_ >> 1);
217 }
218
219 void Reset() {
220 position_ = 0;
221 is_ascii_ = true;
222 }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000223
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000224 private:
225 static const int kInitialCapacity = 16;
226 static const int kGrowthFactory = 4;
227 static const int kMinConversionSlack = 256;
228 static const int kMaxGrowth = 1 * MB;
229 inline int NewCapacity(int min_capacity) {
230 int capacity = Max(min_capacity, backing_store_.length());
231 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
232 return new_capacity;
233 }
234
235 void ExpandBuffer() {
236 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
237 memcpy(new_store.start(), backing_store_.start(), position_);
238 backing_store_.Dispose();
239 backing_store_ = new_store;
240 }
241
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000242 void ConvertToUtf16() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000243 ASSERT(is_ascii_);
244 Vector<byte> new_store;
245 int new_content_size = position_ * kUC16Size;
246 if (new_content_size >= backing_store_.length()) {
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000247 // Ensure room for all currently read code units as UC16 as well
248 // as the code unit about to be stored.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000249 new_store = Vector<byte>::New(NewCapacity(new_content_size));
250 } else {
251 new_store = backing_store_;
252 }
253 char* src = reinterpret_cast<char*>(backing_store_.start());
254 uc16* dst = reinterpret_cast<uc16*>(new_store.start());
255 for (int i = position_ - 1; i >= 0; i--) {
256 dst[i] = src[i];
257 }
258 if (new_store.start() != backing_store_.start()) {
259 backing_store_.Dispose();
260 backing_store_ = new_store;
261 }
262 position_ = new_content_size;
263 is_ascii_ = false;
264 }
265
266 bool is_ascii_;
267 int position_;
268 Vector<byte> backing_store_;
269
270 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000271};
272
273
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000274// ----------------------------------------------------------------------------
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000275// JavaScript Scanner.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000276
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000277class Scanner {
278 public:
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000279 // Scoped helper for literal recording. Automatically drops the literal
280 // if aborting the scanning before it's complete.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000281 class LiteralScope {
282 public:
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000283 explicit LiteralScope(Scanner* self)
284 : scanner_(self), complete_(false) {
285 scanner_->StartLiteral();
286 }
287 ~LiteralScope() {
288 if (!complete_) scanner_->DropLiteral();
289 }
290 void Complete() {
291 scanner_->TerminateLiteral();
292 complete_ = true;
293 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000294
295 private:
296 Scanner* scanner_;
297 bool complete_;
298 };
299
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000300 // Representation of an interval of source positions.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000301 struct Location {
302 Location(int b, int e) : beg_pos(b), end_pos(e) { }
303 Location() : beg_pos(0), end_pos(0) { }
304
305 bool IsValid() const {
306 return beg_pos >= 0 && end_pos >= beg_pos;
307 }
308
309 static Location invalid() { return Location(-1, -1); }
310
311 int beg_pos;
312 int end_pos;
313 };
314
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000315 // -1 is outside of the range of any real source code.
316 static const int kNoOctalLocation = -1;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000317
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000318 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
319
320 explicit Scanner(UnicodeCache* scanner_contants);
321
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000322 void Initialize(Utf16CharacterStream* source);
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000323
324 // Returns the next token and advances input.
325 Token::Value Next();
326 // Returns the current token again.
327 Token::Value current_token() { return current_.token; }
328 // Returns the location information for the current token
329 // (the token last returned by Next()).
330 Location location() const { return current_.location; }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000331 // Returns the literal string, if any, for the current token (the
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000332 // token last returned by Next()). The string is 0-terminated.
333 // Literal strings are collected for identifiers, strings, and
334 // numbers.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000335 // These functions only give the correct result if the literal
336 // was scanned between calls to StartLiteral() and TerminateLiteral().
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000337 Vector<const char> literal_ascii_string() {
338 ASSERT_NOT_NULL(current_.literal_chars);
339 return current_.literal_chars->ascii_literal();
340 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000341 Vector<const uc16> literal_utf16_string() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000342 ASSERT_NOT_NULL(current_.literal_chars);
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000343 return current_.literal_chars->utf16_literal();
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000344 }
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000345 bool is_literal_ascii() {
346 ASSERT_NOT_NULL(current_.literal_chars);
347 return current_.literal_chars->is_ascii();
348 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000349 int literal_length() const {
350 ASSERT_NOT_NULL(current_.literal_chars);
351 return current_.literal_chars->length();
352 }
353
354 bool literal_contains_escapes() const {
355 Location location = current_.location;
356 int source_length = (location.end_pos - location.beg_pos);
357 if (current_.token == Token::STRING) {
358 // Subtract delimiters.
359 source_length -= 2;
360 }
361 return current_.literal_chars->length() != source_length;
362 }
363
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000364 // Similar functions for the upcoming token.
365
366 // One token look-ahead (past the token returned by Next()).
367 Token::Value peek() const { return next_.token; }
368
369 Location peek_location() const { return next_.location; }
370
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000371 // Returns the literal string for the next token (the token that
372 // would be returned if Next() were called).
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000373 Vector<const char> next_literal_ascii_string() {
374 ASSERT_NOT_NULL(next_.literal_chars);
375 return next_.literal_chars->ascii_literal();
376 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000377 Vector<const uc16> next_literal_utf16_string() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000378 ASSERT_NOT_NULL(next_.literal_chars);
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000379 return next_.literal_chars->utf16_literal();
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000380 }
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000381 bool is_next_literal_ascii() {
382 ASSERT_NOT_NULL(next_.literal_chars);
383 return next_.literal_chars->is_ascii();
384 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000385 int next_literal_length() const {
386 ASSERT_NOT_NULL(next_.literal_chars);
387 return next_.literal_chars->length();
388 }
389
390 UnicodeCache* unicode_cache() { return unicode_cache_; }
391
392 static const int kCharacterLookaheadBufferSize = 1;
sgjesse@chromium.orgc6c57182011-01-17 12:24:25 +0000393
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000394 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
395 uc32 ScanOctalEscape(uc32 c, int length);
396
397 // Returns the location of the last seen octal literal.
398 Location octal_position() const { return octal_pos_; }
399 void clear_octal_position() { octal_pos_ = Location::invalid(); }
400
401 // Seek forward to the given position. This operation does not
402 // work in general, for instance when there are pushed back
403 // characters, but works for seeking forward until simple delimiter
404 // tokens, which is what it is used for.
405 void SeekForward(int pos);
406
407 bool HarmonyScoping() const {
408 return harmony_scoping_;
409 }
yangguo@chromium.org78d1ad42012-02-09 13:53:47 +0000410 void SetHarmonyScoping(bool scoping) {
411 harmony_scoping_ = scoping;
412 }
413 bool HarmonyModules() const {
414 return harmony_modules_;
415 }
416 void SetHarmonyModules(bool modules) {
417 harmony_modules_ = modules;
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000418 }
419
420
421 // Returns true if there was a line terminator before the peek'ed token,
422 // possibly inside a multi-line comment.
423 bool HasAnyLineTerminatorBeforeNext() const {
424 return has_line_terminator_before_next_ ||
425 has_multiline_comment_before_next_;
426 }
427
428 // Scans the input as a regular expression pattern, previous
429 // character(s) must be /(=). Returns true if a pattern is scanned.
430 bool ScanRegExpPattern(bool seen_equal);
431 // Returns true if regexp flags are scanned (always since flags can
432 // be empty).
433 bool ScanRegExpFlags();
434
435 // Tells whether the buffer contains an identifier (no escapes).
436 // Used for checking if a property name is an identifier.
437 static bool IsIdentifier(unibrow::CharacterStream* buffer);
438
439 private:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000440 // The current and look-ahead token.
441 struct TokenDesc {
442 Token::Value token;
443 Location location;
444 LiteralBuffer* literal_chars;
445 };
446
447 // Call this after setting source_ to the input.
448 void Init() {
449 // Set c0_ (one character ahead)
450 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
451 Advance();
452 // Initialize current_ to not refer to a literal.
453 current_.literal_chars = NULL;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000454 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000455
456 // Literal buffer support
457 inline void StartLiteral() {
458 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
459 &literal_buffer2_ : &literal_buffer1_;
460 free_buffer->Reset();
461 next_.literal_chars = free_buffer;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000462 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000463
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000464 INLINE(void AddLiteralChar(uc32 c)) {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000465 ASSERT_NOT_NULL(next_.literal_chars);
466 next_.literal_chars->AddChar(c);
467 }
468
469 // Complete scanning of a literal.
470 inline void TerminateLiteral() {
471 // Does nothing in the current implementation.
472 }
473
474 // Stops scanning of a literal and drop the collected characters,
475 // e.g., due to an encountered error.
476 inline void DropLiteral() {
477 next_.literal_chars = NULL;
478 }
479
480 inline void AddLiteralCharAdvance() {
481 AddLiteralChar(c0_);
482 Advance();
483 }
484
485 // Low-level scanning support.
486 void Advance() { c0_ = source_->Advance(); }
487 void PushBack(uc32 ch) {
488 source_->PushBack(c0_);
489 c0_ = ch;
490 }
491
492 inline Token::Value Select(Token::Value tok) {
493 Advance();
494 return tok;
495 }
496
497 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
498 Advance();
499 if (c0_ == next) {
500 Advance();
501 return then;
502 } else {
503 return else_;
504 }
505 }
506
507 uc32 ScanHexNumber(int expected_length);
508
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000509 // Scans a single JavaScript token.
510 void Scan();
511
512 bool SkipWhiteSpace();
513 Token::Value SkipSingleLineComment();
514 Token::Value SkipMultiLineComment();
515 // Scans a possible HTML comment -- begins with '<!'.
516 Token::Value ScanHtmlComment();
517
518 void ScanDecimalDigits();
519 Token::Value ScanNumber(bool seen_period);
520 Token::Value ScanIdentifierOrKeyword();
521 Token::Value ScanIdentifierSuffix(LiteralScope* literal);
522
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000523 Token::Value ScanString();
524
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000525 // Scans an escape-sequence which is part of a string and adds the
526 // decoded character to the current literal. Returns true if a pattern
527 // is scanned.
528 bool ScanEscape();
529 // Decodes a Unicode escape-sequence which is part of an identifier.
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000530 // If the escape sequence cannot be decoded the result is kBadChar.
531 uc32 ScanIdentifierUnicodeEscape();
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000532 // Scans a Unicode escape-sequence and adds its characters,
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000533 // uninterpreted, to the current literal. Used for parsing RegExp
534 // flags.
535 bool ScanLiteralUnicodeEscape();
536
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000537 // Return the current source position.
538 int source_pos() {
539 return source_->pos() - kCharacterLookaheadBufferSize;
540 }
541
542 UnicodeCache* unicode_cache_;
543
544 // Buffers collecting literal strings, numbers, etc.
545 LiteralBuffer literal_buffer1_;
546 LiteralBuffer literal_buffer2_;
547
548 TokenDesc current_; // desc for current token (as returned by Next())
549 TokenDesc next_; // desc for next token (one token look-ahead)
550
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000551 // Input stream. Must be initialized to an Utf16CharacterStream.
552 Utf16CharacterStream* source_;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000553
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000554
555 // Start position of the octal literal last scanned.
556 Location octal_pos_;
557
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000558 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
559 uc32 c0_;
560
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000561 // Whether there is a line terminator whitespace character after
562 // the current token, and before the next. Does not count newlines
563 // inside multiline comments.
564 bool has_line_terminator_before_next_;
565 // Whether there is a multi-line comment that contains a
566 // line-terminator after the current token, and before the next.
567 bool has_multiline_comment_before_next_;
yangguo@chromium.org78d1ad42012-02-09 13:53:47 +0000568 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings.
rossberg@chromium.orgb4b2aa62011-10-13 09:49:59 +0000569 bool harmony_scoping_;
yangguo@chromium.org78d1ad42012-02-09 13:53:47 +0000570 // Whether we scan 'module', 'import', 'export' as keywords.
571 bool harmony_modules_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000572};
573
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000574} } // namespace v8::internal
575
576#endif // V8_SCANNER_H_