// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Features shared by parsing and pre-parsing scanners.
29
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000030#ifndef V8_SCANNER_H_
31#define V8_SCANNER_H_
32
ricow@chromium.org55ee8072011-09-08 16:33:10 +000033#include "allocation.h"
34#include "char-predicates.h"
35#include "checks.h"
36#include "globals.h"
mvstanton@chromium.orgdd6d9ee2013-10-11 10:35:37 +000037#include "hashmap.h"
38#include "list.h"
ricow@chromium.org55ee8072011-09-08 16:33:10 +000039#include "token.h"
40#include "unicode-inl.h"
41#include "utils.h"
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000042
kasperl@chromium.org71affb52009-05-26 05:44:31 +000043namespace v8 {
44namespace internal {
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000045
erik.corry@gmail.com394dbcf2011-10-27 07:38:48 +000046
ricow@chromium.org55ee8072011-09-08 16:33:10 +000047// Returns the value (0 .. 15) of a hexadecimal character c.
48// If c is not a legal hexadecimal character, returns a value < 0.
49inline int HexValue(uc32 c) {
50 c -= '0';
51 if (static_cast<unsigned>(c) <= 9) return c;
52 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
53 if (static_cast<unsigned>(c) <= 5) return c + 10;
54 return -1;
55}
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000056
ricow@chromium.org55ee8072011-09-08 16:33:10 +000057
58// ---------------------------------------------------------------------
yangguo@chromium.org154ff992012-03-13 08:09:54 +000059// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
60// A code unit is a 16 bit value representing either a 16 bit code point
61// or one part of a surrogate pair that make a single 21 bit code point.
ricow@chromium.org55ee8072011-09-08 16:33:10 +000062
yangguo@chromium.org154ff992012-03-13 08:09:54 +000063class Utf16CharacterStream {
ricow@chromium.org55ee8072011-09-08 16:33:10 +000064 public:
yangguo@chromium.org154ff992012-03-13 08:09:54 +000065 Utf16CharacterStream() : pos_(0) { }
66 virtual ~Utf16CharacterStream() { }
ricow@chromium.org55ee8072011-09-08 16:33:10 +000067
yangguo@chromium.org154ff992012-03-13 08:09:54 +000068 // Returns and advances past the next UTF-16 code unit in the input
69 // stream. If there are no more code units, it returns a negative
ricow@chromium.org55ee8072011-09-08 16:33:10 +000070 // value.
71 inline uc32 Advance() {
72 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
73 pos_++;
74 return static_cast<uc32>(*(buffer_cursor_++));
75 }
76 // Note: currently the following increment is necessary to avoid a
77 // parser problem! The scanner treats the final kEndOfInput as
yangguo@chromium.org154ff992012-03-13 08:09:54 +000078 // a code unit with a position, and does math relative to that
ricow@chromium.org55ee8072011-09-08 16:33:10 +000079 // position.
80 pos_++;
81
82 return kEndOfInput;
83 }
84
yangguo@chromium.org154ff992012-03-13 08:09:54 +000085 // Return the current position in the code unit stream.
ricow@chromium.org55ee8072011-09-08 16:33:10 +000086 // Starts at zero.
87 inline unsigned pos() const { return pos_; }
88
yangguo@chromium.org154ff992012-03-13 08:09:54 +000089 // Skips forward past the next code_unit_count UTF-16 code units
ricow@chromium.org55ee8072011-09-08 16:33:10 +000090 // in the input, or until the end of input if that comes sooner.
yangguo@chromium.org154ff992012-03-13 08:09:54 +000091 // Returns the number of code units actually skipped. If less
92 // than code_unit_count,
93 inline unsigned SeekForward(unsigned code_unit_count) {
ricow@chromium.org55ee8072011-09-08 16:33:10 +000094 unsigned buffered_chars =
95 static_cast<unsigned>(buffer_end_ - buffer_cursor_);
yangguo@chromium.org154ff992012-03-13 08:09:54 +000096 if (code_unit_count <= buffered_chars) {
97 buffer_cursor_ += code_unit_count;
98 pos_ += code_unit_count;
99 return code_unit_count;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000100 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000101 return SlowSeekForward(code_unit_count);
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000102 }
103
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000104 // Pushes back the most recently read UTF-16 code unit (or negative
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000105 // value if at end of input), i.e., the value returned by the most recent
106 // call to Advance.
107 // Must not be used right after calling SeekForward.
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000108 virtual void PushBack(int32_t code_unit) = 0;
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000109
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000110 protected:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000111 static const uc32 kEndOfInput = -1;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000112
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000113 // Ensures that the buffer_cursor_ points to the code_unit at
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000114 // position pos_ of the input, if possible. If the position
115 // is at or after the end of the input, return false. If there
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000116 // are more code_units available, return true.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000117 virtual bool ReadBlock() = 0;
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000118 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000119
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000120 const uc16* buffer_cursor_;
121 const uc16* buffer_end_;
122 unsigned pos_;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000123};
124
125
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000126// ---------------------------------------------------------------------
127// Caching predicates used by scanners.
mvstanton@chromium.orgdd6d9ee2013-10-11 10:35:37 +0000128
129class UnicodeCache {
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000130 public:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000131 UnicodeCache() {}
yangguo@chromium.orga6bbcc82012-12-21 12:35:02 +0000132 typedef unibrow::Utf8Decoder<512> Utf8Decoder;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000133
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000134 StaticResource<Utf8Decoder>* utf8_decoder() {
135 return &utf8_decoder_;
136 }
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000137
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000138 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); }
139 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
140 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
141 bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
142
143 private:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000144 unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
145 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
146 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
147 unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
148 StaticResource<Utf8Decoder> utf8_decoder_;
149
150 DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000151};
152
153
mvstanton@chromium.orgdd6d9ee2013-10-11 10:35:37 +0000154// ---------------------------------------------------------------------
155// DuplicateFinder discovers duplicate symbols.
156
157class DuplicateFinder {
158 public:
159 explicit DuplicateFinder(UnicodeCache* constants)
160 : unicode_constants_(constants),
161 backing_store_(16),
162 map_(&Match) { }
163
164 int AddAsciiSymbol(Vector<const char> key, int value);
165 int AddUtf16Symbol(Vector<const uint16_t> key, int value);
166 // Add a a number literal by converting it (if necessary)
167 // to the string that ToString(ToNumber(literal)) would generate.
168 // and then adding that string with AddAsciiSymbol.
169 // This string is the actual value used as key in an object literal,
170 // and the one that must be different from the other keys.
171 int AddNumber(Vector<const char> key, int value);
172
173 private:
174 int AddSymbol(Vector<const byte> key, bool is_ascii, int value);
175 // Backs up the key and its length in the backing store.
176 // The backup is stored with a base 127 encoding of the
177 // length (plus a bit saying whether the string is ASCII),
178 // followed by the bytes of the key.
179 byte* BackupKey(Vector<const byte> key, bool is_ascii);
180
181 // Compare two encoded keys (both pointing into the backing store)
182 // for having the same base-127 encoded lengths and ASCII-ness,
183 // and then having the same 'length' bytes following.
184 static bool Match(void* first, void* second);
185 // Creates a hash from a sequence of bytes.
186 static uint32_t Hash(Vector<const byte> key, bool is_ascii);
187 // Checks whether a string containing a JS number is its canonical
188 // form.
189 static bool IsNumberCanonical(Vector<const char> key);
190
191 // Size of buffer. Sufficient for using it to call DoubleToCString in
192 // from conversions.h.
193 static const int kBufferSize = 100;
194
195 UnicodeCache* unicode_constants_;
196 // Backing store used to store strings used as hashmap keys.
197 SequenceCollector<unsigned char> backing_store_;
198 HashMap map_;
199 // Buffer used for string->number->canonical string conversions.
200 char number_buffer_[kBufferSize];
201};
202
203
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000204// ----------------------------------------------------------------------------
205// LiteralBuffer - Collector of chars of literals.
206
207class LiteralBuffer {
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000208 public:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000209 LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000210
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000211 ~LiteralBuffer() {
212 if (backing_store_.length() > 0) {
213 backing_store_.Dispose();
214 }
215 }
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000216
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000217 INLINE(void AddChar(uint32_t code_unit)) {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000218 if (position_ >= backing_store_.length()) ExpandBuffer();
219 if (is_ascii_) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000220 if (code_unit <= unibrow::Latin1::kMaxChar) {
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000221 backing_store_[position_] = static_cast<byte>(code_unit);
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000222 position_ += kOneByteSize;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000223 return;
224 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000225 ConvertToUtf16();
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000226 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000227 ASSERT(code_unit < 0x10000u);
228 *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000229 position_ += kUC16Size;
230 }
231
232 bool is_ascii() { return is_ascii_; }
233
danno@chromium.org1fd77d52013-06-07 16:01:45 +0000234 bool is_contextual_keyword(Vector<const char> keyword) {
235 return is_ascii() && keyword.length() == position_ &&
236 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
237 }
238
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000239 Vector<const uc16> utf16_literal() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000240 ASSERT(!is_ascii_);
241 ASSERT((position_ & 0x1) == 0);
242 return Vector<const uc16>(
243 reinterpret_cast<const uc16*>(backing_store_.start()),
244 position_ >> 1);
245 }
246
247 Vector<const char> ascii_literal() {
248 ASSERT(is_ascii_);
249 return Vector<const char>(
250 reinterpret_cast<const char*>(backing_store_.start()),
251 position_);
252 }
253
254 int length() {
255 return is_ascii_ ? position_ : (position_ >> 1);
256 }
257
258 void Reset() {
259 position_ = 0;
260 is_ascii_ = true;
261 }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000262
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000263 private:
264 static const int kInitialCapacity = 16;
265 static const int kGrowthFactory = 4;
266 static const int kMinConversionSlack = 256;
267 static const int kMaxGrowth = 1 * MB;
268 inline int NewCapacity(int min_capacity) {
269 int capacity = Max(min_capacity, backing_store_.length());
270 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
271 return new_capacity;
272 }
273
274 void ExpandBuffer() {
275 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
mstarzinger@chromium.orge27d6172013-04-17 11:51:44 +0000276 OS::MemCopy(new_store.start(), backing_store_.start(), position_);
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000277 backing_store_.Dispose();
278 backing_store_ = new_store;
279 }
280
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000281 void ConvertToUtf16() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000282 ASSERT(is_ascii_);
283 Vector<byte> new_store;
284 int new_content_size = position_ * kUC16Size;
285 if (new_content_size >= backing_store_.length()) {
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000286 // Ensure room for all currently read code units as UC16 as well
287 // as the code unit about to be stored.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000288 new_store = Vector<byte>::New(NewCapacity(new_content_size));
289 } else {
290 new_store = backing_store_;
291 }
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000292 uint8_t* src = backing_store_.start();
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000293 uc16* dst = reinterpret_cast<uc16*>(new_store.start());
294 for (int i = position_ - 1; i >= 0; i--) {
295 dst[i] = src[i];
296 }
297 if (new_store.start() != backing_store_.start()) {
298 backing_store_.Dispose();
299 backing_store_ = new_store;
300 }
301 position_ = new_content_size;
302 is_ascii_ = false;
303 }
304
305 bool is_ascii_;
306 int position_;
307 Vector<byte> backing_store_;
308
309 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000310};
311
312
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000313// ----------------------------------------------------------------------------
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000314// JavaScript Scanner.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000315
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000316class Scanner {
317 public:
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000318 // Scoped helper for literal recording. Automatically drops the literal
319 // if aborting the scanning before it's complete.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000320 class LiteralScope {
321 public:
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000322 explicit LiteralScope(Scanner* self)
323 : scanner_(self), complete_(false) {
324 scanner_->StartLiteral();
325 }
326 ~LiteralScope() {
327 if (!complete_) scanner_->DropLiteral();
328 }
329 void Complete() {
330 scanner_->TerminateLiteral();
331 complete_ = true;
332 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000333
334 private:
335 Scanner* scanner_;
336 bool complete_;
337 };
338
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000339 // Representation of an interval of source positions.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000340 struct Location {
341 Location(int b, int e) : beg_pos(b), end_pos(e) { }
342 Location() : beg_pos(0), end_pos(0) { }
343
344 bool IsValid() const {
345 return beg_pos >= 0 && end_pos >= beg_pos;
346 }
347
348 static Location invalid() { return Location(-1, -1); }
349
350 int beg_pos;
351 int end_pos;
352 };
353
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000354 // -1 is outside of the range of any real source code.
355 static const int kNoOctalLocation = -1;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000356
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000357 explicit Scanner(UnicodeCache* scanner_contants);
358
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000359 void Initialize(Utf16CharacterStream* source);
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000360
361 // Returns the next token and advances input.
362 Token::Value Next();
363 // Returns the current token again.
364 Token::Value current_token() { return current_.token; }
365 // Returns the location information for the current token
366 // (the token last returned by Next()).
367 Location location() const { return current_.location; }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000368 // Returns the literal string, if any, for the current token (the
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000369 // token last returned by Next()). The string is 0-terminated.
370 // Literal strings are collected for identifiers, strings, and
371 // numbers.
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000372 // These functions only give the correct result if the literal
373 // was scanned between calls to StartLiteral() and TerminateLiteral().
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000374 Vector<const char> literal_ascii_string() {
375 ASSERT_NOT_NULL(current_.literal_chars);
376 return current_.literal_chars->ascii_literal();
377 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000378 Vector<const uc16> literal_utf16_string() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000379 ASSERT_NOT_NULL(current_.literal_chars);
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000380 return current_.literal_chars->utf16_literal();
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000381 }
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000382 bool is_literal_ascii() {
383 ASSERT_NOT_NULL(current_.literal_chars);
384 return current_.literal_chars->is_ascii();
385 }
danno@chromium.org1fd77d52013-06-07 16:01:45 +0000386 bool is_literal_contextual_keyword(Vector<const char> keyword) {
ulan@chromium.org837a67e2013-06-11 15:39:48 +0000387 ASSERT_NOT_NULL(current_.literal_chars);
danno@chromium.org1fd77d52013-06-07 16:01:45 +0000388 return current_.literal_chars->is_contextual_keyword(keyword);
389 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000390 int literal_length() const {
391 ASSERT_NOT_NULL(current_.literal_chars);
392 return current_.literal_chars->length();
393 }
394
395 bool literal_contains_escapes() const {
396 Location location = current_.location;
397 int source_length = (location.end_pos - location.beg_pos);
398 if (current_.token == Token::STRING) {
399 // Subtract delimiters.
400 source_length -= 2;
401 }
402 return current_.literal_chars->length() != source_length;
403 }
404
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000405 // Similar functions for the upcoming token.
406
407 // One token look-ahead (past the token returned by Next()).
408 Token::Value peek() const { return next_.token; }
409
410 Location peek_location() const { return next_.location; }
411
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000412 // Returns the literal string for the next token (the token that
413 // would be returned if Next() were called).
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000414 Vector<const char> next_literal_ascii_string() {
415 ASSERT_NOT_NULL(next_.literal_chars);
416 return next_.literal_chars->ascii_literal();
417 }
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000418 Vector<const uc16> next_literal_utf16_string() {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000419 ASSERT_NOT_NULL(next_.literal_chars);
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000420 return next_.literal_chars->utf16_literal();
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000421 }
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000422 bool is_next_literal_ascii() {
423 ASSERT_NOT_NULL(next_.literal_chars);
424 return next_.literal_chars->is_ascii();
425 }
danno@chromium.org1fd77d52013-06-07 16:01:45 +0000426 bool is_next_contextual_keyword(Vector<const char> keyword) {
427 ASSERT_NOT_NULL(next_.literal_chars);
428 return next_.literal_chars->is_contextual_keyword(keyword);
429 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000430 int next_literal_length() const {
431 ASSERT_NOT_NULL(next_.literal_chars);
432 return next_.literal_chars->length();
433 }
434
435 UnicodeCache* unicode_cache() { return unicode_cache_; }
436
437 static const int kCharacterLookaheadBufferSize = 1;
sgjesse@chromium.orgc6c57182011-01-17 12:24:25 +0000438
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000439 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
440 uc32 ScanOctalEscape(uc32 c, int length);
441
442 // Returns the location of the last seen octal literal.
443 Location octal_position() const { return octal_pos_; }
444 void clear_octal_position() { octal_pos_ = Location::invalid(); }
445
446 // Seek forward to the given position. This operation does not
447 // work in general, for instance when there are pushed back
448 // characters, but works for seeking forward until simple delimiter
449 // tokens, which is what it is used for.
450 void SeekForward(int pos);
451
452 bool HarmonyScoping() const {
453 return harmony_scoping_;
454 }
yangguo@chromium.org78d1ad42012-02-09 13:53:47 +0000455 void SetHarmonyScoping(bool scoping) {
456 harmony_scoping_ = scoping;
457 }
458 bool HarmonyModules() const {
459 return harmony_modules_;
460 }
461 void SetHarmonyModules(bool modules) {
462 harmony_modules_ = modules;
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000463 }
jkummerow@chromium.orgba72ec82013-07-22 09:21:20 +0000464 bool HarmonyNumericLiterals() const {
465 return harmony_numeric_literals_;
466 }
467 void SetHarmonyNumericLiterals(bool numeric_literals) {
468 harmony_numeric_literals_ = numeric_literals;
469 }
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000470
471 // Returns true if there was a line terminator before the peek'ed token,
472 // possibly inside a multi-line comment.
473 bool HasAnyLineTerminatorBeforeNext() const {
474 return has_line_terminator_before_next_ ||
475 has_multiline_comment_before_next_;
476 }
477
478 // Scans the input as a regular expression pattern, previous
479 // character(s) must be /(=). Returns true if a pattern is scanned.
480 bool ScanRegExpPattern(bool seen_equal);
481 // Returns true if regexp flags are scanned (always since flags can
482 // be empty).
483 bool ScanRegExpFlags();
484
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000485 private:
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000486 // The current and look-ahead token.
487 struct TokenDesc {
488 Token::Value token;
489 Location location;
490 LiteralBuffer* literal_chars;
491 };
492
493 // Call this after setting source_ to the input.
494 void Init() {
495 // Set c0_ (one character ahead)
496 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
497 Advance();
498 // Initialize current_ to not refer to a literal.
499 current_.literal_chars = NULL;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000500 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000501
502 // Literal buffer support
503 inline void StartLiteral() {
504 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
505 &literal_buffer2_ : &literal_buffer1_;
506 free_buffer->Reset();
507 next_.literal_chars = free_buffer;
ager@chromium.org5f0c45f2010-12-17 08:51:21 +0000508 }
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000509
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000510 INLINE(void AddLiteralChar(uc32 c)) {
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000511 ASSERT_NOT_NULL(next_.literal_chars);
512 next_.literal_chars->AddChar(c);
513 }
514
515 // Complete scanning of a literal.
516 inline void TerminateLiteral() {
517 // Does nothing in the current implementation.
518 }
519
520 // Stops scanning of a literal and drop the collected characters,
521 // e.g., due to an encountered error.
522 inline void DropLiteral() {
523 next_.literal_chars = NULL;
524 }
525
526 inline void AddLiteralCharAdvance() {
527 AddLiteralChar(c0_);
528 Advance();
529 }
530
531 // Low-level scanning support.
532 void Advance() { c0_ = source_->Advance(); }
533 void PushBack(uc32 ch) {
534 source_->PushBack(c0_);
535 c0_ = ch;
536 }
537
538 inline Token::Value Select(Token::Value tok) {
539 Advance();
540 return tok;
541 }
542
543 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
544 Advance();
545 if (c0_ == next) {
546 Advance();
547 return then;
548 } else {
549 return else_;
550 }
551 }
552
553 uc32 ScanHexNumber(int expected_length);
554
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000555 // Scans a single JavaScript token.
556 void Scan();
557
558 bool SkipWhiteSpace();
559 Token::Value SkipSingleLineComment();
560 Token::Value SkipMultiLineComment();
561 // Scans a possible HTML comment -- begins with '<!'.
562 Token::Value ScanHtmlComment();
563
564 void ScanDecimalDigits();
565 Token::Value ScanNumber(bool seen_period);
566 Token::Value ScanIdentifierOrKeyword();
567 Token::Value ScanIdentifierSuffix(LiteralScope* literal);
568
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000569 Token::Value ScanString();
570
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000571 // Scans an escape-sequence which is part of a string and adds the
572 // decoded character to the current literal. Returns true if a pattern
573 // is scanned.
574 bool ScanEscape();
575 // Decodes a Unicode escape-sequence which is part of an identifier.
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000576 // If the escape sequence cannot be decoded the result is kBadChar.
577 uc32 ScanIdentifierUnicodeEscape();
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000578 // Scans a Unicode escape-sequence and adds its characters,
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000579 // uninterpreted, to the current literal. Used for parsing RegExp
580 // flags.
581 bool ScanLiteralUnicodeEscape();
582
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000583 // Return the current source position.
584 int source_pos() {
585 return source_->pos() - kCharacterLookaheadBufferSize;
586 }
587
588 UnicodeCache* unicode_cache_;
589
590 // Buffers collecting literal strings, numbers, etc.
591 LiteralBuffer literal_buffer1_;
592 LiteralBuffer literal_buffer2_;
593
594 TokenDesc current_; // desc for current token (as returned by Next())
595 TokenDesc next_; // desc for next token (one token look-ahead)
596
yangguo@chromium.org154ff992012-03-13 08:09:54 +0000597 // Input stream. Must be initialized to an Utf16CharacterStream.
598 Utf16CharacterStream* source_;
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000599
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000600
601 // Start position of the octal literal last scanned.
602 Location octal_pos_;
603
jkummerow@chromium.orgc3b37122011-11-07 10:14:12 +0000604 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
605 uc32 c0_;
606
ricow@chromium.org55ee8072011-09-08 16:33:10 +0000607 // Whether there is a line terminator whitespace character after
608 // the current token, and before the next. Does not count newlines
609 // inside multiline comments.
610 bool has_line_terminator_before_next_;
611 // Whether there is a multi-line comment that contains a
612 // line-terminator after the current token, and before the next.
613 bool has_multiline_comment_before_next_;
yangguo@chromium.org78d1ad42012-02-09 13:53:47 +0000614 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings.
rossberg@chromium.orgb4b2aa62011-10-13 09:49:59 +0000615 bool harmony_scoping_;
yangguo@chromium.org78d1ad42012-02-09 13:53:47 +0000616 // Whether we scan 'module', 'import', 'export' as keywords.
617 bool harmony_modules_;
jkummerow@chromium.orgba72ec82013-07-22 09:21:20 +0000618 // Whether we scan 0o777 and 0b111 as numbers.
619 bool harmony_numeric_literals_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000620};
621
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000622} } // namespace v8::internal
623
624#endif // V8_SCANNER_H_