blob: 15b1d44203e5c897c7b33591f8dd9f6576464a07 [file] [log] [blame]
Steve Blocka7e24c12009-10-30 11:49:00 +00001// Copyright 2006-2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29
30#include "ast.h"
Steve Block6ded16b2010-05-10 14:33:55 +010031#include "handles.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000032#include "scanner.h"
33
34namespace v8 {
35namespace internal {
36
37// ----------------------------------------------------------------------------
38// Character predicates
39
40
// Out-of-line definitions of the Scanner's static character-class
// predicates.  The scanning code in this file queries them through get(),
// e.g. kIsWhiteSpace.get(c0_) and kIsLineTerminator.get(c0_).
// NOTE(review): the second template argument (128) is presumably a cache
// size -- confirm against the unibrow::Predicate declaration.
41unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
42unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
43unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
44unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
45
46
// Out-of-line definition of the Scanner's static utf8_decoder_ member.
// NOTE(review): not referenced in this part of the file; presumably a
// decoder shared between scanner instances -- confirm in scanner.h.
47StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
48
49
50// ----------------------------------------------------------------------------
51// UTF8Buffer
52
Kristian Monsen80d68ea2010-09-08 11:05:35 +010053UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
Steve Blocka7e24c12009-10-30 11:49:00 +000054
55
Kristian Monsen80d68ea2010-09-08 11:05:35 +010056UTF8Buffer::~UTF8Buffer() {}
Steve Blocka7e24c12009-10-30 11:49:00 +000057
58
59void UTF8Buffer::AddCharSlow(uc32 c) {
Kristian Monsen80d68ea2010-09-08 11:05:35 +010060 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
61 int length = unibrow::Utf8::Length(c);
62 Vector<char> block = buffer_.AddBlock(length, '\0');
63#ifdef DEBUG
64 int written_length = unibrow::Utf8::Encode(block.start(), c);
65 CHECK_EQ(length, written_length);
66#else
67 unibrow::Utf8::Encode(block.start(), c);
68#endif
Steve Blocka7e24c12009-10-30 11:49:00 +000069}
70
71
72// ----------------------------------------------------------------------------
73// UTF16Buffer
74
75
76UTF16Buffer::UTF16Buffer()
Steve Block6ded16b2010-05-10 14:33:55 +010077 : pos_(0), end_(Scanner::kNoEndPosition) { }
Steve Blocka7e24c12009-10-30 11:49:00 +000078
79
80// CharacterStreamUTF16Buffer
81CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
82 : pushback_buffer_(0), last_(0), stream_(NULL) { }
83
84
85void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
Steve Block6ded16b2010-05-10 14:33:55 +010086 unibrow::CharacterStream* input,
87 int start_position,
88 int end_position) {
Steve Blocka7e24c12009-10-30 11:49:00 +000089 stream_ = input;
Steve Block6ded16b2010-05-10 14:33:55 +010090 if (start_position > 0) {
91 SeekForward(start_position);
92 }
93 end_ = end_position != Scanner::kNoEndPosition ? end_position : kMaxInt;
Steve Blocka7e24c12009-10-30 11:49:00 +000094}
95
96
97void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
98 pushback_buffer()->Add(last_);
99 last_ = ch;
100 pos_--;
101}
102
103
104uc32 CharacterStreamUTF16Buffer::Advance() {
Steve Block6ded16b2010-05-10 14:33:55 +0100105 ASSERT(end_ != Scanner::kNoEndPosition);
106 ASSERT(end_ >= 0);
Steve Blocka7e24c12009-10-30 11:49:00 +0000107 // NOTE: It is of importance to Persian / Farsi resources that we do
108 // *not* strip format control characters in the scanner; see
109 //
110 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152
111 //
112 // So, even though ECMA-262, section 7.1, page 11, dictates that we
113 // must remove Unicode format-control characters, we do not. This is
114 // in line with how IE and SpiderMonkey handles it.
115 if (!pushback_buffer()->is_empty()) {
116 pos_++;
117 return last_ = pushback_buffer()->RemoveLast();
Steve Block6ded16b2010-05-10 14:33:55 +0100118 } else if (stream_->has_more() && pos_ < end_) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000119 pos_++;
120 uc32 next = stream_->GetNext();
121 return last_ = next;
122 } else {
123 // Note: currently the following increment is necessary to avoid a
124 // test-parser problem!
125 pos_++;
126 return last_ = static_cast<uc32>(-1);
127 }
128}
129
130
131void CharacterStreamUTF16Buffer::SeekForward(int pos) {
132 pos_ = pos;
133 ASSERT(pushback_buffer()->is_empty());
134 stream_->Seek(pos);
135}
136
137
Steve Block6ded16b2010-05-10 14:33:55 +0100138// ExternalStringUTF16Buffer
139template <typename StringType, typename CharType>
140ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
Steve Blocka7e24c12009-10-30 11:49:00 +0000141 : raw_data_(NULL) { }
142
143
Steve Block6ded16b2010-05-10 14:33:55 +0100144template <typename StringType, typename CharType>
145void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
146 Handle<StringType> data,
147 int start_position,
148 int end_position) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000149 ASSERT(!data.is_null());
Steve Blocka7e24c12009-10-30 11:49:00 +0000150 raw_data_ = data->resource()->data();
Steve Block6ded16b2010-05-10 14:33:55 +0100151
152 ASSERT(end_position <= data->length());
153 if (start_position > 0) {
154 SeekForward(start_position);
155 }
156 end_ =
157 end_position != Scanner::kNoEndPosition ? end_position : data->length();
Steve Blocka7e24c12009-10-30 11:49:00 +0000158}
159
160
Steve Block6ded16b2010-05-10 14:33:55 +0100161template <typename StringType, typename CharType>
162uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
163 if (pos_ < end_) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000164 return raw_data_[pos_++];
165 } else {
166 // note: currently the following increment is necessary to avoid a
167 // test-parser problem!
168 pos_++;
169 return static_cast<uc32>(-1);
170 }
171}
172
173
Steve Block6ded16b2010-05-10 14:33:55 +0100174template <typename StringType, typename CharType>
175void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000176 pos_--;
177 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
178 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
179}
180
181
Steve Block6ded16b2010-05-10 14:33:55 +0100182template <typename StringType, typename CharType>
183void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000184 pos_ = pos;
185}
186
187
188// ----------------------------------------------------------------------------
Steve Blockd0582a62009-12-15 09:54:21 +0000189// Keyword Matcher
Kristian Monsen9dcf7e22010-06-28 14:14:28 +0100190
// Transition table for the first character of a potential keyword.
// KeywordMatcher::Step() indexes it with (input - kFirstCharRangeMin); per
// the comment there, the covered range is the lower-case letters 'b'..'w'.
// Each entry is { keyword, state, token }:
//  - KEYWORD_PREFIX entries name the single keyword starting with that
//    letter, together with the token produced once it is fully matched.
//  - Single-letter states (C, D, F, ...) hand over to the matching case in
//    Step(), where several keywords share the first letter.
//  - UNMATCHABLE marks letters that start no keyword.
Steve Blockd0582a62009-12-15 09:54:21 +0000191KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
192 { "break", KEYWORD_PREFIX, Token::BREAK },  // 'b'
193 { NULL, C, Token::ILLEGAL },  // 'c'
194 { NULL, D, Token::ILLEGAL },  // 'd'
195 { "else", KEYWORD_PREFIX, Token::ELSE },  // 'e'
196 { NULL, F, Token::ILLEGAL },  // 'f'
197 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'g'
198 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'h'
199 { NULL, I, Token::ILLEGAL },  // 'i'
200 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'j'
201 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'k'
202 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'l'
203 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'm'
204 { NULL, N, Token::ILLEGAL },  // 'n'
205 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'o'
206 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'p'
207 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'q'
208 { "return", KEYWORD_PREFIX, Token::RETURN },  // 'r'
209 { "switch", KEYWORD_PREFIX, Token::SWITCH },  // 's'
210 { NULL, T, Token::ILLEGAL },  // 't'
211 { NULL, UNMATCHABLE, Token::ILLEGAL },  // 'u'
212 { NULL, V, Token::ILLEGAL },  // 'v'
213 { NULL, W, Token::ILLEGAL }  // 'w'
214};
215
216
217void KeywordMatcher::Step(uc32 input) {
218 switch (state_) {
219 case INITIAL: {
220 // matching the first character is the only state with significant fanout.
221 // Match only lower-case letters in range 'b'..'w'.
222 unsigned int offset = input - kFirstCharRangeMin;
223 if (offset < kFirstCharRangeLength) {
224 state_ = first_states_[offset].state;
225 if (state_ == KEYWORD_PREFIX) {
226 keyword_ = first_states_[offset].keyword;
227 counter_ = 1;
228 keyword_token_ = first_states_[offset].token;
229 }
230 return;
231 }
232 break;
233 }
234 case KEYWORD_PREFIX:
235 if (keyword_[counter_] == input) {
236 ASSERT_NE(input, '\0');
237 counter_++;
238 if (keyword_[counter_] == '\0') {
239 state_ = KEYWORD_MATCHED;
240 token_ = keyword_token_;
241 }
242 return;
243 }
244 break;
245 case KEYWORD_MATCHED:
246 token_ = Token::IDENTIFIER;
247 break;
248 case C:
249 if (MatchState(input, 'a', CA)) return;
250 if (MatchState(input, 'o', CO)) return;
251 break;
252 case CA:
253 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
254 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
255 break;
256 case CO:
257 if (MatchState(input, 'n', CON)) return;
258 break;
259 case CON:
260 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
261 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
262 break;
263 case D:
264 if (MatchState(input, 'e', DE)) return;
265 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
266 break;
267 case DE:
268 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
269 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
270 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
271 break;
272 case F:
273 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
274 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
275 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
276 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
277 break;
278 case I:
279 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
280 if (MatchKeyword(input, 'n', IN, Token::IN)) return;
281 break;
282 case IN:
283 token_ = Token::IDENTIFIER;
284 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
285 return;
286 }
287 break;
288 case N:
289 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
290 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
291 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
292 break;
293 case T:
294 if (MatchState(input, 'h', TH)) return;
295 if (MatchState(input, 'r', TR)) return;
296 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
297 break;
298 case TH:
299 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
300 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
301 break;
302 case TR:
303 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
304 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
305 break;
306 case V:
307 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
308 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
309 break;
310 case W:
311 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
312 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
313 break;
314 default:
315 UNREACHABLE();
316 }
317 // On fallthrough, it's a failure.
318 state_ = UNMATCHABLE;
319}
320
321
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100322
323// ----------------------------------------------------------------------------
324// Scanner::LiteralScope
325
326Scanner::LiteralScope::LiteralScope(Scanner* self)
327 : scanner_(self), complete_(false) {
328 self->StartLiteral();
329}
330
331
332Scanner::LiteralScope::~LiteralScope() {
333 if (!complete_) scanner_->DropLiteral();
334}
335
336
337void Scanner::LiteralScope::Complete() {
338 scanner_->TerminateLiteral();
339 complete_ = true;
340}
341
Steve Blockd0582a62009-12-15 09:54:21 +0000342// ----------------------------------------------------------------------------
Steve Blocka7e24c12009-10-30 11:49:00 +0000343// Scanner
344
Leon Clarke4515c472010-02-03 11:58:03 +0000345Scanner::Scanner(ParserMode pre)
Kristian Monsen9dcf7e22010-06-28 14:14:28 +0100346 : is_pre_parsing_(pre == PREPARSE), stack_overflow_(false) { }
Steve Blocka7e24c12009-10-30 11:49:00 +0000347
348
Steve Block6ded16b2010-05-10 14:33:55 +0100349void Scanner::Initialize(Handle<String> source,
350 ParserLanguage language) {
Ben Murdoch3bec4d22010-07-22 14:51:16 +0100351 Init(source, NULL, 0, source->length(), language);
Steve Block6ded16b2010-05-10 14:33:55 +0100352}
353
354
355void Scanner::Initialize(Handle<String> source,
356 unibrow::CharacterStream* stream,
357 ParserLanguage language) {
358 Init(source, stream, 0, kNoEndPosition, language);
359}
360
361
362void Scanner::Initialize(Handle<String> source,
363 int start_position,
364 int end_position,
365 ParserLanguage language) {
Ben Murdoch3bec4d22010-07-22 14:51:16 +0100366 Init(source, NULL, start_position, end_position, language);
Steve Block6ded16b2010-05-10 14:33:55 +0100367}
368
369
Leon Clarke4515c472010-02-03 11:58:03 +0000370void Scanner::Init(Handle<String> source,
371 unibrow::CharacterStream* stream,
Steve Block6ded16b2010-05-10 14:33:55 +0100372 int start_position,
373 int end_position,
Leon Clarke4515c472010-02-03 11:58:03 +0000374 ParserLanguage language) {
Ben Murdoch3bec4d22010-07-22 14:51:16 +0100375 // Either initialize the scanner from a character stream or from a
376 // string.
377 ASSERT(source.is_null() || stream == NULL);
378
Steve Blocka7e24c12009-10-30 11:49:00 +0000379 // Initialize the source buffer.
380 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
381 two_byte_string_buffer_.Initialize(
Steve Block6ded16b2010-05-10 14:33:55 +0100382 Handle<ExternalTwoByteString>::cast(source),
383 start_position,
384 end_position);
Steve Blocka7e24c12009-10-30 11:49:00 +0000385 source_ = &two_byte_string_buffer_;
Steve Block6ded16b2010-05-10 14:33:55 +0100386 } else if (!source.is_null() && StringShape(*source).IsExternalAscii()) {
387 ascii_string_buffer_.Initialize(
388 Handle<ExternalAsciiString>::cast(source),
389 start_position,
390 end_position);
391 source_ = &ascii_string_buffer_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000392 } else {
Ben Murdoch3bec4d22010-07-22 14:51:16 +0100393 if (!source.is_null()) {
394 safe_string_input_buffer_.Reset(source.location());
395 stream = &safe_string_input_buffer_;
396 }
Steve Block6ded16b2010-05-10 14:33:55 +0100397 char_stream_buffer_.Initialize(source,
398 stream,
399 start_position,
400 end_position);
Steve Blocka7e24c12009-10-30 11:49:00 +0000401 source_ = &char_stream_buffer_;
402 }
403
Leon Clarke4515c472010-02-03 11:58:03 +0000404 is_parsing_json_ = (language == JSON);
Steve Blocka7e24c12009-10-30 11:49:00 +0000405
Steve Blocka7e24c12009-10-30 11:49:00 +0000406 // Set c0_ (one character ahead)
407 ASSERT(kCharacterLookaheadBufferSize == 1);
408 Advance();
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100409 // Initialize current_ to not refer to a literal.
410 current_.literal_chars = Vector<const char>();
411 // Reset literal buffer.
412 literal_buffer_.Reset();
Steve Blocka7e24c12009-10-30 11:49:00 +0000413
414 // Skip initial whitespace allowing HTML comment ends just like
415 // after a newline and scan first token.
416 has_line_terminator_before_next_ = true;
417 SkipWhiteSpace();
418 Scan();
419}
420
421
Steve Blocka7e24c12009-10-30 11:49:00 +0000422Token::Value Scanner::Next() {
423 // BUG 1215673: Find a thread safe way to set a stack limit in
424 // pre-parse mode. Otherwise, we cannot safely pre-parse from other
425 // threads.
426 current_ = next_;
427 // Check for stack-overflow before returning any tokens.
428 StackLimitCheck check;
429 if (check.HasOverflowed()) {
430 stack_overflow_ = true;
431 next_.token = Token::ILLEGAL;
432 } else {
Iain Merrick9ac36c92010-09-13 15:29:50 +0100433 has_line_terminator_before_next_ = false;
Steve Blocka7e24c12009-10-30 11:49:00 +0000434 Scan();
435 }
436 return current_.token;
437}
438
439
440void Scanner::StartLiteral() {
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100441 literal_buffer_.StartLiteral();
Steve Blocka7e24c12009-10-30 11:49:00 +0000442}
443
444
445void Scanner::AddChar(uc32 c) {
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100446 literal_buffer_.AddChar(c);
Steve Blocka7e24c12009-10-30 11:49:00 +0000447}
448
449
450void Scanner::TerminateLiteral() {
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100451 next_.literal_chars = literal_buffer_.EndLiteral();
452}
453
454
455void Scanner::DropLiteral() {
456 literal_buffer_.DropLiteral();
Steve Blocka7e24c12009-10-30 11:49:00 +0000457}
458
459
460void Scanner::AddCharAdvance() {
461 AddChar(c0_);
462 Advance();
463}
464
465
466static inline bool IsByteOrderMark(uc32 c) {
467 // The Unicode value U+FFFE is guaranteed never to be assigned as a
468 // Unicode character; this implies that in a Unicode context the
469 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
470 // character expressed in little-endian byte order (since it could
471 // not be a U+FFFE character expressed in big-endian byte
472 // order). Nevertheless, we check for it to be compatible with
473 // Spidermonkey.
474 return c == 0xFEFF || c == 0xFFFE;
475}
476
477
Leon Clarke4515c472010-02-03 11:58:03 +0000478bool Scanner::SkipJsonWhiteSpace() {
479 int start_position = source_pos();
480 // JSON WhiteSpace is tab, carrige-return, newline and space.
481 while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') {
482 Advance();
483 }
484 return source_pos() != start_position;
485}
486
487
488bool Scanner::SkipJavaScriptWhiteSpace() {
Steve Blocka7e24c12009-10-30 11:49:00 +0000489 int start_position = source_pos();
490
491 while (true) {
492 // We treat byte-order marks (BOMs) as whitespace for better
493 // compatibility with Spidermonkey and other JavaScript engines.
494 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
495 // IsWhiteSpace() includes line terminators!
496 if (kIsLineTerminator.get(c0_)) {
497 // Ignore line terminators, but remember them. This is necessary
498 // for automatic semicolon insertion.
499 has_line_terminator_before_next_ = true;
500 }
501 Advance();
502 }
503
504 // If there is an HTML comment end '-->' at the beginning of a
505 // line (with only whitespace in front of it), we treat the rest
506 // of the line as a comment. This is in line with the way
507 // SpiderMonkey handles it.
508 if (c0_ == '-' && has_line_terminator_before_next_) {
509 Advance();
510 if (c0_ == '-') {
511 Advance();
512 if (c0_ == '>') {
513 // Treat the rest of the line as a comment.
514 SkipSingleLineComment();
515 // Continue skipping white space after the comment.
516 continue;
517 }
518 PushBack('-'); // undo Advance()
519 }
520 PushBack('-'); // undo Advance()
521 }
522 // Return whether or not we skipped any characters.
523 return source_pos() != start_position;
524 }
525}
526
527
528Token::Value Scanner::SkipSingleLineComment() {
529 Advance();
530
531 // The line terminator at the end of the line is not considered
532 // to be part of the single-line comment; it is recognized
533 // separately by the lexical grammar and becomes part of the
534 // stream of input elements for the syntactic grammar (see
535 // ECMA-262, section 7.4, page 12).
536 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
537 Advance();
538 }
539
540 return Token::WHITESPACE;
541}
542
543
544Token::Value Scanner::SkipMultiLineComment() {
545 ASSERT(c0_ == '*');
546 Advance();
547
548 while (c0_ >= 0) {
549 char ch = c0_;
550 Advance();
551 // If we have reached the end of the multi-line comment, we
552 // consume the '/' and insert a whitespace. This way all
553 // multi-line comments are treated as whitespace - even the ones
554 // containing line terminators. This contradicts ECMA-262, section
555 // 7.4, page 12, that says that multi-line comments containing
556 // line terminators should be treated as a line terminator, but it
557 // matches the behaviour of SpiderMonkey and KJS.
558 if (ch == '*' && c0_ == '/') {
559 c0_ = ' ';
560 return Token::WHITESPACE;
561 }
562 }
563
564 // Unterminated multi-line comment.
565 return Token::ILLEGAL;
566}
567
568
569Token::Value Scanner::ScanHtmlComment() {
570 // Check for <!-- comments.
571 ASSERT(c0_ == '!');
572 Advance();
573 if (c0_ == '-') {
574 Advance();
575 if (c0_ == '-') return SkipSingleLineComment();
576 PushBack('-'); // undo Advance()
577 }
578 PushBack('!'); // undo Advance()
579 ASSERT(c0_ == '!');
580 return Token::LT;
581}
582
583
Leon Clarke4515c472010-02-03 11:58:03 +0000584
585void Scanner::ScanJson() {
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100586 next_.literal_chars = Vector<const char>();
Leon Clarke4515c472010-02-03 11:58:03 +0000587 Token::Value token;
588 has_line_terminator_before_next_ = false;
589 do {
590 // Remember the position of the next token
591 next_.location.beg_pos = source_pos();
592 switch (c0_) {
593 case '\t':
594 case '\r':
595 case '\n':
596 case ' ':
597 Advance();
598 token = Token::WHITESPACE;
599 break;
600 case '{':
601 Advance();
602 token = Token::LBRACE;
603 break;
604 case '}':
605 Advance();
606 token = Token::RBRACE;
607 break;
608 case '[':
609 Advance();
610 token = Token::LBRACK;
611 break;
612 case ']':
613 Advance();
614 token = Token::RBRACK;
615 break;
616 case ':':
617 Advance();
618 token = Token::COLON;
619 break;
620 case ',':
621 Advance();
622 token = Token::COMMA;
623 break;
624 case '"':
625 token = ScanJsonString();
626 break;
627 case '-':
628 case '0':
629 case '1':
630 case '2':
631 case '3':
632 case '4':
633 case '5':
634 case '6':
635 case '7':
636 case '8':
637 case '9':
638 token = ScanJsonNumber();
639 break;
640 case 't':
641 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
642 break;
643 case 'f':
644 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
645 break;
646 case 'n':
647 token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
648 break;
649 default:
650 if (c0_ < 0) {
651 Advance();
652 token = Token::EOS;
653 } else {
654 Advance();
655 token = Select(Token::ILLEGAL);
656 }
657 }
658 } while (token == Token::WHITESPACE);
659
660 next_.location.end_pos = source_pos();
661 next_.token = token;
662}
663
664
665Token::Value Scanner::ScanJsonString() {
666 ASSERT_EQ('"', c0_);
667 Advance();
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100668 LiteralScope literal(this);
Leon Clarke4515c472010-02-03 11:58:03 +0000669 while (c0_ != '"' && c0_ > 0) {
670 // Check for control character (0x00-0x1f) or unterminated string (<0).
671 if (c0_ < 0x20) return Token::ILLEGAL;
672 if (c0_ != '\\') {
673 AddCharAdvance();
674 } else {
675 Advance();
676 switch (c0_) {
677 case '"':
678 case '\\':
679 case '/':
680 AddChar(c0_);
681 break;
682 case 'b':
683 AddChar('\x08');
684 break;
685 case 'f':
686 AddChar('\x0c');
687 break;
688 case 'n':
689 AddChar('\x0a');
690 break;
691 case 'r':
692 AddChar('\x0d');
693 break;
694 case 't':
695 AddChar('\x09');
696 break;
697 case 'u': {
698 uc32 value = 0;
699 for (int i = 0; i < 4; i++) {
700 Advance();
701 int digit = HexValue(c0_);
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100702 if (digit < 0) {
703 return Token::ILLEGAL;
704 }
Leon Clarke4515c472010-02-03 11:58:03 +0000705 value = value * 16 + digit;
706 }
707 AddChar(value);
708 break;
709 }
710 default:
711 return Token::ILLEGAL;
712 }
713 Advance();
714 }
715 }
716 if (c0_ != '"') {
717 return Token::ILLEGAL;
718 }
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100719 literal.Complete();
Leon Clarke4515c472010-02-03 11:58:03 +0000720 Advance();
721 return Token::STRING;
722}
723
724
725Token::Value Scanner::ScanJsonNumber() {
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100726 LiteralScope literal(this);
Leon Clarke4515c472010-02-03 11:58:03 +0000727 if (c0_ == '-') AddCharAdvance();
728 if (c0_ == '0') {
729 AddCharAdvance();
730 // Prefix zero is only allowed if it's the only digit before
731 // a decimal point or exponent.
732 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
733 } else {
734 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
735 do {
736 AddCharAdvance();
737 } while (c0_ >= '0' && c0_ <= '9');
738 }
739 if (c0_ == '.') {
740 AddCharAdvance();
741 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
742 do {
743 AddCharAdvance();
744 } while (c0_ >= '0' && c0_ <= '9');
745 }
Iain Merrick9ac36c92010-09-13 15:29:50 +0100746 if (AsciiAlphaToLower(c0_) == 'e') {
Leon Clarke4515c472010-02-03 11:58:03 +0000747 AddCharAdvance();
748 if (c0_ == '-' || c0_ == '+') AddCharAdvance();
749 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
750 do {
751 AddCharAdvance();
752 } while (c0_ >= '0' && c0_ <= '9');
753 }
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100754 literal.Complete();
Leon Clarke4515c472010-02-03 11:58:03 +0000755 return Token::NUMBER;
756}
757
758
759Token::Value Scanner::ScanJsonIdentifier(const char* text,
760 Token::Value token) {
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100761 LiteralScope literal(this);
Leon Clarke4515c472010-02-03 11:58:03 +0000762 while (*text != '\0') {
763 if (c0_ != *text) return Token::ILLEGAL;
764 Advance();
765 text++;
766 }
767 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100768 literal.Complete();
Leon Clarke4515c472010-02-03 11:58:03 +0000769 return token;
770}
771
772
773void Scanner::ScanJavaScript() {
Kristian Monsen80d68ea2010-09-08 11:05:35 +0100774 next_.literal_chars = Vector<const char>();
Steve Blocka7e24c12009-10-30 11:49:00 +0000775 Token::Value token;
Steve Blocka7e24c12009-10-30 11:49:00 +0000776 do {
777 // Remember the position of the next token
778 next_.location.beg_pos = source_pos();
779
780 switch (c0_) {
781 case ' ':
782 case '\t':
783 Advance();
784 token = Token::WHITESPACE;
785 break;
786
787 case '\n':
788 Advance();
789 has_line_terminator_before_next_ = true;
790 token = Token::WHITESPACE;
791 break;
792
793 case '"': case '\'':
794 token = ScanString();
795 break;
796
797 case '<':
798 // < <= << <<= <!--
799 Advance();
800 if (c0_ == '=') {
801 token = Select(Token::LTE);
802 } else if (c0_ == '<') {
803 token = Select('=', Token::ASSIGN_SHL, Token::SHL);
804 } else if (c0_ == '!') {
805 token = ScanHtmlComment();
806 } else {
807 token = Token::LT;
808 }
809 break;
810
811 case '>':
812 // > >= >> >>= >>> >>>=
813 Advance();
814 if (c0_ == '=') {
815 token = Select(Token::GTE);
816 } else if (c0_ == '>') {
817 // >> >>= >>> >>>=
818 Advance();
819 if (c0_ == '=') {
820 token = Select(Token::ASSIGN_SAR);
821 } else if (c0_ == '>') {
822 token = Select('=', Token::ASSIGN_SHR, Token::SHR);
823 } else {
824 token = Token::SAR;
825 }
826 } else {
827 token = Token::GT;
828 }
829 break;
830
831 case '=':
832 // = == ===
833 Advance();
834 if (c0_ == '=') {
835 token = Select('=', Token::EQ_STRICT, Token::EQ);
836 } else {
837 token = Token::ASSIGN;
838 }
839 break;
840
841 case '!':
842 // ! != !==
843 Advance();
844 if (c0_ == '=') {
845 token = Select('=', Token::NE_STRICT, Token::NE);
846 } else {
847 token = Token::NOT;
848 }
849 break;
850
851 case '+':
852 // + ++ +=
853 Advance();
854 if (c0_ == '+') {
855 token = Select(Token::INC);
856 } else if (c0_ == '=') {
857 token = Select(Token::ASSIGN_ADD);
858 } else {
859 token = Token::ADD;
860 }
861 break;
862
863 case '-':
864 // - -- --> -=
865 Advance();
866 if (c0_ == '-') {
867 Advance();
868 if (c0_ == '>' && has_line_terminator_before_next_) {
869 // For compatibility with SpiderMonkey, we skip lines that
870 // start with an HTML comment end '-->'.
871 token = SkipSingleLineComment();
872 } else {
873 token = Token::DEC;
874 }
875 } else if (c0_ == '=') {
876 token = Select(Token::ASSIGN_SUB);
877 } else {
878 token = Token::SUB;
879 }
880 break;
881
882 case '*':
883 // * *=
884 token = Select('=', Token::ASSIGN_MUL, Token::MUL);
885 break;
886
887 case '%':
888 // % %=
889 token = Select('=', Token::ASSIGN_MOD, Token::MOD);
890 break;
891
892 case '/':
893 // / // /* /=
894 Advance();
895 if (c0_ == '/') {
896 token = SkipSingleLineComment();
897 } else if (c0_ == '*') {
898 token = SkipMultiLineComment();
899 } else if (c0_ == '=') {
900 token = Select(Token::ASSIGN_DIV);
901 } else {
902 token = Token::DIV;
903 }
904 break;
905
906 case '&':
907 // & && &=
908 Advance();
909 if (c0_ == '&') {
910 token = Select(Token::AND);
911 } else if (c0_ == '=') {
912 token = Select(Token::ASSIGN_BIT_AND);
913 } else {
914 token = Token::BIT_AND;
915 }
916 break;
917
918 case '|':
919 // | || |=
920 Advance();
921 if (c0_ == '|') {
922 token = Select(Token::OR);
923 } else if (c0_ == '=') {
924 token = Select(Token::ASSIGN_BIT_OR);
925 } else {
926 token = Token::BIT_OR;
927 }
928 break;
929
930 case '^':
931 // ^ ^=
932 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
933 break;
934
935 case '.':
936 // . Number
937 Advance();
938 if (IsDecimalDigit(c0_)) {
939 token = ScanNumber(true);
940 } else {
941 token = Token::PERIOD;
942 }
943 break;
944
945 case ':':
946 token = Select(Token::COLON);
947 break;
948
949 case ';':
950 token = Select(Token::SEMICOLON);
951 break;
952
953 case ',':
954 token = Select(Token::COMMA);
955 break;
956
957 case '(':
958 token = Select(Token::LPAREN);
959 break;
960
961 case ')':
962 token = Select(Token::RPAREN);
963 break;
964
965 case '[':
966 token = Select(Token::LBRACK);
967 break;
968
969 case ']':
970 token = Select(Token::RBRACK);
971 break;
972
973 case '{':
974 token = Select(Token::LBRACE);
975 break;
976
977 case '}':
978 token = Select(Token::RBRACE);
979 break;
980
981 case '?':
982 token = Select(Token::CONDITIONAL);
983 break;
984
985 case '~':
986 token = Select(Token::BIT_NOT);
987 break;
988
989 default:
990 if (kIsIdentifierStart.get(c0_)) {
991 token = ScanIdentifier();
992 } else if (IsDecimalDigit(c0_)) {
993 token = ScanNumber(false);
994 } else if (SkipWhiteSpace()) {
995 token = Token::WHITESPACE;
996 } else if (c0_ < 0) {
997 token = Token::EOS;
998 } else {
999 token = Select(Token::ILLEGAL);
1000 }
1001 break;
1002 }
1003
1004 // Continue scanning for tokens as long as we're just skipping
1005 // whitespace.
1006 } while (token == Token::WHITESPACE);
1007
1008 next_.location.end_pos = source_pos();
1009 next_.token = token;
1010}
1011
1012
1013void Scanner::SeekForward(int pos) {
1014 source_->SeekForward(pos - 1);
1015 Advance();
Iain Merrick9ac36c92010-09-13 15:29:50 +01001016 // This function is only called to seek to the location
1017 // of the end of a function (at the "}" token). It doesn't matter
1018 // whether there was a line terminator in the part we skip.
1019 has_line_terminator_before_next_ = false;
Steve Blocka7e24c12009-10-30 11:49:00 +00001020 Scan();
1021}
1022
1023
1024uc32 Scanner::ScanHexEscape(uc32 c, int length) {
1025 ASSERT(length <= 4); // prevent overflow
1026
1027 uc32 digits[4];
1028 uc32 x = 0;
1029 for (int i = 0; i < length; i++) {
1030 digits[i] = c0_;
1031 int d = HexValue(c0_);
1032 if (d < 0) {
1033 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
1034 // should be illegal, but other JS VMs just return the
1035 // non-escaped version of the original character.
1036
1037 // Push back digits read, except the last one (in c0_).
1038 for (int j = i-1; j >= 0; j--) {
1039 PushBack(digits[j]);
1040 }
1041 // Notice: No handling of error - treat it as "\u"->"u".
1042 return c;
1043 }
1044 x = x * 16 + d;
1045 Advance();
1046 }
1047
1048 return x;
1049}
1050
1051
1052// Octal escapes of the forms '\0xx' and '\xxx' are not a part of
1053// ECMA-262. Other JS VMs support them.
1054uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
1055 uc32 x = c - '0';
1056 for (int i = 0; i < length; i++) {
1057 int d = c0_ - '0';
1058 if (d < 0 || d > 7) break;
1059 int nx = x * 8 + d;
1060 if (nx >= 256) break;
1061 x = nx;
1062 Advance();
1063 }
1064 return x;
1065}
1066
1067
1068void Scanner::ScanEscape() {
1069 uc32 c = c0_;
1070 Advance();
1071
1072 // Skip escaped newlines.
1073 if (kIsLineTerminator.get(c)) {
1074 // Allow CR+LF newlines in multiline string literals.
1075 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
1076 // Allow LF+CR newlines in multiline string literals.
1077 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
1078 return;
1079 }
1080
1081 switch (c) {
1082 case '\'': // fall through
1083 case '"' : // fall through
1084 case '\\': break;
1085 case 'b' : c = '\b'; break;
1086 case 'f' : c = '\f'; break;
1087 case 'n' : c = '\n'; break;
1088 case 'r' : c = '\r'; break;
1089 case 't' : c = '\t'; break;
1090 case 'u' : c = ScanHexEscape(c, 4); break;
1091 case 'v' : c = '\v'; break;
1092 case 'x' : c = ScanHexEscape(c, 2); break;
1093 case '0' : // fall through
1094 case '1' : // fall through
1095 case '2' : // fall through
1096 case '3' : // fall through
1097 case '4' : // fall through
1098 case '5' : // fall through
1099 case '6' : // fall through
1100 case '7' : c = ScanOctalEscape(c, 2); break;
1101 }
1102
1103 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
1104 // should be illegal, but they are commonly handled
1105 // as non-escaped characters by JS VMs.
1106 AddChar(c);
1107}
1108
1109
1110Token::Value Scanner::ScanString() {
1111 uc32 quote = c0_;
1112 Advance(); // consume quote
1113
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001114 LiteralScope literal(this);
Steve Blocka7e24c12009-10-30 11:49:00 +00001115 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
1116 uc32 c = c0_;
1117 Advance();
1118 if (c == '\\') {
1119 if (c0_ < 0) return Token::ILLEGAL;
1120 ScanEscape();
1121 } else {
1122 AddChar(c);
1123 }
1124 }
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001125 if (c0_ != quote) return Token::ILLEGAL;
1126 literal.Complete();
Steve Blocka7e24c12009-10-30 11:49:00 +00001127
1128 Advance(); // consume quote
1129 return Token::STRING;
1130}
1131
1132
1133Token::Value Scanner::Select(Token::Value tok) {
1134 Advance();
1135 return tok;
1136}
1137
1138
1139Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {
1140 Advance();
1141 if (c0_ == next) {
1142 Advance();
1143 return then;
1144 } else {
1145 return else_;
1146 }
1147}
1148
1149
1150// Returns true if any decimal digits were scanned, returns false otherwise.
1151void Scanner::ScanDecimalDigits() {
1152 while (IsDecimalDigit(c0_))
1153 AddCharAdvance();
1154}
1155
1156
1157Token::Value Scanner::ScanNumber(bool seen_period) {
1158 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
1159
1160 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
1161
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001162 LiteralScope literal(this);
Steve Blocka7e24c12009-10-30 11:49:00 +00001163 if (seen_period) {
1164 // we have already seen a decimal point of the float
1165 AddChar('.');
1166 ScanDecimalDigits(); // we know we have at least one digit
1167
1168 } else {
1169 // if the first character is '0' we must check for octals and hex
1170 if (c0_ == '0') {
1171 AddCharAdvance();
1172
1173 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
1174 if (c0_ == 'x' || c0_ == 'X') {
1175 // hex number
1176 kind = HEX;
1177 AddCharAdvance();
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001178 if (!IsHexDigit(c0_)) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001179 // we must have at least one hex digit after 'x'/'X'
1180 return Token::ILLEGAL;
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001181 }
1182 while (IsHexDigit(c0_)) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001183 AddCharAdvance();
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001184 }
Steve Blocka7e24c12009-10-30 11:49:00 +00001185 } else if ('0' <= c0_ && c0_ <= '7') {
1186 // (possible) octal number
1187 kind = OCTAL;
1188 while (true) {
1189 if (c0_ == '8' || c0_ == '9') {
1190 kind = DECIMAL;
1191 break;
1192 }
1193 if (c0_ < '0' || '7' < c0_) break;
1194 AddCharAdvance();
1195 }
1196 }
1197 }
1198
1199 // Parse decimal digits and allow trailing fractional part.
1200 if (kind == DECIMAL) {
1201 ScanDecimalDigits(); // optional
1202 if (c0_ == '.') {
1203 AddCharAdvance();
1204 ScanDecimalDigits(); // optional
1205 }
1206 }
1207 }
1208
1209 // scan exponent, if any
1210 if (c0_ == 'e' || c0_ == 'E') {
1211 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
1212 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
1213 // scan exponent
1214 AddCharAdvance();
1215 if (c0_ == '+' || c0_ == '-')
1216 AddCharAdvance();
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001217 if (!IsDecimalDigit(c0_)) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001218 // we must have at least one decimal digit after 'e'/'E'
1219 return Token::ILLEGAL;
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001220 }
Steve Blocka7e24c12009-10-30 11:49:00 +00001221 ScanDecimalDigits();
1222 }
Steve Blocka7e24c12009-10-30 11:49:00 +00001223
1224 // The source character immediately following a numeric literal must
1225 // not be an identifier start or a decimal digit; see ECMA-262
1226 // section 7.8.3, page 17 (note that we read only one decimal digit
1227 // if the value is 0).
1228 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
1229 return Token::ILLEGAL;
1230
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001231 literal.Complete();
1232
Steve Blocka7e24c12009-10-30 11:49:00 +00001233 return Token::NUMBER;
1234}
1235
1236
1237uc32 Scanner::ScanIdentifierUnicodeEscape() {
1238 Advance();
1239 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
1240 Advance();
1241 uc32 c = ScanHexEscape('u', 4);
1242 // We do not allow a unicode escape sequence to start another
1243 // unicode escape sequence.
1244 if (c == '\\') return unibrow::Utf8::kBadChar;
1245 return c;
1246}
1247
1248
1249Token::Value Scanner::ScanIdentifier() {
1250 ASSERT(kIsIdentifierStart.get(c0_));
Steve Blocka7e24c12009-10-30 11:49:00 +00001251
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001252 LiteralScope literal(this);
Steve Blockd0582a62009-12-15 09:54:21 +00001253 KeywordMatcher keyword_match;
1254
Steve Blocka7e24c12009-10-30 11:49:00 +00001255 // Scan identifier start character.
1256 if (c0_ == '\\') {
Steve Blocka7e24c12009-10-30 11:49:00 +00001257 uc32 c = ScanIdentifierUnicodeEscape();
1258 // Only allow legal identifier start characters.
1259 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
1260 AddChar(c);
Steve Blockd0582a62009-12-15 09:54:21 +00001261 keyword_match.Fail();
Steve Blocka7e24c12009-10-30 11:49:00 +00001262 } else {
1263 AddChar(c0_);
Steve Blockd0582a62009-12-15 09:54:21 +00001264 keyword_match.AddChar(c0_);
Steve Blocka7e24c12009-10-30 11:49:00 +00001265 Advance();
1266 }
1267
1268 // Scan the rest of the identifier characters.
1269 while (kIsIdentifierPart.get(c0_)) {
1270 if (c0_ == '\\') {
Steve Blocka7e24c12009-10-30 11:49:00 +00001271 uc32 c = ScanIdentifierUnicodeEscape();
1272 // Only allow legal identifier part characters.
1273 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
1274 AddChar(c);
Steve Blockd0582a62009-12-15 09:54:21 +00001275 keyword_match.Fail();
Steve Blocka7e24c12009-10-30 11:49:00 +00001276 } else {
1277 AddChar(c0_);
Steve Blockd0582a62009-12-15 09:54:21 +00001278 keyword_match.AddChar(c0_);
Steve Blocka7e24c12009-10-30 11:49:00 +00001279 Advance();
1280 }
1281 }
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001282 literal.Complete();
Steve Blocka7e24c12009-10-30 11:49:00 +00001283
Steve Blockd0582a62009-12-15 09:54:21 +00001284 return keyword_match.token();
Steve Blocka7e24c12009-10-30 11:49:00 +00001285}
1286
1287
1288
1289bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
1290 // Checks whether the buffer contains an identifier (no escape).
1291 if (!buffer->has_more()) return false;
1292 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
1293 while (buffer->has_more()) {
1294 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
1295 }
1296 return true;
1297}
1298
1299
1300bool Scanner::ScanRegExpPattern(bool seen_equal) {
1301 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1302 bool in_character_class = false;
1303
1304 // Previous token is either '/' or '/=', in the second case, the
1305 // pattern starts at =.
1306 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1307 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1308
1309 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1310 // the scanner should pass uninterpreted bodies to the RegExp
1311 // constructor.
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001312 LiteralScope literal(this);
Steve Blocka7e24c12009-10-30 11:49:00 +00001313 if (seen_equal)
1314 AddChar('=');
1315
1316 while (c0_ != '/' || in_character_class) {
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001317 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
Steve Blocka7e24c12009-10-30 11:49:00 +00001318 if (c0_ == '\\') { // escaped character
1319 AddCharAdvance();
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001320 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
Steve Blocka7e24c12009-10-30 11:49:00 +00001321 AddCharAdvance();
1322 } else { // unescaped character
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001323 if (c0_ == '[') in_character_class = true;
1324 if (c0_ == ']') in_character_class = false;
Steve Blocka7e24c12009-10-30 11:49:00 +00001325 AddCharAdvance();
1326 }
1327 }
1328 Advance(); // consume '/'
1329
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001330 literal.Complete();
Steve Blocka7e24c12009-10-30 11:49:00 +00001331
1332 return true;
1333}
1334
1335bool Scanner::ScanRegExpFlags() {
1336 // Scan regular expression flags.
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001337 LiteralScope literal(this);
Steve Blocka7e24c12009-10-30 11:49:00 +00001338 while (kIsIdentifierPart.get(c0_)) {
1339 if (c0_ == '\\') {
1340 uc32 c = ScanIdentifierUnicodeEscape();
1341 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
1342 // We allow any escaped character, unlike the restriction on
1343 // IdentifierPart when it is used to build an IdentifierName.
1344 AddChar(c);
1345 continue;
1346 }
1347 }
1348 AddCharAdvance();
1349 }
Kristian Monsen80d68ea2010-09-08 11:05:35 +01001350 literal.Complete();
Steve Blocka7e24c12009-10-30 11:49:00 +00001351
1352 next_.location.end_pos = source_pos() - 1;
1353 return true;
1354}
1355
1356} } // namespace v8::internal