blob: 8ee40385a6e19cc53bf0770be3cb7ac8d779c61d [file] [log] [blame]
Steve Blockd0582a62009-12-15 09:54:21 +00001// Copyright 2006-2009 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include <stdlib.h>
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080029#include <stdio.h>
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080030#include <string.h>
Steve Blockd0582a62009-12-15 09:54:21 +000031
32#include "v8.h"
33
34#include "token.h"
35#include "scanner.h"
Iain Merrick9ac36c92010-09-13 15:29:50 +010036#include "parser.h"
Steve Blockd0582a62009-12-15 09:54:21 +000037#include "utils.h"
Iain Merrick9ac36c92010-09-13 15:29:50 +010038#include "execution.h"
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080039#include "preparser.h"
Steve Blockd0582a62009-12-15 09:54:21 +000040#include "cctest.h"
41
42namespace i = ::v8::internal;
43
44TEST(KeywordMatcher) {
45 struct KeywordToken {
46 const char* keyword;
47 i::Token::Value token;
48 };
49
50 static const KeywordToken keywords[] = {
51#define KEYWORD(t, s, d) { s, i::Token::t },
52#define IGNORE(t, s, d) /* */
53 TOKEN_LIST(IGNORE, KEYWORD, IGNORE)
54#undef KEYWORD
55 { NULL, i::Token::IDENTIFIER }
56 };
57
58 static const char* future_keywords[] = {
59#define FUTURE(t, s, d) s,
60 TOKEN_LIST(IGNORE, IGNORE, FUTURE)
61#undef FUTURE
62#undef IGNORE
63 NULL
64 };
65
66 KeywordToken key_token;
67 for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
68 i::KeywordMatcher matcher;
69 const char* keyword = key_token.keyword;
70 int length = i::StrLength(keyword);
71 for (int j = 0; j < length; j++) {
72 if (key_token.token == i::Token::INSTANCEOF && j == 2) {
73 // "in" is a prefix of "instanceof". It's the only keyword
74 // that is a prefix of another.
75 CHECK_EQ(i::Token::IN, matcher.token());
76 } else {
77 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
78 }
79 matcher.AddChar(keyword[j]);
80 }
81 CHECK_EQ(key_token.token, matcher.token());
82 // Adding more characters will make keyword matching fail.
83 matcher.AddChar('z');
84 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
85 // Adding a keyword later will not make it match again.
86 matcher.AddChar('i');
87 matcher.AddChar('f');
88 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
89 }
90
91 // Future keywords are not recognized.
92 const char* future_keyword;
93 for (int i = 0; (future_keyword = future_keywords[i]) != NULL; i++) {
94 i::KeywordMatcher matcher;
95 int length = i::StrLength(future_keyword);
96 for (int j = 0; j < length; j++) {
97 matcher.AddChar(future_keyword[j]);
98 }
99 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
100 }
101
102 // Zero isn't ignored at first.
103 i::KeywordMatcher bad_start;
104 bad_start.AddChar(0);
105 CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
106 bad_start.AddChar('i');
107 bad_start.AddChar('f');
108 CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
109
110 // Zero isn't ignored at end.
111 i::KeywordMatcher bad_end;
112 bad_end.AddChar('i');
113 bad_end.AddChar('f');
114 CHECK_EQ(i::Token::IF, bad_end.token());
115 bad_end.AddChar(0);
116 CHECK_EQ(i::Token::IDENTIFIER, bad_end.token());
117
118 // Case isn't ignored.
119 i::KeywordMatcher bad_case;
120 bad_case.AddChar('i');
121 bad_case.AddChar('F');
122 CHECK_EQ(i::Token::IDENTIFIER, bad_case.token());
123
124 // If we mark it as failure, continuing won't help.
125 i::KeywordMatcher full_stop;
126 full_stop.AddChar('i');
127 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
128 full_stop.Fail();
129 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
130 full_stop.AddChar('f');
131 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
132}
133
Iain Merrick9ac36c92010-09-13 15:29:50 +0100134
135TEST(ScanHTMLEndComments) {
136 // Regression test. See:
137 // http://code.google.com/p/chromium/issues/detail?id=53548
138 // Tests that --> is correctly interpreted as comment-to-end-of-line if there
139 // is only whitespace before it on the line, even after a multiline-comment
140 // comment. This was not the case if it occurred before the first real token
141 // in the input.
142 const char* tests[] = {
143 // Before first real token.
144 "--> is eol-comment\nvar y = 37;\n",
145 "\n --> is eol-comment\nvar y = 37;\n",
146 "/* precomment */ --> is eol-comment\nvar y = 37;\n",
147 "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
148 // After first real token.
149 "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
150 "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
151 NULL
152 };
153
154 // Parser/Scanner needs a stack limit.
155 int marker;
156 i::StackGuard::SetStackLimit(
157 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
158
159 for (int i = 0; tests[i]; i++) {
160 v8::ScriptData* data =
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100161 v8::ScriptData::PreCompile(tests[i], i::StrLength(tests[i]));
Iain Merrick9ac36c92010-09-13 15:29:50 +0100162 CHECK(data != NULL && !data->HasError());
163 delete data;
164 }
165}
166
167
168class ScriptResource : public v8::String::ExternalAsciiStringResource {
169 public:
170 ScriptResource(const char* data, size_t length)
171 : data_(data), length_(length) { }
172
173 const char* data() const { return data_; }
174 size_t length() const { return length_; }
175
176 private:
177 const char* data_;
178 size_t length_;
179};
180
181
182TEST(Preparsing) {
183 v8::HandleScope handles;
184 v8::Persistent<v8::Context> context = v8::Context::New();
185 v8::Context::Scope context_scope(context);
186 int marker;
187 i::StackGuard::SetStackLimit(
188 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
189
190 // Source containing functions that might be lazily compiled and all types
191 // of symbols (string, propertyName, regexp).
192 const char* source =
193 "var x = 42;"
194 "function foo(a) { return function nolazy(b) { return a + b; } }"
195 "function bar(a) { if (a) return function lazy(b) { return b; } }"
196 "var z = {'string': 'string literal', bareword: 'propertyName', "
197 " 42: 'number literal', for: 'keyword as propertyName', "
198 " f\\u006fr: 'keyword propertyname with escape'};"
199 "var v = /RegExp Literal/;"
200 "var w = /RegExp Literal\\u0020With Escape/gin;"
201 "var y = { get getter() { return 42; }, "
202 " set setter(v) { this.value = v; }};";
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100203 int source_length = i::StrLength(source);
Iain Merrick9ac36c92010-09-13 15:29:50 +0100204 const char* error_source = "var x = y z;";
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100205 int error_source_length = i::StrLength(error_source);
Iain Merrick9ac36c92010-09-13 15:29:50 +0100206
207 v8::ScriptData* preparse =
208 v8::ScriptData::PreCompile(source, source_length);
209 CHECK(!preparse->HasError());
210 bool lazy_flag = i::FLAG_lazy;
211 {
212 i::FLAG_lazy = true;
213 ScriptResource* resource = new ScriptResource(source, source_length);
214 v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
215 v8::Script::Compile(script_source, NULL, preparse);
216 }
217
218 {
219 i::FLAG_lazy = false;
220
221 ScriptResource* resource = new ScriptResource(source, source_length);
222 v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
223 v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
224 }
225 delete preparse;
226 i::FLAG_lazy = lazy_flag;
227
228 // Syntax error.
229 v8::ScriptData* error_preparse =
230 v8::ScriptData::PreCompile(error_source, error_source_length);
231 CHECK(error_preparse->HasError());
232 i::ScriptDataImpl *pre_impl =
233 reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
234 i::Scanner::Location error_location =
235 pre_impl->MessageLocation();
236 // Error is at "z" in source, location 10..11.
237 CHECK_EQ(10, error_location.beg_pos);
238 CHECK_EQ(11, error_location.end_pos);
239 // Should not crash.
240 const char* message = pre_impl->BuildMessage();
241 i::Vector<const char*> args = pre_impl->BuildArgs();
242 CHECK_GT(strlen(message), 0);
243}
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800244
245
246TEST(StandAlonePreParser) {
247 int marker;
248 i::StackGuard::SetStackLimit(
249 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
250
251 const char* programs[] = {
252 "{label: 42}",
253 "var x = 42;",
254 "function foo(x, y) { return x + y; }",
255 "native function foo(); return %ArgleBargle(glop);",
256 "var x = new new Function('this.x = 42');",
257 NULL
258 };
259
Ben Murdochb0fe1622011-05-05 13:52:32 +0100260 uintptr_t stack_limit = i::StackGuard::real_climit();
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800261 for (int i = 0; programs[i]; i++) {
262 const char* program = programs[i];
Ben Murdochb0fe1622011-05-05 13:52:32 +0100263 i::Utf8ToUC16CharacterStream stream(
264 reinterpret_cast<const i::byte*>(program),
265 static_cast<unsigned>(strlen(program)));
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800266 i::CompleteParserRecorder log;
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800267 i::V8JavaScriptScanner scanner;
Ben Murdochb0fe1622011-05-05 13:52:32 +0100268 scanner.Initialize(&stream);
269
270 v8::preparser::PreParser::PreParseResult result =
271 v8::preparser::PreParser::PreParseProgram(&scanner,
272 &log,
273 true,
274 stack_limit);
275 CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result);
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800276 i::ScriptDataImpl data(log.ExtractData());
277 CHECK(!data.has_error());
278 }
279}
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800280
281
282TEST(RegressChromium62639) {
283 int marker;
284 i::StackGuard::SetStackLimit(
285 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
286
287 const char* program = "var x = 'something';\n"
288 "escape: function() {}";
289 // Fails parsing expecting an identifier after "function".
290 // Before fix, didn't check *ok after Expect(Token::Identifier, ok),
291 // and then used the invalid currently scanned literal. This always
292 // failed in debug mode, and sometimes crashed in release mode.
293
Ben Murdochb0fe1622011-05-05 13:52:32 +0100294 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
295 static_cast<unsigned>(strlen(program)));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800296 i::ScriptDataImpl* data =
Ben Murdochb0fe1622011-05-05 13:52:32 +0100297 i::ParserApi::PreParse(&stream, NULL);
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800298 CHECK(data->HasError());
299 delete data;
300}
301
302
303TEST(Regress928) {
304 // Preparsing didn't consider the catch clause of a try statement
305 // as with-content, which made it assume that a function inside
306 // the block could be lazily compiled, and an extra, unexpected,
307 // entry was added to the data.
308 int marker;
309 i::StackGuard::SetStackLimit(
310 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
311
312 const char* program =
313 "try { } catch (e) { var foo = function () { /* first */ } }"
314 "var bar = function () { /* second */ }";
315
Ben Murdochb0fe1622011-05-05 13:52:32 +0100316 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
317 static_cast<unsigned>(strlen(program)));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800318 i::ScriptDataImpl* data =
Ben Murdochb0fe1622011-05-05 13:52:32 +0100319 i::ParserApi::PartialPreParse(&stream, NULL);
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800320 CHECK(!data->HasError());
321
322 data->Initialize();
323
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100324 int first_function =
325 static_cast<int>(strstr(program, "function") - program);
326 int first_lbrace = first_function + static_cast<int>(strlen("function () "));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800327 CHECK_EQ('{', program[first_lbrace]);
328 i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
329 CHECK(!entry1.is_valid());
330
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100331 int second_function =
332 static_cast<int>(strstr(program + first_lbrace, "function") - program);
333 int second_lbrace =
334 second_function + static_cast<int>(strlen("function () "));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800335 CHECK_EQ('{', program[second_lbrace]);
336 i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace);
337 CHECK(entry2.is_valid());
338 CHECK_EQ('}', program[entry2.end_pos() - 1]);
339 delete data;
340}
Ben Murdochb0fe1622011-05-05 13:52:32 +0100341
342
343TEST(PreParseOverflow) {
344 int marker;
345 i::StackGuard::SetStackLimit(
346 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
347
348 size_t kProgramSize = 1024 * 1024;
349 i::SmartPointer<char> program(
350 reinterpret_cast<char*>(malloc(kProgramSize + 1)));
351 memset(*program, '(', kProgramSize);
352 program[kProgramSize] = '\0';
353
354 uintptr_t stack_limit = i::StackGuard::real_climit();
355
356 i::Utf8ToUC16CharacterStream stream(
357 reinterpret_cast<const i::byte*>(*program),
358 static_cast<unsigned>(kProgramSize));
359 i::CompleteParserRecorder log;
360 i::V8JavaScriptScanner scanner;
361 scanner.Initialize(&stream);
362
363
364 v8::preparser::PreParser::PreParseResult result =
365 v8::preparser::PreParser::PreParseProgram(&scanner,
366 &log,
367 true,
368 stack_limit);
369 CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
370}
371
372
373class TestExternalResource: public v8::String::ExternalStringResource {
374 public:
375 explicit TestExternalResource(uint16_t* data, int length)
376 : data_(data), length_(static_cast<size_t>(length)) { }
377
378 ~TestExternalResource() { }
379
380 const uint16_t* data() const {
381 return data_;
382 }
383
384 size_t length() const {
385 return length_;
386 }
387 private:
388 uint16_t* data_;
389 size_t length_;
390};
391
392
393#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
394
395void TestCharacterStream(const char* ascii_source,
396 unsigned length,
397 unsigned start = 0,
398 unsigned end = 0) {
399 if (end == 0) end = length;
400 unsigned sub_length = end - start;
401 i::HandleScope test_scope;
402 i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
403 for (unsigned i = 0; i < length; i++) {
404 uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
405 }
406 i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
407 i::Handle<i::String> ascii_string(
408 i::Factory::NewStringFromAscii(ascii_vector));
409 TestExternalResource resource(*uc16_buffer, length);
410 i::Handle<i::String> uc16_string(
411 i::Factory::NewExternalStringFromTwoByte(&resource));
412
413 i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
414 i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
415 i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
416 i::Utf8ToUC16CharacterStream utf8_stream(
417 reinterpret_cast<const i::byte*>(ascii_source), end);
418 utf8_stream.SeekForward(start);
419
420 unsigned i = start;
421 while (i < end) {
422 // Read streams one char at a time
423 CHECK_EQU(i, uc16_stream.pos());
424 CHECK_EQU(i, string_stream.pos());
425 CHECK_EQU(i, utf8_stream.pos());
426 int32_t c0 = ascii_source[i];
427 int32_t c1 = uc16_stream.Advance();
428 int32_t c2 = string_stream.Advance();
429 int32_t c3 = utf8_stream.Advance();
430 i++;
431 CHECK_EQ(c0, c1);
432 CHECK_EQ(c0, c2);
433 CHECK_EQ(c0, c3);
434 CHECK_EQU(i, uc16_stream.pos());
435 CHECK_EQU(i, string_stream.pos());
436 CHECK_EQU(i, utf8_stream.pos());
437 }
438 while (i > start + sub_length / 4) {
439 // Pushback, re-read, pushback again.
440 int32_t c0 = ascii_source[i - 1];
441 CHECK_EQU(i, uc16_stream.pos());
442 CHECK_EQU(i, string_stream.pos());
443 CHECK_EQU(i, utf8_stream.pos());
444 uc16_stream.PushBack(c0);
445 string_stream.PushBack(c0);
446 utf8_stream.PushBack(c0);
447 i--;
448 CHECK_EQU(i, uc16_stream.pos());
449 CHECK_EQU(i, string_stream.pos());
450 CHECK_EQU(i, utf8_stream.pos());
451 int32_t c1 = uc16_stream.Advance();
452 int32_t c2 = string_stream.Advance();
453 int32_t c3 = utf8_stream.Advance();
454 i++;
455 CHECK_EQU(i, uc16_stream.pos());
456 CHECK_EQU(i, string_stream.pos());
457 CHECK_EQU(i, utf8_stream.pos());
458 CHECK_EQ(c0, c1);
459 CHECK_EQ(c0, c2);
460 CHECK_EQ(c0, c3);
461 uc16_stream.PushBack(c0);
462 string_stream.PushBack(c0);
463 utf8_stream.PushBack(c0);
464 i--;
465 CHECK_EQU(i, uc16_stream.pos());
466 CHECK_EQU(i, string_stream.pos());
467 CHECK_EQU(i, utf8_stream.pos());
468 }
469 unsigned halfway = start + sub_length / 2;
470 uc16_stream.SeekForward(halfway - i);
471 string_stream.SeekForward(halfway - i);
472 utf8_stream.SeekForward(halfway - i);
473 i = halfway;
474 CHECK_EQU(i, uc16_stream.pos());
475 CHECK_EQU(i, string_stream.pos());
476 CHECK_EQU(i, utf8_stream.pos());
477
478 while (i < end) {
479 // Read streams one char at a time
480 CHECK_EQU(i, uc16_stream.pos());
481 CHECK_EQU(i, string_stream.pos());
482 CHECK_EQU(i, utf8_stream.pos());
483 int32_t c0 = ascii_source[i];
484 int32_t c1 = uc16_stream.Advance();
485 int32_t c2 = string_stream.Advance();
486 int32_t c3 = utf8_stream.Advance();
487 i++;
488 CHECK_EQ(c0, c1);
489 CHECK_EQ(c0, c2);
490 CHECK_EQ(c0, c3);
491 CHECK_EQU(i, uc16_stream.pos());
492 CHECK_EQU(i, string_stream.pos());
493 CHECK_EQU(i, utf8_stream.pos());
494 }
495
496 int32_t c1 = uc16_stream.Advance();
497 int32_t c2 = string_stream.Advance();
498 int32_t c3 = utf8_stream.Advance();
499 CHECK_LT(c1, 0);
500 CHECK_LT(c2, 0);
501 CHECK_LT(c3, 0);
502}
503
504
505TEST(CharacterStreams) {
506 v8::HandleScope handles;
507 v8::Persistent<v8::Context> context = v8::Context::New();
508 v8::Context::Scope context_scope(context);
509
510 TestCharacterStream("abc\0\n\r\x7f", 7);
511 static const unsigned kBigStringSize = 4096;
512 char buffer[kBigStringSize + 1];
513 for (unsigned i = 0; i < kBigStringSize; i++) {
514 buffer[i] = static_cast<char>(i & 0x7f);
515 }
516 TestCharacterStream(buffer, kBigStringSize);
517
518 TestCharacterStream(buffer, kBigStringSize, 576, 3298);
519
520 TestCharacterStream("\0", 1);
521 TestCharacterStream("", 0);
522}
523
524
525TEST(Utf8CharacterStream) {
526 static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
527 static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
528
529 static const int kAllUtf8CharsSize =
530 (unibrow::Utf8::kMaxOneByteChar + 1) +
531 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
532 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
533 static const unsigned kAllUtf8CharsSizeU =
534 static_cast<unsigned>(kAllUtf8CharsSize);
535
536 char buffer[kAllUtf8CharsSizeU];
537 unsigned cursor = 0;
538 for (int i = 0; i <= kMaxUC16Char; i++) {
539 cursor += unibrow::Utf8::Encode(buffer + cursor, i);
540 }
541 ASSERT(cursor == kAllUtf8CharsSizeU);
542
543 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
544 kAllUtf8CharsSizeU);
545 for (int i = 0; i <= kMaxUC16Char; i++) {
546 CHECK_EQU(i, stream.pos());
547 int32_t c = stream.Advance();
548 CHECK_EQ(i, c);
549 CHECK_EQU(i + 1, stream.pos());
550 }
551 for (int i = kMaxUC16Char; i >= 0; i--) {
552 CHECK_EQU(i + 1, stream.pos());
553 stream.PushBack(i);
554 CHECK_EQU(i, stream.pos());
555 }
556 int i = 0;
557 while (stream.pos() < kMaxUC16CharU) {
558 CHECK_EQU(i, stream.pos());
559 unsigned progress = stream.SeekForward(12);
560 i += progress;
561 int32_t c = stream.Advance();
562 if (i <= kMaxUC16Char) {
563 CHECK_EQ(i, c);
564 } else {
565 CHECK_EQ(-1, c);
566 }
567 i += 1;
568 CHECK_EQU(i, stream.pos());
569 }
570}
571
572#undef CHECK_EQU
573
574void TestStreamScanner(i::UC16CharacterStream* stream,
575 i::Token::Value* expected_tokens,
576 int skip_pos = 0, // Zero means not skipping.
577 int skip_to = 0) {
578 i::V8JavaScriptScanner scanner;
Steve Block9fac8402011-05-12 15:51:54 +0100579 scanner.Initialize(stream);
Ben Murdochb0fe1622011-05-05 13:52:32 +0100580
581 int i = 0;
582 do {
583 i::Token::Value expected = expected_tokens[i];
584 i::Token::Value actual = scanner.Next();
585 CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
586 if (scanner.location().end_pos == skip_pos) {
587 scanner.SeekForward(skip_to);
588 }
589 i++;
590 } while (expected_tokens[i] != i::Token::ILLEGAL);
591}
592
593TEST(StreamScanner) {
594 const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
595 i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
596 static_cast<unsigned>(strlen(str1)));
597 i::Token::Value expectations1[] = {
598 i::Token::LBRACE,
599 i::Token::IDENTIFIER,
600 i::Token::IDENTIFIER,
601 i::Token::FOR,
602 i::Token::COLON,
603 i::Token::MUL,
604 i::Token::DIV,
605 i::Token::LT,
606 i::Token::SUB,
607 i::Token::IDENTIFIER,
608 i::Token::EOS,
609 i::Token::ILLEGAL
610 };
611 TestStreamScanner(&stream1, expectations1, 0, 0);
612
613 const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
614 i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
615 static_cast<unsigned>(strlen(str2)));
616 i::Token::Value expectations2[] = {
617 i::Token::CASE,
618 i::Token::DEFAULT,
619 i::Token::CONST,
620 i::Token::LBRACE,
621 // Skipped part here
622 i::Token::RBRACE,
623 i::Token::DO,
624 i::Token::EOS,
625 i::Token::ILLEGAL
626 };
627 ASSERT_EQ('{', str2[19]);
628 ASSERT_EQ('}', str2[37]);
629 TestStreamScanner(&stream2, expectations2, 20, 37);
630
631 const char* str3 = "{}}}}";
632 i::Token::Value expectations3[] = {
633 i::Token::LBRACE,
634 i::Token::RBRACE,
635 i::Token::RBRACE,
636 i::Token::RBRACE,
637 i::Token::RBRACE,
638 i::Token::EOS,
639 i::Token::ILLEGAL
640 };
641 // Skip zero-four RBRACEs.
642 for (int i = 0; i <= 4; i++) {
643 expectations3[6 - i] = i::Token::ILLEGAL;
644 expectations3[5 - i] = i::Token::EOS;
645 i::Utf8ToUC16CharacterStream stream3(
646 reinterpret_cast<const i::byte*>(str3),
647 static_cast<unsigned>(strlen(str3)));
648 TestStreamScanner(&stream3, expectations3, 1, 1 + i);
649 }
650}
Ben Murdoch086aeea2011-05-13 15:57:08 +0100651
652
653void TestScanRegExp(const char* re_source, const char* expected) {
654 i::Utf8ToUC16CharacterStream stream(
655 reinterpret_cast<const i::byte*>(re_source),
656 static_cast<unsigned>(strlen(re_source)));
657 i::V8JavaScriptScanner scanner;
658 scanner.Initialize(&stream);
659
660 i::Token::Value start = scanner.peek();
661 CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
662 CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
663 scanner.Next(); // Current token is now the regexp literal.
664 CHECK(scanner.is_literal_ascii());
665 i::Vector<const char> actual = scanner.literal_ascii_string();
666 for (int i = 0; i < actual.length(); i++) {
667 CHECK_NE('\0', expected[i]);
668 CHECK_EQ(expected[i], actual[i]);
669 }
670}
671
672
673TEST(RegExpScanning) {
674 // RegExp token with added garbage at the end. The scanner should only
675 // scan the RegExp until the terminating slash just before "flipperwald".
676 TestScanRegExp("/b/flipperwald", "b");
677 // Incomplete escape sequences doesn't hide the terminating slash.
678 TestScanRegExp("/\\x/flipperwald", "\\x");
679 TestScanRegExp("/\\u/flipperwald", "\\u");
680 TestScanRegExp("/\\u1/flipperwald", "\\u1");
681 TestScanRegExp("/\\u12/flipperwald", "\\u12");
682 TestScanRegExp("/\\u123/flipperwald", "\\u123");
683 TestScanRegExp("/\\c/flipperwald", "\\c");
684 TestScanRegExp("/\\c//flipperwald", "\\c");
685 // Slashes inside character classes are not terminating.
686 TestScanRegExp("/[/]/flipperwald", "[/]");
687 TestScanRegExp("/[\\s-/]/flipperwald", "[\\s-/]");
688 // Incomplete escape sequences inside a character class doesn't hide
689 // the end of the character class.
690 TestScanRegExp("/[\\c/]/flipperwald", "[\\c/]");
691 TestScanRegExp("/[\\c]/flipperwald", "[\\c]");
692 TestScanRegExp("/[\\x]/flipperwald", "[\\x]");
693 TestScanRegExp("/[\\x1]/flipperwald", "[\\x1]");
694 TestScanRegExp("/[\\u]/flipperwald", "[\\u]");
695 TestScanRegExp("/[\\u1]/flipperwald", "[\\u1]");
696 TestScanRegExp("/[\\u12]/flipperwald", "[\\u12]");
697 TestScanRegExp("/[\\u123]/flipperwald", "[\\u123]");
698 // Escaped ']'s wont end the character class.
699 TestScanRegExp("/[\\]/]/flipperwald", "[\\]/]");
700 // Escaped slashes are not terminating.
701 TestScanRegExp("/\\//flipperwald", "\\/");
702 // Starting with '=' works too.
703 TestScanRegExp("/=/", "=");
704 TestScanRegExp("/=?/", "=?");
705}