// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include <stdlib.h>
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080029#include <stdio.h>
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080030#include <string.h>
Steve Blockd0582a62009-12-15 09:54:21 +000031
32#include "v8.h"
33
Steve Block44f0eee2011-05-26 01:26:41 +010034#include "isolate.h"
Steve Blockd0582a62009-12-15 09:54:21 +000035#include "token.h"
36#include "scanner.h"
Iain Merrick9ac36c92010-09-13 15:29:50 +010037#include "parser.h"
Steve Blockd0582a62009-12-15 09:54:21 +000038#include "utils.h"
Iain Merrick9ac36c92010-09-13 15:29:50 +010039#include "execution.h"
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080040#include "preparser.h"
Steve Blockd0582a62009-12-15 09:54:21 +000041#include "cctest.h"
42
43namespace i = ::v8::internal;
44
45TEST(KeywordMatcher) {
46 struct KeywordToken {
47 const char* keyword;
48 i::Token::Value token;
49 };
50
51 static const KeywordToken keywords[] = {
52#define KEYWORD(t, s, d) { s, i::Token::t },
53#define IGNORE(t, s, d) /* */
54 TOKEN_LIST(IGNORE, KEYWORD, IGNORE)
55#undef KEYWORD
56 { NULL, i::Token::IDENTIFIER }
57 };
58
59 static const char* future_keywords[] = {
60#define FUTURE(t, s, d) s,
61 TOKEN_LIST(IGNORE, IGNORE, FUTURE)
62#undef FUTURE
63#undef IGNORE
64 NULL
65 };
66
67 KeywordToken key_token;
68 for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
69 i::KeywordMatcher matcher;
70 const char* keyword = key_token.keyword;
71 int length = i::StrLength(keyword);
72 for (int j = 0; j < length; j++) {
73 if (key_token.token == i::Token::INSTANCEOF && j == 2) {
74 // "in" is a prefix of "instanceof". It's the only keyword
75 // that is a prefix of another.
76 CHECK_EQ(i::Token::IN, matcher.token());
77 } else {
78 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
79 }
80 matcher.AddChar(keyword[j]);
81 }
82 CHECK_EQ(key_token.token, matcher.token());
83 // Adding more characters will make keyword matching fail.
84 matcher.AddChar('z');
85 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
86 // Adding a keyword later will not make it match again.
87 matcher.AddChar('i');
88 matcher.AddChar('f');
89 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
90 }
91
92 // Future keywords are not recognized.
93 const char* future_keyword;
94 for (int i = 0; (future_keyword = future_keywords[i]) != NULL; i++) {
95 i::KeywordMatcher matcher;
96 int length = i::StrLength(future_keyword);
97 for (int j = 0; j < length; j++) {
98 matcher.AddChar(future_keyword[j]);
99 }
100 CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
101 }
102
103 // Zero isn't ignored at first.
104 i::KeywordMatcher bad_start;
105 bad_start.AddChar(0);
106 CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
107 bad_start.AddChar('i');
108 bad_start.AddChar('f');
109 CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
110
111 // Zero isn't ignored at end.
112 i::KeywordMatcher bad_end;
113 bad_end.AddChar('i');
114 bad_end.AddChar('f');
115 CHECK_EQ(i::Token::IF, bad_end.token());
116 bad_end.AddChar(0);
117 CHECK_EQ(i::Token::IDENTIFIER, bad_end.token());
118
119 // Case isn't ignored.
120 i::KeywordMatcher bad_case;
121 bad_case.AddChar('i');
122 bad_case.AddChar('F');
123 CHECK_EQ(i::Token::IDENTIFIER, bad_case.token());
124
125 // If we mark it as failure, continuing won't help.
126 i::KeywordMatcher full_stop;
127 full_stop.AddChar('i');
128 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
129 full_stop.Fail();
130 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
131 full_stop.AddChar('f');
132 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
133}
134
Iain Merrick9ac36c92010-09-13 15:29:50 +0100135
136TEST(ScanHTMLEndComments) {
137 // Regression test. See:
138 // http://code.google.com/p/chromium/issues/detail?id=53548
139 // Tests that --> is correctly interpreted as comment-to-end-of-line if there
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000140 // is only whitespace before it on the line (with comments considered as
141 // whitespace, even a multiline-comment containing a newline).
142 // This was not the case if it occurred before the first real token
Iain Merrick9ac36c92010-09-13 15:29:50 +0100143 // in the input.
144 const char* tests[] = {
145 // Before first real token.
146 "--> is eol-comment\nvar y = 37;\n",
147 "\n --> is eol-comment\nvar y = 37;\n",
148 "/* precomment */ --> is eol-comment\nvar y = 37;\n",
149 "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
150 // After first real token.
151 "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
152 "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
153 NULL
154 };
155
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000156 const char* fail_tests[] = {
157 "x --> is eol-comment\nvar y = 37;\n",
158 "\"\\n\" --> is eol-comment\nvar y = 37;\n",
159 "x/* precomment */ --> is eol-comment\nvar y = 37;\n",
160 "x/* precomment\n */ --> is eol-comment\nvar y = 37;\n",
161 "var x = 42; --> is eol-comment\nvar y = 37;\n",
162 "var x = 42; /* precomment\n */ --> is eol-comment\nvar y = 37;\n",
163 NULL
164 };
165
Iain Merrick9ac36c92010-09-13 15:29:50 +0100166 // Parser/Scanner needs a stack limit.
167 int marker;
Steve Block44f0eee2011-05-26 01:26:41 +0100168 i::Isolate::Current()->stack_guard()->SetStackLimit(
Iain Merrick9ac36c92010-09-13 15:29:50 +0100169 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
170
171 for (int i = 0; tests[i]; i++) {
172 v8::ScriptData* data =
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100173 v8::ScriptData::PreCompile(tests[i], i::StrLength(tests[i]));
Iain Merrick9ac36c92010-09-13 15:29:50 +0100174 CHECK(data != NULL && !data->HasError());
175 delete data;
176 }
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000177
178 for (int i = 0; fail_tests[i]; i++) {
179 v8::ScriptData* data =
180 v8::ScriptData::PreCompile(fail_tests[i], i::StrLength(fail_tests[i]));
181 CHECK(data == NULL || data->HasError());
182 delete data;
183 }
Iain Merrick9ac36c92010-09-13 15:29:50 +0100184}
185
186
187class ScriptResource : public v8::String::ExternalAsciiStringResource {
188 public:
189 ScriptResource(const char* data, size_t length)
190 : data_(data), length_(length) { }
191
192 const char* data() const { return data_; }
193 size_t length() const { return length_; }
194
195 private:
196 const char* data_;
197 size_t length_;
198};
199
200
201TEST(Preparsing) {
202 v8::HandleScope handles;
203 v8::Persistent<v8::Context> context = v8::Context::New();
204 v8::Context::Scope context_scope(context);
205 int marker;
Steve Block44f0eee2011-05-26 01:26:41 +0100206 i::Isolate::Current()->stack_guard()->SetStackLimit(
Iain Merrick9ac36c92010-09-13 15:29:50 +0100207 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
208
209 // Source containing functions that might be lazily compiled and all types
210 // of symbols (string, propertyName, regexp).
211 const char* source =
212 "var x = 42;"
213 "function foo(a) { return function nolazy(b) { return a + b; } }"
214 "function bar(a) { if (a) return function lazy(b) { return b; } }"
215 "var z = {'string': 'string literal', bareword: 'propertyName', "
216 " 42: 'number literal', for: 'keyword as propertyName', "
217 " f\\u006fr: 'keyword propertyname with escape'};"
218 "var v = /RegExp Literal/;"
219 "var w = /RegExp Literal\\u0020With Escape/gin;"
220 "var y = { get getter() { return 42; }, "
221 " set setter(v) { this.value = v; }};";
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100222 int source_length = i::StrLength(source);
Iain Merrick9ac36c92010-09-13 15:29:50 +0100223 const char* error_source = "var x = y z;";
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100224 int error_source_length = i::StrLength(error_source);
Iain Merrick9ac36c92010-09-13 15:29:50 +0100225
226 v8::ScriptData* preparse =
227 v8::ScriptData::PreCompile(source, source_length);
228 CHECK(!preparse->HasError());
229 bool lazy_flag = i::FLAG_lazy;
230 {
231 i::FLAG_lazy = true;
232 ScriptResource* resource = new ScriptResource(source, source_length);
233 v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
234 v8::Script::Compile(script_source, NULL, preparse);
235 }
236
237 {
238 i::FLAG_lazy = false;
239
240 ScriptResource* resource = new ScriptResource(source, source_length);
241 v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
242 v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
243 }
244 delete preparse;
245 i::FLAG_lazy = lazy_flag;
246
247 // Syntax error.
248 v8::ScriptData* error_preparse =
249 v8::ScriptData::PreCompile(error_source, error_source_length);
250 CHECK(error_preparse->HasError());
251 i::ScriptDataImpl *pre_impl =
252 reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
253 i::Scanner::Location error_location =
254 pre_impl->MessageLocation();
255 // Error is at "z" in source, location 10..11.
256 CHECK_EQ(10, error_location.beg_pos);
257 CHECK_EQ(11, error_location.end_pos);
258 // Should not crash.
259 const char* message = pre_impl->BuildMessage();
260 i::Vector<const char*> args = pre_impl->BuildArgs();
261 CHECK_GT(strlen(message), 0);
262}
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800263
264
265TEST(StandAlonePreParser) {
266 int marker;
Steve Block44f0eee2011-05-26 01:26:41 +0100267 i::Isolate::Current()->stack_guard()->SetStackLimit(
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800268 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
269
270 const char* programs[] = {
271 "{label: 42}",
272 "var x = 42;",
273 "function foo(x, y) { return x + y; }",
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000274 "%ArgleBargle(glop);",
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800275 "var x = new new Function('this.x = 42');",
276 NULL
277 };
278
Ben Murdoch8b112d22011-06-08 16:22:53 +0100279 uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800280 for (int i = 0; programs[i]; i++) {
281 const char* program = programs[i];
Ben Murdochb0fe1622011-05-05 13:52:32 +0100282 i::Utf8ToUC16CharacterStream stream(
283 reinterpret_cast<const i::byte*>(program),
284 static_cast<unsigned>(strlen(program)));
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800285 i::CompleteParserRecorder log;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000286 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Ben Murdochb0fe1622011-05-05 13:52:32 +0100287 scanner.Initialize(&stream);
288
289 v8::preparser::PreParser::PreParseResult result =
290 v8::preparser::PreParser::PreParseProgram(&scanner,
291 &log,
292 true,
293 stack_limit);
294 CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result);
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800295 i::ScriptDataImpl data(log.ExtractData());
296 CHECK(!data.has_error());
297 }
298}
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800299
300
301TEST(RegressChromium62639) {
302 int marker;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100303 i::Isolate::Current()->stack_guard()->SetStackLimit(
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800304 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
305
306 const char* program = "var x = 'something';\n"
307 "escape: function() {}";
308 // Fails parsing expecting an identifier after "function".
309 // Before fix, didn't check *ok after Expect(Token::Identifier, ok),
310 // and then used the invalid currently scanned literal. This always
311 // failed in debug mode, and sometimes crashed in release mode.
312
Ben Murdochb0fe1622011-05-05 13:52:32 +0100313 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
314 static_cast<unsigned>(strlen(program)));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800315 i::ScriptDataImpl* data =
Ben Murdochb0fe1622011-05-05 13:52:32 +0100316 i::ParserApi::PreParse(&stream, NULL);
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800317 CHECK(data->HasError());
318 delete data;
319}
320
321
322TEST(Regress928) {
323 // Preparsing didn't consider the catch clause of a try statement
324 // as with-content, which made it assume that a function inside
325 // the block could be lazily compiled, and an extra, unexpected,
326 // entry was added to the data.
327 int marker;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100328 i::Isolate::Current()->stack_guard()->SetStackLimit(
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800329 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
330
331 const char* program =
332 "try { } catch (e) { var foo = function () { /* first */ } }"
333 "var bar = function () { /* second */ }";
334
Ben Murdochb0fe1622011-05-05 13:52:32 +0100335 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
336 static_cast<unsigned>(strlen(program)));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800337 i::ScriptDataImpl* data =
Ben Murdochb0fe1622011-05-05 13:52:32 +0100338 i::ParserApi::PartialPreParse(&stream, NULL);
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800339 CHECK(!data->HasError());
340
341 data->Initialize();
342
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100343 int first_function =
344 static_cast<int>(strstr(program, "function") - program);
345 int first_lbrace = first_function + static_cast<int>(strlen("function () "));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800346 CHECK_EQ('{', program[first_lbrace]);
347 i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
348 CHECK(!entry1.is_valid());
349
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100350 int second_function =
351 static_cast<int>(strstr(program + first_lbrace, "function") - program);
352 int second_lbrace =
353 second_function + static_cast<int>(strlen("function () "));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800354 CHECK_EQ('{', program[second_lbrace]);
355 i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace);
356 CHECK(entry2.is_valid());
357 CHECK_EQ('}', program[entry2.end_pos() - 1]);
358 delete data;
359}
Ben Murdochb0fe1622011-05-05 13:52:32 +0100360
361
362TEST(PreParseOverflow) {
363 int marker;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100364 i::Isolate::Current()->stack_guard()->SetStackLimit(
Ben Murdochb0fe1622011-05-05 13:52:32 +0100365 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
366
367 size_t kProgramSize = 1024 * 1024;
368 i::SmartPointer<char> program(
369 reinterpret_cast<char*>(malloc(kProgramSize + 1)));
370 memset(*program, '(', kProgramSize);
371 program[kProgramSize] = '\0';
372
Ben Murdoch8b112d22011-06-08 16:22:53 +0100373 uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
Ben Murdochb0fe1622011-05-05 13:52:32 +0100374
375 i::Utf8ToUC16CharacterStream stream(
376 reinterpret_cast<const i::byte*>(*program),
377 static_cast<unsigned>(kProgramSize));
378 i::CompleteParserRecorder log;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000379 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Ben Murdochb0fe1622011-05-05 13:52:32 +0100380 scanner.Initialize(&stream);
381
382
383 v8::preparser::PreParser::PreParseResult result =
384 v8::preparser::PreParser::PreParseProgram(&scanner,
385 &log,
386 true,
387 stack_limit);
388 CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
389}
390
391
392class TestExternalResource: public v8::String::ExternalStringResource {
393 public:
394 explicit TestExternalResource(uint16_t* data, int length)
395 : data_(data), length_(static_cast<size_t>(length)) { }
396
397 ~TestExternalResource() { }
398
399 const uint16_t* data() const {
400 return data_;
401 }
402
403 size_t length() const {
404 return length_;
405 }
406 private:
407 uint16_t* data_;
408 size_t length_;
409};
410
411
412#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
413
414void TestCharacterStream(const char* ascii_source,
415 unsigned length,
416 unsigned start = 0,
417 unsigned end = 0) {
418 if (end == 0) end = length;
419 unsigned sub_length = end - start;
420 i::HandleScope test_scope;
421 i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
422 for (unsigned i = 0; i < length; i++) {
423 uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
424 }
425 i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
426 i::Handle<i::String> ascii_string(
Steve Block44f0eee2011-05-26 01:26:41 +0100427 FACTORY->NewStringFromAscii(ascii_vector));
Ben Murdochb0fe1622011-05-05 13:52:32 +0100428 TestExternalResource resource(*uc16_buffer, length);
429 i::Handle<i::String> uc16_string(
Steve Block44f0eee2011-05-26 01:26:41 +0100430 FACTORY->NewExternalStringFromTwoByte(&resource));
Ben Murdochb0fe1622011-05-05 13:52:32 +0100431
432 i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
433 i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
434 i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
435 i::Utf8ToUC16CharacterStream utf8_stream(
436 reinterpret_cast<const i::byte*>(ascii_source), end);
437 utf8_stream.SeekForward(start);
438
439 unsigned i = start;
440 while (i < end) {
441 // Read streams one char at a time
442 CHECK_EQU(i, uc16_stream.pos());
443 CHECK_EQU(i, string_stream.pos());
444 CHECK_EQU(i, utf8_stream.pos());
445 int32_t c0 = ascii_source[i];
446 int32_t c1 = uc16_stream.Advance();
447 int32_t c2 = string_stream.Advance();
448 int32_t c3 = utf8_stream.Advance();
449 i++;
450 CHECK_EQ(c0, c1);
451 CHECK_EQ(c0, c2);
452 CHECK_EQ(c0, c3);
453 CHECK_EQU(i, uc16_stream.pos());
454 CHECK_EQU(i, string_stream.pos());
455 CHECK_EQU(i, utf8_stream.pos());
456 }
457 while (i > start + sub_length / 4) {
458 // Pushback, re-read, pushback again.
459 int32_t c0 = ascii_source[i - 1];
460 CHECK_EQU(i, uc16_stream.pos());
461 CHECK_EQU(i, string_stream.pos());
462 CHECK_EQU(i, utf8_stream.pos());
463 uc16_stream.PushBack(c0);
464 string_stream.PushBack(c0);
465 utf8_stream.PushBack(c0);
466 i--;
467 CHECK_EQU(i, uc16_stream.pos());
468 CHECK_EQU(i, string_stream.pos());
469 CHECK_EQU(i, utf8_stream.pos());
470 int32_t c1 = uc16_stream.Advance();
471 int32_t c2 = string_stream.Advance();
472 int32_t c3 = utf8_stream.Advance();
473 i++;
474 CHECK_EQU(i, uc16_stream.pos());
475 CHECK_EQU(i, string_stream.pos());
476 CHECK_EQU(i, utf8_stream.pos());
477 CHECK_EQ(c0, c1);
478 CHECK_EQ(c0, c2);
479 CHECK_EQ(c0, c3);
480 uc16_stream.PushBack(c0);
481 string_stream.PushBack(c0);
482 utf8_stream.PushBack(c0);
483 i--;
484 CHECK_EQU(i, uc16_stream.pos());
485 CHECK_EQU(i, string_stream.pos());
486 CHECK_EQU(i, utf8_stream.pos());
487 }
488 unsigned halfway = start + sub_length / 2;
489 uc16_stream.SeekForward(halfway - i);
490 string_stream.SeekForward(halfway - i);
491 utf8_stream.SeekForward(halfway - i);
492 i = halfway;
493 CHECK_EQU(i, uc16_stream.pos());
494 CHECK_EQU(i, string_stream.pos());
495 CHECK_EQU(i, utf8_stream.pos());
496
497 while (i < end) {
498 // Read streams one char at a time
499 CHECK_EQU(i, uc16_stream.pos());
500 CHECK_EQU(i, string_stream.pos());
501 CHECK_EQU(i, utf8_stream.pos());
502 int32_t c0 = ascii_source[i];
503 int32_t c1 = uc16_stream.Advance();
504 int32_t c2 = string_stream.Advance();
505 int32_t c3 = utf8_stream.Advance();
506 i++;
507 CHECK_EQ(c0, c1);
508 CHECK_EQ(c0, c2);
509 CHECK_EQ(c0, c3);
510 CHECK_EQU(i, uc16_stream.pos());
511 CHECK_EQU(i, string_stream.pos());
512 CHECK_EQU(i, utf8_stream.pos());
513 }
514
515 int32_t c1 = uc16_stream.Advance();
516 int32_t c2 = string_stream.Advance();
517 int32_t c3 = utf8_stream.Advance();
518 CHECK_LT(c1, 0);
519 CHECK_LT(c2, 0);
520 CHECK_LT(c3, 0);
521}
522
523
524TEST(CharacterStreams) {
525 v8::HandleScope handles;
526 v8::Persistent<v8::Context> context = v8::Context::New();
527 v8::Context::Scope context_scope(context);
528
529 TestCharacterStream("abc\0\n\r\x7f", 7);
530 static const unsigned kBigStringSize = 4096;
531 char buffer[kBigStringSize + 1];
532 for (unsigned i = 0; i < kBigStringSize; i++) {
533 buffer[i] = static_cast<char>(i & 0x7f);
534 }
535 TestCharacterStream(buffer, kBigStringSize);
536
537 TestCharacterStream(buffer, kBigStringSize, 576, 3298);
538
539 TestCharacterStream("\0", 1);
540 TestCharacterStream("", 0);
541}
542
543
544TEST(Utf8CharacterStream) {
545 static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
546 static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
547
548 static const int kAllUtf8CharsSize =
549 (unibrow::Utf8::kMaxOneByteChar + 1) +
550 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
551 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
552 static const unsigned kAllUtf8CharsSizeU =
553 static_cast<unsigned>(kAllUtf8CharsSize);
554
555 char buffer[kAllUtf8CharsSizeU];
556 unsigned cursor = 0;
557 for (int i = 0; i <= kMaxUC16Char; i++) {
558 cursor += unibrow::Utf8::Encode(buffer + cursor, i);
559 }
560 ASSERT(cursor == kAllUtf8CharsSizeU);
561
562 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
563 kAllUtf8CharsSizeU);
564 for (int i = 0; i <= kMaxUC16Char; i++) {
565 CHECK_EQU(i, stream.pos());
566 int32_t c = stream.Advance();
567 CHECK_EQ(i, c);
568 CHECK_EQU(i + 1, stream.pos());
569 }
570 for (int i = kMaxUC16Char; i >= 0; i--) {
571 CHECK_EQU(i + 1, stream.pos());
572 stream.PushBack(i);
573 CHECK_EQU(i, stream.pos());
574 }
575 int i = 0;
576 while (stream.pos() < kMaxUC16CharU) {
577 CHECK_EQU(i, stream.pos());
578 unsigned progress = stream.SeekForward(12);
579 i += progress;
580 int32_t c = stream.Advance();
581 if (i <= kMaxUC16Char) {
582 CHECK_EQ(i, c);
583 } else {
584 CHECK_EQ(-1, c);
585 }
586 i += 1;
587 CHECK_EQU(i, stream.pos());
588 }
589}
590
591#undef CHECK_EQU
592
593void TestStreamScanner(i::UC16CharacterStream* stream,
594 i::Token::Value* expected_tokens,
595 int skip_pos = 0, // Zero means not skipping.
596 int skip_to = 0) {
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000597 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Steve Block9fac8402011-05-12 15:51:54 +0100598 scanner.Initialize(stream);
Ben Murdochb0fe1622011-05-05 13:52:32 +0100599
600 int i = 0;
601 do {
602 i::Token::Value expected = expected_tokens[i];
603 i::Token::Value actual = scanner.Next();
604 CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
605 if (scanner.location().end_pos == skip_pos) {
606 scanner.SeekForward(skip_to);
607 }
608 i++;
609 } while (expected_tokens[i] != i::Token::ILLEGAL);
610}
611
612TEST(StreamScanner) {
613 const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
614 i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
615 static_cast<unsigned>(strlen(str1)));
616 i::Token::Value expectations1[] = {
617 i::Token::LBRACE,
618 i::Token::IDENTIFIER,
619 i::Token::IDENTIFIER,
620 i::Token::FOR,
621 i::Token::COLON,
622 i::Token::MUL,
623 i::Token::DIV,
624 i::Token::LT,
625 i::Token::SUB,
626 i::Token::IDENTIFIER,
627 i::Token::EOS,
628 i::Token::ILLEGAL
629 };
630 TestStreamScanner(&stream1, expectations1, 0, 0);
631
632 const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
633 i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
634 static_cast<unsigned>(strlen(str2)));
635 i::Token::Value expectations2[] = {
636 i::Token::CASE,
637 i::Token::DEFAULT,
638 i::Token::CONST,
639 i::Token::LBRACE,
640 // Skipped part here
641 i::Token::RBRACE,
642 i::Token::DO,
643 i::Token::EOS,
644 i::Token::ILLEGAL
645 };
646 ASSERT_EQ('{', str2[19]);
647 ASSERT_EQ('}', str2[37]);
648 TestStreamScanner(&stream2, expectations2, 20, 37);
649
650 const char* str3 = "{}}}}";
651 i::Token::Value expectations3[] = {
652 i::Token::LBRACE,
653 i::Token::RBRACE,
654 i::Token::RBRACE,
655 i::Token::RBRACE,
656 i::Token::RBRACE,
657 i::Token::EOS,
658 i::Token::ILLEGAL
659 };
660 // Skip zero-four RBRACEs.
661 for (int i = 0; i <= 4; i++) {
662 expectations3[6 - i] = i::Token::ILLEGAL;
663 expectations3[5 - i] = i::Token::EOS;
664 i::Utf8ToUC16CharacterStream stream3(
665 reinterpret_cast<const i::byte*>(str3),
666 static_cast<unsigned>(strlen(str3)));
667 TestStreamScanner(&stream3, expectations3, 1, 1 + i);
668 }
669}
Ben Murdoch086aeea2011-05-13 15:57:08 +0100670
671
672void TestScanRegExp(const char* re_source, const char* expected) {
673 i::Utf8ToUC16CharacterStream stream(
674 reinterpret_cast<const i::byte*>(re_source),
675 static_cast<unsigned>(strlen(re_source)));
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000676 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Ben Murdoch086aeea2011-05-13 15:57:08 +0100677 scanner.Initialize(&stream);
678
679 i::Token::Value start = scanner.peek();
680 CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
681 CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
682 scanner.Next(); // Current token is now the regexp literal.
683 CHECK(scanner.is_literal_ascii());
684 i::Vector<const char> actual = scanner.literal_ascii_string();
685 for (int i = 0; i < actual.length(); i++) {
686 CHECK_NE('\0', expected[i]);
687 CHECK_EQ(expected[i], actual[i]);
688 }
689}
690
691
692TEST(RegExpScanning) {
693 // RegExp token with added garbage at the end. The scanner should only
694 // scan the RegExp until the terminating slash just before "flipperwald".
695 TestScanRegExp("/b/flipperwald", "b");
696 // Incomplete escape sequences doesn't hide the terminating slash.
697 TestScanRegExp("/\\x/flipperwald", "\\x");
698 TestScanRegExp("/\\u/flipperwald", "\\u");
699 TestScanRegExp("/\\u1/flipperwald", "\\u1");
700 TestScanRegExp("/\\u12/flipperwald", "\\u12");
701 TestScanRegExp("/\\u123/flipperwald", "\\u123");
702 TestScanRegExp("/\\c/flipperwald", "\\c");
703 TestScanRegExp("/\\c//flipperwald", "\\c");
704 // Slashes inside character classes are not terminating.
705 TestScanRegExp("/[/]/flipperwald", "[/]");
706 TestScanRegExp("/[\\s-/]/flipperwald", "[\\s-/]");
707 // Incomplete escape sequences inside a character class doesn't hide
708 // the end of the character class.
709 TestScanRegExp("/[\\c/]/flipperwald", "[\\c/]");
710 TestScanRegExp("/[\\c]/flipperwald", "[\\c]");
711 TestScanRegExp("/[\\x]/flipperwald", "[\\x]");
712 TestScanRegExp("/[\\x1]/flipperwald", "[\\x1]");
713 TestScanRegExp("/[\\u]/flipperwald", "[\\u]");
714 TestScanRegExp("/[\\u1]/flipperwald", "[\\u1]");
715 TestScanRegExp("/[\\u12]/flipperwald", "[\\u12]");
716 TestScanRegExp("/[\\u123]/flipperwald", "[\\u123]");
717 // Escaped ']'s wont end the character class.
718 TestScanRegExp("/[\\]/]/flipperwald", "[\\]/]");
719 // Escaped slashes are not terminating.
720 TestScanRegExp("/\\//flipperwald", "\\/");
721 // Starting with '=' works too.
722 TestScanRegExp("/=/", "=");
723 TestScanRegExp("/=?/", "=?");
724}