blob: 8b6afdc59c4be64aac6e286327bb2361efb77e9c [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include <stdlib.h>
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080029#include <stdio.h>
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080030#include <string.h>
Steve Blockd0582a62009-12-15 09:54:21 +000031
32#include "v8.h"
33
Steve Block44f0eee2011-05-26 01:26:41 +010034#include "isolate.h"
Steve Blockd0582a62009-12-15 09:54:21 +000035#include "token.h"
36#include "scanner.h"
Iain Merrick9ac36c92010-09-13 15:29:50 +010037#include "parser.h"
Steve Blockd0582a62009-12-15 09:54:21 +000038#include "utils.h"
Iain Merrick9ac36c92010-09-13 15:29:50 +010039#include "execution.h"
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -080040#include "preparser.h"
Steve Blockd0582a62009-12-15 09:54:21 +000041#include "cctest.h"
42
Ben Murdoch69a99ed2011-11-30 16:03:39 +000043TEST(ScanKeywords) {
Steve Blockd0582a62009-12-15 09:54:21 +000044 struct KeywordToken {
45 const char* keyword;
46 i::Token::Value token;
47 };
48
49 static const KeywordToken keywords[] = {
50#define KEYWORD(t, s, d) { s, i::Token::t },
Ben Murdoch69a99ed2011-11-30 16:03:39 +000051 TOKEN_LIST(IGNORE_TOKEN, KEYWORD)
Steve Blockd0582a62009-12-15 09:54:21 +000052#undef KEYWORD
53 { NULL, i::Token::IDENTIFIER }
54 };
55
Steve Blockd0582a62009-12-15 09:54:21 +000056 KeywordToken key_token;
Ben Murdoch69a99ed2011-11-30 16:03:39 +000057 i::UnicodeCache unicode_cache;
58 i::byte buffer[32];
Steve Blockd0582a62009-12-15 09:54:21 +000059 for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +000060 const i::byte* keyword =
61 reinterpret_cast<const i::byte*>(key_token.keyword);
62 int length = i::StrLength(key_token.keyword);
63 CHECK(static_cast<int>(sizeof(buffer)) >= length);
64 {
65 i::Utf8ToUC16CharacterStream stream(keyword, length);
66 i::JavaScriptScanner scanner(&unicode_cache);
67 // The scanner should parse 'let' as Token::LET for this test.
68 scanner.SetHarmonyBlockScoping(true);
69 scanner.Initialize(&stream);
70 CHECK_EQ(key_token.token, scanner.Next());
71 CHECK_EQ(i::Token::EOS, scanner.Next());
Steve Blockd0582a62009-12-15 09:54:21 +000072 }
Ben Murdoch69a99ed2011-11-30 16:03:39 +000073 // Removing characters will make keyword matching fail.
74 {
75 i::Utf8ToUC16CharacterStream stream(keyword, length - 1);
76 i::JavaScriptScanner scanner(&unicode_cache);
77 scanner.Initialize(&stream);
78 CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
79 CHECK_EQ(i::Token::EOS, scanner.Next());
Steve Blockd0582a62009-12-15 09:54:21 +000080 }
Ben Murdoch69a99ed2011-11-30 16:03:39 +000081 // Adding characters will make keyword matching fail.
82 static const char chars_to_append[] = { 'z', '0', '_' };
83 for (int j = 0; j < static_cast<int>(ARRAY_SIZE(chars_to_append)); ++j) {
84 memmove(buffer, keyword, length);
85 buffer[length] = chars_to_append[j];
86 i::Utf8ToUC16CharacterStream stream(buffer, length + 1);
87 i::JavaScriptScanner scanner(&unicode_cache);
88 scanner.Initialize(&stream);
89 CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
90 CHECK_EQ(i::Token::EOS, scanner.Next());
91 }
92 // Replacing characters will make keyword matching fail.
93 {
94 memmove(buffer, keyword, length);
95 buffer[length - 1] = '_';
96 i::Utf8ToUC16CharacterStream stream(buffer, length);
97 i::JavaScriptScanner scanner(&unicode_cache);
98 scanner.Initialize(&stream);
99 CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
100 CHECK_EQ(i::Token::EOS, scanner.Next());
101 }
Steve Blockd0582a62009-12-15 09:54:21 +0000102 }
Steve Blockd0582a62009-12-15 09:54:21 +0000103}
104
Iain Merrick9ac36c92010-09-13 15:29:50 +0100105
106TEST(ScanHTMLEndComments) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000107 v8::V8::Initialize();
108
Iain Merrick9ac36c92010-09-13 15:29:50 +0100109 // Regression test. See:
110 // http://code.google.com/p/chromium/issues/detail?id=53548
111 // Tests that --> is correctly interpreted as comment-to-end-of-line if there
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000112 // is only whitespace before it on the line (with comments considered as
113 // whitespace, even a multiline-comment containing a newline).
114 // This was not the case if it occurred before the first real token
Iain Merrick9ac36c92010-09-13 15:29:50 +0100115 // in the input.
116 const char* tests[] = {
117 // Before first real token.
118 "--> is eol-comment\nvar y = 37;\n",
119 "\n --> is eol-comment\nvar y = 37;\n",
120 "/* precomment */ --> is eol-comment\nvar y = 37;\n",
121 "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
122 // After first real token.
123 "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
124 "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
125 NULL
126 };
127
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000128 const char* fail_tests[] = {
129 "x --> is eol-comment\nvar y = 37;\n",
130 "\"\\n\" --> is eol-comment\nvar y = 37;\n",
131 "x/* precomment */ --> is eol-comment\nvar y = 37;\n",
132 "x/* precomment\n */ --> is eol-comment\nvar y = 37;\n",
133 "var x = 42; --> is eol-comment\nvar y = 37;\n",
134 "var x = 42; /* precomment\n */ --> is eol-comment\nvar y = 37;\n",
135 NULL
136 };
137
Iain Merrick9ac36c92010-09-13 15:29:50 +0100138 // Parser/Scanner needs a stack limit.
139 int marker;
Steve Block44f0eee2011-05-26 01:26:41 +0100140 i::Isolate::Current()->stack_guard()->SetStackLimit(
Iain Merrick9ac36c92010-09-13 15:29:50 +0100141 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
142
143 for (int i = 0; tests[i]; i++) {
144 v8::ScriptData* data =
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100145 v8::ScriptData::PreCompile(tests[i], i::StrLength(tests[i]));
Iain Merrick9ac36c92010-09-13 15:29:50 +0100146 CHECK(data != NULL && !data->HasError());
147 delete data;
148 }
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000149
150 for (int i = 0; fail_tests[i]; i++) {
151 v8::ScriptData* data =
152 v8::ScriptData::PreCompile(fail_tests[i], i::StrLength(fail_tests[i]));
153 CHECK(data == NULL || data->HasError());
154 delete data;
155 }
Iain Merrick9ac36c92010-09-13 15:29:50 +0100156}
157
158
159class ScriptResource : public v8::String::ExternalAsciiStringResource {
160 public:
161 ScriptResource(const char* data, size_t length)
162 : data_(data), length_(length) { }
163
164 const char* data() const { return data_; }
165 size_t length() const { return length_; }
166
167 private:
168 const char* data_;
169 size_t length_;
170};
171
172
173TEST(Preparsing) {
174 v8::HandleScope handles;
175 v8::Persistent<v8::Context> context = v8::Context::New();
176 v8::Context::Scope context_scope(context);
177 int marker;
Steve Block44f0eee2011-05-26 01:26:41 +0100178 i::Isolate::Current()->stack_guard()->SetStackLimit(
Iain Merrick9ac36c92010-09-13 15:29:50 +0100179 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
180
181 // Source containing functions that might be lazily compiled and all types
182 // of symbols (string, propertyName, regexp).
183 const char* source =
184 "var x = 42;"
185 "function foo(a) { return function nolazy(b) { return a + b; } }"
186 "function bar(a) { if (a) return function lazy(b) { return b; } }"
187 "var z = {'string': 'string literal', bareword: 'propertyName', "
188 " 42: 'number literal', for: 'keyword as propertyName', "
189 " f\\u006fr: 'keyword propertyname with escape'};"
190 "var v = /RegExp Literal/;"
191 "var w = /RegExp Literal\\u0020With Escape/gin;"
192 "var y = { get getter() { return 42; }, "
193 " set setter(v) { this.value = v; }};";
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100194 int source_length = i::StrLength(source);
Iain Merrick9ac36c92010-09-13 15:29:50 +0100195 const char* error_source = "var x = y z;";
Kristian Monsen0d5e1162010-09-30 15:31:59 +0100196 int error_source_length = i::StrLength(error_source);
Iain Merrick9ac36c92010-09-13 15:29:50 +0100197
198 v8::ScriptData* preparse =
199 v8::ScriptData::PreCompile(source, source_length);
200 CHECK(!preparse->HasError());
201 bool lazy_flag = i::FLAG_lazy;
202 {
203 i::FLAG_lazy = true;
204 ScriptResource* resource = new ScriptResource(source, source_length);
205 v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
206 v8::Script::Compile(script_source, NULL, preparse);
207 }
208
209 {
210 i::FLAG_lazy = false;
211
212 ScriptResource* resource = new ScriptResource(source, source_length);
213 v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
214 v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
215 }
216 delete preparse;
217 i::FLAG_lazy = lazy_flag;
218
219 // Syntax error.
220 v8::ScriptData* error_preparse =
221 v8::ScriptData::PreCompile(error_source, error_source_length);
222 CHECK(error_preparse->HasError());
223 i::ScriptDataImpl *pre_impl =
224 reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
225 i::Scanner::Location error_location =
226 pre_impl->MessageLocation();
227 // Error is at "z" in source, location 10..11.
228 CHECK_EQ(10, error_location.beg_pos);
229 CHECK_EQ(11, error_location.end_pos);
230 // Should not crash.
231 const char* message = pre_impl->BuildMessage();
232 i::Vector<const char*> args = pre_impl->BuildArgs();
233 CHECK_GT(strlen(message), 0);
234}
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800235
236
237TEST(StandAlonePreParser) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000238 v8::V8::Initialize();
239
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800240 int marker;
Steve Block44f0eee2011-05-26 01:26:41 +0100241 i::Isolate::Current()->stack_guard()->SetStackLimit(
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800242 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
243
244 const char* programs[] = {
245 "{label: 42}",
246 "var x = 42;",
247 "function foo(x, y) { return x + y; }",
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000248 "%ArgleBargle(glop);",
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800249 "var x = new new Function('this.x = 42');",
250 NULL
251 };
252
Ben Murdoch8b112d22011-06-08 16:22:53 +0100253 uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800254 for (int i = 0; programs[i]; i++) {
255 const char* program = programs[i];
Ben Murdochb0fe1622011-05-05 13:52:32 +0100256 i::Utf8ToUC16CharacterStream stream(
257 reinterpret_cast<const i::byte*>(program),
258 static_cast<unsigned>(strlen(program)));
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800259 i::CompleteParserRecorder log;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000260 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Ben Murdochb0fe1622011-05-05 13:52:32 +0100261 scanner.Initialize(&stream);
262
263 v8::preparser::PreParser::PreParseResult result =
264 v8::preparser::PreParser::PreParseProgram(&scanner,
265 &log,
266 true,
267 stack_limit);
268 CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result);
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800269 i::ScriptDataImpl data(log.ExtractData());
270 CHECK(!data.has_error());
271 }
272}
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800273
274
275TEST(RegressChromium62639) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000276 v8::V8::Initialize();
277
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800278 int marker;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100279 i::Isolate::Current()->stack_guard()->SetStackLimit(
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800280 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
281
282 const char* program = "var x = 'something';\n"
283 "escape: function() {}";
284 // Fails parsing expecting an identifier after "function".
285 // Before fix, didn't check *ok after Expect(Token::Identifier, ok),
286 // and then used the invalid currently scanned literal. This always
287 // failed in debug mode, and sometimes crashed in release mode.
288
Ben Murdochb0fe1622011-05-05 13:52:32 +0100289 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
290 static_cast<unsigned>(strlen(program)));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800291 i::ScriptDataImpl* data =
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000292 i::ParserApi::PreParse(&stream, NULL, false);
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800293 CHECK(data->HasError());
294 delete data;
295}
296
297
298TEST(Regress928) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000299 v8::V8::Initialize();
300
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800301 // Preparsing didn't consider the catch clause of a try statement
302 // as with-content, which made it assume that a function inside
303 // the block could be lazily compiled, and an extra, unexpected,
304 // entry was added to the data.
305 int marker;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100306 i::Isolate::Current()->stack_guard()->SetStackLimit(
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800307 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
308
309 const char* program =
310 "try { } catch (e) { var foo = function () { /* first */ } }"
311 "var bar = function () { /* second */ }";
312
Ben Murdochb0fe1622011-05-05 13:52:32 +0100313 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
314 static_cast<unsigned>(strlen(program)));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800315 i::ScriptDataImpl* data =
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000316 i::ParserApi::PartialPreParse(&stream, NULL, false);
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800317 CHECK(!data->HasError());
318
319 data->Initialize();
320
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100321 int first_function =
322 static_cast<int>(strstr(program, "function") - program);
323 int first_lbrace = first_function + static_cast<int>(strlen("function () "));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800324 CHECK_EQ('{', program[first_lbrace]);
325 i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
326 CHECK(!entry1.is_valid());
327
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100328 int second_function =
329 static_cast<int>(strstr(program + first_lbrace, "function") - program);
330 int second_lbrace =
331 second_function + static_cast<int>(strlen("function () "));
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800332 CHECK_EQ('{', program[second_lbrace]);
333 i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace);
334 CHECK(entry2.is_valid());
335 CHECK_EQ('}', program[entry2.end_pos() - 1]);
336 delete data;
337}
Ben Murdochb0fe1622011-05-05 13:52:32 +0100338
339
340TEST(PreParseOverflow) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000341 v8::V8::Initialize();
342
Ben Murdochb0fe1622011-05-05 13:52:32 +0100343 int marker;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100344 i::Isolate::Current()->stack_guard()->SetStackLimit(
Ben Murdochb0fe1622011-05-05 13:52:32 +0100345 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
346
347 size_t kProgramSize = 1024 * 1024;
348 i::SmartPointer<char> program(
349 reinterpret_cast<char*>(malloc(kProgramSize + 1)));
350 memset(*program, '(', kProgramSize);
351 program[kProgramSize] = '\0';
352
Ben Murdoch8b112d22011-06-08 16:22:53 +0100353 uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
Ben Murdochb0fe1622011-05-05 13:52:32 +0100354
355 i::Utf8ToUC16CharacterStream stream(
356 reinterpret_cast<const i::byte*>(*program),
357 static_cast<unsigned>(kProgramSize));
358 i::CompleteParserRecorder log;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000359 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Ben Murdochb0fe1622011-05-05 13:52:32 +0100360 scanner.Initialize(&stream);
361
362
363 v8::preparser::PreParser::PreParseResult result =
364 v8::preparser::PreParser::PreParseProgram(&scanner,
365 &log,
366 true,
367 stack_limit);
368 CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
369}
370
371
372class TestExternalResource: public v8::String::ExternalStringResource {
373 public:
374 explicit TestExternalResource(uint16_t* data, int length)
375 : data_(data), length_(static_cast<size_t>(length)) { }
376
377 ~TestExternalResource() { }
378
379 const uint16_t* data() const {
380 return data_;
381 }
382
383 size_t length() const {
384 return length_;
385 }
386 private:
387 uint16_t* data_;
388 size_t length_;
389};
390
391
392#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
393
394void TestCharacterStream(const char* ascii_source,
395 unsigned length,
396 unsigned start = 0,
397 unsigned end = 0) {
398 if (end == 0) end = length;
399 unsigned sub_length = end - start;
400 i::HandleScope test_scope;
401 i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
402 for (unsigned i = 0; i < length; i++) {
403 uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
404 }
405 i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
406 i::Handle<i::String> ascii_string(
Steve Block44f0eee2011-05-26 01:26:41 +0100407 FACTORY->NewStringFromAscii(ascii_vector));
Ben Murdochb0fe1622011-05-05 13:52:32 +0100408 TestExternalResource resource(*uc16_buffer, length);
409 i::Handle<i::String> uc16_string(
Steve Block44f0eee2011-05-26 01:26:41 +0100410 FACTORY->NewExternalStringFromTwoByte(&resource));
Ben Murdochb0fe1622011-05-05 13:52:32 +0100411
412 i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
413 i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
414 i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
415 i::Utf8ToUC16CharacterStream utf8_stream(
416 reinterpret_cast<const i::byte*>(ascii_source), end);
417 utf8_stream.SeekForward(start);
418
419 unsigned i = start;
420 while (i < end) {
421 // Read streams one char at a time
422 CHECK_EQU(i, uc16_stream.pos());
423 CHECK_EQU(i, string_stream.pos());
424 CHECK_EQU(i, utf8_stream.pos());
425 int32_t c0 = ascii_source[i];
426 int32_t c1 = uc16_stream.Advance();
427 int32_t c2 = string_stream.Advance();
428 int32_t c3 = utf8_stream.Advance();
429 i++;
430 CHECK_EQ(c0, c1);
431 CHECK_EQ(c0, c2);
432 CHECK_EQ(c0, c3);
433 CHECK_EQU(i, uc16_stream.pos());
434 CHECK_EQU(i, string_stream.pos());
435 CHECK_EQU(i, utf8_stream.pos());
436 }
437 while (i > start + sub_length / 4) {
438 // Pushback, re-read, pushback again.
439 int32_t c0 = ascii_source[i - 1];
440 CHECK_EQU(i, uc16_stream.pos());
441 CHECK_EQU(i, string_stream.pos());
442 CHECK_EQU(i, utf8_stream.pos());
443 uc16_stream.PushBack(c0);
444 string_stream.PushBack(c0);
445 utf8_stream.PushBack(c0);
446 i--;
447 CHECK_EQU(i, uc16_stream.pos());
448 CHECK_EQU(i, string_stream.pos());
449 CHECK_EQU(i, utf8_stream.pos());
450 int32_t c1 = uc16_stream.Advance();
451 int32_t c2 = string_stream.Advance();
452 int32_t c3 = utf8_stream.Advance();
453 i++;
454 CHECK_EQU(i, uc16_stream.pos());
455 CHECK_EQU(i, string_stream.pos());
456 CHECK_EQU(i, utf8_stream.pos());
457 CHECK_EQ(c0, c1);
458 CHECK_EQ(c0, c2);
459 CHECK_EQ(c0, c3);
460 uc16_stream.PushBack(c0);
461 string_stream.PushBack(c0);
462 utf8_stream.PushBack(c0);
463 i--;
464 CHECK_EQU(i, uc16_stream.pos());
465 CHECK_EQU(i, string_stream.pos());
466 CHECK_EQU(i, utf8_stream.pos());
467 }
468 unsigned halfway = start + sub_length / 2;
469 uc16_stream.SeekForward(halfway - i);
470 string_stream.SeekForward(halfway - i);
471 utf8_stream.SeekForward(halfway - i);
472 i = halfway;
473 CHECK_EQU(i, uc16_stream.pos());
474 CHECK_EQU(i, string_stream.pos());
475 CHECK_EQU(i, utf8_stream.pos());
476
477 while (i < end) {
478 // Read streams one char at a time
479 CHECK_EQU(i, uc16_stream.pos());
480 CHECK_EQU(i, string_stream.pos());
481 CHECK_EQU(i, utf8_stream.pos());
482 int32_t c0 = ascii_source[i];
483 int32_t c1 = uc16_stream.Advance();
484 int32_t c2 = string_stream.Advance();
485 int32_t c3 = utf8_stream.Advance();
486 i++;
487 CHECK_EQ(c0, c1);
488 CHECK_EQ(c0, c2);
489 CHECK_EQ(c0, c3);
490 CHECK_EQU(i, uc16_stream.pos());
491 CHECK_EQU(i, string_stream.pos());
492 CHECK_EQU(i, utf8_stream.pos());
493 }
494
495 int32_t c1 = uc16_stream.Advance();
496 int32_t c2 = string_stream.Advance();
497 int32_t c3 = utf8_stream.Advance();
498 CHECK_LT(c1, 0);
499 CHECK_LT(c2, 0);
500 CHECK_LT(c3, 0);
501}
502
503
504TEST(CharacterStreams) {
505 v8::HandleScope handles;
506 v8::Persistent<v8::Context> context = v8::Context::New();
507 v8::Context::Scope context_scope(context);
508
509 TestCharacterStream("abc\0\n\r\x7f", 7);
510 static const unsigned kBigStringSize = 4096;
511 char buffer[kBigStringSize + 1];
512 for (unsigned i = 0; i < kBigStringSize; i++) {
513 buffer[i] = static_cast<char>(i & 0x7f);
514 }
515 TestCharacterStream(buffer, kBigStringSize);
516
517 TestCharacterStream(buffer, kBigStringSize, 576, 3298);
518
519 TestCharacterStream("\0", 1);
520 TestCharacterStream("", 0);
521}
522
523
524TEST(Utf8CharacterStream) {
525 static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
526 static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
527
528 static const int kAllUtf8CharsSize =
529 (unibrow::Utf8::kMaxOneByteChar + 1) +
530 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
531 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
532 static const unsigned kAllUtf8CharsSizeU =
533 static_cast<unsigned>(kAllUtf8CharsSize);
534
535 char buffer[kAllUtf8CharsSizeU];
536 unsigned cursor = 0;
537 for (int i = 0; i <= kMaxUC16Char; i++) {
538 cursor += unibrow::Utf8::Encode(buffer + cursor, i);
539 }
540 ASSERT(cursor == kAllUtf8CharsSizeU);
541
542 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
543 kAllUtf8CharsSizeU);
544 for (int i = 0; i <= kMaxUC16Char; i++) {
545 CHECK_EQU(i, stream.pos());
546 int32_t c = stream.Advance();
547 CHECK_EQ(i, c);
548 CHECK_EQU(i + 1, stream.pos());
549 }
550 for (int i = kMaxUC16Char; i >= 0; i--) {
551 CHECK_EQU(i + 1, stream.pos());
552 stream.PushBack(i);
553 CHECK_EQU(i, stream.pos());
554 }
555 int i = 0;
556 while (stream.pos() < kMaxUC16CharU) {
557 CHECK_EQU(i, stream.pos());
558 unsigned progress = stream.SeekForward(12);
559 i += progress;
560 int32_t c = stream.Advance();
561 if (i <= kMaxUC16Char) {
562 CHECK_EQ(i, c);
563 } else {
564 CHECK_EQ(-1, c);
565 }
566 i += 1;
567 CHECK_EQU(i, stream.pos());
568 }
569}
570
571#undef CHECK_EQU
572
573void TestStreamScanner(i::UC16CharacterStream* stream,
574 i::Token::Value* expected_tokens,
575 int skip_pos = 0, // Zero means not skipping.
576 int skip_to = 0) {
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000577 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Steve Block9fac8402011-05-12 15:51:54 +0100578 scanner.Initialize(stream);
Ben Murdochb0fe1622011-05-05 13:52:32 +0100579
580 int i = 0;
581 do {
582 i::Token::Value expected = expected_tokens[i];
583 i::Token::Value actual = scanner.Next();
584 CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
585 if (scanner.location().end_pos == skip_pos) {
586 scanner.SeekForward(skip_to);
587 }
588 i++;
589 } while (expected_tokens[i] != i::Token::ILLEGAL);
590}
591
592TEST(StreamScanner) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000593 v8::V8::Initialize();
594
Ben Murdochb0fe1622011-05-05 13:52:32 +0100595 const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
596 i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
597 static_cast<unsigned>(strlen(str1)));
598 i::Token::Value expectations1[] = {
599 i::Token::LBRACE,
600 i::Token::IDENTIFIER,
601 i::Token::IDENTIFIER,
602 i::Token::FOR,
603 i::Token::COLON,
604 i::Token::MUL,
605 i::Token::DIV,
606 i::Token::LT,
607 i::Token::SUB,
608 i::Token::IDENTIFIER,
609 i::Token::EOS,
610 i::Token::ILLEGAL
611 };
612 TestStreamScanner(&stream1, expectations1, 0, 0);
613
614 const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
615 i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
616 static_cast<unsigned>(strlen(str2)));
617 i::Token::Value expectations2[] = {
618 i::Token::CASE,
619 i::Token::DEFAULT,
620 i::Token::CONST,
621 i::Token::LBRACE,
622 // Skipped part here
623 i::Token::RBRACE,
624 i::Token::DO,
625 i::Token::EOS,
626 i::Token::ILLEGAL
627 };
628 ASSERT_EQ('{', str2[19]);
629 ASSERT_EQ('}', str2[37]);
630 TestStreamScanner(&stream2, expectations2, 20, 37);
631
632 const char* str3 = "{}}}}";
633 i::Token::Value expectations3[] = {
634 i::Token::LBRACE,
635 i::Token::RBRACE,
636 i::Token::RBRACE,
637 i::Token::RBRACE,
638 i::Token::RBRACE,
639 i::Token::EOS,
640 i::Token::ILLEGAL
641 };
642 // Skip zero-four RBRACEs.
643 for (int i = 0; i <= 4; i++) {
644 expectations3[6 - i] = i::Token::ILLEGAL;
645 expectations3[5 - i] = i::Token::EOS;
646 i::Utf8ToUC16CharacterStream stream3(
647 reinterpret_cast<const i::byte*>(str3),
648 static_cast<unsigned>(strlen(str3)));
649 TestStreamScanner(&stream3, expectations3, 1, 1 + i);
650 }
651}
Ben Murdoch086aeea2011-05-13 15:57:08 +0100652
653
654void TestScanRegExp(const char* re_source, const char* expected) {
655 i::Utf8ToUC16CharacterStream stream(
656 reinterpret_cast<const i::byte*>(re_source),
657 static_cast<unsigned>(strlen(re_source)));
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000658 i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
Ben Murdoch086aeea2011-05-13 15:57:08 +0100659 scanner.Initialize(&stream);
660
661 i::Token::Value start = scanner.peek();
662 CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
663 CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
664 scanner.Next(); // Current token is now the regexp literal.
665 CHECK(scanner.is_literal_ascii());
666 i::Vector<const char> actual = scanner.literal_ascii_string();
667 for (int i = 0; i < actual.length(); i++) {
668 CHECK_NE('\0', expected[i]);
669 CHECK_EQ(expected[i], actual[i]);
670 }
671}
672
673
674TEST(RegExpScanning) {
Ben Murdoch69a99ed2011-11-30 16:03:39 +0000675 v8::V8::Initialize();
676
Ben Murdoch086aeea2011-05-13 15:57:08 +0100677 // RegExp token with added garbage at the end. The scanner should only
678 // scan the RegExp until the terminating slash just before "flipperwald".
679 TestScanRegExp("/b/flipperwald", "b");
680 // Incomplete escape sequences doesn't hide the terminating slash.
681 TestScanRegExp("/\\x/flipperwald", "\\x");
682 TestScanRegExp("/\\u/flipperwald", "\\u");
683 TestScanRegExp("/\\u1/flipperwald", "\\u1");
684 TestScanRegExp("/\\u12/flipperwald", "\\u12");
685 TestScanRegExp("/\\u123/flipperwald", "\\u123");
686 TestScanRegExp("/\\c/flipperwald", "\\c");
687 TestScanRegExp("/\\c//flipperwald", "\\c");
688 // Slashes inside character classes are not terminating.
689 TestScanRegExp("/[/]/flipperwald", "[/]");
690 TestScanRegExp("/[\\s-/]/flipperwald", "[\\s-/]");
691 // Incomplete escape sequences inside a character class doesn't hide
692 // the end of the character class.
693 TestScanRegExp("/[\\c/]/flipperwald", "[\\c/]");
694 TestScanRegExp("/[\\c]/flipperwald", "[\\c]");
695 TestScanRegExp("/[\\x]/flipperwald", "[\\x]");
696 TestScanRegExp("/[\\x1]/flipperwald", "[\\x1]");
697 TestScanRegExp("/[\\u]/flipperwald", "[\\u]");
698 TestScanRegExp("/[\\u1]/flipperwald", "[\\u1]");
699 TestScanRegExp("/[\\u12]/flipperwald", "[\\u12]");
700 TestScanRegExp("/[\\u123]/flipperwald", "[\\u123]");
701 // Escaped ']'s wont end the character class.
702 TestScanRegExp("/[\\]/]/flipperwald", "[\\]/]");
703 // Escaped slashes are not terminating.
704 TestScanRegExp("/\\//flipperwald", "\\/");
705 // Starting with '=' works too.
706 TestScanRegExp("/=/", "=");
707 TestScanRegExp("/=?/", "=?");
708}