Steve Block | d0582a6 | 2009-12-15 09:54:21 +0000 | [diff] [blame] | 1 | // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 | // Redistribution and use in source and binary forms, with or without |
| 3 | // modification, are permitted provided that the following conditions are |
| 4 | // met: |
| 5 | // |
| 6 | // * Redistributions of source code must retain the above copyright |
| 7 | // notice, this list of conditions and the following disclaimer. |
| 8 | // * Redistributions in binary form must reproduce the above |
| 9 | // copyright notice, this list of conditions and the following |
| 10 | // disclaimer in the documentation and/or other materials provided |
| 11 | // with the distribution. |
| 12 | // * Neither the name of Google Inc. nor the names of its |
| 13 | // contributors may be used to endorse or promote products derived |
| 14 | // from this software without specific prior written permission. |
| 15 | // |
| 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | |
| 28 | #include <stdlib.h> |
Teng-Hui Zhu | 3e5fa29 | 2010-11-09 16:16:48 -0800 | [diff] [blame] | 29 | #include <stdio.h> |
Shimeng (Simon) Wang | 8a31eba | 2010-12-06 19:01:33 -0800 | [diff] [blame] | 30 | #include <string.h> |
Steve Block | d0582a6 | 2009-12-15 09:54:21 +0000 | [diff] [blame] | 31 | |
| 32 | #include "v8.h" |
| 33 | |
| 34 | #include "token.h" |
| 35 | #include "scanner.h" |
Iain Merrick | 9ac36c9 | 2010-09-13 15:29:50 +0100 | [diff] [blame] | 36 | #include "parser.h" |
Steve Block | d0582a6 | 2009-12-15 09:54:21 +0000 | [diff] [blame] | 37 | #include "utils.h" |
Iain Merrick | 9ac36c9 | 2010-09-13 15:29:50 +0100 | [diff] [blame] | 38 | #include "execution.h" |
Teng-Hui Zhu | 3e5fa29 | 2010-11-09 16:16:48 -0800 | [diff] [blame] | 39 | #include "preparser.h" |
Steve Block | d0582a6 | 2009-12-15 09:54:21 +0000 | [diff] [blame] | 40 | #include "cctest.h" |
| 41 | |
| 42 | namespace i = ::v8::internal; |
| 43 | |
| 44 | TEST(KeywordMatcher) { |
| 45 | struct KeywordToken { |
| 46 | const char* keyword; |
| 47 | i::Token::Value token; |
| 48 | }; |
| 49 | |
| 50 | static const KeywordToken keywords[] = { |
| 51 | #define KEYWORD(t, s, d) { s, i::Token::t }, |
| 52 | #define IGNORE(t, s, d) /* */ |
| 53 | TOKEN_LIST(IGNORE, KEYWORD, IGNORE) |
| 54 | #undef KEYWORD |
| 55 | { NULL, i::Token::IDENTIFIER } |
| 56 | }; |
| 57 | |
| 58 | static const char* future_keywords[] = { |
| 59 | #define FUTURE(t, s, d) s, |
| 60 | TOKEN_LIST(IGNORE, IGNORE, FUTURE) |
| 61 | #undef FUTURE |
| 62 | #undef IGNORE |
| 63 | NULL |
| 64 | }; |
| 65 | |
| 66 | KeywordToken key_token; |
| 67 | for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) { |
| 68 | i::KeywordMatcher matcher; |
| 69 | const char* keyword = key_token.keyword; |
| 70 | int length = i::StrLength(keyword); |
| 71 | for (int j = 0; j < length; j++) { |
| 72 | if (key_token.token == i::Token::INSTANCEOF && j == 2) { |
| 73 | // "in" is a prefix of "instanceof". It's the only keyword |
| 74 | // that is a prefix of another. |
| 75 | CHECK_EQ(i::Token::IN, matcher.token()); |
| 76 | } else { |
| 77 | CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); |
| 78 | } |
| 79 | matcher.AddChar(keyword[j]); |
| 80 | } |
| 81 | CHECK_EQ(key_token.token, matcher.token()); |
| 82 | // Adding more characters will make keyword matching fail. |
| 83 | matcher.AddChar('z'); |
| 84 | CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); |
| 85 | // Adding a keyword later will not make it match again. |
| 86 | matcher.AddChar('i'); |
| 87 | matcher.AddChar('f'); |
| 88 | CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); |
| 89 | } |
| 90 | |
| 91 | // Future keywords are not recognized. |
| 92 | const char* future_keyword; |
| 93 | for (int i = 0; (future_keyword = future_keywords[i]) != NULL; i++) { |
| 94 | i::KeywordMatcher matcher; |
| 95 | int length = i::StrLength(future_keyword); |
| 96 | for (int j = 0; j < length; j++) { |
| 97 | matcher.AddChar(future_keyword[j]); |
| 98 | } |
| 99 | CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); |
| 100 | } |
| 101 | |
| 102 | // Zero isn't ignored at first. |
| 103 | i::KeywordMatcher bad_start; |
| 104 | bad_start.AddChar(0); |
| 105 | CHECK_EQ(i::Token::IDENTIFIER, bad_start.token()); |
| 106 | bad_start.AddChar('i'); |
| 107 | bad_start.AddChar('f'); |
| 108 | CHECK_EQ(i::Token::IDENTIFIER, bad_start.token()); |
| 109 | |
| 110 | // Zero isn't ignored at end. |
| 111 | i::KeywordMatcher bad_end; |
| 112 | bad_end.AddChar('i'); |
| 113 | bad_end.AddChar('f'); |
| 114 | CHECK_EQ(i::Token::IF, bad_end.token()); |
| 115 | bad_end.AddChar(0); |
| 116 | CHECK_EQ(i::Token::IDENTIFIER, bad_end.token()); |
| 117 | |
| 118 | // Case isn't ignored. |
| 119 | i::KeywordMatcher bad_case; |
| 120 | bad_case.AddChar('i'); |
| 121 | bad_case.AddChar('F'); |
| 122 | CHECK_EQ(i::Token::IDENTIFIER, bad_case.token()); |
| 123 | |
| 124 | // If we mark it as failure, continuing won't help. |
| 125 | i::KeywordMatcher full_stop; |
| 126 | full_stop.AddChar('i'); |
| 127 | CHECK_EQ(i::Token::IDENTIFIER, full_stop.token()); |
| 128 | full_stop.Fail(); |
| 129 | CHECK_EQ(i::Token::IDENTIFIER, full_stop.token()); |
| 130 | full_stop.AddChar('f'); |
| 131 | CHECK_EQ(i::Token::IDENTIFIER, full_stop.token()); |
| 132 | } |
| 133 | |
Iain Merrick | 9ac36c9 | 2010-09-13 15:29:50 +0100 | [diff] [blame] | 134 | |
| 135 | TEST(ScanHTMLEndComments) { |
| 136 | // Regression test. See: |
| 137 | // http://code.google.com/p/chromium/issues/detail?id=53548 |
| 138 | // Tests that --> is correctly interpreted as comment-to-end-of-line if there |
| 139 | // is only whitespace before it on the line, even after a multiline-comment |
| 140 | // comment. This was not the case if it occurred before the first real token |
| 141 | // in the input. |
| 142 | const char* tests[] = { |
| 143 | // Before first real token. |
| 144 | "--> is eol-comment\nvar y = 37;\n", |
| 145 | "\n --> is eol-comment\nvar y = 37;\n", |
| 146 | "/* precomment */ --> is eol-comment\nvar y = 37;\n", |
| 147 | "\n/* precomment */ --> is eol-comment\nvar y = 37;\n", |
| 148 | // After first real token. |
| 149 | "var x = 42;\n--> is eol-comment\nvar y = 37;\n", |
| 150 | "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n", |
| 151 | NULL |
| 152 | }; |
| 153 | |
| 154 | // Parser/Scanner needs a stack limit. |
| 155 | int marker; |
| 156 | i::StackGuard::SetStackLimit( |
| 157 | reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); |
| 158 | |
| 159 | for (int i = 0; tests[i]; i++) { |
| 160 | v8::ScriptData* data = |
Kristian Monsen | 0d5e116 | 2010-09-30 15:31:59 +0100 | [diff] [blame] | 161 | v8::ScriptData::PreCompile(tests[i], i::StrLength(tests[i])); |
Iain Merrick | 9ac36c9 | 2010-09-13 15:29:50 +0100 | [diff] [blame] | 162 | CHECK(data != NULL && !data->HasError()); |
| 163 | delete data; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | |
| 168 | class ScriptResource : public v8::String::ExternalAsciiStringResource { |
| 169 | public: |
| 170 | ScriptResource(const char* data, size_t length) |
| 171 | : data_(data), length_(length) { } |
| 172 | |
| 173 | const char* data() const { return data_; } |
| 174 | size_t length() const { return length_; } |
| 175 | |
| 176 | private: |
| 177 | const char* data_; |
| 178 | size_t length_; |
| 179 | }; |
| 180 | |
| 181 | |
| 182 | TEST(Preparsing) { |
| 183 | v8::HandleScope handles; |
| 184 | v8::Persistent<v8::Context> context = v8::Context::New(); |
| 185 | v8::Context::Scope context_scope(context); |
| 186 | int marker; |
| 187 | i::StackGuard::SetStackLimit( |
| 188 | reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); |
| 189 | |
| 190 | // Source containing functions that might be lazily compiled and all types |
| 191 | // of symbols (string, propertyName, regexp). |
| 192 | const char* source = |
| 193 | "var x = 42;" |
| 194 | "function foo(a) { return function nolazy(b) { return a + b; } }" |
| 195 | "function bar(a) { if (a) return function lazy(b) { return b; } }" |
| 196 | "var z = {'string': 'string literal', bareword: 'propertyName', " |
| 197 | " 42: 'number literal', for: 'keyword as propertyName', " |
| 198 | " f\\u006fr: 'keyword propertyname with escape'};" |
| 199 | "var v = /RegExp Literal/;" |
| 200 | "var w = /RegExp Literal\\u0020With Escape/gin;" |
| 201 | "var y = { get getter() { return 42; }, " |
| 202 | " set setter(v) { this.value = v; }};"; |
Kristian Monsen | 0d5e116 | 2010-09-30 15:31:59 +0100 | [diff] [blame] | 203 | int source_length = i::StrLength(source); |
Iain Merrick | 9ac36c9 | 2010-09-13 15:29:50 +0100 | [diff] [blame] | 204 | const char* error_source = "var x = y z;"; |
Kristian Monsen | 0d5e116 | 2010-09-30 15:31:59 +0100 | [diff] [blame] | 205 | int error_source_length = i::StrLength(error_source); |
Iain Merrick | 9ac36c9 | 2010-09-13 15:29:50 +0100 | [diff] [blame] | 206 | |
| 207 | v8::ScriptData* preparse = |
| 208 | v8::ScriptData::PreCompile(source, source_length); |
| 209 | CHECK(!preparse->HasError()); |
| 210 | bool lazy_flag = i::FLAG_lazy; |
| 211 | { |
| 212 | i::FLAG_lazy = true; |
| 213 | ScriptResource* resource = new ScriptResource(source, source_length); |
| 214 | v8::Local<v8::String> script_source = v8::String::NewExternal(resource); |
| 215 | v8::Script::Compile(script_source, NULL, preparse); |
| 216 | } |
| 217 | |
| 218 | { |
| 219 | i::FLAG_lazy = false; |
| 220 | |
| 221 | ScriptResource* resource = new ScriptResource(source, source_length); |
| 222 | v8::Local<v8::String> script_source = v8::String::NewExternal(resource); |
| 223 | v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>()); |
| 224 | } |
| 225 | delete preparse; |
| 226 | i::FLAG_lazy = lazy_flag; |
| 227 | |
| 228 | // Syntax error. |
| 229 | v8::ScriptData* error_preparse = |
| 230 | v8::ScriptData::PreCompile(error_source, error_source_length); |
| 231 | CHECK(error_preparse->HasError()); |
| 232 | i::ScriptDataImpl *pre_impl = |
| 233 | reinterpret_cast<i::ScriptDataImpl*>(error_preparse); |
| 234 | i::Scanner::Location error_location = |
| 235 | pre_impl->MessageLocation(); |
| 236 | // Error is at "z" in source, location 10..11. |
| 237 | CHECK_EQ(10, error_location.beg_pos); |
| 238 | CHECK_EQ(11, error_location.end_pos); |
| 239 | // Should not crash. |
| 240 | const char* message = pre_impl->BuildMessage(); |
| 241 | i::Vector<const char*> args = pre_impl->BuildArgs(); |
| 242 | CHECK_GT(strlen(message), 0); |
| 243 | } |
Teng-Hui Zhu | 3e5fa29 | 2010-11-09 16:16:48 -0800 | [diff] [blame] | 244 | |
| 245 | |
| 246 | TEST(StandAlonePreParser) { |
| 247 | int marker; |
| 248 | i::StackGuard::SetStackLimit( |
| 249 | reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); |
| 250 | |
| 251 | const char* programs[] = { |
| 252 | "{label: 42}", |
| 253 | "var x = 42;", |
| 254 | "function foo(x, y) { return x + y; }", |
| 255 | "native function foo(); return %ArgleBargle(glop);", |
| 256 | "var x = new new Function('this.x = 42');", |
| 257 | NULL |
| 258 | }; |
| 259 | |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 260 | uintptr_t stack_limit = i::StackGuard::real_climit(); |
Teng-Hui Zhu | 3e5fa29 | 2010-11-09 16:16:48 -0800 | [diff] [blame] | 261 | for (int i = 0; programs[i]; i++) { |
| 262 | const char* program = programs[i]; |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 263 | i::Utf8ToUC16CharacterStream stream( |
| 264 | reinterpret_cast<const i::byte*>(program), |
| 265 | static_cast<unsigned>(strlen(program))); |
Teng-Hui Zhu | 3e5fa29 | 2010-11-09 16:16:48 -0800 | [diff] [blame] | 266 | i::CompleteParserRecorder log; |
Shimeng (Simon) Wang | 8a31eba | 2010-12-06 19:01:33 -0800 | [diff] [blame] | 267 | i::V8JavaScriptScanner scanner; |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 268 | scanner.Initialize(&stream); |
| 269 | |
| 270 | v8::preparser::PreParser::PreParseResult result = |
| 271 | v8::preparser::PreParser::PreParseProgram(&scanner, |
| 272 | &log, |
| 273 | true, |
| 274 | stack_limit); |
| 275 | CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result); |
Teng-Hui Zhu | 3e5fa29 | 2010-11-09 16:16:48 -0800 | [diff] [blame] | 276 | i::ScriptDataImpl data(log.ExtractData()); |
| 277 | CHECK(!data.has_error()); |
| 278 | } |
| 279 | } |
Shimeng (Simon) Wang | 8a31eba | 2010-12-06 19:01:33 -0800 | [diff] [blame] | 280 | |
| 281 | |
| 282 | TEST(RegressChromium62639) { |
| 283 | int marker; |
| 284 | i::StackGuard::SetStackLimit( |
| 285 | reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); |
| 286 | |
| 287 | const char* program = "var x = 'something';\n" |
| 288 | "escape: function() {}"; |
| 289 | // Fails parsing expecting an identifier after "function". |
| 290 | // Before fix, didn't check *ok after Expect(Token::Identifier, ok), |
| 291 | // and then used the invalid currently scanned literal. This always |
| 292 | // failed in debug mode, and sometimes crashed in release mode. |
| 293 | |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 294 | i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program), |
| 295 | static_cast<unsigned>(strlen(program))); |
Shimeng (Simon) Wang | 8a31eba | 2010-12-06 19:01:33 -0800 | [diff] [blame] | 296 | i::ScriptDataImpl* data = |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 297 | i::ParserApi::PreParse(&stream, NULL); |
Shimeng (Simon) Wang | 8a31eba | 2010-12-06 19:01:33 -0800 | [diff] [blame] | 298 | CHECK(data->HasError()); |
| 299 | delete data; |
| 300 | } |
| 301 | |
| 302 | |
| 303 | TEST(Regress928) { |
| 304 | // Preparsing didn't consider the catch clause of a try statement |
| 305 | // as with-content, which made it assume that a function inside |
| 306 | // the block could be lazily compiled, and an extra, unexpected, |
| 307 | // entry was added to the data. |
| 308 | int marker; |
| 309 | i::StackGuard::SetStackLimit( |
| 310 | reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); |
| 311 | |
| 312 | const char* program = |
| 313 | "try { } catch (e) { var foo = function () { /* first */ } }" |
| 314 | "var bar = function () { /* second */ }"; |
| 315 | |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 316 | i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program), |
| 317 | static_cast<unsigned>(strlen(program))); |
Shimeng (Simon) Wang | 8a31eba | 2010-12-06 19:01:33 -0800 | [diff] [blame] | 318 | i::ScriptDataImpl* data = |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 319 | i::ParserApi::PartialPreParse(&stream, NULL); |
Shimeng (Simon) Wang | 8a31eba | 2010-12-06 19:01:33 -0800 | [diff] [blame] | 320 | CHECK(!data->HasError()); |
| 321 | |
| 322 | data->Initialize(); |
| 323 | |
| 324 | int first_function = strstr(program, "function") - program; |
| 325 | int first_lbrace = first_function + strlen("function () "); |
| 326 | CHECK_EQ('{', program[first_lbrace]); |
| 327 | i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace); |
| 328 | CHECK(!entry1.is_valid()); |
| 329 | |
| 330 | int second_function = strstr(program + first_lbrace, "function") - program; |
| 331 | int second_lbrace = second_function + strlen("function () "); |
| 332 | CHECK_EQ('{', program[second_lbrace]); |
| 333 | i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace); |
| 334 | CHECK(entry2.is_valid()); |
| 335 | CHECK_EQ('}', program[entry2.end_pos() - 1]); |
| 336 | delete data; |
| 337 | } |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 338 | |
| 339 | |
| 340 | TEST(PreParseOverflow) { |
| 341 | int marker; |
| 342 | i::StackGuard::SetStackLimit( |
| 343 | reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); |
| 344 | |
| 345 | size_t kProgramSize = 1024 * 1024; |
| 346 | i::SmartPointer<char> program( |
| 347 | reinterpret_cast<char*>(malloc(kProgramSize + 1))); |
| 348 | memset(*program, '(', kProgramSize); |
| 349 | program[kProgramSize] = '\0'; |
| 350 | |
| 351 | uintptr_t stack_limit = i::StackGuard::real_climit(); |
| 352 | |
| 353 | i::Utf8ToUC16CharacterStream stream( |
| 354 | reinterpret_cast<const i::byte*>(*program), |
| 355 | static_cast<unsigned>(kProgramSize)); |
| 356 | i::CompleteParserRecorder log; |
| 357 | i::V8JavaScriptScanner scanner; |
| 358 | scanner.Initialize(&stream); |
| 359 | |
| 360 | |
| 361 | v8::preparser::PreParser::PreParseResult result = |
| 362 | v8::preparser::PreParser::PreParseProgram(&scanner, |
| 363 | &log, |
| 364 | true, |
| 365 | stack_limit); |
| 366 | CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result); |
| 367 | } |
| 368 | |
| 369 | |
| 370 | class TestExternalResource: public v8::String::ExternalStringResource { |
| 371 | public: |
| 372 | explicit TestExternalResource(uint16_t* data, int length) |
| 373 | : data_(data), length_(static_cast<size_t>(length)) { } |
| 374 | |
| 375 | ~TestExternalResource() { } |
| 376 | |
| 377 | const uint16_t* data() const { |
| 378 | return data_; |
| 379 | } |
| 380 | |
| 381 | size_t length() const { |
| 382 | return length_; |
| 383 | } |
| 384 | private: |
| 385 | uint16_t* data_; |
| 386 | size_t length_; |
| 387 | }; |
| 388 | |
| 389 | |
| 390 | #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2)) |
| 391 | |
| 392 | void TestCharacterStream(const char* ascii_source, |
| 393 | unsigned length, |
| 394 | unsigned start = 0, |
| 395 | unsigned end = 0) { |
| 396 | if (end == 0) end = length; |
| 397 | unsigned sub_length = end - start; |
| 398 | i::HandleScope test_scope; |
| 399 | i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]); |
| 400 | for (unsigned i = 0; i < length; i++) { |
| 401 | uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]); |
| 402 | } |
| 403 | i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length)); |
| 404 | i::Handle<i::String> ascii_string( |
| 405 | i::Factory::NewStringFromAscii(ascii_vector)); |
| 406 | TestExternalResource resource(*uc16_buffer, length); |
| 407 | i::Handle<i::String> uc16_string( |
| 408 | i::Factory::NewExternalStringFromTwoByte(&resource)); |
| 409 | |
| 410 | i::ExternalTwoByteStringUC16CharacterStream uc16_stream( |
| 411 | i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end); |
| 412 | i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end); |
| 413 | i::Utf8ToUC16CharacterStream utf8_stream( |
| 414 | reinterpret_cast<const i::byte*>(ascii_source), end); |
| 415 | utf8_stream.SeekForward(start); |
| 416 | |
| 417 | unsigned i = start; |
| 418 | while (i < end) { |
| 419 | // Read streams one char at a time |
| 420 | CHECK_EQU(i, uc16_stream.pos()); |
| 421 | CHECK_EQU(i, string_stream.pos()); |
| 422 | CHECK_EQU(i, utf8_stream.pos()); |
| 423 | int32_t c0 = ascii_source[i]; |
| 424 | int32_t c1 = uc16_stream.Advance(); |
| 425 | int32_t c2 = string_stream.Advance(); |
| 426 | int32_t c3 = utf8_stream.Advance(); |
| 427 | i++; |
| 428 | CHECK_EQ(c0, c1); |
| 429 | CHECK_EQ(c0, c2); |
| 430 | CHECK_EQ(c0, c3); |
| 431 | CHECK_EQU(i, uc16_stream.pos()); |
| 432 | CHECK_EQU(i, string_stream.pos()); |
| 433 | CHECK_EQU(i, utf8_stream.pos()); |
| 434 | } |
| 435 | while (i > start + sub_length / 4) { |
| 436 | // Pushback, re-read, pushback again. |
| 437 | int32_t c0 = ascii_source[i - 1]; |
| 438 | CHECK_EQU(i, uc16_stream.pos()); |
| 439 | CHECK_EQU(i, string_stream.pos()); |
| 440 | CHECK_EQU(i, utf8_stream.pos()); |
| 441 | uc16_stream.PushBack(c0); |
| 442 | string_stream.PushBack(c0); |
| 443 | utf8_stream.PushBack(c0); |
| 444 | i--; |
| 445 | CHECK_EQU(i, uc16_stream.pos()); |
| 446 | CHECK_EQU(i, string_stream.pos()); |
| 447 | CHECK_EQU(i, utf8_stream.pos()); |
| 448 | int32_t c1 = uc16_stream.Advance(); |
| 449 | int32_t c2 = string_stream.Advance(); |
| 450 | int32_t c3 = utf8_stream.Advance(); |
| 451 | i++; |
| 452 | CHECK_EQU(i, uc16_stream.pos()); |
| 453 | CHECK_EQU(i, string_stream.pos()); |
| 454 | CHECK_EQU(i, utf8_stream.pos()); |
| 455 | CHECK_EQ(c0, c1); |
| 456 | CHECK_EQ(c0, c2); |
| 457 | CHECK_EQ(c0, c3); |
| 458 | uc16_stream.PushBack(c0); |
| 459 | string_stream.PushBack(c0); |
| 460 | utf8_stream.PushBack(c0); |
| 461 | i--; |
| 462 | CHECK_EQU(i, uc16_stream.pos()); |
| 463 | CHECK_EQU(i, string_stream.pos()); |
| 464 | CHECK_EQU(i, utf8_stream.pos()); |
| 465 | } |
| 466 | unsigned halfway = start + sub_length / 2; |
| 467 | uc16_stream.SeekForward(halfway - i); |
| 468 | string_stream.SeekForward(halfway - i); |
| 469 | utf8_stream.SeekForward(halfway - i); |
| 470 | i = halfway; |
| 471 | CHECK_EQU(i, uc16_stream.pos()); |
| 472 | CHECK_EQU(i, string_stream.pos()); |
| 473 | CHECK_EQU(i, utf8_stream.pos()); |
| 474 | |
| 475 | while (i < end) { |
| 476 | // Read streams one char at a time |
| 477 | CHECK_EQU(i, uc16_stream.pos()); |
| 478 | CHECK_EQU(i, string_stream.pos()); |
| 479 | CHECK_EQU(i, utf8_stream.pos()); |
| 480 | int32_t c0 = ascii_source[i]; |
| 481 | int32_t c1 = uc16_stream.Advance(); |
| 482 | int32_t c2 = string_stream.Advance(); |
| 483 | int32_t c3 = utf8_stream.Advance(); |
| 484 | i++; |
| 485 | CHECK_EQ(c0, c1); |
| 486 | CHECK_EQ(c0, c2); |
| 487 | CHECK_EQ(c0, c3); |
| 488 | CHECK_EQU(i, uc16_stream.pos()); |
| 489 | CHECK_EQU(i, string_stream.pos()); |
| 490 | CHECK_EQU(i, utf8_stream.pos()); |
| 491 | } |
| 492 | |
| 493 | int32_t c1 = uc16_stream.Advance(); |
| 494 | int32_t c2 = string_stream.Advance(); |
| 495 | int32_t c3 = utf8_stream.Advance(); |
| 496 | CHECK_LT(c1, 0); |
| 497 | CHECK_LT(c2, 0); |
| 498 | CHECK_LT(c3, 0); |
| 499 | } |
| 500 | |
| 501 | |
| 502 | TEST(CharacterStreams) { |
| 503 | v8::HandleScope handles; |
| 504 | v8::Persistent<v8::Context> context = v8::Context::New(); |
| 505 | v8::Context::Scope context_scope(context); |
| 506 | |
| 507 | TestCharacterStream("abc\0\n\r\x7f", 7); |
| 508 | static const unsigned kBigStringSize = 4096; |
| 509 | char buffer[kBigStringSize + 1]; |
| 510 | for (unsigned i = 0; i < kBigStringSize; i++) { |
| 511 | buffer[i] = static_cast<char>(i & 0x7f); |
| 512 | } |
| 513 | TestCharacterStream(buffer, kBigStringSize); |
| 514 | |
| 515 | TestCharacterStream(buffer, kBigStringSize, 576, 3298); |
| 516 | |
| 517 | TestCharacterStream("\0", 1); |
| 518 | TestCharacterStream("", 0); |
| 519 | } |
| 520 | |
| 521 | |
| 522 | TEST(Utf8CharacterStream) { |
| 523 | static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar; |
| 524 | static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU); |
| 525 | |
| 526 | static const int kAllUtf8CharsSize = |
| 527 | (unibrow::Utf8::kMaxOneByteChar + 1) + |
| 528 | (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 + |
| 529 | (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3; |
| 530 | static const unsigned kAllUtf8CharsSizeU = |
| 531 | static_cast<unsigned>(kAllUtf8CharsSize); |
| 532 | |
| 533 | char buffer[kAllUtf8CharsSizeU]; |
| 534 | unsigned cursor = 0; |
| 535 | for (int i = 0; i <= kMaxUC16Char; i++) { |
| 536 | cursor += unibrow::Utf8::Encode(buffer + cursor, i); |
| 537 | } |
| 538 | ASSERT(cursor == kAllUtf8CharsSizeU); |
| 539 | |
| 540 | i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer), |
| 541 | kAllUtf8CharsSizeU); |
| 542 | for (int i = 0; i <= kMaxUC16Char; i++) { |
| 543 | CHECK_EQU(i, stream.pos()); |
| 544 | int32_t c = stream.Advance(); |
| 545 | CHECK_EQ(i, c); |
| 546 | CHECK_EQU(i + 1, stream.pos()); |
| 547 | } |
| 548 | for (int i = kMaxUC16Char; i >= 0; i--) { |
| 549 | CHECK_EQU(i + 1, stream.pos()); |
| 550 | stream.PushBack(i); |
| 551 | CHECK_EQU(i, stream.pos()); |
| 552 | } |
| 553 | int i = 0; |
| 554 | while (stream.pos() < kMaxUC16CharU) { |
| 555 | CHECK_EQU(i, stream.pos()); |
| 556 | unsigned progress = stream.SeekForward(12); |
| 557 | i += progress; |
| 558 | int32_t c = stream.Advance(); |
| 559 | if (i <= kMaxUC16Char) { |
| 560 | CHECK_EQ(i, c); |
| 561 | } else { |
| 562 | CHECK_EQ(-1, c); |
| 563 | } |
| 564 | i += 1; |
| 565 | CHECK_EQU(i, stream.pos()); |
| 566 | } |
| 567 | } |
| 568 | |
| 569 | #undef CHECK_EQU |
| 570 | |
| 571 | void TestStreamScanner(i::UC16CharacterStream* stream, |
| 572 | i::Token::Value* expected_tokens, |
| 573 | int skip_pos = 0, // Zero means not skipping. |
| 574 | int skip_to = 0) { |
| 575 | i::V8JavaScriptScanner scanner; |
Steve Block | 9fac840 | 2011-05-12 15:51:54 +0100 | [diff] [blame^] | 576 | scanner.Initialize(stream); |
Ben Murdoch | b0fe162 | 2011-05-05 13:52:32 +0100 | [diff] [blame] | 577 | |
| 578 | int i = 0; |
| 579 | do { |
| 580 | i::Token::Value expected = expected_tokens[i]; |
| 581 | i::Token::Value actual = scanner.Next(); |
| 582 | CHECK_EQ(i::Token::String(expected), i::Token::String(actual)); |
| 583 | if (scanner.location().end_pos == skip_pos) { |
| 584 | scanner.SeekForward(skip_to); |
| 585 | } |
| 586 | i++; |
| 587 | } while (expected_tokens[i] != i::Token::ILLEGAL); |
| 588 | } |
| 589 | |
| 590 | TEST(StreamScanner) { |
| 591 | const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib"; |
| 592 | i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1), |
| 593 | static_cast<unsigned>(strlen(str1))); |
| 594 | i::Token::Value expectations1[] = { |
| 595 | i::Token::LBRACE, |
| 596 | i::Token::IDENTIFIER, |
| 597 | i::Token::IDENTIFIER, |
| 598 | i::Token::FOR, |
| 599 | i::Token::COLON, |
| 600 | i::Token::MUL, |
| 601 | i::Token::DIV, |
| 602 | i::Token::LT, |
| 603 | i::Token::SUB, |
| 604 | i::Token::IDENTIFIER, |
| 605 | i::Token::EOS, |
| 606 | i::Token::ILLEGAL |
| 607 | }; |
| 608 | TestStreamScanner(&stream1, expectations1, 0, 0); |
| 609 | |
| 610 | const char* str2 = "case default const {THIS\nPART\nSKIPPED} do"; |
| 611 | i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2), |
| 612 | static_cast<unsigned>(strlen(str2))); |
| 613 | i::Token::Value expectations2[] = { |
| 614 | i::Token::CASE, |
| 615 | i::Token::DEFAULT, |
| 616 | i::Token::CONST, |
| 617 | i::Token::LBRACE, |
| 618 | // Skipped part here |
| 619 | i::Token::RBRACE, |
| 620 | i::Token::DO, |
| 621 | i::Token::EOS, |
| 622 | i::Token::ILLEGAL |
| 623 | }; |
| 624 | ASSERT_EQ('{', str2[19]); |
| 625 | ASSERT_EQ('}', str2[37]); |
| 626 | TestStreamScanner(&stream2, expectations2, 20, 37); |
| 627 | |
| 628 | const char* str3 = "{}}}}"; |
| 629 | i::Token::Value expectations3[] = { |
| 630 | i::Token::LBRACE, |
| 631 | i::Token::RBRACE, |
| 632 | i::Token::RBRACE, |
| 633 | i::Token::RBRACE, |
| 634 | i::Token::RBRACE, |
| 635 | i::Token::EOS, |
| 636 | i::Token::ILLEGAL |
| 637 | }; |
| 638 | // Skip zero-four RBRACEs. |
| 639 | for (int i = 0; i <= 4; i++) { |
| 640 | expectations3[6 - i] = i::Token::ILLEGAL; |
| 641 | expectations3[5 - i] = i::Token::EOS; |
| 642 | i::Utf8ToUC16CharacterStream stream3( |
| 643 | reinterpret_cast<const i::byte*>(str3), |
| 644 | static_cast<unsigned>(strlen(str3))); |
| 645 | TestStreamScanner(&stream3, expectations3, 1, 1 + i); |
| 646 | } |
| 647 | } |