Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame^] | 1 | // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "src/v8.h" |
| 6 | |
| 7 | #include "src/arguments.h" |
| 8 | #include "src/jsregexp-inl.h" |
| 9 | #include "src/jsregexp.h" |
| 10 | #include "src/runtime/runtime-utils.h" |
| 11 | #include "src/string-builder.h" |
| 12 | #include "src/string-search.h" |
| 13 | |
| 14 | namespace v8 { |
| 15 | namespace internal { |
| 16 | |
| 17 | class CompiledReplacement { |
| 18 | public: |
| 19 | explicit CompiledReplacement(Zone* zone) |
| 20 | : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} |
| 21 | |
| 22 | // Return whether the replacement is simple. |
| 23 | bool Compile(Handle<String> replacement, int capture_count, |
| 24 | int subject_length); |
| 25 | |
| 26 | // Use Apply only if Compile returned false. |
| 27 | void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, |
| 28 | int32_t* match); |
| 29 | |
| 30 | // Number of distinct parts of the replacement pattern. |
| 31 | int parts() { return parts_.length(); } |
| 32 | |
| 33 | Zone* zone() const { return zone_; } |
| 34 | |
| 35 | private: |
| 36 | enum PartType { |
| 37 | SUBJECT_PREFIX = 1, |
| 38 | SUBJECT_SUFFIX, |
| 39 | SUBJECT_CAPTURE, |
| 40 | REPLACEMENT_SUBSTRING, |
| 41 | REPLACEMENT_STRING, |
| 42 | NUMBER_OF_PART_TYPES |
| 43 | }; |
| 44 | |
| 45 | struct ReplacementPart { |
| 46 | static inline ReplacementPart SubjectMatch() { |
| 47 | return ReplacementPart(SUBJECT_CAPTURE, 0); |
| 48 | } |
| 49 | static inline ReplacementPart SubjectCapture(int capture_index) { |
| 50 | return ReplacementPart(SUBJECT_CAPTURE, capture_index); |
| 51 | } |
| 52 | static inline ReplacementPart SubjectPrefix() { |
| 53 | return ReplacementPart(SUBJECT_PREFIX, 0); |
| 54 | } |
| 55 | static inline ReplacementPart SubjectSuffix(int subject_length) { |
| 56 | return ReplacementPart(SUBJECT_SUFFIX, subject_length); |
| 57 | } |
| 58 | static inline ReplacementPart ReplacementString() { |
| 59 | return ReplacementPart(REPLACEMENT_STRING, 0); |
| 60 | } |
| 61 | static inline ReplacementPart ReplacementSubString(int from, int to) { |
| 62 | DCHECK(from >= 0); |
| 63 | DCHECK(to > from); |
| 64 | return ReplacementPart(-from, to); |
| 65 | } |
| 66 | |
| 67 | // If tag <= 0 then it is the negation of a start index of a substring of |
| 68 | // the replacement pattern, otherwise it's a value from PartType. |
| 69 | ReplacementPart(int tag, int data) : tag(tag), data(data) { |
| 70 | // Must be non-positive or a PartType value. |
| 71 | DCHECK(tag < NUMBER_OF_PART_TYPES); |
| 72 | } |
| 73 | // Either a value of PartType or a non-positive number that is |
| 74 | // the negation of an index into the replacement string. |
| 75 | int tag; |
| 76 | // The data value's interpretation depends on the value of tag: |
| 77 | // tag == SUBJECT_PREFIX || |
| 78 | // tag == SUBJECT_SUFFIX: data is unused. |
| 79 | // tag == SUBJECT_CAPTURE: data is the number of the capture. |
| 80 | // tag == REPLACEMENT_SUBSTRING || |
| 81 | // tag == REPLACEMENT_STRING: data is index into array of substrings |
| 82 | // of the replacement string. |
| 83 | // tag <= 0: Temporary representation of the substring of the replacement |
| 84 | // string ranging over -tag .. data. |
| 85 | // Is replaced by REPLACEMENT_{SUB,}STRING when we create the |
| 86 | // substring objects. |
| 87 | int data; |
| 88 | }; |
| 89 | |
| 90 | template <typename Char> |
| 91 | bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, |
| 92 | Vector<Char> characters, int capture_count, |
| 93 | int subject_length, Zone* zone) { |
| 94 | int length = characters.length(); |
| 95 | int last = 0; |
| 96 | for (int i = 0; i < length; i++) { |
| 97 | Char c = characters[i]; |
| 98 | if (c == '$') { |
| 99 | int next_index = i + 1; |
| 100 | if (next_index == length) { // No next character! |
| 101 | break; |
| 102 | } |
| 103 | Char c2 = characters[next_index]; |
| 104 | switch (c2) { |
| 105 | case '$': |
| 106 | if (i > last) { |
| 107 | // There is a substring before. Include the first "$". |
| 108 | parts->Add( |
| 109 | ReplacementPart::ReplacementSubString(last, next_index), |
| 110 | zone); |
| 111 | last = next_index + 1; // Continue after the second "$". |
| 112 | } else { |
| 113 | // Let the next substring start with the second "$". |
| 114 | last = next_index; |
| 115 | } |
| 116 | i = next_index; |
| 117 | break; |
| 118 | case '`': |
| 119 | if (i > last) { |
| 120 | parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 121 | } |
| 122 | parts->Add(ReplacementPart::SubjectPrefix(), zone); |
| 123 | i = next_index; |
| 124 | last = i + 1; |
| 125 | break; |
| 126 | case '\'': |
| 127 | if (i > last) { |
| 128 | parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 129 | } |
| 130 | parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone); |
| 131 | i = next_index; |
| 132 | last = i + 1; |
| 133 | break; |
| 134 | case '&': |
| 135 | if (i > last) { |
| 136 | parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 137 | } |
| 138 | parts->Add(ReplacementPart::SubjectMatch(), zone); |
| 139 | i = next_index; |
| 140 | last = i + 1; |
| 141 | break; |
| 142 | case '0': |
| 143 | case '1': |
| 144 | case '2': |
| 145 | case '3': |
| 146 | case '4': |
| 147 | case '5': |
| 148 | case '6': |
| 149 | case '7': |
| 150 | case '8': |
| 151 | case '9': { |
| 152 | int capture_ref = c2 - '0'; |
| 153 | if (capture_ref > capture_count) { |
| 154 | i = next_index; |
| 155 | continue; |
| 156 | } |
| 157 | int second_digit_index = next_index + 1; |
| 158 | if (second_digit_index < length) { |
| 159 | // Peek ahead to see if we have two digits. |
| 160 | Char c3 = characters[second_digit_index]; |
| 161 | if ('0' <= c3 && c3 <= '9') { // Double digits. |
| 162 | int double_digit_ref = capture_ref * 10 + c3 - '0'; |
| 163 | if (double_digit_ref <= capture_count) { |
| 164 | next_index = second_digit_index; |
| 165 | capture_ref = double_digit_ref; |
| 166 | } |
| 167 | } |
| 168 | } |
| 169 | if (capture_ref > 0) { |
| 170 | if (i > last) { |
| 171 | parts->Add(ReplacementPart::ReplacementSubString(last, i), |
| 172 | zone); |
| 173 | } |
| 174 | DCHECK(capture_ref <= capture_count); |
| 175 | parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone); |
| 176 | last = next_index + 1; |
| 177 | } |
| 178 | i = next_index; |
| 179 | break; |
| 180 | } |
| 181 | default: |
| 182 | i = next_index; |
| 183 | break; |
| 184 | } |
| 185 | } |
| 186 | } |
| 187 | if (length > last) { |
| 188 | if (last == 0) { |
| 189 | // Replacement is simple. Do not use Apply to do the replacement. |
| 190 | return true; |
| 191 | } else { |
| 192 | parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); |
| 193 | } |
| 194 | } |
| 195 | return false; |
| 196 | } |
| 197 | |
| 198 | ZoneList<ReplacementPart> parts_; |
| 199 | ZoneList<Handle<String> > replacement_substrings_; |
| 200 | Zone* zone_; |
| 201 | }; |
| 202 | |
| 203 | |
| 204 | bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count, |
| 205 | int subject_length) { |
| 206 | { |
| 207 | DisallowHeapAllocation no_gc; |
| 208 | String::FlatContent content = replacement->GetFlatContent(); |
| 209 | DCHECK(content.IsFlat()); |
| 210 | bool simple = false; |
| 211 | if (content.IsOneByte()) { |
| 212 | simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), |
| 213 | capture_count, subject_length, zone()); |
| 214 | } else { |
| 215 | DCHECK(content.IsTwoByte()); |
| 216 | simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), |
| 217 | capture_count, subject_length, zone()); |
| 218 | } |
| 219 | if (simple) return true; |
| 220 | } |
| 221 | |
| 222 | Isolate* isolate = replacement->GetIsolate(); |
| 223 | // Find substrings of replacement string and create them as String objects. |
| 224 | int substring_index = 0; |
| 225 | for (int i = 0, n = parts_.length(); i < n; i++) { |
| 226 | int tag = parts_[i].tag; |
| 227 | if (tag <= 0) { // A replacement string slice. |
| 228 | int from = -tag; |
| 229 | int to = parts_[i].data; |
| 230 | replacement_substrings_.Add( |
| 231 | isolate->factory()->NewSubString(replacement, from, to), zone()); |
| 232 | parts_[i].tag = REPLACEMENT_SUBSTRING; |
| 233 | parts_[i].data = substring_index; |
| 234 | substring_index++; |
| 235 | } else if (tag == REPLACEMENT_STRING) { |
| 236 | replacement_substrings_.Add(replacement, zone()); |
| 237 | parts_[i].data = substring_index; |
| 238 | substring_index++; |
| 239 | } |
| 240 | } |
| 241 | return false; |
| 242 | } |
| 243 | |
| 244 | |
| 245 | void CompiledReplacement::Apply(ReplacementStringBuilder* builder, |
| 246 | int match_from, int match_to, int32_t* match) { |
| 247 | DCHECK_LT(0, parts_.length()); |
| 248 | for (int i = 0, n = parts_.length(); i < n; i++) { |
| 249 | ReplacementPart part = parts_[i]; |
| 250 | switch (part.tag) { |
| 251 | case SUBJECT_PREFIX: |
| 252 | if (match_from > 0) builder->AddSubjectSlice(0, match_from); |
| 253 | break; |
| 254 | case SUBJECT_SUFFIX: { |
| 255 | int subject_length = part.data; |
| 256 | if (match_to < subject_length) { |
| 257 | builder->AddSubjectSlice(match_to, subject_length); |
| 258 | } |
| 259 | break; |
| 260 | } |
| 261 | case SUBJECT_CAPTURE: { |
| 262 | int capture = part.data; |
| 263 | int from = match[capture * 2]; |
| 264 | int to = match[capture * 2 + 1]; |
| 265 | if (from >= 0 && to > from) { |
| 266 | builder->AddSubjectSlice(from, to); |
| 267 | } |
| 268 | break; |
| 269 | } |
| 270 | case REPLACEMENT_SUBSTRING: |
| 271 | case REPLACEMENT_STRING: |
| 272 | builder->AddString(replacement_substrings_[part.data]); |
| 273 | break; |
| 274 | default: |
| 275 | UNREACHABLE(); |
| 276 | } |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | |
| 281 | void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, |
| 282 | ZoneList<int>* indices, unsigned int limit, |
| 283 | Zone* zone) { |
| 284 | DCHECK(limit > 0); |
| 285 | // Collect indices of pattern in subject using memchr. |
| 286 | // Stop after finding at most limit values. |
| 287 | const uint8_t* subject_start = subject.start(); |
| 288 | const uint8_t* subject_end = subject_start + subject.length(); |
| 289 | const uint8_t* pos = subject_start; |
| 290 | while (limit > 0) { |
| 291 | pos = reinterpret_cast<const uint8_t*>( |
| 292 | memchr(pos, pattern, subject_end - pos)); |
| 293 | if (pos == NULL) return; |
| 294 | indices->Add(static_cast<int>(pos - subject_start), zone); |
| 295 | pos++; |
| 296 | limit--; |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | |
| 301 | void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, |
| 302 | ZoneList<int>* indices, unsigned int limit, |
| 303 | Zone* zone) { |
| 304 | DCHECK(limit > 0); |
| 305 | const uc16* subject_start = subject.start(); |
| 306 | const uc16* subject_end = subject_start + subject.length(); |
| 307 | for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { |
| 308 | if (*pos == pattern) { |
| 309 | indices->Add(static_cast<int>(pos - subject_start), zone); |
| 310 | limit--; |
| 311 | } |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | |
| 316 | template <typename SubjectChar, typename PatternChar> |
| 317 | void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, |
| 318 | Vector<const PatternChar> pattern, |
| 319 | ZoneList<int>* indices, unsigned int limit, Zone* zone) { |
| 320 | DCHECK(limit > 0); |
| 321 | // Collect indices of pattern in subject. |
| 322 | // Stop after finding at most limit values. |
| 323 | int pattern_length = pattern.length(); |
| 324 | int index = 0; |
| 325 | StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
| 326 | while (limit > 0) { |
| 327 | index = search.Search(subject, index); |
| 328 | if (index < 0) return; |
| 329 | indices->Add(index, zone); |
| 330 | index += pattern_length; |
| 331 | limit--; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | |
| 336 | void FindStringIndicesDispatch(Isolate* isolate, String* subject, |
| 337 | String* pattern, ZoneList<int>* indices, |
| 338 | unsigned int limit, Zone* zone) { |
| 339 | { |
| 340 | DisallowHeapAllocation no_gc; |
| 341 | String::FlatContent subject_content = subject->GetFlatContent(); |
| 342 | String::FlatContent pattern_content = pattern->GetFlatContent(); |
| 343 | DCHECK(subject_content.IsFlat()); |
| 344 | DCHECK(pattern_content.IsFlat()); |
| 345 | if (subject_content.IsOneByte()) { |
| 346 | Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); |
| 347 | if (pattern_content.IsOneByte()) { |
| 348 | Vector<const uint8_t> pattern_vector = |
| 349 | pattern_content.ToOneByteVector(); |
| 350 | if (pattern_vector.length() == 1) { |
| 351 | FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 352 | limit, zone); |
| 353 | } else { |
| 354 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 355 | limit, zone); |
| 356 | } |
| 357 | } else { |
| 358 | FindStringIndices(isolate, subject_vector, |
| 359 | pattern_content.ToUC16Vector(), indices, limit, zone); |
| 360 | } |
| 361 | } else { |
| 362 | Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
| 363 | if (pattern_content.IsOneByte()) { |
| 364 | Vector<const uint8_t> pattern_vector = |
| 365 | pattern_content.ToOneByteVector(); |
| 366 | if (pattern_vector.length() == 1) { |
| 367 | FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 368 | limit, zone); |
| 369 | } else { |
| 370 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 371 | limit, zone); |
| 372 | } |
| 373 | } else { |
| 374 | Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); |
| 375 | if (pattern_vector.length() == 1) { |
| 376 | FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 377 | limit, zone); |
| 378 | } else { |
| 379 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 380 | limit, zone); |
| 381 | } |
| 382 | } |
| 383 | } |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | |
| 388 | template <typename ResultSeqString> |
| 389 | MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( |
| 390 | Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, |
| 391 | Handle<String> replacement, Handle<JSArray> last_match_info) { |
| 392 | DCHECK(subject->IsFlat()); |
| 393 | DCHECK(replacement->IsFlat()); |
| 394 | |
| 395 | ZoneScope zone_scope(isolate->runtime_zone()); |
| 396 | ZoneList<int> indices(8, zone_scope.zone()); |
| 397 | DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); |
| 398 | String* pattern = |
| 399 | String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
| 400 | int subject_len = subject->length(); |
| 401 | int pattern_len = pattern->length(); |
| 402 | int replacement_len = replacement->length(); |
| 403 | |
| 404 | FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, |
| 405 | zone_scope.zone()); |
| 406 | |
| 407 | int matches = indices.length(); |
| 408 | if (matches == 0) return *subject; |
| 409 | |
| 410 | // Detect integer overflow. |
| 411 | int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - |
| 412 | static_cast<int64_t>(pattern_len)) * |
| 413 | static_cast<int64_t>(matches) + |
| 414 | static_cast<int64_t>(subject_len); |
| 415 | int result_len; |
| 416 | if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { |
| 417 | STATIC_ASSERT(String::kMaxLength < kMaxInt); |
| 418 | result_len = kMaxInt; // Provoke exception. |
| 419 | } else { |
| 420 | result_len = static_cast<int>(result_len_64); |
| 421 | } |
| 422 | |
| 423 | int subject_pos = 0; |
| 424 | int result_pos = 0; |
| 425 | |
| 426 | MaybeHandle<SeqString> maybe_res; |
| 427 | if (ResultSeqString::kHasOneByteEncoding) { |
| 428 | maybe_res = isolate->factory()->NewRawOneByteString(result_len); |
| 429 | } else { |
| 430 | maybe_res = isolate->factory()->NewRawTwoByteString(result_len); |
| 431 | } |
| 432 | Handle<SeqString> untyped_res; |
| 433 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); |
| 434 | Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); |
| 435 | |
| 436 | for (int i = 0; i < matches; i++) { |
| 437 | // Copy non-matched subject content. |
| 438 | if (subject_pos < indices.at(i)) { |
| 439 | String::WriteToFlat(*subject, result->GetChars() + result_pos, |
| 440 | subject_pos, indices.at(i)); |
| 441 | result_pos += indices.at(i) - subject_pos; |
| 442 | } |
| 443 | |
| 444 | // Replace match. |
| 445 | if (replacement_len > 0) { |
| 446 | String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, |
| 447 | replacement_len); |
| 448 | result_pos += replacement_len; |
| 449 | } |
| 450 | |
| 451 | subject_pos = indices.at(i) + pattern_len; |
| 452 | } |
| 453 | // Add remaining subject content at the end. |
| 454 | if (subject_pos < subject_len) { |
| 455 | String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, |
| 456 | subject_len); |
| 457 | } |
| 458 | |
| 459 | int32_t match_indices[] = {indices.at(matches - 1), |
| 460 | indices.at(matches - 1) + pattern_len}; |
| 461 | RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); |
| 462 | |
| 463 | return *result; |
| 464 | } |
| 465 | |
| 466 | |
| 467 | MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( |
| 468 | Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
| 469 | Handle<String> replacement, Handle<JSArray> last_match_info) { |
| 470 | DCHECK(subject->IsFlat()); |
| 471 | DCHECK(replacement->IsFlat()); |
| 472 | |
| 473 | int capture_count = regexp->CaptureCount(); |
| 474 | int subject_length = subject->length(); |
| 475 | |
| 476 | // CompiledReplacement uses zone allocation. |
| 477 | ZoneScope zone_scope(isolate->runtime_zone()); |
| 478 | CompiledReplacement compiled_replacement(zone_scope.zone()); |
| 479 | bool simple_replace = |
| 480 | compiled_replacement.Compile(replacement, capture_count, subject_length); |
| 481 | |
| 482 | // Shortcut for simple non-regexp global replacements |
| 483 | if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { |
| 484 | if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { |
| 485 | return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
| 486 | isolate, subject, regexp, replacement, last_match_info); |
| 487 | } else { |
| 488 | return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
| 489 | isolate, subject, regexp, replacement, last_match_info); |
| 490 | } |
| 491 | } |
| 492 | |
| 493 | RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| 494 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 495 | |
| 496 | int32_t* current_match = global_cache.FetchNext(); |
| 497 | if (current_match == NULL) { |
| 498 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 499 | return *subject; |
| 500 | } |
| 501 | |
| 502 | // Guessing the number of parts that the final result string is built |
| 503 | // from. Global regexps can match any number of times, so we guess |
| 504 | // conservatively. |
| 505 | int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; |
| 506 | ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); |
| 507 | |
| 508 | // Number of parts added by compiled replacement plus preceeding |
| 509 | // string and possibly suffix after last match. It is possible for |
| 510 | // all components to use two elements when encoded as two smis. |
| 511 | const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); |
| 512 | |
| 513 | int prev = 0; |
| 514 | |
| 515 | do { |
| 516 | builder.EnsureCapacity(parts_added_per_loop); |
| 517 | |
| 518 | int start = current_match[0]; |
| 519 | int end = current_match[1]; |
| 520 | |
| 521 | if (prev < start) { |
| 522 | builder.AddSubjectSlice(prev, start); |
| 523 | } |
| 524 | |
| 525 | if (simple_replace) { |
| 526 | builder.AddString(replacement); |
| 527 | } else { |
| 528 | compiled_replacement.Apply(&builder, start, end, current_match); |
| 529 | } |
| 530 | prev = end; |
| 531 | |
| 532 | current_match = global_cache.FetchNext(); |
| 533 | } while (current_match != NULL); |
| 534 | |
| 535 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 536 | |
| 537 | if (prev < subject_length) { |
| 538 | builder.EnsureCapacity(2); |
| 539 | builder.AddSubjectSlice(prev, subject_length); |
| 540 | } |
| 541 | |
| 542 | RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
| 543 | global_cache.LastSuccessfulMatch()); |
| 544 | |
| 545 | Handle<String> result; |
| 546 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString()); |
| 547 | return *result; |
| 548 | } |
| 549 | |
| 550 | |
| 551 | template <typename ResultSeqString> |
| 552 | MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString( |
| 553 | Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
| 554 | Handle<JSArray> last_match_info) { |
| 555 | DCHECK(subject->IsFlat()); |
| 556 | |
| 557 | // Shortcut for simple non-regexp global replacements |
| 558 | if (regexp->TypeTag() == JSRegExp::ATOM) { |
| 559 | Handle<String> empty_string = isolate->factory()->empty_string(); |
| 560 | if (subject->IsOneByteRepresentation()) { |
| 561 | return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
| 562 | isolate, subject, regexp, empty_string, last_match_info); |
| 563 | } else { |
| 564 | return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
| 565 | isolate, subject, regexp, empty_string, last_match_info); |
| 566 | } |
| 567 | } |
| 568 | |
| 569 | RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| 570 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 571 | |
| 572 | int32_t* current_match = global_cache.FetchNext(); |
| 573 | if (current_match == NULL) { |
| 574 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 575 | return *subject; |
| 576 | } |
| 577 | |
| 578 | int start = current_match[0]; |
| 579 | int end = current_match[1]; |
| 580 | int capture_count = regexp->CaptureCount(); |
| 581 | int subject_length = subject->length(); |
| 582 | |
| 583 | int new_length = subject_length - (end - start); |
| 584 | if (new_length == 0) return isolate->heap()->empty_string(); |
| 585 | |
| 586 | Handle<ResultSeqString> answer; |
| 587 | if (ResultSeqString::kHasOneByteEncoding) { |
| 588 | answer = Handle<ResultSeqString>::cast( |
| 589 | isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); |
| 590 | } else { |
| 591 | answer = Handle<ResultSeqString>::cast( |
| 592 | isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); |
| 593 | } |
| 594 | |
| 595 | int prev = 0; |
| 596 | int position = 0; |
| 597 | |
| 598 | do { |
| 599 | start = current_match[0]; |
| 600 | end = current_match[1]; |
| 601 | if (prev < start) { |
| 602 | // Add substring subject[prev;start] to answer string. |
| 603 | String::WriteToFlat(*subject, answer->GetChars() + position, prev, start); |
| 604 | position += start - prev; |
| 605 | } |
| 606 | prev = end; |
| 607 | |
| 608 | current_match = global_cache.FetchNext(); |
| 609 | } while (current_match != NULL); |
| 610 | |
| 611 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 612 | |
| 613 | RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
| 614 | global_cache.LastSuccessfulMatch()); |
| 615 | |
| 616 | if (prev < subject_length) { |
| 617 | // Add substring subject[prev;length] to answer string. |
| 618 | String::WriteToFlat(*subject, answer->GetChars() + position, prev, |
| 619 | subject_length); |
| 620 | position += subject_length - prev; |
| 621 | } |
| 622 | |
| 623 | if (position == 0) return isolate->heap()->empty_string(); |
| 624 | |
| 625 | // Shorten string and fill |
| 626 | int string_size = ResultSeqString::SizeFor(position); |
| 627 | int allocated_string_size = ResultSeqString::SizeFor(new_length); |
| 628 | int delta = allocated_string_size - string_size; |
| 629 | |
| 630 | answer->set_length(position); |
| 631 | if (delta == 0) return *answer; |
| 632 | |
| 633 | Address end_of_string = answer->address() + string_size; |
| 634 | Heap* heap = isolate->heap(); |
| 635 | |
| 636 | // The trimming is performed on a newly allocated object, which is on a |
| 637 | // fresly allocated page or on an already swept page. Hence, the sweeper |
| 638 | // thread can not get confused with the filler creation. No synchronization |
| 639 | // needed. |
| 640 | heap->CreateFillerObjectAt(end_of_string, delta); |
| 641 | heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR); |
| 642 | return *answer; |
| 643 | } |
| 644 | |
| 645 | |
| 646 | RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) { |
| 647 | HandleScope scope(isolate); |
| 648 | DCHECK(args.length() == 4); |
| 649 | |
| 650 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
| 651 | CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); |
| 652 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); |
| 653 | CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
| 654 | |
| 655 | RUNTIME_ASSERT(regexp->GetFlags().is_global()); |
| 656 | RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); |
| 657 | |
| 658 | subject = String::Flatten(subject); |
| 659 | |
| 660 | if (replacement->length() == 0) { |
| 661 | if (subject->HasOnlyOneByteChars()) { |
| 662 | return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( |
| 663 | isolate, subject, regexp, last_match_info); |
| 664 | } else { |
| 665 | return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( |
| 666 | isolate, subject, regexp, last_match_info); |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | replacement = String::Flatten(replacement); |
| 671 | |
| 672 | return StringReplaceGlobalRegExpWithString(isolate, subject, regexp, |
| 673 | replacement, last_match_info); |
| 674 | } |
| 675 | |
| 676 | |
| 677 | RUNTIME_FUNCTION(Runtime_StringSplit) { |
| 678 | HandleScope handle_scope(isolate); |
| 679 | DCHECK(args.length() == 3); |
| 680 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
| 681 | CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); |
| 682 | CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); |
| 683 | RUNTIME_ASSERT(limit > 0); |
| 684 | |
| 685 | int subject_length = subject->length(); |
| 686 | int pattern_length = pattern->length(); |
| 687 | RUNTIME_ASSERT(pattern_length > 0); |
| 688 | |
| 689 | if (limit == 0xffffffffu) { |
| 690 | Handle<Object> cached_answer( |
| 691 | RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, |
| 692 | RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), |
| 693 | isolate); |
| 694 | if (*cached_answer != Smi::FromInt(0)) { |
| 695 | // The cache FixedArray is a COW-array and can therefore be reused. |
| 696 | Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( |
| 697 | Handle<FixedArray>::cast(cached_answer)); |
| 698 | return *result; |
| 699 | } |
| 700 | } |
| 701 | |
| 702 | // The limit can be very large (0xffffffffu), but since the pattern |
| 703 | // isn't empty, we can never create more parts than ~half the length |
| 704 | // of the subject. |
| 705 | |
| 706 | subject = String::Flatten(subject); |
| 707 | pattern = String::Flatten(pattern); |
| 708 | |
| 709 | static const int kMaxInitialListCapacity = 16; |
| 710 | |
| 711 | ZoneScope zone_scope(isolate->runtime_zone()); |
| 712 | |
| 713 | // Find (up to limit) indices of separator and end-of-string in subject |
| 714 | int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
| 715 | ZoneList<int> indices(initial_capacity, zone_scope.zone()); |
| 716 | |
| 717 | FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit, |
| 718 | zone_scope.zone()); |
| 719 | |
| 720 | if (static_cast<uint32_t>(indices.length()) < limit) { |
| 721 | indices.Add(subject_length, zone_scope.zone()); |
| 722 | } |
| 723 | |
| 724 | // The list indices now contains the end of each part to create. |
| 725 | |
| 726 | // Create JSArray of substrings separated by separator. |
| 727 | int part_count = indices.length(); |
| 728 | |
| 729 | Handle<JSArray> result = isolate->factory()->NewJSArray(part_count); |
| 730 | JSObject::EnsureCanContainHeapObjectElements(result); |
| 731 | result->set_length(Smi::FromInt(part_count)); |
| 732 | |
| 733 | DCHECK(result->HasFastObjectElements()); |
| 734 | |
| 735 | if (part_count == 1 && indices.at(0) == subject_length) { |
| 736 | FixedArray::cast(result->elements())->set(0, *subject); |
| 737 | return *result; |
| 738 | } |
| 739 | |
| 740 | Handle<FixedArray> elements(FixedArray::cast(result->elements())); |
| 741 | int part_start = 0; |
| 742 | for (int i = 0; i < part_count; i++) { |
| 743 | HandleScope local_loop_handle(isolate); |
| 744 | int part_end = indices.at(i); |
| 745 | Handle<String> substring = |
| 746 | isolate->factory()->NewProperSubString(subject, part_start, part_end); |
| 747 | elements->set(i, *substring); |
| 748 | part_start = part_end + pattern_length; |
| 749 | } |
| 750 | |
| 751 | if (limit == 0xffffffffu) { |
| 752 | if (result->HasFastObjectElements()) { |
| 753 | RegExpResultsCache::Enter(isolate, subject, pattern, elements, |
| 754 | RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); |
| 755 | } |
| 756 | } |
| 757 | |
| 758 | return *result; |
| 759 | } |
| 760 | |
| 761 | |
| 762 | RUNTIME_FUNCTION(Runtime_RegExpExecRT) { |
| 763 | HandleScope scope(isolate); |
| 764 | DCHECK(args.length() == 4); |
| 765 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 766 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
| 767 | CONVERT_INT32_ARG_CHECKED(index, 2); |
| 768 | CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
| 769 | // Due to the way the JS calls are constructed this must be less than the |
| 770 | // length of a string, i.e. it is always a Smi. We check anyway for security. |
| 771 | RUNTIME_ASSERT(index >= 0); |
| 772 | RUNTIME_ASSERT(index <= subject->length()); |
| 773 | isolate->counters()->regexp_entry_runtime()->Increment(); |
| 774 | Handle<Object> result; |
| 775 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 776 | isolate, result, |
| 777 | RegExpImpl::Exec(regexp, subject, index, last_match_info)); |
| 778 | return *result; |
| 779 | } |
| 780 | |
| 781 | |
| 782 | RUNTIME_FUNCTION(Runtime_RegExpConstructResult) { |
| 783 | HandleScope handle_scope(isolate); |
| 784 | DCHECK(args.length() == 3); |
| 785 | CONVERT_SMI_ARG_CHECKED(size, 0); |
| 786 | RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength); |
| 787 | CONVERT_ARG_HANDLE_CHECKED(Object, index, 1); |
| 788 | CONVERT_ARG_HANDLE_CHECKED(Object, input, 2); |
| 789 | Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size); |
| 790 | Handle<Map> regexp_map(isolate->native_context()->regexp_result_map()); |
| 791 | Handle<JSObject> object = |
| 792 | isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false); |
| 793 | Handle<JSArray> array = Handle<JSArray>::cast(object); |
| 794 | array->set_elements(*elements); |
| 795 | array->set_length(Smi::FromInt(size)); |
| 796 | // Write in-object properties after the length of the array. |
| 797 | array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index); |
| 798 | array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input); |
| 799 | return *array; |
| 800 | } |
| 801 | |
| 802 | |
| 803 | static JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags, |
| 804 | bool* success) { |
| 805 | uint32_t value = JSRegExp::NONE; |
| 806 | int length = flags->length(); |
| 807 | // A longer flags string cannot be valid. |
| 808 | if (length > 4) return JSRegExp::Flags(0); |
| 809 | for (int i = 0; i < length; i++) { |
| 810 | uint32_t flag = JSRegExp::NONE; |
| 811 | switch (flags->Get(i)) { |
| 812 | case 'g': |
| 813 | flag = JSRegExp::GLOBAL; |
| 814 | break; |
| 815 | case 'i': |
| 816 | flag = JSRegExp::IGNORE_CASE; |
| 817 | break; |
| 818 | case 'm': |
| 819 | flag = JSRegExp::MULTILINE; |
| 820 | break; |
| 821 | case 'y': |
| 822 | if (!FLAG_harmony_regexps) return JSRegExp::Flags(0); |
| 823 | flag = JSRegExp::STICKY; |
| 824 | break; |
| 825 | default: |
| 826 | return JSRegExp::Flags(0); |
| 827 | } |
| 828 | // Duplicate flag. |
| 829 | if (value & flag) return JSRegExp::Flags(0); |
| 830 | value |= flag; |
| 831 | } |
| 832 | *success = true; |
| 833 | return JSRegExp::Flags(value); |
| 834 | } |
| 835 | |
| 836 | |
| 837 | RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) { |
| 838 | HandleScope scope(isolate); |
| 839 | DCHECK(args.length() == 3); |
| 840 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 841 | CONVERT_ARG_HANDLE_CHECKED(String, source, 1); |
| 842 | CONVERT_ARG_HANDLE_CHECKED(String, flags_string, 2); |
| 843 | Factory* factory = isolate->factory(); |
| 844 | // If source is the empty string we set it to "(?:)" instead as |
| 845 | // suggested by ECMA-262, 5th, section 15.10.4.1. |
| 846 | if (source->length() == 0) source = factory->query_colon_string(); |
| 847 | |
| 848 | bool success = false; |
| 849 | JSRegExp::Flags flags = RegExpFlagsFromString(flags_string, &success); |
| 850 | if (!success) { |
| 851 | Handle<FixedArray> element = factory->NewFixedArray(1); |
| 852 | element->set(0, *flags_string); |
| 853 | Handle<JSArray> args = factory->NewJSArrayWithElements(element); |
| 854 | THROW_NEW_ERROR_RETURN_FAILURE( |
| 855 | isolate, NewSyntaxError("invalid_regexp_flags", args)); |
| 856 | } |
| 857 | |
| 858 | Handle<Object> global = factory->ToBoolean(flags.is_global()); |
| 859 | Handle<Object> ignore_case = factory->ToBoolean(flags.is_ignore_case()); |
| 860 | Handle<Object> multiline = factory->ToBoolean(flags.is_multiline()); |
| 861 | Handle<Object> sticky = factory->ToBoolean(flags.is_sticky()); |
| 862 | |
| 863 | Map* map = regexp->map(); |
| 864 | Object* constructor = map->constructor(); |
| 865 | if (!FLAG_harmony_regexps && constructor->IsJSFunction() && |
| 866 | JSFunction::cast(constructor)->initial_map() == map) { |
| 867 | // If we still have the original map, set in-object properties directly. |
| 868 | // Both true and false are immovable immortal objects so no need for write |
| 869 | // barrier. |
| 870 | regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global, |
| 871 | SKIP_WRITE_BARRIER); |
| 872 | regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignore_case, |
| 873 | SKIP_WRITE_BARRIER); |
| 874 | regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline, |
| 875 | SKIP_WRITE_BARRIER); |
| 876 | regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex, |
| 877 | Smi::FromInt(0), SKIP_WRITE_BARRIER); |
| 878 | } else { |
| 879 | // Map has changed, so use generic, but slower, method. We also end here if |
| 880 | // the --harmony-regexp flag is set, because the initial map does not have |
| 881 | // space for the 'sticky' flag, since it is from the snapshot, but must work |
| 882 | // both with and without --harmony-regexp. When sticky comes out from under |
| 883 | // the flag, we will be able to use the fast initial map. |
| 884 | PropertyAttributes final = |
| 885 | static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE); |
| 886 | PropertyAttributes writable = |
| 887 | static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE); |
| 888 | Handle<Object> zero(Smi::FromInt(0), isolate); |
| 889 | JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(), |
| 890 | global, final).Check(); |
| 891 | JSObject::SetOwnPropertyIgnoreAttributes( |
| 892 | regexp, factory->ignore_case_string(), ignore_case, final).Check(); |
| 893 | JSObject::SetOwnPropertyIgnoreAttributes( |
| 894 | regexp, factory->multiline_string(), multiline, final).Check(); |
| 895 | if (FLAG_harmony_regexps) { |
| 896 | JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(), |
| 897 | sticky, final).Check(); |
| 898 | } |
| 899 | JSObject::SetOwnPropertyIgnoreAttributes( |
| 900 | regexp, factory->last_index_string(), zero, writable).Check(); |
| 901 | } |
| 902 | |
| 903 | Handle<Object> result; |
| 904 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 905 | isolate, result, RegExpImpl::Compile(regexp, source, flags)); |
| 906 | return *result; |
| 907 | } |
| 908 | |
| 909 | |
| 910 | RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) { |
| 911 | HandleScope scope(isolate); |
| 912 | DCHECK(args.length() == 4); |
| 913 | CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0); |
| 914 | CONVERT_SMI_ARG_CHECKED(index, 1); |
| 915 | CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2); |
| 916 | CONVERT_ARG_HANDLE_CHECKED(String, flags, 3); |
| 917 | |
| 918 | // Get the RegExp function from the context in the literals array. |
| 919 | // This is the RegExp function from the context in which the |
| 920 | // function was created. We do not use the RegExp function from the |
| 921 | // current native context because this might be the RegExp function |
| 922 | // from another context which we should not have access to. |
| 923 | Handle<JSFunction> constructor = Handle<JSFunction>( |
| 924 | JSFunction::NativeContextFromLiterals(*literals)->regexp_function()); |
| 925 | // Compute the regular expression literal. |
| 926 | Handle<Object> regexp; |
| 927 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 928 | isolate, regexp, |
| 929 | RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags)); |
| 930 | literals->set(index, *regexp); |
| 931 | return *regexp; |
| 932 | } |
| 933 | |
| 934 | |
| 935 | // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain |
| 936 | // separate last match info. See comment on that function. |
| 937 | template <bool has_capture> |
| 938 | static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, |
| 939 | Handle<JSRegExp> regexp, |
| 940 | Handle<JSArray> last_match_array, |
| 941 | Handle<JSArray> result_array) { |
| 942 | DCHECK(subject->IsFlat()); |
| 943 | DCHECK_NE(has_capture, regexp->CaptureCount() == 0); |
| 944 | |
| 945 | int capture_count = regexp->CaptureCount(); |
| 946 | int subject_length = subject->length(); |
| 947 | |
| 948 | static const int kMinLengthToCache = 0x1000; |
| 949 | |
| 950 | if (subject_length > kMinLengthToCache) { |
| 951 | Handle<Object> cached_answer( |
| 952 | RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(), |
| 953 | RegExpResultsCache::REGEXP_MULTIPLE_INDICES), |
| 954 | isolate); |
| 955 | if (*cached_answer != Smi::FromInt(0)) { |
| 956 | Handle<FixedArray> cached_fixed_array = |
| 957 | Handle<FixedArray>(FixedArray::cast(*cached_answer)); |
| 958 | // The cache FixedArray is a COW-array and can therefore be reused. |
| 959 | JSArray::SetContent(result_array, cached_fixed_array); |
| 960 | // The actual length of the result array is stored in the last element of |
| 961 | // the backing store (the backing FixedArray may have a larger capacity). |
| 962 | Object* cached_fixed_array_last_element = |
| 963 | cached_fixed_array->get(cached_fixed_array->length() - 1); |
| 964 | Smi* js_array_length = Smi::cast(cached_fixed_array_last_element); |
| 965 | result_array->set_length(js_array_length); |
| 966 | RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
| 967 | NULL); |
| 968 | return *result_array; |
| 969 | } |
| 970 | } |
| 971 | |
| 972 | RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| 973 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 974 | |
| 975 | // Ensured in Runtime_RegExpExecMultiple. |
| 976 | DCHECK(result_array->HasFastObjectElements()); |
| 977 | Handle<FixedArray> result_elements( |
| 978 | FixedArray::cast(result_array->elements())); |
| 979 | if (result_elements->length() < 16) { |
| 980 | result_elements = isolate->factory()->NewFixedArrayWithHoles(16); |
| 981 | } |
| 982 | |
| 983 | FixedArrayBuilder builder(result_elements); |
| 984 | |
| 985 | // Position to search from. |
| 986 | int match_start = -1; |
| 987 | int match_end = 0; |
| 988 | bool first = true; |
| 989 | |
| 990 | // Two smis before and after the match, for very long strings. |
| 991 | static const int kMaxBuilderEntriesPerRegExpMatch = 5; |
| 992 | |
| 993 | while (true) { |
| 994 | int32_t* current_match = global_cache.FetchNext(); |
| 995 | if (current_match == NULL) break; |
| 996 | match_start = current_match[0]; |
| 997 | builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| 998 | if (match_end < match_start) { |
| 999 | ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
| 1000 | match_start); |
| 1001 | } |
| 1002 | match_end = current_match[1]; |
| 1003 | { |
| 1004 | // Avoid accumulating new handles inside loop. |
| 1005 | HandleScope temp_scope(isolate); |
| 1006 | Handle<String> match; |
| 1007 | if (!first) { |
| 1008 | match = isolate->factory()->NewProperSubString(subject, match_start, |
| 1009 | match_end); |
| 1010 | } else { |
| 1011 | match = |
| 1012 | isolate->factory()->NewSubString(subject, match_start, match_end); |
| 1013 | first = false; |
| 1014 | } |
| 1015 | |
| 1016 | if (has_capture) { |
| 1017 | // Arguments array to replace function is match, captures, index and |
| 1018 | // subject, i.e., 3 + capture count in total. |
| 1019 | Handle<FixedArray> elements = |
| 1020 | isolate->factory()->NewFixedArray(3 + capture_count); |
| 1021 | |
| 1022 | elements->set(0, *match); |
| 1023 | for (int i = 1; i <= capture_count; i++) { |
| 1024 | int start = current_match[i * 2]; |
| 1025 | if (start >= 0) { |
| 1026 | int end = current_match[i * 2 + 1]; |
| 1027 | DCHECK(start <= end); |
| 1028 | Handle<String> substring = |
| 1029 | isolate->factory()->NewSubString(subject, start, end); |
| 1030 | elements->set(i, *substring); |
| 1031 | } else { |
| 1032 | DCHECK(current_match[i * 2 + 1] < 0); |
| 1033 | elements->set(i, isolate->heap()->undefined_value()); |
| 1034 | } |
| 1035 | } |
| 1036 | elements->set(capture_count + 1, Smi::FromInt(match_start)); |
| 1037 | elements->set(capture_count + 2, *subject); |
| 1038 | builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); |
| 1039 | } else { |
| 1040 | builder.Add(*match); |
| 1041 | } |
| 1042 | } |
| 1043 | } |
| 1044 | |
| 1045 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 1046 | |
| 1047 | if (match_start >= 0) { |
| 1048 | // Finished matching, with at least one match. |
| 1049 | if (match_end < subject_length) { |
| 1050 | ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
| 1051 | subject_length); |
| 1052 | } |
| 1053 | |
| 1054 | RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
| 1055 | NULL); |
| 1056 | |
| 1057 | if (subject_length > kMinLengthToCache) { |
| 1058 | // Store the length of the result array into the last element of the |
| 1059 | // backing FixedArray. |
| 1060 | builder.EnsureCapacity(1); |
| 1061 | Handle<FixedArray> fixed_array = builder.array(); |
| 1062 | fixed_array->set(fixed_array->length() - 1, |
| 1063 | Smi::FromInt(builder.length())); |
| 1064 | // Cache the result and turn the FixedArray into a COW array. |
| 1065 | RegExpResultsCache::Enter(isolate, subject, |
| 1066 | handle(regexp->data(), isolate), fixed_array, |
| 1067 | RegExpResultsCache::REGEXP_MULTIPLE_INDICES); |
| 1068 | } |
| 1069 | return *builder.ToJSArray(result_array); |
| 1070 | } else { |
| 1071 | return isolate->heap()->null_value(); // No matches at all. |
| 1072 | } |
| 1073 | } |
| 1074 | |
| 1075 | |
| 1076 | // This is only called for StringReplaceGlobalRegExpWithFunction. This sets |
| 1077 | // lastMatchInfoOverride to maintain the last match info, so we don't need to |
| 1078 | // set any other last match array info. |
| 1079 | RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { |
| 1080 | HandleScope handles(isolate); |
| 1081 | DCHECK(args.length() == 4); |
| 1082 | |
| 1083 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
| 1084 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 1085 | CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); |
| 1086 | CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); |
| 1087 | RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); |
| 1088 | RUNTIME_ASSERT(result_array->HasFastObjectElements()); |
| 1089 | |
| 1090 | subject = String::Flatten(subject); |
| 1091 | RUNTIME_ASSERT(regexp->GetFlags().is_global()); |
| 1092 | |
| 1093 | if (regexp->CaptureCount() == 0) { |
| 1094 | return SearchRegExpMultiple<false>(isolate, subject, regexp, |
| 1095 | last_match_info, result_array); |
| 1096 | } else { |
| 1097 | return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info, |
| 1098 | result_array); |
| 1099 | } |
| 1100 | } |
| 1101 | |
| 1102 | |
| 1103 | RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) { |
| 1104 | SealHandleScope shs(isolate); |
| 1105 | return __RT_impl_Runtime_RegExpConstructResult(args, isolate); |
| 1106 | } |
| 1107 | |
| 1108 | |
| 1109 | RUNTIME_FUNCTION(RuntimeReference_RegExpExec) { |
| 1110 | SealHandleScope shs(isolate); |
| 1111 | return __RT_impl_Runtime_RegExpExecRT(args, isolate); |
| 1112 | } |
| 1113 | |
| 1114 | |
| 1115 | RUNTIME_FUNCTION(RuntimeReference_IsRegExp) { |
| 1116 | SealHandleScope shs(isolate); |
| 1117 | DCHECK(args.length() == 1); |
| 1118 | CONVERT_ARG_CHECKED(Object, obj, 0); |
| 1119 | return isolate->heap()->ToBoolean(obj->IsJSRegExp()); |
| 1120 | } |
| 1121 | } |
| 1122 | } // namespace v8::internal |