Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1 | // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 5 | #include "src/runtime/runtime-utils.h" |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 6 | |
| 7 | #include "src/arguments.h" |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 8 | #include "src/conversions-inl.h" |
| 9 | #include "src/isolate-inl.h" |
| 10 | #include "src/messages.h" |
| 11 | #include "src/regexp/jsregexp-inl.h" |
| 12 | #include "src/regexp/jsregexp.h" |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 13 | #include "src/string-builder.h" |
| 14 | #include "src/string-search.h" |
| 15 | |
| 16 | namespace v8 { |
| 17 | namespace internal { |
| 18 | |
| 19 | class CompiledReplacement { |
| 20 | public: |
| 21 | explicit CompiledReplacement(Zone* zone) |
| 22 | : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} |
| 23 | |
| 24 | // Return whether the replacement is simple. |
| 25 | bool Compile(Handle<String> replacement, int capture_count, |
| 26 | int subject_length); |
| 27 | |
| 28 | // Use Apply only if Compile returned false. |
| 29 | void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, |
| 30 | int32_t* match); |
| 31 | |
| 32 | // Number of distinct parts of the replacement pattern. |
| 33 | int parts() { return parts_.length(); } |
| 34 | |
| 35 | Zone* zone() const { return zone_; } |
| 36 | |
| 37 | private: |
| 38 | enum PartType { |
| 39 | SUBJECT_PREFIX = 1, |
| 40 | SUBJECT_SUFFIX, |
| 41 | SUBJECT_CAPTURE, |
| 42 | REPLACEMENT_SUBSTRING, |
| 43 | REPLACEMENT_STRING, |
| 44 | NUMBER_OF_PART_TYPES |
| 45 | }; |
| 46 | |
| 47 | struct ReplacementPart { |
| 48 | static inline ReplacementPart SubjectMatch() { |
| 49 | return ReplacementPart(SUBJECT_CAPTURE, 0); |
| 50 | } |
| 51 | static inline ReplacementPart SubjectCapture(int capture_index) { |
| 52 | return ReplacementPart(SUBJECT_CAPTURE, capture_index); |
| 53 | } |
| 54 | static inline ReplacementPart SubjectPrefix() { |
| 55 | return ReplacementPart(SUBJECT_PREFIX, 0); |
| 56 | } |
| 57 | static inline ReplacementPart SubjectSuffix(int subject_length) { |
| 58 | return ReplacementPart(SUBJECT_SUFFIX, subject_length); |
| 59 | } |
| 60 | static inline ReplacementPart ReplacementString() { |
| 61 | return ReplacementPart(REPLACEMENT_STRING, 0); |
| 62 | } |
| 63 | static inline ReplacementPart ReplacementSubString(int from, int to) { |
| 64 | DCHECK(from >= 0); |
| 65 | DCHECK(to > from); |
| 66 | return ReplacementPart(-from, to); |
| 67 | } |
| 68 | |
| 69 | // If tag <= 0 then it is the negation of a start index of a substring of |
| 70 | // the replacement pattern, otherwise it's a value from PartType. |
| 71 | ReplacementPart(int tag, int data) : tag(tag), data(data) { |
| 72 | // Must be non-positive or a PartType value. |
| 73 | DCHECK(tag < NUMBER_OF_PART_TYPES); |
| 74 | } |
| 75 | // Either a value of PartType or a non-positive number that is |
| 76 | // the negation of an index into the replacement string. |
| 77 | int tag; |
| 78 | // The data value's interpretation depends on the value of tag: |
| 79 | // tag == SUBJECT_PREFIX || |
| 80 | // tag == SUBJECT_SUFFIX: data is unused. |
| 81 | // tag == SUBJECT_CAPTURE: data is the number of the capture. |
| 82 | // tag == REPLACEMENT_SUBSTRING || |
| 83 | // tag == REPLACEMENT_STRING: data is index into array of substrings |
| 84 | // of the replacement string. |
| 85 | // tag <= 0: Temporary representation of the substring of the replacement |
| 86 | // string ranging over -tag .. data. |
| 87 | // Is replaced by REPLACEMENT_{SUB,}STRING when we create the |
| 88 | // substring objects. |
| 89 | int data; |
| 90 | }; |
| 91 | |
| 92 | template <typename Char> |
| 93 | bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, |
| 94 | Vector<Char> characters, int capture_count, |
| 95 | int subject_length, Zone* zone) { |
| 96 | int length = characters.length(); |
| 97 | int last = 0; |
| 98 | for (int i = 0; i < length; i++) { |
| 99 | Char c = characters[i]; |
| 100 | if (c == '$') { |
| 101 | int next_index = i + 1; |
| 102 | if (next_index == length) { // No next character! |
| 103 | break; |
| 104 | } |
| 105 | Char c2 = characters[next_index]; |
| 106 | switch (c2) { |
| 107 | case '$': |
| 108 | if (i > last) { |
| 109 | // There is a substring before. Include the first "$". |
| 110 | parts->Add( |
| 111 | ReplacementPart::ReplacementSubString(last, next_index), |
| 112 | zone); |
| 113 | last = next_index + 1; // Continue after the second "$". |
| 114 | } else { |
| 115 | // Let the next substring start with the second "$". |
| 116 | last = next_index; |
| 117 | } |
| 118 | i = next_index; |
| 119 | break; |
| 120 | case '`': |
| 121 | if (i > last) { |
| 122 | parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 123 | } |
| 124 | parts->Add(ReplacementPart::SubjectPrefix(), zone); |
| 125 | i = next_index; |
| 126 | last = i + 1; |
| 127 | break; |
| 128 | case '\'': |
| 129 | if (i > last) { |
| 130 | parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 131 | } |
| 132 | parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone); |
| 133 | i = next_index; |
| 134 | last = i + 1; |
| 135 | break; |
| 136 | case '&': |
| 137 | if (i > last) { |
| 138 | parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 139 | } |
| 140 | parts->Add(ReplacementPart::SubjectMatch(), zone); |
| 141 | i = next_index; |
| 142 | last = i + 1; |
| 143 | break; |
| 144 | case '0': |
| 145 | case '1': |
| 146 | case '2': |
| 147 | case '3': |
| 148 | case '4': |
| 149 | case '5': |
| 150 | case '6': |
| 151 | case '7': |
| 152 | case '8': |
| 153 | case '9': { |
| 154 | int capture_ref = c2 - '0'; |
| 155 | if (capture_ref > capture_count) { |
| 156 | i = next_index; |
| 157 | continue; |
| 158 | } |
| 159 | int second_digit_index = next_index + 1; |
| 160 | if (second_digit_index < length) { |
| 161 | // Peek ahead to see if we have two digits. |
| 162 | Char c3 = characters[second_digit_index]; |
| 163 | if ('0' <= c3 && c3 <= '9') { // Double digits. |
| 164 | int double_digit_ref = capture_ref * 10 + c3 - '0'; |
| 165 | if (double_digit_ref <= capture_count) { |
| 166 | next_index = second_digit_index; |
| 167 | capture_ref = double_digit_ref; |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | if (capture_ref > 0) { |
| 172 | if (i > last) { |
| 173 | parts->Add(ReplacementPart::ReplacementSubString(last, i), |
| 174 | zone); |
| 175 | } |
| 176 | DCHECK(capture_ref <= capture_count); |
| 177 | parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone); |
| 178 | last = next_index + 1; |
| 179 | } |
| 180 | i = next_index; |
| 181 | break; |
| 182 | } |
| 183 | default: |
| 184 | i = next_index; |
| 185 | break; |
| 186 | } |
| 187 | } |
| 188 | } |
| 189 | if (length > last) { |
| 190 | if (last == 0) { |
| 191 | // Replacement is simple. Do not use Apply to do the replacement. |
| 192 | return true; |
| 193 | } else { |
| 194 | parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); |
| 195 | } |
| 196 | } |
| 197 | return false; |
| 198 | } |
| 199 | |
| 200 | ZoneList<ReplacementPart> parts_; |
| 201 | ZoneList<Handle<String> > replacement_substrings_; |
| 202 | Zone* zone_; |
| 203 | }; |
| 204 | |
| 205 | |
| 206 | bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count, |
| 207 | int subject_length) { |
| 208 | { |
| 209 | DisallowHeapAllocation no_gc; |
| 210 | String::FlatContent content = replacement->GetFlatContent(); |
| 211 | DCHECK(content.IsFlat()); |
| 212 | bool simple = false; |
| 213 | if (content.IsOneByte()) { |
| 214 | simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), |
| 215 | capture_count, subject_length, zone()); |
| 216 | } else { |
| 217 | DCHECK(content.IsTwoByte()); |
| 218 | simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), |
| 219 | capture_count, subject_length, zone()); |
| 220 | } |
| 221 | if (simple) return true; |
| 222 | } |
| 223 | |
| 224 | Isolate* isolate = replacement->GetIsolate(); |
| 225 | // Find substrings of replacement string and create them as String objects. |
| 226 | int substring_index = 0; |
| 227 | for (int i = 0, n = parts_.length(); i < n; i++) { |
| 228 | int tag = parts_[i].tag; |
| 229 | if (tag <= 0) { // A replacement string slice. |
| 230 | int from = -tag; |
| 231 | int to = parts_[i].data; |
| 232 | replacement_substrings_.Add( |
| 233 | isolate->factory()->NewSubString(replacement, from, to), zone()); |
| 234 | parts_[i].tag = REPLACEMENT_SUBSTRING; |
| 235 | parts_[i].data = substring_index; |
| 236 | substring_index++; |
| 237 | } else if (tag == REPLACEMENT_STRING) { |
| 238 | replacement_substrings_.Add(replacement, zone()); |
| 239 | parts_[i].data = substring_index; |
| 240 | substring_index++; |
| 241 | } |
| 242 | } |
| 243 | return false; |
| 244 | } |
| 245 | |
| 246 | |
| 247 | void CompiledReplacement::Apply(ReplacementStringBuilder* builder, |
| 248 | int match_from, int match_to, int32_t* match) { |
| 249 | DCHECK_LT(0, parts_.length()); |
| 250 | for (int i = 0, n = parts_.length(); i < n; i++) { |
| 251 | ReplacementPart part = parts_[i]; |
| 252 | switch (part.tag) { |
| 253 | case SUBJECT_PREFIX: |
| 254 | if (match_from > 0) builder->AddSubjectSlice(0, match_from); |
| 255 | break; |
| 256 | case SUBJECT_SUFFIX: { |
| 257 | int subject_length = part.data; |
| 258 | if (match_to < subject_length) { |
| 259 | builder->AddSubjectSlice(match_to, subject_length); |
| 260 | } |
| 261 | break; |
| 262 | } |
| 263 | case SUBJECT_CAPTURE: { |
| 264 | int capture = part.data; |
| 265 | int from = match[capture * 2]; |
| 266 | int to = match[capture * 2 + 1]; |
| 267 | if (from >= 0 && to > from) { |
| 268 | builder->AddSubjectSlice(from, to); |
| 269 | } |
| 270 | break; |
| 271 | } |
| 272 | case REPLACEMENT_SUBSTRING: |
| 273 | case REPLACEMENT_STRING: |
| 274 | builder->AddString(replacement_substrings_[part.data]); |
| 275 | break; |
| 276 | default: |
| 277 | UNREACHABLE(); |
| 278 | } |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | |
| 283 | void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, |
| 284 | ZoneList<int>* indices, unsigned int limit, |
| 285 | Zone* zone) { |
| 286 | DCHECK(limit > 0); |
| 287 | // Collect indices of pattern in subject using memchr. |
| 288 | // Stop after finding at most limit values. |
| 289 | const uint8_t* subject_start = subject.start(); |
| 290 | const uint8_t* subject_end = subject_start + subject.length(); |
| 291 | const uint8_t* pos = subject_start; |
| 292 | while (limit > 0) { |
| 293 | pos = reinterpret_cast<const uint8_t*>( |
| 294 | memchr(pos, pattern, subject_end - pos)); |
| 295 | if (pos == NULL) return; |
| 296 | indices->Add(static_cast<int>(pos - subject_start), zone); |
| 297 | pos++; |
| 298 | limit--; |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | |
| 303 | void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, |
| 304 | ZoneList<int>* indices, unsigned int limit, |
| 305 | Zone* zone) { |
| 306 | DCHECK(limit > 0); |
| 307 | const uc16* subject_start = subject.start(); |
| 308 | const uc16* subject_end = subject_start + subject.length(); |
| 309 | for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { |
| 310 | if (*pos == pattern) { |
| 311 | indices->Add(static_cast<int>(pos - subject_start), zone); |
| 312 | limit--; |
| 313 | } |
| 314 | } |
| 315 | } |
| 316 | |
| 317 | |
| 318 | template <typename SubjectChar, typename PatternChar> |
| 319 | void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, |
| 320 | Vector<const PatternChar> pattern, |
| 321 | ZoneList<int>* indices, unsigned int limit, Zone* zone) { |
| 322 | DCHECK(limit > 0); |
| 323 | // Collect indices of pattern in subject. |
| 324 | // Stop after finding at most limit values. |
| 325 | int pattern_length = pattern.length(); |
| 326 | int index = 0; |
| 327 | StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
| 328 | while (limit > 0) { |
| 329 | index = search.Search(subject, index); |
| 330 | if (index < 0) return; |
| 331 | indices->Add(index, zone); |
| 332 | index += pattern_length; |
| 333 | limit--; |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | |
| 338 | void FindStringIndicesDispatch(Isolate* isolate, String* subject, |
| 339 | String* pattern, ZoneList<int>* indices, |
| 340 | unsigned int limit, Zone* zone) { |
| 341 | { |
| 342 | DisallowHeapAllocation no_gc; |
| 343 | String::FlatContent subject_content = subject->GetFlatContent(); |
| 344 | String::FlatContent pattern_content = pattern->GetFlatContent(); |
| 345 | DCHECK(subject_content.IsFlat()); |
| 346 | DCHECK(pattern_content.IsFlat()); |
| 347 | if (subject_content.IsOneByte()) { |
| 348 | Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); |
| 349 | if (pattern_content.IsOneByte()) { |
| 350 | Vector<const uint8_t> pattern_vector = |
| 351 | pattern_content.ToOneByteVector(); |
| 352 | if (pattern_vector.length() == 1) { |
| 353 | FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 354 | limit, zone); |
| 355 | } else { |
| 356 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 357 | limit, zone); |
| 358 | } |
| 359 | } else { |
| 360 | FindStringIndices(isolate, subject_vector, |
| 361 | pattern_content.ToUC16Vector(), indices, limit, zone); |
| 362 | } |
| 363 | } else { |
| 364 | Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
| 365 | if (pattern_content.IsOneByte()) { |
| 366 | Vector<const uint8_t> pattern_vector = |
| 367 | pattern_content.ToOneByteVector(); |
| 368 | if (pattern_vector.length() == 1) { |
| 369 | FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 370 | limit, zone); |
| 371 | } else { |
| 372 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 373 | limit, zone); |
| 374 | } |
| 375 | } else { |
| 376 | Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); |
| 377 | if (pattern_vector.length() == 1) { |
| 378 | FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 379 | limit, zone); |
| 380 | } else { |
| 381 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 382 | limit, zone); |
| 383 | } |
| 384 | } |
| 385 | } |
| 386 | } |
| 387 | } |
| 388 | |
| 389 | |
| 390 | template <typename ResultSeqString> |
| 391 | MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( |
| 392 | Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, |
| 393 | Handle<String> replacement, Handle<JSArray> last_match_info) { |
| 394 | DCHECK(subject->IsFlat()); |
| 395 | DCHECK(replacement->IsFlat()); |
| 396 | |
| 397 | ZoneScope zone_scope(isolate->runtime_zone()); |
| 398 | ZoneList<int> indices(8, zone_scope.zone()); |
| 399 | DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); |
| 400 | String* pattern = |
| 401 | String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
| 402 | int subject_len = subject->length(); |
| 403 | int pattern_len = pattern->length(); |
| 404 | int replacement_len = replacement->length(); |
| 405 | |
| 406 | FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, |
| 407 | zone_scope.zone()); |
| 408 | |
| 409 | int matches = indices.length(); |
| 410 | if (matches == 0) return *subject; |
| 411 | |
| 412 | // Detect integer overflow. |
| 413 | int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - |
| 414 | static_cast<int64_t>(pattern_len)) * |
| 415 | static_cast<int64_t>(matches) + |
| 416 | static_cast<int64_t>(subject_len); |
| 417 | int result_len; |
| 418 | if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { |
| 419 | STATIC_ASSERT(String::kMaxLength < kMaxInt); |
| 420 | result_len = kMaxInt; // Provoke exception. |
| 421 | } else { |
| 422 | result_len = static_cast<int>(result_len_64); |
| 423 | } |
| 424 | |
| 425 | int subject_pos = 0; |
| 426 | int result_pos = 0; |
| 427 | |
| 428 | MaybeHandle<SeqString> maybe_res; |
| 429 | if (ResultSeqString::kHasOneByteEncoding) { |
| 430 | maybe_res = isolate->factory()->NewRawOneByteString(result_len); |
| 431 | } else { |
| 432 | maybe_res = isolate->factory()->NewRawTwoByteString(result_len); |
| 433 | } |
| 434 | Handle<SeqString> untyped_res; |
| 435 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); |
| 436 | Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); |
| 437 | |
| 438 | for (int i = 0; i < matches; i++) { |
| 439 | // Copy non-matched subject content. |
| 440 | if (subject_pos < indices.at(i)) { |
| 441 | String::WriteToFlat(*subject, result->GetChars() + result_pos, |
| 442 | subject_pos, indices.at(i)); |
| 443 | result_pos += indices.at(i) - subject_pos; |
| 444 | } |
| 445 | |
| 446 | // Replace match. |
| 447 | if (replacement_len > 0) { |
| 448 | String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, |
| 449 | replacement_len); |
| 450 | result_pos += replacement_len; |
| 451 | } |
| 452 | |
| 453 | subject_pos = indices.at(i) + pattern_len; |
| 454 | } |
| 455 | // Add remaining subject content at the end. |
| 456 | if (subject_pos < subject_len) { |
| 457 | String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, |
| 458 | subject_len); |
| 459 | } |
| 460 | |
| 461 | int32_t match_indices[] = {indices.at(matches - 1), |
| 462 | indices.at(matches - 1) + pattern_len}; |
| 463 | RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); |
| 464 | |
| 465 | return *result; |
| 466 | } |
| 467 | |
| 468 | |
| 469 | MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( |
| 470 | Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
| 471 | Handle<String> replacement, Handle<JSArray> last_match_info) { |
| 472 | DCHECK(subject->IsFlat()); |
| 473 | DCHECK(replacement->IsFlat()); |
| 474 | |
| 475 | int capture_count = regexp->CaptureCount(); |
| 476 | int subject_length = subject->length(); |
| 477 | |
| 478 | // CompiledReplacement uses zone allocation. |
| 479 | ZoneScope zone_scope(isolate->runtime_zone()); |
| 480 | CompiledReplacement compiled_replacement(zone_scope.zone()); |
| 481 | bool simple_replace = |
| 482 | compiled_replacement.Compile(replacement, capture_count, subject_length); |
| 483 | |
| 484 | // Shortcut for simple non-regexp global replacements |
| 485 | if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { |
| 486 | if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { |
| 487 | return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
| 488 | isolate, subject, regexp, replacement, last_match_info); |
| 489 | } else { |
| 490 | return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
| 491 | isolate, subject, regexp, replacement, last_match_info); |
| 492 | } |
| 493 | } |
| 494 | |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 495 | RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 496 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 497 | |
| 498 | int32_t* current_match = global_cache.FetchNext(); |
| 499 | if (current_match == NULL) { |
| 500 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 501 | return *subject; |
| 502 | } |
| 503 | |
| 504 | // Guessing the number of parts that the final result string is built |
| 505 | // from. Global regexps can match any number of times, so we guess |
| 506 | // conservatively. |
| 507 | int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; |
| 508 | ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); |
| 509 | |
| 510 | // Number of parts added by compiled replacement plus preceeding |
| 511 | // string and possibly suffix after last match. It is possible for |
| 512 | // all components to use two elements when encoded as two smis. |
| 513 | const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); |
| 514 | |
| 515 | int prev = 0; |
| 516 | |
| 517 | do { |
| 518 | builder.EnsureCapacity(parts_added_per_loop); |
| 519 | |
| 520 | int start = current_match[0]; |
| 521 | int end = current_match[1]; |
| 522 | |
| 523 | if (prev < start) { |
| 524 | builder.AddSubjectSlice(prev, start); |
| 525 | } |
| 526 | |
| 527 | if (simple_replace) { |
| 528 | builder.AddString(replacement); |
| 529 | } else { |
| 530 | compiled_replacement.Apply(&builder, start, end, current_match); |
| 531 | } |
| 532 | prev = end; |
| 533 | |
| 534 | current_match = global_cache.FetchNext(); |
| 535 | } while (current_match != NULL); |
| 536 | |
| 537 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 538 | |
| 539 | if (prev < subject_length) { |
| 540 | builder.EnsureCapacity(2); |
| 541 | builder.AddSubjectSlice(prev, subject_length); |
| 542 | } |
| 543 | |
| 544 | RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
| 545 | global_cache.LastSuccessfulMatch()); |
| 546 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 547 | RETURN_RESULT_OR_FAILURE(isolate, builder.ToString()); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 548 | } |
| 549 | |
| 550 | |
| 551 | template <typename ResultSeqString> |
| 552 | MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString( |
| 553 | Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
| 554 | Handle<JSArray> last_match_info) { |
| 555 | DCHECK(subject->IsFlat()); |
| 556 | |
| 557 | // Shortcut for simple non-regexp global replacements |
| 558 | if (regexp->TypeTag() == JSRegExp::ATOM) { |
| 559 | Handle<String> empty_string = isolate->factory()->empty_string(); |
| 560 | if (subject->IsOneByteRepresentation()) { |
| 561 | return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
| 562 | isolate, subject, regexp, empty_string, last_match_info); |
| 563 | } else { |
| 564 | return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
| 565 | isolate, subject, regexp, empty_string, last_match_info); |
| 566 | } |
| 567 | } |
| 568 | |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 569 | RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 570 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 571 | |
| 572 | int32_t* current_match = global_cache.FetchNext(); |
| 573 | if (current_match == NULL) { |
| 574 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 575 | return *subject; |
| 576 | } |
| 577 | |
| 578 | int start = current_match[0]; |
| 579 | int end = current_match[1]; |
| 580 | int capture_count = regexp->CaptureCount(); |
| 581 | int subject_length = subject->length(); |
| 582 | |
| 583 | int new_length = subject_length - (end - start); |
| 584 | if (new_length == 0) return isolate->heap()->empty_string(); |
| 585 | |
| 586 | Handle<ResultSeqString> answer; |
| 587 | if (ResultSeqString::kHasOneByteEncoding) { |
| 588 | answer = Handle<ResultSeqString>::cast( |
| 589 | isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); |
| 590 | } else { |
| 591 | answer = Handle<ResultSeqString>::cast( |
| 592 | isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); |
| 593 | } |
| 594 | |
| 595 | int prev = 0; |
| 596 | int position = 0; |
| 597 | |
| 598 | do { |
| 599 | start = current_match[0]; |
| 600 | end = current_match[1]; |
| 601 | if (prev < start) { |
| 602 | // Add substring subject[prev;start] to answer string. |
| 603 | String::WriteToFlat(*subject, answer->GetChars() + position, prev, start); |
| 604 | position += start - prev; |
| 605 | } |
| 606 | prev = end; |
| 607 | |
| 608 | current_match = global_cache.FetchNext(); |
| 609 | } while (current_match != NULL); |
| 610 | |
| 611 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 612 | |
| 613 | RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
| 614 | global_cache.LastSuccessfulMatch()); |
| 615 | |
| 616 | if (prev < subject_length) { |
| 617 | // Add substring subject[prev;length] to answer string. |
| 618 | String::WriteToFlat(*subject, answer->GetChars() + position, prev, |
| 619 | subject_length); |
| 620 | position += subject_length - prev; |
| 621 | } |
| 622 | |
| 623 | if (position == 0) return isolate->heap()->empty_string(); |
| 624 | |
| 625 | // Shorten string and fill |
| 626 | int string_size = ResultSeqString::SizeFor(position); |
| 627 | int allocated_string_size = ResultSeqString::SizeFor(new_length); |
| 628 | int delta = allocated_string_size - string_size; |
| 629 | |
| 630 | answer->set_length(position); |
| 631 | if (delta == 0) return *answer; |
| 632 | |
| 633 | Address end_of_string = answer->address() + string_size; |
| 634 | Heap* heap = isolate->heap(); |
| 635 | |
| 636 | // The trimming is performed on a newly allocated object, which is on a |
| 637 | // fresly allocated page or on an already swept page. Hence, the sweeper |
| 638 | // thread can not get confused with the filler creation. No synchronization |
| 639 | // needed. |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 640 | // TODO(hpayer): We should shrink the large object page if the size |
| 641 | // of the object changed significantly. |
| 642 | if (!heap->lo_space()->Contains(*answer)) { |
Ben Murdoch | da12d29 | 2016-06-02 14:46:10 +0100 | [diff] [blame] | 643 | heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 644 | } |
| 645 | heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 646 | return *answer; |
| 647 | } |
| 648 | |
| 649 | |
| 650 | RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) { |
| 651 | HandleScope scope(isolate); |
| 652 | DCHECK(args.length() == 4); |
| 653 | |
| 654 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
| 655 | CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); |
| 656 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); |
| 657 | CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
| 658 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 659 | CHECK(regexp->GetFlags() & JSRegExp::kGlobal); |
| 660 | CHECK(last_match_info->HasFastObjectElements()); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 661 | |
| 662 | subject = String::Flatten(subject); |
| 663 | |
| 664 | if (replacement->length() == 0) { |
| 665 | if (subject->HasOnlyOneByteChars()) { |
| 666 | return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( |
| 667 | isolate, subject, regexp, last_match_info); |
| 668 | } else { |
| 669 | return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( |
| 670 | isolate, subject, regexp, last_match_info); |
| 671 | } |
| 672 | } |
| 673 | |
| 674 | replacement = String::Flatten(replacement); |
| 675 | |
| 676 | return StringReplaceGlobalRegExpWithString(isolate, subject, regexp, |
| 677 | replacement, last_match_info); |
| 678 | } |
| 679 | |
| 680 | |
| 681 | RUNTIME_FUNCTION(Runtime_StringSplit) { |
| 682 | HandleScope handle_scope(isolate); |
| 683 | DCHECK(args.length() == 3); |
| 684 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
| 685 | CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); |
| 686 | CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 687 | CHECK(limit > 0); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 688 | |
| 689 | int subject_length = subject->length(); |
| 690 | int pattern_length = pattern->length(); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 691 | CHECK(pattern_length > 0); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 692 | |
| 693 | if (limit == 0xffffffffu) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 694 | FixedArray* last_match_cache_unused; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 695 | Handle<Object> cached_answer( |
| 696 | RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 697 | &last_match_cache_unused, |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 698 | RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), |
| 699 | isolate); |
| 700 | if (*cached_answer != Smi::FromInt(0)) { |
| 701 | // The cache FixedArray is a COW-array and can therefore be reused. |
| 702 | Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( |
| 703 | Handle<FixedArray>::cast(cached_answer)); |
| 704 | return *result; |
| 705 | } |
| 706 | } |
| 707 | |
| 708 | // The limit can be very large (0xffffffffu), but since the pattern |
| 709 | // isn't empty, we can never create more parts than ~half the length |
| 710 | // of the subject. |
| 711 | |
| 712 | subject = String::Flatten(subject); |
| 713 | pattern = String::Flatten(pattern); |
| 714 | |
| 715 | static const int kMaxInitialListCapacity = 16; |
| 716 | |
| 717 | ZoneScope zone_scope(isolate->runtime_zone()); |
| 718 | |
| 719 | // Find (up to limit) indices of separator and end-of-string in subject |
| 720 | int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
| 721 | ZoneList<int> indices(initial_capacity, zone_scope.zone()); |
| 722 | |
| 723 | FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit, |
| 724 | zone_scope.zone()); |
| 725 | |
| 726 | if (static_cast<uint32_t>(indices.length()) < limit) { |
| 727 | indices.Add(subject_length, zone_scope.zone()); |
| 728 | } |
| 729 | |
| 730 | // The list indices now contains the end of each part to create. |
| 731 | |
| 732 | // Create JSArray of substrings separated by separator. |
| 733 | int part_count = indices.length(); |
| 734 | |
Ben Murdoch | da12d29 | 2016-06-02 14:46:10 +0100 | [diff] [blame] | 735 | Handle<JSArray> result = |
| 736 | isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count, |
| 737 | INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 738 | |
| 739 | DCHECK(result->HasFastObjectElements()); |
| 740 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 741 | Handle<FixedArray> elements(FixedArray::cast(result->elements())); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 742 | |
| 743 | if (part_count == 1 && indices.at(0) == subject_length) { |
| 744 | elements->set(0, *subject); |
| 745 | } else { |
| 746 | int part_start = 0; |
Ben Murdoch | da12d29 | 2016-06-02 14:46:10 +0100 | [diff] [blame] | 747 | FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 748 | int part_end = indices.at(i); |
| 749 | Handle<String> substring = |
| 750 | isolate->factory()->NewProperSubString(subject, part_start, part_end); |
| 751 | elements->set(i, *substring); |
| 752 | part_start = part_end + pattern_length; |
Ben Murdoch | da12d29 | 2016-06-02 14:46:10 +0100 | [diff] [blame] | 753 | }); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 754 | } |
| 755 | |
| 756 | if (limit == 0xffffffffu) { |
| 757 | if (result->HasFastObjectElements()) { |
| 758 | RegExpResultsCache::Enter(isolate, subject, pattern, elements, |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 759 | isolate->factory()->empty_fixed_array(), |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 760 | RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); |
| 761 | } |
| 762 | } |
| 763 | |
| 764 | return *result; |
| 765 | } |
| 766 | |
| 767 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 768 | RUNTIME_FUNCTION(Runtime_RegExpExec) { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 769 | HandleScope scope(isolate); |
| 770 | DCHECK(args.length() == 4); |
| 771 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 772 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
| 773 | CONVERT_INT32_ARG_CHECKED(index, 2); |
| 774 | CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
| 775 | // Due to the way the JS calls are constructed this must be less than the |
| 776 | // length of a string, i.e. it is always a Smi. We check anyway for security. |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 777 | CHECK(index >= 0); |
| 778 | CHECK(index <= subject->length()); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 779 | isolate->counters()->regexp_entry_runtime()->Increment(); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 780 | RETURN_RESULT_OR_FAILURE( |
| 781 | isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 782 | } |
| 783 | |
| 784 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 785 | RUNTIME_FUNCTION(Runtime_RegExpFlags) { |
| 786 | SealHandleScope shs(isolate); |
| 787 | DCHECK(args.length() == 1); |
| 788 | CONVERT_ARG_CHECKED(JSRegExp, regexp, 0); |
| 789 | return regexp->flags(); |
| 790 | } |
| 791 | |
| 792 | |
| 793 | RUNTIME_FUNCTION(Runtime_RegExpSource) { |
| 794 | SealHandleScope shs(isolate); |
| 795 | DCHECK(args.length() == 1); |
| 796 | CONVERT_ARG_CHECKED(JSRegExp, regexp, 0); |
| 797 | return regexp->source(); |
| 798 | } |
| 799 | |
| 800 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 801 | RUNTIME_FUNCTION(Runtime_RegExpConstructResult) { |
| 802 | HandleScope handle_scope(isolate); |
| 803 | DCHECK(args.length() == 3); |
| 804 | CONVERT_SMI_ARG_CHECKED(size, 0); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 805 | CHECK(size >= 0 && size <= FixedArray::kMaxLength); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 806 | CONVERT_ARG_HANDLE_CHECKED(Object, index, 1); |
| 807 | CONVERT_ARG_HANDLE_CHECKED(Object, input, 2); |
| 808 | Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size); |
| 809 | Handle<Map> regexp_map(isolate->native_context()->regexp_result_map()); |
| 810 | Handle<JSObject> object = |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 811 | isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 812 | Handle<JSArray> array = Handle<JSArray>::cast(object); |
| 813 | array->set_elements(*elements); |
| 814 | array->set_length(Smi::FromInt(size)); |
| 815 | // Write in-object properties after the length of the array. |
| 816 | array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index); |
| 817 | array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input); |
| 818 | return *array; |
| 819 | } |
| 820 | |
| 821 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 822 | RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) { |
| 823 | HandleScope scope(isolate); |
| 824 | DCHECK(args.length() == 3); |
| 825 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 826 | CONVERT_ARG_HANDLE_CHECKED(String, source, 1); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 827 | CONVERT_ARG_HANDLE_CHECKED(String, flags, 2); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 828 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 829 | RETURN_FAILURE_ON_EXCEPTION(isolate, |
| 830 | JSRegExp::Initialize(regexp, source, flags)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 831 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 832 | return *regexp; |
| 833 | } |
| 834 | |
| 835 | |
| 836 | // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain |
| 837 | // separate last match info. See comment on that function. |
| 838 | template <bool has_capture> |
| 839 | static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, |
| 840 | Handle<JSRegExp> regexp, |
| 841 | Handle<JSArray> last_match_array, |
| 842 | Handle<JSArray> result_array) { |
| 843 | DCHECK(subject->IsFlat()); |
| 844 | DCHECK_NE(has_capture, regexp->CaptureCount() == 0); |
| 845 | |
| 846 | int capture_count = regexp->CaptureCount(); |
| 847 | int subject_length = subject->length(); |
| 848 | |
| 849 | static const int kMinLengthToCache = 0x1000; |
| 850 | |
| 851 | if (subject_length > kMinLengthToCache) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 852 | FixedArray* last_match_cache; |
| 853 | Object* cached_answer = RegExpResultsCache::Lookup( |
| 854 | isolate->heap(), *subject, regexp->data(), &last_match_cache, |
| 855 | RegExpResultsCache::REGEXP_MULTIPLE_INDICES); |
| 856 | if (cached_answer->IsFixedArray()) { |
| 857 | int capture_registers = (capture_count + 1) * 2; |
| 858 | int32_t* last_match = NewArray<int32_t>(capture_registers); |
| 859 | for (int i = 0; i < capture_registers; i++) { |
| 860 | last_match[i] = Smi::cast(last_match_cache->get(i))->value(); |
| 861 | } |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 862 | Handle<FixedArray> cached_fixed_array = |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 863 | Handle<FixedArray>(FixedArray::cast(cached_answer)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 864 | // The cache FixedArray is a COW-array and can therefore be reused. |
| 865 | JSArray::SetContent(result_array, cached_fixed_array); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 866 | RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 867 | last_match); |
| 868 | DeleteArray(last_match); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 869 | return *result_array; |
| 870 | } |
| 871 | } |
| 872 | |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 873 | RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 874 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 875 | |
| 876 | // Ensured in Runtime_RegExpExecMultiple. |
| 877 | DCHECK(result_array->HasFastObjectElements()); |
| 878 | Handle<FixedArray> result_elements( |
| 879 | FixedArray::cast(result_array->elements())); |
| 880 | if (result_elements->length() < 16) { |
| 881 | result_elements = isolate->factory()->NewFixedArrayWithHoles(16); |
| 882 | } |
| 883 | |
| 884 | FixedArrayBuilder builder(result_elements); |
| 885 | |
| 886 | // Position to search from. |
| 887 | int match_start = -1; |
| 888 | int match_end = 0; |
| 889 | bool first = true; |
| 890 | |
| 891 | // Two smis before and after the match, for very long strings. |
| 892 | static const int kMaxBuilderEntriesPerRegExpMatch = 5; |
| 893 | |
| 894 | while (true) { |
| 895 | int32_t* current_match = global_cache.FetchNext(); |
| 896 | if (current_match == NULL) break; |
| 897 | match_start = current_match[0]; |
| 898 | builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| 899 | if (match_end < match_start) { |
| 900 | ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
| 901 | match_start); |
| 902 | } |
| 903 | match_end = current_match[1]; |
| 904 | { |
| 905 | // Avoid accumulating new handles inside loop. |
| 906 | HandleScope temp_scope(isolate); |
| 907 | Handle<String> match; |
| 908 | if (!first) { |
| 909 | match = isolate->factory()->NewProperSubString(subject, match_start, |
| 910 | match_end); |
| 911 | } else { |
| 912 | match = |
| 913 | isolate->factory()->NewSubString(subject, match_start, match_end); |
| 914 | first = false; |
| 915 | } |
| 916 | |
| 917 | if (has_capture) { |
| 918 | // Arguments array to replace function is match, captures, index and |
| 919 | // subject, i.e., 3 + capture count in total. |
| 920 | Handle<FixedArray> elements = |
| 921 | isolate->factory()->NewFixedArray(3 + capture_count); |
| 922 | |
| 923 | elements->set(0, *match); |
| 924 | for (int i = 1; i <= capture_count; i++) { |
| 925 | int start = current_match[i * 2]; |
| 926 | if (start >= 0) { |
| 927 | int end = current_match[i * 2 + 1]; |
| 928 | DCHECK(start <= end); |
| 929 | Handle<String> substring = |
| 930 | isolate->factory()->NewSubString(subject, start, end); |
| 931 | elements->set(i, *substring); |
| 932 | } else { |
| 933 | DCHECK(current_match[i * 2 + 1] < 0); |
| 934 | elements->set(i, isolate->heap()->undefined_value()); |
| 935 | } |
| 936 | } |
| 937 | elements->set(capture_count + 1, Smi::FromInt(match_start)); |
| 938 | elements->set(capture_count + 2, *subject); |
| 939 | builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); |
| 940 | } else { |
| 941 | builder.Add(*match); |
| 942 | } |
| 943 | } |
| 944 | } |
| 945 | |
| 946 | if (global_cache.HasException()) return isolate->heap()->exception(); |
| 947 | |
| 948 | if (match_start >= 0) { |
| 949 | // Finished matching, with at least one match. |
| 950 | if (match_end < subject_length) { |
| 951 | ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
| 952 | subject_length); |
| 953 | } |
| 954 | |
| 955 | RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 956 | global_cache.LastSuccessfulMatch()); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 957 | |
| 958 | if (subject_length > kMinLengthToCache) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 959 | // Store the last successful match into the array for caching. |
| 960 | // TODO(yangguo): do not expose last match to JS and simplify caching. |
| 961 | int capture_registers = (capture_count + 1) * 2; |
| 962 | Handle<FixedArray> last_match_cache = |
| 963 | isolate->factory()->NewFixedArray(capture_registers); |
| 964 | int32_t* last_match = global_cache.LastSuccessfulMatch(); |
| 965 | for (int i = 0; i < capture_registers; i++) { |
| 966 | last_match_cache->set(i, Smi::FromInt(last_match[i])); |
| 967 | } |
| 968 | Handle<FixedArray> result_fixed_array = builder.array(); |
| 969 | result_fixed_array->Shrink(builder.length()); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 970 | // Cache the result and turn the FixedArray into a COW array. |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 971 | RegExpResultsCache::Enter( |
| 972 | isolate, subject, handle(regexp->data(), isolate), result_fixed_array, |
| 973 | last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 974 | } |
| 975 | return *builder.ToJSArray(result_array); |
| 976 | } else { |
| 977 | return isolate->heap()->null_value(); // No matches at all. |
| 978 | } |
| 979 | } |
| 980 | |
| 981 | |
| 982 | // This is only called for StringReplaceGlobalRegExpWithFunction. This sets |
| 983 | // lastMatchInfoOverride to maintain the last match info, so we don't need to |
| 984 | // set any other last match array info. |
| 985 | RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { |
| 986 | HandleScope handles(isolate); |
| 987 | DCHECK(args.length() == 4); |
| 988 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 989 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 990 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 991 | CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); |
| 992 | CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 993 | CHECK(last_match_info->HasFastObjectElements()); |
| 994 | CHECK(result_array->HasFastObjectElements()); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 995 | |
| 996 | subject = String::Flatten(subject); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 997 | CHECK(regexp->GetFlags() & JSRegExp::kGlobal); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 998 | |
| 999 | if (regexp->CaptureCount() == 0) { |
| 1000 | return SearchRegExpMultiple<false>(isolate, subject, regexp, |
| 1001 | last_match_info, result_array); |
| 1002 | } else { |
| 1003 | return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info, |
| 1004 | result_array); |
| 1005 | } |
| 1006 | } |
| 1007 | |
| 1008 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 1009 | RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1010 | SealHandleScope shs(isolate); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 1011 | DCHECK(args.length() == 4); |
| 1012 | Object* exception = isolate->pending_exception(); |
| 1013 | isolate->clear_pending_exception(); |
| 1014 | return isolate->ReThrow(exception); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1015 | } |
| 1016 | |
| 1017 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 1018 | RUNTIME_FUNCTION(Runtime_IsRegExp) { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1019 | SealHandleScope shs(isolate); |
| 1020 | DCHECK(args.length() == 1); |
| 1021 | CONVERT_ARG_CHECKED(Object, obj, 0); |
| 1022 | return isolate->heap()->ToBoolean(obj->IsJSRegExp()); |
| 1023 | } |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 1024 | } // namespace internal |
| 1025 | } // namespace v8 |