blob: a8133d349538e4f0742f25042653b793d0f8b877 [file] [log] [blame]
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00005#include "src/runtime/runtime-utils.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -04006
7#include "src/arguments.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00008#include "src/conversions-inl.h"
9#include "src/isolate-inl.h"
10#include "src/messages.h"
11#include "src/regexp/jsregexp-inl.h"
12#include "src/regexp/jsregexp.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040013#include "src/string-builder.h"
14#include "src/string-search.h"
15
16namespace v8 {
17namespace internal {
18
19class CompiledReplacement {
20 public:
21 explicit CompiledReplacement(Zone* zone)
22 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
23
24 // Return whether the replacement is simple.
25 bool Compile(Handle<String> replacement, int capture_count,
26 int subject_length);
27
28 // Use Apply only if Compile returned false.
29 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
30 int32_t* match);
31
32 // Number of distinct parts of the replacement pattern.
33 int parts() { return parts_.length(); }
34
35 Zone* zone() const { return zone_; }
36
37 private:
38 enum PartType {
39 SUBJECT_PREFIX = 1,
40 SUBJECT_SUFFIX,
41 SUBJECT_CAPTURE,
42 REPLACEMENT_SUBSTRING,
43 REPLACEMENT_STRING,
44 NUMBER_OF_PART_TYPES
45 };
46
47 struct ReplacementPart {
48 static inline ReplacementPart SubjectMatch() {
49 return ReplacementPart(SUBJECT_CAPTURE, 0);
50 }
51 static inline ReplacementPart SubjectCapture(int capture_index) {
52 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
53 }
54 static inline ReplacementPart SubjectPrefix() {
55 return ReplacementPart(SUBJECT_PREFIX, 0);
56 }
57 static inline ReplacementPart SubjectSuffix(int subject_length) {
58 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
59 }
60 static inline ReplacementPart ReplacementString() {
61 return ReplacementPart(REPLACEMENT_STRING, 0);
62 }
63 static inline ReplacementPart ReplacementSubString(int from, int to) {
64 DCHECK(from >= 0);
65 DCHECK(to > from);
66 return ReplacementPart(-from, to);
67 }
68
69 // If tag <= 0 then it is the negation of a start index of a substring of
70 // the replacement pattern, otherwise it's a value from PartType.
71 ReplacementPart(int tag, int data) : tag(tag), data(data) {
72 // Must be non-positive or a PartType value.
73 DCHECK(tag < NUMBER_OF_PART_TYPES);
74 }
75 // Either a value of PartType or a non-positive number that is
76 // the negation of an index into the replacement string.
77 int tag;
78 // The data value's interpretation depends on the value of tag:
79 // tag == SUBJECT_PREFIX ||
80 // tag == SUBJECT_SUFFIX: data is unused.
81 // tag == SUBJECT_CAPTURE: data is the number of the capture.
82 // tag == REPLACEMENT_SUBSTRING ||
83 // tag == REPLACEMENT_STRING: data is index into array of substrings
84 // of the replacement string.
85 // tag <= 0: Temporary representation of the substring of the replacement
86 // string ranging over -tag .. data.
87 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
88 // substring objects.
89 int data;
90 };
91
92 template <typename Char>
93 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
94 Vector<Char> characters, int capture_count,
95 int subject_length, Zone* zone) {
96 int length = characters.length();
97 int last = 0;
98 for (int i = 0; i < length; i++) {
99 Char c = characters[i];
100 if (c == '$') {
101 int next_index = i + 1;
102 if (next_index == length) { // No next character!
103 break;
104 }
105 Char c2 = characters[next_index];
106 switch (c2) {
107 case '$':
108 if (i > last) {
109 // There is a substring before. Include the first "$".
110 parts->Add(
111 ReplacementPart::ReplacementSubString(last, next_index),
112 zone);
113 last = next_index + 1; // Continue after the second "$".
114 } else {
115 // Let the next substring start with the second "$".
116 last = next_index;
117 }
118 i = next_index;
119 break;
120 case '`':
121 if (i > last) {
122 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
123 }
124 parts->Add(ReplacementPart::SubjectPrefix(), zone);
125 i = next_index;
126 last = i + 1;
127 break;
128 case '\'':
129 if (i > last) {
130 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
131 }
132 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
133 i = next_index;
134 last = i + 1;
135 break;
136 case '&':
137 if (i > last) {
138 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
139 }
140 parts->Add(ReplacementPart::SubjectMatch(), zone);
141 i = next_index;
142 last = i + 1;
143 break;
144 case '0':
145 case '1':
146 case '2':
147 case '3':
148 case '4':
149 case '5':
150 case '6':
151 case '7':
152 case '8':
153 case '9': {
154 int capture_ref = c2 - '0';
155 if (capture_ref > capture_count) {
156 i = next_index;
157 continue;
158 }
159 int second_digit_index = next_index + 1;
160 if (second_digit_index < length) {
161 // Peek ahead to see if we have two digits.
162 Char c3 = characters[second_digit_index];
163 if ('0' <= c3 && c3 <= '9') { // Double digits.
164 int double_digit_ref = capture_ref * 10 + c3 - '0';
165 if (double_digit_ref <= capture_count) {
166 next_index = second_digit_index;
167 capture_ref = double_digit_ref;
168 }
169 }
170 }
171 if (capture_ref > 0) {
172 if (i > last) {
173 parts->Add(ReplacementPart::ReplacementSubString(last, i),
174 zone);
175 }
176 DCHECK(capture_ref <= capture_count);
177 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
178 last = next_index + 1;
179 }
180 i = next_index;
181 break;
182 }
183 default:
184 i = next_index;
185 break;
186 }
187 }
188 }
189 if (length > last) {
190 if (last == 0) {
191 // Replacement is simple. Do not use Apply to do the replacement.
192 return true;
193 } else {
194 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
195 }
196 }
197 return false;
198 }
199
200 ZoneList<ReplacementPart> parts_;
201 ZoneList<Handle<String> > replacement_substrings_;
202 Zone* zone_;
203};
204
205
206bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
207 int subject_length) {
208 {
209 DisallowHeapAllocation no_gc;
210 String::FlatContent content = replacement->GetFlatContent();
211 DCHECK(content.IsFlat());
212 bool simple = false;
213 if (content.IsOneByte()) {
214 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
215 capture_count, subject_length, zone());
216 } else {
217 DCHECK(content.IsTwoByte());
218 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
219 capture_count, subject_length, zone());
220 }
221 if (simple) return true;
222 }
223
224 Isolate* isolate = replacement->GetIsolate();
225 // Find substrings of replacement string and create them as String objects.
226 int substring_index = 0;
227 for (int i = 0, n = parts_.length(); i < n; i++) {
228 int tag = parts_[i].tag;
229 if (tag <= 0) { // A replacement string slice.
230 int from = -tag;
231 int to = parts_[i].data;
232 replacement_substrings_.Add(
233 isolate->factory()->NewSubString(replacement, from, to), zone());
234 parts_[i].tag = REPLACEMENT_SUBSTRING;
235 parts_[i].data = substring_index;
236 substring_index++;
237 } else if (tag == REPLACEMENT_STRING) {
238 replacement_substrings_.Add(replacement, zone());
239 parts_[i].data = substring_index;
240 substring_index++;
241 }
242 }
243 return false;
244}
245
246
247void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
248 int match_from, int match_to, int32_t* match) {
249 DCHECK_LT(0, parts_.length());
250 for (int i = 0, n = parts_.length(); i < n; i++) {
251 ReplacementPart part = parts_[i];
252 switch (part.tag) {
253 case SUBJECT_PREFIX:
254 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
255 break;
256 case SUBJECT_SUFFIX: {
257 int subject_length = part.data;
258 if (match_to < subject_length) {
259 builder->AddSubjectSlice(match_to, subject_length);
260 }
261 break;
262 }
263 case SUBJECT_CAPTURE: {
264 int capture = part.data;
265 int from = match[capture * 2];
266 int to = match[capture * 2 + 1];
267 if (from >= 0 && to > from) {
268 builder->AddSubjectSlice(from, to);
269 }
270 break;
271 }
272 case REPLACEMENT_SUBSTRING:
273 case REPLACEMENT_STRING:
274 builder->AddString(replacement_substrings_[part.data]);
275 break;
276 default:
277 UNREACHABLE();
278 }
279 }
280}
281
282
283void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284 ZoneList<int>* indices, unsigned int limit,
285 Zone* zone) {
286 DCHECK(limit > 0);
287 // Collect indices of pattern in subject using memchr.
288 // Stop after finding at most limit values.
289 const uint8_t* subject_start = subject.start();
290 const uint8_t* subject_end = subject_start + subject.length();
291 const uint8_t* pos = subject_start;
292 while (limit > 0) {
293 pos = reinterpret_cast<const uint8_t*>(
294 memchr(pos, pattern, subject_end - pos));
295 if (pos == NULL) return;
296 indices->Add(static_cast<int>(pos - subject_start), zone);
297 pos++;
298 limit--;
299 }
300}
301
302
303void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
304 ZoneList<int>* indices, unsigned int limit,
305 Zone* zone) {
306 DCHECK(limit > 0);
307 const uc16* subject_start = subject.start();
308 const uc16* subject_end = subject_start + subject.length();
309 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
310 if (*pos == pattern) {
311 indices->Add(static_cast<int>(pos - subject_start), zone);
312 limit--;
313 }
314 }
315}
316
317
318template <typename SubjectChar, typename PatternChar>
319void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
320 Vector<const PatternChar> pattern,
321 ZoneList<int>* indices, unsigned int limit, Zone* zone) {
322 DCHECK(limit > 0);
323 // Collect indices of pattern in subject.
324 // Stop after finding at most limit values.
325 int pattern_length = pattern.length();
326 int index = 0;
327 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
328 while (limit > 0) {
329 index = search.Search(subject, index);
330 if (index < 0) return;
331 indices->Add(index, zone);
332 index += pattern_length;
333 limit--;
334 }
335}
336
337
338void FindStringIndicesDispatch(Isolate* isolate, String* subject,
339 String* pattern, ZoneList<int>* indices,
340 unsigned int limit, Zone* zone) {
341 {
342 DisallowHeapAllocation no_gc;
343 String::FlatContent subject_content = subject->GetFlatContent();
344 String::FlatContent pattern_content = pattern->GetFlatContent();
345 DCHECK(subject_content.IsFlat());
346 DCHECK(pattern_content.IsFlat());
347 if (subject_content.IsOneByte()) {
348 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
349 if (pattern_content.IsOneByte()) {
350 Vector<const uint8_t> pattern_vector =
351 pattern_content.ToOneByteVector();
352 if (pattern_vector.length() == 1) {
353 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
354 limit, zone);
355 } else {
356 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
357 limit, zone);
358 }
359 } else {
360 FindStringIndices(isolate, subject_vector,
361 pattern_content.ToUC16Vector(), indices, limit, zone);
362 }
363 } else {
364 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
365 if (pattern_content.IsOneByte()) {
366 Vector<const uint8_t> pattern_vector =
367 pattern_content.ToOneByteVector();
368 if (pattern_vector.length() == 1) {
369 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
370 limit, zone);
371 } else {
372 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
373 limit, zone);
374 }
375 } else {
376 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
377 if (pattern_vector.length() == 1) {
378 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
379 limit, zone);
380 } else {
381 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
382 limit, zone);
383 }
384 }
385 }
386 }
387}
388
389
390template <typename ResultSeqString>
391MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
392 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
393 Handle<String> replacement, Handle<JSArray> last_match_info) {
394 DCHECK(subject->IsFlat());
395 DCHECK(replacement->IsFlat());
396
397 ZoneScope zone_scope(isolate->runtime_zone());
398 ZoneList<int> indices(8, zone_scope.zone());
399 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
400 String* pattern =
401 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
402 int subject_len = subject->length();
403 int pattern_len = pattern->length();
404 int replacement_len = replacement->length();
405
406 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
407 zone_scope.zone());
408
409 int matches = indices.length();
410 if (matches == 0) return *subject;
411
412 // Detect integer overflow.
413 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
414 static_cast<int64_t>(pattern_len)) *
415 static_cast<int64_t>(matches) +
416 static_cast<int64_t>(subject_len);
417 int result_len;
418 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
419 STATIC_ASSERT(String::kMaxLength < kMaxInt);
420 result_len = kMaxInt; // Provoke exception.
421 } else {
422 result_len = static_cast<int>(result_len_64);
423 }
424
425 int subject_pos = 0;
426 int result_pos = 0;
427
428 MaybeHandle<SeqString> maybe_res;
429 if (ResultSeqString::kHasOneByteEncoding) {
430 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
431 } else {
432 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
433 }
434 Handle<SeqString> untyped_res;
435 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
436 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
437
438 for (int i = 0; i < matches; i++) {
439 // Copy non-matched subject content.
440 if (subject_pos < indices.at(i)) {
441 String::WriteToFlat(*subject, result->GetChars() + result_pos,
442 subject_pos, indices.at(i));
443 result_pos += indices.at(i) - subject_pos;
444 }
445
446 // Replace match.
447 if (replacement_len > 0) {
448 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
449 replacement_len);
450 result_pos += replacement_len;
451 }
452
453 subject_pos = indices.at(i) + pattern_len;
454 }
455 // Add remaining subject content at the end.
456 if (subject_pos < subject_len) {
457 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
458 subject_len);
459 }
460
461 int32_t match_indices[] = {indices.at(matches - 1),
462 indices.at(matches - 1) + pattern_len};
463 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
464
465 return *result;
466}
467
468
469MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
470 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
471 Handle<String> replacement, Handle<JSArray> last_match_info) {
472 DCHECK(subject->IsFlat());
473 DCHECK(replacement->IsFlat());
474
475 int capture_count = regexp->CaptureCount();
476 int subject_length = subject->length();
477
478 // CompiledReplacement uses zone allocation.
479 ZoneScope zone_scope(isolate->runtime_zone());
480 CompiledReplacement compiled_replacement(zone_scope.zone());
481 bool simple_replace =
482 compiled_replacement.Compile(replacement, capture_count, subject_length);
483
484 // Shortcut for simple non-regexp global replacements
485 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
486 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
487 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
488 isolate, subject, regexp, replacement, last_match_info);
489 } else {
490 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
491 isolate, subject, regexp, replacement, last_match_info);
492 }
493 }
494
Ben Murdoch097c5b22016-05-18 11:27:45 +0100495 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400496 if (global_cache.HasException()) return isolate->heap()->exception();
497
498 int32_t* current_match = global_cache.FetchNext();
499 if (current_match == NULL) {
500 if (global_cache.HasException()) return isolate->heap()->exception();
501 return *subject;
502 }
503
504 // Guessing the number of parts that the final result string is built
505 // from. Global regexps can match any number of times, so we guess
506 // conservatively.
507 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
508 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
509
510 // Number of parts added by compiled replacement plus preceeding
511 // string and possibly suffix after last match. It is possible for
512 // all components to use two elements when encoded as two smis.
513 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
514
515 int prev = 0;
516
517 do {
518 builder.EnsureCapacity(parts_added_per_loop);
519
520 int start = current_match[0];
521 int end = current_match[1];
522
523 if (prev < start) {
524 builder.AddSubjectSlice(prev, start);
525 }
526
527 if (simple_replace) {
528 builder.AddString(replacement);
529 } else {
530 compiled_replacement.Apply(&builder, start, end, current_match);
531 }
532 prev = end;
533
534 current_match = global_cache.FetchNext();
535 } while (current_match != NULL);
536
537 if (global_cache.HasException()) return isolate->heap()->exception();
538
539 if (prev < subject_length) {
540 builder.EnsureCapacity(2);
541 builder.AddSubjectSlice(prev, subject_length);
542 }
543
544 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
545 global_cache.LastSuccessfulMatch());
546
Ben Murdoch61f157c2016-09-16 13:49:30 +0100547 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400548}
549
550
551template <typename ResultSeqString>
552MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
553 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
554 Handle<JSArray> last_match_info) {
555 DCHECK(subject->IsFlat());
556
557 // Shortcut for simple non-regexp global replacements
558 if (regexp->TypeTag() == JSRegExp::ATOM) {
559 Handle<String> empty_string = isolate->factory()->empty_string();
560 if (subject->IsOneByteRepresentation()) {
561 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
562 isolate, subject, regexp, empty_string, last_match_info);
563 } else {
564 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
565 isolate, subject, regexp, empty_string, last_match_info);
566 }
567 }
568
Ben Murdoch097c5b22016-05-18 11:27:45 +0100569 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400570 if (global_cache.HasException()) return isolate->heap()->exception();
571
572 int32_t* current_match = global_cache.FetchNext();
573 if (current_match == NULL) {
574 if (global_cache.HasException()) return isolate->heap()->exception();
575 return *subject;
576 }
577
578 int start = current_match[0];
579 int end = current_match[1];
580 int capture_count = regexp->CaptureCount();
581 int subject_length = subject->length();
582
583 int new_length = subject_length - (end - start);
584 if (new_length == 0) return isolate->heap()->empty_string();
585
586 Handle<ResultSeqString> answer;
587 if (ResultSeqString::kHasOneByteEncoding) {
588 answer = Handle<ResultSeqString>::cast(
589 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
590 } else {
591 answer = Handle<ResultSeqString>::cast(
592 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
593 }
594
595 int prev = 0;
596 int position = 0;
597
598 do {
599 start = current_match[0];
600 end = current_match[1];
601 if (prev < start) {
602 // Add substring subject[prev;start] to answer string.
603 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
604 position += start - prev;
605 }
606 prev = end;
607
608 current_match = global_cache.FetchNext();
609 } while (current_match != NULL);
610
611 if (global_cache.HasException()) return isolate->heap()->exception();
612
613 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
614 global_cache.LastSuccessfulMatch());
615
616 if (prev < subject_length) {
617 // Add substring subject[prev;length] to answer string.
618 String::WriteToFlat(*subject, answer->GetChars() + position, prev,
619 subject_length);
620 position += subject_length - prev;
621 }
622
623 if (position == 0) return isolate->heap()->empty_string();
624
625 // Shorten string and fill
626 int string_size = ResultSeqString::SizeFor(position);
627 int allocated_string_size = ResultSeqString::SizeFor(new_length);
628 int delta = allocated_string_size - string_size;
629
630 answer->set_length(position);
631 if (delta == 0) return *answer;
632
633 Address end_of_string = answer->address() + string_size;
634 Heap* heap = isolate->heap();
635
636 // The trimming is performed on a newly allocated object, which is on a
637 // fresly allocated page or on an already swept page. Hence, the sweeper
638 // thread can not get confused with the filler creation. No synchronization
639 // needed.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000640 // TODO(hpayer): We should shrink the large object page if the size
641 // of the object changed significantly.
642 if (!heap->lo_space()->Contains(*answer)) {
Ben Murdochda12d292016-06-02 14:46:10 +0100643 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000644 }
645 heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400646 return *answer;
647}
648
649
650RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
651 HandleScope scope(isolate);
652 DCHECK(args.length() == 4);
653
654 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
655 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
656 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
657 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
658
Ben Murdoch61f157c2016-09-16 13:49:30 +0100659 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
660 CHECK(last_match_info->HasFastObjectElements());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400661
662 subject = String::Flatten(subject);
663
664 if (replacement->length() == 0) {
665 if (subject->HasOnlyOneByteChars()) {
666 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
667 isolate, subject, regexp, last_match_info);
668 } else {
669 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
670 isolate, subject, regexp, last_match_info);
671 }
672 }
673
674 replacement = String::Flatten(replacement);
675
676 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
677 replacement, last_match_info);
678}
679
680
681RUNTIME_FUNCTION(Runtime_StringSplit) {
682 HandleScope handle_scope(isolate);
683 DCHECK(args.length() == 3);
684 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
685 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
686 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100687 CHECK(limit > 0);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400688
689 int subject_length = subject->length();
690 int pattern_length = pattern->length();
Ben Murdoch61f157c2016-09-16 13:49:30 +0100691 CHECK(pattern_length > 0);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400692
693 if (limit == 0xffffffffu) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000694 FixedArray* last_match_cache_unused;
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400695 Handle<Object> cached_answer(
696 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000697 &last_match_cache_unused,
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400698 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
699 isolate);
700 if (*cached_answer != Smi::FromInt(0)) {
701 // The cache FixedArray is a COW-array and can therefore be reused.
702 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
703 Handle<FixedArray>::cast(cached_answer));
704 return *result;
705 }
706 }
707
708 // The limit can be very large (0xffffffffu), but since the pattern
709 // isn't empty, we can never create more parts than ~half the length
710 // of the subject.
711
712 subject = String::Flatten(subject);
713 pattern = String::Flatten(pattern);
714
715 static const int kMaxInitialListCapacity = 16;
716
717 ZoneScope zone_scope(isolate->runtime_zone());
718
719 // Find (up to limit) indices of separator and end-of-string in subject
720 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
721 ZoneList<int> indices(initial_capacity, zone_scope.zone());
722
723 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
724 zone_scope.zone());
725
726 if (static_cast<uint32_t>(indices.length()) < limit) {
727 indices.Add(subject_length, zone_scope.zone());
728 }
729
730 // The list indices now contains the end of each part to create.
731
732 // Create JSArray of substrings separated by separator.
733 int part_count = indices.length();
734
Ben Murdochda12d292016-06-02 14:46:10 +0100735 Handle<JSArray> result =
736 isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
737 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400738
739 DCHECK(result->HasFastObjectElements());
740
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400741 Handle<FixedArray> elements(FixedArray::cast(result->elements()));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000742
743 if (part_count == 1 && indices.at(0) == subject_length) {
744 elements->set(0, *subject);
745 } else {
746 int part_start = 0;
Ben Murdochda12d292016-06-02 14:46:10 +0100747 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000748 int part_end = indices.at(i);
749 Handle<String> substring =
750 isolate->factory()->NewProperSubString(subject, part_start, part_end);
751 elements->set(i, *substring);
752 part_start = part_end + pattern_length;
Ben Murdochda12d292016-06-02 14:46:10 +0100753 });
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400754 }
755
756 if (limit == 0xffffffffu) {
757 if (result->HasFastObjectElements()) {
758 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000759 isolate->factory()->empty_fixed_array(),
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400760 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
761 }
762 }
763
764 return *result;
765}
766
767
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000768RUNTIME_FUNCTION(Runtime_RegExpExec) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400769 HandleScope scope(isolate);
770 DCHECK(args.length() == 4);
771 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
772 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
773 CONVERT_INT32_ARG_CHECKED(index, 2);
774 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
775 // Due to the way the JS calls are constructed this must be less than the
776 // length of a string, i.e. it is always a Smi. We check anyway for security.
Ben Murdoch61f157c2016-09-16 13:49:30 +0100777 CHECK(index >= 0);
778 CHECK(index <= subject->length());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400779 isolate->counters()->regexp_entry_runtime()->Increment();
Ben Murdoch61f157c2016-09-16 13:49:30 +0100780 RETURN_RESULT_OR_FAILURE(
781 isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400782}
783
784
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000785RUNTIME_FUNCTION(Runtime_RegExpFlags) {
786 SealHandleScope shs(isolate);
787 DCHECK(args.length() == 1);
788 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
789 return regexp->flags();
790}
791
792
793RUNTIME_FUNCTION(Runtime_RegExpSource) {
794 SealHandleScope shs(isolate);
795 DCHECK(args.length() == 1);
796 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
797 return regexp->source();
798}
799
800
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400801RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
802 HandleScope handle_scope(isolate);
803 DCHECK(args.length() == 3);
804 CONVERT_SMI_ARG_CHECKED(size, 0);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100805 CHECK(size >= 0 && size <= FixedArray::kMaxLength);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400806 CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
807 CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
808 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
809 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
810 Handle<JSObject> object =
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000811 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400812 Handle<JSArray> array = Handle<JSArray>::cast(object);
813 array->set_elements(*elements);
814 array->set_length(Smi::FromInt(size));
815 // Write in-object properties after the length of the array.
816 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
817 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
818 return *array;
819}
820
821
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400822RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
823 HandleScope scope(isolate);
824 DCHECK(args.length() == 3);
825 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
826 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000827 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400828
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000829 RETURN_FAILURE_ON_EXCEPTION(isolate,
830 JSRegExp::Initialize(regexp, source, flags));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400831
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400832 return *regexp;
833}
834
835
836// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
837// separate last match info. See comment on that function.
838template <bool has_capture>
839static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
840 Handle<JSRegExp> regexp,
841 Handle<JSArray> last_match_array,
842 Handle<JSArray> result_array) {
843 DCHECK(subject->IsFlat());
844 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
845
846 int capture_count = regexp->CaptureCount();
847 int subject_length = subject->length();
848
849 static const int kMinLengthToCache = 0x1000;
850
851 if (subject_length > kMinLengthToCache) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000852 FixedArray* last_match_cache;
853 Object* cached_answer = RegExpResultsCache::Lookup(
854 isolate->heap(), *subject, regexp->data(), &last_match_cache,
855 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
856 if (cached_answer->IsFixedArray()) {
857 int capture_registers = (capture_count + 1) * 2;
858 int32_t* last_match = NewArray<int32_t>(capture_registers);
859 for (int i = 0; i < capture_registers; i++) {
860 last_match[i] = Smi::cast(last_match_cache->get(i))->value();
861 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400862 Handle<FixedArray> cached_fixed_array =
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000863 Handle<FixedArray>(FixedArray::cast(cached_answer));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400864 // The cache FixedArray is a COW-array and can therefore be reused.
865 JSArray::SetContent(result_array, cached_fixed_array);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400866 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000867 last_match);
868 DeleteArray(last_match);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400869 return *result_array;
870 }
871 }
872
Ben Murdoch097c5b22016-05-18 11:27:45 +0100873 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400874 if (global_cache.HasException()) return isolate->heap()->exception();
875
876 // Ensured in Runtime_RegExpExecMultiple.
877 DCHECK(result_array->HasFastObjectElements());
878 Handle<FixedArray> result_elements(
879 FixedArray::cast(result_array->elements()));
880 if (result_elements->length() < 16) {
881 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
882 }
883
884 FixedArrayBuilder builder(result_elements);
885
886 // Position to search from.
887 int match_start = -1;
888 int match_end = 0;
889 bool first = true;
890
891 // Two smis before and after the match, for very long strings.
892 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
893
894 while (true) {
895 int32_t* current_match = global_cache.FetchNext();
896 if (current_match == NULL) break;
897 match_start = current_match[0];
898 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
899 if (match_end < match_start) {
900 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
901 match_start);
902 }
903 match_end = current_match[1];
904 {
905 // Avoid accumulating new handles inside loop.
906 HandleScope temp_scope(isolate);
907 Handle<String> match;
908 if (!first) {
909 match = isolate->factory()->NewProperSubString(subject, match_start,
910 match_end);
911 } else {
912 match =
913 isolate->factory()->NewSubString(subject, match_start, match_end);
914 first = false;
915 }
916
917 if (has_capture) {
918 // Arguments array to replace function is match, captures, index and
919 // subject, i.e., 3 + capture count in total.
920 Handle<FixedArray> elements =
921 isolate->factory()->NewFixedArray(3 + capture_count);
922
923 elements->set(0, *match);
924 for (int i = 1; i <= capture_count; i++) {
925 int start = current_match[i * 2];
926 if (start >= 0) {
927 int end = current_match[i * 2 + 1];
928 DCHECK(start <= end);
929 Handle<String> substring =
930 isolate->factory()->NewSubString(subject, start, end);
931 elements->set(i, *substring);
932 } else {
933 DCHECK(current_match[i * 2 + 1] < 0);
934 elements->set(i, isolate->heap()->undefined_value());
935 }
936 }
937 elements->set(capture_count + 1, Smi::FromInt(match_start));
938 elements->set(capture_count + 2, *subject);
939 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
940 } else {
941 builder.Add(*match);
942 }
943 }
944 }
945
946 if (global_cache.HasException()) return isolate->heap()->exception();
947
948 if (match_start >= 0) {
949 // Finished matching, with at least one match.
950 if (match_end < subject_length) {
951 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
952 subject_length);
953 }
954
955 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000956 global_cache.LastSuccessfulMatch());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400957
958 if (subject_length > kMinLengthToCache) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000959 // Store the last successful match into the array for caching.
960 // TODO(yangguo): do not expose last match to JS and simplify caching.
961 int capture_registers = (capture_count + 1) * 2;
962 Handle<FixedArray> last_match_cache =
963 isolate->factory()->NewFixedArray(capture_registers);
964 int32_t* last_match = global_cache.LastSuccessfulMatch();
965 for (int i = 0; i < capture_registers; i++) {
966 last_match_cache->set(i, Smi::FromInt(last_match[i]));
967 }
968 Handle<FixedArray> result_fixed_array = builder.array();
969 result_fixed_array->Shrink(builder.length());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400970 // Cache the result and turn the FixedArray into a COW array.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000971 RegExpResultsCache::Enter(
972 isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
973 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400974 }
975 return *builder.ToJSArray(result_array);
976 } else {
977 return isolate->heap()->null_value(); // No matches at all.
978 }
979}
980
981
982// This is only called for StringReplaceGlobalRegExpWithFunction. This sets
983// lastMatchInfoOverride to maintain the last match info, so we don't need to
984// set any other last match array info.
985RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
986 HandleScope handles(isolate);
987 DCHECK(args.length() == 4);
988
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400989 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000990 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400991 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
992 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100993 CHECK(last_match_info->HasFastObjectElements());
994 CHECK(result_array->HasFastObjectElements());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400995
996 subject = String::Flatten(subject);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100997 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400998
999 if (regexp->CaptureCount() == 0) {
1000 return SearchRegExpMultiple<false>(isolate, subject, regexp,
1001 last_match_info, result_array);
1002 } else {
1003 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1004 result_array);
1005 }
1006}
1007
1008
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001009RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001010 SealHandleScope shs(isolate);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001011 DCHECK(args.length() == 4);
1012 Object* exception = isolate->pending_exception();
1013 isolate->clear_pending_exception();
1014 return isolate->ReThrow(exception);
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001015}
1016
1017
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001018RUNTIME_FUNCTION(Runtime_IsRegExp) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001019 SealHandleScope shs(isolate);
1020 DCHECK(args.length() == 1);
1021 CONVERT_ARG_CHECKED(Object, obj, 0);
1022 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1023}
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001024} // namespace internal
1025} // namespace v8