blob: df86aa870cb659eb057c2be97b3c9c06a2355f0e [file] [log] [blame]
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00005#include "src/runtime/runtime-utils.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -04006
7#include "src/arguments.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00008#include "src/conversions-inl.h"
9#include "src/isolate-inl.h"
10#include "src/messages.h"
11#include "src/regexp/jsregexp-inl.h"
12#include "src/regexp/jsregexp.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040013#include "src/string-builder.h"
14#include "src/string-search.h"
15
16namespace v8 {
17namespace internal {
18
19class CompiledReplacement {
20 public:
21 explicit CompiledReplacement(Zone* zone)
22 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
23
24 // Return whether the replacement is simple.
25 bool Compile(Handle<String> replacement, int capture_count,
26 int subject_length);
27
28 // Use Apply only if Compile returned false.
29 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
30 int32_t* match);
31
32 // Number of distinct parts of the replacement pattern.
33 int parts() { return parts_.length(); }
34
35 Zone* zone() const { return zone_; }
36
37 private:
38 enum PartType {
39 SUBJECT_PREFIX = 1,
40 SUBJECT_SUFFIX,
41 SUBJECT_CAPTURE,
42 REPLACEMENT_SUBSTRING,
43 REPLACEMENT_STRING,
44 NUMBER_OF_PART_TYPES
45 };
46
47 struct ReplacementPart {
48 static inline ReplacementPart SubjectMatch() {
49 return ReplacementPart(SUBJECT_CAPTURE, 0);
50 }
51 static inline ReplacementPart SubjectCapture(int capture_index) {
52 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
53 }
54 static inline ReplacementPart SubjectPrefix() {
55 return ReplacementPart(SUBJECT_PREFIX, 0);
56 }
57 static inline ReplacementPart SubjectSuffix(int subject_length) {
58 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
59 }
60 static inline ReplacementPart ReplacementString() {
61 return ReplacementPart(REPLACEMENT_STRING, 0);
62 }
63 static inline ReplacementPart ReplacementSubString(int from, int to) {
64 DCHECK(from >= 0);
65 DCHECK(to > from);
66 return ReplacementPart(-from, to);
67 }
68
69 // If tag <= 0 then it is the negation of a start index of a substring of
70 // the replacement pattern, otherwise it's a value from PartType.
71 ReplacementPart(int tag, int data) : tag(tag), data(data) {
72 // Must be non-positive or a PartType value.
73 DCHECK(tag < NUMBER_OF_PART_TYPES);
74 }
75 // Either a value of PartType or a non-positive number that is
76 // the negation of an index into the replacement string.
77 int tag;
78 // The data value's interpretation depends on the value of tag:
79 // tag == SUBJECT_PREFIX ||
80 // tag == SUBJECT_SUFFIX: data is unused.
81 // tag == SUBJECT_CAPTURE: data is the number of the capture.
82 // tag == REPLACEMENT_SUBSTRING ||
83 // tag == REPLACEMENT_STRING: data is index into array of substrings
84 // of the replacement string.
85 // tag <= 0: Temporary representation of the substring of the replacement
86 // string ranging over -tag .. data.
87 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
88 // substring objects.
89 int data;
90 };
91
92 template <typename Char>
93 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
94 Vector<Char> characters, int capture_count,
95 int subject_length, Zone* zone) {
96 int length = characters.length();
97 int last = 0;
98 for (int i = 0; i < length; i++) {
99 Char c = characters[i];
100 if (c == '$') {
101 int next_index = i + 1;
102 if (next_index == length) { // No next character!
103 break;
104 }
105 Char c2 = characters[next_index];
106 switch (c2) {
107 case '$':
108 if (i > last) {
109 // There is a substring before. Include the first "$".
110 parts->Add(
111 ReplacementPart::ReplacementSubString(last, next_index),
112 zone);
113 last = next_index + 1; // Continue after the second "$".
114 } else {
115 // Let the next substring start with the second "$".
116 last = next_index;
117 }
118 i = next_index;
119 break;
120 case '`':
121 if (i > last) {
122 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
123 }
124 parts->Add(ReplacementPart::SubjectPrefix(), zone);
125 i = next_index;
126 last = i + 1;
127 break;
128 case '\'':
129 if (i > last) {
130 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
131 }
132 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
133 i = next_index;
134 last = i + 1;
135 break;
136 case '&':
137 if (i > last) {
138 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
139 }
140 parts->Add(ReplacementPart::SubjectMatch(), zone);
141 i = next_index;
142 last = i + 1;
143 break;
144 case '0':
145 case '1':
146 case '2':
147 case '3':
148 case '4':
149 case '5':
150 case '6':
151 case '7':
152 case '8':
153 case '9': {
154 int capture_ref = c2 - '0';
155 if (capture_ref > capture_count) {
156 i = next_index;
157 continue;
158 }
159 int second_digit_index = next_index + 1;
160 if (second_digit_index < length) {
161 // Peek ahead to see if we have two digits.
162 Char c3 = characters[second_digit_index];
163 if ('0' <= c3 && c3 <= '9') { // Double digits.
164 int double_digit_ref = capture_ref * 10 + c3 - '0';
165 if (double_digit_ref <= capture_count) {
166 next_index = second_digit_index;
167 capture_ref = double_digit_ref;
168 }
169 }
170 }
171 if (capture_ref > 0) {
172 if (i > last) {
173 parts->Add(ReplacementPart::ReplacementSubString(last, i),
174 zone);
175 }
176 DCHECK(capture_ref <= capture_count);
177 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
178 last = next_index + 1;
179 }
180 i = next_index;
181 break;
182 }
183 default:
184 i = next_index;
185 break;
186 }
187 }
188 }
189 if (length > last) {
190 if (last == 0) {
191 // Replacement is simple. Do not use Apply to do the replacement.
192 return true;
193 } else {
194 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
195 }
196 }
197 return false;
198 }
199
200 ZoneList<ReplacementPart> parts_;
201 ZoneList<Handle<String> > replacement_substrings_;
202 Zone* zone_;
203};
204
205
206bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
207 int subject_length) {
208 {
209 DisallowHeapAllocation no_gc;
210 String::FlatContent content = replacement->GetFlatContent();
211 DCHECK(content.IsFlat());
212 bool simple = false;
213 if (content.IsOneByte()) {
214 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
215 capture_count, subject_length, zone());
216 } else {
217 DCHECK(content.IsTwoByte());
218 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
219 capture_count, subject_length, zone());
220 }
221 if (simple) return true;
222 }
223
224 Isolate* isolate = replacement->GetIsolate();
225 // Find substrings of replacement string and create them as String objects.
226 int substring_index = 0;
227 for (int i = 0, n = parts_.length(); i < n; i++) {
228 int tag = parts_[i].tag;
229 if (tag <= 0) { // A replacement string slice.
230 int from = -tag;
231 int to = parts_[i].data;
232 replacement_substrings_.Add(
233 isolate->factory()->NewSubString(replacement, from, to), zone());
234 parts_[i].tag = REPLACEMENT_SUBSTRING;
235 parts_[i].data = substring_index;
236 substring_index++;
237 } else if (tag == REPLACEMENT_STRING) {
238 replacement_substrings_.Add(replacement, zone());
239 parts_[i].data = substring_index;
240 substring_index++;
241 }
242 }
243 return false;
244}
245
246
247void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
248 int match_from, int match_to, int32_t* match) {
249 DCHECK_LT(0, parts_.length());
250 for (int i = 0, n = parts_.length(); i < n; i++) {
251 ReplacementPart part = parts_[i];
252 switch (part.tag) {
253 case SUBJECT_PREFIX:
254 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
255 break;
256 case SUBJECT_SUFFIX: {
257 int subject_length = part.data;
258 if (match_to < subject_length) {
259 builder->AddSubjectSlice(match_to, subject_length);
260 }
261 break;
262 }
263 case SUBJECT_CAPTURE: {
264 int capture = part.data;
265 int from = match[capture * 2];
266 int to = match[capture * 2 + 1];
267 if (from >= 0 && to > from) {
268 builder->AddSubjectSlice(from, to);
269 }
270 break;
271 }
272 case REPLACEMENT_SUBSTRING:
273 case REPLACEMENT_STRING:
274 builder->AddString(replacement_substrings_[part.data]);
275 break;
276 default:
277 UNREACHABLE();
278 }
279 }
280}
281
282
283void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284 ZoneList<int>* indices, unsigned int limit,
285 Zone* zone) {
286 DCHECK(limit > 0);
287 // Collect indices of pattern in subject using memchr.
288 // Stop after finding at most limit values.
289 const uint8_t* subject_start = subject.start();
290 const uint8_t* subject_end = subject_start + subject.length();
291 const uint8_t* pos = subject_start;
292 while (limit > 0) {
293 pos = reinterpret_cast<const uint8_t*>(
294 memchr(pos, pattern, subject_end - pos));
295 if (pos == NULL) return;
296 indices->Add(static_cast<int>(pos - subject_start), zone);
297 pos++;
298 limit--;
299 }
300}
301
302
303void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
304 ZoneList<int>* indices, unsigned int limit,
305 Zone* zone) {
306 DCHECK(limit > 0);
307 const uc16* subject_start = subject.start();
308 const uc16* subject_end = subject_start + subject.length();
309 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
310 if (*pos == pattern) {
311 indices->Add(static_cast<int>(pos - subject_start), zone);
312 limit--;
313 }
314 }
315}
316
317
318template <typename SubjectChar, typename PatternChar>
319void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
320 Vector<const PatternChar> pattern,
321 ZoneList<int>* indices, unsigned int limit, Zone* zone) {
322 DCHECK(limit > 0);
323 // Collect indices of pattern in subject.
324 // Stop after finding at most limit values.
325 int pattern_length = pattern.length();
326 int index = 0;
327 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
328 while (limit > 0) {
329 index = search.Search(subject, index);
330 if (index < 0) return;
331 indices->Add(index, zone);
332 index += pattern_length;
333 limit--;
334 }
335}
336
337
338void FindStringIndicesDispatch(Isolate* isolate, String* subject,
339 String* pattern, ZoneList<int>* indices,
340 unsigned int limit, Zone* zone) {
341 {
342 DisallowHeapAllocation no_gc;
343 String::FlatContent subject_content = subject->GetFlatContent();
344 String::FlatContent pattern_content = pattern->GetFlatContent();
345 DCHECK(subject_content.IsFlat());
346 DCHECK(pattern_content.IsFlat());
347 if (subject_content.IsOneByte()) {
348 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
349 if (pattern_content.IsOneByte()) {
350 Vector<const uint8_t> pattern_vector =
351 pattern_content.ToOneByteVector();
352 if (pattern_vector.length() == 1) {
353 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
354 limit, zone);
355 } else {
356 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
357 limit, zone);
358 }
359 } else {
360 FindStringIndices(isolate, subject_vector,
361 pattern_content.ToUC16Vector(), indices, limit, zone);
362 }
363 } else {
364 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
365 if (pattern_content.IsOneByte()) {
366 Vector<const uint8_t> pattern_vector =
367 pattern_content.ToOneByteVector();
368 if (pattern_vector.length() == 1) {
369 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
370 limit, zone);
371 } else {
372 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
373 limit, zone);
374 }
375 } else {
376 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
377 if (pattern_vector.length() == 1) {
378 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
379 limit, zone);
380 } else {
381 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
382 limit, zone);
383 }
384 }
385 }
386 }
387}
388
389
390template <typename ResultSeqString>
391MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
392 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
393 Handle<String> replacement, Handle<JSArray> last_match_info) {
394 DCHECK(subject->IsFlat());
395 DCHECK(replacement->IsFlat());
396
397 ZoneScope zone_scope(isolate->runtime_zone());
398 ZoneList<int> indices(8, zone_scope.zone());
399 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
400 String* pattern =
401 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
402 int subject_len = subject->length();
403 int pattern_len = pattern->length();
404 int replacement_len = replacement->length();
405
406 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
407 zone_scope.zone());
408
409 int matches = indices.length();
410 if (matches == 0) return *subject;
411
412 // Detect integer overflow.
413 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
414 static_cast<int64_t>(pattern_len)) *
415 static_cast<int64_t>(matches) +
416 static_cast<int64_t>(subject_len);
417 int result_len;
418 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
419 STATIC_ASSERT(String::kMaxLength < kMaxInt);
420 result_len = kMaxInt; // Provoke exception.
421 } else {
422 result_len = static_cast<int>(result_len_64);
423 }
424
425 int subject_pos = 0;
426 int result_pos = 0;
427
428 MaybeHandle<SeqString> maybe_res;
429 if (ResultSeqString::kHasOneByteEncoding) {
430 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
431 } else {
432 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
433 }
434 Handle<SeqString> untyped_res;
435 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
436 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
437
438 for (int i = 0; i < matches; i++) {
439 // Copy non-matched subject content.
440 if (subject_pos < indices.at(i)) {
441 String::WriteToFlat(*subject, result->GetChars() + result_pos,
442 subject_pos, indices.at(i));
443 result_pos += indices.at(i) - subject_pos;
444 }
445
446 // Replace match.
447 if (replacement_len > 0) {
448 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
449 replacement_len);
450 result_pos += replacement_len;
451 }
452
453 subject_pos = indices.at(i) + pattern_len;
454 }
455 // Add remaining subject content at the end.
456 if (subject_pos < subject_len) {
457 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
458 subject_len);
459 }
460
461 int32_t match_indices[] = {indices.at(matches - 1),
462 indices.at(matches - 1) + pattern_len};
463 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
464
465 return *result;
466}
467
468
469MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
470 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
471 Handle<String> replacement, Handle<JSArray> last_match_info) {
472 DCHECK(subject->IsFlat());
473 DCHECK(replacement->IsFlat());
474
475 int capture_count = regexp->CaptureCount();
476 int subject_length = subject->length();
477
478 // CompiledReplacement uses zone allocation.
479 ZoneScope zone_scope(isolate->runtime_zone());
480 CompiledReplacement compiled_replacement(zone_scope.zone());
481 bool simple_replace =
482 compiled_replacement.Compile(replacement, capture_count, subject_length);
483
484 // Shortcut for simple non-regexp global replacements
485 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
486 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
487 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
488 isolate, subject, regexp, replacement, last_match_info);
489 } else {
490 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
491 isolate, subject, regexp, replacement, last_match_info);
492 }
493 }
494
Ben Murdoch097c5b22016-05-18 11:27:45 +0100495 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400496 if (global_cache.HasException()) return isolate->heap()->exception();
497
498 int32_t* current_match = global_cache.FetchNext();
499 if (current_match == NULL) {
500 if (global_cache.HasException()) return isolate->heap()->exception();
501 return *subject;
502 }
503
504 // Guessing the number of parts that the final result string is built
505 // from. Global regexps can match any number of times, so we guess
506 // conservatively.
507 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
508 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
509
510 // Number of parts added by compiled replacement plus preceeding
511 // string and possibly suffix after last match. It is possible for
512 // all components to use two elements when encoded as two smis.
513 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
514
515 int prev = 0;
516
517 do {
518 builder.EnsureCapacity(parts_added_per_loop);
519
520 int start = current_match[0];
521 int end = current_match[1];
522
523 if (prev < start) {
524 builder.AddSubjectSlice(prev, start);
525 }
526
527 if (simple_replace) {
528 builder.AddString(replacement);
529 } else {
530 compiled_replacement.Apply(&builder, start, end, current_match);
531 }
532 prev = end;
533
534 current_match = global_cache.FetchNext();
535 } while (current_match != NULL);
536
537 if (global_cache.HasException()) return isolate->heap()->exception();
538
539 if (prev < subject_length) {
540 builder.EnsureCapacity(2);
541 builder.AddSubjectSlice(prev, subject_length);
542 }
543
544 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
545 global_cache.LastSuccessfulMatch());
546
547 Handle<String> result;
548 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
549 return *result;
550}
551
552
553template <typename ResultSeqString>
554MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
555 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
556 Handle<JSArray> last_match_info) {
557 DCHECK(subject->IsFlat());
558
559 // Shortcut for simple non-regexp global replacements
560 if (regexp->TypeTag() == JSRegExp::ATOM) {
561 Handle<String> empty_string = isolate->factory()->empty_string();
562 if (subject->IsOneByteRepresentation()) {
563 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
564 isolate, subject, regexp, empty_string, last_match_info);
565 } else {
566 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
567 isolate, subject, regexp, empty_string, last_match_info);
568 }
569 }
570
Ben Murdoch097c5b22016-05-18 11:27:45 +0100571 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400572 if (global_cache.HasException()) return isolate->heap()->exception();
573
574 int32_t* current_match = global_cache.FetchNext();
575 if (current_match == NULL) {
576 if (global_cache.HasException()) return isolate->heap()->exception();
577 return *subject;
578 }
579
580 int start = current_match[0];
581 int end = current_match[1];
582 int capture_count = regexp->CaptureCount();
583 int subject_length = subject->length();
584
585 int new_length = subject_length - (end - start);
586 if (new_length == 0) return isolate->heap()->empty_string();
587
588 Handle<ResultSeqString> answer;
589 if (ResultSeqString::kHasOneByteEncoding) {
590 answer = Handle<ResultSeqString>::cast(
591 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
592 } else {
593 answer = Handle<ResultSeqString>::cast(
594 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
595 }
596
597 int prev = 0;
598 int position = 0;
599
600 do {
601 start = current_match[0];
602 end = current_match[1];
603 if (prev < start) {
604 // Add substring subject[prev;start] to answer string.
605 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
606 position += start - prev;
607 }
608 prev = end;
609
610 current_match = global_cache.FetchNext();
611 } while (current_match != NULL);
612
613 if (global_cache.HasException()) return isolate->heap()->exception();
614
615 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
616 global_cache.LastSuccessfulMatch());
617
618 if (prev < subject_length) {
619 // Add substring subject[prev;length] to answer string.
620 String::WriteToFlat(*subject, answer->GetChars() + position, prev,
621 subject_length);
622 position += subject_length - prev;
623 }
624
625 if (position == 0) return isolate->heap()->empty_string();
626
627 // Shorten string and fill
628 int string_size = ResultSeqString::SizeFor(position);
629 int allocated_string_size = ResultSeqString::SizeFor(new_length);
630 int delta = allocated_string_size - string_size;
631
632 answer->set_length(position);
633 if (delta == 0) return *answer;
634
635 Address end_of_string = answer->address() + string_size;
636 Heap* heap = isolate->heap();
637
638 // The trimming is performed on a newly allocated object, which is on a
639 // fresly allocated page or on an already swept page. Hence, the sweeper
640 // thread can not get confused with the filler creation. No synchronization
641 // needed.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000642 // TODO(hpayer): We should shrink the large object page if the size
643 // of the object changed significantly.
644 if (!heap->lo_space()->Contains(*answer)) {
645 heap->CreateFillerObjectAt(end_of_string, delta);
646 }
647 heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400648 return *answer;
649}
650
651
652RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
653 HandleScope scope(isolate);
654 DCHECK(args.length() == 4);
655
656 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
657 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
658 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
659 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
660
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000661 RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400662 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
663
664 subject = String::Flatten(subject);
665
666 if (replacement->length() == 0) {
667 if (subject->HasOnlyOneByteChars()) {
668 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
669 isolate, subject, regexp, last_match_info);
670 } else {
671 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
672 isolate, subject, regexp, last_match_info);
673 }
674 }
675
676 replacement = String::Flatten(replacement);
677
678 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
679 replacement, last_match_info);
680}
681
682
683RUNTIME_FUNCTION(Runtime_StringSplit) {
684 HandleScope handle_scope(isolate);
685 DCHECK(args.length() == 3);
686 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
687 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
688 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
689 RUNTIME_ASSERT(limit > 0);
690
691 int subject_length = subject->length();
692 int pattern_length = pattern->length();
693 RUNTIME_ASSERT(pattern_length > 0);
694
695 if (limit == 0xffffffffu) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000696 FixedArray* last_match_cache_unused;
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400697 Handle<Object> cached_answer(
698 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000699 &last_match_cache_unused,
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400700 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
701 isolate);
702 if (*cached_answer != Smi::FromInt(0)) {
703 // The cache FixedArray is a COW-array and can therefore be reused.
704 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
705 Handle<FixedArray>::cast(cached_answer));
706 return *result;
707 }
708 }
709
710 // The limit can be very large (0xffffffffu), but since the pattern
711 // isn't empty, we can never create more parts than ~half the length
712 // of the subject.
713
714 subject = String::Flatten(subject);
715 pattern = String::Flatten(pattern);
716
717 static const int kMaxInitialListCapacity = 16;
718
719 ZoneScope zone_scope(isolate->runtime_zone());
720
721 // Find (up to limit) indices of separator and end-of-string in subject
722 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
723 ZoneList<int> indices(initial_capacity, zone_scope.zone());
724
725 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
726 zone_scope.zone());
727
728 if (static_cast<uint32_t>(indices.length()) < limit) {
729 indices.Add(subject_length, zone_scope.zone());
730 }
731
732 // The list indices now contains the end of each part to create.
733
734 // Create JSArray of substrings separated by separator.
735 int part_count = indices.length();
736
737 Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
738 JSObject::EnsureCanContainHeapObjectElements(result);
739 result->set_length(Smi::FromInt(part_count));
740
741 DCHECK(result->HasFastObjectElements());
742
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400743 Handle<FixedArray> elements(FixedArray::cast(result->elements()));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000744
745 if (part_count == 1 && indices.at(0) == subject_length) {
746 elements->set(0, *subject);
747 } else {
748 int part_start = 0;
749 for (int i = 0; i < part_count; i++) {
750 HandleScope local_loop_handle(isolate);
751 int part_end = indices.at(i);
752 Handle<String> substring =
753 isolate->factory()->NewProperSubString(subject, part_start, part_end);
754 elements->set(i, *substring);
755 part_start = part_end + pattern_length;
756 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400757 }
758
759 if (limit == 0xffffffffu) {
760 if (result->HasFastObjectElements()) {
761 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000762 isolate->factory()->empty_fixed_array(),
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400763 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
764 }
765 }
766
767 return *result;
768}
769
770
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000771RUNTIME_FUNCTION(Runtime_RegExpExec) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400772 HandleScope scope(isolate);
773 DCHECK(args.length() == 4);
774 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
775 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
776 CONVERT_INT32_ARG_CHECKED(index, 2);
777 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
778 // Due to the way the JS calls are constructed this must be less than the
779 // length of a string, i.e. it is always a Smi. We check anyway for security.
780 RUNTIME_ASSERT(index >= 0);
781 RUNTIME_ASSERT(index <= subject->length());
782 isolate->counters()->regexp_entry_runtime()->Increment();
783 Handle<Object> result;
784 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
785 isolate, result,
786 RegExpImpl::Exec(regexp, subject, index, last_match_info));
787 return *result;
788}
789
790
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000791RUNTIME_FUNCTION(Runtime_RegExpFlags) {
792 SealHandleScope shs(isolate);
793 DCHECK(args.length() == 1);
794 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
795 return regexp->flags();
796}
797
798
799RUNTIME_FUNCTION(Runtime_RegExpSource) {
800 SealHandleScope shs(isolate);
801 DCHECK(args.length() == 1);
802 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
803 return regexp->source();
804}
805
806
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400807RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
808 HandleScope handle_scope(isolate);
809 DCHECK(args.length() == 3);
810 CONVERT_SMI_ARG_CHECKED(size, 0);
811 RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
812 CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
813 CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
814 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
815 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
816 Handle<JSObject> object =
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000817 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400818 Handle<JSArray> array = Handle<JSArray>::cast(object);
819 array->set_elements(*elements);
820 array->set_length(Smi::FromInt(size));
821 // Write in-object properties after the length of the array.
822 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
823 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
824 return *array;
825}
826
827
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400828RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
829 HandleScope scope(isolate);
830 DCHECK(args.length() == 3);
831 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
832 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000833 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400834
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000835 RETURN_FAILURE_ON_EXCEPTION(isolate,
836 JSRegExp::Initialize(regexp, source, flags));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400837
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400838 return *regexp;
839}
840
841
842// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
843// separate last match info. See comment on that function.
844template <bool has_capture>
845static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
846 Handle<JSRegExp> regexp,
847 Handle<JSArray> last_match_array,
848 Handle<JSArray> result_array) {
849 DCHECK(subject->IsFlat());
850 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
851
852 int capture_count = regexp->CaptureCount();
853 int subject_length = subject->length();
854
855 static const int kMinLengthToCache = 0x1000;
856
857 if (subject_length > kMinLengthToCache) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000858 FixedArray* last_match_cache;
859 Object* cached_answer = RegExpResultsCache::Lookup(
860 isolate->heap(), *subject, regexp->data(), &last_match_cache,
861 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
862 if (cached_answer->IsFixedArray()) {
863 int capture_registers = (capture_count + 1) * 2;
864 int32_t* last_match = NewArray<int32_t>(capture_registers);
865 for (int i = 0; i < capture_registers; i++) {
866 last_match[i] = Smi::cast(last_match_cache->get(i))->value();
867 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400868 Handle<FixedArray> cached_fixed_array =
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000869 Handle<FixedArray>(FixedArray::cast(cached_answer));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400870 // The cache FixedArray is a COW-array and can therefore be reused.
871 JSArray::SetContent(result_array, cached_fixed_array);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400872 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000873 last_match);
874 DeleteArray(last_match);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400875 return *result_array;
876 }
877 }
878
Ben Murdoch097c5b22016-05-18 11:27:45 +0100879 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400880 if (global_cache.HasException()) return isolate->heap()->exception();
881
882 // Ensured in Runtime_RegExpExecMultiple.
883 DCHECK(result_array->HasFastObjectElements());
884 Handle<FixedArray> result_elements(
885 FixedArray::cast(result_array->elements()));
886 if (result_elements->length() < 16) {
887 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
888 }
889
890 FixedArrayBuilder builder(result_elements);
891
892 // Position to search from.
893 int match_start = -1;
894 int match_end = 0;
895 bool first = true;
896
897 // Two smis before and after the match, for very long strings.
898 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
899
900 while (true) {
901 int32_t* current_match = global_cache.FetchNext();
902 if (current_match == NULL) break;
903 match_start = current_match[0];
904 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
905 if (match_end < match_start) {
906 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
907 match_start);
908 }
909 match_end = current_match[1];
910 {
911 // Avoid accumulating new handles inside loop.
912 HandleScope temp_scope(isolate);
913 Handle<String> match;
914 if (!first) {
915 match = isolate->factory()->NewProperSubString(subject, match_start,
916 match_end);
917 } else {
918 match =
919 isolate->factory()->NewSubString(subject, match_start, match_end);
920 first = false;
921 }
922
923 if (has_capture) {
924 // Arguments array to replace function is match, captures, index and
925 // subject, i.e., 3 + capture count in total.
926 Handle<FixedArray> elements =
927 isolate->factory()->NewFixedArray(3 + capture_count);
928
929 elements->set(0, *match);
930 for (int i = 1; i <= capture_count; i++) {
931 int start = current_match[i * 2];
932 if (start >= 0) {
933 int end = current_match[i * 2 + 1];
934 DCHECK(start <= end);
935 Handle<String> substring =
936 isolate->factory()->NewSubString(subject, start, end);
937 elements->set(i, *substring);
938 } else {
939 DCHECK(current_match[i * 2 + 1] < 0);
940 elements->set(i, isolate->heap()->undefined_value());
941 }
942 }
943 elements->set(capture_count + 1, Smi::FromInt(match_start));
944 elements->set(capture_count + 2, *subject);
945 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
946 } else {
947 builder.Add(*match);
948 }
949 }
950 }
951
952 if (global_cache.HasException()) return isolate->heap()->exception();
953
954 if (match_start >= 0) {
955 // Finished matching, with at least one match.
956 if (match_end < subject_length) {
957 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
958 subject_length);
959 }
960
961 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000962 global_cache.LastSuccessfulMatch());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400963
964 if (subject_length > kMinLengthToCache) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000965 // Store the last successful match into the array for caching.
966 // TODO(yangguo): do not expose last match to JS and simplify caching.
967 int capture_registers = (capture_count + 1) * 2;
968 Handle<FixedArray> last_match_cache =
969 isolate->factory()->NewFixedArray(capture_registers);
970 int32_t* last_match = global_cache.LastSuccessfulMatch();
971 for (int i = 0; i < capture_registers; i++) {
972 last_match_cache->set(i, Smi::FromInt(last_match[i]));
973 }
974 Handle<FixedArray> result_fixed_array = builder.array();
975 result_fixed_array->Shrink(builder.length());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400976 // Cache the result and turn the FixedArray into a COW array.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000977 RegExpResultsCache::Enter(
978 isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
979 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400980 }
981 return *builder.ToJSArray(result_array);
982 } else {
983 return isolate->heap()->null_value(); // No matches at all.
984 }
985}
986
987
988// This is only called for StringReplaceGlobalRegExpWithFunction. This sets
989// lastMatchInfoOverride to maintain the last match info, so we don't need to
990// set any other last match array info.
991RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
992 HandleScope handles(isolate);
993 DCHECK(args.length() == 4);
994
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400995 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000996 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400997 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
998 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
999 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
1000 RUNTIME_ASSERT(result_array->HasFastObjectElements());
1001
1002 subject = String::Flatten(subject);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001003 RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal);
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001004
1005 if (regexp->CaptureCount() == 0) {
1006 return SearchRegExpMultiple<false>(isolate, subject, regexp,
1007 last_match_info, result_array);
1008 } else {
1009 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1010 result_array);
1011 }
1012}
1013
1014
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001015RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001016 SealHandleScope shs(isolate);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001017 DCHECK(args.length() == 4);
1018 Object* exception = isolate->pending_exception();
1019 isolate->clear_pending_exception();
1020 return isolate->ReThrow(exception);
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001021}
1022
1023
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001024RUNTIME_FUNCTION(Runtime_IsRegExp) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001025 SealHandleScope shs(isolate);
1026 DCHECK(args.length() == 1);
1027 CONVERT_ARG_CHECKED(Object, obj, 0);
1028 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1029}
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001030} // namespace internal
1031} // namespace v8