blob: 9296a4b8a9e15d84125258578d087da5b1b4abc6 [file] [log] [blame]
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/v8.h"
6
7#include "src/arguments.h"
8#include "src/jsregexp-inl.h"
9#include "src/jsregexp.h"
10#include "src/runtime/runtime-utils.h"
11#include "src/string-builder.h"
12#include "src/string-search.h"
13
14namespace v8 {
15namespace internal {
16
17class CompiledReplacement {
18 public:
19 explicit CompiledReplacement(Zone* zone)
20 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
21
22 // Return whether the replacement is simple.
23 bool Compile(Handle<String> replacement, int capture_count,
24 int subject_length);
25
26 // Use Apply only if Compile returned false.
27 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
28 int32_t* match);
29
30 // Number of distinct parts of the replacement pattern.
31 int parts() { return parts_.length(); }
32
33 Zone* zone() const { return zone_; }
34
35 private:
36 enum PartType {
37 SUBJECT_PREFIX = 1,
38 SUBJECT_SUFFIX,
39 SUBJECT_CAPTURE,
40 REPLACEMENT_SUBSTRING,
41 REPLACEMENT_STRING,
42 NUMBER_OF_PART_TYPES
43 };
44
45 struct ReplacementPart {
46 static inline ReplacementPart SubjectMatch() {
47 return ReplacementPart(SUBJECT_CAPTURE, 0);
48 }
49 static inline ReplacementPart SubjectCapture(int capture_index) {
50 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
51 }
52 static inline ReplacementPart SubjectPrefix() {
53 return ReplacementPart(SUBJECT_PREFIX, 0);
54 }
55 static inline ReplacementPart SubjectSuffix(int subject_length) {
56 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
57 }
58 static inline ReplacementPart ReplacementString() {
59 return ReplacementPart(REPLACEMENT_STRING, 0);
60 }
61 static inline ReplacementPart ReplacementSubString(int from, int to) {
62 DCHECK(from >= 0);
63 DCHECK(to > from);
64 return ReplacementPart(-from, to);
65 }
66
67 // If tag <= 0 then it is the negation of a start index of a substring of
68 // the replacement pattern, otherwise it's a value from PartType.
69 ReplacementPart(int tag, int data) : tag(tag), data(data) {
70 // Must be non-positive or a PartType value.
71 DCHECK(tag < NUMBER_OF_PART_TYPES);
72 }
73 // Either a value of PartType or a non-positive number that is
74 // the negation of an index into the replacement string.
75 int tag;
76 // The data value's interpretation depends on the value of tag:
77 // tag == SUBJECT_PREFIX ||
78 // tag == SUBJECT_SUFFIX: data is unused.
79 // tag == SUBJECT_CAPTURE: data is the number of the capture.
80 // tag == REPLACEMENT_SUBSTRING ||
81 // tag == REPLACEMENT_STRING: data is index into array of substrings
82 // of the replacement string.
83 // tag <= 0: Temporary representation of the substring of the replacement
84 // string ranging over -tag .. data.
85 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
86 // substring objects.
87 int data;
88 };
89
90 template <typename Char>
91 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
92 Vector<Char> characters, int capture_count,
93 int subject_length, Zone* zone) {
94 int length = characters.length();
95 int last = 0;
96 for (int i = 0; i < length; i++) {
97 Char c = characters[i];
98 if (c == '$') {
99 int next_index = i + 1;
100 if (next_index == length) { // No next character!
101 break;
102 }
103 Char c2 = characters[next_index];
104 switch (c2) {
105 case '$':
106 if (i > last) {
107 // There is a substring before. Include the first "$".
108 parts->Add(
109 ReplacementPart::ReplacementSubString(last, next_index),
110 zone);
111 last = next_index + 1; // Continue after the second "$".
112 } else {
113 // Let the next substring start with the second "$".
114 last = next_index;
115 }
116 i = next_index;
117 break;
118 case '`':
119 if (i > last) {
120 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
121 }
122 parts->Add(ReplacementPart::SubjectPrefix(), zone);
123 i = next_index;
124 last = i + 1;
125 break;
126 case '\'':
127 if (i > last) {
128 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
129 }
130 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
131 i = next_index;
132 last = i + 1;
133 break;
134 case '&':
135 if (i > last) {
136 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
137 }
138 parts->Add(ReplacementPart::SubjectMatch(), zone);
139 i = next_index;
140 last = i + 1;
141 break;
142 case '0':
143 case '1':
144 case '2':
145 case '3':
146 case '4':
147 case '5':
148 case '6':
149 case '7':
150 case '8':
151 case '9': {
152 int capture_ref = c2 - '0';
153 if (capture_ref > capture_count) {
154 i = next_index;
155 continue;
156 }
157 int second_digit_index = next_index + 1;
158 if (second_digit_index < length) {
159 // Peek ahead to see if we have two digits.
160 Char c3 = characters[second_digit_index];
161 if ('0' <= c3 && c3 <= '9') { // Double digits.
162 int double_digit_ref = capture_ref * 10 + c3 - '0';
163 if (double_digit_ref <= capture_count) {
164 next_index = second_digit_index;
165 capture_ref = double_digit_ref;
166 }
167 }
168 }
169 if (capture_ref > 0) {
170 if (i > last) {
171 parts->Add(ReplacementPart::ReplacementSubString(last, i),
172 zone);
173 }
174 DCHECK(capture_ref <= capture_count);
175 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
176 last = next_index + 1;
177 }
178 i = next_index;
179 break;
180 }
181 default:
182 i = next_index;
183 break;
184 }
185 }
186 }
187 if (length > last) {
188 if (last == 0) {
189 // Replacement is simple. Do not use Apply to do the replacement.
190 return true;
191 } else {
192 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
193 }
194 }
195 return false;
196 }
197
198 ZoneList<ReplacementPart> parts_;
199 ZoneList<Handle<String> > replacement_substrings_;
200 Zone* zone_;
201};
202
203
204bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
205 int subject_length) {
206 {
207 DisallowHeapAllocation no_gc;
208 String::FlatContent content = replacement->GetFlatContent();
209 DCHECK(content.IsFlat());
210 bool simple = false;
211 if (content.IsOneByte()) {
212 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
213 capture_count, subject_length, zone());
214 } else {
215 DCHECK(content.IsTwoByte());
216 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
217 capture_count, subject_length, zone());
218 }
219 if (simple) return true;
220 }
221
222 Isolate* isolate = replacement->GetIsolate();
223 // Find substrings of replacement string and create them as String objects.
224 int substring_index = 0;
225 for (int i = 0, n = parts_.length(); i < n; i++) {
226 int tag = parts_[i].tag;
227 if (tag <= 0) { // A replacement string slice.
228 int from = -tag;
229 int to = parts_[i].data;
230 replacement_substrings_.Add(
231 isolate->factory()->NewSubString(replacement, from, to), zone());
232 parts_[i].tag = REPLACEMENT_SUBSTRING;
233 parts_[i].data = substring_index;
234 substring_index++;
235 } else if (tag == REPLACEMENT_STRING) {
236 replacement_substrings_.Add(replacement, zone());
237 parts_[i].data = substring_index;
238 substring_index++;
239 }
240 }
241 return false;
242}
243
244
245void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
246 int match_from, int match_to, int32_t* match) {
247 DCHECK_LT(0, parts_.length());
248 for (int i = 0, n = parts_.length(); i < n; i++) {
249 ReplacementPart part = parts_[i];
250 switch (part.tag) {
251 case SUBJECT_PREFIX:
252 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
253 break;
254 case SUBJECT_SUFFIX: {
255 int subject_length = part.data;
256 if (match_to < subject_length) {
257 builder->AddSubjectSlice(match_to, subject_length);
258 }
259 break;
260 }
261 case SUBJECT_CAPTURE: {
262 int capture = part.data;
263 int from = match[capture * 2];
264 int to = match[capture * 2 + 1];
265 if (from >= 0 && to > from) {
266 builder->AddSubjectSlice(from, to);
267 }
268 break;
269 }
270 case REPLACEMENT_SUBSTRING:
271 case REPLACEMENT_STRING:
272 builder->AddString(replacement_substrings_[part.data]);
273 break;
274 default:
275 UNREACHABLE();
276 }
277 }
278}
279
280
281void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
282 ZoneList<int>* indices, unsigned int limit,
283 Zone* zone) {
284 DCHECK(limit > 0);
285 // Collect indices of pattern in subject using memchr.
286 // Stop after finding at most limit values.
287 const uint8_t* subject_start = subject.start();
288 const uint8_t* subject_end = subject_start + subject.length();
289 const uint8_t* pos = subject_start;
290 while (limit > 0) {
291 pos = reinterpret_cast<const uint8_t*>(
292 memchr(pos, pattern, subject_end - pos));
293 if (pos == NULL) return;
294 indices->Add(static_cast<int>(pos - subject_start), zone);
295 pos++;
296 limit--;
297 }
298}
299
300
301void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
302 ZoneList<int>* indices, unsigned int limit,
303 Zone* zone) {
304 DCHECK(limit > 0);
305 const uc16* subject_start = subject.start();
306 const uc16* subject_end = subject_start + subject.length();
307 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
308 if (*pos == pattern) {
309 indices->Add(static_cast<int>(pos - subject_start), zone);
310 limit--;
311 }
312 }
313}
314
315
316template <typename SubjectChar, typename PatternChar>
317void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
318 Vector<const PatternChar> pattern,
319 ZoneList<int>* indices, unsigned int limit, Zone* zone) {
320 DCHECK(limit > 0);
321 // Collect indices of pattern in subject.
322 // Stop after finding at most limit values.
323 int pattern_length = pattern.length();
324 int index = 0;
325 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
326 while (limit > 0) {
327 index = search.Search(subject, index);
328 if (index < 0) return;
329 indices->Add(index, zone);
330 index += pattern_length;
331 limit--;
332 }
333}
334
335
336void FindStringIndicesDispatch(Isolate* isolate, String* subject,
337 String* pattern, ZoneList<int>* indices,
338 unsigned int limit, Zone* zone) {
339 {
340 DisallowHeapAllocation no_gc;
341 String::FlatContent subject_content = subject->GetFlatContent();
342 String::FlatContent pattern_content = pattern->GetFlatContent();
343 DCHECK(subject_content.IsFlat());
344 DCHECK(pattern_content.IsFlat());
345 if (subject_content.IsOneByte()) {
346 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
347 if (pattern_content.IsOneByte()) {
348 Vector<const uint8_t> pattern_vector =
349 pattern_content.ToOneByteVector();
350 if (pattern_vector.length() == 1) {
351 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
352 limit, zone);
353 } else {
354 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
355 limit, zone);
356 }
357 } else {
358 FindStringIndices(isolate, subject_vector,
359 pattern_content.ToUC16Vector(), indices, limit, zone);
360 }
361 } else {
362 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
363 if (pattern_content.IsOneByte()) {
364 Vector<const uint8_t> pattern_vector =
365 pattern_content.ToOneByteVector();
366 if (pattern_vector.length() == 1) {
367 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
368 limit, zone);
369 } else {
370 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
371 limit, zone);
372 }
373 } else {
374 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
375 if (pattern_vector.length() == 1) {
376 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
377 limit, zone);
378 } else {
379 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
380 limit, zone);
381 }
382 }
383 }
384 }
385}
386
387
388template <typename ResultSeqString>
389MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
390 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
391 Handle<String> replacement, Handle<JSArray> last_match_info) {
392 DCHECK(subject->IsFlat());
393 DCHECK(replacement->IsFlat());
394
395 ZoneScope zone_scope(isolate->runtime_zone());
396 ZoneList<int> indices(8, zone_scope.zone());
397 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
398 String* pattern =
399 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
400 int subject_len = subject->length();
401 int pattern_len = pattern->length();
402 int replacement_len = replacement->length();
403
404 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
405 zone_scope.zone());
406
407 int matches = indices.length();
408 if (matches == 0) return *subject;
409
410 // Detect integer overflow.
411 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
412 static_cast<int64_t>(pattern_len)) *
413 static_cast<int64_t>(matches) +
414 static_cast<int64_t>(subject_len);
415 int result_len;
416 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
417 STATIC_ASSERT(String::kMaxLength < kMaxInt);
418 result_len = kMaxInt; // Provoke exception.
419 } else {
420 result_len = static_cast<int>(result_len_64);
421 }
422
423 int subject_pos = 0;
424 int result_pos = 0;
425
426 MaybeHandle<SeqString> maybe_res;
427 if (ResultSeqString::kHasOneByteEncoding) {
428 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
429 } else {
430 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
431 }
432 Handle<SeqString> untyped_res;
433 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
434 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
435
436 for (int i = 0; i < matches; i++) {
437 // Copy non-matched subject content.
438 if (subject_pos < indices.at(i)) {
439 String::WriteToFlat(*subject, result->GetChars() + result_pos,
440 subject_pos, indices.at(i));
441 result_pos += indices.at(i) - subject_pos;
442 }
443
444 // Replace match.
445 if (replacement_len > 0) {
446 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
447 replacement_len);
448 result_pos += replacement_len;
449 }
450
451 subject_pos = indices.at(i) + pattern_len;
452 }
453 // Add remaining subject content at the end.
454 if (subject_pos < subject_len) {
455 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
456 subject_len);
457 }
458
459 int32_t match_indices[] = {indices.at(matches - 1),
460 indices.at(matches - 1) + pattern_len};
461 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
462
463 return *result;
464}
465
466
467MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
468 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
469 Handle<String> replacement, Handle<JSArray> last_match_info) {
470 DCHECK(subject->IsFlat());
471 DCHECK(replacement->IsFlat());
472
473 int capture_count = regexp->CaptureCount();
474 int subject_length = subject->length();
475
476 // CompiledReplacement uses zone allocation.
477 ZoneScope zone_scope(isolate->runtime_zone());
478 CompiledReplacement compiled_replacement(zone_scope.zone());
479 bool simple_replace =
480 compiled_replacement.Compile(replacement, capture_count, subject_length);
481
482 // Shortcut for simple non-regexp global replacements
483 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
484 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
485 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
486 isolate, subject, regexp, replacement, last_match_info);
487 } else {
488 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
489 isolate, subject, regexp, replacement, last_match_info);
490 }
491 }
492
493 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
494 if (global_cache.HasException()) return isolate->heap()->exception();
495
496 int32_t* current_match = global_cache.FetchNext();
497 if (current_match == NULL) {
498 if (global_cache.HasException()) return isolate->heap()->exception();
499 return *subject;
500 }
501
502 // Guessing the number of parts that the final result string is built
503 // from. Global regexps can match any number of times, so we guess
504 // conservatively.
505 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
506 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
507
508 // Number of parts added by compiled replacement plus preceeding
509 // string and possibly suffix after last match. It is possible for
510 // all components to use two elements when encoded as two smis.
511 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
512
513 int prev = 0;
514
515 do {
516 builder.EnsureCapacity(parts_added_per_loop);
517
518 int start = current_match[0];
519 int end = current_match[1];
520
521 if (prev < start) {
522 builder.AddSubjectSlice(prev, start);
523 }
524
525 if (simple_replace) {
526 builder.AddString(replacement);
527 } else {
528 compiled_replacement.Apply(&builder, start, end, current_match);
529 }
530 prev = end;
531
532 current_match = global_cache.FetchNext();
533 } while (current_match != NULL);
534
535 if (global_cache.HasException()) return isolate->heap()->exception();
536
537 if (prev < subject_length) {
538 builder.EnsureCapacity(2);
539 builder.AddSubjectSlice(prev, subject_length);
540 }
541
542 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
543 global_cache.LastSuccessfulMatch());
544
545 Handle<String> result;
546 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
547 return *result;
548}
549
550
551template <typename ResultSeqString>
552MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
553 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
554 Handle<JSArray> last_match_info) {
555 DCHECK(subject->IsFlat());
556
557 // Shortcut for simple non-regexp global replacements
558 if (regexp->TypeTag() == JSRegExp::ATOM) {
559 Handle<String> empty_string = isolate->factory()->empty_string();
560 if (subject->IsOneByteRepresentation()) {
561 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
562 isolate, subject, regexp, empty_string, last_match_info);
563 } else {
564 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
565 isolate, subject, regexp, empty_string, last_match_info);
566 }
567 }
568
569 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
570 if (global_cache.HasException()) return isolate->heap()->exception();
571
572 int32_t* current_match = global_cache.FetchNext();
573 if (current_match == NULL) {
574 if (global_cache.HasException()) return isolate->heap()->exception();
575 return *subject;
576 }
577
578 int start = current_match[0];
579 int end = current_match[1];
580 int capture_count = regexp->CaptureCount();
581 int subject_length = subject->length();
582
583 int new_length = subject_length - (end - start);
584 if (new_length == 0) return isolate->heap()->empty_string();
585
586 Handle<ResultSeqString> answer;
587 if (ResultSeqString::kHasOneByteEncoding) {
588 answer = Handle<ResultSeqString>::cast(
589 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
590 } else {
591 answer = Handle<ResultSeqString>::cast(
592 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
593 }
594
595 int prev = 0;
596 int position = 0;
597
598 do {
599 start = current_match[0];
600 end = current_match[1];
601 if (prev < start) {
602 // Add substring subject[prev;start] to answer string.
603 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
604 position += start - prev;
605 }
606 prev = end;
607
608 current_match = global_cache.FetchNext();
609 } while (current_match != NULL);
610
611 if (global_cache.HasException()) return isolate->heap()->exception();
612
613 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
614 global_cache.LastSuccessfulMatch());
615
616 if (prev < subject_length) {
617 // Add substring subject[prev;length] to answer string.
618 String::WriteToFlat(*subject, answer->GetChars() + position, prev,
619 subject_length);
620 position += subject_length - prev;
621 }
622
623 if (position == 0) return isolate->heap()->empty_string();
624
625 // Shorten string and fill
626 int string_size = ResultSeqString::SizeFor(position);
627 int allocated_string_size = ResultSeqString::SizeFor(new_length);
628 int delta = allocated_string_size - string_size;
629
630 answer->set_length(position);
631 if (delta == 0) return *answer;
632
633 Address end_of_string = answer->address() + string_size;
634 Heap* heap = isolate->heap();
635
636 // The trimming is performed on a newly allocated object, which is on a
637 // fresly allocated page or on an already swept page. Hence, the sweeper
638 // thread can not get confused with the filler creation. No synchronization
639 // needed.
640 heap->CreateFillerObjectAt(end_of_string, delta);
641 heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR);
642 return *answer;
643}
644
645
646RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
647 HandleScope scope(isolate);
648 DCHECK(args.length() == 4);
649
650 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
651 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
652 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
653 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
654
655 RUNTIME_ASSERT(regexp->GetFlags().is_global());
656 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
657
658 subject = String::Flatten(subject);
659
660 if (replacement->length() == 0) {
661 if (subject->HasOnlyOneByteChars()) {
662 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
663 isolate, subject, regexp, last_match_info);
664 } else {
665 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
666 isolate, subject, regexp, last_match_info);
667 }
668 }
669
670 replacement = String::Flatten(replacement);
671
672 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
673 replacement, last_match_info);
674}
675
676
677RUNTIME_FUNCTION(Runtime_StringSplit) {
678 HandleScope handle_scope(isolate);
679 DCHECK(args.length() == 3);
680 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
681 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
682 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
683 RUNTIME_ASSERT(limit > 0);
684
685 int subject_length = subject->length();
686 int pattern_length = pattern->length();
687 RUNTIME_ASSERT(pattern_length > 0);
688
689 if (limit == 0xffffffffu) {
690 Handle<Object> cached_answer(
691 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
692 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
693 isolate);
694 if (*cached_answer != Smi::FromInt(0)) {
695 // The cache FixedArray is a COW-array and can therefore be reused.
696 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
697 Handle<FixedArray>::cast(cached_answer));
698 return *result;
699 }
700 }
701
702 // The limit can be very large (0xffffffffu), but since the pattern
703 // isn't empty, we can never create more parts than ~half the length
704 // of the subject.
705
706 subject = String::Flatten(subject);
707 pattern = String::Flatten(pattern);
708
709 static const int kMaxInitialListCapacity = 16;
710
711 ZoneScope zone_scope(isolate->runtime_zone());
712
713 // Find (up to limit) indices of separator and end-of-string in subject
714 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
715 ZoneList<int> indices(initial_capacity, zone_scope.zone());
716
717 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
718 zone_scope.zone());
719
720 if (static_cast<uint32_t>(indices.length()) < limit) {
721 indices.Add(subject_length, zone_scope.zone());
722 }
723
724 // The list indices now contains the end of each part to create.
725
726 // Create JSArray of substrings separated by separator.
727 int part_count = indices.length();
728
729 Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
730 JSObject::EnsureCanContainHeapObjectElements(result);
731 result->set_length(Smi::FromInt(part_count));
732
733 DCHECK(result->HasFastObjectElements());
734
735 if (part_count == 1 && indices.at(0) == subject_length) {
736 FixedArray::cast(result->elements())->set(0, *subject);
737 return *result;
738 }
739
740 Handle<FixedArray> elements(FixedArray::cast(result->elements()));
741 int part_start = 0;
742 for (int i = 0; i < part_count; i++) {
743 HandleScope local_loop_handle(isolate);
744 int part_end = indices.at(i);
745 Handle<String> substring =
746 isolate->factory()->NewProperSubString(subject, part_start, part_end);
747 elements->set(i, *substring);
748 part_start = part_end + pattern_length;
749 }
750
751 if (limit == 0xffffffffu) {
752 if (result->HasFastObjectElements()) {
753 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
754 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
755 }
756 }
757
758 return *result;
759}
760
761
762RUNTIME_FUNCTION(Runtime_RegExpExecRT) {
763 HandleScope scope(isolate);
764 DCHECK(args.length() == 4);
765 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
766 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
767 CONVERT_INT32_ARG_CHECKED(index, 2);
768 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
769 // Due to the way the JS calls are constructed this must be less than the
770 // length of a string, i.e. it is always a Smi. We check anyway for security.
771 RUNTIME_ASSERT(index >= 0);
772 RUNTIME_ASSERT(index <= subject->length());
773 isolate->counters()->regexp_entry_runtime()->Increment();
774 Handle<Object> result;
775 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
776 isolate, result,
777 RegExpImpl::Exec(regexp, subject, index, last_match_info));
778 return *result;
779}
780
781
782RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
783 HandleScope handle_scope(isolate);
784 DCHECK(args.length() == 3);
785 CONVERT_SMI_ARG_CHECKED(size, 0);
786 RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
787 CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
788 CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
789 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
790 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
791 Handle<JSObject> object =
792 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false);
793 Handle<JSArray> array = Handle<JSArray>::cast(object);
794 array->set_elements(*elements);
795 array->set_length(Smi::FromInt(size));
796 // Write in-object properties after the length of the array.
797 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
798 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
799 return *array;
800}
801
802
803static JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags,
804 bool* success) {
805 uint32_t value = JSRegExp::NONE;
806 int length = flags->length();
807 // A longer flags string cannot be valid.
808 if (length > 4) return JSRegExp::Flags(0);
809 for (int i = 0; i < length; i++) {
810 uint32_t flag = JSRegExp::NONE;
811 switch (flags->Get(i)) {
812 case 'g':
813 flag = JSRegExp::GLOBAL;
814 break;
815 case 'i':
816 flag = JSRegExp::IGNORE_CASE;
817 break;
818 case 'm':
819 flag = JSRegExp::MULTILINE;
820 break;
821 case 'y':
822 if (!FLAG_harmony_regexps) return JSRegExp::Flags(0);
823 flag = JSRegExp::STICKY;
824 break;
825 default:
826 return JSRegExp::Flags(0);
827 }
828 // Duplicate flag.
829 if (value & flag) return JSRegExp::Flags(0);
830 value |= flag;
831 }
832 *success = true;
833 return JSRegExp::Flags(value);
834}
835
836
837RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
838 HandleScope scope(isolate);
839 DCHECK(args.length() == 3);
840 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
841 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
842 CONVERT_ARG_HANDLE_CHECKED(String, flags_string, 2);
843 Factory* factory = isolate->factory();
844 // If source is the empty string we set it to "(?:)" instead as
845 // suggested by ECMA-262, 5th, section 15.10.4.1.
846 if (source->length() == 0) source = factory->query_colon_string();
847
848 bool success = false;
849 JSRegExp::Flags flags = RegExpFlagsFromString(flags_string, &success);
850 if (!success) {
851 Handle<FixedArray> element = factory->NewFixedArray(1);
852 element->set(0, *flags_string);
853 Handle<JSArray> args = factory->NewJSArrayWithElements(element);
854 THROW_NEW_ERROR_RETURN_FAILURE(
855 isolate, NewSyntaxError("invalid_regexp_flags", args));
856 }
857
858 Handle<Object> global = factory->ToBoolean(flags.is_global());
859 Handle<Object> ignore_case = factory->ToBoolean(flags.is_ignore_case());
860 Handle<Object> multiline = factory->ToBoolean(flags.is_multiline());
861 Handle<Object> sticky = factory->ToBoolean(flags.is_sticky());
862
863 Map* map = regexp->map();
864 Object* constructor = map->constructor();
865 if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&
866 JSFunction::cast(constructor)->initial_map() == map) {
867 // If we still have the original map, set in-object properties directly.
868 // Both true and false are immovable immortal objects so no need for write
869 // barrier.
870 regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global,
871 SKIP_WRITE_BARRIER);
872 regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignore_case,
873 SKIP_WRITE_BARRIER);
874 regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline,
875 SKIP_WRITE_BARRIER);
876 regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,
877 Smi::FromInt(0), SKIP_WRITE_BARRIER);
878 } else {
879 // Map has changed, so use generic, but slower, method. We also end here if
880 // the --harmony-regexp flag is set, because the initial map does not have
881 // space for the 'sticky' flag, since it is from the snapshot, but must work
882 // both with and without --harmony-regexp. When sticky comes out from under
883 // the flag, we will be able to use the fast initial map.
884 PropertyAttributes final =
885 static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);
886 PropertyAttributes writable =
887 static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
888 Handle<Object> zero(Smi::FromInt(0), isolate);
889 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(),
890 global, final).Check();
891 JSObject::SetOwnPropertyIgnoreAttributes(
892 regexp, factory->ignore_case_string(), ignore_case, final).Check();
893 JSObject::SetOwnPropertyIgnoreAttributes(
894 regexp, factory->multiline_string(), multiline, final).Check();
895 if (FLAG_harmony_regexps) {
896 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
897 sticky, final).Check();
898 }
899 JSObject::SetOwnPropertyIgnoreAttributes(
900 regexp, factory->last_index_string(), zero, writable).Check();
901 }
902
903 Handle<Object> result;
904 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
905 isolate, result, RegExpImpl::Compile(regexp, source, flags));
906 return *result;
907}
908
909
910RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) {
911 HandleScope scope(isolate);
912 DCHECK(args.length() == 4);
913 CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0);
914 CONVERT_SMI_ARG_CHECKED(index, 1);
915 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2);
916 CONVERT_ARG_HANDLE_CHECKED(String, flags, 3);
917
918 // Get the RegExp function from the context in the literals array.
919 // This is the RegExp function from the context in which the
920 // function was created. We do not use the RegExp function from the
921 // current native context because this might be the RegExp function
922 // from another context which we should not have access to.
923 Handle<JSFunction> constructor = Handle<JSFunction>(
924 JSFunction::NativeContextFromLiterals(*literals)->regexp_function());
925 // Compute the regular expression literal.
926 Handle<Object> regexp;
927 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
928 isolate, regexp,
929 RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags));
930 literals->set(index, *regexp);
931 return *regexp;
932}
933
934
935// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
936// separate last match info. See comment on that function.
937template <bool has_capture>
938static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
939 Handle<JSRegExp> regexp,
940 Handle<JSArray> last_match_array,
941 Handle<JSArray> result_array) {
942 DCHECK(subject->IsFlat());
943 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
944
945 int capture_count = regexp->CaptureCount();
946 int subject_length = subject->length();
947
948 static const int kMinLengthToCache = 0x1000;
949
950 if (subject_length > kMinLengthToCache) {
951 Handle<Object> cached_answer(
952 RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),
953 RegExpResultsCache::REGEXP_MULTIPLE_INDICES),
954 isolate);
955 if (*cached_answer != Smi::FromInt(0)) {
956 Handle<FixedArray> cached_fixed_array =
957 Handle<FixedArray>(FixedArray::cast(*cached_answer));
958 // The cache FixedArray is a COW-array and can therefore be reused.
959 JSArray::SetContent(result_array, cached_fixed_array);
960 // The actual length of the result array is stored in the last element of
961 // the backing store (the backing FixedArray may have a larger capacity).
962 Object* cached_fixed_array_last_element =
963 cached_fixed_array->get(cached_fixed_array->length() - 1);
964 Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
965 result_array->set_length(js_array_length);
966 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
967 NULL);
968 return *result_array;
969 }
970 }
971
972 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
973 if (global_cache.HasException()) return isolate->heap()->exception();
974
975 // Ensured in Runtime_RegExpExecMultiple.
976 DCHECK(result_array->HasFastObjectElements());
977 Handle<FixedArray> result_elements(
978 FixedArray::cast(result_array->elements()));
979 if (result_elements->length() < 16) {
980 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
981 }
982
983 FixedArrayBuilder builder(result_elements);
984
985 // Position to search from.
986 int match_start = -1;
987 int match_end = 0;
988 bool first = true;
989
990 // Two smis before and after the match, for very long strings.
991 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
992
993 while (true) {
994 int32_t* current_match = global_cache.FetchNext();
995 if (current_match == NULL) break;
996 match_start = current_match[0];
997 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
998 if (match_end < match_start) {
999 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1000 match_start);
1001 }
1002 match_end = current_match[1];
1003 {
1004 // Avoid accumulating new handles inside loop.
1005 HandleScope temp_scope(isolate);
1006 Handle<String> match;
1007 if (!first) {
1008 match = isolate->factory()->NewProperSubString(subject, match_start,
1009 match_end);
1010 } else {
1011 match =
1012 isolate->factory()->NewSubString(subject, match_start, match_end);
1013 first = false;
1014 }
1015
1016 if (has_capture) {
1017 // Arguments array to replace function is match, captures, index and
1018 // subject, i.e., 3 + capture count in total.
1019 Handle<FixedArray> elements =
1020 isolate->factory()->NewFixedArray(3 + capture_count);
1021
1022 elements->set(0, *match);
1023 for (int i = 1; i <= capture_count; i++) {
1024 int start = current_match[i * 2];
1025 if (start >= 0) {
1026 int end = current_match[i * 2 + 1];
1027 DCHECK(start <= end);
1028 Handle<String> substring =
1029 isolate->factory()->NewSubString(subject, start, end);
1030 elements->set(i, *substring);
1031 } else {
1032 DCHECK(current_match[i * 2 + 1] < 0);
1033 elements->set(i, isolate->heap()->undefined_value());
1034 }
1035 }
1036 elements->set(capture_count + 1, Smi::FromInt(match_start));
1037 elements->set(capture_count + 2, *subject);
1038 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1039 } else {
1040 builder.Add(*match);
1041 }
1042 }
1043 }
1044
1045 if (global_cache.HasException()) return isolate->heap()->exception();
1046
1047 if (match_start >= 0) {
1048 // Finished matching, with at least one match.
1049 if (match_end < subject_length) {
1050 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1051 subject_length);
1052 }
1053
1054 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1055 NULL);
1056
1057 if (subject_length > kMinLengthToCache) {
1058 // Store the length of the result array into the last element of the
1059 // backing FixedArray.
1060 builder.EnsureCapacity(1);
1061 Handle<FixedArray> fixed_array = builder.array();
1062 fixed_array->set(fixed_array->length() - 1,
1063 Smi::FromInt(builder.length()));
1064 // Cache the result and turn the FixedArray into a COW array.
1065 RegExpResultsCache::Enter(isolate, subject,
1066 handle(regexp->data(), isolate), fixed_array,
1067 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1068 }
1069 return *builder.ToJSArray(result_array);
1070 } else {
1071 return isolate->heap()->null_value(); // No matches at all.
1072 }
1073}
1074
1075
1076// This is only called for StringReplaceGlobalRegExpWithFunction. This sets
1077// lastMatchInfoOverride to maintain the last match info, so we don't need to
1078// set any other last match array info.
1079RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1080 HandleScope handles(isolate);
1081 DCHECK(args.length() == 4);
1082
1083 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1084 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1085 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
1086 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1087 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
1088 RUNTIME_ASSERT(result_array->HasFastObjectElements());
1089
1090 subject = String::Flatten(subject);
1091 RUNTIME_ASSERT(regexp->GetFlags().is_global());
1092
1093 if (regexp->CaptureCount() == 0) {
1094 return SearchRegExpMultiple<false>(isolate, subject, regexp,
1095 last_match_info, result_array);
1096 } else {
1097 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1098 result_array);
1099 }
1100}
1101
1102
1103RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) {
1104 SealHandleScope shs(isolate);
1105 return __RT_impl_Runtime_RegExpConstructResult(args, isolate);
1106}
1107
1108
1109RUNTIME_FUNCTION(RuntimeReference_RegExpExec) {
1110 SealHandleScope shs(isolate);
1111 return __RT_impl_Runtime_RegExpExecRT(args, isolate);
1112}
1113
1114
1115RUNTIME_FUNCTION(RuntimeReference_IsRegExp) {
1116 SealHandleScope shs(isolate);
1117 DCHECK(args.length() == 1);
1118 CONVERT_ARG_CHECKED(Object, obj, 0);
1119 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1120}
1121}
1122} // namespace v8::internal