blob: eeacd6eb9e126dc055ec382683325c4b902f2dac [file] [log] [blame]
// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5(function(global, utils) {
6
7%CheckIsBootstrapping();
8
9// -------------------------------------------------------------------
10// Imports
11
12var FLAG_harmony_tolength;
13var GlobalObject = global.Object;
14var GlobalRegExp = global.RegExp;
15var GlobalRegExpPrototype;
16var InternalArray = utils.InternalArray;
17var InternalPackedArray = utils.InternalPackedArray;
18var MakeTypeError;
19var matchSymbol = utils.ImportNow("match_symbol");
20var searchSymbol = utils.ImportNow("search_symbol");
21var splitSymbol = utils.ImportNow("split_symbol");
22
23utils.ImportFromExperimental(function(from) {
24 FLAG_harmony_tolength = from.FLAG_harmony_tolength;
25});
26
27utils.Import(function(from) {
28 MakeTypeError = from.MakeTypeError;
29});
30
31// -------------------------------------------------------------------
32
// Property of the builtins object for recording the result of the last
// regexp match. RegExpLastMatchInfo holds the match indices of the last
// successful regexp match (an array of start/end index pairs for the whole
// match and for every captured substring); the invariant is that there are
// at least two capture indices. The array also contains the subject string
// of the last successful match and the last input value.
var RegExpLastMatchInfo = new InternalPackedArray(
  2,          // REGEXP_NUMBER_OF_CAPTURES
  "",         // Last subject.
  UNDEFINED,  // Last input - settable with RegExpSetInput.
  0,          // REGEXP_FIRST_CAPTURE + 0
  0           // REGEXP_FIRST_CAPTURE + 1
);
46
47// -------------------------------------------------------------------
48
// Decides whether |o| should be treated as a regular expression.
// Non-receivers never qualify. For receivers, a defined value stored under
// the match symbol decides (converted to a boolean); only when that value is
// undefined does the answer depend on |o| being an actual RegExp object.
function IsRegExp(o) {
  if (IS_RECEIVER(o)) {
    var matcher = o[matchSymbol];
    return IS_UNDEFINED(matcher) ? IS_REGEXP(o) : TO_BOOLEAN(matcher);
  }
  return false;
}
55
56
57// ES6 section 21.2.3.2.2
58function RegExpInitialize(object, pattern, flags) {
59 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern);
60 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags);
61 %RegExpInitializeAndCompile(object, pattern, flags);
62 return object;
63}
64
65
// Returns the flag string of the RegExp |pattern|, built from its internal
// flag bits in the fixed order g, i, m, u, y.
function PatternFlags(pattern) {
  var flags = '';
  if (REGEXP_GLOBAL(pattern)) flags += 'g';
  if (REGEXP_IGNORE_CASE(pattern)) flags += 'i';
  if (REGEXP_MULTILINE(pattern)) flags += 'm';
  if (REGEXP_UNICODE(pattern)) flags += 'u';
  if (REGEXP_STICKY(pattern)) flags += 'y';
  return flags;
}
73
74
75function RegExpConstructor(pattern, flags) {
76 var newtarget = new.target;
77 var pattern_is_regexp = IsRegExp(pattern);
78
79 if (IS_UNDEFINED(newtarget)) {
80 newtarget = GlobalRegExp;
81
82 // ES6 section 21.2.3.1 step 3.b
83 if (pattern_is_regexp && IS_UNDEFINED(flags) &&
84 pattern.constructor === newtarget) {
85 return pattern;
86 }
87 }
88
89 if (IS_REGEXP(pattern)) {
90 if (IS_UNDEFINED(flags)) flags = PatternFlags(pattern);
91 pattern = REGEXP_SOURCE(pattern);
92
93 } else if (pattern_is_regexp) {
94 var input_pattern = pattern;
95 pattern = pattern.source;
96 if (IS_UNDEFINED(flags)) flags = input_pattern.flags;
97 }
98
99 var object = %NewObject(GlobalRegExp, newtarget);
100 return RegExpInitialize(object, pattern, flags);
101}
102
103
// RegExp.prototype.compile: re-initializes the receiver in place.
// Throws a TypeError if the receiver is not a RegExp, or if |pattern| is a
// RegExp and |flags| is also supplied. When |pattern| is a RegExp its source
// and flags are copied.
function RegExpCompileJS(pattern, flags) {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        "RegExp.prototype.compile", this);
  }

  var source = pattern;
  var flagString = flags;

  if (IS_REGEXP(pattern)) {
    if (!IS_UNDEFINED(flags)) throw MakeTypeError(kRegExpFlags);

    flagString = PatternFlags(pattern);
    source = REGEXP_SOURCE(pattern);
  }

  RegExpInitialize(this, source, flagString);

  // Deliberately no return value (undefined), for compatibility with JSC.
  // See http://crbug.com/585775 for web compat details.
}
122
123
124function DoRegExpExec(regexp, string, index) {
125 return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo);
126}
127
128
129// This is kind of performance sensitive, so we want to avoid unnecessary
130// type checks on inputs. But we also don't want to inline it several times
131// manually, so we use a macro :-)
132macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
133 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
134 var start = MATCHINFO[CAPTURE0];
135 var end = MATCHINFO[CAPTURE1];
136 // Calculate the substring of the first match before creating the result array
137 // to avoid an unnecessary write barrier storing the first result.
138 var first = %_SubString(STRING, start, end);
139 var result = %_RegExpConstructResult(numResults, start, STRING);
140 result[0] = first;
141 if (numResults == 1) return result;
142 var j = REGEXP_FIRST_CAPTURE + 2;
143 for (var i = 1; i < numResults; i++) {
144 start = MATCHINFO[j++];
145 if (start != -1) {
146 end = MATCHINFO[j];
147 result[i] = %_SubString(STRING, start, end);
148 }
149 j++;
150 }
151 return result;
152endmacro
153
154
155function RegExpExecNoTests(regexp, string, start) {
156 // Must be called with RegExp, string and positive integer as arguments.
157 var matchInfo = %_RegExpExec(regexp, string, start, RegExpLastMatchInfo);
158 if (matchInfo !== null) {
159 // ES6 21.2.5.2.2 step 18.
160 if (REGEXP_STICKY(regexp)) regexp.lastIndex = matchInfo[CAPTURE1];
161 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string);
162 }
163 regexp.lastIndex = 0;
164 return null;
165}
166
167
168function RegExpExecJS(string) {
169 if (!IS_REGEXP(this)) {
170 throw MakeTypeError(kIncompatibleMethodReceiver,
171 'RegExp.prototype.exec', this);
172 }
173
174 string = TO_STRING(string);
175 var lastIndex = this.lastIndex;
176
177 // Conversion is required by the ES2015 specification (RegExpBuiltinExec
178 // algorithm, step 4) even if the value is discarded for non-global RegExps.
179 var i = TO_LENGTH_OR_INTEGER(lastIndex);
180
181 var updateLastIndex = REGEXP_GLOBAL(this) || REGEXP_STICKY(this);
182 if (updateLastIndex) {
183 if (i < 0 || i > string.length) {
184 this.lastIndex = 0;
185 return null;
186 }
187 } else {
188 i = 0;
189 }
190
191 // matchIndices is either null or the RegExpLastMatchInfo array.
192 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);
193
194 if (IS_NULL(matchIndices)) {
195 this.lastIndex = 0;
196 return null;
197 }
198
199 // Successful match.
200 if (updateLastIndex) {
201 this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
202 }
203 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
204}
205
206
// One-element cache for the simplified test regexp, maintained by
// TrimRegExp: regexp_key is the RegExp most recently passed to it and
// regexp_val is the trimmed RegExp that was built for that key.
var regexp_key;
var regexp_val;
210
211// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
212// that test is defined in terms of String.prototype.exec. However, it probably
213// means the original value of String.prototype.exec, which is what everybody
214// else implements.
215function RegExpTest(string) {
216 if (!IS_REGEXP(this)) {
217 throw MakeTypeError(kIncompatibleMethodReceiver,
218 'RegExp.prototype.test', this);
219 }
220 string = TO_STRING(string);
221
222 var lastIndex = this.lastIndex;
223
224 // Conversion is required by the ES2015 specification (RegExpBuiltinExec
225 // algorithm, step 4) even if the value is discarded for non-global RegExps.
226 var i = TO_LENGTH_OR_INTEGER(lastIndex);
227
228 if (REGEXP_GLOBAL(this) || REGEXP_STICKY(this)) {
229 if (i < 0 || i > string.length) {
230 this.lastIndex = 0;
231 return false;
232 }
233 // matchIndices is either null or the RegExpLastMatchInfo array.
234 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);
235 if (IS_NULL(matchIndices)) {
236 this.lastIndex = 0;
237 return false;
238 }
239 this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
240 return true;
241 } else {
242 // Non-global, non-sticky regexp.
243 // Remove irrelevant preceeding '.*' in a test regexp. The expression
244 // checks whether this.source starts with '.*' and that the third char is
245 // not a '?'. But see https://code.google.com/p/v8/issues/detail?id=3560
246 var regexp = this;
247 var source = REGEXP_SOURCE(regexp);
248 if (regexp.length >= 3 &&
249 %_StringCharCodeAt(regexp, 0) == 46 && // '.'
250 %_StringCharCodeAt(regexp, 1) == 42 && // '*'
251 %_StringCharCodeAt(regexp, 2) != 63) { // '?'
252 regexp = TrimRegExp(regexp);
253 }
254 // matchIndices is either null or the RegExpLastMatchInfo array.
255 var matchIndices = %_RegExpExec(regexp, string, 0, RegExpLastMatchInfo);
256 if (IS_NULL(matchIndices)) {
257 this.lastIndex = 0;
258 return false;
259 }
260 return true;
261 }
262}
263
264function TrimRegExp(regexp) {
265 if (!%_ObjectEquals(regexp_key, regexp)) {
266 regexp_key = regexp;
267 regexp_val =
268 new GlobalRegExp(
269 %_SubString(REGEXP_SOURCE(regexp), 2, REGEXP_SOURCE(regexp).length),
270 (REGEXP_IGNORE_CASE(regexp) ? REGEXP_MULTILINE(regexp) ? "im" : "i"
271 : REGEXP_MULTILINE(regexp) ? "m" : ""));
272 }
273 return regexp_val;
274}
275
276
277function RegExpToString() {
278 if (!IS_REGEXP(this)) {
279 // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
280 // a UseCounter is incremented to track it.
281 // TODO(littledan): Remove this workaround or standardize it
282 if (this === GlobalRegExpPrototype) {
283 %IncrementUseCounter(kRegExpPrototypeToString);
284 return '/(?:)/';
285 }
286 throw MakeTypeError(kIncompatibleMethodReceiver,
287 'RegExp.prototype.toString', this);
288 }
289 var result = '/' + REGEXP_SOURCE(this) + '/';
290 if (REGEXP_GLOBAL(this)) result += 'g';
291 if (REGEXP_IGNORE_CASE(this)) result += 'i';
292 if (REGEXP_MULTILINE(this)) result += 'm';
293 if (REGEXP_UNICODE(this)) result += 'u';
294 if (REGEXP_STICKY(this)) result += 'y';
295 return result;
296}
297
298
299// ES6 21.2.5.11.
300function RegExpSplit(string, limit) {
301 // TODO(yangguo): allow non-regexp receivers.
302 if (!IS_REGEXP(this)) {
303 throw MakeTypeError(kIncompatibleMethodReceiver,
304 "RegExp.prototype.@@split", this);
305 }
306 var separator = this;
307 var subject = TO_STRING(string);
308
309 limit = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
310 var length = subject.length;
311
312 if (limit === 0) return [];
313
314 if (length === 0) {
315 if (DoRegExpExec(separator, subject, 0, 0) !== null) return [];
316 return [subject];
317 }
318
319 var currentIndex = 0;
320 var startIndex = 0;
321 var startMatch = 0;
322 var result = new InternalArray();
323
324 outer_loop:
325 while (true) {
326 if (startIndex === length) {
327 result[result.length] = %_SubString(subject, currentIndex, length);
328 break;
329 }
330
331 var matchInfo = DoRegExpExec(separator, subject, startIndex);
332 if (matchInfo === null || length === (startMatch = matchInfo[CAPTURE0])) {
333 result[result.length] = %_SubString(subject, currentIndex, length);
334 break;
335 }
336 var endIndex = matchInfo[CAPTURE1];
337
338 // We ignore a zero-length match at the currentIndex.
339 if (startIndex === endIndex && endIndex === currentIndex) {
340 startIndex++;
341 continue;
342 }
343
344 result[result.length] = %_SubString(subject, currentIndex, startMatch);
345
346 if (result.length === limit) break;
347
348 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;
349 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) {
350 var start = matchInfo[i++];
351 var end = matchInfo[i++];
352 if (end != -1) {
353 result[result.length] = %_SubString(subject, start, end);
354 } else {
355 result[result.length] = UNDEFINED;
356 }
357 if (result.length === limit) break outer_loop;
358 }
359
360 startIndex = currentIndex = endIndex;
361 }
362
363 var array_result = [];
364 %MoveArrayContents(result, array_result);
365 return array_result;
366}
367
368
369// ES6 21.2.5.6.
370function RegExpMatch(string) {
371 // TODO(yangguo): allow non-regexp receivers.
372 if (!IS_REGEXP(this)) {
373 throw MakeTypeError(kIncompatibleMethodReceiver,
374 "RegExp.prototype.@@match", this);
375 }
376 var subject = TO_STRING(string);
377
378 if (!REGEXP_GLOBAL(this)) return RegExpExecNoTests(this, subject, 0);
379 this.lastIndex = 0;
380 var result = %StringMatch(subject, this, RegExpLastMatchInfo);
381 return result;
382}
383
384
// ES6 21.2.5.9.
// Returns the start index of the first match of the receiver in |string|
// (converted to a string), searching from position 0, or -1 if there is no
// match. Throws a TypeError if the receiver is not a RegExp.
function RegExpSearch(string) {
  // TODO(yangguo): allow non-regexp receivers.
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        "RegExp.prototype.@@search", this);
  }
  var subject = TO_STRING(string);
  var matchInfo = DoRegExpExec(this, subject, 0);
  return matchInfo ? matchInfo[CAPTURE0] : -1;
}
396
397
398// Getters for the static properties lastMatch, lastParen, leftContext, and
399// rightContext of the RegExp constructor. The properties are computed based
400// on the captures array of the last successful match and the subject string
401// of the last successful match.
402function RegExpGetLastMatch() {
403 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo);
404 return %_SubString(regExpSubject,
405 RegExpLastMatchInfo[CAPTURE0],
406 RegExpLastMatchInfo[CAPTURE1]);
407}
408
409
410function RegExpGetLastParen() {
411 var length = NUMBER_OF_CAPTURES(RegExpLastMatchInfo);
412 if (length <= 2) return ''; // There were no captures.
413 // We match the SpiderMonkey behavior: return the substring defined by the
414 // last pair (after the first pair) of elements of the capture array even if
415 // it is empty.
416 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo);
417 var start = RegExpLastMatchInfo[CAPTURE(length - 2)];
418 var end = RegExpLastMatchInfo[CAPTURE(length - 1)];
419 if (start != -1 && end != -1) {
420 return %_SubString(regExpSubject, start, end);
421 }
422 return "";
423}
424
425
426function RegExpGetLeftContext() {
427 var start_index;
428 var subject;
429 start_index = RegExpLastMatchInfo[CAPTURE0];
430 subject = LAST_SUBJECT(RegExpLastMatchInfo);
431 return %_SubString(subject, 0, start_index);
432}
433
434
435function RegExpGetRightContext() {
436 var start_index;
437 var subject;
438 start_index = RegExpLastMatchInfo[CAPTURE1];
439 subject = LAST_SUBJECT(RegExpLastMatchInfo);
440 return %_SubString(subject, start_index, subject.length);
441}
442
443
444// The properties $1..$9 are the first nine capturing substrings of the last
445// successful match, or ''. The function RegExpMakeCaptureGetter will be
446// called with indices from 1 to 9.
447function RegExpMakeCaptureGetter(n) {
448 return function foo() {
449 var index = n * 2;
450 if (index >= NUMBER_OF_CAPTURES(RegExpLastMatchInfo)) return '';
451 var matchStart = RegExpLastMatchInfo[CAPTURE(index)];
452 var matchEnd = RegExpLastMatchInfo[CAPTURE(index + 1)];
453 if (matchStart == -1 || matchEnd == -1) return '';
454 return %_SubString(LAST_SUBJECT(RegExpLastMatchInfo), matchStart, matchEnd);
455 };
456}
457
458
459// ES6 21.2.5.4.
460function RegExpGetGlobal() {
461 if (!IS_REGEXP(this)) {
462 // TODO(littledan): Remove this RegExp compat workaround
463 if (this === GlobalRegExpPrototype) {
464 return UNDEFINED;
465 }
466 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.global");
467 }
468 return !!REGEXP_GLOBAL(this);
469}
470%FunctionSetName(RegExpGetGlobal, "RegExp.prototype.global");
471%SetNativeFlag(RegExpGetGlobal);
472
473
474// ES6 21.2.5.5.
475function RegExpGetIgnoreCase() {
476 if (!IS_REGEXP(this)) {
477 // TODO(littledan): Remove this RegExp compat workaround
478 if (this === GlobalRegExpPrototype) {
479 return UNDEFINED;
480 }
481 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.ignoreCase");
482 }
483 return !!REGEXP_IGNORE_CASE(this);
484}
485%FunctionSetName(RegExpGetIgnoreCase, "RegExp.prototype.ignoreCase");
486%SetNativeFlag(RegExpGetIgnoreCase);
487
488
489// ES6 21.2.5.7.
490function RegExpGetMultiline() {
491 if (!IS_REGEXP(this)) {
492 // TODO(littledan): Remove this RegExp compat workaround
493 if (this === GlobalRegExpPrototype) {
494 return UNDEFINED;
495 }
496 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.multiline");
497 }
498 return !!REGEXP_MULTILINE(this);
499}
500%FunctionSetName(RegExpGetMultiline, "RegExp.prototype.multiline");
501%SetNativeFlag(RegExpGetMultiline);
502
503
504// ES6 21.2.5.10.
505function RegExpGetSource() {
506 if (!IS_REGEXP(this)) {
507 // TODO(littledan): Remove this RegExp compat workaround
508 if (this === GlobalRegExpPrototype) {
509 return UNDEFINED;
510 }
511 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.source");
512 }
513 return REGEXP_SOURCE(this);
514}
515%FunctionSetName(RegExpGetSource, "RegExp.prototype.source");
516%SetNativeFlag(RegExpGetSource);
517
518// -------------------------------------------------------------------
519
// Wire up the RegExp constructor. The prototype is a plain object (not a
// RegExp), which is why several methods and getters above special-case
// GlobalRegExpPrototype as a receiver.
%FunctionSetInstanceClassName(GlobalRegExp, 'RegExp');
GlobalRegExpPrototype = new GlobalObject();
%FunctionSetPrototype(GlobalRegExp, GlobalRegExpPrototype);
%AddNamedProperty(
    GlobalRegExp.prototype, 'constructor', GlobalRegExp, DONT_ENUM);
%SetCode(GlobalRegExp, RegExpConstructor);

// Prototype methods, as alternating key/function entries; the last three
// are keyed by the match, search and split symbols.
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
  "exec", RegExpExecJS,
  "test", RegExpTest,
  "toString", RegExpToString,
  "compile", RegExpCompileJS,
  matchSymbol, RegExpMatch,
  searchSymbol, RegExpSearch,
  splitSymbol, RegExpSplit,
]);

// Flag and source accessors on the prototype.
utils.InstallGetter(GlobalRegExp.prototype, 'global', RegExpGetGlobal);
utils.InstallGetter(GlobalRegExp.prototype, 'ignoreCase', RegExpGetIgnoreCase);
utils.InstallGetter(GlobalRegExp.prototype, 'multiline', RegExpGetMultiline);
utils.InstallGetter(GlobalRegExp.prototype, 'source', RegExpGetSource);

// The length of compile is 1 in SpiderMonkey.
%FunctionSetLength(GlobalRegExp.prototype.compile, 1);
544
// The properties `input` and `$_` are aliases for each other, backed by the
// last-input slot of RegExpLastMatchInfo.

// Getter: the stored input, or the empty string while the slot is undefined.
var RegExpGetInput = function() {
  var regExpInput = LAST_INPUT(RegExpLastMatchInfo);
  if (IS_UNDEFINED(regExpInput)) return "";
  return regExpInput;
};

// Setter: the assigned value is coerced to a string before being stored.
var RegExpSetInput = function(string) {
  LAST_INPUT(RegExpLastMatchInfo) = TO_STRING(string);
};
555
// Install the legacy static accessors on the RegExp constructor.
// NOTE(review): 19 accessor pairs are installed below; the 22 passed here is
// presumably a capacity hint for the runtime - confirm.
%OptimizeObjectForAddingMultipleProperties(GlobalRegExp, 22);
utils.InstallGetterSetter(GlobalRegExp, 'input', RegExpGetInput, RegExpSetInput,
                          DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$_', RegExpGetInput, RegExpSetInput,
                          DONT_ENUM | DONT_DELETE);


// Setter shared by the accessors below: assignments are accepted and
// silently ignored.
var NoOpSetter = function(ignored) {};


// Static properties set by a successful match. Each long name is paired
// with a '$' alias that is additionally marked DONT_ENUM.
utils.InstallGetterSetter(GlobalRegExp, 'lastMatch', RegExpGetLastMatch,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$&', RegExpGetLastMatch, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, 'lastParen', RegExpGetLastParen,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$+', RegExpGetLastParen, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, 'leftContext', RegExpGetLeftContext,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$`', RegExpGetLeftContext, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, 'rightContext', RegExpGetRightContext,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, "$'", RegExpGetRightContext, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);

// $1 through $9, each with its own capture getter.
for (var i = 1; i < 10; ++i) {
  utils.InstallGetterSetter(GlobalRegExp, '$' + i, RegExpMakeCaptureGetter(i),
                            NoOpSetter, DONT_DELETE);
}
%ToFastProperties(GlobalRegExp);
589
590// -------------------------------------------------------------------
591// Exports
592
// Names made available to other natives files through the utils object.
// Note that the exported RegExpExec is the unchecked helper DoRegExpExec,
// not the RegExp.prototype.exec implementation.
utils.Export(function(to) {
  to.RegExpExec = DoRegExpExec;
  to.RegExpExecNoTests = RegExpExecNoTests;
  to.RegExpLastMatchInfo = RegExpLastMatchInfo;
  to.RegExpTest = RegExpTest;
});
599
600})