// Copyright 2006-2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Expect $Object = global.Object;
// Expect $Array = global.Array;

// Builtins-local reference to the RegExp function of the global object.
const $RegExp = global.RegExp;
32
33// A recursive descent parser for Patterns according to the grammar of
34// ECMA-262 15.10.1, with deviations noted below.
35function DoConstructRegExp(object, pattern, flags, isConstructorCall) {
36 // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
37 if (IS_REGEXP(pattern)) {
38 if (!IS_UNDEFINED(flags)) {
39 throw MakeTypeError('regexp_flags', []);
40 }
41 flags = (pattern.global ? 'g' : '')
42 + (pattern.ignoreCase ? 'i' : '')
43 + (pattern.multiline ? 'm' : '');
44 pattern = pattern.source;
45 }
46
47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
49
50 var global = false;
51 var ignoreCase = false;
52 var multiline = false;
53
54 for (var i = 0; i < flags.length; i++) {
55 var c = StringCharAt.call(flags, i);
56 switch (c) {
57 case 'g':
58 // Allow duplicate flags to be consistent with JSC and others.
59 global = true;
60 break;
61 case 'i':
62 ignoreCase = true;
63 break;
64 case 'm':
65 multiline = true;
66 break;
67 default:
68 // Ignore flags that have no meaning to be consistent with
69 // JSC.
70 break;
71 }
72 }
73
74 if (isConstructorCall) {
75 // ECMA-262, section 15.10.7.1.
76 %SetProperty(object, 'source', pattern,
77 DONT_DELETE | READ_ONLY | DONT_ENUM);
78
79 // ECMA-262, section 15.10.7.2.
80 %SetProperty(object, 'global', global, DONT_DELETE | READ_ONLY | DONT_ENUM);
81
82 // ECMA-262, section 15.10.7.3.
83 %SetProperty(object, 'ignoreCase', ignoreCase,
84 DONT_DELETE | READ_ONLY | DONT_ENUM);
85
86 // ECMA-262, section 15.10.7.4.
87 %SetProperty(object, 'multiline', multiline,
88 DONT_DELETE | READ_ONLY | DONT_ENUM);
89
90 // ECMA-262, section 15.10.7.5.
91 %SetProperty(object, 'lastIndex', 0, DONT_DELETE | DONT_ENUM);
92 } else { // RegExp is being recompiled via RegExp.prototype.compile.
93 %IgnoreAttributesAndSetProperty(object, 'source', pattern);
94 %IgnoreAttributesAndSetProperty(object, 'global', global);
95 %IgnoreAttributesAndSetProperty(object, 'ignoreCase', ignoreCase);
96 %IgnoreAttributesAndSetProperty(object, 'multiline', multiline);
97 %IgnoreAttributesAndSetProperty(object, 'lastIndex', 0);
98 }
99
100 // Call internal function to compile the pattern.
101 %RegExpCompile(object, pattern, flags);
102}
103
104
105function RegExpConstructor(pattern, flags) {
106 if (%_IsConstructCall()) {
107 DoConstructRegExp(this, pattern, flags, true);
108 } else {
109 // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
110 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
111 return pattern;
112 }
113 return new $RegExp(pattern, flags);
114 }
115}
116
117
118// Deprecated RegExp.prototype.compile method. We behave like the constructor
119// were called again. In SpiderMonkey, this method returns the regexp object.
120// In JSC, it returns undefined. For compatibility with JSC, we match their
121// behavior.
122function CompileRegExp(pattern, flags) {
123 // Both JSC and SpiderMonkey treat a missing pattern argument as the
124 // empty subject string, and an actual undefined value passed as the
125 // pattern as the string 'undefined'. Note that JSC is inconsistent
126 // here, treating undefined values differently in
127 // RegExp.prototype.compile and in the constructor, where they are
128 // the empty string. For compatibility with JSC, we match their
129 // behavior.
130 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
131 DoConstructRegExp(this, 'undefined', flags, false);
132 } else {
133 DoConstructRegExp(this, pattern, flags, false);
134 }
135}
136
137
138function DoRegExpExec(regexp, string, index) {
139 return %_RegExpExec(regexp, string, index, lastMatchInfo);
140}
141
142
143function RegExpExec(string) {
144 if (!IS_REGEXP(this)) {
145 throw MakeTypeError('method_called_on_incompatible',
146 ['RegExp.prototype.exec', this]);
147 }
148 if (%_ArgumentsLength() == 0) {
149 var regExpInput = LAST_INPUT(lastMatchInfo);
150 if (IS_UNDEFINED(regExpInput)) {
151 throw MakeError('no_input_to_regexp', [this]);
152 }
153 string = regExpInput;
154 }
155 var s = ToString(string);
156 var length = s.length;
157 var lastIndex = this.lastIndex;
158 var i = this.global ? TO_INTEGER(lastIndex) : 0;
159
160 if (i < 0 || i > s.length) {
161 this.lastIndex = 0;
162 return null;
163 }
164
165 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]);
166 // matchIndices is either null or the lastMatchInfo array.
167 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo);
168
169 if (matchIndices == null) {
170 if (this.global) this.lastIndex = 0;
171 return matchIndices; // no match
172 }
173
174 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
175 var result = new $Array(numResults);
176 for (var i = 0; i < numResults; i++) {
177 var matchStart = lastMatchInfo[CAPTURE(i << 1)];
178 var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)];
179 if (matchStart != -1 && matchEnd != -1) {
180 result[i] = SubString(s, matchStart, matchEnd);
181 } else {
182 // Make sure the element is present. Avoid reading the undefined
183 // property from the global object since this may change.
184 result[i] = void 0;
185 }
186 }
187
188 if (this.global)
189 this.lastIndex = lastMatchInfo[CAPTURE1];
190 result.index = lastMatchInfo[CAPTURE0];
191 result.input = s;
192 return result;
193}
194
195
196// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
197// that test is defined in terms of String.prototype.exec. However, it probably
198// means the original value of String.prototype.exec, which is what everybody
199// else implements.
200function RegExpTest(string) {
201 if (!IS_REGEXP(this)) {
202 throw MakeTypeError('method_called_on_incompatible',
203 ['RegExp.prototype.test', this]);
204 }
205 if (%_ArgumentsLength() == 0) {
206 var regExpInput = LAST_INPUT(lastMatchInfo);
207 if (IS_UNDEFINED(regExpInput)) {
208 throw MakeError('no_input_to_regexp', [this]);
209 }
210 string = regExpInput;
211 }
212 var s = ToString(string);
213 var length = s.length;
214 var lastIndex = this.lastIndex;
215 var i = this.global ? TO_INTEGER(lastIndex) : 0;
216
217 if (i < 0 || i > s.length) {
218 this.lastIndex = 0;
219 return false;
220 }
221
222 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]);
223 // matchIndices is either null or the lastMatchInfo array.
224 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo);
225
226 if (matchIndices == null) {
227 if (this.global) this.lastIndex = 0;
228 return false;
229 }
230
231 if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1];
232 return true;
233}
234
235
// RegExp.prototype.toString: returns '/' + source + '/' followed by the
// flag letters 'g', 'i' and 'm' (in that order) for each flag property of
// the receiver that is truthy.
function RegExpToString() {
  // If this.source is an empty string, output /(?:)/.
  // http://bugzilla.mozilla.org/show_bug.cgi?id=225550
  // ecma_2/RegExp/properties-001.js.
  var src = this.source ? this.source : '(?:)';
  var result = '/' + src + '/';
  if (this.global) {
    result += 'g';
  }
  if (this.ignoreCase) {
    result += 'i';
  }
  if (this.multiline) {
    result += 'm';
  }
  return result;
}
250
251
// Getters for the static properties lastMatch, lastParen, leftContext, and
// rightContext of the RegExp constructor. The properties are computed based
// on the captures array of the last successful match and the subject string
// of the last successful match.

// Returns the substring of the last subject covered by the whole match,
// i.e. the span between the first capture pair (CAPTURE0, CAPTURE1).
function RegExpGetLastMatch() {
  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
  var matchStart = lastMatchInfo[CAPTURE0];
  var matchEnd = lastMatchInfo[CAPTURE1];
  return SubString(regExpSubject, matchStart, matchEnd);
}
262
263
// Getter for RegExp.lastParen: returns the substring matched by the last
// capture group of the last successful match, or '' when the regexp had no
// capture groups or that group did not participate in the match.
function RegExpGetLastParen() {
  var length = NUMBER_OF_CAPTURES(lastMatchInfo);
  if (length <= 2) return '';  // There were no captures.
  // We match the SpiderMonkey behavior: return the substring defined by the
  // last pair (after the first pair) of elements of the capture array even if
  // it is empty.
  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
  var start = lastMatchInfo[CAPTURE(length - 2)];
  var end = lastMatchInfo[CAPTURE(length - 1)];
  // -1 marks a capture that did not participate in the match.
  if (start == -1 || end == -1) return '';
  return SubString(regExpSubject, start, end);
}
278
279
// Getter for RegExp.leftContext: returns the part of the last subject that
// precedes the start of the last match.
function RegExpGetLeftContext() {
  var subject = LAST_SUBJECT(lastMatchInfo);
  return SubString(subject, 0, lastMatchInfo[CAPTURE0]);
}
285
286
// Getter for RegExp.rightContext: returns the part of the last subject that
// follows the end of the last match.
function RegExpGetRightContext() {
  var subject = LAST_SUBJECT(lastMatchInfo);
  var matchEnd = lastMatchInfo[CAPTURE1];
  return SubString(subject, matchEnd, subject.length);
}
293
294
// The properties $1..$9 are the first nine capturing substrings of the last
// successful match, or ''. The function RegExpMakeCaptureGetter will be
// called with indices from 1 to 9.
//
// Returns a getter for capture group n. The getter yields '' when the last
// match had fewer than n capture groups or when group n did not participate
// in the match.
function RegExpMakeCaptureGetter(n) {
  return function() {
    // Capture n occupies the start/end pair at positions 2n and 2n + 1.
    var index = n * 2;
    if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    var matchStart = lastMatchInfo[CAPTURE(index)];
    var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
    if (matchStart == -1 || matchEnd == -1) return '';
    return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
  };
}
308
309
// Property of the builtins object for recording the result of the last
// regexp match. The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings). The invariant is
// that there are at least two capture indices. The array also contains
// the subject string for the last successful match.
var lastMatchInfo = [
    2,                 // REGEXP_NUMBER_OF_CAPTURES
    "",                // Last subject.
    void 0,            // Last input - settable with RegExpSetInput.
    0,                 // REGEXP_FIRST_CAPTURE + 0
    0,                 // REGEXP_FIRST_CAPTURE + 1
];
323
324// -------------------------------------------------------------------
325
326function SetupRegExp() {
327 %FunctionSetInstanceClassName($RegExp, 'RegExp');
328 %FunctionSetPrototype($RegExp, new $Object());
329 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
330 %SetCode($RegExp, RegExpConstructor);
331
332 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
333 "exec", RegExpExec,
334 "test", RegExpTest,
335 "toString", RegExpToString,
336 "compile", CompileRegExp
337 ));
338
339 // The length of compile is 1 in SpiderMonkey.
340 %FunctionSetLength($RegExp.prototype.compile, 1);
341
342 // The properties input, $input, and $_ are aliases for each other. When this
343 // value is set the value it is set to is coerced to a string.
344 // Getter and setter for the input.
345 function RegExpGetInput() {
346 var regExpInput = LAST_INPUT(lastMatchInfo);
347 return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
348 }
349 function RegExpSetInput(string) {
350 LAST_INPUT(lastMatchInfo) = ToString(string);
351 };
352
353 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE);
354 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE);
355 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
356 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
357 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
358 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
359
360 // The properties multiline and $* are aliases for each other. When this
361 // value is set in SpiderMonkey, the value it is set to is coerced to a
362 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey
363 // the value of the expression 'RegExp.multiline = null' (for instance) is the
364 // boolean false (ie, the value after coercion), while in V8 it is the value
365 // null (ie, the value before coercion).
366
367 // Getter and setter for multiline.
368 var multiline = false;
369 function RegExpGetMultiline() { return multiline; };
370 function RegExpSetMultiline(flag) { multiline = flag ? true : false; };
371
372 %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE);
373 %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE);
374 %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE);
375 %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE);
376
377
378 function NoOpSetter(ignored) {}
379
380
381 // Static properties set by a successful match.
382 %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE);
383 %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE);
384 %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE);
385 %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
386 %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE);
387 %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE);
388 %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE);
389 %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
390 %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE);
391 %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE);
392 %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE);
393 %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
394 %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE);
395 %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE);
396 %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE);
397 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
398
399 for (var i = 1; i < 10; ++i) {
400 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE);
401 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE);
402 }
403}
404
405
// Install the RegExp builtins as soon as this file is evaluated.
SetupRegExp();