blob: 0c22a79f6fcfcfbaf7ecdc90839b09654410ca6f [file] [log] [blame]
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +00001// Copyright 2006-2008 Google Inc. All Rights Reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29
30#include "execution.h"
31#include "factory.h"
32#include "jsregexp.h"
33#include "third_party/jscre/pcre.h"
34#include "platform.h"
35#include "top.h"
36
37namespace v8 { namespace internal {
38
39
40#define CAPTURE_INDEX 0
41#define INTERNAL_INDEX 1
42
43static Failure* malloc_failure;
44
45static void* JSREMalloc(size_t size) {
46 Object* obj = Heap::AllocateByteArray(size);
47
48 // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile
49 // will return NULL to the caller, performs GC there.
50 // Also pass failure information to the caller.
51 if (obj->IsFailure()) {
52 malloc_failure = Failure::cast(obj);
53 return NULL;
54 }
55
56 // Note: object is unrooted, the caller of jsRegExpCompile must
57 // create a handle for the return value before doing heap allocation.
58 return reinterpret_cast<void*>(ByteArray::cast(obj)->GetDataStartAddress());
59}
60
61
62static void JSREFree(void* p) {
63 USE(p); // Do nothing, memory is garbage collected.
64}
65
66
67String* RegExpImpl::last_ascii_string_ = NULL;
68String* RegExpImpl::two_byte_cached_string_ = NULL;
69
70
71void RegExpImpl::NewSpaceCollectionPrologue() {
72 // The two byte string is always in the old space. The Ascii string may be
73 // in either place. If it is in the old space we don't need to do anything.
74 if (Heap::InNewSpace(last_ascii_string_)) {
75 // Invalidate the cache.
76 last_ascii_string_ = NULL;
77 two_byte_cached_string_ = NULL;
78 }
79}
80
81
82void RegExpImpl::OldSpaceCollectionPrologue() {
83 last_ascii_string_ = NULL;
84 two_byte_cached_string_ = NULL;
85}
86
87
mads.s.ager@gmail.com9a4089a2008-09-01 08:55:01 +000088Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
89 Handle<String> pattern,
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000090 Handle<String> flags,
91 bool* has_pending_exception) {
mads.s.ager@gmail.com9a4089a2008-09-01 08:55:01 +000092 // Ensure that the constructor function has been loaded.
93 if (!constructor->IsLoaded()) {
94 LoadLazy(constructor, has_pending_exception);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000095 if (*has_pending_exception) return Handle<Object>(Failure::Exception());
96 }
97 // Call the construct code with 2 arguments.
98 Object** argv[2] = { Handle<Object>::cast(pattern).location(),
99 Handle<Object>::cast(flags).location() };
mads.s.ager@gmail.com9a4089a2008-09-01 08:55:01 +0000100 return Execution::New(constructor, 2, argv, has_pending_exception);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000101}
102
103
104// Converts a source string to a 16 bit flat string or a SlicedString containing
105// a 16 bit flat string).
106Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) {
107 if (*subject == last_ascii_string_) {
108 ASSERT(two_byte_cached_string_ != NULL);
109 return Handle<String>(String::cast(two_byte_cached_string_));
110 }
111 Handle<String> two_byte_string = StringToTwoByte(subject);
112 last_ascii_string_ = *subject;
113 two_byte_cached_string_ = *two_byte_string;
114 return two_byte_string;
115}
116
117
118// Converts a source string to a 16 bit flat string or a SlicedString containing
119// a 16 bit flat string).
120Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) {
121 if (!pattern->IsFlat()) {
122 FlattenString(pattern);
123 }
124 Handle<String> flat_string(pattern->IsConsString() ?
125 String::cast(ConsString::cast(*pattern)->first()) :
126 *pattern);
127 ASSERT(!flat_string->IsConsString());
128 ASSERT(flat_string->IsSeqString() || flat_string->IsSlicedString() ||
129 flat_string->IsExternalString());
130 if (!flat_string->IsAscii()) {
131 return flat_string;
132 }
133
134 Handle<String> two_byte_string =
135 Factory::NewRawTwoByteString(flat_string->length(), TENURED);
136 static StringInputBuffer convert_to_two_byte_buffer;
137 convert_to_two_byte_buffer.Reset(*flat_string);
138 for (int i = 0; convert_to_two_byte_buffer.has_more(); i++) {
139 two_byte_string->Set(i, convert_to_two_byte_buffer.GetNext());
140 }
141 return two_byte_string;
142}
143
144
145Handle<Object> RegExpImpl::JsreCompile(Handle<JSValue> re,
146 Handle<String> pattern,
147 Handle<String> flags) {
148 JSRegExpIgnoreCaseOption case_option = JSRegExpDoNotIgnoreCase;
149 JSRegExpMultilineOption multiline_option = JSRegExpSingleLine;
150 FlattenString(flags);
151 for (int i = 0; i < flags->length(); i++) {
152 if (flags->Get(i) == 'i') case_option = JSRegExpIgnoreCase;
153 if (flags->Get(i) == 'm') multiline_option = JSRegExpMultiline;
154 }
155
156 Handle<String> two_byte_pattern = StringToTwoByte(pattern);
157
158 unsigned number_of_captures;
159 const char* error_message = NULL;
160
161 malloc_failure = Failure::Exception();
162 JSRegExp* code = jsRegExpCompile(two_byte_pattern->GetTwoByteData(),
163 pattern->length(), case_option,
164 multiline_option, &number_of_captures,
165 &error_message, &JSREMalloc, &JSREFree);
166
167 if (code == NULL && malloc_failure->IsRetryAfterGC()) {
168 // Performs a GC, then retries.
169 if (!Heap::CollectGarbage(malloc_failure->requested(),
170 malloc_failure->allocation_space())) {
171 // TODO(1181417): Fix this.
172 V8::FatalProcessOutOfMemory("RegExpImpl::JsreCompile");
173 }
174 malloc_failure = Failure::Exception();
175 code = jsRegExpCompile(two_byte_pattern->GetTwoByteData(),
176 pattern->length(), case_option,
177 multiline_option, &number_of_captures,
178 &error_message, &JSREMalloc, &JSREFree);
179 if (code == NULL && malloc_failure->IsRetryAfterGC()) {
180 // TODO(1181417): Fix this.
181 V8::FatalProcessOutOfMemory("RegExpImpl::JsreCompile");
182 }
183 }
184
185 if (error_message != NULL) {
186 // Throw an exception.
187 SmartPointer<char> char_pattern =
188 two_byte_pattern->ToCString(DISALLOW_NULLS);
189 Handle<JSArray> array = Factory::NewJSArray(2);
190 SetElement(array, 0, Factory::NewStringFromUtf8(CStrVector(*char_pattern)));
191 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(error_message)));
192 Handle<Object> regexp_err =
193 Factory::NewSyntaxError("malformed_regexp", array);
194 return Handle<Object>(Top::Throw(*regexp_err));
195 }
196
197 ASSERT(code != NULL);
198
199 // Convert the return address to a ByteArray pointer.
200 Handle<ByteArray> internal(
201 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
202
203 Handle<FixedArray> value = Factory::NewFixedArray(2);
204 value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
205 value->set(INTERNAL_INDEX, *internal);
206 re->set_value(*value);
207
208 return re;
209}
210
211
212Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSValue> regexp,
213 int num_captures,
214 Handle<String> subject,
215 int previous_index,
216 const uc16* two_byte_subject,
217 int* offsets_vector,
218 int offsets_vector_length) {
219 int rc;
220 {
221 AssertNoAllocation a;
222 ByteArray* internal = JsreInternal(regexp);
223 const JSRegExp* js_regexp =
224 reinterpret_cast<JSRegExp*>(internal->GetDataStartAddress());
225
226 rc = jsRegExpExecute(js_regexp, two_byte_subject,
227 subject->length(),
228 previous_index,
229 offsets_vector,
230 offsets_vector_length);
231 }
232
233 // The KJS JavaScript engine returns null (ie, a failed match) when
234 // JSRE's internal match limit is exceeded. We duplicate that behavior here.
235 if (rc == JSRegExpErrorNoMatch
236 || rc == JSRegExpErrorHitLimit) {
237 return Factory::null_value();
238 }
239
240 // Other JSRE errors:
241 if (rc < 0) {
242 // Throw an exception.
243 Handle<Object> code(Smi::FromInt(rc));
244 Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };
245 Handle<Object> regexp_err(
246 Factory::NewTypeError("jsre_error", HandleVector(args, 2)));
247 return Handle<Object>(Top::Throw(*regexp_err));
248 }
249
250 Handle<JSArray> result = Factory::NewJSArray(2 * (num_captures+1));
251
252 // The captures come in (start, end+1) pairs.
253 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
254 SetElement(result, i, Handle<Object>(Smi::FromInt(offsets_vector[i])));
255 SetElement(result, i+1, Handle<Object>(Smi::FromInt(offsets_vector[i+1])));
256 }
257
258 return result;
259}
260
261
262class OffsetsVector {
263 public:
264 inline OffsetsVector(int num_captures) {
265 offsets_vector_length_ = (num_captures + 1) * 3;
266 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
267 vector_ = NewArray<int>(offsets_vector_length_);
268 } else {
269 vector_ = static_offsets_vector_;
270 }
271 }
272
273
274 inline ~OffsetsVector() {
275 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
276 DeleteArray(vector_);
277 vector_ = NULL;
278 }
279 }
280
281
282 inline int* vector() {
283 return vector_;
284 }
285
286
287 inline int length() {
288 return offsets_vector_length_;
289 }
290
291 private:
292 int* vector_;
293 int offsets_vector_length_;
294 static const int kStaticOffsetsVectorSize = 30;
295 static int static_offsets_vector_[kStaticOffsetsVectorSize];
296};
297
298
299int OffsetsVector::static_offsets_vector_[
300 OffsetsVector::kStaticOffsetsVectorSize];
301
302
303Handle<Object> RegExpImpl::JsreExec(Handle<JSValue> regexp,
304 Handle<String> subject,
305 Handle<Object> index) {
306 // Prepare space for the return values.
307 int num_captures = JsreCapture(regexp);
308
309 OffsetsVector offsets(num_captures);
310
311 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
312
313 Handle<String> subject16 = CachedStringToTwoByte(subject);
314
315 Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
316 previous_index,
317 subject16->GetTwoByteData(),
318 offsets.vector(), offsets.length()));
319
320 return result;
321}
322
323
324Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSValue> regexp,
325 Handle<String> subject) {
326 // Prepare space for the return values.
327 int num_captures = JsreCapture(regexp);
328
329 OffsetsVector offsets(num_captures);
330
331 int previous_index = 0;
332
333 Handle<JSArray> result = Factory::NewJSArray(0);
334 int i = 0;
335 Handle<Object> matches;
336
337 Handle<String> subject16 = CachedStringToTwoByte(subject);
338
339 do {
340 if (previous_index > subject->length() || previous_index < 0) {
341 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
342 // string length, there is no match.
343 matches = Factory::null_value();
344 } else {
345 matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
346 subject16->GetTwoByteData(),
347 offsets.vector(), offsets.length());
348
349 if (matches->IsJSArray()) {
350 SetElement(result, i, matches);
351 i++;
352 previous_index = offsets.vector()[1];
353 if (offsets.vector()[0] == offsets.vector()[1]) {
354 previous_index++;
355 }
356 }
357 }
358 } while (matches->IsJSArray());
359
360 // If we exited the loop with an exception, throw it.
361 if (matches->IsNull()) { // Exited loop normally.
362 return result;
363 } else { // Exited loop with the exception in matches.
364 return matches;
365 }
366}
367
368
369int RegExpImpl::JsreCapture(Handle<JSValue> re) {
370 Object* value = re->value();
371 ASSERT(value->IsFixedArray());
372 return Smi::cast(FixedArray::cast(value)->get(CAPTURE_INDEX))->value();
373}
374
375
376ByteArray* RegExpImpl::JsreInternal(Handle<JSValue> re) {
377 Object* value = re->value();
378 ASSERT(value->IsFixedArray());
379 return ByteArray::cast(FixedArray::cast(value)->get(INTERNAL_INDEX));
380}
381
382}} // namespace v8::internal