blob: b8556173d15fc20d0ece8fa8faa365ea6cece153 [file] [log] [blame]
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +00001// Copyright 2006-2008 Google Inc. All Rights Reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29
30#include "execution.h"
31#include "factory.h"
32#include "jsregexp.h"
33#include "third_party/jscre/pcre.h"
34#include "platform.h"
35#include "top.h"
36
37namespace v8 { namespace internal {
38
39
40#define CAPTURE_INDEX 0
41#define INTERNAL_INDEX 1
42
43static Failure* malloc_failure;
44
45static void* JSREMalloc(size_t size) {
46 Object* obj = Heap::AllocateByteArray(size);
47
48 // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile
49 // will return NULL to the caller, performs GC there.
50 // Also pass failure information to the caller.
51 if (obj->IsFailure()) {
52 malloc_failure = Failure::cast(obj);
53 return NULL;
54 }
55
56 // Note: object is unrooted, the caller of jsRegExpCompile must
57 // create a handle for the return value before doing heap allocation.
58 return reinterpret_cast<void*>(ByteArray::cast(obj)->GetDataStartAddress());
59}
60
61
62static void JSREFree(void* p) {
63 USE(p); // Do nothing, memory is garbage collected.
64}
65
66
67String* RegExpImpl::last_ascii_string_ = NULL;
68String* RegExpImpl::two_byte_cached_string_ = NULL;
69
70
71void RegExpImpl::NewSpaceCollectionPrologue() {
72 // The two byte string is always in the old space. The Ascii string may be
73 // in either place. If it is in the old space we don't need to do anything.
74 if (Heap::InNewSpace(last_ascii_string_)) {
75 // Invalidate the cache.
76 last_ascii_string_ = NULL;
77 two_byte_cached_string_ = NULL;
78 }
79}
80
81
82void RegExpImpl::OldSpaceCollectionPrologue() {
83 last_ascii_string_ = NULL;
84 two_byte_cached_string_ = NULL;
85}
86
87
88Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<String> pattern,
89 Handle<String> flags,
90 bool* has_pending_exception) {
91 // Ensure that RegExp has been loaded.
92 if (!Top::regexp_function()->IsLoaded()) {
93 LoadLazy(Top::regexp_function(), has_pending_exception);
94 if (*has_pending_exception) return Handle<Object>(Failure::Exception());
95 }
96 // Call the construct code with 2 arguments.
97 Object** argv[2] = { Handle<Object>::cast(pattern).location(),
98 Handle<Object>::cast(flags).location() };
99 return Execution::New(Top::regexp_function(), 2, argv, has_pending_exception);
100}
101
102
103// Converts a source string to a 16 bit flat string or a SlicedString containing
104// a 16 bit flat string).
105Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) {
106 if (*subject == last_ascii_string_) {
107 ASSERT(two_byte_cached_string_ != NULL);
108 return Handle<String>(String::cast(two_byte_cached_string_));
109 }
110 Handle<String> two_byte_string = StringToTwoByte(subject);
111 last_ascii_string_ = *subject;
112 two_byte_cached_string_ = *two_byte_string;
113 return two_byte_string;
114}
115
116
117// Converts a source string to a 16 bit flat string or a SlicedString containing
118// a 16 bit flat string).
119Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) {
120 if (!pattern->IsFlat()) {
121 FlattenString(pattern);
122 }
123 Handle<String> flat_string(pattern->IsConsString() ?
124 String::cast(ConsString::cast(*pattern)->first()) :
125 *pattern);
126 ASSERT(!flat_string->IsConsString());
127 ASSERT(flat_string->IsSeqString() || flat_string->IsSlicedString() ||
128 flat_string->IsExternalString());
129 if (!flat_string->IsAscii()) {
130 return flat_string;
131 }
132
133 Handle<String> two_byte_string =
134 Factory::NewRawTwoByteString(flat_string->length(), TENURED);
135 static StringInputBuffer convert_to_two_byte_buffer;
136 convert_to_two_byte_buffer.Reset(*flat_string);
137 for (int i = 0; convert_to_two_byte_buffer.has_more(); i++) {
138 two_byte_string->Set(i, convert_to_two_byte_buffer.GetNext());
139 }
140 return two_byte_string;
141}
142
143
144Handle<Object> RegExpImpl::JsreCompile(Handle<JSValue> re,
145 Handle<String> pattern,
146 Handle<String> flags) {
147 JSRegExpIgnoreCaseOption case_option = JSRegExpDoNotIgnoreCase;
148 JSRegExpMultilineOption multiline_option = JSRegExpSingleLine;
149 FlattenString(flags);
150 for (int i = 0; i < flags->length(); i++) {
151 if (flags->Get(i) == 'i') case_option = JSRegExpIgnoreCase;
152 if (flags->Get(i) == 'm') multiline_option = JSRegExpMultiline;
153 }
154
155 Handle<String> two_byte_pattern = StringToTwoByte(pattern);
156
157 unsigned number_of_captures;
158 const char* error_message = NULL;
159
160 malloc_failure = Failure::Exception();
161 JSRegExp* code = jsRegExpCompile(two_byte_pattern->GetTwoByteData(),
162 pattern->length(), case_option,
163 multiline_option, &number_of_captures,
164 &error_message, &JSREMalloc, &JSREFree);
165
166 if (code == NULL && malloc_failure->IsRetryAfterGC()) {
167 // Performs a GC, then retries.
168 if (!Heap::CollectGarbage(malloc_failure->requested(),
169 malloc_failure->allocation_space())) {
170 // TODO(1181417): Fix this.
171 V8::FatalProcessOutOfMemory("RegExpImpl::JsreCompile");
172 }
173 malloc_failure = Failure::Exception();
174 code = jsRegExpCompile(two_byte_pattern->GetTwoByteData(),
175 pattern->length(), case_option,
176 multiline_option, &number_of_captures,
177 &error_message, &JSREMalloc, &JSREFree);
178 if (code == NULL && malloc_failure->IsRetryAfterGC()) {
179 // TODO(1181417): Fix this.
180 V8::FatalProcessOutOfMemory("RegExpImpl::JsreCompile");
181 }
182 }
183
184 if (error_message != NULL) {
185 // Throw an exception.
186 SmartPointer<char> char_pattern =
187 two_byte_pattern->ToCString(DISALLOW_NULLS);
188 Handle<JSArray> array = Factory::NewJSArray(2);
189 SetElement(array, 0, Factory::NewStringFromUtf8(CStrVector(*char_pattern)));
190 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(error_message)));
191 Handle<Object> regexp_err =
192 Factory::NewSyntaxError("malformed_regexp", array);
193 return Handle<Object>(Top::Throw(*regexp_err));
194 }
195
196 ASSERT(code != NULL);
197
198 // Convert the return address to a ByteArray pointer.
199 Handle<ByteArray> internal(
200 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
201
202 Handle<FixedArray> value = Factory::NewFixedArray(2);
203 value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
204 value->set(INTERNAL_INDEX, *internal);
205 re->set_value(*value);
206
207 return re;
208}
209
210
211Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSValue> regexp,
212 int num_captures,
213 Handle<String> subject,
214 int previous_index,
215 const uc16* two_byte_subject,
216 int* offsets_vector,
217 int offsets_vector_length) {
218 int rc;
219 {
220 AssertNoAllocation a;
221 ByteArray* internal = JsreInternal(regexp);
222 const JSRegExp* js_regexp =
223 reinterpret_cast<JSRegExp*>(internal->GetDataStartAddress());
224
225 rc = jsRegExpExecute(js_regexp, two_byte_subject,
226 subject->length(),
227 previous_index,
228 offsets_vector,
229 offsets_vector_length);
230 }
231
232 // The KJS JavaScript engine returns null (ie, a failed match) when
233 // JSRE's internal match limit is exceeded. We duplicate that behavior here.
234 if (rc == JSRegExpErrorNoMatch
235 || rc == JSRegExpErrorHitLimit) {
236 return Factory::null_value();
237 }
238
239 // Other JSRE errors:
240 if (rc < 0) {
241 // Throw an exception.
242 Handle<Object> code(Smi::FromInt(rc));
243 Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };
244 Handle<Object> regexp_err(
245 Factory::NewTypeError("jsre_error", HandleVector(args, 2)));
246 return Handle<Object>(Top::Throw(*regexp_err));
247 }
248
249 Handle<JSArray> result = Factory::NewJSArray(2 * (num_captures+1));
250
251 // The captures come in (start, end+1) pairs.
252 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
253 SetElement(result, i, Handle<Object>(Smi::FromInt(offsets_vector[i])));
254 SetElement(result, i+1, Handle<Object>(Smi::FromInt(offsets_vector[i+1])));
255 }
256
257 return result;
258}
259
260
261class OffsetsVector {
262 public:
263 inline OffsetsVector(int num_captures) {
264 offsets_vector_length_ = (num_captures + 1) * 3;
265 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
266 vector_ = NewArray<int>(offsets_vector_length_);
267 } else {
268 vector_ = static_offsets_vector_;
269 }
270 }
271
272
273 inline ~OffsetsVector() {
274 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
275 DeleteArray(vector_);
276 vector_ = NULL;
277 }
278 }
279
280
281 inline int* vector() {
282 return vector_;
283 }
284
285
286 inline int length() {
287 return offsets_vector_length_;
288 }
289
290 private:
291 int* vector_;
292 int offsets_vector_length_;
293 static const int kStaticOffsetsVectorSize = 30;
294 static int static_offsets_vector_[kStaticOffsetsVectorSize];
295};
296
297
298int OffsetsVector::static_offsets_vector_[
299 OffsetsVector::kStaticOffsetsVectorSize];
300
301
302Handle<Object> RegExpImpl::JsreExec(Handle<JSValue> regexp,
303 Handle<String> subject,
304 Handle<Object> index) {
305 // Prepare space for the return values.
306 int num_captures = JsreCapture(regexp);
307
308 OffsetsVector offsets(num_captures);
309
310 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
311
312 Handle<String> subject16 = CachedStringToTwoByte(subject);
313
314 Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
315 previous_index,
316 subject16->GetTwoByteData(),
317 offsets.vector(), offsets.length()));
318
319 return result;
320}
321
322
323Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSValue> regexp,
324 Handle<String> subject) {
325 // Prepare space for the return values.
326 int num_captures = JsreCapture(regexp);
327
328 OffsetsVector offsets(num_captures);
329
330 int previous_index = 0;
331
332 Handle<JSArray> result = Factory::NewJSArray(0);
333 int i = 0;
334 Handle<Object> matches;
335
336 Handle<String> subject16 = CachedStringToTwoByte(subject);
337
338 do {
339 if (previous_index > subject->length() || previous_index < 0) {
340 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
341 // string length, there is no match.
342 matches = Factory::null_value();
343 } else {
344 matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
345 subject16->GetTwoByteData(),
346 offsets.vector(), offsets.length());
347
348 if (matches->IsJSArray()) {
349 SetElement(result, i, matches);
350 i++;
351 previous_index = offsets.vector()[1];
352 if (offsets.vector()[0] == offsets.vector()[1]) {
353 previous_index++;
354 }
355 }
356 }
357 } while (matches->IsJSArray());
358
359 // If we exited the loop with an exception, throw it.
360 if (matches->IsNull()) { // Exited loop normally.
361 return result;
362 } else { // Exited loop with the exception in matches.
363 return matches;
364 }
365}
366
367
368int RegExpImpl::JsreCapture(Handle<JSValue> re) {
369 Object* value = re->value();
370 ASSERT(value->IsFixedArray());
371 return Smi::cast(FixedArray::cast(value)->get(CAPTURE_INDEX))->value();
372}
373
374
375ByteArray* RegExpImpl::JsreInternal(Handle<JSValue> re) {
376 Object* value = re->value();
377 ASSERT(value->IsFixedArray());
378 return ByteArray::cast(FixedArray::cast(value)->get(INTERNAL_INDEX));
379}
380
381}} // namespace v8::internal