Blame - src/jsregexp.cc - fp2-dev/platform/external/v8

blob: e7aa8602ded02e1cf006d5a0c7e76f2f11c4f988 [file] [log] [blame]

Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1	// Copyright 2006-2009 the V8 project authors. All rights reserved.
				2	// Redistribution and use in source and binary forms, with or without
				3	// modification, are permitted provided that the following conditions are
				4	// met:
				5	//
				6	// * Redistributions of source code must retain the above copyright
				7	// notice, this list of conditions and the following disclaimer.
				8	// * Redistributions in binary form must reproduce the above
				9	// copyright notice, this list of conditions and the following
				10	// disclaimer in the documentation and/or other materials provided
				11	// with the distribution.
				12	// * Neither the name of Google Inc. nor the names of its
				13	// contributors may be used to endorse or promote products derived
				14	// from this software without specific prior written permission.
				15	//
				16	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				20	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				21	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				22	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				23	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				24	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				25	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				26	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27
				28	#include "v8.h"
				29
				30	#include "ast.h"
				31	#include "compiler.h"
				32	#include "execution.h"
				33	#include "factory.h"
				34	#include "jsregexp.h"
				35	#include "platform.h"
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	36	#include "string-search.h"
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	37	#include "runtime.h"
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	38	#include "compilation-cache.h"
				39	#include "string-stream.h"
				40	#include "parser.h"
				41	#include "regexp-macro-assembler.h"
				42	#include "regexp-macro-assembler-tracer.h"
				43	#include "regexp-macro-assembler-irregexp.h"
				44	#include "regexp-stack.h"
				45
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	46	#ifndef V8_INTERPRETED_REGEXP
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	47	#if V8_TARGET_ARCH_IA32
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	48	#include "ia32/regexp-macro-assembler-ia32.h"
				49	#elif V8_TARGET_ARCH_X64
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	50	#include "x64/regexp-macro-assembler-x64.h"
				51	#elif V8_TARGET_ARCH_ARM
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	52	#include "arm/regexp-macro-assembler-arm.h"
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	53	#elif V8_TARGET_ARCH_MIPS
				54	#include "mips/regexp-macro-assembler-mips.h"
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	55	#else
				56	#error Unsupported target architecture.
				57	#endif
				58	#endif
				59
				60	#include "interpreter-irregexp.h"
				61
				62
				63	namespace v8 {
				64	namespace internal {
				65
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	66	Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
				67	Handle<String> pattern,
				68	Handle<String> flags,
				69	bool* has_pending_exception) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	70	// Call the construct code with 2 arguments.
				71	Object** argv[2] = { Handle<Object>::cast(pattern).location(),
				72	Handle<Object>::cast(flags).location() };
				73	return Execution::New(constructor, 2, argv, has_pending_exception);
				74	}
				75
				76
				77	static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
				78	int flags = JSRegExp::NONE;
				79	for (int i = 0; i < str->length(); i++) {
				80	switch (str->Get(i)) {
				81	case 'i':
				82	flags \|= JSRegExp::IGNORE_CASE;
				83	break;
				84	case 'g':
				85	flags \|= JSRegExp::GLOBAL;
				86	break;
				87	case 'm':
				88	flags \|= JSRegExp::MULTILINE;
				89	break;
				90	}
				91	}
				92	return JSRegExp::Flags(flags);
				93	}
				94
				95
				96	static inline void ThrowRegExpException(Handle<JSRegExp> re,
				97	Handle<String> pattern,
				98	Handle<String> error_text,
				99	const char* message) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	100	Isolate* isolate = re->GetIsolate();
				101	Factory* factory = isolate->factory();
				102	Handle<FixedArray> elements = factory->NewFixedArray(2);
Ben Murdoch	e0cee9b	2011-05-25 10:26:03 +0100	[diff] [blame]	103	elements->set(0, *pattern);
				104	elements->set(1, *error_text);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	105	Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
				106	Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
				107	isolate->Throw(*regexp_err);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	108	}
				109
				110
				111	// Generic RegExp methods. Dispatches to implementation specific methods.
				112
				113
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	114	Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
				115	Handle<String> pattern,
				116	Handle<String> flag_str) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	117	Isolate* isolate = re->GetIsolate();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	118	JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	119	CompilationCache* compilation_cache = isolate->compilation_cache();
				120	Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	121	bool in_cache = !cached.is_null();
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	122	LOG(isolate, RegExpCompileEvent(re, in_cache));
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	123
				124	Handle<Object> result;
				125	if (in_cache) {
				126	re->set_data(*cached);
				127	return re;
				128	}
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	129	pattern = FlattenGetString(pattern);
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	130	CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	131	PostponeInterruptsScope postpone(isolate);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	132	RegExpCompileData parse_result;
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	133	FlatStringReader reader(isolate, pattern);
Teng-Hui Zhu	3e5fa29	2010-11-09 16:16:48 -0800	[diff] [blame]	134	if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
				135	&parse_result)) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	136	// Throw an exception if we fail to parse the pattern.
				137	ThrowRegExpException(re,
				138	pattern,
				139	parse_result.error,
				140	"malformed_regexp");
				141	return Handle<Object>::null();
				142	}
				143
				144	if (parse_result.simple && !flags.is_ignore_case()) {
				145	// Parse-tree is a single atom that is equal to the pattern.
				146	AtomCompile(re, pattern, flags, pattern);
				147	} else if (parse_result.tree->IsAtom() &&
				148	!flags.is_ignore_case() &&
				149	parse_result.capture_count == 0) {
				150	RegExpAtom* atom = parse_result.tree->AsAtom();
				151	Vector<const uc16> atom_pattern = atom->data();
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	152	Handle<String> atom_string =
				153	isolate->factory()->NewStringFromTwoByte(atom_pattern);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	154	AtomCompile(re, pattern, flags, atom_string);
				155	} else {
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	156	IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	157	}
				158	ASSERT(re->data()->IsFixedArray());
				159	// Compilation succeeded so the data is set on the regexp
				160	// and we can store it in the cache.
				161	Handle<FixedArray> data(FixedArray::cast(re->data()));
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	162	compilation_cache->PutRegExp(pattern, flags, data);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	163
				164	return re;
				165	}
				166
				167
				168	Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
				169	Handle<String> subject,
				170	int index,
				171	Handle<JSArray> last_match_info) {
				172	switch (regexp->TypeTag()) {
				173	case JSRegExp::ATOM:
				174	return AtomExec(regexp, subject, index, last_match_info);
				175	case JSRegExp::IRREGEXP: {
				176	Handle<Object> result =
				177	IrregexpExec(regexp, subject, index, last_match_info);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	178	ASSERT(!result.is_null() \|\| Isolate::Current()->has_pending_exception());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	179	return result;
				180	}
				181	default:
				182	UNREACHABLE();
				183	return Handle<Object>::null();
				184	}
				185	}
				186
				187
				188	// RegExp Atom implementation: Simple string search using indexOf.
				189
				190
				191	void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
				192	Handle<String> pattern,
				193	JSRegExp::Flags flags,
				194	Handle<String> match_pattern) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	195	re->GetIsolate()->factory()->SetRegExpAtomData(re,
				196	JSRegExp::ATOM,
				197	pattern,
				198	flags,
				199	match_pattern);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	200	}
				201
				202
				203	static void SetAtomLastCapture(FixedArray* array,
				204	String* subject,
				205	int from,
				206	int to) {
				207	NoHandleAllocation no_handles;
				208	RegExpImpl::SetLastCaptureCount(array, 2);
				209	RegExpImpl::SetLastSubject(array, subject);
				210	RegExpImpl::SetLastInput(array, subject);
				211	RegExpImpl::SetCapture(array, 0, from);
				212	RegExpImpl::SetCapture(array, 1, to);
				213	}
				214
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	215	/* template <typename SubjectChar>, typename PatternChar>
				216	static int ReStringMatch(Vector<const SubjectChar> sub_vector,
				217	Vector<const PatternChar> pat_vector,
				218	int start_index) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	219
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	220	int pattern_length = pat_vector.length();
				221	if (pattern_length == 0) return start_index;
				222
				223	int subject_length = sub_vector.length();
				224	if (start_index + pattern_length > subject_length) return -1;
				225	return SearchString(sub_vector, pat_vector, start_index);
				226	}
				227	*/
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	228	Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
				229	Handle<String> subject,
				230	int index,
				231	Handle<JSArray> last_match_info) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	232	Isolate* isolate = re->GetIsolate();
				233
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	234	ASSERT(0 <= index);
				235	ASSERT(index <= subject->length());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	236
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	237	if (!subject->IsFlat()) FlattenString(subject);
				238	AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
				239	// Extract flattened substrings of cons strings before determining asciiness.
				240	String* seq_sub = *subject;
				241	if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	242
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	243	String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
				244	int needle_len = needle->length();
				245
				246	if (needle_len != 0) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	247	if (index + needle_len > subject->length())
				248	return isolate->factory()->null_value();
				249
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	250	// dispatch on type of strings
				251	index = (needle->IsAsciiRepresentation()
				252	? (seq_sub->IsAsciiRepresentation()
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	253	? SearchString(isolate,
				254	seq_sub->ToAsciiVector(),
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	255	needle->ToAsciiVector(),
				256	index)
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	257	: SearchString(isolate,
				258	seq_sub->ToUC16Vector(),
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	259	needle->ToAsciiVector(),
				260	index))
				261	: (seq_sub->IsAsciiRepresentation()
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	262	? SearchString(isolate,
				263	seq_sub->ToAsciiVector(),
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	264	needle->ToUC16Vector(),
				265	index)
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	266	: SearchString(isolate,
				267	seq_sub->ToUC16Vector(),
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	268	needle->ToUC16Vector(),
				269	index)));
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	270	if (index == -1) return FACTORY->null_value();
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	271	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	272	ASSERT(last_match_info->HasFastElements());
				273
				274	{
				275	NoHandleAllocation no_handles;
				276	FixedArray* array = FixedArray::cast(last_match_info->elements());
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	277	SetAtomLastCapture(array, *subject, index, index + needle_len);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	278	}
				279	return last_match_info;
				280	}
				281
				282
				283	// Irregexp implementation.
				284
				285	// Ensures that the regexp object contains a compiled version of the
				286	// source for either ASCII or non-ASCII strings.
				287	// If the compiled version doesn't already exist, it is compiled
				288	// from the source pattern.
				289	// If compilation fails, an exception is thrown and this function
				290	// returns false.
				291	bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
				292	Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	293	#ifdef V8_INTERPRETED_REGEXP
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	294	if (compiled_code->IsByteArray()) return true;
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	295	#else // V8_INTERPRETED_REGEXP (RegExp native code)
				296	if (compiled_code->IsCode()) return true;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	297	#endif
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	298	// We could potentially have marked this as flushable, but have kept
				299	// a saved version if we did not flush it yet.
				300	Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
				301	if (saved_code->IsCode()) {
				302	// Reinstate the code in the original place.
				303	re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
				304	ASSERT(compiled_code->IsSmi());
				305	return true;
				306	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	307	return CompileIrregexp(re, is_ascii);
				308	}
				309
				310
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	311	static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
				312	bool is_ascii,
				313	Handle<String> error_message,
				314	Isolate* isolate) {
				315	Factory* factory = isolate->factory();
				316	Handle<FixedArray> elements = factory->NewFixedArray(2);
				317	elements->set(0, re->Pattern());
				318	elements->set(1, *error_message);
				319	Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
				320	Handle<Object> regexp_err =
				321	factory->NewSyntaxError("malformed_regexp", array);
				322	isolate->Throw(*regexp_err);
				323	return false;
				324	}
				325
				326
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	327	bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
				328	// Compile the RegExp.
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	329	Isolate* isolate = re->GetIsolate();
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	330	CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	331	PostponeInterruptsScope postpone(isolate);
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	332	// If we had a compilation error the last time this is saved at the
				333	// saved code index.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	334	Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	335	// When arriving here entry can only be a smi, either representing an
				336	// uncompiled regexp, a previous compilation error, or code that has
				337	// been flushed.
				338	ASSERT(entry->IsSmi());
				339	int entry_value = Smi::cast(entry)->value();
				340	ASSERT(entry_value == JSRegExp::kUninitializedValue \|\|
				341	entry_value == JSRegExp::kCompilationErrorValue \|\|
				342	(entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
				343
				344	if (entry_value == JSRegExp::kCompilationErrorValue) {
				345	// A previous compilation failed and threw an error which we store in
				346	// the saved code index (we store the error message, not the actual
				347	// error). Recreate the error object and throw it.
				348	Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
				349	ASSERT(error_string->IsString());
				350	Handle<String> error_message(String::cast(error_string));
				351	CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	352	return false;
				353	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	354
				355	JSRegExp::Flags flags = re->GetFlags();
				356
				357	Handle<String> pattern(re->Pattern());
				358	if (!pattern->IsFlat()) {
				359	FlattenString(pattern);
				360	}
				361
				362	RegExpCompileData compile_data;
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	363	FlatStringReader reader(isolate, pattern);
Teng-Hui Zhu	3e5fa29	2010-11-09 16:16:48 -0800	[diff] [blame]	364	if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
				365	&compile_data)) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	366	// Throw an exception if we fail to parse the pattern.
				367	// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
				368	ThrowRegExpException(re,
				369	pattern,
				370	compile_data.error,
				371	"malformed_regexp");
				372	return false;
				373	}
				374	RegExpEngine::CompilationResult result =
				375	RegExpEngine::Compile(&compile_data,
				376	flags.is_ignore_case(),
				377	flags.is_multiline(),
				378	pattern,
				379	is_ascii);
				380	if (result.error_message != NULL) {
				381	// Unable to compile regexp.
Ben Murdoch	e0cee9b	2011-05-25 10:26:03 +0100	[diff] [blame]	382	Handle<String> error_message =
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	383	isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
				384	CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	385	return false;
				386	}
				387
				388	Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
				389	data->set(JSRegExp::code_index(is_ascii), result.code);
				390	int register_max = IrregexpMaxRegisterCount(*data);
				391	if (result.num_registers > register_max) {
				392	SetIrregexpMaxRegisterCount(*data, result.num_registers);
				393	}
				394
				395	return true;
				396	}
				397
				398
				399	int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {
				400	return Smi::cast(
				401	re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
				402	}
				403
				404
				405	void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {
				406	re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
				407	}
				408
				409
				410	int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
				411	return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
				412	}
				413
				414
				415	int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
				416	return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
				417	}
				418
				419
				420	ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
				421	return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
				422	}
				423
				424
				425	Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
				426	return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
				427	}
				428
				429
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	430	void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
				431	Handle<String> pattern,
				432	JSRegExp::Flags flags,
				433	int capture_count) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	434	// Initialize compiled code entries to null.
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	435	re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
				436	JSRegExp::IRREGEXP,
				437	pattern,
				438	flags,
				439	capture_count);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	440	}
				441
				442
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	443	int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
				444	Handle<String> subject) {
				445	if (!subject->IsFlat()) {
				446	FlattenString(subject);
				447	}
Steve Block	8defd9f	2010-07-08 12:39:36 +0100	[diff] [blame]	448	// Check the asciiness of the underlying storage.
				449	bool is_ascii;
				450	{
				451	AssertNoAllocation no_gc;
				452	String* sequential_string = *subject;
				453	if (subject->IsConsString()) {
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	454	sequential_string = ConsString::cast(*subject)->first();
Steve Block	8defd9f	2010-07-08 12:39:36 +0100	[diff] [blame]	455	}
				456	is_ascii = sequential_string->IsAsciiRepresentation();
				457	}
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	458	if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
				459	return -1;
				460	}
				461	#ifdef V8_INTERPRETED_REGEXP
				462	// Byte-code regexp needs space allocated for all its registers.
				463	return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
				464	#else // V8_INTERPRETED_REGEXP
				465	// Native regexp only needs room to output captures. Registers are handled
				466	// internally.
				467	return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
				468	#endif // V8_INTERPRETED_REGEXP
				469	}
				470
				471
Steve Block	791712a	2010-08-27 10:21:07 +0100	[diff] [blame]	472	RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
				473	Handle<JSRegExp> regexp,
				474	Handle<String> subject,
				475	int index,
Ben Murdoch	b8e0da2	2011-05-16 14:20:40 +0100	[diff] [blame]	476	Vector<int> output) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	477	Isolate* isolate = regexp->GetIsolate();
				478
				479	Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	480
				481	ASSERT(index >= 0);
				482	ASSERT(index <= subject->length());
				483	ASSERT(subject->IsFlat());
				484
Steve Block	8defd9f	2010-07-08 12:39:36 +0100	[diff] [blame]	485	// A flat ASCII string might have a two-byte first part.
				486	if (subject->IsConsString()) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	487	subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
Steve Block	8defd9f	2010-07-08 12:39:36 +0100	[diff] [blame]	488	}
				489
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	490	#ifndef V8_INTERPRETED_REGEXP
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	491	ASSERT(output.length() >= (IrregexpNumberOfCaptures(irregexp) + 1) 2);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	492	do {
				493	bool is_ascii = subject->IsAsciiRepresentation();
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	494	EnsureCompiledIrregexp(regexp, is_ascii);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	495	Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	496	NativeRegExpMacroAssembler::Result res =
				497	NativeRegExpMacroAssembler::Match(code,
				498	subject,
				499	output.start(),
				500	output.length(),
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	501	index,
				502	isolate);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	503	if (res != NativeRegExpMacroAssembler::RETRY) {
				504	ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION \|\|
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	505	isolate->has_pending_exception());
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	506	STATIC_ASSERT(
				507	static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
				508	STATIC_ASSERT(
				509	static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
				510	STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
				511	== RE_EXCEPTION);
				512	return static_cast<IrregexpResult>(res);
				513	}
				514	// If result is RETRY, the string has changed representation, and we
				515	// must restart from scratch.
				516	// In this case, it means we must make sure we are prepared to handle
Steve Block	8defd9f	2010-07-08 12:39:36 +0100	[diff] [blame]	517	// the, potentially, different subject (the string can switch between
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	518	// being internal and external, and even between being ASCII and UC16,
				519	// but the characters are always the same).
				520	IrregexpPrepare(regexp, subject);
				521	} while (true);
				522	UNREACHABLE();
				523	return RE_EXCEPTION;
				524	#else // V8_INTERPRETED_REGEXP
				525
				526	ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
				527	bool is_ascii = subject->IsAsciiRepresentation();
				528	// We must have done EnsureCompiledIrregexp, so we can get the number of
				529	// registers.
				530	int* register_vector = output.start();
				531	int number_of_capture_registers =
				532	(IrregexpNumberOfCaptures(irregexp) + 1) 2;
				533	for (int i = number_of_capture_registers - 1; i >= 0; i--) {
				534	register_vector[i] = -1;
				535	}
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	536	Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	537
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	538	if (IrregexpInterpreter::Match(isolate,
				539	byte_codes,
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	540	subject,
				541	register_vector,
				542	index)) {
				543	return RE_SUCCESS;
				544	}
				545	return RE_FAILURE;
				546	#endif // V8_INTERPRETED_REGEXP
				547	}
				548
				549
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	550	Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
				551	Handle<String> subject,
				552	int previous_index,
				553	Handle<JSArray> last_match_info) {
				554	ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
				555
				556	// Prepare space for the return values.
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	557	#ifdef V8_INTERPRETED_REGEXP
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	558	#ifdef DEBUG
				559	if (FLAG_trace_regexp_bytecodes) {
				560	String* pattern = jsregexp->Pattern();
				561	PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
				562	PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
				563	}
				564	#endif
				565	#endif
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	566	int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
				567	if (required_registers < 0) {
				568	// Compiling failed with an exception.
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	569	ASSERT(Isolate::Current()->has_pending_exception());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	570	return Handle<Object>::null();
				571	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	572
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	573	OffsetsVector registers(required_registers);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	574
Iain Merrick	7568138	2010-08-19 15:07:18 +0100	[diff] [blame]	575	IrregexpResult res = RegExpImpl::IrregexpExecOnce(
Ben Murdoch	b8e0da2	2011-05-16 14:20:40 +0100	[diff] [blame]	576	jsregexp, subject, previous_index, Vector<int>(registers.vector(),
				577	registers.length()));
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	578	if (res == RE_SUCCESS) {
				579	int capture_register_count =
				580	(IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
				581	last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
				582	AssertNoAllocation no_gc;
				583	int* register_vector = registers.vector();
				584	FixedArray* array = FixedArray::cast(last_match_info->elements());
				585	for (int i = 0; i < capture_register_count; i += 2) {
				586	SetCapture(array, i, register_vector[i]);
				587	SetCapture(array, i + 1, register_vector[i + 1]);
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	588	}
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	589	SetLastCaptureCount(array, capture_register_count);
				590	SetLastSubject(array, *subject);
				591	SetLastInput(array, *subject);
				592	return last_match_info;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	593	}
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	594	if (res == RE_EXCEPTION) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	595	ASSERT(Isolate::Current()->has_pending_exception());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	596	return Handle<Object>::null();
				597	}
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	598	ASSERT(res == RE_FAILURE);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	599	return Isolate::Current()->factory()->null_value();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	600	}
				601
				602
				603	// -------------------------------------------------------------------
				604	// Implementation of the Irregexp regular expression engine.
				605	//
				606	// The Irregexp regular expression engine is intended to be a complete
				607	// implementation of ECMAScript regular expressions. It generates either
				608	// bytecodes or native code.
				609
				610	// The Irregexp regexp engine is structured in three steps.
				611	// 1) The parser generates an abstract syntax tree. See ast.cc.
				612	// 2) From the AST a node network is created. The nodes are all
				613	// subclasses of RegExpNode. The nodes represent states when
				614	// executing a regular expression. Several optimizations are
				615	// performed on the node network.
				616	// 3) From the nodes we generate either byte codes or native code
				617	// that can actually execute the regular expression (perform
				618	// the search). The code generation step is described in more
				619	// detail below.
				620
				621	// Code generation.
				622	//
				623	// The nodes are divided into four main categories.
				624	// * Choice nodes
				625	// These represent places where the regular expression can
				626	// match in more than one way. For example on entry to an
				627	// alternation (foo|bar) or a repetition (*, +, ? or {}).
				628	// * Action nodes
				629	// These represent places where some action should be
				630	// performed. Examples include recording the current position
				631	// in the input string to a register (in order to implement
				632	// captures) or other actions on register for example in order
				633	// to implement the counters needed for {} repetitions.
				634	// * Matching nodes
				635	// These attempt to match some element part of the input string.
				636	// Examples of elements include character classes, plain strings
				637	// or back references.
				638	// * End nodes
				639	// These are used to implement the actions required on finding
				640	// a successful match or failing to find a match.
				641	//
				642	// The code generated (whether as byte codes or native code) maintains
				643	// some state as it runs. This consists of the following elements:
				644	//
				645	// * The capture registers. Used for string captures.
				646	// * Other registers. Used for counters etc.
				647	// * The current position.
				648	// * The stack of backtracking information. Used when a matching node
				649	// fails to find a match and needs to try an alternative.
				650	//
				651	// Conceptual regular expression execution model:
				652	//
				653	// There is a simple conceptual model of regular expression execution
				654	// which will be presented first. The actual code generated is a more
				655	// efficient simulation of the simple conceptual model:
				656	//
				657	// * Choice nodes are implemented as follows:
				658	// For each choice except the last {
				659	// push current position
				660	// push backtrack code location
				661	// <generate code to test for choice>
				662	// backtrack code location:
				663	// pop current position
				664	// }
				665	// <generate code to test for last choice>
				666	//
				667	// * Action nodes are generated as follows:
				668	// <push affected registers on backtrack stack>
				669	// <generate code to perform action>
				670	// push backtrack code location
				671	// <generate code to test for following nodes>
				672	// backtrack code location:
				673	// <pop affected registers to restore their state>
				674	// <pop backtrack location from stack and go to it>
				675	//
				676	// * Matching nodes are generated as follows:
				677	// if input string matches at current position
				678	// update current position
				679	// <generate code to test for following nodes>
				680	// else
				681	// <pop backtrack location from stack and go to it>
				682	//
				683	// Thus it can be seen that the current position is saved and restored
				684	// by the choice nodes, whereas the registers are saved and restored by
				685	// the action nodes that manipulate them.
				686	//
				687	// The other interesting aspect of this model is that nodes are generated
				688	// at the point where they are needed by a recursive call to Emit(). If
				689	// the node has already been code generated then the Emit() call will
				690	// generate a jump to the previously generated code instead. In order to
				691	// limit recursion it is possible for the Emit() function to put the node
				692	// on a work list for later generation and instead generate a jump. The
				693	// destination of the jump is resolved later when the code is generated.
				694	//
				695	// Actual regular expression code generation.
				696	//
				697	// Code generation is actually more complicated than the above. In order
				698	// to improve the efficiency of the generated code some optimizations are
				699	// performed
				700	//
				701	// * Choice nodes have 1-character lookahead.
				702	// A choice node looks at the following character and eliminates some of
				703	// the choices immediately based on that character. This is not yet
				704	// implemented.
				705	// * Simple greedy loops store reduced backtracking information.
				706	// A quantifier like /.*foo/m will greedily match the whole input. It will
				707	// then need to backtrack to a point where it can match "foo". The naive
				708	// implementation of this would push each character position onto the
				709	// backtracking stack, then pop them off one by one. This would use space
				710	// proportional to the length of the input string. However since the "."
				711	// can only match in one way and always has a constant length (in this case
				712	// of 1) it suffices to store the current position on the top of the stack
				713	// once. Matching now becomes merely incrementing the current position and
				714	// backtracking becomes decrementing the current position and checking the
				715	// result against the stored current position. This is faster and saves
				716	// space.
				717	// * The current state is virtualized.
				718	// This is used to defer expensive operations until it is clear that they
				719	// are needed and to generate code for a node more than once, allowing
				720	// specialized and efficient versions of the code to be created. This is
				721	// explained in the section below.
				722	//
				723	// Execution state virtualization.
				724	//
				725	// Instead of emitting code, nodes that manipulate the state can record their
				726	// manipulation in an object called the Trace. The Trace object can record a
				727	// current position offset, an optional backtrack code location on the top of
				728	// the virtualized backtrack stack and some register changes. When a node is
				729	// to be emitted it can flush the Trace or update it. Flushing the Trace
				730	// will emit code to bring the actual state into line with the virtual state.
				731	// Avoiding flushing the state can postpone some work (eg updates of capture
				732	// registers). Postponing work can save time when executing the regular
				733	// expression since it may be found that the work never has to be done as a
				734	// failure to match can occur. In addition it is much faster to jump to a
				735	// known backtrack code location than it is to pop an unknown backtrack
				736	// location from the stack and jump there.
				737	//
				738	// The virtual state found in the Trace affects code generation. For example
				739	// the virtual state contains the difference between the actual current
				740	// position and the virtual current position, and matching code needs to use
				741	// this offset to attempt a match in the correct location of the input
				742	// string. Therefore code generated for a non-trivial trace is specialized
				743	// to that trace. The code generator therefore has the ability to generate
				744	// code for each node several times. In order to limit the size of the
				745	// generated code there is an arbitrary limit on how many specialized sets of
				746	// code may be generated for a given node. If the limit is reached, the
				747	// trace is flushed and a generic version of the code for a node is emitted.
				748	// This is subsequently used for that node. The code emitted for a non-generic
				749	// trace is not recorded in the node and so it cannot currently be reused in
				750	// the event that code generation is requested for an identical trace.
				751
				752
				753	void RegExpTree::AppendToText(RegExpText* text) {
				754	UNREACHABLE();
				755	}
				756
				757
				758	void RegExpAtom::AppendToText(RegExpText* text) {
				759	text->AddElement(TextElement::Atom(this));
				760	}
				761
				762
				763	void RegExpCharacterClass::AppendToText(RegExpText* text) {
				764	text->AddElement(TextElement::CharClass(this));
				765	}
				766
				767
				768	void RegExpText::AppendToText(RegExpText* text) {
				769	for (int i = 0; i < elements()->length(); i++)
				770	text->AddElement(elements()->at(i));
				771	}
				772
				773
				774	TextElement TextElement::Atom(RegExpAtom* atom) {
				775	TextElement result = TextElement(ATOM);
				776	result.data.u_atom = atom;
				777	return result;
				778	}
				779
				780
				781	TextElement TextElement::CharClass(
				782	RegExpCharacterClass* char_class) {
				783	TextElement result = TextElement(CHAR_CLASS);
				784	result.data.u_char_class = char_class;
				785	return result;
				786	}
				787
				788
				789	int TextElement::length() {
				790	if (type == ATOM) {
				791	return data.u_atom->length();
				792	} else {
				793	ASSERT(type == CHAR_CLASS);
				794	return 1;
				795	}
				796	}
				797
				798
				799	DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
				800	if (table_ == NULL) {
				801	table_ = new DispatchTable();
				802	DispatchTableConstructor cons(table_, ignore_case);
				803	cons.BuildTable(this);
				804	}
				805	return table_;
				806	}
				807
				808
				809	class RegExpCompiler {
				810	public:
				811	RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);
				812
				813	int AllocateRegister() {
				814	if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
				815	reg_exp_too_big_ = true;
				816	return next_register_;
				817	}
				818	return next_register_++;
				819	}
				820
				821	RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
				822	RegExpNode* start,
				823	int capture_count,
				824	Handle<String> pattern);
				825
				826	inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
				827
				828	static const int kImplementationOffset = 0;
				829	static const int kNumberOfRegistersOffset = 0;
				830	static const int kCodeOffset = 1;
				831
				832	RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
				833	EndNode* accept() { return accept_; }
				834
				835	static const int kMaxRecursion = 100;
				836	inline int recursion_depth() { return recursion_depth_; }
				837	inline void IncrementRecursionDepth() { recursion_depth_++; }
				838	inline void DecrementRecursionDepth() { recursion_depth_--; }
				839
				840	void SetRegExpTooBig() { reg_exp_too_big_ = true; }
				841
				842	inline bool ignore_case() { return ignore_case_; }
				843	inline bool ascii() { return ascii_; }
				844
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	845	int current_expansion_factor() { return current_expansion_factor_; }
				846	void set_current_expansion_factor(int value) {
				847	current_expansion_factor_ = value;
				848	}
				849
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	850	static const int kNoRegister = -1;
				851	private:
				852	EndNode* accept_;
				853	int next_register_;
				854	List<RegExpNode> work_list_;
				855	int recursion_depth_;
				856	RegExpMacroAssembler* macro_assembler_;
				857	bool ignore_case_;
				858	bool ascii_;
				859	bool reg_exp_too_big_;
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	860	int current_expansion_factor_;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	861	};
				862
				863
				864	class RecursionCheck {
				865	public:
				866	explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
				867	compiler->IncrementRecursionDepth();
				868	}
				869	~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
				870	private:
				871	RegExpCompiler* compiler_;
				872	};
				873
				874
				875	static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
				876	return RegExpEngine::CompilationResult("RegExp too big");
				877	}
				878
				879
				880	// Attempts to compile the regexp using an Irregexp code generator. Returns
				881	// a fixed array or a null handle depending on whether it succeeded.
				882	RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
				883	: next_register_(2 * (capture_count + 1)),
				884	work_list_(NULL),
				885	recursion_depth_(0),
				886	ignore_case_(ignore_case),
				887	ascii_(ascii),
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	888	reg_exp_too_big_(false),
				889	current_expansion_factor_(1) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	890	accept_ = new EndNode(EndNode::ACCEPT);
				891	ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
				892	}
				893
				894
				895	RegExpEngine::CompilationResult RegExpCompiler::Assemble(
				896	RegExpMacroAssembler* macro_assembler,
				897	RegExpNode* start,
				898	int capture_count,
				899	Handle<String> pattern) {
Steve Block	053d10c	2011-06-13 19:13:29 +0100	[diff] [blame]	900	Heap* heap = pattern->GetHeap();
				901
				902	bool use_slow_safe_regexp_compiler = false;
				903	if (heap->total_regexp_code_generated() >
				904	RegExpImpl::kRegWxpCompiledLimit &&
				905	heap->isolate()->memory_allocator()->SizeExecutable() >
				906	RegExpImpl::kRegExpExecutableMemoryLimit) {
				907	use_slow_safe_regexp_compiler = true;
				908	}
				909
				910	macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
				911
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	912	#ifdef DEBUG
				913	if (FLAG_trace_regexp_assembler)
				914	macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
				915	else
				916	#endif
				917	macro_assembler_ = macro_assembler;
Steve Block	053d10c	2011-06-13 19:13:29 +0100	[diff] [blame]	918
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	919	List <RegExpNode*> work_list(0);
				920	work_list_ = &work_list;
				921	Label fail;
				922	macro_assembler_->PushBacktrack(&fail);
				923	Trace new_trace;
				924	start->Emit(this, &new_trace);
				925	macro_assembler_->Bind(&fail);
				926	macro_assembler_->Fail();
				927	while (!work_list.is_empty()) {
				928	work_list.RemoveLast()->Emit(this, &new_trace);
				929	}
				930	if (reg_exp_too_big_) return IrregexpRegExpTooBig();
				931
Steve Block	053d10c	2011-06-13 19:13:29 +0100	[diff] [blame]	932	Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
				933	heap->IncreaseTotalRegexpCodeGenerated(code->Size());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	934	work_list_ = NULL;
				935	#ifdef DEBUG
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	936	if (FLAG_print_code) {
				937	Handle<Code>::cast(code)->Disassemble(*pattern->ToCString());
				938	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	939	if (FLAG_trace_regexp_assembler) {
				940	delete macro_assembler_;
				941	}
				942	#endif
				943	return RegExpEngine::CompilationResult(*code, next_register_);
				944	}
				945
				946
				947	bool Trace::DeferredAction::Mentions(int that) {
				948	if (type() == ActionNode::CLEAR_CAPTURES) {
				949	Interval range = static_cast<DeferredClearCaptures*>(this)->range();
				950	return range.Contains(that);
				951	} else {
				952	return reg() == that;
				953	}
				954	}
				955
				956
				957	bool Trace::mentions_reg(int reg) {
				958	for (DeferredAction* action = actions_;
				959	action != NULL;
				960	action = action->next()) {
				961	if (action->Mentions(reg))
				962	return true;
				963	}
				964	return false;
				965	}
				966
				967
				968	bool Trace::GetStoredPosition(int reg, int* cp_offset) {
				969	ASSERT_EQ(0, *cp_offset);
				970	for (DeferredAction* action = actions_;
				971	action != NULL;
				972	action = action->next()) {
				973	if (action->Mentions(reg)) {
				974	if (action->type() == ActionNode::STORE_POSITION) {
				975	cp_offset = static_cast<DeferredCapture>(action)->cp_offset();
				976	return true;
				977	} else {
				978	return false;
				979	}
				980	}
				981	}
				982	return false;
				983	}
				984
				985
				986	int Trace::FindAffectedRegisters(OutSet* affected_registers) {
				987	int max_register = RegExpCompiler::kNoRegister;
				988	for (DeferredAction* action = actions_;
				989	action != NULL;
				990	action = action->next()) {
				991	if (action->type() == ActionNode::CLEAR_CAPTURES) {
				992	Interval range = static_cast<DeferredClearCaptures*>(action)->range();
				993	for (int i = range.from(); i <= range.to(); i++)
				994	affected_registers->Set(i);
				995	if (range.to() > max_register) max_register = range.to();
				996	} else {
				997	affected_registers->Set(action->reg());
				998	if (action->reg() > max_register) max_register = action->reg();
				999	}
				1000	}
				1001	return max_register;
				1002	}
				1003
				1004
				1005	void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
				1006	int max_register,
				1007	OutSet& registers_to_pop,
				1008	OutSet& registers_to_clear) {
				1009	for (int reg = max_register; reg >= 0; reg--) {
				1010	if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
				1011	else if (registers_to_clear.Get(reg)) {
				1012	int clear_to = reg;
				1013	while (reg > 0 && registers_to_clear.Get(reg - 1)) {
				1014	reg--;
				1015	}
				1016	assembler->ClearRegisters(reg, clear_to);
				1017	}
				1018	}
				1019	}
				1020
				1021
				1022	void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
				1023	int max_register,
				1024	OutSet& affected_registers,
				1025	OutSet* registers_to_pop,
				1026	OutSet* registers_to_clear) {
				1027	// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
				1028	const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
				1029
				1030	// Count pushes performed to force a stack limit check occasionally.
				1031	int pushes = 0;
				1032
				1033	for (int reg = 0; reg <= max_register; reg++) {
				1034	if (!affected_registers.Get(reg)) {
				1035	continue;
				1036	}
				1037
				1038	// The chronologically first deferred action in the trace
				1039	// is used to infer the action needed to restore a register
				1040	// to its previous state (or not, if it's safe to ignore it).
				1041	enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
				1042	DeferredActionUndoType undo_action = IGNORE;
				1043
				1044	int value = 0;
				1045	bool absolute = false;
				1046	bool clear = false;
				1047	int store_position = -1;
				1048	// This is a little tricky because we are scanning the actions in reverse
				1049	// historical order (newest first).
				1050	for (DeferredAction* action = actions_;
				1051	action != NULL;
				1052	action = action->next()) {
				1053	if (action->Mentions(reg)) {
				1054	switch (action->type()) {
				1055	case ActionNode::SET_REGISTER: {
				1056	Trace::DeferredSetRegister* psr =
				1057	static_cast<Trace::DeferredSetRegister*>(action);
				1058	if (!absolute) {
				1059	value += psr->value();
				1060	absolute = true;
				1061	}
				1062	// SET_REGISTER is currently only used for newly introduced loop
				1063	// counters. They can have a significant previous value if they
				1064	// occour in a loop. TODO(lrn): Propagate this information, so
				1065	// we can set undo_action to IGNORE if we know there is no value to
				1066	// restore.
				1067	undo_action = RESTORE;
				1068	ASSERT_EQ(store_position, -1);
				1069	ASSERT(!clear);
				1070	break;
				1071	}
				1072	case ActionNode::INCREMENT_REGISTER:
				1073	if (!absolute) {
				1074	value++;
				1075	}
				1076	ASSERT_EQ(store_position, -1);
				1077	ASSERT(!clear);
				1078	undo_action = RESTORE;
				1079	break;
				1080	case ActionNode::STORE_POSITION: {
				1081	Trace::DeferredCapture* pc =
				1082	static_cast<Trace::DeferredCapture*>(action);
				1083	if (!clear && store_position == -1) {
				1084	store_position = pc->cp_offset();
				1085	}
				1086
				1087	// For captures we know that stores and clears alternate.
				1088	// Other register, are never cleared, and if the occur
				1089	// inside a loop, they might be assigned more than once.
				1090	if (reg <= 1) {
				1091	// Registers zero and one, aka "capture zero", is
				1092	// always set correctly if we succeed. There is no
				1093	// need to undo a setting on backtrack, because we
				1094	// will set it again or fail.
				1095	undo_action = IGNORE;
				1096	} else {
				1097	undo_action = pc->is_capture() ? CLEAR : RESTORE;
				1098	}
				1099	ASSERT(!absolute);
				1100	ASSERT_EQ(value, 0);
				1101	break;
				1102	}
				1103	case ActionNode::CLEAR_CAPTURES: {
				1104	// Since we're scanning in reverse order, if we've already
				1105	// set the position we have to ignore historically earlier
				1106	// clearing operations.
				1107	if (store_position == -1) {
				1108	clear = true;
				1109	}
				1110	undo_action = RESTORE;
				1111	ASSERT(!absolute);
				1112	ASSERT_EQ(value, 0);
				1113	break;
				1114	}
				1115	default:
				1116	UNREACHABLE();
				1117	break;
				1118	}
				1119	}
				1120	}
				1121	// Prepare for the undo-action (e.g., push if it's going to be popped).
				1122	if (undo_action == RESTORE) {
				1123	pushes++;
				1124	RegExpMacroAssembler::StackCheckFlag stack_check =
				1125	RegExpMacroAssembler::kNoStackLimitCheck;
				1126	if (pushes == push_limit) {
				1127	stack_check = RegExpMacroAssembler::kCheckStackLimit;
				1128	pushes = 0;
				1129	}
				1130
				1131	assembler->PushRegister(reg, stack_check);
				1132	registers_to_pop->Set(reg);
				1133	} else if (undo_action == CLEAR) {
				1134	registers_to_clear->Set(reg);
				1135	}
				1136	// Perform the chronologically last action (or accumulated increment)
				1137	// for the register.
				1138	if (store_position != -1) {
				1139	assembler->WriteCurrentPositionToRegister(reg, store_position);
				1140	} else if (clear) {
				1141	assembler->ClearRegisters(reg, reg);
				1142	} else if (absolute) {
				1143	assembler->SetRegister(reg, value);
				1144	} else if (value != 0) {
				1145	assembler->AdvanceRegister(reg, value);
				1146	}
				1147	}
				1148	}
				1149
				1150
				1151	// This is called as we come into a loop choice node and some other tricky
				1152	// nodes. It normalizes the state of the code generator to ensure we can
				1153	// generate generic code.
				1154	void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
				1155	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1156
				1157	ASSERT(!is_trivial());
				1158
				1159	if (actions_ == NULL && backtrack() == NULL) {
				1160	// Here we just have some deferred cp advances to fix and we are back to
				1161	// a normal situation. We may also have to forget some information gained
				1162	// through a quick check that was already performed.
				1163	if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
				1164	// Create a new trivial state and generate the node with that.
				1165	Trace new_state;
				1166	successor->Emit(compiler, &new_state);
				1167	return;
				1168	}
				1169
				1170	// Generate deferred actions here along with code to undo them again.
				1171	OutSet affected_registers;
				1172
				1173	if (backtrack() != NULL) {
				1174	// Here we have a concrete backtrack location. These are set up by choice
				1175	// nodes and so they indicate that we have a deferred save of the current
				1176	// position which we may need to emit here.
				1177	assembler->PushCurrentPosition();
				1178	}
				1179
				1180	int max_register = FindAffectedRegisters(&affected_registers);
				1181	OutSet registers_to_pop;
				1182	OutSet registers_to_clear;
				1183	PerformDeferredActions(assembler,
				1184	max_register,
				1185	affected_registers,
				1186	&registers_to_pop,
				1187	&registers_to_clear);
				1188	if (cp_offset_ != 0) {
				1189	assembler->AdvanceCurrentPosition(cp_offset_);
				1190	}
				1191
				1192	// Create a new trivial state and generate the node with that.
				1193	Label undo;
				1194	assembler->PushBacktrack(&undo);
				1195	Trace new_state;
				1196	successor->Emit(compiler, &new_state);
				1197
				1198	// On backtrack we need to restore state.
				1199	assembler->Bind(&undo);
				1200	RestoreAffectedRegisters(assembler,
				1201	max_register,
				1202	registers_to_pop,
				1203	registers_to_clear);
				1204	if (backtrack() == NULL) {
				1205	assembler->Backtrack();
				1206	} else {
				1207	assembler->PopCurrentPosition();
				1208	assembler->GoTo(backtrack());
				1209	}
				1210	}
				1211
				1212
				1213	void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
				1214	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1215
				1216	// Omit flushing the trace. We discard the entire stack frame anyway.
				1217
				1218	if (!label()->is_bound()) {
				1219	// We are completely independent of the trace, since we ignore it,
				1220	// so this code can be used as the generic version.
				1221	assembler->Bind(label());
				1222	}
				1223
				1224	// Throw away everything on the backtrack stack since the start
				1225	// of the negative submatch and restore the character position.
				1226	assembler->ReadCurrentPositionFromRegister(current_position_register_);
				1227	assembler->ReadStackPointerFromRegister(stack_pointer_register_);
				1228	if (clear_capture_count_ > 0) {
				1229	// Clear any captures that might have been performed during the success
				1230	// of the body of the negative look-ahead.
				1231	int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
				1232	assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
				1233	}
				1234	// Now that we have unwound the stack we find at the top of the stack the
				1235	// backtrack that the BeginSubmatch node got.
				1236	assembler->Backtrack();
				1237	}
				1238
				1239
				1240	void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				1241	if (!trace->is_trivial()) {
				1242	trace->Flush(compiler, this);
				1243	return;
				1244	}
				1245	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1246	if (!label()->is_bound()) {
				1247	assembler->Bind(label());
				1248	}
				1249	switch (action_) {
				1250	case ACCEPT:
				1251	assembler->Succeed();
				1252	return;
				1253	case BACKTRACK:
				1254	assembler->GoTo(trace->backtrack());
				1255	return;
				1256	case NEGATIVE_SUBMATCH_SUCCESS:
				1257	// This case is handled in a different virtual method.
				1258	UNREACHABLE();
				1259	}
				1260	UNIMPLEMENTED();
				1261	}
				1262
				1263
				1264	void GuardedAlternative::AddGuard(Guard* guard) {
				1265	if (guards_ == NULL)
				1266	guards_ = new ZoneList<Guard*>(1);
				1267	guards_->Add(guard);
				1268	}
				1269
				1270
				1271	ActionNode* ActionNode::SetRegister(int reg,
				1272	int val,
				1273	RegExpNode* on_success) {
				1274	ActionNode* result = new ActionNode(SET_REGISTER, on_success);
				1275	result->data_.u_store_register.reg = reg;
				1276	result->data_.u_store_register.value = val;
				1277	return result;
				1278	}
				1279
				1280
				1281	ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
				1282	ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
				1283	result->data_.u_increment_register.reg = reg;
				1284	return result;
				1285	}
				1286
				1287
				1288	ActionNode* ActionNode::StorePosition(int reg,
				1289	bool is_capture,
				1290	RegExpNode* on_success) {
				1291	ActionNode* result = new ActionNode(STORE_POSITION, on_success);
				1292	result->data_.u_position_register.reg = reg;
				1293	result->data_.u_position_register.is_capture = is_capture;
				1294	return result;
				1295	}
				1296
				1297
				1298	ActionNode* ActionNode::ClearCaptures(Interval range,
				1299	RegExpNode* on_success) {
				1300	ActionNode* result = new ActionNode(CLEAR_CAPTURES, on_success);
				1301	result->data_.u_clear_captures.range_from = range.from();
				1302	result->data_.u_clear_captures.range_to = range.to();
				1303	return result;
				1304	}
				1305
				1306
				1307	ActionNode* ActionNode::BeginSubmatch(int stack_reg,
				1308	int position_reg,
				1309	RegExpNode* on_success) {
				1310	ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
				1311	result->data_.u_submatch.stack_pointer_register = stack_reg;
				1312	result->data_.u_submatch.current_position_register = position_reg;
				1313	return result;
				1314	}
				1315
				1316
				1317	ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
				1318	int position_reg,
				1319	int clear_register_count,
				1320	int clear_register_from,
				1321	RegExpNode* on_success) {
				1322	ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
				1323	result->data_.u_submatch.stack_pointer_register = stack_reg;
				1324	result->data_.u_submatch.current_position_register = position_reg;
				1325	result->data_.u_submatch.clear_register_count = clear_register_count;
				1326	result->data_.u_submatch.clear_register_from = clear_register_from;
				1327	return result;
				1328	}
				1329
				1330
				1331	ActionNode* ActionNode::EmptyMatchCheck(int start_register,
				1332	int repetition_register,
				1333	int repetition_limit,
				1334	RegExpNode* on_success) {
				1335	ActionNode* result = new ActionNode(EMPTY_MATCH_CHECK, on_success);
				1336	result->data_.u_empty_match_check.start_register = start_register;
				1337	result->data_.u_empty_match_check.repetition_register = repetition_register;
				1338	result->data_.u_empty_match_check.repetition_limit = repetition_limit;
				1339	return result;
				1340	}
				1341
				1342
				1343	#define DEFINE_ACCEPT(Type) \
				1344	void Type##Node::Accept(NodeVisitor* visitor) { \
				1345	visitor->Visit##Type(this); \
				1346	}
				1347	FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
				1348	#undef DEFINE_ACCEPT
				1349
				1350
				1351	void LoopChoiceNode::Accept(NodeVisitor* visitor) {
				1352	visitor->VisitLoopChoice(this);
				1353	}
				1354
				1355
				1356	// -------------------------------------------------------------------
				1357	// Emit code.
				1358
				1359
				1360	void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
				1361	Guard* guard,
				1362	Trace* trace) {
				1363	switch (guard->op()) {
				1364	case Guard::LT:
				1365	ASSERT(!trace->mentions_reg(guard->reg()));
				1366	macro_assembler->IfRegisterGE(guard->reg(),
				1367	guard->value(),
				1368	trace->backtrack());
				1369	break;
				1370	case Guard::GEQ:
				1371	ASSERT(!trace->mentions_reg(guard->reg()));
				1372	macro_assembler->IfRegisterLT(guard->reg(),
				1373	guard->value(),
				1374	trace->backtrack());
				1375	break;
				1376	}
				1377	}
				1378
				1379
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1380	// Returns the number of characters in the equivalence class, omitting those
				1381	// that cannot occur in the source string because it is ASCII.
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1382	static int GetCaseIndependentLetters(Isolate* isolate,
				1383	uc16 character,
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1384	bool ascii_subject,
				1385	unibrow::uchar* letters) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1386	int length =
				1387	isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	1388	// Unibrow returns 0 or 1 for characters where case independence is
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1389	// trivial.
				1390	if (length == 0) {
				1391	letters[0] = character;
				1392	length = 1;
				1393	}
				1394	if (!ascii_subject \|\| character <= String::kMaxAsciiCharCode) {
				1395	return length;
				1396	}
				1397	// The standard requires that non-ASCII characters cannot have ASCII
				1398	// character codes in their equivalence class.
				1399	return 0;
				1400	}
				1401
				1402
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1403	static inline bool EmitSimpleCharacter(Isolate* isolate,
				1404	RegExpCompiler* compiler,
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1405	uc16 c,
				1406	Label* on_failure,
				1407	int cp_offset,
				1408	bool check,
				1409	bool preloaded) {
				1410	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1411	bool bound_checked = false;
				1412	if (!preloaded) {
				1413	assembler->LoadCurrentCharacter(
				1414	cp_offset,
				1415	on_failure,
				1416	check);
				1417	bound_checked = true;
				1418	}
				1419	assembler->CheckNotCharacter(c, on_failure);
				1420	return bound_checked;
				1421	}
				1422
				1423
				1424	// Only emits non-letters (things that don't have case). Only used for case
				1425	// independent matches.
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1426	static inline bool EmitAtomNonLetter(Isolate* isolate,
				1427	RegExpCompiler* compiler,
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1428	uc16 c,
				1429	Label* on_failure,
				1430	int cp_offset,
				1431	bool check,
				1432	bool preloaded) {
				1433	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				1434	bool ascii = compiler->ascii();
				1435	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1436	int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1437	if (length < 1) {
				1438	// This can't match. Must be an ASCII subject and a non-ASCII character.
				1439	// We do not need to do anything since the ASCII pass already handled this.
				1440	return false; // Bounds not checked.
				1441	}
				1442	bool checked = false;
				1443	// We handle the length > 1 case in a later pass.
				1444	if (length == 1) {
				1445	if (ascii && c > String::kMaxAsciiCharCodeU) {
				1446	// Can't match - see above.
				1447	return false; // Bounds not checked.
				1448	}
				1449	if (!preloaded) {
				1450	macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
				1451	checked = check;
				1452	}
				1453	macro_assembler->CheckNotCharacter(c, on_failure);
				1454	}
				1455	return checked;
				1456	}
				1457
				1458
				1459	static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
				1460	bool ascii,
				1461	uc16 c1,
				1462	uc16 c2,
				1463	Label* on_failure) {
				1464	uc16 char_mask;
				1465	if (ascii) {
				1466	char_mask = String::kMaxAsciiCharCode;
				1467	} else {
				1468	char_mask = String::kMaxUC16CharCode;
				1469	}
				1470	uc16 exor = c1 ^ c2;
				1471	// Check whether exor has only one bit set.
				1472	if (((exor - 1) & exor) == 0) {
				1473	// If c1 and c2 differ only by one bit.
				1474	// Ecma262UnCanonicalize always gives the highest number last.
				1475	ASSERT(c2 > c1);
				1476	uc16 mask = char_mask ^ exor;
				1477	macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
				1478	return true;
				1479	}
				1480	ASSERT(c2 > c1);
				1481	uc16 diff = c2 - c1;
				1482	if (((diff - 1) & diff) == 0 && c1 >= diff) {
				1483	// If the characters differ by 2^n but don't differ by one bit then
				1484	// subtract the difference from the found character, then do the or
				1485	// trick. We avoid the theoretical case where negative numbers are
				1486	// involved in order to simplify code generation.
				1487	uc16 mask = char_mask ^ diff;
				1488	macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
				1489	diff,
				1490	mask,
				1491	on_failure);
				1492	return true;
				1493	}
				1494	return false;
				1495	}
				1496
				1497
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1498	typedef bool EmitCharacterFunction(Isolate* isolate,
				1499	RegExpCompiler* compiler,
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1500	uc16 c,
				1501	Label* on_failure,
				1502	int cp_offset,
				1503	bool check,
				1504	bool preloaded);
				1505
				1506	// Only emits letters (things that have case). Only used for case independent
				1507	// matches.
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1508	static inline bool EmitAtomLetter(Isolate* isolate,
				1509	RegExpCompiler* compiler,
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1510	uc16 c,
				1511	Label* on_failure,
				1512	int cp_offset,
				1513	bool check,
				1514	bool preloaded) {
				1515	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				1516	bool ascii = compiler->ascii();
				1517	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1518	int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1519	if (length <= 1) return false;
				1520	// We may not need to check against the end of the input string
				1521	// if this character lies before a character that matched.
				1522	if (!preloaded) {
				1523	macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
				1524	}
				1525	Label ok;
				1526	ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
				1527	switch (length) {
				1528	case 2: {
				1529	if (ShortCutEmitCharacterPair(macro_assembler,
				1530	ascii,
				1531	chars[0],
				1532	chars[1],
				1533	on_failure)) {
				1534	} else {
				1535	macro_assembler->CheckCharacter(chars[0], &ok);
				1536	macro_assembler->CheckNotCharacter(chars[1], on_failure);
				1537	macro_assembler->Bind(&ok);
				1538	}
				1539	break;
				1540	}
				1541	case 4:
				1542	macro_assembler->CheckCharacter(chars[3], &ok);
				1543	// Fall through!
				1544	case 3:
				1545	macro_assembler->CheckCharacter(chars[0], &ok);
				1546	macro_assembler->CheckCharacter(chars[1], &ok);
				1547	macro_assembler->CheckNotCharacter(chars[2], on_failure);
				1548	macro_assembler->Bind(&ok);
				1549	break;
				1550	default:
				1551	UNREACHABLE();
				1552	break;
				1553	}
				1554	return true;
				1555	}
				1556
				1557
				1558	static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
				1559	RegExpCharacterClass* cc,
				1560	bool ascii,
				1561	Label* on_failure,
				1562	int cp_offset,
				1563	bool check_offset,
				1564	bool preloaded) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1565	ZoneList<CharacterRange>* ranges = cc->ranges();
				1566	int max_char;
				1567	if (ascii) {
				1568	max_char = String::kMaxAsciiCharCode;
				1569	} else {
				1570	max_char = String::kMaxUC16CharCode;
				1571	}
				1572
				1573	Label success;
				1574
				1575	Label* char_is_in_class =
				1576	cc->is_negated() ? on_failure : &success;
				1577
				1578	int range_count = ranges->length();
				1579
				1580	int last_valid_range = range_count - 1;
				1581	while (last_valid_range >= 0) {
				1582	CharacterRange& range = ranges->at(last_valid_range);
				1583	if (range.from() <= max_char) {
				1584	break;
				1585	}
				1586	last_valid_range--;
				1587	}
				1588
				1589	if (last_valid_range < 0) {
				1590	if (!cc->is_negated()) {
				1591	// TODO(plesner): We can remove this when the node level does our
				1592	// ASCII optimizations for us.
				1593	macro_assembler->GoTo(on_failure);
				1594	}
				1595	if (check_offset) {
				1596	macro_assembler->CheckPosition(cp_offset, on_failure);
				1597	}
				1598	return;
				1599	}
				1600
				1601	if (last_valid_range == 0 &&
				1602	!cc->is_negated() &&
				1603	ranges->at(0).IsEverything(max_char)) {
				1604	// This is a common case hit by non-anchored expressions.
				1605	if (check_offset) {
				1606	macro_assembler->CheckPosition(cp_offset, on_failure);
				1607	}
				1608	return;
				1609	}
				1610
				1611	if (!preloaded) {
				1612	macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
				1613	}
				1614
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	1615	if (cc->is_standard() &&
				1616	macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
				1617	on_failure)) {
				1618	return;
				1619	}
				1620
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1621	for (int i = 0; i < last_valid_range; i++) {
				1622	CharacterRange& range = ranges->at(i);
				1623	Label next_range;
				1624	uc16 from = range.from();
				1625	uc16 to = range.to();
				1626	if (from > max_char) {
				1627	continue;
				1628	}
				1629	if (to > max_char) to = max_char;
				1630	if (to == from) {
				1631	macro_assembler->CheckCharacter(to, char_is_in_class);
				1632	} else {
				1633	if (from != 0) {
				1634	macro_assembler->CheckCharacterLT(from, &next_range);
				1635	}
				1636	if (to != max_char) {
				1637	macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
				1638	} else {
				1639	macro_assembler->GoTo(char_is_in_class);
				1640	}
				1641	}
				1642	macro_assembler->Bind(&next_range);
				1643	}
				1644
				1645	CharacterRange& range = ranges->at(last_valid_range);
				1646	uc16 from = range.from();
				1647	uc16 to = range.to();
				1648
				1649	if (to > max_char) to = max_char;
				1650	ASSERT(to >= from);
				1651
				1652	if (to == from) {
				1653	if (cc->is_negated()) {
				1654	macro_assembler->CheckCharacter(to, on_failure);
				1655	} else {
				1656	macro_assembler->CheckNotCharacter(to, on_failure);
				1657	}
				1658	} else {
				1659	if (from != 0) {
				1660	if (cc->is_negated()) {
				1661	macro_assembler->CheckCharacterLT(from, &success);
				1662	} else {
				1663	macro_assembler->CheckCharacterLT(from, on_failure);
				1664	}
				1665	}
				1666	if (to != String::kMaxUC16CharCode) {
				1667	if (cc->is_negated()) {
				1668	macro_assembler->CheckCharacterLT(to + 1, on_failure);
				1669	} else {
				1670	macro_assembler->CheckCharacterGT(to, on_failure);
				1671	}
				1672	} else {
				1673	if (cc->is_negated()) {
				1674	macro_assembler->GoTo(on_failure);
				1675	}
				1676	}
				1677	}
				1678	macro_assembler->Bind(&success);
				1679	}
				1680
				1681
				1682	RegExpNode::~RegExpNode() {
				1683	}
				1684
				1685
				1686	RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
				1687	Trace* trace) {
				1688	// If we are generating a greedy loop then don't stop and don't reuse code.
				1689	if (trace->stop_node() != NULL) {
				1690	return CONTINUE;
				1691	}
				1692
				1693	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				1694	if (trace->is_trivial()) {
				1695	if (label_.is_bound()) {
				1696	// We are being asked to generate a generic version, but that's already
				1697	// been done so just go to it.
				1698	macro_assembler->GoTo(&label_);
				1699	return DONE;
				1700	}
				1701	if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
				1702	// To avoid too deep recursion we push the node to the work queue and just
				1703	// generate a goto here.
				1704	compiler->AddWork(this);
				1705	macro_assembler->GoTo(&label_);
				1706	return DONE;
				1707	}
				1708	// Generate generic version of the node and bind the label for later use.
				1709	macro_assembler->Bind(&label_);
				1710	return CONTINUE;
				1711	}
				1712
				1713	// We are being asked to make a non-generic version. Keep track of how many
				1714	// non-generic versions we generate so as not to overdo it.
				1715	trace_count_++;
				1716	if (FLAG_regexp_optimization &&
				1717	trace_count_ < kMaxCopiesCodeGenerated &&
				1718	compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
				1719	return CONTINUE;
				1720	}
				1721
				1722	// If we get here code has been generated for this node too many times or
				1723	// recursion is too deep. Time to switch to a generic version. The code for
				1724	// generic versions above can handle deep recursion properly.
				1725	trace->Flush(compiler, this);
				1726	return DONE;
				1727	}
				1728
				1729
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1730	int ActionNode::EatsAtLeast(int still_to_find,
				1731	int recursion_depth,
				1732	bool not_at_start) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1733	if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
				1734	if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1735	return on_success()->EatsAtLeast(still_to_find,
				1736	recursion_depth + 1,
				1737	not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1738	}
				1739
				1740
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1741	int AssertionNode::EatsAtLeast(int still_to_find,
				1742	int recursion_depth,
				1743	bool not_at_start) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1744	if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1745	// If we know we are not at the start and we are asked "how many characters
				1746	// will you match if you succeed?" then we can answer anything since false
				1747	// implies false. So lets just return the max answer (still_to_find) since
				1748	// that won't prevent us from preloading a lot of characters for the other
				1749	// branches in the node graph.
				1750	if (type() == AT_START && not_at_start) return still_to_find;
				1751	return on_success()->EatsAtLeast(still_to_find,
				1752	recursion_depth + 1,
				1753	not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1754	}
				1755
				1756
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1757	int BackReferenceNode::EatsAtLeast(int still_to_find,
				1758	int recursion_depth,
				1759	bool not_at_start) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1760	if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1761	return on_success()->EatsAtLeast(still_to_find,
				1762	recursion_depth + 1,
				1763	not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1764	}
				1765
				1766
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1767	int TextNode::EatsAtLeast(int still_to_find,
				1768	int recursion_depth,
				1769	bool not_at_start) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1770	int answer = Length();
				1771	if (answer >= still_to_find) return answer;
				1772	if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1773	// We are not at start after this node so we set the last argument to 'true'.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1774	return answer + on_success()->EatsAtLeast(still_to_find - answer,
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1775	recursion_depth + 1,
				1776	true);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1777	}
				1778
				1779
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	1780	int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1781	int recursion_depth,
				1782	bool not_at_start) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1783	if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
				1784	// Alternative 0 is the negative lookahead, alternative 1 is what comes
				1785	// afterwards.
				1786	RegExpNode* node = alternatives_->at(1).node();
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1787	return node->EatsAtLeast(still_to_find, recursion_depth + 1, not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1788	}
				1789
				1790
				1791	void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
				1792	QuickCheckDetails* details,
				1793	RegExpCompiler* compiler,
				1794	int filled_in,
				1795	bool not_at_start) {
				1796	// Alternative 0 is the negative lookahead, alternative 1 is what comes
				1797	// afterwards.
				1798	RegExpNode* node = alternatives_->at(1).node();
				1799	return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
				1800	}
				1801
				1802
				1803	int ChoiceNode::EatsAtLeastHelper(int still_to_find,
				1804	int recursion_depth,
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1805	RegExpNode* ignore_this_node,
				1806	bool not_at_start) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1807	if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
				1808	int min = 100;
				1809	int choice_count = alternatives_->length();
				1810	for (int i = 0; i < choice_count; i++) {
				1811	RegExpNode* node = alternatives_->at(i).node();
				1812	if (node == ignore_this_node) continue;
				1813	int node_eats_at_least = node->EatsAtLeast(still_to_find,
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1814	recursion_depth + 1,
				1815	not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1816	if (node_eats_at_least < min) min = node_eats_at_least;
				1817	}
				1818	return min;
				1819	}
				1820
				1821
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1822	int LoopChoiceNode::EatsAtLeast(int still_to_find,
				1823	int recursion_depth,
				1824	bool not_at_start) {
				1825	return EatsAtLeastHelper(still_to_find,
				1826	recursion_depth,
				1827	loop_node_,
				1828	not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1829	}
				1830
				1831
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	1832	int ChoiceNode::EatsAtLeast(int still_to_find,
				1833	int recursion_depth,
				1834	bool not_at_start) {
				1835	return EatsAtLeastHelper(still_to_find,
				1836	recursion_depth,
				1837	NULL,
				1838	not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1839	}
				1840
				1841
				// Takes the left-most 1-bit and smears it out, setting all bits to its right.
				// Bits to the left of the left-most 1-bit stay clear; an input of 0 yields 0.
				static inline uint32_t SmearBitsRight(uint32_t v) {
				  // Each step doubles the width of the run of ones below the top set bit, so
				  // five steps cover all 32 bits.
				  v |= v >> 1;
				  v |= v >> 2;
				  v |= v >> 4;
				  v |= v >> 8;
				  v |= v >> 16;
				  return v;
				}
				1851
				1852
				1853	bool QuickCheckDetails::Rationalize(bool asc) {
				1854	bool found_useful_op = false;
				1855	uint32_t char_mask;
				1856	if (asc) {
				1857	char_mask = String::kMaxAsciiCharCode;
				1858	} else {
				1859	char_mask = String::kMaxUC16CharCode;
				1860	}
				1861	mask_ = 0;
				1862	value_ = 0;
				1863	int char_shift = 0;
				1864	for (int i = 0; i < characters_; i++) {
				1865	Position* pos = &positions_[i];
				1866	if ((pos->mask & String::kMaxAsciiCharCode) != 0) {
				1867	found_useful_op = true;
				1868	}
				1869	mask_ \|= (pos->mask & char_mask) << char_shift;
				1870	value_ \|= (pos->value & char_mask) << char_shift;
				1871	char_shift += asc ? 8 : 16;
				1872	}
				1873	return found_useful_op;
				1874	}
				1875
				1876
				1877	bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
				1878	Trace* trace,
				1879	bool preload_has_checked_bounds,
				1880	Label* on_possible_success,
				1881	QuickCheckDetails* details,
				1882	bool fall_through_on_failure) {
				1883	if (details->characters() == 0) return false;
				1884	GetQuickCheckDetails(details, compiler, 0, trace->at_start() == Trace::FALSE);
				1885	if (details->cannot_match()) return false;
				1886	if (!details->Rationalize(compiler->ascii())) return false;
				1887	ASSERT(details->characters() == 1 \|\|
				1888	compiler->macro_assembler()->CanReadUnaligned());
				1889	uint32_t mask = details->mask();
				1890	uint32_t value = details->value();
				1891
				1892	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1893
				1894	if (trace->characters_preloaded() != details->characters()) {
				1895	assembler->LoadCurrentCharacter(trace->cp_offset(),
				1896	trace->backtrack(),
				1897	!preload_has_checked_bounds,
				1898	details->characters());
				1899	}
				1900
				1901
				1902	bool need_mask = true;
				1903
				1904	if (details->characters() == 1) {
				1905	// If number of characters preloaded is 1 then we used a byte or 16 bit
				1906	// load so the value is already masked down.
				1907	uint32_t char_mask;
				1908	if (compiler->ascii()) {
				1909	char_mask = String::kMaxAsciiCharCode;
				1910	} else {
				1911	char_mask = String::kMaxUC16CharCode;
				1912	}
				1913	if ((mask & char_mask) == char_mask) need_mask = false;
				1914	mask &= char_mask;
				1915	} else {
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	1916	// For 2-character preloads in ASCII mode or 1-character preloads in
				1917	// TWO_BYTE mode we also use a 16 bit load with zero extend.
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1918	if (details->characters() == 2 && compiler->ascii()) {
Kristian Monsen	9dcf7e2	2010-06-28 14:14:28 +0100	[diff] [blame]	1919	if ((mask & 0x7f7f) == 0x7f7f) need_mask = false;
				1920	} else if (details->characters() == 1 && !compiler->ascii()) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1921	if ((mask & 0xffff) == 0xffff) need_mask = false;
				1922	} else {
				1923	if (mask == 0xffffffff) need_mask = false;
				1924	}
				1925	}
				1926
				1927	if (fall_through_on_failure) {
				1928	if (need_mask) {
				1929	assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
				1930	} else {
				1931	assembler->CheckCharacter(value, on_possible_success);
				1932	}
				1933	} else {
				1934	if (need_mask) {
				1935	assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
				1936	} else {
				1937	assembler->CheckNotCharacter(value, trace->backtrack());
				1938	}
				1939	}
				1940	return true;
				1941	}
				1942
				1943
				1944	// Here is the meat of GetQuickCheckDetails (see also the comment on the
				1945	// super-class in the .h file).
				1946	//
				1947	// We iterate along the text object, building up for each character a
				1948	// mask and value that can be used to test for a quick failure to match.
				1949	// The masks and values for the positions will be combined into a single
				1950	// machine word for the current character width in order to be used in
				1951	// generating a quick check.
				1952	void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
				1953	RegExpCompiler* compiler,
				1954	int characters_filled_in,
				1955	bool not_at_start) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1956	Isolate* isolate = Isolate::Current();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1957	ASSERT(characters_filled_in < details->characters());
				1958	int characters = details->characters();
				1959	int char_mask;
				1960	int char_shift;
				1961	if (compiler->ascii()) {
				1962	char_mask = String::kMaxAsciiCharCode;
				1963	char_shift = 8;
				1964	} else {
				1965	char_mask = String::kMaxUC16CharCode;
				1966	char_shift = 16;
				1967	}
				1968	for (int k = 0; k < elms_->length(); k++) {
				1969	TextElement elm = elms_->at(k);
				1970	if (elm.type == TextElement::ATOM) {
				1971	Vector<const uc16> quarks = elm.data.u_atom->data();
				1972	for (int i = 0; i < characters && i < quarks.length(); i++) {
				1973	QuickCheckDetails::Position* pos =
				1974	details->positions(characters_filled_in);
				1975	uc16 c = quarks[i];
				1976	if (c > char_mask) {
				1977	// If we expect a non-ASCII character from an ASCII string,
				1978	// there is no way we can match. Not even case independent
				1979	// matching can turn an ASCII character into non-ASCII or
				1980	// vice versa.
				1981	details->set_cannot_match();
				1982	pos->determines_perfectly = false;
				1983	return;
				1984	}
				1985	if (compiler->ignore_case()) {
				1986	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	1987	int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
				1988	chars);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	1989	ASSERT(length != 0); // Can only happen if c > char_mask (see above).
				1990	if (length == 1) {
				1991	// This letter has no case equivalents, so it's nice and simple
				1992	// and the mask-compare will determine definitely whether we have
				1993	// a match at this character position.
				1994	pos->mask = char_mask;
				1995	pos->value = c;
				1996	pos->determines_perfectly = true;
				1997	} else {
				1998	uint32_t common_bits = char_mask;
				1999	uint32_t bits = chars[0];
				2000	for (int j = 1; j < length; j++) {
				2001	uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
				2002	common_bits ^= differing_bits;
				2003	bits &= common_bits;
				2004	}
				2005	// If length is 2 and common bits has only one zero in it then
				2006	// our mask and compare instruction will determine definitely
				2007	// whether we have a match at this character position. Otherwise
				2008	// it can only be an approximate check.
				2009	uint32_t one_zero = (common_bits \| ~char_mask);
				2010	if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
				2011	pos->determines_perfectly = true;
				2012	}
				2013	pos->mask = common_bits;
				2014	pos->value = bits;
				2015	}
				2016	} else {
				2017	// Don't ignore case. Nice simple case where the mask-compare will
				2018	// determine definitely whether we have a match at this character
				2019	// position.
				2020	pos->mask = char_mask;
				2021	pos->value = c;
				2022	pos->determines_perfectly = true;
				2023	}
				2024	characters_filled_in++;
				2025	ASSERT(characters_filled_in <= details->characters());
				2026	if (characters_filled_in == details->characters()) {
				2027	return;
				2028	}
				2029	}
				2030	} else {
				2031	QuickCheckDetails::Position* pos =
				2032	details->positions(characters_filled_in);
				2033	RegExpCharacterClass* tree = elm.data.u_char_class;
				2034	ZoneList<CharacterRange>* ranges = tree->ranges();
				2035	if (tree->is_negated()) {
				2036	// A quick check uses multi-character mask and compare. There is no
				2037	// useful way to incorporate a negative char class into this scheme
				2038	// so we just conservatively create a mask and value that will always
				2039	// succeed.
				2040	pos->mask = 0;
				2041	pos->value = 0;
				2042	} else {
				2043	int first_range = 0;
				2044	while (ranges->at(first_range).from() > char_mask) {
				2045	first_range++;
				2046	if (first_range == ranges->length()) {
				2047	details->set_cannot_match();
				2048	pos->determines_perfectly = false;
				2049	return;
				2050	}
				2051	}
				2052	CharacterRange range = ranges->at(first_range);
				2053	uc16 from = range.from();
				2054	uc16 to = range.to();
				2055	if (to > char_mask) {
				2056	to = char_mask;
				2057	}
				2058	uint32_t differing_bits = (from ^ to);
				2059	// A mask and compare is only perfect if the differing bits form a
				2060	// number like 00011111 with one single block of trailing 1s.
				2061	if ((differing_bits & (differing_bits + 1)) == 0 &&
				2062	from + differing_bits == to) {
				2063	pos->determines_perfectly = true;
				2064	}
				2065	uint32_t common_bits = ~SmearBitsRight(differing_bits);
				2066	uint32_t bits = (from & common_bits);
				2067	for (int i = first_range + 1; i < ranges->length(); i++) {
				2068	CharacterRange range = ranges->at(i);
				2069	uc16 from = range.from();
				2070	uc16 to = range.to();
				2071	if (from > char_mask) continue;
				2072	if (to > char_mask) to = char_mask;
				2073	// Here we are combining more ranges into the mask and compare
				2074	// value. With each new range the mask becomes more sparse and
				2075	// so the chances of a false positive rise. A character class
				2076	// with multiple ranges is assumed never to be equivalent to a
				2077	// mask and compare operation.
				2078	pos->determines_perfectly = false;
				2079	uint32_t new_common_bits = (from ^ to);
				2080	new_common_bits = ~SmearBitsRight(new_common_bits);
				2081	common_bits &= new_common_bits;
				2082	bits &= new_common_bits;
				2083	uint32_t differing_bits = (from & common_bits) ^ bits;
				2084	common_bits ^= differing_bits;
				2085	bits &= common_bits;
				2086	}
				2087	pos->mask = common_bits;
				2088	pos->value = bits;
				2089	}
				2090	characters_filled_in++;
				2091	ASSERT(characters_filled_in <= details->characters());
				2092	if (characters_filled_in == details->characters()) {
				2093	return;
				2094	}
				2095	}
				2096	}
				2097	ASSERT(characters_filled_in != details->characters());
				2098	on_success()-> GetQuickCheckDetails(details,
				2099	compiler,
				2100	characters_filled_in,
				2101	true);
				2102	}
				2103
				2104
				2105	void QuickCheckDetails::Clear() {
				2106	for (int i = 0; i < characters_; i++) {
				2107	positions_[i].mask = 0;
				2108	positions_[i].value = 0;
				2109	positions_[i].determines_perfectly = false;
				2110	}
				2111	characters_ = 0;
				2112	}
				2113
				2114
				2115	void QuickCheckDetails::Advance(int by, bool ascii) {
				2116	ASSERT(by >= 0);
				2117	if (by >= characters_) {
				2118	Clear();
				2119	return;
				2120	}
				2121	for (int i = 0; i < characters_ - by; i++) {
				2122	positions_[i] = positions_[by + i];
				2123	}
				2124	for (int i = characters_ - by; i < characters_; i++) {
				2125	positions_[i].mask = 0;
				2126	positions_[i].value = 0;
				2127	positions_[i].determines_perfectly = false;
				2128	}
				2129	characters_ -= by;
				2130	// We could change mask_ and value_ here but we would never advance unless
				2131	// they had already been used in a check and they won't be used again because
				2132	// it would gain us nothing. So there's no point.
				2133	}
				2134
				2135
				2136	void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
				2137	ASSERT(characters_ == other->characters_);
				2138	if (other->cannot_match_) {
				2139	return;
				2140	}
				2141	if (cannot_match_) {
				2142	this = other;
				2143	return;
				2144	}
				2145	for (int i = from_index; i < characters_; i++) {
				2146	QuickCheckDetails::Position* pos = positions(i);
				2147	QuickCheckDetails::Position* other_pos = other->positions(i);
				2148	if (pos->mask != other_pos->mask \|\|
				2149	pos->value != other_pos->value \|\|
				2150	!other_pos->determines_perfectly) {
				2151	// Our mask-compare operation will be approximate unless we have the
				2152	// exact same operation on both sides of the alternation.
				2153	pos->determines_perfectly = false;
				2154	}
				2155	pos->mask &= other_pos->mask;
				2156	pos->value &= pos->mask;
				2157	other_pos->value &= pos->mask;
				2158	uc16 differing_bits = (pos->value ^ other_pos->value);
				2159	pos->mask &= ~differing_bits;
				2160	pos->value &= pos->mask;
				2161	}
				2162	}
				2163
				2164
				2165	class VisitMarker {
				2166	public:
				2167	explicit VisitMarker(NodeInfo* info) : info_(info) {
				2168	ASSERT(!info->visited);
				2169	info->visited = true;
				2170	}
				2171	~VisitMarker() {
				2172	info_->visited = false;
				2173	}
				2174	private:
				2175	NodeInfo* info_;
				2176	};
				2177
				2178
				2179	void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
				2180	RegExpCompiler* compiler,
				2181	int characters_filled_in,
				2182	bool not_at_start) {
				2183	if (body_can_be_zero_length_ \|\| info()->visited) return;
				2184	VisitMarker marker(info());
				2185	return ChoiceNode::GetQuickCheckDetails(details,
				2186	compiler,
				2187	characters_filled_in,
				2188	not_at_start);
				2189	}
				2190
				2191
				2192	void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
				2193	RegExpCompiler* compiler,
				2194	int characters_filled_in,
				2195	bool not_at_start) {
				2196	not_at_start = (not_at_start \|\| not_at_start_);
				2197	int choice_count = alternatives_->length();
				2198	ASSERT(choice_count > 0);
				2199	alternatives_->at(0).node()->GetQuickCheckDetails(details,
				2200	compiler,
				2201	characters_filled_in,
				2202	not_at_start);
				2203	for (int i = 1; i < choice_count; i++) {
				2204	QuickCheckDetails new_details(details->characters());
				2205	RegExpNode* node = alternatives_->at(i).node();
				2206	node->GetQuickCheckDetails(&new_details, compiler,
				2207	characters_filled_in,
				2208	not_at_start);
				2209	// Here we merge the quick match details of the two branches.
				2210	details->Merge(&new_details, characters_filled_in);
				2211	}
				2212	}
				2213
				2214
				2215	// Check for [0-9A-Z_a-z].
				2216	static void EmitWordCheck(RegExpMacroAssembler* assembler,
				2217	Label* word,
				2218	Label* non_word,
				2219	bool fall_through_on_word) {
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	2220	if (assembler->CheckSpecialCharacterClass(
				2221	fall_through_on_word ? 'w' : 'W',
				2222	fall_through_on_word ? non_word : word)) {
				2223	// Optimized implementation available.
				2224	return;
				2225	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2226	assembler->CheckCharacterGT('z', non_word);
				2227	assembler->CheckCharacterLT('0', non_word);
				2228	assembler->CheckCharacterGT('a' - 1, word);
				2229	assembler->CheckCharacterLT('9' + 1, word);
				2230	assembler->CheckCharacterLT('A', non_word);
				2231	assembler->CheckCharacterLT('Z' + 1, word);
				2232	if (fall_through_on_word) {
				2233	assembler->CheckNotCharacter('_', non_word);
				2234	} else {
				2235	assembler->CheckCharacter('_', word);
				2236	}
				2237	}
				2238
				2239
				2240	// Emit the code to check for a ^ in multiline mode (1-character lookbehind
				2241	// that matches newline or the start of input).
				2242	static void EmitHat(RegExpCompiler* compiler,
				2243	RegExpNode* on_success,
				2244	Trace* trace) {
				2245	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				2246	// We will be loading the previous character into the current character
				2247	// register.
				2248	Trace new_trace(*trace);
				2249	new_trace.InvalidateCurrentCharacter();
				2250
				2251	Label ok;
				2252	if (new_trace.cp_offset() == 0) {
				2253	// The start of input counts as a newline in this context, so skip to
				2254	// ok if we are at the start.
				2255	assembler->CheckAtStart(&ok);
				2256	}
				2257	// We already checked that we are not at the start of input so it must be
				2258	// OK to load the previous character.
				2259	assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
				2260	new_trace.backtrack(),
				2261	false);
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	2262	if (!assembler->CheckSpecialCharacterClass('n',
				2263	new_trace.backtrack())) {
				2264	// Newline means \n, \r, 0x2028 or 0x2029.
				2265	if (!compiler->ascii()) {
				2266	assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
				2267	}
				2268	assembler->CheckCharacter('\n', &ok);
				2269	assembler->CheckNotCharacter('\r', new_trace.backtrack());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2270	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2271	assembler->Bind(&ok);
				2272	on_success->Emit(compiler, &new_trace);
				2273	}
				2274
				2275
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	2276	// Emit the code to handle \b and \B (word-boundary or non-word-boundary)
				2277	// when we know whether the next character must be a word character or not.
				2278	static void EmitHalfBoundaryCheck(AssertionNode::AssertionNodeType type,
				2279	RegExpCompiler* compiler,
				2280	RegExpNode* on_success,
				2281	Trace* trace) {
				2282	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				2283	Label done;
				2284
				2285	Trace new_trace(*trace);
				2286
				2287	bool expect_word_character = (type == AssertionNode::AFTER_WORD_CHARACTER);
				2288	Label* on_word = expect_word_character ? &done : new_trace.backtrack();
				2289	Label* on_non_word = expect_word_character ? new_trace.backtrack() : &done;
				2290
				2291	// Check whether previous character was a word character.
				2292	switch (trace->at_start()) {
				2293	case Trace::TRUE:
				2294	if (expect_word_character) {
				2295	assembler->GoTo(on_non_word);
				2296	}
				2297	break;
				2298	case Trace::UNKNOWN:
				2299	ASSERT_EQ(0, trace->cp_offset());
				2300	assembler->CheckAtStart(on_non_word);
				2301	// Fall through.
				2302	case Trace::FALSE:
				2303	int prev_char_offset = trace->cp_offset() - 1;
				2304	assembler->LoadCurrentCharacter(prev_char_offset, NULL, false, 1);
				2305	EmitWordCheck(assembler, on_word, on_non_word, expect_word_character);
				2306	// We may or may not have loaded the previous character.
				2307	new_trace.InvalidateCurrentCharacter();
				2308	}
				2309
				2310	assembler->Bind(&done);
				2311
				2312	on_success->Emit(compiler, &new_trace);
				2313	}
				2314
				2315
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2316	// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
				2317	static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,
				2318	RegExpCompiler* compiler,
				2319	RegExpNode* on_success,
				2320	Trace* trace) {
				2321	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				2322	Label before_non_word;
				2323	Label before_word;
				2324	if (trace->characters_preloaded() != 1) {
				2325	assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
				2326	}
				2327	// Fall through on non-word.
				2328	EmitWordCheck(assembler, &before_word, &before_non_word, false);
				2329
				2330	// We will be loading the previous character into the current character
				2331	// register.
				2332	Trace new_trace(*trace);
				2333	new_trace.InvalidateCurrentCharacter();
				2334
				2335	Label ok;
				2336	Label* boundary;
				2337	Label* not_boundary;
				2338	if (type == AssertionNode::AT_BOUNDARY) {
				2339	boundary = &ok;
				2340	not_boundary = new_trace.backtrack();
				2341	} else {
				2342	not_boundary = &ok;
				2343	boundary = new_trace.backtrack();
				2344	}
				2345
				2346	// Next character is not a word character.
				2347	assembler->Bind(&before_non_word);
				2348	if (new_trace.cp_offset() == 0) {
				2349	// The start of input counts as a non-word character, so the question is
				2350	// decided if we are at the start.
				2351	assembler->CheckAtStart(not_boundary);
				2352	}
				2353	// We already checked that we are not at the start of input so it must be
				2354	// OK to load the previous character.
				2355	assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
				2356	&ok, // Unused dummy label in this call.
				2357	false);
				2358	// Fall through on non-word.
				2359	EmitWordCheck(assembler, boundary, not_boundary, false);
				2360	assembler->GoTo(not_boundary);
				2361
				2362	// Next character is a word character.
				2363	assembler->Bind(&before_word);
				2364	if (new_trace.cp_offset() == 0) {
				2365	// The start of input counts as a non-word character, so the question is
				2366	// decided if we are at the start.
				2367	assembler->CheckAtStart(boundary);
				2368	}
				2369	// We already checked that we are not at the start of input so it must be
				2370	// OK to load the previous character.
				2371	assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
				2372	&ok, // Unused dummy label in this call.
				2373	false);
				2374	bool fall_through_on_word = (type == AssertionNode::AT_NON_BOUNDARY);
				2375	EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word);
				2376
				2377	assembler->Bind(&ok);
				2378
				2379	on_success->Emit(compiler, &new_trace);
				2380	}
				2381
				2382
				2383	void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
				2384	RegExpCompiler* compiler,
				2385	int filled_in,
				2386	bool not_at_start) {
				2387	if (type_ == AT_START && not_at_start) {
				2388	details->set_cannot_match();
				2389	return;
				2390	}
				2391	return on_success()->GetQuickCheckDetails(details,
				2392	compiler,
				2393	filled_in,
				2394	not_at_start);
				2395	}
				2396
				2397
				2398	void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				2399	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				2400	switch (type_) {
				2401	case AT_END: {
				2402	Label ok;
				2403	assembler->CheckPosition(trace->cp_offset(), &ok);
				2404	assembler->GoTo(trace->backtrack());
				2405	assembler->Bind(&ok);
				2406	break;
				2407	}
				2408	case AT_START: {
				2409	if (trace->at_start() == Trace::FALSE) {
				2410	assembler->GoTo(trace->backtrack());
				2411	return;
				2412	}
				2413	if (trace->at_start() == Trace::UNKNOWN) {
				2414	assembler->CheckNotAtStart(trace->backtrack());
				2415	Trace at_start_trace = *trace;
				2416	at_start_trace.set_at_start(true);
				2417	on_success()->Emit(compiler, &at_start_trace);
				2418	return;
				2419	}
				2420	}
				2421	break;
				2422	case AFTER_NEWLINE:
				2423	EmitHat(compiler, on_success(), trace);
				2424	return;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2425	case AT_BOUNDARY:
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	2426	case AT_NON_BOUNDARY: {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2427	EmitBoundaryCheck(type_, compiler, on_success(), trace);
				2428	return;
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	2429	}
				2430	case AFTER_WORD_CHARACTER:
				2431	case AFTER_NONWORD_CHARACTER: {
				2432	EmitHalfBoundaryCheck(type_, compiler, on_success(), trace);
				2433	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2434	}
				2435	on_success()->Emit(compiler, trace);
				2436	}
				2437
				2438
				2439	static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
				2440	if (quick_check == NULL) return false;
				2441	if (offset >= quick_check->characters()) return false;
				2442	return quick_check->positions(offset)->determines_perfectly;
				2443	}
				2444
				2445
				2446	static void UpdateBoundsCheck(int index, int* checked_up_to) {
				2447	if (index > *checked_up_to) {
				2448	*checked_up_to = index;
				2449	}
				2450	}
				2451
				2452
				2453	// We call this repeatedly to generate code for each pass over the text node.
				2454	// The passes are in increasing order of difficulty because we hope one
				2455	// of the first passes will fail in which case we are saved the work of the
				2456	// later passes. for example for the case independent regexp /%[asdfghjkl]a/
				2457	// we will check the '%' in the first pass, the case independent 'a' in the
				2458	// second pass and the character class in the last pass.
				2459	//
				2460	// The passes are done from right to left, so for example to test for /bar/
				2461	// we will first test for an 'r' with offset 2, then an 'a' with offset 1
				2462	// and then a 'b' with offset 0. This means we can avoid the end-of-input
				2463	// bounds check most of the time. In the example we only need to check for
				2464	// end-of-input when loading the putative 'r'.
				2465	//
				2466	// A slight complication involves the fact that the first character may already
				2467	// be fetched into a register by the previous node. In this case we want to
				2468	// do the test for that character first. We do this in separate passes. The
				2469	// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
				2470	// pass has been performed then subsequent passes will have true in
				2471	// first_element_checked to indicate that that character does not need to be
				2472	// checked again.
				2473	//
				2474	// In addition to all this we are passed a Trace, which can
				2475	// contain an AlternativeGeneration object. In this AlternativeGeneration
				2476	// object we can see details of any quick check that was already passed in
				2477	// order to get to the code we are now generating. The quick check can involve
				2478	// loading characters, which means we do not need to recheck the bounds
				2479	// up to the limit the quick check already checked. In addition the quick
				2480	// check can have involved a mask and compare operation which may simplify
				2481	// or obviate the need for further checks at some character positions.
				2482	void TextNode::TextEmitPass(RegExpCompiler* compiler,
				2483	TextEmitPassType pass,
				2484	bool preloaded,
				2485	Trace* trace,
				2486	bool first_element_checked,
				2487	int* checked_up_to) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	2488	Isolate* isolate = Isolate::Current();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2489	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				2490	bool ascii = compiler->ascii();
				2491	Label* backtrack = trace->backtrack();
				2492	QuickCheckDetails* quick_check = trace->quick_check_performed();
				2493	int element_count = elms_->length();
				2494	for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
				2495	TextElement elm = elms_->at(i);
				2496	int cp_offset = trace->cp_offset() + elm.cp_offset;
				2497	if (elm.type == TextElement::ATOM) {
				2498	Vector<const uc16> quarks = elm.data.u_atom->data();
				2499	for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
				2500	if (first_element_checked && i == 0 && j == 0) continue;
				2501	if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;
				2502	EmitCharacterFunction* emit_function = NULL;
				2503	switch (pass) {
				2504	case NON_ASCII_MATCH:
				2505	ASSERT(ascii);
				2506	if (quarks[j] > String::kMaxAsciiCharCode) {
				2507	assembler->GoTo(backtrack);
				2508	return;
				2509	}
				2510	break;
				2511	case NON_LETTER_CHARACTER_MATCH:
				2512	emit_function = &EmitAtomNonLetter;
				2513	break;
				2514	case SIMPLE_CHARACTER_MATCH:
				2515	emit_function = &EmitSimpleCharacter;
				2516	break;
				2517	case CASE_CHARACTER_MATCH:
				2518	emit_function = &EmitAtomLetter;
				2519	break;
				2520	default:
				2521	break;
				2522	}
				2523	if (emit_function != NULL) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	2524	bool bound_checked = emit_function(isolate,
				2525	compiler,
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2526	quarks[j],
				2527	backtrack,
				2528	cp_offset + j,
				2529	*checked_up_to < cp_offset + j,
				2530	preloaded);
				2531	if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
				2532	}
				2533	}
				2534	} else {
				2535	ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
				2536	if (pass == CHARACTER_CLASS_MATCH) {
				2537	if (first_element_checked && i == 0) continue;
				2538	if (DeterminedAlready(quick_check, elm.cp_offset)) continue;
				2539	RegExpCharacterClass* cc = elm.data.u_char_class;
				2540	EmitCharClass(assembler,
				2541	cc,
				2542	ascii,
				2543	backtrack,
				2544	cp_offset,
				2545	*checked_up_to < cp_offset,
				2546	preloaded);
				2547	UpdateBoundsCheck(cp_offset, checked_up_to);
				2548	}
				2549	}
				2550	}
				2551	}
				2552
				2553
				2554	int TextNode::Length() {
				2555	TextElement elm = elms_->last();
				2556	ASSERT(elm.cp_offset >= 0);
				2557	if (elm.type == TextElement::ATOM) {
				2558	return elm.cp_offset + elm.data.u_atom->data().length();
				2559	} else {
				2560	return elm.cp_offset + 1;
				2561	}
				2562	}
				2563
				2564
				2565	bool TextNode::SkipPass(int int_pass, bool ignore_case) {
				2566	TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
				2567	if (ignore_case) {
				2568	return pass == SIMPLE_CHARACTER_MATCH;
				2569	} else {
				2570	return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;
				2571	}
				2572	}
				2573
				2574
				2575	// This generates the code to match a text node. A text node can contain
				2576	// straight character sequences (possibly to be matched in a case-independent
				2577	// way) and character classes. For efficiency we do not do this in a single
				2578	// pass from left to right. Instead we pass over the text node several times,
				2579	// emitting code for some character positions every time. See the comment on
				2580	// TextEmitPass for details.
				2581	void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				2582	LimitResult limit_result = LimitVersions(compiler, trace);
				2583	if (limit_result == DONE) return;
				2584	ASSERT(limit_result == CONTINUE);
				2585
				2586	if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
				2587	compiler->SetRegExpTooBig();
				2588	return;
				2589	}
				2590
				2591	if (compiler->ascii()) {
				2592	int dummy = 0;
				2593	TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
				2594	}
				2595
				2596	bool first_elt_done = false;
				2597	int bound_checked_to = trace->cp_offset() - 1;
				2598	bound_checked_to += trace->bound_checked_up_to();
				2599
				2600	// If a character is preloaded into the current character register then
				2601	// check that now.
				2602	if (trace->characters_preloaded() == 1) {
				2603	for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
				2604	if (!SkipPass(pass, compiler->ignore_case())) {
				2605	TextEmitPass(compiler,
				2606	static_cast<TextEmitPassType>(pass),
				2607	true,
				2608	trace,
				2609	false,
				2610	&bound_checked_to);
				2611	}
				2612	}
				2613	first_elt_done = true;
				2614	}
				2615
				2616	for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
				2617	if (!SkipPass(pass, compiler->ignore_case())) {
				2618	TextEmitPass(compiler,
				2619	static_cast<TextEmitPassType>(pass),
				2620	false,
				2621	trace,
				2622	first_elt_done,
				2623	&bound_checked_to);
				2624	}
				2625	}
				2626
				2627	Trace successor_trace(*trace);
				2628	successor_trace.set_at_start(false);
				2629	successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
				2630	RecursionCheck rc(compiler);
				2631	on_success()->Emit(compiler, &successor_trace);
				2632	}
				2633
				2634
				2635	void Trace::InvalidateCurrentCharacter() {
				2636	characters_preloaded_ = 0;
				2637	}
				2638
				2639
				2640	void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
				2641	ASSERT(by > 0);
				2642	// We don't have an instruction for shifting the current character register
				2643	// down or for using a shifted value for anything so lets just forget that
				2644	// we preloaded any characters into it.
				2645	characters_preloaded_ = 0;
				2646	// Adjust the offsets of the quick check performed information. This
				2647	// information is used to find out what we already determined about the
				2648	// characters by means of mask and compare.
				2649	quick_check_performed_.Advance(by, compiler->ascii());
				2650	cp_offset_ += by;
				2651	if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
				2652	compiler->SetRegExpTooBig();
				2653	cp_offset_ = 0;
				2654	}
				2655	bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
				2656	}
				2657
				2658
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	2659	void TextNode::MakeCaseIndependent(bool is_ascii) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2660	int element_count = elms_->length();
				2661	for (int i = 0; i < element_count; i++) {
				2662	TextElement elm = elms_->at(i);
				2663	if (elm.type == TextElement::CHAR_CLASS) {
				2664	RegExpCharacterClass* cc = elm.data.u_char_class;
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	2665	// None of the standard character classses is different in the case
				2666	// independent case and it slows us down if we don't know that.
				2667	if (cc->is_standard()) continue;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2668	ZoneList<CharacterRange>* ranges = cc->ranges();
				2669	int range_count = ranges->length();
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	2670	for (int j = 0; j < range_count; j++) {
				2671	ranges->at(j).AddCaseEquivalents(ranges, is_ascii);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2672	}
				2673	}
				2674	}
				2675	}
				2676
				2677
				2678	int TextNode::GreedyLoopTextLength() {
				2679	TextElement elm = elms_->at(elms_->length() - 1);
				2680	if (elm.type == TextElement::CHAR_CLASS) {
				2681	return elm.cp_offset + 1;
				2682	} else {
				2683	return elm.cp_offset + elm.data.u_atom->data().length();
				2684	}
				2685	}
				2686
				2687
				2688	// Finds the fixed match length of a sequence of nodes that goes from
				2689	// this alternative and back to this choice node. If there are variable
				2690	// length nodes or other complications in the way then return a sentinel
				2691	// value indicating that a greedy loop cannot be constructed.
				2692	int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) {
				2693	int length = 0;
				2694	RegExpNode* node = alternative->node();
				2695	// Later we will generate code for all these text nodes using recursion
				2696	// so we have to limit the max number.
				2697	int recursion_depth = 0;
				2698	while (node != this) {
				2699	if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
				2700	return kNodeIsTooComplexForGreedyLoops;
				2701	}
				2702	int node_length = node->GreedyLoopTextLength();
				2703	if (node_length == kNodeIsTooComplexForGreedyLoops) {
				2704	return kNodeIsTooComplexForGreedyLoops;
				2705	}
				2706	length += node_length;
				2707	SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
				2708	node = seq_node->on_success();
				2709	}
				2710	return length;
				2711	}
				2712
				2713
				2714	void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
				2715	ASSERT_EQ(loop_node_, NULL);
				2716	AddAlternative(alt);
				2717	loop_node_ = alt.node();
				2718	}
				2719
				2720
				2721	void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
				2722	ASSERT_EQ(continue_node_, NULL);
				2723	AddAlternative(alt);
				2724	continue_node_ = alt.node();
				2725	}
				2726
				2727
				2728	void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				2729	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				2730	if (trace->stop_node() == this) {
				2731	int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
				2732	ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
				2733	// Update the counter-based backtracking info on the stack. This is an
				2734	// optimization for greedy loops (see below).
				2735	ASSERT(trace->cp_offset() == text_length);
				2736	macro_assembler->AdvanceCurrentPosition(text_length);
				2737	macro_assembler->GoTo(trace->loop_label());
				2738	return;
				2739	}
				2740	ASSERT(trace->stop_node() == NULL);
				2741	if (!trace->is_trivial()) {
				2742	trace->Flush(compiler, this);
				2743	return;
				2744	}
				2745	ChoiceNode::Emit(compiler, trace);
				2746	}
				2747
				2748
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	2749	int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
				2750	bool not_at_start) {
				2751	int preload_characters = EatsAtLeast(4, 0, not_at_start);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2752	if (compiler->macro_assembler()->CanReadUnaligned()) {
				2753	bool ascii = compiler->ascii();
				2754	if (ascii) {
				2755	if (preload_characters > 4) preload_characters = 4;
				2756	// We can't preload 3 characters because there is no machine instruction
				2757	// to do that. We can't just load 4 because we could be reading
				2758	// beyond the end of the string, which could cause a memory fault.
				2759	if (preload_characters == 3) preload_characters = 2;
				2760	} else {
				2761	if (preload_characters > 2) preload_characters = 2;
				2762	}
				2763	} else {
				2764	if (preload_characters > 1) preload_characters = 1;
				2765	}
				2766	return preload_characters;
				2767	}
				2768
				2769
				2770	// This class is used when generating the alternatives in a choice node. It
				2771	// records the way the alternative is being code generated.
				2772	class AlternativeGeneration: public Malloced {
				2773	public:
				2774	AlternativeGeneration()
				2775	: possible_success(),
				2776	expects_preload(false),
				2777	after(),
				2778	quick_check_details() { }
				2779	Label possible_success;
				2780	bool expects_preload;
				2781	Label after;
				2782	QuickCheckDetails quick_check_details;
				2783	};
				2784
				2785
				2786	// Creates a list of AlternativeGenerations. If the list has a reasonable
				2787	// size then it is on the stack, otherwise the excess is on the heap.
				2788	class AlternativeGenerationList {
				2789	public:
				2790	explicit AlternativeGenerationList(int count)
				2791	: alt_gens_(count) {
				2792	for (int i = 0; i < count && i < kAFew; i++) {
				2793	alt_gens_.Add(a_few_alt_gens_ + i);
				2794	}
				2795	for (int i = kAFew; i < count; i++) {
				2796	alt_gens_.Add(new AlternativeGeneration());
				2797	}
				2798	}
				2799	~AlternativeGenerationList() {
				2800	for (int i = kAFew; i < alt_gens_.length(); i++) {
				2801	delete alt_gens_[i];
				2802	alt_gens_[i] = NULL;
				2803	}
				2804	}
				2805
				2806	AlternativeGeneration* at(int i) {
				2807	return alt_gens_[i];
				2808	}
				2809	private:
				2810	static const int kAFew = 10;
				2811	ZoneList<AlternativeGeneration*> alt_gens_;
				2812	AlternativeGeneration a_few_alt_gens_[kAFew];
				2813	};
				2814
				2815
				2816	/* Code generation for choice nodes.
				2817	*
				2818	* We generate quick checks that do a mask and compare to eliminate a
				2819	* choice. If the quick check succeeds then it jumps to the continuation to
				2820	* do slow checks and check subsequent nodes. If it fails (the common case)
				2821	* it falls through to the next choice.
				2822	*
				2823	* Here is the desired flow graph. Nodes directly below each other imply
				2824	* fallthrough. Alternatives 1 and 2 have quick checks. Alternative
				2825	* 3 doesn't have a quick check so we have to call the slow check.
				2826	* Nodes are marked Qn for quick checks and Sn for slow checks. The entire
				2827	* regexp continuation is generated directly after the Sn node, up to the
				2828	* next GoTo if we decide to reuse some already generated code. Some
				2829	* nodes expect preload_characters to be preloaded into the current
				2830	* character register. R nodes do this preloading. Vertices are marked
				2831	* F for failures and S for success (possible success in the case of quick
				2832	* nodes). L, V, < and > are used as arrow heads.
				2833	*
 * ----------> R
 *             |
 *             V
 *            Q1 -----> S1
 *             |   S   /
 *            F|      /
 *             |    F/
 *             |    /
 *             |   R
 *             |  /
 *             V L
 *            Q2 -----> S2
 *             |   S   /
 *            F|      /
 *             |    F/
 *             |    /
 *             |   R
 *             |  /
 *             V L
 *            S3
 *             |
 *            F|
 *             |
 *             R
 *             |
 * backtrack   V
 * <----------Q4
 *   \    F    |
 *    \        |S
 *     \   F   V
 *      \-----S4
				2865	*
				2866	* For greedy loops we reverse our expectation and expect to match rather
				2867	* than fail. Therefore we want the loop code to look like this (U is the
				2868	* unwind code that steps back in the greedy loop). The following alternatives
				2869	* look the same as above.
 *              _____
 *             /     \
 *             V     |
 * ----------> S1    |
 *            /|     |
 *           / |S    |
 *         F/  \_____/
 *         /
 *        |<-----------
 *        |            \
 *        V             \
 *        Q2 ---> S2     \
 *        |  S   /       |
 *       F|     /        |
 *        |   F/         |
 *        |   /          |
 *        |  R           |
 *        | /            |
 *   F    VL             |
 * <------U              |
 * back   |S             |
 *        \______________/
				2892	*/
				2893
				2894
				2895	void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				2896	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				2897	int choice_count = alternatives_->length();
				2898	#ifdef DEBUG
				2899	for (int i = 0; i < choice_count - 1; i++) {
				2900	GuardedAlternative alternative = alternatives_->at(i);
				2901	ZoneList<Guard> guards = alternative.guards();
				2902	int guard_count = (guards == NULL) ? 0 : guards->length();
				2903	for (int j = 0; j < guard_count; j++) {
				2904	ASSERT(!trace->mentions_reg(guards->at(j)->reg()));
				2905	}
				2906	}
				2907	#endif
				2908
				2909	LimitResult limit_result = LimitVersions(compiler, trace);
				2910	if (limit_result == DONE) return;
				2911	ASSERT(limit_result == CONTINUE);
				2912
				2913	int new_flush_budget = trace->flush_budget() / choice_count;
				2914	if (trace->flush_budget() == 0 && trace->actions() != NULL) {
				2915	trace->Flush(compiler, this);
				2916	return;
				2917	}
				2918
				2919	RecursionCheck rc(compiler);
				2920
				2921	Trace* current_trace = trace;
				2922
				2923	int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
				2924	bool greedy_loop = false;
				2925	Label greedy_loop_label;
				2926	Trace counter_backtrack_trace;
				2927	counter_backtrack_trace.set_backtrack(&greedy_loop_label);
				2928	if (not_at_start()) counter_backtrack_trace.set_at_start(false);
				2929
				2930	if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
				2931	// Here we have special handling for greedy loops containing only text nodes
				2932	// and other simple nodes. These are handled by pushing the current
				2933	// position on the stack and then incrementing the current position each
				2934	// time around the switch. On backtrack we decrement the current position
				2935	// and check it against the pushed value. This avoids pushing backtrack
				2936	// information for each iteration of the loop, which could take up a lot of
				2937	// space.
				2938	greedy_loop = true;
				2939	ASSERT(trace->stop_node() == NULL);
				2940	macro_assembler->PushCurrentPosition();
				2941	current_trace = &counter_backtrack_trace;
				2942	Label greedy_match_failed;
				2943	Trace greedy_match_trace;
				2944	if (not_at_start()) greedy_match_trace.set_at_start(false);
				2945	greedy_match_trace.set_backtrack(&greedy_match_failed);
				2946	Label loop_label;
				2947	macro_assembler->Bind(&loop_label);
				2948	greedy_match_trace.set_stop_node(this);
				2949	greedy_match_trace.set_loop_label(&loop_label);
				2950	alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
				2951	macro_assembler->Bind(&greedy_match_failed);
				2952	}
				2953
				2954	Label second_choice; // For use in greedy matches.
				2955	macro_assembler->Bind(&second_choice);
				2956
				2957	int first_normal_choice = greedy_loop ? 1 : 0;
				2958
Ben Murdoch	b0fe162	2011-05-05 13:52:32 +0100	[diff] [blame]	2959	int preload_characters =
				2960	CalculatePreloadCharacters(compiler,
				2961	current_trace->at_start() == Trace::FALSE);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	2962	bool preload_is_current =
				2963	(current_trace->characters_preloaded() == preload_characters);
				2964	bool preload_has_checked_bounds = preload_is_current;
				2965
				2966	AlternativeGenerationList alt_gens(choice_count);
				2967
				2968	// For now we just call all choices one after the other. The idea ultimately
				2969	// is to use the Dispatch table to try only the relevant ones.
				2970	for (int i = first_normal_choice; i < choice_count; i++) {
				2971	GuardedAlternative alternative = alternatives_->at(i);
				2972	AlternativeGeneration* alt_gen = alt_gens.at(i);
				2973	alt_gen->quick_check_details.set_characters(preload_characters);
				2974	ZoneList<Guard> guards = alternative.guards();
				2975	int guard_count = (guards == NULL) ? 0 : guards->length();
				2976	Trace new_trace(*current_trace);
				2977	new_trace.set_characters_preloaded(preload_is_current ?
				2978	preload_characters :
				2979	0);
				2980	if (preload_has_checked_bounds) {
				2981	new_trace.set_bound_checked_up_to(preload_characters);
				2982	}
				2983	new_trace.quick_check_performed()->Clear();
				2984	if (not_at_start_) new_trace.set_at_start(Trace::FALSE);
				2985	alt_gen->expects_preload = preload_is_current;
				2986	bool generate_full_check_inline = false;
				2987	if (FLAG_regexp_optimization &&
				2988	try_to_emit_quick_check_for_alternative(i) &&
				2989	alternative.node()->EmitQuickCheck(compiler,
				2990	&new_trace,
				2991	preload_has_checked_bounds,
				2992	&alt_gen->possible_success,
				2993	&alt_gen->quick_check_details,
				2994	i < choice_count - 1)) {
				2995	// Quick check was generated for this choice.
				2996	preload_is_current = true;
				2997	preload_has_checked_bounds = true;
				2998	// On the last choice in the ChoiceNode we generated the quick
				2999	// check to fall through on possible success. So now we need to
				3000	// generate the full check inline.
				3001	if (i == choice_count - 1) {
				3002	macro_assembler->Bind(&alt_gen->possible_success);
				3003	new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
				3004	new_trace.set_characters_preloaded(preload_characters);
				3005	new_trace.set_bound_checked_up_to(preload_characters);
				3006	generate_full_check_inline = true;
				3007	}
				3008	} else if (alt_gen->quick_check_details.cannot_match()) {
				3009	if (i == choice_count - 1 && !greedy_loop) {
				3010	macro_assembler->GoTo(trace->backtrack());
				3011	}
				3012	continue;
				3013	} else {
				3014	// No quick check was generated. Put the full code here.
				3015	// If this is not the first choice then there could be slow checks from
				3016	// previous cases that go here when they fail. There's no reason to
				3017	// insist that they preload characters since the slow check we are about
				3018	// to generate probably can't use it.
				3019	if (i != first_normal_choice) {
				3020	alt_gen->expects_preload = false;
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	3021	new_trace.InvalidateCurrentCharacter();
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	3022	}
				3023	if (i < choice_count - 1) {
				3024	new_trace.set_backtrack(&alt_gen->after);
				3025	}
				3026	generate_full_check_inline = true;
				3027	}
				3028	if (generate_full_check_inline) {
				3029	if (new_trace.actions() != NULL) {
				3030	new_trace.set_flush_budget(new_flush_budget);
				3031	}
				3032	for (int j = 0; j < guard_count; j++) {
				3033	GenerateGuard(macro_assembler, guards->at(j), &new_trace);
				3034	}
				3035	alternative.node()->Emit(compiler, &new_trace);
				3036	preload_is_current = false;
				3037	}
				3038	macro_assembler->Bind(&alt_gen->after);
				3039	}
				3040	if (greedy_loop) {
				3041	macro_assembler->Bind(&greedy_loop_label);
				3042	// If we have unwound to the bottom then backtrack.
				3043	macro_assembler->CheckGreedyLoop(trace->backtrack());
				3044	// Otherwise try the second priority at an earlier position.
				3045	macro_assembler->AdvanceCurrentPosition(-text_length);
				3046	macro_assembler->GoTo(&second_choice);
				3047	}
				3048
				3049	// At this point we need to generate slow checks for the alternatives where
				3050	// the quick check was inlined. We can recognize these because the associated
				3051	// label was bound.
				3052	for (int i = first_normal_choice; i < choice_count - 1; i++) {
				3053	AlternativeGeneration* alt_gen = alt_gens.at(i);
				3054	Trace new_trace(*current_trace);
				3055	// If there are actions to be flushed we have to limit how many times
				3056	// they are flushed. Take the budget of the parent trace and distribute
				3057	// it fairly amongst the children.
				3058	if (new_trace.actions() != NULL) {
				3059	new_trace.set_flush_budget(new_flush_budget);
				3060	}
				3061	EmitOutOfLineContinuation(compiler,
				3062	&new_trace,
				3063	alternatives_->at(i),
				3064	alt_gen,
				3065	preload_characters,
				3066	alt_gens.at(i + 1)->expects_preload);
				3067	}
				3068	}
				3069
				3070
				3071	void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
				3072	Trace* trace,
				3073	GuardedAlternative alternative,
				3074	AlternativeGeneration* alt_gen,
				3075	int preload_characters,
				3076	bool next_expects_preload) {
				3077	if (!alt_gen->possible_success.is_linked()) return;
				3078
				3079	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				3080	macro_assembler->Bind(&alt_gen->possible_success);
				3081	Trace out_of_line_trace(*trace);
				3082	out_of_line_trace.set_characters_preloaded(preload_characters);
				3083	out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
				3084	if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE);
				3085	ZoneList<Guard> guards = alternative.guards();
				3086	int guard_count = (guards == NULL) ? 0 : guards->length();
				3087	if (next_expects_preload) {
				3088	Label reload_current_char;
				3089	out_of_line_trace.set_backtrack(&reload_current_char);
				3090	for (int j = 0; j < guard_count; j++) {
				3091	GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
				3092	}
				3093	alternative.node()->Emit(compiler, &out_of_line_trace);
				3094	macro_assembler->Bind(&reload_current_char);
				3095	// Reload the current character, since the next quick check expects that.
				3096	// We don't need to check bounds here because we only get into this
				3097	// code through a quick check which already did the checked load.
				3098	macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
				3099	NULL,
				3100	false,
				3101	preload_characters);
				3102	macro_assembler->GoTo(&(alt_gen->after));
				3103	} else {
				3104	out_of_line_trace.set_backtrack(&(alt_gen->after));
				3105	for (int j = 0; j < guard_count; j++) {
				3106	GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
				3107	}
				3108	alternative.node()->Emit(compiler, &out_of_line_trace);
				3109	}
				3110	}
				3111
				3112
				3113	void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				3114	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				3115	LimitResult limit_result = LimitVersions(compiler, trace);
				3116	if (limit_result == DONE) return;
				3117	ASSERT(limit_result == CONTINUE);
				3118
				3119	RecursionCheck rc(compiler);
				3120
				3121	switch (type_) {
				3122	case STORE_POSITION: {
				3123	Trace::DeferredCapture
				3124	new_capture(data_.u_position_register.reg,
				3125	data_.u_position_register.is_capture,
				3126	trace);
				3127	Trace new_trace = *trace;
				3128	new_trace.add_action(&new_capture);
				3129	on_success()->Emit(compiler, &new_trace);
				3130	break;
				3131	}
				3132	case INCREMENT_REGISTER: {
				3133	Trace::DeferredIncrementRegister
				3134	new_increment(data_.u_increment_register.reg);
				3135	Trace new_trace = *trace;
				3136	new_trace.add_action(&new_increment);
				3137	on_success()->Emit(compiler, &new_trace);
				3138	break;
				3139	}
				3140	case SET_REGISTER: {
				3141	Trace::DeferredSetRegister
				3142	new_set(data_.u_store_register.reg, data_.u_store_register.value);
				3143	Trace new_trace = *trace;
				3144	new_trace.add_action(&new_set);
				3145	on_success()->Emit(compiler, &new_trace);
				3146	break;
				3147	}
				3148	case CLEAR_CAPTURES: {
				3149	Trace::DeferredClearCaptures
				3150	new_capture(Interval(data_.u_clear_captures.range_from,
				3151	data_.u_clear_captures.range_to));
				3152	Trace new_trace = *trace;
				3153	new_trace.add_action(&new_capture);
				3154	on_success()->Emit(compiler, &new_trace);
				3155	break;
				3156	}
				3157	case BEGIN_SUBMATCH:
				3158	if (!trace->is_trivial()) {
				3159	trace->Flush(compiler, this);
				3160	} else {
				3161	assembler->WriteCurrentPositionToRegister(
				3162	data_.u_submatch.current_position_register, 0);
				3163	assembler->WriteStackPointerToRegister(
				3164	data_.u_submatch.stack_pointer_register);
				3165	on_success()->Emit(compiler, trace);
				3166	}
				3167	break;
				3168	case EMPTY_MATCH_CHECK: {
				3169	int start_pos_reg = data_.u_empty_match_check.start_register;
				3170	int stored_pos = 0;
				3171	int rep_reg = data_.u_empty_match_check.repetition_register;
				3172	bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
				3173	bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
				3174	if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
				3175	// If we know we haven't advanced and there is no minimum we
				3176	// can just backtrack immediately.
				3177	assembler->GoTo(trace->backtrack());
				3178	} else if (know_dist && stored_pos < trace->cp_offset()) {
				3179	// If we know we've advanced we can generate the continuation
				3180	// immediately.
				3181	on_success()->Emit(compiler, trace);
				3182	} else if (!trace->is_trivial()) {
				3183	trace->Flush(compiler, this);
				3184	} else {
				3185	Label skip_empty_check;
				3186	// If we have a minimum number of repetitions we check the current
				3187	// number first and skip the empty check if it's not enough.
				3188	if (has_minimum) {
				3189	int limit = data_.u_empty_match_check.repetition_limit;
				3190	assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
				3191	}
				3192	// If the match is empty we bail out, otherwise we fall through
				3193	// to the on-success continuation.
				3194	assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
				3195	trace->backtrack());
				3196	assembler->Bind(&skip_empty_check);
				3197	on_success()->Emit(compiler, trace);
				3198	}
				3199	break;
				3200	}
				3201	case POSITIVE_SUBMATCH_SUCCESS: {
				3202	if (!trace->is_trivial()) {
				3203	trace->Flush(compiler, this);
				3204	return;
				3205	}
				3206	assembler->ReadCurrentPositionFromRegister(
				3207	data_.u_submatch.current_position_register);
				3208	assembler->ReadStackPointerFromRegister(
				3209	data_.u_submatch.stack_pointer_register);
				3210	int clear_register_count = data_.u_submatch.clear_register_count;
				3211	if (clear_register_count == 0) {
				3212	on_success()->Emit(compiler, trace);
				3213	return;
				3214	}
				3215	int clear_registers_from = data_.u_submatch.clear_register_from;
				3216	Label clear_registers_backtrack;
				3217	Trace new_trace = *trace;
				3218	new_trace.set_backtrack(&clear_registers_backtrack);
				3219	on_success()->Emit(compiler, &new_trace);
				3220
				3221	assembler->Bind(&clear_registers_backtrack);
				3222	int clear_registers_to = clear_registers_from + clear_register_count - 1;
				3223	assembler->ClearRegisters(clear_registers_from, clear_registers_to);
				3224
				3225	ASSERT(trace->backtrack() == NULL);
				3226	assembler->Backtrack();
				3227	return;
				3228	}
				3229	default:
				3230	UNREACHABLE();
				3231	}
				3232	}
				3233
				3234
				3235	void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				3236	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				3237	if (!trace->is_trivial()) {
				3238	trace->Flush(compiler, this);
				3239	return;
				3240	}
				3241
				3242	LimitResult limit_result = LimitVersions(compiler, trace);
				3243	if (limit_result == DONE) return;
				3244	ASSERT(limit_result == CONTINUE);
				3245
				3246	RecursionCheck rc(compiler);
				3247
				3248	ASSERT_EQ(start_reg_ + 1, end_reg_);
				3249	if (compiler->ignore_case()) {
				3250	assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
				3251	trace->backtrack());
				3252	} else {
				3253	assembler->CheckNotBackReference(start_reg_, trace->backtrack());
				3254	}
				3255	on_success()->Emit(compiler, trace);
				3256	}
				3257
				3258
				3259	// -------------------------------------------------------------------
				3260	// Dot/dotty output
				3261
				3262
				3263	#ifdef DEBUG
				3264
				3265
				3266	class DotPrinter: public NodeVisitor {
				3267	public:
				3268	explicit DotPrinter(bool ignore_case)
				3269	: ignore_case_(ignore_case),
				3270	stream_(&alloc_) { }
				3271	void PrintNode(const char* label, RegExpNode* node);
				3272	void Visit(RegExpNode* node);
				3273	void PrintAttributes(RegExpNode* from);
				3274	StringStream* stream() { return &stream_; }
				3275	void PrintOnFailure(RegExpNode* from, RegExpNode* to);
				3276	#define DECLARE_VISIT(Type) \
				3277	virtual void Visit##Type(Type##Node* that);
				3278	FOR_EACH_NODE_TYPE(DECLARE_VISIT)
				3279	#undef DECLARE_VISIT
				3280	private:
				3281	bool ignore_case_;
				3282	HeapStringAllocator alloc_;
				3283	StringStream stream_;
				3284	};
				3285
				3286
				3287	void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
				3288	stream()->Add("digraph G {\n graph [label=\"");
				3289	for (int i = 0; label[i]; i++) {
				3290	switch (label[i]) {
				3291	case '\\':
				3292	stream()->Add("\\\\");
				3293	break;
				3294	case '"':
				3295	stream()->Add("\"");
				3296	break;
				3297	default:
				3298	stream()->Put(label[i]);
				3299	break;
				3300	}
				3301	}
				3302	stream()->Add("\"];\n");
				3303	Visit(node);
				3304	stream()->Add("}\n");
				3305	printf("%s", *(stream()->ToCString()));
				3306	}
				3307
				3308
				3309	void DotPrinter::Visit(RegExpNode* node) {
				3310	if (node->info()->visited) return;
				3311	node->info()->visited = true;
				3312	node->Accept(this);
				3313	}
				3314
				3315
				3316	void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
				3317	stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
				3318	Visit(on_failure);
				3319	}
				3320
				3321
				3322	class TableEntryBodyPrinter {
				3323	public:
				3324	TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)
				3325	: stream_(stream), choice_(choice) { }
				3326	void Call(uc16 from, DispatchTable::Entry entry) {
				3327	OutSet* out_set = entry.out_set();
				3328	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				3329	if (out_set->Get(i)) {
				3330	stream()->Add(" n%p:s%io%i -> n%p;\n",
				3331	choice(),
				3332	from,
				3333	i,
				3334	choice()->alternatives()->at(i).node());
				3335	}
				3336	}
				3337	}
				3338	private:
				3339	StringStream* stream() { return stream_; }
				3340	ChoiceNode* choice() { return choice_; }
				3341	StringStream* stream_;
				3342	ChoiceNode* choice_;
				3343	};
				3344
				3345
				3346	class TableEntryHeaderPrinter {
				3347	public:
				3348	explicit TableEntryHeaderPrinter(StringStream* stream)
				3349	: first_(true), stream_(stream) { }
				3350	void Call(uc16 from, DispatchTable::Entry entry) {
				3351	if (first_) {
				3352	first_ = false;
				3353	} else {
				3354	stream()->Add("\|");
				3355	}
				3356	stream()->Add("{\\%k-\\%k\|{", from, entry.to());
				3357	OutSet* out_set = entry.out_set();
				3358	int priority = 0;
				3359	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				3360	if (out_set->Get(i)) {
				3361	if (priority > 0) stream()->Add("\|");
				3362	stream()->Add("<s%io%i> %i", from, i, priority);
				3363	priority++;
				3364	}
				3365	}
				3366	stream()->Add("}}");
				3367	}
				3368	private:
				3369	bool first_;
				3370	StringStream* stream() { return stream_; }
				3371	StringStream* stream_;
				3372	};
				3373
				3374
				3375	class AttributePrinter {
				3376	public:
				3377	explicit AttributePrinter(DotPrinter* out)
				3378	: out_(out), first_(true) { }
				3379	void PrintSeparator() {
				3380	if (first_) {
				3381	first_ = false;
				3382	} else {
				3383	out_->stream()->Add("\|");
				3384	}
				3385	}
				3386	void PrintBit(const char* name, bool value) {
				3387	if (!value) return;
				3388	PrintSeparator();
				3389	out_->stream()->Add("{%s}", name);
				3390	}
				3391	void PrintPositive(const char* name, int value) {
				3392	if (value < 0) return;
				3393	PrintSeparator();
				3394	out_->stream()->Add("{%s\|%x}", name, value);
				3395	}
				3396	private:
				3397	DotPrinter* out_;
				3398	bool first_;
				3399	};
				3400
				3401
				3402	void DotPrinter::PrintAttributes(RegExpNode* that) {
				3403	stream()->Add(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
				3404	"margin=0.1, fontsize=10, label=\"{",
				3405	that);
				3406	AttributePrinter printer(this);
				3407	NodeInfo* info = that->info();
				3408	printer.PrintBit("NI", info->follows_newline_interest);
				3409	printer.PrintBit("WI", info->follows_word_interest);
				3410	printer.PrintBit("SI", info->follows_start_interest);
				3411	Label* label = that->label();
				3412	if (label->is_bound())
				3413	printer.PrintPositive("@", label->pos());
				3414	stream()->Add("}\"];\n");
				3415	stream()->Add(" a%p -> n%p [style=dashed, color=grey, "
				3416	"arrowhead=none];\n", that, that);
				3417	}
				3418
				3419
				3420	static const bool kPrintDispatchTable = false;
				3421	void DotPrinter::VisitChoice(ChoiceNode* that) {
				3422	if (kPrintDispatchTable) {
				3423	stream()->Add(" n%p [shape=Mrecord, label=\"", that);
				3424	TableEntryHeaderPrinter header_printer(stream());
				3425	that->GetTable(ignore_case_)->ForEach(&header_printer);
				3426	stream()->Add("\"]\n", that);
				3427	PrintAttributes(that);
				3428	TableEntryBodyPrinter body_printer(stream(), that);
				3429	that->GetTable(ignore_case_)->ForEach(&body_printer);
				3430	} else {
				3431	stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);
				3432	for (int i = 0; i < that->alternatives()->length(); i++) {
				3433	GuardedAlternative alt = that->alternatives()->at(i);
				3434	stream()->Add(" n%p -> n%p;\n", that, alt.node());
				3435	}
				3436	}
				3437	for (int i = 0; i < that->alternatives()->length(); i++) {
				3438	GuardedAlternative alt = that->alternatives()->at(i);
				3439	alt.node()->Accept(this);
				3440	}
				3441	}
				3442
				3443
				3444	void DotPrinter::VisitText(TextNode* that) {
				3445	stream()->Add(" n%p [label=\"", that);
				3446	for (int i = 0; i < that->elements()->length(); i++) {
				3447	if (i > 0) stream()->Add(" ");
				3448	TextElement elm = that->elements()->at(i);
				3449	switch (elm.type) {
				3450	case TextElement::ATOM: {
				3451	stream()->Add("'%w'", elm.data.u_atom->data());
				3452	break;
				3453	}
				3454	case TextElement::CHAR_CLASS: {
				3455	RegExpCharacterClass* node = elm.data.u_char_class;
				3456	stream()->Add("[");
				3457	if (node->is_negated())
				3458	stream()->Add("^");
				3459	for (int j = 0; j < node->ranges()->length(); j++) {
				3460	CharacterRange range = node->ranges()->at(j);
				3461	stream()->Add("%k-%k", range.from(), range.to());
				3462	}
				3463	stream()->Add("]");
				3464	break;
				3465	}
				3466	default:
				3467	UNREACHABLE();
				3468	}
				3469	}
				3470	stream()->Add("\", shape=box, peripheries=2];\n");
				3471	PrintAttributes(that);
				3472	stream()->Add(" n%p -> n%p;\n", that, that->on_success());
				3473	Visit(that->on_success());
				3474	}
				3475
				3476
				3477	void DotPrinter::VisitBackReference(BackReferenceNode* that) {
				3478	stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
				3479	that,
				3480	that->start_register(),
				3481	that->end_register());
				3482	PrintAttributes(that);
				3483	stream()->Add(" n%p -> n%p;\n", that, that->on_success());
				3484	Visit(that->on_success());
				3485	}
				3486
				3487
				3488	void DotPrinter::VisitEnd(EndNode* that) {
				3489	stream()->Add(" n%p [style=bold, shape=point];\n", that);
				3490	PrintAttributes(that);
				3491	}
				3492
				3493
				3494	void DotPrinter::VisitAssertion(AssertionNode* that) {
				3495	stream()->Add(" n%p [", that);
				3496	switch (that->type()) {
				3497	case AssertionNode::AT_END:
				3498	stream()->Add("label=\"$\", shape=septagon");
				3499	break;
				3500	case AssertionNode::AT_START:
				3501	stream()->Add("label=\"^\", shape=septagon");
				3502	break;
				3503	case AssertionNode::AT_BOUNDARY:
				3504	stream()->Add("label=\"\\b\", shape=septagon");
				3505	break;
				3506	case AssertionNode::AT_NON_BOUNDARY:
				3507	stream()->Add("label=\"\\B\", shape=septagon");
				3508	break;
				3509	case AssertionNode::AFTER_NEWLINE:
				3510	stream()->Add("label=\"(?<=\\n)\", shape=septagon");
				3511	break;
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	3512	case AssertionNode::AFTER_WORD_CHARACTER:
				3513	stream()->Add("label=\"(?<=\\w)\", shape=septagon");
				3514	break;
				3515	case AssertionNode::AFTER_NONWORD_CHARACTER:
				3516	stream()->Add("label=\"(?<=\\W)\", shape=septagon");
				3517	break;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	3518	}
				3519	stream()->Add("];\n");
				3520	PrintAttributes(that);
				3521	RegExpNode* successor = that->on_success();
				3522	stream()->Add(" n%p -> n%p;\n", that, successor);
				3523	Visit(successor);
				3524	}
				3525
				3526
				3527	void DotPrinter::VisitAction(ActionNode* that) {
				3528	stream()->Add(" n%p [", that);
				3529	switch (that->type_) {
				3530	case ActionNode::SET_REGISTER:
				3531	stream()->Add("label=\"$%i:=%i\", shape=octagon",
				3532	that->data_.u_store_register.reg,
				3533	that->data_.u_store_register.value);
				3534	break;
				3535	case ActionNode::INCREMENT_REGISTER:
				3536	stream()->Add("label=\"$%i++\", shape=octagon",
				3537	that->data_.u_increment_register.reg);
				3538	break;
				3539	case ActionNode::STORE_POSITION:
				3540	stream()->Add("label=\"$%i:=$pos\", shape=octagon",
				3541	that->data_.u_position_register.reg);
				3542	break;
				3543	case ActionNode::BEGIN_SUBMATCH:
				3544	stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
				3545	that->data_.u_submatch.current_position_register);
				3546	break;
				3547	case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
				3548	stream()->Add("label=\"escape\", shape=septagon");
				3549	break;
				3550	case ActionNode::EMPTY_MATCH_CHECK:
				3551	stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
				3552	that->data_.u_empty_match_check.start_register,
				3553	that->data_.u_empty_match_check.repetition_register,
				3554	that->data_.u_empty_match_check.repetition_limit);
				3555	break;
				3556	case ActionNode::CLEAR_CAPTURES: {
				3557	stream()->Add("label=\"clear $%i to $%i\", shape=septagon",
				3558	that->data_.u_clear_captures.range_from,
				3559	that->data_.u_clear_captures.range_to);
				3560	break;
				3561	}
				3562	}
				3563	stream()->Add("];\n");
				3564	PrintAttributes(that);
				3565	RegExpNode* successor = that->on_success();
				3566	stream()->Add(" n%p -> n%p;\n", that, successor);
				3567	Visit(successor);
				3568	}
				3569
				3570
				3571	class DispatchTableDumper {
				3572	public:
				3573	explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
				3574	void Call(uc16 key, DispatchTable::Entry entry);
				3575	StringStream* stream() { return stream_; }
				3576	private:
				3577	StringStream* stream_;
				3578	};
				3579
				3580
				3581	void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
				3582	stream()->Add("[%k-%k]: {", key, entry.to());
				3583	OutSet* set = entry.out_set();
				3584	bool first = true;
				3585	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				3586	if (set->Get(i)) {
				3587	if (first) {
				3588	first = false;
				3589	} else {
				3590	stream()->Add(", ");
				3591	}
				3592	stream()->Add("%i", i);
				3593	}
				3594	}
				3595	stream()->Add("}\n");
				3596	}
				3597
				3598
				3599	void DispatchTable::Dump() {
				3600	HeapStringAllocator alloc;
				3601	StringStream stream(&alloc);
				3602	DispatchTableDumper dumper(&stream);
				3603	tree()->ForEach(&dumper);
				3604	OS::PrintError("%s", *stream.ToCString());
				3605	}
				3606
				3607
				3608	void RegExpEngine::DotPrint(const char* label,
				3609	RegExpNode* node,
				3610	bool ignore_case) {
				3611	DotPrinter printer(ignore_case);
				3612	printer.PrintNode(label, node);
				3613	}
				3614
				3615
				3616	#endif // DEBUG
				3617
				3618
				3619	// -------------------------------------------------------------------
				3620	// Tree to graph conversion
				3621
				3622	static const int kSpaceRangeCount = 20;
				3623	static const int kSpaceRangeAsciiCount = 4;
				3624	static const uc16 kSpaceRanges[kSpaceRangeCount] = { 0x0009, 0x000D, 0x0020,
				3625	0x0020, 0x00A0, 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A,
				3626	0x2028, 0x2029, 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 };
				3627
				3628	static const int kWordRangeCount = 8;
				3629	static const uc16 kWordRanges[kWordRangeCount] = { '0', '9', 'A', 'Z', '_',
				3630	'_', 'a', 'z' };
				3631
				3632	static const int kDigitRangeCount = 2;
				3633	static const uc16 kDigitRanges[kDigitRangeCount] = { '0', '9' };
				3634
				3635	static const int kLineTerminatorRangeCount = 6;
				3636	static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = { 0x000A,
				3637	0x000A, 0x000D, 0x000D, 0x2028, 0x2029 };
				3638
				3639	RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
				3640	RegExpNode* on_success) {
				3641	ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
				3642	elms->Add(TextElement::Atom(this));
				3643	return new TextNode(elms, on_success);
				3644	}
				3645
				3646
				3647	RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
				3648	RegExpNode* on_success) {
				3649	return new TextNode(elements(), on_success);
				3650	}
				3651
				3652	static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
				3653	const uc16* special_class,
				3654	int length) {
				3655	ASSERT(ranges->length() != 0);
				3656	ASSERT(length != 0);
				3657	ASSERT(special_class[0] != 0);
				3658	if (ranges->length() != (length >> 1) + 1) {
				3659	return false;
				3660	}
				3661	CharacterRange range = ranges->at(0);
				3662	if (range.from() != 0) {
				3663	return false;
				3664	}
				3665	for (int i = 0; i < length; i += 2) {
				3666	if (special_class[i] != (range.to() + 1)) {
				3667	return false;
				3668	}
				3669	range = ranges->at((i >> 1) + 1);
				3670	if (special_class[i+1] != range.from() - 1) {
				3671	return false;
				3672	}
				3673	}
				3674	if (range.to() != 0xffff) {
				3675	return false;
				3676	}
				3677	return true;
				3678	}
				3679
				3680
				3681	static bool CompareRanges(ZoneList<CharacterRange>* ranges,
				3682	const uc16* special_class,
				3683	int length) {
				3684	if (ranges->length() * 2 != length) {
				3685	return false;
				3686	}
				3687	for (int i = 0; i < length; i += 2) {
				3688	CharacterRange range = ranges->at(i >> 1);
				3689	if (range.from() != special_class[i] \|\| range.to() != special_class[i+1]) {
				3690	return false;
				3691	}
				3692	}
				3693	return true;
				3694	}
				3695
				3696
				3697	bool RegExpCharacterClass::is_standard() {
				3698	// TODO(lrn): Remove need for this function, by not throwing away information
				3699	// along the way.
				3700	if (is_negated_) {
				3701	return false;
				3702	}
				3703	if (set_.is_standard()) {
				3704	return true;
				3705	}
				3706	if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
				3707	set_.set_standard_set_type('s');
				3708	return true;
				3709	}
				3710	if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
				3711	set_.set_standard_set_type('S');
				3712	return true;
				3713	}
				3714	if (CompareInverseRanges(set_.ranges(),
				3715	kLineTerminatorRanges,
				3716	kLineTerminatorRangeCount)) {
				3717	set_.set_standard_set_type('.');
				3718	return true;
				3719	}
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	3720	if (CompareRanges(set_.ranges(),
				3721	kLineTerminatorRanges,
				3722	kLineTerminatorRangeCount)) {
				3723	set_.set_standard_set_type('n');
				3724	return true;
				3725	}
				3726	if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {
				3727	set_.set_standard_set_type('w');
				3728	return true;
				3729	}
				3730	if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {
				3731	set_.set_standard_set_type('W');
				3732	return true;
				3733	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	3734	return false;
				3735	}
				3736
				3737
				3738	RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
				3739	RegExpNode* on_success) {
				3740	return new TextNode(this, on_success);
				3741	}
				3742
				3743
				3744	RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
				3745	RegExpNode* on_success) {
				3746	ZoneList<RegExpTree> alternatives = this->alternatives();
				3747	int length = alternatives->length();
				3748	ChoiceNode* result = new ChoiceNode(length);
				3749	for (int i = 0; i < length; i++) {
				3750	GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
				3751	on_success));
				3752	result->AddAlternative(alternative);
				3753	}
				3754	return result;
				3755	}
				3756
				3757
				3758	RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
				3759	RegExpNode* on_success) {
				3760	return ToNode(min(),
				3761	max(),
				3762	is_greedy(),
				3763	body(),
				3764	compiler,
				3765	on_success);
				3766	}
				3767
				3768
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	3769	// Scoped object to keep track of how much we unroll quantifier loops in the
				3770	// regexp graph generator.
				3771	class RegExpExpansionLimiter {
				3772	public:
				3773	static const int kMaxExpansionFactor = 6;
				3774	RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
				3775	: compiler_(compiler),
				3776	saved_expansion_factor_(compiler->current_expansion_factor()),
				3777	ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
				3778	ASSERT(factor > 0);
				3779	if (ok_to_expand_) {
				3780	if (factor > kMaxExpansionFactor) {
				3781	// Avoid integer overflow of the current expansion factor.
				3782	ok_to_expand_ = false;
				3783	compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
				3784	} else {
				3785	int new_factor = saved_expansion_factor_ * factor;
				3786	ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
				3787	compiler->set_current_expansion_factor(new_factor);
				3788	}
				3789	}
				3790	}
				3791
				3792	~RegExpExpansionLimiter() {
				3793	compiler_->set_current_expansion_factor(saved_expansion_factor_);
				3794	}
				3795
				3796	bool ok_to_expand() { return ok_to_expand_; }
				3797
				3798	private:
				3799	RegExpCompiler* compiler_;
				3800	int saved_expansion_factor_;
				3801	bool ok_to_expand_;
				3802
				3803	DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
				3804	};
				3805
				3806
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	3807	RegExpNode* RegExpQuantifier::ToNode(int min,
				3808	int max,
				3809	bool is_greedy,
				3810	RegExpTree* body,
				3811	RegExpCompiler* compiler,
				3812	RegExpNode* on_success,
				3813	bool not_at_start) {
				3814	// x{f, t} becomes this:
				3815	//
				3816	// (r++)<-.
				3817	// \| `
				3818	// \| (x)
				3819	// v ^
				3820	// (r=0)-->(?)---/ [if r < t]
				3821	// \|
				3822	// [if r >= f] \----> ...
				3823	//
				3824
				3825	// 15.10.2.5 RepeatMatcher algorithm.
				3826	// The parser has already eliminated the case where max is 0. In the case
				3827	// where max_match is zero the parser has removed the quantifier if min was
				3828	// > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
				3829
				3830	// If we know that we cannot match zero length then things are a little
				3831	// simpler since we don't need to make the special zero length match check
				3832	// from step 2.1. If the min and max are small we can unroll a little in
				3833	// this case.
				3834	static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
				3835	static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
				3836	if (max == 0) return on_success; // This can happen due to recursion.
				3837	bool body_can_be_empty = (body->min_match() == 0);
				3838	int body_start_reg = RegExpCompiler::kNoRegister;
				3839	Interval capture_registers = body->CaptureRegisters();
				3840	bool needs_capture_clearing = !capture_registers.is_empty();
				3841	if (body_can_be_empty) {
				3842	body_start_reg = compiler->AllocateRegister();
				3843	} else if (FLAG_regexp_optimization && !needs_capture_clearing) {
				3844	// Only unroll if there are no captures and the body can't be
				3845	// empty.
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	3846	{
				3847	RegExpExpansionLimiter limiter(
				3848	compiler, min + ((max != min) ? 1 : 0));
				3849	if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
				3850	int new_max = (max == kInfinity) ? max : max - min;
				3851	// Recurse once to get the loop or optional matches after the fixed
				3852	// ones.
				3853	RegExpNode* answer = ToNode(
				3854	0, new_max, is_greedy, body, compiler, on_success, true);
				3855	// Unroll the forced matches from 0 to min. This can cause chains of
				3856	// TextNodes (which the parser does not generate). These should be
				3857	// combined if it turns out they hinder good code generation.
				3858	for (int i = 0; i < min; i++) {
				3859	answer = body->ToNode(compiler, answer);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	3860	}
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	3861	return answer;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	3862	}
Ben Murdoch	257744e	2011-11-30 15:57:28 +0000	[diff] [blame^]	3863	}
				3864	if (max <= kMaxUnrolledMaxMatches && min == 0) {
				3865	ASSERT(max > 0); // Due to the 'if' above.
				3866	RegExpExpansionLimiter limiter(compiler, max);
				3867	if (limiter.ok_to_expand()) {
				3868	// Unroll the optional matches up to max.
				3869	RegExpNode* answer = on_success;
				3870	for (int i = 0; i < max; i++) {
				3871	ChoiceNode* alternation = new ChoiceNode(2);
				3872	if (is_greedy) {
				3873	alternation->AddAlternative(
				3874	GuardedAlternative(body->ToNode(compiler, answer)));
				3875	alternation->AddAlternative(GuardedAlternative(on_success));
				3876	} else {
				3877	alternation->AddAlternative(GuardedAlternative(on_success));
				3878	alternation->AddAlternative(
				3879	GuardedAlternative(body->ToNode(compiler, answer)));
				3880	}
				3881	answer = alternation;
				3882	if (not_at_start) alternation->set_not_at_start();
				3883	}
				3884	return answer;
				3885	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	3886	}
				3887	}
				3888	bool has_min = min > 0;
				3889	bool has_max = max < RegExpTree::kInfinity;
				3890	bool needs_counter = has_min \|\| has_max;
				3891	int reg_ctr = needs_counter
				3892	? compiler->AllocateRegister()
				3893	: RegExpCompiler::kNoRegister;
				3894	LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
				3895	if (not_at_start) center->set_not_at_start();
				3896	RegExpNode* loop_return = needs_counter
				3897	? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
				3898	: static_cast<RegExpNode*>(center);
				3899	if (body_can_be_empty) {
				3900	// If the body can be empty we need to check if it was and then
				3901	// backtrack.
				3902	loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
				3903	reg_ctr,
				3904	min,
				3905	loop_return);
				3906	}
				3907	RegExpNode* body_node = body->ToNode(compiler, loop_return);
				3908	if (body_can_be_empty) {
				3909	// If the body can be empty we need to store the start position
				3910	// so we can bail out if it was empty.
				3911	body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
				3912	}
				3913	if (needs_capture_clearing) {
				3914	// Before entering the body of this loop we need to clear captures.
				3915	body_node = ActionNode::ClearCaptures(capture_registers, body_node);
				3916	}
				3917	GuardedAlternative body_alt(body_node);
				3918	if (has_max) {
				3919	Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
				3920	body_alt.AddGuard(body_guard);
				3921	}
				3922	GuardedAlternative rest_alt(on_success);
				3923	if (has_min) {
				3924	Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
				3925	rest_alt.AddGuard(rest_guard);
				3926	}
				3927	if (is_greedy) {
				3928	center->AddLoopAlternative(body_alt);
				3929	center->AddContinueAlternative(rest_alt);
				3930	} else {
				3931	center->AddContinueAlternative(rest_alt);
				3932	center->AddLoopAlternative(body_alt);
				3933	}
				3934	if (needs_counter) {
				3935	return ActionNode::SetRegister(reg_ctr, 0, center);
				3936	} else {
				3937	return center;
				3938	}
				3939	}
				3940
				3941
				3942	RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
				3943	RegExpNode* on_success) {
				3944	NodeInfo info;
				3945	switch (type()) {
				3946	case START_OF_LINE:
				3947	return AssertionNode::AfterNewline(on_success);
				3948	case START_OF_INPUT:
				3949	return AssertionNode::AtStart(on_success);
				3950	case BOUNDARY:
				3951	return AssertionNode::AtBoundary(on_success);
				3952	case NON_BOUNDARY:
				3953	return AssertionNode::AtNonBoundary(on_success);
				3954	case END_OF_INPUT:
				3955	return AssertionNode::AtEnd(on_success);
				3956	case END_OF_LINE: {
				3957	// Compile $ in multiline regexps as an alternation with a positive
				3958	// lookahead in one side and an end-of-input on the other side.
				3959	// We need two registers for the lookahead.
				3960	int stack_pointer_register = compiler->AllocateRegister();
				3961	int position_register = compiler->AllocateRegister();
				3962	// The ChoiceNode to distinguish between a newline and end-of-input.
				3963	ChoiceNode* result = new ChoiceNode(2);
				3964	// Create a newline atom.
				3965	ZoneList<CharacterRange>* newline_ranges =
				3966	new ZoneList<CharacterRange>(3);
				3967	CharacterRange::AddClassEscape('n', newline_ranges);
				3968	RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');
				3969	TextNode* newline_matcher = new TextNode(
				3970	newline_atom,
				3971	ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
				3972	position_register,
				3973	0, // No captures inside.
				3974	-1, // Ignored if no captures.
				3975	on_success));
				3976	// Create an end-of-input matcher.
				3977	RegExpNode* end_of_line = ActionNode::BeginSubmatch(
				3978	stack_pointer_register,
				3979	position_register,
				3980	newline_matcher);
				3981	// Add the two alternatives to the ChoiceNode.
				3982	GuardedAlternative eol_alternative(end_of_line);
				3983	result->AddAlternative(eol_alternative);
				3984	GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
				3985	result->AddAlternative(end_alternative);
				3986	return result;
				3987	}
				3988	default:
				3989	UNREACHABLE();
				3990	}
				3991	return on_success;
				3992	}
				3993
				3994
				3995	RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
				3996	RegExpNode* on_success) {
				3997	return new BackReferenceNode(RegExpCapture::StartRegister(index()),
				3998	RegExpCapture::EndRegister(index()),
				3999	on_success);
				4000	}
				4001
				4002
				4003	RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
				4004	RegExpNode* on_success) {
				4005	return on_success;
				4006	}
				4007
				4008
				4009	RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
				4010	RegExpNode* on_success) {
				4011	int stack_pointer_register = compiler->AllocateRegister();
				4012	int position_register = compiler->AllocateRegister();
				4013
				4014	const int registers_per_capture = 2;
				4015	const int register_of_first_capture = 2;
				4016	int register_count = capture_count_ * registers_per_capture;
				4017	int register_start =
				4018	register_of_first_capture + capture_from_ * registers_per_capture;
				4019
				4020	RegExpNode* success;
				4021	if (is_positive()) {
				4022	RegExpNode* node = ActionNode::BeginSubmatch(
				4023	stack_pointer_register,
				4024	position_register,
				4025	body()->ToNode(
				4026	compiler,
				4027	ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
				4028	position_register,
				4029	register_count,
				4030	register_start,
				4031	on_success)));
				4032	return node;
				4033	} else {
				4034	// We use a ChoiceNode for a negative lookahead because it has most of
				4035	// the characteristics we need. It has the body of the lookahead as its
				4036	// first alternative and the expression after the lookahead of the second
				4037	// alternative. If the first alternative succeeds then the
				4038	// NegativeSubmatchSuccess will unwind the stack including everything the
				4039	// choice node set up and backtrack. If the first alternative fails then
				4040	// the second alternative is tried, which is exactly the desired result
				4041	// for a negative lookahead. The NegativeLookaheadChoiceNode is a special
				4042	// ChoiceNode that knows to ignore the first exit when calculating quick
				4043	// checks.
				4044	GuardedAlternative body_alt(
				4045	body()->ToNode(
				4046	compiler,
				4047	success = new NegativeSubmatchSuccess(stack_pointer_register,
				4048	position_register,
				4049	register_count,
				4050	register_start)));
				4051	ChoiceNode* choice_node =
				4052	new NegativeLookaheadChoiceNode(body_alt,
				4053	GuardedAlternative(on_success));
				4054	return ActionNode::BeginSubmatch(stack_pointer_register,
				4055	position_register,
				4056	choice_node);
				4057	}
				4058	}
				4059
				4060
				4061	RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
				4062	RegExpNode* on_success) {
				4063	return ToNode(body(), index(), compiler, on_success);
				4064	}
				4065
				4066
				4067	RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
				4068	int index,
				4069	RegExpCompiler* compiler,
				4070	RegExpNode* on_success) {
				4071	int start_reg = RegExpCapture::StartRegister(index);
				4072	int end_reg = RegExpCapture::EndRegister(index);
				4073	RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
				4074	RegExpNode* body_node = body->ToNode(compiler, store_end);
				4075	return ActionNode::StorePosition(start_reg, true, body_node);
				4076	}
				4077
				4078
				4079	RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
				4080	RegExpNode* on_success) {
				4081	ZoneList<RegExpTree> children = nodes();
				4082	RegExpNode* current = on_success;
				4083	for (int i = children->length() - 1; i >= 0; i--) {
				4084	current = children->at(i)->ToNode(compiler, current);
				4085	}
				4086	return current;
				4087	}
				4088
				4089
				4090	static void AddClass(const uc16* elmv,
				4091	int elmc,
				4092	ZoneList<CharacterRange>* ranges) {
				4093	for (int i = 0; i < elmc; i += 2) {
				4094	ASSERT(elmv[i] <= elmv[i + 1]);
				4095	ranges->Add(CharacterRange(elmv[i], elmv[i + 1]));
				4096	}
				4097	}
				4098
				4099
				4100	static void AddClassNegated(const uc16 *elmv,
				4101	int elmc,
				4102	ZoneList<CharacterRange>* ranges) {
				4103	ASSERT(elmv[0] != 0x0000);
				4104	ASSERT(elmv[elmc-1] != String::kMaxUC16CharCode);
				4105	uc16 last = 0x0000;
				4106	for (int i = 0; i < elmc; i += 2) {
				4107	ASSERT(last <= elmv[i] - 1);
				4108	ASSERT(elmv[i] <= elmv[i + 1]);
				4109	ranges->Add(CharacterRange(last, elmv[i] - 1));
				4110	last = elmv[i + 1] + 1;
				4111	}
				4112	ranges->Add(CharacterRange(last, String::kMaxUC16CharCode));
				4113	}
				4114
				4115
				4116	void CharacterRange::AddClassEscape(uc16 type,
				4117	ZoneList<CharacterRange>* ranges) {
				4118	switch (type) {
				4119	case 's':
				4120	AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
				4121	break;
				4122	case 'S':
				4123	AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
				4124	break;
				4125	case 'w':
				4126	AddClass(kWordRanges, kWordRangeCount, ranges);
				4127	break;
				4128	case 'W':
				4129	AddClassNegated(kWordRanges, kWordRangeCount, ranges);
				4130	break;
				4131	case 'd':
				4132	AddClass(kDigitRanges, kDigitRangeCount, ranges);
				4133	break;
				4134	case 'D':
				4135	AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
				4136	break;
				4137	case '.':
				4138	AddClassNegated(kLineTerminatorRanges,
				4139	kLineTerminatorRangeCount,
				4140	ranges);
				4141	break;
				4142	// This is not a character range as defined by the spec but a
				4143	// convenient shorthand for a character class that matches any
				4144	// character.
				4145	case '*':
				4146	ranges->Add(CharacterRange::Everything());
				4147	break;
				4148	// This is the set of characters matched by the $ and ^ symbols
				4149	// in multiline mode.
				4150	case 'n':
				4151	AddClass(kLineTerminatorRanges,
				4152	kLineTerminatorRangeCount,
				4153	ranges);
				4154	break;
				4155	default:
				4156	UNREACHABLE();
				4157	}
				4158	}
				4159
				4160
				4161	Vector<const uc16> CharacterRange::GetWordBounds() {
				4162	return Vector<const uc16>(kWordRanges, kWordRangeCount);
				4163	}
				4164
				4165
				4166	class CharacterRangeSplitter {
				4167	public:
				4168	CharacterRangeSplitter(ZoneList<CharacterRange>** included,
				4169	ZoneList<CharacterRange>** excluded)
				4170	: included_(included),
				4171	excluded_(excluded) { }
				4172	void Call(uc16 from, DispatchTable::Entry entry);
				4173
				4174	static const int kInBase = 0;
				4175	static const int kInOverlay = 1;
				4176
				4177	private:
				4178	ZoneList<CharacterRange>** included_;
				4179	ZoneList<CharacterRange>** excluded_;
				4180	};
				4181
				4182
				4183	void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
				4184	if (!entry.out_set()->Get(kInBase)) return;
				4185	ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
				4186	? included_
				4187	: excluded_;
				4188	if (target == NULL) target = new ZoneList<CharacterRange>(2);
				4189	(*target)->Add(CharacterRange(entry.from(), entry.to()));
				4190	}
				4191
				4192
				4193	void CharacterRange::Split(ZoneList<CharacterRange>* base,
				4194	Vector<const uc16> overlay,
				4195	ZoneList<CharacterRange>** included,
				4196	ZoneList<CharacterRange>** excluded) {
				4197	ASSERT_EQ(NULL, *included);
				4198	ASSERT_EQ(NULL, *excluded);
				4199	DispatchTable table;
				4200	for (int i = 0; i < base->length(); i++)
				4201	table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
				4202	for (int i = 0; i < overlay.length(); i += 2) {
				4203	table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
				4204	CharacterRangeSplitter::kInOverlay);
				4205	}
				4206	CharacterRangeSplitter callback(included, excluded);
				4207	table.ForEach(&callback);
				4208	}
				4209
				4210
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4211	void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
				4212	bool is_ascii) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	4213	Isolate* isolate = Isolate::Current();
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4214	uc16 bottom = from();
				4215	uc16 top = to();
				4216	if (is_ascii) {
				4217	if (bottom > String::kMaxAsciiCharCode) return;
				4218	if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;
				4219	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4220	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4221	if (top == bottom) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4222	// If this is a singleton we just expand the one character.
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	4223	int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4224	for (int i = 0; i < length; i++) {
				4225	uc32 chr = chars[i];
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4226	if (chr != bottom) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4227	ranges->Add(CharacterRange::Singleton(chars[i]));
				4228	}
				4229	}
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4230	} else {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4231	// If this is a range we expand the characters block by block,
				4232	// expanding contiguous subranges (blocks) one at a time.
				4233	// The approach is as follows. For a given start character we
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4234	// look up the remainder of the block that contains it (represented
				4235	// by the end point), for instance we find 'z' if the character
				4236	// is 'c'. A block is characterized by the property
				4237	// that all characters uncanonicalize in the same way, except that
				4238	// each entry in the result is incremented by the distance from the first
				4239	// element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
				4240	// the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
				4241	// Once we've found the end point we look up its uncanonicalization
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4242	// and produce a range for each element. For instance for [c-f]
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4243	// we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4244	// add a range if it is not already contained in the input, so [c-f]
				4245	// will be skipped but [C-F] will be added. If this range is not
				4246	// completely contained in a block we do this for all the blocks
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4247	// covered by the range (handling characters that is not in a block
				4248	// as a "singleton block").
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4249	unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4250	int pos = bottom;
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4251	while (pos < top) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	4252	int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4253	uc16 block_end;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4254	if (length == 0) {
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4255	block_end = pos;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4256	} else {
				4257	ASSERT_EQ(1, length);
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4258	block_end = range[0];
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4259	}
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4260	int end = (block_end > top) ? top : block_end;
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	4261	length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4262	for (int i = 0; i < length; i++) {
				4263	uc32 c = range[i];
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4264	uc16 range_from = c - (block_end - pos);
				4265	uc16 range_to = c - (block_end - end);
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4266	if (!(bottom <= range_from && range_to <= top)) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4267	ranges->Add(CharacterRange(range_from, range_to));
				4268	}
				4269	}
Ben Murdoch	bb769b2	2010-08-11 14:56:33 +0100	[diff] [blame]	4270	pos = end + 1;
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4271	}
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4272	}
				4273	}
				4274
				4275
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4276	bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
				4277	ASSERT_NOT_NULL(ranges);
				4278	int n = ranges->length();
				4279	if (n <= 1) return true;
				4280	int max = ranges->at(0).to();
				4281	for (int i = 1; i < n; i++) {
				4282	CharacterRange next_range = ranges->at(i);
				4283	if (next_range.from() <= max + 1) return false;
				4284	max = next_range.to();
				4285	}
				4286	return true;
				4287	}
				4288
				4289	SetRelation CharacterRange::WordCharacterRelation(
				4290	ZoneList<CharacterRange>* range) {
				4291	ASSERT(IsCanonical(range));
				4292	int i = 0; // Word character range index.
				4293	int j = 0; // Argument range index.
				4294	ASSERT_NE(0, kWordRangeCount);
				4295	SetRelation result;
				4296	if (range->length() == 0) {
				4297	result.SetElementsInSecondSet();
				4298	return result;
				4299	}
				4300	CharacterRange argument_range = range->at(0);
				4301	CharacterRange word_range = CharacterRange(kWordRanges[0], kWordRanges[1]);
				4302	while (i < kWordRangeCount && j < range->length()) {
				4303	// Check the two ranges for the five cases:
				4304	// - no overlap.
				4305	// - partial overlap (there are elements in both ranges that isn't
				4306	// in the other, and there are also elements that are in both).
				4307	// - argument range entirely inside word range.
				4308	// - word range entirely inside argument range.
				4309	// - ranges are completely equal.
				4310
				4311	// First check for no overlap. The earlier range is not in the other set.
				4312	if (argument_range.from() > word_range.to()) {
				4313	// Ranges are disjoint. The earlier word range contains elements that
				4314	// cannot be in the argument set.
				4315	result.SetElementsInSecondSet();
				4316	} else if (word_range.from() > argument_range.to()) {
				4317	// Ranges are disjoint. The earlier argument range contains elements that
				4318	// cannot be in the word set.
				4319	result.SetElementsInFirstSet();
				4320	} else if (word_range.from() <= argument_range.from() &&
				4321	word_range.to() >= argument_range.from()) {
				4322	result.SetElementsInBothSets();
				4323	// argument range completely inside word range.
				4324	if (word_range.from() < argument_range.from() \|\|
				4325	word_range.to() > argument_range.from()) {
				4326	result.SetElementsInSecondSet();
				4327	}
				4328	} else if (word_range.from() >= argument_range.from() &&
				4329	word_range.to() <= argument_range.from()) {
				4330	result.SetElementsInBothSets();
				4331	result.SetElementsInFirstSet();
				4332	} else {
				4333	// There is overlap, and neither is a subrange of the other
				4334	result.SetElementsInFirstSet();
				4335	result.SetElementsInSecondSet();
				4336	result.SetElementsInBothSets();
				4337	}
				4338	if (result.NonTrivialIntersection()) {
				4339	// The result is as (im)precise as we can possibly make it.
				4340	return result;
				4341	}
				4342	// Progress the range(s) with minimal to-character.
				4343	uc16 word_to = word_range.to();
				4344	uc16 argument_to = argument_range.to();
				4345	if (argument_to <= word_to) {
				4346	j++;
				4347	if (j < range->length()) {
				4348	argument_range = range->at(j);
				4349	}
				4350	}
				4351	if (word_to <= argument_to) {
				4352	i += 2;
				4353	if (i < kWordRangeCount) {
				4354	word_range = CharacterRange(kWordRanges[i], kWordRanges[i + 1]);
				4355	}
				4356	}
				4357	}
				4358	// Check if anything wasn't compared in the loop.
				4359	if (i < kWordRangeCount) {
				4360	// word range contains something not in argument range.
				4361	result.SetElementsInSecondSet();
				4362	} else if (j < range->length()) {
				4363	// Argument range contains something not in word range.
				4364	result.SetElementsInFirstSet();
				4365	}
				4366
				4367	return result;
				4368	}
				4369
				4370
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4371	ZoneList<CharacterRange>* CharacterSet::ranges() {
				4372	if (ranges_ == NULL) {
				4373	ranges_ = new ZoneList<CharacterRange>(2);
				4374	CharacterRange::AddClassEscape(standard_set_type_, ranges_);
				4375	}
				4376	return ranges_;
				4377	}
				4378
				4379
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4380	// Move a number of elements in a zonelist to another position
				4381	// in the same list. Handles overlapping source and target areas.
				4382	static void MoveRanges(ZoneList<CharacterRange>* list,
				4383	int from,
				4384	int to,
				4385	int count) {
				4386	// Ranges are potentially overlapping.
				4387	if (from < to) {
				4388	for (int i = count - 1; i >= 0; i--) {
				4389	list->at(to + i) = list->at(from + i);
				4390	}
				4391	} else {
				4392	for (int i = 0; i < count; i++) {
				4393	list->at(to + i) = list->at(from + i);
				4394	}
				4395	}
				4396	}
				4397
				4398
				4399	static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
				4400	int count,
				4401	CharacterRange insert) {
				4402	// Inserts a range into list[0..count[, which must be sorted
				4403	// by from value and non-overlapping and non-adjacent, using at most
				4404	// list[0..count] for the result. Returns the number of resulting
				4405	// canonicalized ranges. Inserting a range may collapse existing ranges into
				4406	// fewer ranges, so the return value can be anything in the range 1..count+1.
				4407	uc16 from = insert.from();
				4408	uc16 to = insert.to();
				4409	int start_pos = 0;
				4410	int end_pos = count;
				4411	for (int i = count - 1; i >= 0; i--) {
				4412	CharacterRange current = list->at(i);
				4413	if (current.from() > to + 1) {
				4414	end_pos = i;
				4415	} else if (current.to() + 1 < from) {
				4416	start_pos = i + 1;
				4417	break;
				4418	}
				4419	}
				4420
				4421	// Inserted range overlaps, or is adjacent to, ranges at positions
				4422	// [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
				4423	// not affected by the insertion.
				4424	// If start_pos == end_pos, the range must be inserted before start_pos.
				4425	// if start_pos < end_pos, the entire range from start_pos to end_pos
				4426	// must be merged with the insert range.
				4427
				4428	if (start_pos == end_pos) {
				4429	// Insert between existing ranges at position start_pos.
				4430	if (start_pos < count) {
				4431	MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
				4432	}
				4433	list->at(start_pos) = insert;
				4434	return count + 1;
				4435	}
				4436	if (start_pos + 1 == end_pos) {
				4437	// Replace single existing range at position start_pos.
				4438	CharacterRange to_replace = list->at(start_pos);
				4439	int new_from = Min(to_replace.from(), from);
				4440	int new_to = Max(to_replace.to(), to);
				4441	list->at(start_pos) = CharacterRange(new_from, new_to);
				4442	return count;
				4443	}
				4444	// Replace a number of existing ranges from start_pos to end_pos - 1.
				4445	// Move the remaining ranges down.
				4446
				4447	int new_from = Min(list->at(start_pos).from(), from);
				4448	int new_to = Max(list->at(end_pos - 1).to(), to);
				4449	if (end_pos < count) {
				4450	MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
				4451	}
				4452	list->at(start_pos) = CharacterRange(new_from, new_to);
				4453	return count - (end_pos - start_pos) + 1;
				4454	}
				4455
				4456
				4457	void CharacterSet::Canonicalize() {
				4458	// Special/default classes are always considered canonical. The result
				4459	// of calling ranges() will be sorted.
				4460	if (ranges_ == NULL) return;
				4461	CharacterRange::Canonicalize(ranges_);
				4462	}
				4463
				4464
				4465	void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
				4466	if (character_ranges->length() <= 1) return;
				4467	// Check whether ranges are already canonical (increasing, non-overlapping,
				4468	// non-adjacent).
				4469	int n = character_ranges->length();
				4470	int max = character_ranges->at(0).to();
				4471	int i = 1;
				4472	while (i < n) {
				4473	CharacterRange current = character_ranges->at(i);
				4474	if (current.from() <= max + 1) {
				4475	break;
				4476	}
				4477	max = current.to();
				4478	i++;
				4479	}
				4480	// Canonical until the i'th range. If that's all of them, we are done.
				4481	if (i == n) return;
				4482
				4483	// The ranges at index i and forward are not canonicalized. Make them so by
				4484	// doing the equivalent of insertion sort (inserting each into the previous
				4485	// list, in order).
				4486	// Notice that inserting a range can reduce the number of ranges in the
				4487	// result due to combining of adjacent and overlapping ranges.
				4488	int read = i; // Range to insert.
				4489	int num_canonical = i; // Length of canonicalized part of list.
				4490	do {
				4491	num_canonical = InsertRangeInCanonicalList(character_ranges,
				4492	num_canonical,
				4493	character_ranges->at(read));
				4494	read++;
				4495	} while (read < n);
				4496	character_ranges->Rewind(num_canonical);
				4497
				4498	ASSERT(CharacterRange::IsCanonical(character_ranges));
				4499	}
				4500
				4501
				4502	// Utility function for CharacterRange::Merge. Adds a range at the end of
				4503	// a canonicalized range list, if necessary merging the range with the last
				4504	// range of the list.
				4505	static void AddRangeToSet(ZoneList<CharacterRange>* set, CharacterRange range) {
				4506	if (set == NULL) return;
				4507	ASSERT(set->length() == 0 \|\| set->at(set->length() - 1).to() < range.from());
				4508	int n = set->length();
				4509	if (n > 0) {
				4510	CharacterRange lastRange = set->at(n - 1);
				4511	if (lastRange.to() == range.from() - 1) {
				4512	set->at(n - 1) = CharacterRange(lastRange.from(), range.to());
				4513	return;
				4514	}
				4515	}
				4516	set->Add(range);
				4517	}
				4518
				4519
				4520	static void AddRangeToSelectedSet(int selector,
				4521	ZoneList<CharacterRange>* first_set,
				4522	ZoneList<CharacterRange>* second_set,
				4523	ZoneList<CharacterRange>* intersection_set,
				4524	CharacterRange range) {
				4525	switch (selector) {
				4526	case kInsideFirst:
				4527	AddRangeToSet(first_set, range);
				4528	break;
				4529	case kInsideSecond:
				4530	AddRangeToSet(second_set, range);
				4531	break;
				4532	case kInsideBoth:
				4533	AddRangeToSet(intersection_set, range);
				4534	break;
				4535	}
				4536	}
				4537
				4538
				4539
				4540	void CharacterRange::Merge(ZoneList<CharacterRange>* first_set,
				4541	ZoneList<CharacterRange>* second_set,
				4542	ZoneList<CharacterRange>* first_set_only_out,
				4543	ZoneList<CharacterRange>* second_set_only_out,
				4544	ZoneList<CharacterRange>* both_sets_out) {
				4545	// Inputs are canonicalized.
				4546	ASSERT(CharacterRange::IsCanonical(first_set));
				4547	ASSERT(CharacterRange::IsCanonical(second_set));
				4548	// Outputs are empty, if applicable.
				4549	ASSERT(first_set_only_out == NULL \|\| first_set_only_out->length() == 0);
				4550	ASSERT(second_set_only_out == NULL \|\| second_set_only_out->length() == 0);
				4551	ASSERT(both_sets_out == NULL \|\| both_sets_out->length() == 0);
				4552
				4553	// Merge sets by iterating through the lists in order of lowest "from" value,
				4554	// and putting intervals into one of three sets.
				4555
				4556	if (first_set->length() == 0) {
				4557	second_set_only_out->AddAll(*second_set);
				4558	return;
				4559	}
				4560	if (second_set->length() == 0) {
				4561	first_set_only_out->AddAll(*first_set);
				4562	return;
				4563	}
				4564	// Indices into input lists.
				4565	int i1 = 0;
				4566	int i2 = 0;
				4567	// Cache length of input lists.
				4568	int n1 = first_set->length();
				4569	int n2 = second_set->length();
				4570	// Current range. May be invalid if state is kInsideNone.
				4571	int from = 0;
				4572	int to = -1;
				4573	// Where current range comes from.
				4574	int state = kInsideNone;
				4575
				4576	while (i1 < n1 \|\| i2 < n2) {
				4577	CharacterRange next_range;
				4578	int range_source;
Leon Clarke	d91b9f7	2010-01-27 17:25:45 +0000	[diff] [blame]	4579	if (i2 == n2 \|\|
				4580	(i1 < n1 && first_set->at(i1).from() < second_set->at(i2).from())) {
				4581	// Next smallest element is in first set.
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4582	next_range = first_set->at(i1++);
				4583	range_source = kInsideFirst;
				4584	} else {
Leon Clarke	d91b9f7	2010-01-27 17:25:45 +0000	[diff] [blame]	4585	// Next smallest element is in second set.
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4586	next_range = second_set->at(i2++);
				4587	range_source = kInsideSecond;
				4588	}
				4589	if (to < next_range.from()) {
				4590	// Ranges disjoint: \|current\| \|next\|
				4591	AddRangeToSelectedSet(state,
				4592	first_set_only_out,
				4593	second_set_only_out,
				4594	both_sets_out,
				4595	CharacterRange(from, to));
				4596	from = next_range.from();
				4597	to = next_range.to();
				4598	state = range_source;
				4599	} else {
				4600	if (from < next_range.from()) {
				4601	AddRangeToSelectedSet(state,
				4602	first_set_only_out,
				4603	second_set_only_out,
				4604	both_sets_out,
				4605	CharacterRange(from, next_range.from()-1));
				4606	}
				4607	if (to < next_range.to()) {
				4608	// Ranges overlap: \|current\|
				4609	// \|next\|
				4610	AddRangeToSelectedSet(state \| range_source,
				4611	first_set_only_out,
				4612	second_set_only_out,
				4613	both_sets_out,
				4614	CharacterRange(next_range.from(), to));
				4615	from = to + 1;
				4616	to = next_range.to();
				4617	state = range_source;
				4618	} else {
				4619	// Range included: \|current\| , possibly ending at same character.
				4620	// \|next\|
				4621	AddRangeToSelectedSet(
				4622	state \| range_source,
				4623	first_set_only_out,
				4624	second_set_only_out,
				4625	both_sets_out,
				4626	CharacterRange(next_range.from(), next_range.to()));
				4627	from = next_range.to() + 1;
				4628	// If ranges end at same character, both ranges are consumed completely.
				4629	if (next_range.to() == to) state = kInsideNone;
				4630	}
				4631	}
				4632	}
				4633	AddRangeToSelectedSet(state,
				4634	first_set_only_out,
				4635	second_set_only_out,
				4636	both_sets_out,
				4637	CharacterRange(from, to));
				4638	}
				4639
				4640
				4641	void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
				4642	ZoneList<CharacterRange>* negated_ranges) {
				4643	ASSERT(CharacterRange::IsCanonical(ranges));
				4644	ASSERT_EQ(0, negated_ranges->length());
				4645	int range_count = ranges->length();
				4646	uc16 from = 0;
				4647	int i = 0;
				4648	if (range_count > 0 && ranges->at(0).from() == 0) {
				4649	from = ranges->at(0).to();
				4650	i = 1;
				4651	}
				4652	while (i < range_count) {
				4653	CharacterRange range = ranges->at(i);
				4654	negated_ranges->Add(CharacterRange(from + 1, range.from() - 1));
				4655	from = range.to();
				4656	i++;
				4657	}
				4658	if (from < String::kMaxUC16CharCode) {
				4659	negated_ranges->Add(CharacterRange(from + 1, String::kMaxUC16CharCode));
				4660	}
				4661	}
				4662
				4663
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4664
				4665	// -------------------------------------------------------------------
				4666	// Interest propagation
				4667
				4668
				4669	RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) {
				4670	for (int i = 0; i < siblings_.length(); i++) {
				4671	RegExpNode* sibling = siblings_.Get(i);
				4672	if (sibling->info()->Matches(info))
				4673	return sibling;
				4674	}
				4675	return NULL;
				4676	}
				4677
				4678
				4679	RegExpNode* RegExpNode::EnsureSibling(NodeInfo* info, bool* cloned) {
				4680	ASSERT_EQ(false, *cloned);
				4681	siblings_.Ensure(this);
				4682	RegExpNode* result = TryGetSibling(info);
				4683	if (result != NULL) return result;
				4684	result = this->Clone();
				4685	NodeInfo* new_info = result->info();
				4686	new_info->ResetCompilationState();
				4687	new_info->AddFromPreceding(info);
				4688	AddSibling(result);
				4689	*cloned = true;
				4690	return result;
				4691	}
				4692
				4693
				4694	template <class C>
				4695	static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
				4696	NodeInfo full_info(*node->info());
				4697	full_info.AddFromPreceding(info);
				4698	bool cloned = false;
				4699	return RegExpNode::EnsureSibling(node, &full_info, &cloned);
				4700	}
				4701
				4702
				4703	// -------------------------------------------------------------------
				4704	// Splay tree
				4705
				4706
				4707	OutSet* OutSet::Extend(unsigned value) {
				4708	if (Get(value))
				4709	return this;
				4710	if (successors() != NULL) {
				4711	for (int i = 0; i < successors()->length(); i++) {
				4712	OutSet* successor = successors()->at(i);
				4713	if (successor->Get(value))
				4714	return successor;
				4715	}
				4716	} else {
				4717	successors_ = new ZoneList<OutSet*>(2);
				4718	}
				4719	OutSet* result = new OutSet(first_, remaining_);
				4720	result->Set(value);
				4721	successors()->Add(result);
				4722	return result;
				4723	}
				4724
				4725
				4726	void OutSet::Set(unsigned value) {
				4727	if (value < kFirstLimit) {
				4728	first_ \|= (1 << value);
				4729	} else {
				4730	if (remaining_ == NULL)
				4731	remaining_ = new ZoneList<unsigned>(1);
				4732	if (remaining_->is_empty() \|\| !remaining_->Contains(value))
				4733	remaining_->Add(value);
				4734	}
				4735	}
				4736
				4737
				4738	bool OutSet::Get(unsigned value) {
				4739	if (value < kFirstLimit) {
				4740	return (first_ & (1 << value)) != 0;
				4741	} else if (remaining_ == NULL) {
				4742	return false;
				4743	} else {
				4744	return remaining_->Contains(value);
				4745	}
				4746	}
				4747
				4748
				4749	const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
				4750	const DispatchTable::Entry DispatchTable::Config::kNoValue;
				4751
				4752
				4753	void DispatchTable::AddRange(CharacterRange full_range, int value) {
				4754	CharacterRange current = full_range;
				4755	if (tree()->is_empty()) {
				4756	// If this is the first range we just insert into the table.
				4757	ZoneSplayTree<Config>::Locator loc;
				4758	ASSERT_RESULT(tree()->Insert(current.from(), &loc));
				4759	loc.set_value(Entry(current.from(), current.to(), empty()->Extend(value)));
				4760	return;
				4761	}
				4762	// First see if there is a range to the left of this one that
				4763	// overlaps.
				4764	ZoneSplayTree<Config>::Locator loc;
				4765	if (tree()->FindGreatestLessThan(current.from(), &loc)) {
				4766	Entry* entry = &loc.value();
				4767	// If we've found a range that overlaps with this one, and it
				4768	// starts strictly to the left of this one, we have to fix it
				4769	// because the following code only handles ranges that start on
				4770	// or after the start point of the range we're adding.
				4771	if (entry->from() < current.from() && entry->to() >= current.from()) {
				4772	// Snap the overlapping range in half around the start point of
				4773	// the range we're adding.
				4774	CharacterRange left(entry->from(), current.from() - 1);
				4775	CharacterRange right(current.from(), entry->to());
				4776	// The left part of the overlapping range doesn't overlap.
				4777	// Truncate the whole entry to be just the left part.
				4778	entry->set_to(left.to());
				4779	// The right part is the one that overlaps. We add this part
				4780	// to the map and let the next step deal with merging it with
				4781	// the range we're adding.
				4782	ZoneSplayTree<Config>::Locator loc;
				4783	ASSERT_RESULT(tree()->Insert(right.from(), &loc));
				4784	loc.set_value(Entry(right.from(),
				4785	right.to(),
				4786	entry->out_set()));
				4787	}
				4788	}
				4789	while (current.is_valid()) {
				4790	if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
				4791	(loc.value().from() <= current.to()) &&
				4792	(loc.value().to() >= current.from())) {
				4793	Entry* entry = &loc.value();
				4794	// We have overlap. If there is space between the start point of
				4795	// the range we're adding and where the overlapping range starts
				4796	// then we have to add a range covering just that space.
				4797	if (current.from() < entry->from()) {
				4798	ZoneSplayTree<Config>::Locator ins;
				4799	ASSERT_RESULT(tree()->Insert(current.from(), &ins));
				4800	ins.set_value(Entry(current.from(),
				4801	entry->from() - 1,
				4802	empty()->Extend(value)));
				4803	current.set_from(entry->from());
				4804	}
				4805	ASSERT_EQ(current.from(), entry->from());
				4806	// If the overlapping range extends beyond the one we want to add
				4807	// we have to snap the right part off and add it separately.
				4808	if (entry->to() > current.to()) {
				4809	ZoneSplayTree<Config>::Locator ins;
				4810	ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));
				4811	ins.set_value(Entry(current.to() + 1,
				4812	entry->to(),
				4813	entry->out_set()));
				4814	entry->set_to(current.to());
				4815	}
				4816	ASSERT(entry->to() <= current.to());
				4817	// The overlapping range is now completely contained by the range
				4818	// we're adding so we can just update it and move the start point
				4819	// of the range we're adding just past it.
				4820	entry->AddValue(value);
				4821	// Bail out if the last interval ended at 0xFFFF since otherwise
				4822	// adding 1 will wrap around to 0.
				4823	if (entry->to() == String::kMaxUC16CharCode)
				4824	break;
				4825	ASSERT(entry->to() + 1 > current.from());
				4826	current.set_from(entry->to() + 1);
				4827	} else {
				4828	// There is no overlap so we can just add the range
				4829	ZoneSplayTree<Config>::Locator ins;
				4830	ASSERT_RESULT(tree()->Insert(current.from(), &ins));
				4831	ins.set_value(Entry(current.from(),
				4832	current.to(),
				4833	empty()->Extend(value)));
				4834	break;
				4835	}
				4836	}
				4837	}
				4838
				4839
				4840	OutSet* DispatchTable::Get(uc16 value) {
				4841	ZoneSplayTree<Config>::Locator loc;
				4842	if (!tree()->FindGreatestLessThan(value, &loc))
				4843	return empty();
				4844	Entry* entry = &loc.value();
				4845	if (value <= entry->to())
				4846	return entry->out_set();
				4847	else
				4848	return empty();
				4849	}
				4850
				4851
				4852	// -------------------------------------------------------------------
				4853	// Analysis
				4854
				4855
				4856	void Analysis::EnsureAnalyzed(RegExpNode* that) {
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	4857	StackLimitCheck check(Isolate::Current());
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4858	if (check.HasOverflowed()) {
				4859	fail("Stack overflow");
				4860	return;
				4861	}
				4862	if (that->info()->been_analyzed \|\| that->info()->being_analyzed)
				4863	return;
				4864	that->info()->being_analyzed = true;
				4865	that->Accept(this);
				4866	that->info()->being_analyzed = false;
				4867	that->info()->been_analyzed = true;
				4868	}
				4869
				4870
				4871	void Analysis::VisitEnd(EndNode* that) {
				4872	// nothing to do
				4873	}
				4874
				4875
				4876	void TextNode::CalculateOffsets() {
				4877	int element_count = elements()->length();
				4878	// Set up the offsets of the elements relative to the start. This is a fixed
				4879	// quantity since a TextNode can only contain fixed-width things.
				4880	int cp_offset = 0;
				4881	for (int i = 0; i < element_count; i++) {
				4882	TextElement& elm = elements()->at(i);
				4883	elm.cp_offset = cp_offset;
				4884	if (elm.type == TextElement::ATOM) {
				4885	cp_offset += elm.data.u_atom->data().length();
				4886	} else {
				4887	cp_offset++;
				4888	Vector<const uc16> quarks = elm.data.u_atom->data();
				4889	}
				4890	}
				4891	}
				4892
				4893
				4894	void Analysis::VisitText(TextNode* that) {
				4895	if (ignore_case_) {
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	4896	that->MakeCaseIndependent(is_ascii_);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4897	}
				4898	EnsureAnalyzed(that->on_success());
				4899	if (!has_failed()) {
				4900	that->CalculateOffsets();
				4901	}
				4902	}
				4903
				4904
				4905	void Analysis::VisitAction(ActionNode* that) {
				4906	RegExpNode* target = that->on_success();
				4907	EnsureAnalyzed(target);
				4908	if (!has_failed()) {
				4909	// If the next node is interested in what it follows then this node
				4910	// has to be interested too so it can pass the information on.
				4911	that->info()->AddFromFollowing(target->info());
				4912	}
				4913	}
				4914
				4915
				4916	void Analysis::VisitChoice(ChoiceNode* that) {
				4917	NodeInfo* info = that->info();
				4918	for (int i = 0; i < that->alternatives()->length(); i++) {
				4919	RegExpNode* node = that->alternatives()->at(i).node();
				4920	EnsureAnalyzed(node);
				4921	if (has_failed()) return;
				4922	// Anything the following nodes need to know has to be known by
				4923	// this node also, so it can pass it on.
				4924	info->AddFromFollowing(node->info());
				4925	}
				4926	}
				4927
				4928
				4929	void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
				4930	NodeInfo* info = that->info();
				4931	for (int i = 0; i < that->alternatives()->length(); i++) {
				4932	RegExpNode* node = that->alternatives()->at(i).node();
				4933	if (node != that->loop_node()) {
				4934	EnsureAnalyzed(node);
				4935	if (has_failed()) return;
				4936	info->AddFromFollowing(node->info());
				4937	}
				4938	}
				4939	// Check the loop last since it may need the value of this node
				4940	// to get a correct result.
				4941	EnsureAnalyzed(that->loop_node());
				4942	if (!has_failed()) {
				4943	info->AddFromFollowing(that->loop_node()->info());
				4944	}
				4945	}
				4946
				4947
				4948	void Analysis::VisitBackReference(BackReferenceNode* that) {
				4949	EnsureAnalyzed(that->on_success());
				4950	}
				4951
				4952
				4953	void Analysis::VisitAssertion(AssertionNode* that) {
				4954	EnsureAnalyzed(that->on_success());
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4955	AssertionNode::AssertionNodeType type = that->type();
				4956	if (type == AssertionNode::AT_BOUNDARY \|\|
				4957	type == AssertionNode::AT_NON_BOUNDARY) {
				4958	// Check if the following character is known to be a word character
				4959	// or known to not be a word character.
				4960	ZoneList<CharacterRange>* following_chars = that->FirstCharacterSet();
				4961
				4962	CharacterRange::Canonicalize(following_chars);
				4963
				4964	SetRelation word_relation =
				4965	CharacterRange::WordCharacterRelation(following_chars);
Andrei Popescu	6d3d5a3	2010-04-27 19:40:12 +0100	[diff] [blame]	4966	if (word_relation.Disjoint()) {
				4967	// Includes the case where following_chars is empty (e.g., end-of-input).
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4968	// Following character is definitely not a word character.
				4969	type = (type == AssertionNode::AT_BOUNDARY) ?
Andrei Popescu	6d3d5a3	2010-04-27 19:40:12 +0100	[diff] [blame]	4970	AssertionNode::AFTER_WORD_CHARACTER :
				4971	AssertionNode::AFTER_NONWORD_CHARACTER;
				4972	that->set_type(type);
				4973	} else if (word_relation.ContainedIn()) {
				4974	// Following character is definitely a word character.
				4975	type = (type == AssertionNode::AT_BOUNDARY) ?
				4976	AssertionNode::AFTER_NONWORD_CHARACTER :
				4977	AssertionNode::AFTER_WORD_CHARACTER;
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4978	that->set_type(type);
				4979	}
				4980	}
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	4981	}
				4982
				4983
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	4984	ZoneList<CharacterRange>* RegExpNode::FirstCharacterSet() {
				4985	if (first_character_set_ == NULL) {
				4986	if (ComputeFirstCharacterSet(kFirstCharBudget) < 0) {
				4987	// If we can't find an exact solution within the budget, we
				4988	// set the value to the set of every character, i.e., all characters
				4989	// are possible.
				4990	ZoneList<CharacterRange>* all_set = new ZoneList<CharacterRange>(1);
				4991	all_set->Add(CharacterRange::Everything());
				4992	first_character_set_ = all_set;
				4993	}
				4994	}
				4995	return first_character_set_;
				4996	}
				4997
				4998
				4999	int RegExpNode::ComputeFirstCharacterSet(int budget) {
				5000	// Default behavior is to not be able to determine the first character.
				5001	return kComputeFirstCharacterSetFail;
				5002	}
				5003
				5004
				5005	int LoopChoiceNode::ComputeFirstCharacterSet(int budget) {
				5006	budget--;
				5007	if (budget >= 0) {
				5008	// Find loop min-iteration. It's the value of the guarded choice node
				5009	// with a GEQ guard, if any.
				5010	int min_repetition = 0;
				5011
				5012	for (int i = 0; i <= 1; i++) {
				5013	GuardedAlternative alternative = alternatives()->at(i);
				5014	ZoneList<Guard> guards = alternative.guards();
				5015	if (guards != NULL && guards->length() > 0) {
				5016	Guard* guard = guards->at(0);
				5017	if (guard->op() == Guard::GEQ) {
				5018	min_repetition = guard->value();
				5019	break;
				5020	}
				5021	}
				5022	}
				5023
				5024	budget = loop_node()->ComputeFirstCharacterSet(budget);
				5025	if (budget >= 0) {
				5026	ZoneList<CharacterRange>* character_set =
				5027	loop_node()->first_character_set();
				5028	if (body_can_be_zero_length() \|\| min_repetition == 0) {
				5029	budget = continue_node()->ComputeFirstCharacterSet(budget);
				5030	if (budget < 0) return budget;
				5031	ZoneList<CharacterRange>* body_set =
				5032	continue_node()->first_character_set();
				5033	ZoneList<CharacterRange>* union_set =
				5034	new ZoneList<CharacterRange>(Max(character_set->length(),
				5035	body_set->length()));
				5036	CharacterRange::Merge(character_set,
				5037	body_set,
				5038	union_set,
				5039	union_set,
				5040	union_set);
				5041	character_set = union_set;
				5042	}
				5043	set_first_character_set(character_set);
				5044	}
				5045	}
				5046	return budget;
				5047	}
				5048
				5049
				5050	int NegativeLookaheadChoiceNode::ComputeFirstCharacterSet(int budget) {
				5051	budget--;
				5052	if (budget >= 0) {
				5053	GuardedAlternative successor = this->alternatives()->at(1);
				5054	RegExpNode* successor_node = successor.node();
				5055	budget = successor_node->ComputeFirstCharacterSet(budget);
				5056	if (budget >= 0) {
				5057	set_first_character_set(successor_node->first_character_set());
				5058	}
				5059	}
				5060	return budget;
				5061	}
				5062
				5063
				5064	// The first character set of an EndNode is unknowable. Just use the
				5065	// default implementation that fails and returns all characters as possible.
				5066
				5067
				5068	int AssertionNode::ComputeFirstCharacterSet(int budget) {
				5069	budget -= 1;
				5070	if (budget >= 0) {
				5071	switch (type_) {
				5072	case AT_END: {
				5073	set_first_character_set(new ZoneList<CharacterRange>(0));
				5074	break;
				5075	}
				5076	case AT_START:
				5077	case AT_BOUNDARY:
				5078	case AT_NON_BOUNDARY:
				5079	case AFTER_NEWLINE:
				5080	case AFTER_NONWORD_CHARACTER:
				5081	case AFTER_WORD_CHARACTER: {
				5082	ASSERT_NOT_NULL(on_success());
				5083	budget = on_success()->ComputeFirstCharacterSet(budget);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	5084	if (budget >= 0) {
				5085	set_first_character_set(on_success()->first_character_set());
				5086	}
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	5087	break;
				5088	}
				5089	}
				5090	}
				5091	return budget;
				5092	}
				5093
				5094
				5095	int ActionNode::ComputeFirstCharacterSet(int budget) {
				5096	if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail;
				5097	budget--;
				5098	if (budget >= 0) {
				5099	ASSERT_NOT_NULL(on_success());
				5100	budget = on_success()->ComputeFirstCharacterSet(budget);
				5101	if (budget >= 0) {
				5102	set_first_character_set(on_success()->first_character_set());
				5103	}
				5104	}
				5105	return budget;
				5106	}
				5107
				5108
				5109	int BackReferenceNode::ComputeFirstCharacterSet(int budget) {
				5110	// We don't know anything about the first character of a backreference
				5111	// at this point.
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	5112	// The potential first characters are the first characters of the capture,
				5113	// and the first characters of the on_success node, depending on whether the
				5114	// capture can be empty and whether it is known to be participating or known
				5115	// not to be.
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	5116	return kComputeFirstCharacterSetFail;
				5117	}
				5118
				5119
				5120	int TextNode::ComputeFirstCharacterSet(int budget) {
				5121	budget--;
				5122	if (budget >= 0) {
				5123	ASSERT_NE(0, elements()->length());
				5124	TextElement text = elements()->at(0);
				5125	if (text.type == TextElement::ATOM) {
				5126	RegExpAtom* atom = text.data.u_atom;
				5127	ASSERT_NE(0, atom->length());
				5128	uc16 first_char = atom->data()[0];
				5129	ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1);
				5130	range->Add(CharacterRange(first_char, first_char));
				5131	set_first_character_set(range);
				5132	} else {
				5133	ASSERT(text.type == TextElement::CHAR_CLASS);
				5134	RegExpCharacterClass* char_class = text.data.u_char_class;
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	5135	ZoneList<CharacterRange>* ranges = char_class->ranges();
				5136	// TODO(lrn): Canonicalize ranges when they are created
				5137	// instead of waiting until now.
				5138	CharacterRange::Canonicalize(ranges);
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	5139	if (char_class->is_negated()) {
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	5140	int length = ranges->length();
				5141	int new_length = length + 1;
				5142	if (length > 0) {
				5143	if (ranges->at(0).from() == 0) new_length--;
				5144	if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) {
				5145	new_length--;
				5146	}
				5147	}
				5148	ZoneList<CharacterRange>* negated_ranges =
				5149	new ZoneList<CharacterRange>(new_length);
				5150	CharacterRange::Negate(ranges, negated_ranges);
				5151	set_first_character_set(negated_ranges);
				5152	} else {
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	5153	set_first_character_set(ranges);
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	5154	}
				5155	}
				5156	}
				5157	return budget;
				5158	}
				5159
				5160
				5161
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5162	// -------------------------------------------------------------------
				5163	// Dispatch table construction
				5164
				5165
				5166	void DispatchTableConstructor::VisitEnd(EndNode* that) {
				5167	AddRange(CharacterRange::Everything());
				5168	}
				5169
				5170
				5171	void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
				5172	node->set_being_calculated(true);
				5173	ZoneList<GuardedAlternative>* alternatives = node->alternatives();
				5174	for (int i = 0; i < alternatives->length(); i++) {
				5175	set_choice_index(i);
				5176	alternatives->at(i).node()->Accept(this);
				5177	}
				5178	node->set_being_calculated(false);
				5179	}
				5180
				5181
				5182	class AddDispatchRange {
				5183	public:
				5184	explicit AddDispatchRange(DispatchTableConstructor* constructor)
				5185	: constructor_(constructor) { }
				5186	void Call(uc32 from, DispatchTable::Entry entry);
				5187	private:
				5188	DispatchTableConstructor* constructor_;
				5189	};
				5190
				5191
				5192	void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {
				5193	CharacterRange range(from, entry.to());
				5194	constructor_->AddRange(range);
				5195	}
				5196
				5197
				5198	void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
				5199	if (node->being_calculated())
				5200	return;
				5201	DispatchTable* table = node->GetTable(ignore_case_);
				5202	AddDispatchRange adder(this);
				5203	table->ForEach(&adder);
				5204	}
				5205
				5206
				5207	void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
				5208	// TODO(160): Find the node that we refer back to and propagate its start
				5209	// set back to here. For now we just accept anything.
				5210	AddRange(CharacterRange::Everything());
				5211	}
				5212
				5213
				5214	void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
				5215	RegExpNode* target = that->on_success();
				5216	target->Accept(this);
				5217	}
				5218
				5219
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5220	static int CompareRangeByFrom(const CharacterRange* a,
				5221	const CharacterRange* b) {
				5222	return Compare<uc16>(a->from(), b->from());
				5223	}
				5224
				5225
				5226	void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
				5227	ranges->Sort(CompareRangeByFrom);
				5228	uc16 last = 0;
				5229	for (int i = 0; i < ranges->length(); i++) {
				5230	CharacterRange range = ranges->at(i);
				5231	if (last < range.from())
				5232	AddRange(CharacterRange(last, range.from() - 1));
				5233	if (range.to() >= last) {
				5234	if (range.to() == String::kMaxUC16CharCode) {
				5235	return;
				5236	} else {
				5237	last = range.to() + 1;
				5238	}
				5239	}
				5240	}
				5241	AddRange(CharacterRange(last, String::kMaxUC16CharCode));
				5242	}
				5243
				5244
				5245	void DispatchTableConstructor::VisitText(TextNode* that) {
				5246	TextElement elm = that->elements()->at(0);
				5247	switch (elm.type) {
				5248	case TextElement::ATOM: {
				5249	uc16 c = elm.data.u_atom->data()[0];
				5250	AddRange(CharacterRange(c, c));
				5251	break;
				5252	}
				5253	case TextElement::CHAR_CLASS: {
				5254	RegExpCharacterClass* tree = elm.data.u_char_class;
				5255	ZoneList<CharacterRange>* ranges = tree->ranges();
				5256	if (tree->is_negated()) {
				5257	AddInverse(ranges);
				5258	} else {
				5259	for (int i = 0; i < ranges->length(); i++)
				5260	AddRange(ranges->at(i));
				5261	}
				5262	break;
				5263	}
				5264	default: {
				5265	UNIMPLEMENTED();
				5266	}
				5267	}
				5268	}
				5269
				5270
				5271	void DispatchTableConstructor::VisitAction(ActionNode* that) {
				5272	RegExpNode* target = that->on_success();
				5273	target->Accept(this);
				5274	}
				5275
				5276
				5277	RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
				5278	bool ignore_case,
				5279	bool is_multiline,
				5280	Handle<String> pattern,
				5281	bool is_ascii) {
				5282	if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
				5283	return IrregexpRegExpTooBig();
				5284	}
				5285	RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
				5286	// Wrap the body of the regexp in capture #0.
				5287	RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
				5288	0,
				5289	&compiler,
				5290	compiler.accept());
				5291	RegExpNode* node = captured_body;
Ben Murdoch	f87a203	2010-10-22 12:50:53 +0100	[diff] [blame]	5292	bool is_end_anchored = data->tree->IsAnchoredAtEnd();
				5293	bool is_start_anchored = data->tree->IsAnchoredAtStart();
				5294	int max_length = data->tree->max_match();
				5295	if (!is_start_anchored) {
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5296	// Add a .*? at the beginning, outside the body capture, unless
				5297	// this expression is anchored at the beginning.
				5298	RegExpNode* loop_node =
				5299	RegExpQuantifier::ToNode(0,
				5300	RegExpTree::kInfinity,
				5301	false,
				5302	new RegExpCharacterClass('*'),
				5303	&compiler,
				5304	captured_body,
				5305	data->contains_anchor);
				5306
				5307	if (data->contains_anchor) {
				5308	// Unroll loop once, to take care of the case that might start
				5309	// at the start of input.
				5310	ChoiceNode* first_step_node = new ChoiceNode(2);
				5311	first_step_node->AddAlternative(GuardedAlternative(captured_body));
				5312	first_step_node->AddAlternative(GuardedAlternative(
				5313	new TextNode(new RegExpCharacterClass('*'), loop_node)));
				5314	node = first_step_node;
				5315	} else {
				5316	node = loop_node;
				5317	}
				5318	}
				5319	data->node = node;
Steve Block	d0582a6	2009-12-15 09:54:21 +0000	[diff] [blame]	5320	Analysis analysis(ignore_case, is_ascii);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5321	analysis.EnsureAnalyzed(node);
				5322	if (analysis.has_failed()) {
				5323	const char* error_message = analysis.error_message();
				5324	return CompilationResult(error_message);
				5325	}
				5326
				5327	NodeInfo info = *node->info();
				5328
				5329	// Create the correct assembler for the architecture.
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	5330	#ifndef V8_INTERPRETED_REGEXP
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5331	// Native regexp implementation.
				5332
				5333	NativeRegExpMacroAssembler::Mode mode =
				5334	is_ascii ? NativeRegExpMacroAssembler::ASCII
				5335	: NativeRegExpMacroAssembler::UC16;
				5336
				5337	#if V8_TARGET_ARCH_IA32
				5338	RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2);
				5339	#elif V8_TARGET_ARCH_X64
				5340	RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2);
				5341	#elif V8_TARGET_ARCH_ARM
				5342	RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2);
Steve Block	44f0eee	2011-05-26 01:26:41 +0100	[diff] [blame]	5343	#elif V8_TARGET_ARCH_MIPS
				5344	RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2);
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5345	#endif
				5346
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	5347	#else // V8_INTERPRETED_REGEXP
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5348	// Interpreted regexp implementation.
				5349	EmbeddedVector<byte, 1024> codes;
				5350	RegExpMacroAssemblerIrregexp macro_assembler(codes);
Steve Block	6ded16b	2010-05-10 14:33:55 +0100	[diff] [blame]	5351	#endif // V8_INTERPRETED_REGEXP
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5352
Ben Murdoch	f87a203	2010-10-22 12:50:53 +0100	[diff] [blame]	5353	// Inserted here, instead of in Assembler, because it depends on information
				5354	// in the AST that isn't replicated in the Node structure.
				5355	static const int kMaxBacksearchLimit = 1024;
				5356	if (is_end_anchored &&
				5357	!is_start_anchored &&
				5358	max_length < kMaxBacksearchLimit) {
				5359	macro_assembler.SetCurrentPositionFromEnd(max_length);
				5360	}
				5361
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5362	return compiler.Assemble(&macro_assembler,
				5363	node,
				5364	data->capture_count,
				5365	pattern);
				5366	}
				5367
Leon Clarke	e46be81	2010-01-19 14:06:41 +0000	[diff] [blame]	5368
Steve Block	a7e24c1	2009-10-30 11:49:00 +0000	[diff] [blame]	5369	}} // namespace v8::internal