Blame - src/jsregexp.cc - fp2-dev/platform/external/chromium_org/v8

blob: 3046b96afa650561afaa37163a4fdd81a5ee6515 [file] [log] [blame]

ager@chromium.org	9258b6b	2008-09-11 09:11:10 +0000	[diff] [blame]	1	// Copyright 2006-2008 the V8 project authors. All rights reserved.
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	2	// Redistribution and use in source and binary forms, with or without
				3	// modification, are permitted provided that the following conditions are
				4	// met:
				5	//
				6	// * Redistributions of source code must retain the above copyright
				7	// notice, this list of conditions and the following disclaimer.
				8	// * Redistributions in binary form must reproduce the above
				9	// copyright notice, this list of conditions and the following
				10	// disclaimer in the documentation and/or other materials provided
				11	// with the distribution.
				12	// * Neither the name of Google Inc. nor the names of its
				13	// contributors may be used to endorse or promote products derived
				14	// from this software without specific prior written permission.
				15	//
				16	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				20	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				21	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				22	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				23	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				24	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				25	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				26	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27
				28	#include "v8.h"
				29
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	30	#include "ast.h"
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	31	#include "execution.h"
				32	#include "factory.h"
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	33	#include "jsregexp-inl.h"
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	34	#include "platform.h"
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	35	#include "runtime.h"
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	36	#include "top.h"
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	37	#include "compilation-cache.h"
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	38	#include "string-stream.h"
				39	#include "parser.h"
				40	#include "regexp-macro-assembler.h"
				41	#include "regexp-macro-assembler-tracer.h"
				42	#include "regexp-macro-assembler-irregexp.h"
				43
				44	#ifdef ARM
				45	#include "regexp-macro-assembler-arm.h"
				46	#else // IA32
				47	#include "macro-assembler-ia32.h"
				48	#include "regexp-macro-assembler-ia32.h"
				49	#endif
				50
				51	#include "interpreter-irregexp.h"
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	52
				53	// Including pcre.h undefines DEBUG to avoid getting debug output from
				54	// the JSCRE implementation. Make sure to redefine it in debug mode
				55	// after having included the header file.
				56	#ifdef DEBUG
				57	#include "third_party/jscre/pcre.h"
				58	#define DEBUG
				59	#else
				60	#include "third_party/jscre/pcre.h"
				61	#endif
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	62
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	63
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	64	namespace v8 { namespace internal {
				65
				66
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	67	static Failure* malloc_failure;
				68
				69	static void* JSREMalloc(size_t size) {
				70	Object* obj = Heap::AllocateByteArray(size);
				71
				72	// If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile
				73	// will return NULL to the caller, performs GC there.
				74	// Also pass failure information to the caller.
				75	if (obj->IsFailure()) {
				76	malloc_failure = Failure::cast(obj);
				77	return NULL;
				78	}
				79
				80	// Note: object is unrooted, the caller of jsRegExpCompile must
				81	// create a handle for the return value before doing heap allocation.
				82	return reinterpret_cast<void*>(ByteArray::cast(obj)->GetDataStartAddress());
				83	}
				84
				85
				86	static void JSREFree(void* p) {
				87	USE(p); // Do nothing, memory is garbage collected.
				88	}
				89
				90
				91	String* RegExpImpl::last_ascii_string_ = NULL;
				92	String* RegExpImpl::two_byte_cached_string_ = NULL;
				93
				94
				95	void RegExpImpl::NewSpaceCollectionPrologue() {
				96	// The two byte string is always in the old space. The Ascii string may be
				97	// in either place. If it is in the old space we don't need to do anything.
				98	if (Heap::InNewSpace(last_ascii_string_)) {
				99	// Invalidate the cache.
				100	last_ascii_string_ = NULL;
				101	two_byte_cached_string_ = NULL;
				102	}
				103	}
				104
				105
				106	void RegExpImpl::OldSpaceCollectionPrologue() {
				107	last_ascii_string_ = NULL;
				108	two_byte_cached_string_ = NULL;
				109	}
				110
				111
mads.s.ager@gmail.com	9a4089a	2008-09-01 08:55:01 +0000	[diff] [blame]	112	Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
				113	Handle<String> pattern,
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	114	Handle<String> flags,
				115	bool* has_pending_exception) {
mads.s.ager@gmail.com	9a4089a	2008-09-01 08:55:01 +0000	[diff] [blame]	116	// Ensure that the constructor function has been loaded.
				117	if (!constructor->IsLoaded()) {
				118	LoadLazy(constructor, has_pending_exception);
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	119	if (*has_pending_exception) return Handle<Object>();
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	120	}
				121	// Call the construct code with 2 arguments.
				122	Object** argv[2] = { Handle<Object>::cast(pattern).location(),
				123	Handle<Object>::cast(flags).location() };
mads.s.ager@gmail.com	9a4089a	2008-09-01 08:55:01 +0000	[diff] [blame]	124	return Execution::New(constructor, 2, argv, has_pending_exception);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	125	}
				126
				127
				128	// Converts a source string to a 16 bit flat string or a SlicedString containing
				129	// a 16 bit flat string).
				130	Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) {
				131	if (*subject == last_ascii_string_) {
				132	ASSERT(two_byte_cached_string_ != NULL);
				133	return Handle<String>(String::cast(two_byte_cached_string_));
				134	}
				135	Handle<String> two_byte_string = StringToTwoByte(subject);
				136	last_ascii_string_ = *subject;
				137	two_byte_cached_string_ = *two_byte_string;
				138	return two_byte_string;
				139	}
				140
				141
				142	// Converts a source string to a 16 bit flat string or a SlicedString containing
				143	// a 16 bit flat string).
				144	Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) {
ager@chromium.org	870a0b6	2008-11-04 11:43:05 +0000	[diff] [blame]	145	StringShape shape(*pattern);
				146	if (!pattern->IsFlat(shape)) {
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	147	FlattenString(pattern);
ager@chromium.org	c3e50d8	2008-11-05 11:53:10 +0000	[diff] [blame]	148	shape = StringShape(*pattern);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	149	}
ager@chromium.org	870a0b6	2008-11-04 11:43:05 +0000	[diff] [blame]	150	Handle<String> flat_string(shape.IsCons() ?
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	151	String::cast(ConsString::cast(*pattern)->first()) :
				152	*pattern);
ager@chromium.org	870a0b6	2008-11-04 11:43:05 +0000	[diff] [blame]	153	ASSERT(flat_string->IsString());
				154	StringShape flat_shape(*flat_string);
				155	ASSERT(!flat_shape.IsCons());
				156	ASSERT(flat_shape.IsSequential() \|\|
				157	flat_shape.IsSliced() \|\|
				158	flat_shape.IsExternal());
				159	if (!flat_shape.IsAsciiRepresentation()) {
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	160	return flat_string;
				161	}
				162
ager@chromium.org	870a0b6	2008-11-04 11:43:05 +0000	[diff] [blame]	163	int len = flat_string->length(flat_shape);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	164	Handle<String> two_byte_string =
ager@chromium.org	870a0b6	2008-11-04 11:43:05 +0000	[diff] [blame]	165	Factory::NewRawTwoByteString(len, TENURED);
				166	uc16* dest = SeqTwoByteString::cast(*two_byte_string)->GetChars();
				167	String::WriteToFlat(*flat_string, flat_shape, dest, 0, len);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	168	return two_byte_string;
				169	}
				170
				171
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	172	static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
				173	int flags = JSRegExp::NONE;
ager@chromium.org	870a0b6	2008-11-04 11:43:05 +0000	[diff] [blame]	174	StringShape shape(*str);
				175	for (int i = 0; i < str->length(shape); i++) {
				176	switch (str->Get(shape, i)) {
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	177	case 'i':
				178	flags \|= JSRegExp::IGNORE_CASE;
				179	break;
				180	case 'g':
				181	flags \|= JSRegExp::GLOBAL;
				182	break;
				183	case 'm':
				184	flags \|= JSRegExp::MULTILINE;
				185	break;
				186	}
				187	}
				188	return JSRegExp::Flags(flags);
				189	}
				190
				191
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	192	static inline void ThrowRegExpException(Handle<JSRegExp> re,
				193	Handle<String> pattern,
				194	Handle<String> error_text,
				195	const char* message) {
				196	Handle<JSArray> array = Factory::NewJSArray(2);
				197	SetElement(array, 0, pattern);
				198	SetElement(array, 1, error_text);
				199	Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);
				200	Top::Throw(*regexp_err);
				201	}
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	202
				203
				204	Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
				205	Handle<String> pattern,
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	206	Handle<String> flag_str) {
				207	JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
				208	Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
				209	bool in_cache = !cached.is_null();
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	210	LOG(RegExpCompileEvent(re, in_cache));
				211
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	212	Handle<Object> result;
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	213	if (in_cache) {
				214	re->set_data(*cached);
				215	result = re;
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	216	} else {
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	217	FlattenString(pattern);
				218	ZoneScope zone_scope(DELETE_ON_EXIT);
				219	RegExpParseResult parse_result;
				220	FlatStringReader reader(pattern);
				221	if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
				222	// Throw an exception if we fail to parse the pattern.
				223	ThrowRegExpException(re,
				224	pattern,
				225	parse_result.error,
				226	"malformed_regexp");
				227	return Handle<Object>();
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	228	}
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	229	RegExpAtom* atom = parse_result.tree->AsAtom();
				230	if (atom != NULL && !flags.is_ignore_case()) {
				231	if (parse_result.has_character_escapes) {
				232	Vector<const uc16> atom_pattern = atom->data();
				233	Handle<String> atom_string =
				234	Factory::NewStringFromTwoByte(atom_pattern);
				235	result = AtomCompile(re, pattern, flags, atom_string);
				236	} else {
				237	result = AtomCompile(re, pattern, flags, pattern);
				238	}
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	239	} else {
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	240	RegExpNode* node = NULL;
				241	Handle<FixedArray> irregexp_data =
				242	RegExpEngine::Compile(&parse_result,
				243	&node,
				244	flags.is_ignore_case(),
				245	flags.is_multiline());
				246	if (irregexp_data.is_null()) {
				247	if (FLAG_disable_jscre) {
				248	UNIMPLEMENTED();
				249	}
				250	result = JscrePrepare(re, pattern, flags);
				251	} else {
				252	result = IrregexpPrepare(re, pattern, flags, irregexp_data);
				253	}
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	254	}
				255	Object* data = re->data();
				256	if (data->IsFixedArray()) {
				257	// If compilation succeeded then the data is set on the regexp
				258	// and we can store it in the cache.
				259	Handle<FixedArray> data(FixedArray::cast(re->data()));
				260	CompilationCache::PutRegExp(pattern, flags, data);
				261	}
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	262	}
				263
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	264	return result;
				265	}
				266
				267
				268	Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
				269	Handle<String> subject,
				270	Handle<Object> index) {
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	271	switch (regexp->TypeTag()) {
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	272	case JSRegExp::JSCRE:
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	273	if (FLAG_disable_jscre) {
				274	UNIMPLEMENTED();
				275	}
				276	return JscreExec(regexp, subject, index);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	277	case JSRegExp::ATOM:
				278	return AtomExec(regexp, subject, index);
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	279	case JSRegExp::IRREGEXP:
				280	return IrregexpExec(regexp, subject, index);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	281	default:
				282	UNREACHABLE();
				283	return Handle<Object>();
				284	}
				285	}
				286
				287
				288	Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
				289	Handle<String> subject) {
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	290	switch (regexp->TypeTag()) {
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	291	case JSRegExp::JSCRE:
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	292	if (FLAG_disable_jscre) {
				293	UNIMPLEMENTED();
				294	}
				295	return JscreExecGlobal(regexp, subject);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	296	case JSRegExp::ATOM:
				297	return AtomExecGlobal(regexp, subject);
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	298	case JSRegExp::IRREGEXP:
				299	return IrregexpExecGlobal(regexp, subject);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	300	default:
				301	UNREACHABLE();
				302	return Handle<Object>();
				303	}
				304	}
				305
				306
				307	Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	308	Handle<String> pattern,
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	309	JSRegExp::Flags flags,
				310	Handle<String> match_pattern) {
				311	Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	312	return re;
				313	}
				314
				315
				316	Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
				317	Handle<String> subject,
				318	Handle<Object> index) {
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	319	Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	320
				321	uint32_t start_index;
				322	if (!Array::IndexFromObject(*index, &start_index)) {
				323	return Handle<Smi>(Smi::FromInt(-1));
				324	}
				325
				326	LOG(RegExpExecEvent(re, start_index, subject));
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	327	int value = Runtime::StringMatch(subject, needle, start_index);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	328	if (value == -1) return Factory::null_value();
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	329
				330	Handle<FixedArray> array = Factory::NewFixedArray(2);
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	331	array->set(0, Smi::FromInt(value));
				332	array->set(1, Smi::FromInt(value + needle->length()));
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	333	return Factory::NewJSArrayWithElements(array);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	334	}
				335
				336
				337	Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
				338	Handle<String> subject) {
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	339	Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	340	Handle<JSArray> result = Factory::NewJSArray(1);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	341	int index = 0;
				342	int match_count = 0;
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	343	int subject_length = subject->length();
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	344	int needle_length = needle->length();
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	345	while (true) {
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	346	LOG(RegExpExecEvent(re, index, subject));
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	347	int value = -1;
				348	if (index + needle_length <= subject_length) {
				349	value = Runtime::StringMatch(subject, needle, index);
				350	}
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	351	if (value == -1) break;
				352	HandleScope scope;
				353	int end = value + needle_length;
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	354
				355	Handle<FixedArray> array = Factory::NewFixedArray(2);
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	356	array->set(0, Smi::FromInt(value));
				357	array->set(1, Smi::FromInt(end));
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	358	Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	359	SetElement(result, match_count, pair);
				360	match_count++;
				361	index = end;
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	362	if (needle_length == 0) index++;
kasperl@chromium.org	41044eb	2008-10-06 08:24:46 +0000	[diff] [blame]	363	}
				364	return result;
				365	}
				366
				367
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	368	Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
				369	Handle<String> pattern,
				370	JSRegExp::Flags flags) {
				371	Handle<Object> value(Heap::undefined_value());
				372	Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
				373	return re;
				374	}
				375
				376
				377	Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
				378	Handle<String> pattern,
				379	JSRegExp::Flags flags,
				380	Handle<FixedArray> irregexp_data) {
				381	Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, irregexp_data);
				382	return re;
				383	}
				384
				385
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	386	static inline Object* DoCompile(String* pattern,
				387	JSRegExp::Flags flags,
				388	unsigned* number_of_captures,
				389	const char** error_message,
iposva@chromium.org	96f667e	2008-11-26 23:48:02 +0000	[diff] [blame]	390	v8::jscre::JscreRegExp** code) {
				391	v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
				392	? v8::jscre::JSRegExpIgnoreCase
				393	: v8::jscre::JSRegExpDoNotIgnoreCase;
				394	v8::jscre::JSRegExpMultilineOption multiline_option = flags.is_multiline()
				395	? v8::jscre::JSRegExpMultiline
				396	: v8::jscre::JSRegExpSingleLine;
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	397	*error_message = NULL;
				398	malloc_failure = Failure::Exception();
iposva@chromium.org	96f667e	2008-11-26 23:48:02 +0000	[diff] [blame]	399	*code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(),
				400	pattern->length(),
				401	case_option,
				402	multiline_option,
				403	number_of_captures,
				404	error_message,
				405	&JSREMalloc,
				406	&JSREFree);
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	407	if (*code == NULL && (malloc_failure->IsRetryAfterGC() \|\|
				408	malloc_failure->IsOutOfMemoryFailure())) {
				409	return malloc_failure;
				410	} else {
				411	// It doesn't matter which object we return here, we just need to return
				412	// a non-failure to indicate to the GC-retry code that there was no
				413	// allocation failure.
				414	return pattern;
				415	}
				416	}
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	417
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	418
				419	void CompileWithRetryAfterGC(Handle<String> pattern,
				420	JSRegExp::Flags flags,
				421	unsigned* number_of_captures,
				422	const char** error_message,
iposva@chromium.org	96f667e	2008-11-26 23:48:02 +0000	[diff] [blame]	423	v8::jscre::JscreRegExp** code) {
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	424	CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
				425	flags,
				426	number_of_captures,
				427	error_message,
				428	code));
				429	}
				430
				431
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	432	Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
				433	ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
				434	ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
				435
				436	Handle<String> pattern(re->Pattern());
				437	JSRegExp::Flags flags = re->GetFlags();
				438
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	439	Handle<String> two_byte_pattern = StringToTwoByte(pattern);
				440
				441	unsigned number_of_captures;
				442	const char* error_message = NULL;
				443
iposva@chromium.org	96f667e	2008-11-26 23:48:02 +0000	[diff] [blame]	444	v8::jscre::JscreRegExp* code = NULL;
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	445	FlattenString(pattern);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	446
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	447	CompileWithRetryAfterGC(two_byte_pattern,
				448	flags,
				449	&number_of_captures,
				450	&error_message,
				451	&code);
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	452
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	453	if (code == NULL) {
				454	// Throw an exception.
				455	Handle<JSArray> array = Factory::NewJSArray(2);
				456	SetElement(array, 0, pattern);
				457	SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(
				458	(error_message == NULL) ? "Unknown regexp error" : error_message)));
				459	Handle<Object> regexp_err =
				460	Factory::NewSyntaxError("malformed_regexp", array);
ager@chromium.org	3bf7b91	2008-11-17 09:09:45 +0000	[diff] [blame]	461	Top::Throw(*regexp_err);
				462	return Handle<Object>();
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	463	}
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	464
				465	// Convert the return address to a ByteArray pointer.
				466	Handle<ByteArray> internal(
				467	ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
				468
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	469	Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);
				470	value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));
				471	value->set(kJscreInternalIndex, *internal);
kasperl@chromium.org	9bbf968	2008-10-30 11:53:07 +0000	[diff] [blame]	472	Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
				473
				474	return re;
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	475	}
				476
				477
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	478	Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
				479	int num_captures,
				480	Handle<String> two_byte_subject,
				481	int previous_index,
				482	int* offsets_vector,
				483	int offsets_vector_length) {
				484	#ifdef DEBUG
				485	if (FLAG_trace_regexp_bytecodes) {
				486	String* pattern = regexp->Pattern();
				487	PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
				488	PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString()));
				489	}
				490	#endif
				491	ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
				492	ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
				493	bool rc;
				494
				495	for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
				496	offsets_vector[i] = -1;
				497	}
				498
				499	LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
				500
				501	FixedArray* irregexp =
				502	FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
				503	int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
				504
				505	switch (tag) {
				506	case RegExpMacroAssembler::kIA32Implementation: {
				507	#ifndef ARM
				508	Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
				509	Address start_addr =
				510	Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();
				511	int string_offset =
				512	start_addr - reinterpret_cast<Address>(*two_byte_subject);
				513	int start_offset = string_offset + previous_index * sizeof(uc16);
				514	int end_offset =
				515	string_offset + two_byte_subject->length() * sizeof(uc16);
				516	rc = RegExpMacroAssemblerIA32::Execute(code,
				517	two_byte_subject.location(),
				518	start_offset,
				519	end_offset,
				520	offsets_vector,
				521	previous_index == 0);
				522	if (rc) {
				523	// Capture values are relative to start_offset only.
				524	for (int i = 0; i < offsets_vector_length; i++) {
				525	if (offsets_vector[i] >= 0) {
				526	offsets_vector[i] += previous_index;
				527	}
				528	}
				529	}
				530	break;
				531	#else
				532	UNIMPLEMENTED();
				533	rc = false;
				534	break;
				535	#endif
				536	}
				537	case RegExpMacroAssembler::kBytecodeImplementation: {
				538	Handle<ByteArray> byte_codes = IrregexpCode(regexp);
				539
				540	rc = IrregexpInterpreter::Match(byte_codes,
				541	two_byte_subject,
				542	offsets_vector,
				543	previous_index);
				544	break;
				545	}
				546	case RegExpMacroAssembler::kARMImplementation:
				547	default:
				548	UNREACHABLE();
				549	rc = false;
				550	break;
				551	}
				552
				553	if (!rc) {
				554	return Factory::null_value();
				555	}
				556
				557	Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
				558	// The captures come in (start, end+1) pairs.
				559	for (int i = 0; i < 2 * (num_captures+1); i += 2) {
				560	array->set(i, Smi::FromInt(offsets_vector[i]));
				561	array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
				562	}
				563	return Factory::NewJSArrayWithElements(array);
				564	}
				565
				566
				567	Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
				568	int num_captures,
				569	Handle<String> subject,
				570	int previous_index,
				571	const uc16* two_byte_subject,
				572	int* offsets_vector,
				573	int offsets_vector_length) {
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	574	int rc;
				575	{
				576	AssertNoAllocation a;
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	577	ByteArray* internal = JscreInternal(regexp);
iposva@chromium.org	96f667e	2008-11-26 23:48:02 +0000	[diff] [blame]	578	const v8::jscre::JscreRegExp* js_regexp =
				579	reinterpret_cast<v8::jscre::JscreRegExp*>(
				580	internal->GetDataStartAddress());
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	581
kasperl@chromium.org	b912362	2008-09-17 14:05:56 +0000	[diff] [blame]	582	LOG(RegExpExecEvent(regexp, previous_index, subject));
				583
iposva@chromium.org	96f667e	2008-11-26 23:48:02 +0000	[diff] [blame]	584	rc = v8::jscre::jsRegExpExecute(js_regexp,
				585	two_byte_subject,
				586	subject->length(),
				587	previous_index,
				588	offsets_vector,
				589	offsets_vector_length);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	590	}
				591
				592	// The KJS JavaScript engine returns null (ie, a failed match) when
				593	// JSRE's internal match limit is exceeded. We duplicate that behavior here.
iposva@chromium.org	96f667e	2008-11-26 23:48:02 +0000	[diff] [blame]	594	if (rc == v8::jscre::JSRegExpErrorNoMatch
				595	\|\| rc == v8::jscre::JSRegExpErrorHitLimit) {
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	596	return Factory::null_value();
				597	}
				598
				599	// Other JSRE errors:
				600	if (rc < 0) {
				601	// Throw an exception.
				602	Handle<Object> code(Smi::FromInt(rc));
				603	Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };
				604	Handle<Object> regexp_err(
				605	Factory::NewTypeError("jsre_error", HandleVector(args, 2)));
				606	return Handle<Object>(Top::Throw(*regexp_err));
				607	}
				608
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	609	Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	610	// The captures come in (start, end+1) pairs.
				611	for (int i = 0; i < 2 * (num_captures+1); i += 2) {
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	612	array->set(i, Smi::FromInt(offsets_vector[i]));
				613	array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	614	}
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	615	return Factory::NewJSArrayWithElements(array);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	616	}
				617
				618
				619	class OffsetsVector {
				620	public:
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	621	inline OffsetsVector(int num_registers)
				622	: offsets_vector_length_(num_registers) {
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	623	if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
				624	vector_ = NewArray<int>(offsets_vector_length_);
				625	} else {
				626	vector_ = static_offsets_vector_;
				627	}
				628	}
				629
				630
				631	inline ~OffsetsVector() {
				632	if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
				633	DeleteArray(vector_);
				634	vector_ = NULL;
				635	}
				636	}
				637
				638
				639	inline int* vector() {
				640	return vector_;
				641	}
				642
				643
				644	inline int length() {
				645	return offsets_vector_length_;
				646	}
				647
				648	private:
				649	int* vector_;
				650	int offsets_vector_length_;
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	651	static const int kStaticOffsetsVectorSize = 50;
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	652	static int static_offsets_vector_[kStaticOffsetsVectorSize];
				653	};
				654
				655
				656	int OffsetsVector::static_offsets_vector_[
				657	OffsetsVector::kStaticOffsetsVectorSize];
				658
				659
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	660	Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
				661	Handle<String> subject,
				662	Handle<Object> index) {
				663	ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
				664	ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	665
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	666	// Prepare space for the return values.
				667	int number_of_registers = IrregexpNumberOfRegisters(regexp);
				668	OffsetsVector offsets(number_of_registers);
				669
				670	int num_captures = IrregexpNumberOfCaptures(regexp);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	671
				672	int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
				673
				674	Handle<String> subject16 = CachedStringToTwoByte(subject);
				675
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	676	Handle<Object> result(IrregexpExecOnce(regexp,
				677	num_captures,
				678	subject16,
				679	previous_index,
				680	offsets.vector(),
				681	offsets.length()));
				682	return result;
				683	}
				684
				685
				686	Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
				687	Handle<String> subject,
				688	Handle<Object> index) {
				689	ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
				690	if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
				691	Handle<Object> compile_result = JscreCompile(regexp);
				692	if (compile_result.is_null()) return compile_result;
				693	}
				694	ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
				695
				696	int num_captures = JscreNumberOfCaptures(regexp);
				697
				698	OffsetsVector offsets((num_captures + 1) * 3);
				699
				700	int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
				701
				702	Handle<String> subject16 = CachedStringToTwoByte(subject);
				703
				704	Handle<Object> result(JscreExecOnce(regexp,
				705	num_captures,
				706	subject,
				707	previous_index,
				708	subject16->GetTwoByteData(),
				709	offsets.vector(),
				710	offsets.length()));
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	711
				712	return result;
				713	}
				714
				715
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	716	Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
				717	Handle<String> subject) {
				718	ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
				719	ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	720
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	721	// Prepare space for the return values.
				722	int number_of_registers = IrregexpNumberOfRegisters(regexp);
				723	OffsetsVector offsets(number_of_registers);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	724
				725	int previous_index = 0;
				726
ager@chromium.org	7c537e2	2008-10-16 08:43:32 +0000	[diff] [blame]	727	Handle<JSArray> result = Factory::NewJSArray(0);
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	728	int i = 0;
				729	Handle<Object> matches;
				730
				731	Handle<String> subject16 = CachedStringToTwoByte(subject);
				732
				733	do {
				734	if (previous_index > subject->length() \|\| previous_index < 0) {
				735	// Per ECMA-262 15.10.6.2, if the previous index is greater than the
				736	// string length, there is no match.
				737	matches = Factory::null_value();
				738	} else {
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	739	matches = IrregexpExecOnce(regexp,
				740	IrregexpNumberOfCaptures(regexp),
				741	subject16,
				742	previous_index,
				743	offsets.vector(),
				744	offsets.length());
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	745
				746	if (matches->IsJSArray()) {
				747	SetElement(result, i, matches);
				748	i++;
				749	previous_index = offsets.vector()[1];
				750	if (offsets.vector()[0] == offsets.vector()[1]) {
				751	previous_index++;
				752	}
				753	}
				754	}
				755	} while (matches->IsJSArray());
				756
				757	// If we exited the loop with an exception, throw it.
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	758	if (matches->IsNull()) {
				759	// Exited loop normally.
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	760	return result;
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	761	} else {
				762	// Exited loop with the exception in matches.
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	763	return matches;
				764	}
				765	}
				766
				767
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	768	Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
				769	Handle<String> subject) {
				770	ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
				771	if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
				772	Handle<Object> compile_result = JscreCompile(regexp);
				773	if (compile_result.is_null()) return compile_result;
				774	}
				775	ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
				776
				777	// Prepare space for the return values.
				778	int num_captures = JscreNumberOfCaptures(regexp);
				779
				780	OffsetsVector offsets((num_captures + 1) * 3);
				781
				782	int previous_index = 0;
				783
				784	Handle<JSArray> result = Factory::NewJSArray(0);
				785	int i = 0;
				786	Handle<Object> matches;
				787
				788	Handle<String> subject16 = CachedStringToTwoByte(subject);
				789
				790	do {
				791	if (previous_index > subject->length() \|\| previous_index < 0) {
				792	// Per ECMA-262 15.10.6.2, if the previous index is greater than the
				793	// string length, there is no match.
				794	matches = Factory::null_value();
				795	} else {
				796	matches = JscreExecOnce(regexp,
				797	num_captures,
				798	subject,
				799	previous_index,
				800	subject16->GetTwoByteData(),
				801	offsets.vector(),
				802	offsets.length());
				803
				804	if (matches->IsJSArray()) {
				805	SetElement(result, i, matches);
				806	i++;
				807	previous_index = offsets.vector()[1];
				808	if (offsets.vector()[0] == offsets.vector()[1]) {
				809	previous_index++;
				810	}
				811	}
				812	}
				813	} while (matches->IsJSArray());
				814
				815	// If we exited the loop with an exception, throw it.
				816	if (matches->IsNull()) {
				817	// Exited loop normally.
				818	return result;
				819	} else {
				820	// Exited loop with the exception in matches.
				821	return matches;
				822	}
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	823	}
				824
				825
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	826	int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
kasperl@chromium.org	9fe21c6	2008-10-28 08:53:51 +0000	[diff] [blame]	827	FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	828	return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	829	}
				830
ager@chromium.org	a74f0da	2008-12-03 16:05:52 +0000	[diff] [blame^]	831
				832	ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
				833	FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
				834	return ByteArray::cast(value->get(kJscreInternalIndex));
				835	}
				836
				837
				838	int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {
				839	FixedArray* value =
				840	FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
				841	return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();
				842	}
				843
				844
				845	int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {
				846	FixedArray* value =
				847	FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
				848	return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();
				849	}
				850
				851
				852	Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
				853	FixedArray* value =
				854	FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
				855	return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
				856	}
				857
				858
				859	// -------------------------------------------------------------------
				860	// Implmentation of the Irregexp regular expression engine.
				861
				862
				863	void RegExpTree::AppendToText(RegExpText* text) {
				864	UNREACHABLE();
				865	}
				866
				867
				868	void RegExpAtom::AppendToText(RegExpText* text) {
				869	text->AddElement(TextElement::Atom(this));
				870	}
				871
				872
				873	void RegExpCharacterClass::AppendToText(RegExpText* text) {
				874	text->AddElement(TextElement::CharClass(this));
				875	}
				876
				877
				878	void RegExpText::AppendToText(RegExpText* text) {
				879	for (int i = 0; i < elements()->length(); i++)
				880	text->AddElement(elements()->at(i));
				881	}
				882
				883
				884	TextElement TextElement::Atom(RegExpAtom* atom) {
				885	TextElement result = TextElement(ATOM);
				886	result.data.u_atom = atom;
				887	return result;
				888	}
				889
				890
				891	TextElement TextElement::CharClass(
				892	RegExpCharacterClass* char_class) {
				893	TextElement result = TextElement(CHAR_CLASS);
				894	result.data.u_char_class = char_class;
				895	return result;
				896	}
				897
				898
				899	DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
				900	if (table_ == NULL) {
				901	table_ = new DispatchTable();
				902	DispatchTableConstructor cons(table_, ignore_case);
				903	cons.BuildTable(this);
				904	}
				905	return table_;
				906	}
				907
				908
				909	class RegExpCompiler {
				910	public:
				911	RegExpCompiler(int capture_count, bool ignore_case);
				912
				913	int AllocateRegister() { return next_register_++; }
				914
				915	Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
				916	RegExpNode* start,
				917	int capture_count);
				918
				919	inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
				920
				921	static const int kImplementationOffset = 0;
				922	static const int kNumberOfRegistersOffset = 0;
				923	static const int kCodeOffset = 1;
				924
				925	RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
				926	EndNode* accept() { return accept_; }
				927	EndNode* backtrack() { return backtrack_; }
				928
				929	static const int kMaxRecursion = 100;
				930	inline int recursion_depth() { return recursion_depth_; }
				931	inline void IncrementRecursionDepth() { recursion_depth_++; }
				932	inline void DecrementRecursionDepth() { recursion_depth_--; }
				933
				934	inline bool ignore_case() { return ignore_case_; }
				935
				936	private:
				937	EndNode* accept_;
				938	EndNode* backtrack_;
				939	int next_register_;
				940	List<RegExpNode> work_list_;
				941	int recursion_depth_;
				942	RegExpMacroAssembler* macro_assembler_;
				943	bool ignore_case_;
				944	};
				945
				946
				947	// Attempts to compile the regexp using an Irregexp code generator. Returns
				948	// a fixed array or a null handle depending on whether it succeeded.
				949	RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
				950	: next_register_(2 * (capture_count + 1)),
				951	work_list_(NULL),
				952	recursion_depth_(0),
				953	ignore_case_(ignore_case) {
				954	accept_ = new EndNode(EndNode::ACCEPT);
				955	backtrack_ = new EndNode(EndNode::BACKTRACK);
				956	}
				957
				958
				959	Handle<FixedArray> RegExpCompiler::Assemble(
				960	RegExpMacroAssembler* macro_assembler,
				961	RegExpNode* start,
				962	int capture_count) {
				963	#ifdef DEBUG
				964	if (FLAG_trace_regexp_assembler)
				965	macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
				966	else
				967	#endif
				968	macro_assembler_ = macro_assembler;
				969	List <RegExpNode*> work_list(0);
				970	work_list_ = &work_list;
				971	Label fail;
				972	macro_assembler_->PushBacktrack(&fail);
				973	if (!start->GoTo(this)) {
				974	fail.Unuse();
				975	return Handle<FixedArray>::null();
				976	}
				977	while (!work_list.is_empty()) {
				978	if (!work_list.RemoveLast()->GoTo(this)) {
				979	fail.Unuse();
				980	return Handle<FixedArray>::null();
				981	}
				982	}
				983	macro_assembler_->Bind(&fail);
				984	macro_assembler_->Fail();
				985	Handle<FixedArray> array =
				986	Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
				987	array->set(RegExpImpl::kIrregexpImplementationIndex,
				988	Smi::FromInt(macro_assembler_->Implementation()));
				989	array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
				990	Smi::FromInt(next_register_));
				991	array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
				992	Smi::FromInt(capture_count));
				993	Handle<Object> code = macro_assembler_->GetCode();
				994	array->set(RegExpImpl::kIrregexpCodeIndex, *code);
				995	work_list_ = NULL;
				996	#ifdef DEBUG
				997	if (FLAG_trace_regexp_assembler) {
				998	delete macro_assembler_;
				999	}
				1000	#endif
				1001	return array;
				1002	}
				1003
				1004
				1005	bool RegExpNode::GoTo(RegExpCompiler* compiler) {
				1006	// TODO(erikcorry): Implement support.
				1007	if (info_.follows_word_interest \|\|
				1008	info_.follows_newline_interest \|\|
				1009	info_.follows_start_interest) {
				1010	return false;
				1011	}
				1012	if (label_.is_bound()) {
				1013	compiler->macro_assembler()->GoTo(&label_);
				1014	return true;
				1015	} else {
				1016	if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) {
				1017	compiler->macro_assembler()->GoTo(&label_);
				1018	compiler->AddWork(this);
				1019	return true;
				1020	} else {
				1021	compiler->IncrementRecursionDepth();
				1022	bool how_it_went = Emit(compiler);
				1023	compiler->DecrementRecursionDepth();
				1024	return how_it_went;
				1025	}
				1026	}
				1027	}
				1028
				1029
				1030	// EndNodes are special. Because they can be very common and they are very
				1031	// short we normally inline them. That is, if we are asked to emit a GoTo
				1032	// we just emit the entire node. Since they don't have successors this
				1033	// works.
				1034	bool EndNode::GoTo(RegExpCompiler* compiler) {
				1035	if (info()->follows_word_interest \|\|
				1036	info()->follows_newline_interest \|\|
				1037	info()->follows_start_interest) {
				1038	return false;
				1039	}
				1040	return Emit(compiler);
				1041	}
				1042
				1043
				1044	Label* RegExpNode::label() {
				1045	return &label_;
				1046	}
				1047
				1048
				1049	bool EndNode::Emit(RegExpCompiler* compiler) {
				1050	RegExpMacroAssembler* macro = compiler->macro_assembler();
				1051	switch (action_) {
				1052	case ACCEPT:
				1053	if (!label()->is_bound()) Bind(macro);
				1054	if (info()->at_end) {
				1055	Label succeed;
				1056	// LoadCurrentCharacter will go to the label if we are at the end of the
				1057	// input string.
				1058	macro->LoadCurrentCharacter(0, &succeed);
				1059	macro->Backtrack();
				1060	macro->Bind(&succeed);
				1061	}
				1062	macro->Succeed();
				1063	return true;
				1064	case BACKTRACK:
				1065	if (!label()->is_bound()) Bind(macro);
				1066	ASSERT(!info()->at_end);
				1067	macro->Backtrack();
				1068	return true;
				1069	}
				1070	return false;
				1071	}
				1072
				1073
				1074	void GuardedAlternative::AddGuard(Guard* guard) {
				1075	if (guards_ == NULL)
				1076	guards_ = new ZoneList<Guard*>(1);
				1077	guards_->Add(guard);
				1078	}
				1079
				1080
				1081	ActionNode* ActionNode::StoreRegister(int reg,
				1082	int val,
				1083	RegExpNode* on_success) {
				1084	ActionNode* result = new ActionNode(STORE_REGISTER, on_success);
				1085	result->data_.u_store_register.reg = reg;
				1086	result->data_.u_store_register.value = val;
				1087	return result;
				1088	}
				1089
				1090
				1091	ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
				1092	ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
				1093	result->data_.u_increment_register.reg = reg;
				1094	return result;
				1095	}
				1096
				1097
				1098	ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
				1099	ActionNode* result = new ActionNode(STORE_POSITION, on_success);
				1100	result->data_.u_position_register.reg = reg;
				1101	return result;
				1102	}
				1103
				1104
				1105	ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
				1106	ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
				1107	result->data_.u_position_register.reg = reg;
				1108	return result;
				1109	}
				1110
				1111
				1112	ActionNode* ActionNode::BeginSubmatch(int stack_reg,
				1113	int position_reg,
				1114	RegExpNode* on_success) {
				1115	ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
				1116	result->data_.u_submatch.stack_pointer_register = stack_reg;
				1117	result->data_.u_submatch.current_position_register = position_reg;
				1118	return result;
				1119	}
				1120
				1121
				1122	ActionNode* ActionNode::EscapeSubmatch(int stack_reg,
				1123	bool restore_position,
				1124	int position_reg,
				1125	RegExpNode* on_success) {
				1126	ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);
				1127	result->data_.u_submatch.stack_pointer_register = stack_reg;
				1128	if (restore_position) {
				1129	result->data_.u_submatch.current_position_register = position_reg;
				1130	} else {
				1131	result->data_.u_submatch.current_position_register = -1;
				1132	}
				1133	return result;
				1134	}
				1135
				1136
				1137	#define DEFINE_ACCEPT(Type) \
				1138	void Type##Node::Accept(NodeVisitor* visitor) { \
				1139	visitor->Visit##Type(this); \
				1140	}
				1141	FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
				1142	#undef DEFINE_ACCEPT
				1143
				1144
				1145	// -------------------------------------------------------------------
				1146	// Emit code.
				1147
				1148
				1149	void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
				1150	Guard* guard,
				1151	Label* on_failure) {
				1152	switch (guard->op()) {
				1153	case Guard::LT:
				1154	macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure);
				1155	break;
				1156	case Guard::GEQ:
				1157	macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure);
				1158	break;
				1159	}
				1160	}
				1161
				1162
				1163	static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
				1164	static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
				1165
				1166
				1167	static inline void EmitAtomNonLetters(
				1168	RegExpMacroAssembler* macro_assembler,
				1169	TextElement elm,
				1170	Vector<const uc16> quarks,
				1171	Label* on_failure,
				1172	int cp_offset) {
				1173	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				1174	for (int i = quarks.length() - 1; i >= 0; i--) {
				1175	uc16 c = quarks[i];
				1176	int length = uncanonicalize.get(c, '\0', chars);
				1177	if (length <= 1) {
				1178	macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
				1179	macro_assembler->CheckNotCharacter(c, on_failure);
				1180	}
				1181	}
				1182	}
				1183
				1184
				1185	static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
				1186	uc16 c1,
				1187	uc16 c2,
				1188	Label* on_failure) {
				1189	uc16 exor = c1 ^ c2;
				1190	// Check whether exor has only one bit set.
				1191	if (((exor - 1) & exor) == 0) {
				1192	// If c1 and c2 differ only by one bit.
				1193	// Ecma262UnCanonicalize always gives the highest number last.
				1194	ASSERT(c2 > c1);
				1195	macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
				1196	return true;
				1197	}
				1198	ASSERT(c2 > c1);
				1199	uc16 diff = c2 - c1;
				1200	if (((diff - 1) & diff) == 0 && c1 >= diff) {
				1201	// If the characters differ by 2^n but don't differ by one bit then
				1202	// subtract the difference from the found character, then do the or
				1203	// trick. We avoid the theoretical case where negative numbers are
				1204	// involved in order to simplify code generation.
				1205	macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
				1206	diff,
				1207	on_failure);
				1208	return true;
				1209	}
				1210	return false;
				1211	}
				1212
				1213
				1214	static inline void EmitAtomLetters(
				1215	RegExpMacroAssembler* macro_assembler,
				1216	TextElement elm,
				1217	Vector<const uc16> quarks,
				1218	Label* on_failure,
				1219	int cp_offset) {
				1220	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				1221	for (int i = quarks.length() - 1; i >= 0; i--) {
				1222	uc16 c = quarks[i];
				1223	int length = uncanonicalize.get(c, '\0', chars);
				1224	if (length <= 1) continue;
				1225	macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
				1226	Label ok;
				1227	ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
				1228	switch (length) {
				1229	case 2: {
				1230	if (ShortCutEmitCharacterPair(macro_assembler,
				1231	chars[0],
				1232	chars[1],
				1233	on_failure)) {
				1234	ok.Unuse();
				1235	} else {
				1236	macro_assembler->CheckCharacter(chars[0], &ok);
				1237	macro_assembler->CheckNotCharacter(chars[1], on_failure);
				1238	macro_assembler->Bind(&ok);
				1239	}
				1240	break;
				1241	}
				1242	case 4:
				1243	macro_assembler->CheckCharacter(chars[3], &ok);
				1244	// Fall through!
				1245	case 3:
				1246	macro_assembler->CheckCharacter(chars[0], &ok);
				1247	macro_assembler->CheckCharacter(chars[1], &ok);
				1248	macro_assembler->CheckNotCharacter(chars[2], on_failure);
				1249	macro_assembler->Bind(&ok);
				1250	break;
				1251	default:
				1252	UNREACHABLE();
				1253	break;
				1254	}
				1255	}
				1256	}
				1257
				1258
				1259	static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
				1260	RegExpCharacterClass* cc,
				1261	int cp_offset,
				1262	Label* on_failure) {
				1263	macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
				1264	cp_offset++;
				1265
				1266	ZoneList<CharacterRange>* ranges = cc->ranges();
				1267
				1268	Label success;
				1269
				1270	Label* char_is_in_class =
				1271	cc->is_negated() ? on_failure : &success;
				1272
				1273	int range_count = ranges->length();
				1274
				1275	if (range_count == 0) {
				1276	if (!cc->is_negated()) {
				1277	macro_assembler->GoTo(on_failure);
				1278	}
				1279	return;
				1280	}
				1281
				1282	for (int i = 0; i < range_count - 1; i++) {
				1283	CharacterRange& range = ranges->at(i);
				1284	Label next_range;
				1285	uc16 from = range.from();
				1286	uc16 to = range.to();
				1287	if (to == from) {
				1288	macro_assembler->CheckCharacter(to, char_is_in_class);
				1289	} else {
				1290	if (from != 0) {
				1291	macro_assembler->CheckCharacterLT(from, &next_range);
				1292	}
				1293	if (to != 0xffff) {
				1294	macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
				1295	} else {
				1296	macro_assembler->GoTo(char_is_in_class);
				1297	}
				1298	}
				1299	macro_assembler->Bind(&next_range);
				1300	}
				1301
				1302	CharacterRange& range = ranges->at(range_count - 1);
				1303	uc16 from = range.from();
				1304	uc16 to = range.to();
				1305
				1306	if (to == from) {
				1307	if (cc->is_negated()) {
				1308	macro_assembler->CheckCharacter(to, on_failure);
				1309	} else {
				1310	macro_assembler->CheckNotCharacter(to, on_failure);
				1311	}
				1312	} else {
				1313	if (from != 0) {
				1314	if (cc->is_negated()) {
				1315	macro_assembler->CheckCharacterLT(from, &success);
				1316	} else {
				1317	macro_assembler->CheckCharacterLT(from, on_failure);
				1318	}
				1319	}
				1320	if (to != 0xffff) {
				1321	if (cc->is_negated()) {
				1322	macro_assembler->CheckCharacterLT(to + 1, on_failure);
				1323	} else {
				1324	macro_assembler->CheckCharacterGT(to, on_failure);
				1325	}
				1326	} else {
				1327	if (cc->is_negated()) {
				1328	macro_assembler->GoTo(on_failure);
				1329	}
				1330	}
				1331	}
				1332	macro_assembler->Bind(&success);
				1333	}
				1334
				1335
				1336
				1337	bool TextNode::Emit(RegExpCompiler* compiler) {
				1338	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				1339	Bind(macro_assembler);
				1340	int element_count = elms_->length();
				1341	ASSERT(element_count != 0);
				1342	int cp_offset = 0;
				1343	if (info()->at_end) {
				1344	macro_assembler->Backtrack();
				1345	return true;
				1346	}
				1347	// First, handle straight character matches.
				1348	for (int i = 0; i < element_count; i++) {
				1349	TextElement elm = elms_->at(i);
				1350	if (elm.type == TextElement::ATOM) {
				1351	Vector<const uc16> quarks = elm.data.u_atom->data();
				1352	if (compiler->ignore_case()) {
				1353	EmitAtomNonLetters(macro_assembler,
				1354	elm,
				1355	quarks,
				1356	on_failure_->label(),
				1357	cp_offset);
				1358	} else {
				1359	macro_assembler->CheckCharacters(quarks,
				1360	cp_offset,
				1361	on_failure_->label());
				1362	}
				1363	cp_offset += quarks.length();
				1364	} else {
				1365	ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
				1366	cp_offset++;
				1367	}
				1368	}
				1369	// Second, handle case independent letter matches if any.
				1370	if (compiler->ignore_case()) {
				1371	cp_offset = 0;
				1372	for (int i = 0; i < element_count; i++) {
				1373	TextElement elm = elms_->at(i);
				1374	if (elm.type == TextElement::ATOM) {
				1375	Vector<const uc16> quarks = elm.data.u_atom->data();
				1376	EmitAtomLetters(macro_assembler,
				1377	elm,
				1378	quarks,
				1379	on_failure_->label(),
				1380	cp_offset);
				1381	cp_offset += quarks.length();
				1382	} else {
				1383	cp_offset++;
				1384	}
				1385	}
				1386	}
				1387	// If the fast character matches passed then do the character classes.
				1388	cp_offset = 0;
				1389	for (int i = 0; i < element_count; i++) {
				1390	TextElement elm = elms_->at(i);
				1391	if (elm.type == TextElement::CHAR_CLASS) {
				1392	RegExpCharacterClass* cc = elm.data.u_char_class;
				1393	EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label());
				1394	cp_offset++;
				1395	} else {
				1396	cp_offset += elm.data.u_atom->data().length();
				1397	}
				1398	}
				1399
				1400	compiler->AddWork(on_failure_);
				1401	macro_assembler->AdvanceCurrentPosition(cp_offset);
				1402	return on_success()->GoTo(compiler);
				1403	}
				1404
				1405
				1406	void TextNode::MakeCaseIndependent() {
				1407	int element_count = elms_->length();
				1408	for (int i = 0; i < element_count; i++) {
				1409	TextElement elm = elms_->at(i);
				1410	if (elm.type == TextElement::CHAR_CLASS) {
				1411	RegExpCharacterClass* cc = elm.data.u_char_class;
				1412	ZoneList<CharacterRange>* ranges = cc->ranges();
				1413	int range_count = ranges->length();
				1414	for (int i = 0; i < range_count; i++) {
				1415	ranges->at(i).AddCaseEquivalents(ranges);
				1416	}
				1417	}
				1418	}
				1419	}
				1420
				1421
				1422	bool ChoiceNode::Emit(RegExpCompiler* compiler) {
				1423	int choice_count = alternatives_->length();
				1424	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				1425	Bind(macro_assembler);
				1426	// For now we just call all choices one after the other. The idea ultimately
				1427	// is to use the Dispatch table to try only the relevant ones.
				1428	for (int i = 0; i < choice_count - 1; i++) {
				1429	GuardedAlternative alternative = alternatives_->at(i);
				1430	Label after;
				1431	Label after_no_pop_cp;
				1432	ZoneList<Guard> guards = alternative.guards();
				1433	if (guards != NULL) {
				1434	int guard_count = guards->length();
				1435	for (int j = 0; j < guard_count; j++) {
				1436	GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp);
				1437	}
				1438	}
				1439	macro_assembler->PushCurrentPosition();
				1440	macro_assembler->PushBacktrack(&after);
				1441	if (!alternative.node()->GoTo(compiler)) {
				1442	after.Unuse();
				1443	after_no_pop_cp.Unuse();
				1444	return false;
				1445	}
				1446	macro_assembler->Bind(&after);
				1447	macro_assembler->PopCurrentPosition();
				1448	macro_assembler->Bind(&after_no_pop_cp);
				1449	}
				1450	GuardedAlternative alternative = alternatives_->at(choice_count - 1);
				1451	ZoneList<Guard> guards = alternative.guards();
				1452	if (guards != NULL) {
				1453	int guard_count = guards->length();
				1454	for (int j = 0; j < guard_count; j++) {
				1455	GenerateGuard(macro_assembler, guards->at(j), on_failure_->label());
				1456	}
				1457	}
				1458	if (!on_failure_->IsBacktrack()) {
				1459	ASSERT_NOT_NULL(on_failure_ -> label());
				1460	macro_assembler->PushBacktrack(on_failure_->label());
				1461	compiler->AddWork(on_failure_);
				1462	}
				1463	if (!alternative.node()->GoTo(compiler)) {
				1464	return false;
				1465	}
				1466	return true;
				1467	}
				1468
				1469
				1470	bool ActionNode::Emit(RegExpCompiler* compiler) {
				1471	RegExpMacroAssembler* macro = compiler->macro_assembler();
				1472	Bind(macro);
				1473	switch (type_) {
				1474	case STORE_REGISTER:
				1475	macro->SetRegister(data_.u_store_register.reg,
				1476	data_.u_store_register.value);
				1477	break;
				1478	case INCREMENT_REGISTER: {
				1479	Label undo;
				1480	macro->PushBacktrack(&undo);
				1481	macro->AdvanceRegister(data_.u_increment_register.reg, 1);
				1482	bool ok = on_success()->GoTo(compiler);
				1483	if (!ok) {
				1484	undo.Unuse();
				1485	return false;
				1486	}
				1487	macro->Bind(&undo);
				1488	macro->AdvanceRegister(data_.u_increment_register.reg, -1);
				1489	macro->Backtrack();
				1490	break;
				1491	}
				1492	case STORE_POSITION: {
				1493	Label undo;
				1494	macro->PushRegister(data_.u_position_register.reg);
				1495	macro->PushBacktrack(&undo);
				1496	macro->WriteCurrentPositionToRegister(data_.u_position_register.reg);
				1497	bool ok = on_success()->GoTo(compiler);
				1498	if (!ok) {
				1499	undo.Unuse();
				1500	return false;
				1501	}
				1502	macro->Bind(&undo);
				1503	macro->PopRegister(data_.u_position_register.reg);
				1504	macro->Backtrack();
				1505	break;
				1506	}
				1507	case RESTORE_POSITION:
				1508	macro->ReadCurrentPositionFromRegister(
				1509	data_.u_position_register.reg);
				1510	break;
				1511	case BEGIN_SUBMATCH:
				1512	macro->WriteCurrentPositionToRegister(
				1513	data_.u_submatch.current_position_register);
				1514	macro->WriteStackPointerToRegister(
				1515	data_.u_submatch.stack_pointer_register);
				1516	break;
				1517	case ESCAPE_SUBMATCH:
				1518	if (info()->at_end) {
				1519	Label at_end;
				1520	// Load current character jumps to the label if we are beyond the string
				1521	// end.
				1522	macro->LoadCurrentCharacter(0, &at_end);
				1523	macro->Backtrack();
				1524	macro->Bind(&at_end);
				1525	}
				1526	if (data_.u_submatch.current_position_register != -1) {
				1527	macro->ReadCurrentPositionFromRegister(
				1528	data_.u_submatch.current_position_register);
				1529	}
				1530	macro->ReadStackPointerFromRegister(
				1531	data_.u_submatch.stack_pointer_register);
				1532	break;
				1533	default:
				1534	UNREACHABLE();
				1535	return false;
				1536	}
				1537	return on_success()->GoTo(compiler);
				1538	}
				1539
				1540
				1541	bool BackReferenceNode::Emit(RegExpCompiler* compiler) {
				1542	RegExpMacroAssembler* macro = compiler->macro_assembler();
				1543	Bind(macro);
				1544	ASSERT_EQ(start_reg_ + 1, end_reg_);
				1545	if (info()->at_end) {
				1546	// If we are constrained to match at the end of the input then succeed
				1547	// iff the back reference is empty.
				1548	macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label());
				1549	} else {
				1550	if (compiler->ignore_case()) {
				1551	macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
				1552	} else {
				1553	macro->CheckNotBackReference(start_reg_, on_failure_->label());
				1554	}
				1555	}
				1556	return on_success()->GoTo(compiler);
				1557	}
				1558
				1559
				1560	// -------------------------------------------------------------------
				1561	// Dot/dotty output
				1562
				1563
				1564	#ifdef DEBUG
				1565
				1566
				1567	class DotPrinter: public NodeVisitor {
				1568	public:
				1569	explicit DotPrinter(bool ignore_case)
				1570	: ignore_case_(ignore_case),
				1571	stream_(&alloc_) { }
				1572	void PrintNode(const char* label, RegExpNode* node);
				1573	void Visit(RegExpNode* node);
				1574	void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure);
				1575	void PrintAttributes(RegExpNode* from);
				1576	StringStream* stream() { return &stream_; }
				1577	#define DECLARE_VISIT(Type) \
				1578	virtual void Visit##Type(Type##Node* that);
				1579	FOR_EACH_NODE_TYPE(DECLARE_VISIT)
				1580	#undef DECLARE_VISIT
				1581	private:
				1582	bool ignore_case_;
				1583	HeapStringAllocator alloc_;
				1584	StringStream stream_;
				1585	};
				1586
				1587
				1588	void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
				1589	stream()->Add("digraph G {\n graph [label=\"");
				1590	for (int i = 0; label[i]; i++) {
				1591	switch (label[i]) {
				1592	case '\\':
				1593	stream()->Add("\\\\");
				1594	break;
				1595	case '"':
				1596	stream()->Add("\"");
				1597	break;
				1598	default:
				1599	stream()->Put(label[i]);
				1600	break;
				1601	}
				1602	}
				1603	stream()->Add("\"];\n");
				1604	Visit(node);
				1605	stream()->Add("}\n");
				1606	printf("%s", *(stream()->ToCString()));
				1607	}
				1608
				1609
				1610	void DotPrinter::Visit(RegExpNode* node) {
				1611	if (node->info()->visited) return;
				1612	node->info()->visited = true;
				1613	node->Accept(this);
				1614	}
				1615
				1616
				1617	void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
				1618	if (on_failure->IsBacktrack()) return;
				1619	stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
				1620	Visit(on_failure);
				1621	}
				1622
				1623
				1624	class TableEntryBodyPrinter {
				1625	public:
				1626	TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)
				1627	: stream_(stream), choice_(choice) { }
				1628	void Call(uc16 from, DispatchTable::Entry entry) {
				1629	OutSet* out_set = entry.out_set();
				1630	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				1631	if (out_set->Get(i)) {
				1632	stream()->Add(" n%p:s%io%i -> n%p;\n",
				1633	choice(),
				1634	from,
				1635	i,
				1636	choice()->alternatives()->at(i).node());
				1637	}
				1638	}
				1639	}
				1640	private:
				1641	StringStream* stream() { return stream_; }
				1642	ChoiceNode* choice() { return choice_; }
				1643	StringStream* stream_;
				1644	ChoiceNode* choice_;
				1645	};
				1646
				1647
				1648	class TableEntryHeaderPrinter {
				1649	public:
				1650	explicit TableEntryHeaderPrinter(StringStream* stream)
				1651	: first_(true), stream_(stream) { }
				1652	void Call(uc16 from, DispatchTable::Entry entry) {
				1653	if (first_) {
				1654	first_ = false;
				1655	} else {
				1656	stream()->Add("\|");
				1657	}
				1658	stream()->Add("{\\%k-\\%k\|{", from, entry.to());
				1659	OutSet* out_set = entry.out_set();
				1660	int priority = 0;
				1661	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				1662	if (out_set->Get(i)) {
				1663	if (priority > 0) stream()->Add("\|");
				1664	stream()->Add("<s%io%i> %i", from, i, priority);
				1665	priority++;
				1666	}
				1667	}
				1668	stream()->Add("}}");
				1669	}
				1670	private:
				1671	bool first_;
				1672	StringStream* stream() { return stream_; }
				1673	StringStream* stream_;
				1674	};
				1675
				1676
				1677	class AttributePrinter {
				1678	public:
				1679	explicit AttributePrinter(DotPrinter* out)
				1680	: out_(out), first_(true) { }
				1681	void PrintSeparator() {
				1682	if (first_) {
				1683	first_ = false;
				1684	} else {
				1685	out_->stream()->Add("\|");
				1686	}
				1687	}
				1688	void PrintBit(const char* name, bool value) {
				1689	if (!value) return;
				1690	PrintSeparator();
				1691	out_->stream()->Add("{%s}", name);
				1692	}
				1693	void PrintPositive(const char* name, int value) {
				1694	if (value < 0) return;
				1695	PrintSeparator();
				1696	out_->stream()->Add("{%s\|%x}", name, value);
				1697	}
				1698	private:
				1699	DotPrinter* out_;
				1700	bool first_;
				1701	};
				1702
				1703
				1704	void DotPrinter::PrintAttributes(RegExpNode* that) {
				1705	stream()->Add(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
				1706	"margin=0.1, fontsize=10, label=\"{",
				1707	that);
				1708	AttributePrinter printer(this);
				1709	NodeInfo* info = that->info();
				1710	printer.PrintBit("NI", info->follows_newline_interest);
				1711	printer.PrintBit("WI", info->follows_word_interest);
				1712	printer.PrintBit("SI", info->follows_start_interest);
				1713	printer.PrintBit("DN", info->determine_newline);
				1714	printer.PrintBit("DW", info->determine_word);
				1715	printer.PrintBit("DS", info->determine_start);
				1716	printer.PrintBit("DDN", info->does_determine_newline);
				1717	printer.PrintBit("DDW", info->does_determine_word);
				1718	printer.PrintBit("DDS", info->does_determine_start);
				1719	printer.PrintPositive("IW", info->is_word);
				1720	printer.PrintPositive("IN", info->is_newline);
				1721	printer.PrintPositive("FN", info->follows_newline);
				1722	printer.PrintPositive("FW", info->follows_word);
				1723	printer.PrintPositive("FS", info->follows_start);
				1724	Label* label = that->label();
				1725	if (label->is_bound())
				1726	printer.PrintPositive("@", label->pos());
				1727	stream()->Add("}\"];\n");
				1728	stream()->Add(" a%p -> n%p [style=dashed, color=grey, "
				1729	"arrowhead=none];\n", that, that);
				1730	}
				1731
				1732
				1733	static const bool kPrintDispatchTable = false;
				1734	void DotPrinter::VisitChoice(ChoiceNode* that) {
				1735	if (kPrintDispatchTable) {
				1736	stream()->Add(" n%p [shape=Mrecord, label=\"", that);
				1737	TableEntryHeaderPrinter header_printer(stream());
				1738	that->GetTable(ignore_case_)->ForEach(&header_printer);
				1739	stream()->Add("\"]\n", that);
				1740	PrintAttributes(that);
				1741	TableEntryBodyPrinter body_printer(stream(), that);
				1742	that->GetTable(ignore_case_)->ForEach(&body_printer);
				1743	PrintOnFailure(that, that->on_failure());
				1744	} else {
				1745	stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);
				1746	for (int i = 0; i < that->alternatives()->length(); i++) {
				1747	GuardedAlternative alt = that->alternatives()->at(i);
				1748	stream()->Add(" n%p -> n%p;\n", that, alt.node());
				1749	}
				1750	}
				1751	for (int i = 0; i < that->alternatives()->length(); i++) {
				1752	GuardedAlternative alt = that->alternatives()->at(i);
				1753	alt.node()->Accept(this);
				1754	}
				1755	}
				1756
				1757
				1758	void DotPrinter::VisitText(TextNode* that) {
				1759	stream()->Add(" n%p [label=\"", that);
				1760	for (int i = 0; i < that->elements()->length(); i++) {
				1761	if (i > 0) stream()->Add(" ");
				1762	TextElement elm = that->elements()->at(i);
				1763	switch (elm.type) {
				1764	case TextElement::ATOM: {
				1765	stream()->Add("'%w'", elm.data.u_atom->data());
				1766	break;
				1767	}
				1768	case TextElement::CHAR_CLASS: {
				1769	RegExpCharacterClass* node = elm.data.u_char_class;
				1770	stream()->Add("[");
				1771	if (node->is_negated())
				1772	stream()->Add("^");
				1773	for (int j = 0; j < node->ranges()->length(); j++) {
				1774	CharacterRange range = node->ranges()->at(j);
				1775	stream()->Add("%k-%k", range.from(), range.to());
				1776	}
				1777	stream()->Add("]");
				1778	break;
				1779	}
				1780	default:
				1781	UNREACHABLE();
				1782	}
				1783	}
				1784	stream()->Add("\", shape=box, peripheries=2];\n");
				1785	PrintAttributes(that);
				1786	stream()->Add(" n%p -> n%p;\n", that, that->on_success());
				1787	Visit(that->on_success());
				1788	PrintOnFailure(that, that->on_failure());
				1789	}
				1790
				1791
				1792	void DotPrinter::VisitBackReference(BackReferenceNode* that) {
				1793	stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
				1794	that,
				1795	that->start_register(),
				1796	that->end_register());
				1797	PrintAttributes(that);
				1798	stream()->Add(" n%p -> n%p;\n", that, that->on_success());
				1799	Visit(that->on_success());
				1800	PrintOnFailure(that, that->on_failure());
				1801	}
				1802
				1803
				1804	void DotPrinter::VisitEnd(EndNode* that) {
				1805	stream()->Add(" n%p [style=bold, shape=point];\n", that);
				1806	PrintAttributes(that);
				1807	}
				1808
				1809
				1810	void DotPrinter::VisitAction(ActionNode* that) {
				1811	stream()->Add(" n%p [", that);
				1812	switch (that->type_) {
				1813	case ActionNode::STORE_REGISTER:
				1814	stream()->Add("label=\"$%i:=%i\", shape=octagon",
				1815	that->data_.u_store_register.reg,
				1816	that->data_.u_store_register.value);
				1817	break;
				1818	case ActionNode::INCREMENT_REGISTER:
				1819	stream()->Add("label=\"$%i++\", shape=octagon",
				1820	that->data_.u_increment_register.reg);
				1821	break;
				1822	case ActionNode::STORE_POSITION:
				1823	stream()->Add("label=\"$%i:=$pos\", shape=octagon",
				1824	that->data_.u_position_register.reg);
				1825	break;
				1826	case ActionNode::RESTORE_POSITION:
				1827	stream()->Add("label=\"$pos:=$%i\", shape=octagon",
				1828	that->data_.u_position_register.reg);
				1829	break;
				1830	case ActionNode::BEGIN_SUBMATCH:
				1831	stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
				1832	that->data_.u_submatch.current_position_register);
				1833	break;
				1834	case ActionNode::ESCAPE_SUBMATCH:
				1835	stream()->Add("label=\"escape\", shape=septagon");
				1836	break;
				1837	}
				1838	stream()->Add("];\n");
				1839	PrintAttributes(that);
				1840	stream()->Add(" n%p -> n%p;\n", that, that->on_success());
				1841	Visit(that->on_success());
				1842	}
				1843
				1844
				1845	class DispatchTableDumper {
				1846	public:
				1847	explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
				1848	void Call(uc16 key, DispatchTable::Entry entry);
				1849	StringStream* stream() { return stream_; }
				1850	private:
				1851	StringStream* stream_;
				1852	};
				1853
				1854
				1855	void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
				1856	stream()->Add("[%k-%k]: {", key, entry.to());
				1857	OutSet* set = entry.out_set();
				1858	bool first = true;
				1859	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				1860	if (set->Get(i)) {
				1861	if (first) {
				1862	first = false;
				1863	} else {
				1864	stream()->Add(", ");
				1865	}
				1866	stream()->Add("%i", i);
				1867	}
				1868	}
				1869	stream()->Add("}\n");
				1870	}
				1871
				1872
				1873	void DispatchTable::Dump() {
				1874	HeapStringAllocator alloc;
				1875	StringStream stream(&alloc);
				1876	DispatchTableDumper dumper(&stream);
				1877	tree()->ForEach(&dumper);
				1878	OS::PrintError("%s", *stream.ToCString());
				1879	}
				1880
				1881
				1882	void RegExpEngine::DotPrint(const char* label,
				1883	RegExpNode* node,
				1884	bool ignore_case) {
				1885	DotPrinter printer(ignore_case);
				1886	printer.PrintNode(label, node);
				1887	}
				1888
				1889
				1890	#endif // DEBUG
				1891
				1892
				1893	// -------------------------------------------------------------------
				1894	// Tree to graph conversion
				1895
				1896
				1897	RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
				1898	RegExpNode* on_success,
				1899	RegExpNode* on_failure) {
				1900	ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
				1901	elms->Add(TextElement::Atom(this));
				1902	return new TextNode(elms, on_success, on_failure);
				1903	}
				1904
				1905
				1906	RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
				1907	RegExpNode* on_success,
				1908	RegExpNode* on_failure) {
				1909	return new TextNode(elements(), on_success, on_failure);
				1910	}
				1911
				1912
				1913	RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
				1914	RegExpNode* on_success,
				1915	RegExpNode* on_failure) {
				1916	ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
				1917	elms->Add(TextElement::CharClass(this));
				1918	return new TextNode(elms, on_success, on_failure);
				1919	}
				1920
				1921
				1922	RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
				1923	RegExpNode* on_success,
				1924	RegExpNode* on_failure) {
				1925	ZoneList<RegExpTree> alternatives = this->alternatives();
				1926	int length = alternatives->length();
				1927	ChoiceNode* result = new ChoiceNode(length, on_failure);
				1928	for (int i = 0; i < length; i++) {
				1929	GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
				1930	on_success,
				1931	on_failure));
				1932	result->AddAlternative(alternative);
				1933	}
				1934	return result;
				1935	}
				1936
				1937
				1938	RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
				1939	RegExpNode* on_success,
				1940	RegExpNode* on_failure) {
				1941	return ToNode(min(),
				1942	max(),
				1943	is_greedy(),
				1944	body(),
				1945	compiler,
				1946	on_success,
				1947	on_failure);
				1948	}
				1949
				1950
				1951	RegExpNode* RegExpQuantifier::ToNode(int min,
				1952	int max,
				1953	bool is_greedy,
				1954	RegExpTree* body,
				1955	RegExpCompiler* compiler,
				1956	RegExpNode* on_success,
				1957	RegExpNode* on_failure) {
				1958	// x{f, t} becomes this:
				1959	//
				1960	// (r++)<-.
				1961	// \| `
				1962	// \| (x)
				1963	// v ^
				1964	// (r=0)-->(?)---/ [if r < t]
				1965	// \|
				1966	// [if r >= f] \----> ...
				1967	//
				1968	//
				1969	// TODO(someone): clear captures on repetition and handle empty
				1970	// matches.
				1971	bool has_min = min > 0;
				1972	bool has_max = max < RegExpQuantifier::kInfinity;
				1973	bool needs_counter = has_min \|\| has_max;
				1974	int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
				1975	ChoiceNode* center = new ChoiceNode(2, on_failure);
				1976	RegExpNode* loop_return = needs_counter
				1977	? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
				1978	: static_cast<RegExpNode*>(center);
				1979	RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure);
				1980	GuardedAlternative body_alt(body_node);
				1981	if (has_max) {
				1982	Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
				1983	body_alt.AddGuard(body_guard);
				1984	}
				1985	GuardedAlternative rest_alt(on_success);
				1986	if (has_min) {
				1987	Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
				1988	rest_alt.AddGuard(rest_guard);
				1989	}
				1990	if (is_greedy) {
				1991	center->AddAlternative(body_alt);
				1992	center->AddAlternative(rest_alt);
				1993	} else {
				1994	center->AddAlternative(rest_alt);
				1995	center->AddAlternative(body_alt);
				1996	}
				1997	if (needs_counter) {
				1998	return ActionNode::StoreRegister(reg_ctr, 0, center);
				1999	} else {
				2000	return center;
				2001	}
				2002	}
				2003
				2004
				2005	RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
				2006	RegExpNode* on_success,
				2007	RegExpNode* on_failure) {
				2008	NodeInfo info;
				2009	switch (type()) {
				2010	case START_OF_LINE:
				2011	info.follows_newline_interest = true;
				2012	break;
				2013	case START_OF_INPUT:
				2014	info.follows_start_interest = true;
				2015	break;
				2016	case BOUNDARY: case NON_BOUNDARY:
				2017	info.follows_word_interest = true;
				2018	break;
				2019	case END_OF_INPUT:
				2020	info.at_end = true;
				2021	break;
				2022	case END_OF_LINE:
				2023	// This is wrong but has the effect of making the compiler abort.
				2024	info.at_end = true;
				2025	}
				2026	return on_success->PropagateForward(&info);
				2027	}
				2028
				2029
				2030	RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
				2031	RegExpNode* on_success,
				2032	RegExpNode* on_failure) {
				2033	return new BackReferenceNode(RegExpCapture::StartRegister(index()),
				2034	RegExpCapture::EndRegister(index()),
				2035	on_success,
				2036	on_failure);
				2037	}
				2038
				2039
				2040	RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
				2041	RegExpNode* on_success,
				2042	RegExpNode* on_failure) {
				2043	return on_success;
				2044	}
				2045
				2046
				2047	RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
				2048	RegExpNode* on_success,
				2049	RegExpNode* on_failure) {
				2050	int stack_pointer_register = compiler->AllocateRegister();
				2051	int position_register = compiler->AllocateRegister();
				2052	if (is_positive()) {
				2053	// begin submatch scope
				2054	// $reg = $pos
				2055	// if [body]
				2056	// then
				2057	// $pos = $reg
				2058	// escape submatch scope (drop all backtracks created in scope)
				2059	// succeed
				2060	// else
				2061	// end submatch scope (nothing to clean up, just exit the scope)
				2062	// fail
				2063	return ActionNode::BeginSubmatch(
				2064	stack_pointer_register,
				2065	position_register,
				2066	body()->ToNode(
				2067	compiler,
				2068	ActionNode::EscapeSubmatch(
				2069	stack_pointer_register,
				2070	true, // Also restore input position.
				2071	position_register,
				2072	on_success),
				2073	on_failure));
				2074	} else {
				2075	// begin submatch scope
				2076	// try
				2077	// first if (body)
				2078	// then
				2079	// escape submatch scope
				2080	// fail
				2081	// else
				2082	// backtrack
				2083	// second
				2084	// end submatch scope
				2085	// restore current position
				2086	// succeed
				2087	ChoiceNode* try_node =
				2088	new ChoiceNode(1, ActionNode::RestorePosition(position_register,
				2089	on_success));
				2090	RegExpNode* body_node = body()->ToNode(
				2091	compiler,
				2092	ActionNode::EscapeSubmatch(stack_pointer_register,
				2093	false, // Don't also restore position
				2094	0, // Unused arguments.
				2095	on_failure),
				2096	compiler->backtrack());
				2097	GuardedAlternative body_alt(body_node);
				2098	try_node->AddAlternative(body_alt);
				2099	return ActionNode::BeginSubmatch(stack_pointer_register,
				2100	position_register,
				2101	try_node);
				2102	}
				2103	}
				2104
				2105
				2106	RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
				2107	RegExpNode* on_success,
				2108	RegExpNode* on_failure) {
				2109	return ToNode(body(), index(), compiler, on_success, on_failure);
				2110	}
				2111
				2112
				2113	RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
				2114	int index,
				2115	RegExpCompiler* compiler,
				2116	RegExpNode* on_success,
				2117	RegExpNode* on_failure) {
				2118	int start_reg = RegExpCapture::StartRegister(index);
				2119	int end_reg = RegExpCapture::EndRegister(index);
				2120	RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
				2121	RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure);
				2122	return ActionNode::StorePosition(start_reg, body_node);
				2123	}
				2124
				2125
				2126	RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
				2127	RegExpNode* on_success,
				2128	RegExpNode* on_failure) {
				2129	ZoneList<RegExpTree> children = nodes();
				2130	RegExpNode* current = on_success;
				2131	for (int i = children->length() - 1; i >= 0; i--) {
				2132	current = children->at(i)->ToNode(compiler, current, on_failure);
				2133	}
				2134	return current;
				2135	}
				2136
				2137
				2138	static const int kSpaceRangeCount = 20;
				2139	static const uc16 kSpaceRanges[kSpaceRangeCount] = {
				2140	0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
				2141	0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
				2142	0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
				2143	};
				2144
				2145
				2146	static const int kWordRangeCount = 8;
				2147	static const uc16 kWordRanges[kWordRangeCount] = {
				2148	'0', '9', 'A', 'Z', '_', '_', 'a', 'z'
				2149	};
				2150
				2151
				2152	static const int kDigitRangeCount = 2;
				2153	static const uc16 kDigitRanges[kDigitRangeCount] = {
				2154	'0', '9'
				2155	};
				2156
				2157
				2158	static const int kLineTerminatorRangeCount = 6;
				2159	static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {
				2160	0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029
				2161	};
				2162
				2163
				2164	static void AddClass(const uc16* elmv,
				2165	int elmc,
				2166	ZoneList<CharacterRange>* ranges) {
				2167	for (int i = 0; i < elmc; i += 2) {
				2168	ASSERT(elmv[i] <= elmv[i + 1]);
				2169	ranges->Add(CharacterRange(elmv[i], elmv[i + 1]));
				2170	}
				2171	}
				2172
				2173
				2174	static void AddClassNegated(const uc16 *elmv,
				2175	int elmc,
				2176	ZoneList<CharacterRange>* ranges) {
				2177	ASSERT(elmv[0] != 0x0000);
				2178	ASSERT(elmv[elmc-1] != 0xFFFF);
				2179	uc16 last = 0x0000;
				2180	for (int i = 0; i < elmc; i += 2) {
				2181	ASSERT(last <= elmv[i] - 1);
				2182	ASSERT(elmv[i] <= elmv[i + 1]);
				2183	ranges->Add(CharacterRange(last, elmv[i] - 1));
				2184	last = elmv[i + 1] + 1;
				2185	}
				2186	ranges->Add(CharacterRange(last, 0xFFFF));
				2187	}
				2188
				2189
				2190	void CharacterRange::AddClassEscape(uc16 type,
				2191	ZoneList<CharacterRange>* ranges) {
				2192	switch (type) {
				2193	case 's':
				2194	AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
				2195	break;
				2196	case 'S':
				2197	AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
				2198	break;
				2199	case 'w':
				2200	AddClass(kWordRanges, kWordRangeCount, ranges);
				2201	break;
				2202	case 'W':
				2203	AddClassNegated(kWordRanges, kWordRangeCount, ranges);
				2204	break;
				2205	case 'd':
				2206	AddClass(kDigitRanges, kDigitRangeCount, ranges);
				2207	break;
				2208	case 'D':
				2209	AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
				2210	break;
				2211	case '.':
				2212	AddClassNegated(kLineTerminatorRanges,
				2213	kLineTerminatorRangeCount,
				2214	ranges);
				2215	break;
				2216	// This is not a character range as defined by the spec but a
				2217	// convenient shorthand for a character class that matches any
				2218	// character.
				2219	case '*':
				2220	ranges->Add(CharacterRange::Everything());
				2221	break;
				2222	default:
				2223	UNREACHABLE();
				2224	}
				2225	}
				2226
				2227
				2228	Vector<const uc16> CharacterRange::GetWordBounds() {
				2229	return Vector<const uc16>(kWordRanges, kWordRangeCount);
				2230	}
				2231
				2232
				2233	class CharacterRangeSplitter {
				2234	public:
				2235	CharacterRangeSplitter(ZoneList<CharacterRange>** included,
				2236	ZoneList<CharacterRange>** excluded)
				2237	: included_(included),
				2238	excluded_(excluded) { }
				2239	void Call(uc16 from, DispatchTable::Entry entry);
				2240
				2241	static const int kInBase = 0;
				2242	static const int kInOverlay = 1;
				2243
				2244	private:
				2245	ZoneList<CharacterRange>** included_;
				2246	ZoneList<CharacterRange>** excluded_;
				2247	};
				2248
				2249
				2250	void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
				2251	if (!entry.out_set()->Get(kInBase)) return;
				2252	ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
				2253	? included_
				2254	: excluded_;
				2255	if (target == NULL) target = new ZoneList<CharacterRange>(2);
				2256	(*target)->Add(CharacterRange(entry.from(), entry.to()));
				2257	}
				2258
				2259
				2260	void CharacterRange::Split(ZoneList<CharacterRange>* base,
				2261	Vector<const uc16> overlay,
				2262	ZoneList<CharacterRange>** included,
				2263	ZoneList<CharacterRange>** excluded) {
				2264	ASSERT_EQ(NULL, *included);
				2265	ASSERT_EQ(NULL, *excluded);
				2266	DispatchTable table;
				2267	for (int i = 0; i < base->length(); i++)
				2268	table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
				2269	for (int i = 0; i < overlay.length(); i += 2) {
				2270	table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
				2271	CharacterRangeSplitter::kInOverlay);
				2272	}
				2273	CharacterRangeSplitter callback(included, excluded);
				2274	table.ForEach(&callback);
				2275	}
				2276
				2277
				2278	void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
				2279	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				2280	if (IsSingleton()) {
				2281	// If this is a singleton we just expand the one character.
				2282	int length = uncanonicalize.get(from(), '\0', chars);
				2283	for (int i = 0; i < length; i++) {
				2284	uc32 chr = chars[i];
				2285	if (chr != from()) {
				2286	ranges->Add(CharacterRange::Singleton(chars[i]));
				2287	}
				2288	}
				2289	} else if (from() <= kRangeCanonicalizeMax &&
				2290	to() <= kRangeCanonicalizeMax) {
				2291	// If this is a range we expand the characters block by block,
				2292	// expanding contiguous subranges (blocks) one at a time.
				2293	// The approach is as follows. For a given start character we
				2294	// look up the block that contains it, for instance 'a' if the
				2295	// start character is 'c'. A block is characterized by the property
				2296	// that all characters uncanonicalize in the same way as the first
				2297	// element, except that each entry in the result is incremented
				2298	// by the distance from the first element. So a-z is a block
				2299	// because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter
				2300	// uncanonicalizes to ['a' + k, 'A' + k].
				2301	// Once we've found the start point we look up its uncanonicalization
				2302	// and produce a range for each element. For instance for [c-f]
				2303	// we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only
				2304	// add a range if it is not already contained in the input, so [c-f]
				2305	// will be skipped but [C-F] will be added. If this range is not
				2306	// completely contained in a block we do this for all the blocks
				2307	// covered by the range.
				2308	unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				2309	// First, look up the block that contains the 'from' character.
				2310	int length = canonrange.get(from(), '\0', range);
				2311	if (length == 0) {
				2312	range[0] = from();
				2313	} else {
				2314	ASSERT_EQ(1, length);
				2315	}
				2316	int pos = from();
				2317	// The start of the current block. Note that except for the first
				2318	// iteration 'start' is always equal to 'pos'.
				2319	int start;
				2320	// If it is not the start point of a block the entry contains the
				2321	// offset of the character from the start point.
				2322	if ((range[0] & kStartMarker) == 0) {
				2323	start = pos - range[0];
				2324	} else {
				2325	start = pos;
				2326	}
				2327	// Then we add the ranges on at a time, incrementing the current
				2328	// position to be after the last block each time. The position
				2329	// always points to the start of a block.
				2330	while (pos < to()) {
				2331	length = canonrange.get(start, '\0', range);
				2332	if (length == 0) {
				2333	range[0] = start;
				2334	} else {
				2335	ASSERT_EQ(1, length);
				2336	}
				2337	ASSERT((range[0] & kStartMarker) != 0);
				2338	// The start point of a block contains the distance to the end
				2339	// of the range.
				2340	int block_end = start + (range[0] & kPayloadMask) - 1;
				2341	int end = (block_end > to()) ? to() : block_end;
				2342	length = uncanonicalize.get(start, '\0', range);
				2343	for (int i = 0; i < length; i++) {
				2344	uc32 c = range[i];
				2345	uc16 range_from = c + (pos - start);
				2346	uc16 range_to = c + (end - start);
				2347	if (!(from() <= range_from && range_to <= to())) {
				2348	ranges->Add(CharacterRange(range_from, range_to));
				2349	}
				2350	}
				2351	start = pos = block_end + 1;
				2352	}
				2353	} else {
				2354	// TODO(plesner) when we've fixed the 2^11 bug in unibrow.
				2355	}
				2356	}
				2357
				2358
				2359	// -------------------------------------------------------------------
				2360	// Interest propagation
				2361
				2362
				2363	RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) {
				2364	for (int i = 0; i < siblings_.length(); i++) {
				2365	RegExpNode* sibling = siblings_.Get(i);
				2366	if (sibling->info()->Matches(info))
				2367	return sibling;
				2368	}
				2369	return NULL;
				2370	}
				2371
				2372
				2373	RegExpNode* RegExpNode::EnsureSibling(NodeInfo* info, bool* cloned) {
				2374	ASSERT_EQ(false, *cloned);
				2375	ASSERT(!info->HasAssertions());
				2376	siblings_.Ensure(this);
				2377	RegExpNode* result = TryGetSibling(info);
				2378	if (result != NULL) return result;
				2379	result = this->Clone();
				2380	NodeInfo* new_info = result->info();
				2381	new_info->ResetCompilationState();
				2382	new_info->AddFromPreceding(info);
				2383	AddSibling(result);
				2384	*cloned = true;
				2385	return result;
				2386	}
				2387
				2388
				2389	template <class C>
				2390	static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
				2391	NodeInfo full_info(*node->info());
				2392	full_info.AddFromPreceding(info);
				2393	bool cloned = false;
				2394	return RegExpNode::EnsureSibling(node, &full_info, &cloned);
				2395	}
				2396
				2397
				2398	RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
				2399	NodeInfo full_info(*this->info());
				2400	full_info.AddFromPreceding(info);
				2401	bool cloned = false;
				2402	ActionNode* action = EnsureSibling(this, &full_info, &cloned);
				2403	if (cloned && type_ != ESCAPE_SUBMATCH) {
				2404	action->set_on_success(action->on_success()->PropagateForward(info));
				2405	}
				2406	return action;
				2407	}
				2408
				2409
				2410	RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
				2411	NodeInfo full_info(*this->info());
				2412	full_info.AddFromPreceding(info);
				2413	bool cloned = false;
				2414	ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);
				2415	if (cloned) {
				2416	ZoneList<GuardedAlternative>* old_alternatives = alternatives();
				2417	int count = old_alternatives->length();
				2418	choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
				2419	for (int i = 0; i < count; i++) {
				2420	GuardedAlternative alternative = old_alternatives->at(i);
				2421	alternative.set_node(alternative.node()->PropagateForward(info));
				2422	choice->alternatives()->Add(alternative);
				2423	}
				2424	if (!choice->on_failure_->IsBacktrack()) {
				2425	choice->on_failure_ = choice->on_failure_->PropagateForward(info);
				2426	}
				2427	}
				2428	return choice;
				2429	}
				2430
				2431
				2432	RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
				2433	return PropagateToEndpoint(this, info);
				2434	}
				2435
				2436
				2437	RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
				2438	NodeInfo full_info(*this->info());
				2439	full_info.AddFromPreceding(info);
				2440	bool cloned = false;
				2441	BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned);
				2442	if (cloned) {
				2443	// TODO(erikcorry): A back reference has to have two successors (by default
				2444	// the same node). The first is used if the back reference matches a non-
				2445	// empty back reference, the second if it matches an empty one. This
				2446	// doesn't matter for at_end, which is the only one implemented right now,
				2447	// but it will matter for other pieces of info.
				2448	back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
				2449	}
				2450	return back_ref;
				2451	}
				2452
				2453
				2454	RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
				2455	return PropagateToEndpoint(this, info);
				2456	}
				2457
				2458
				2459	// -------------------------------------------------------------------
				2460	// Splay tree
				2461
				2462
				2463	OutSet* OutSet::Extend(unsigned value) {
				2464	if (Get(value))
				2465	return this;
				2466	if (successors() != NULL) {
				2467	for (int i = 0; i < successors()->length(); i++) {
				2468	OutSet* successor = successors()->at(i);
				2469	if (successor->Get(value))
				2470	return successor;
				2471	}
				2472	} else {
				2473	successors_ = new ZoneList<OutSet*>(2);
				2474	}
				2475	OutSet* result = new OutSet(first_, remaining_);
				2476	result->Set(value);
				2477	successors()->Add(result);
				2478	return result;
				2479	}
				2480
				2481
				2482	void OutSet::Set(unsigned value) {
				2483	if (value < kFirstLimit) {
				2484	first_ \|= (1 << value);
				2485	} else {
				2486	if (remaining_ == NULL)
				2487	remaining_ = new ZoneList<unsigned>(1);
				2488	if (remaining_->is_empty() \|\| !remaining_->Contains(value))
				2489	remaining_->Add(value);
				2490	}
				2491	}
				2492
				2493
				2494	bool OutSet::Get(unsigned value) {
				2495	if (value < kFirstLimit) {
				2496	return (first_ & (1 << value)) != 0;
				2497	} else if (remaining_ == NULL) {
				2498	return false;
				2499	} else {
				2500	return remaining_->Contains(value);
				2501	}
				2502	}
				2503
				2504
				2505	const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
				2506	const DispatchTable::Entry DispatchTable::Config::kNoValue;
				2507
				2508
				2509	void DispatchTable::AddRange(CharacterRange full_range, int value) {
				2510	CharacterRange current = full_range;
				2511	if (tree()->is_empty()) {
				2512	// If this is the first range we just insert into the table.
				2513	ZoneSplayTree<Config>::Locator loc;
				2514	ASSERT_RESULT(tree()->Insert(current.from(), &loc));
				2515	loc.set_value(Entry(current.from(), current.to(), empty()->Extend(value)));
				2516	return;
				2517	}
				2518	// First see if there is a range to the left of this one that
				2519	// overlaps.
				2520	ZoneSplayTree<Config>::Locator loc;
				2521	if (tree()->FindGreatestLessThan(current.from(), &loc)) {
				2522	Entry* entry = &loc.value();
				2523	// If we've found a range that overlaps with this one, and it
				2524	// starts strictly to the left of this one, we have to fix it
				2525	// because the following code only handles ranges that start on
				2526	// or after the start point of the range we're adding.
				2527	if (entry->from() < current.from() && entry->to() >= current.from()) {
				2528	// Snap the overlapping range in half around the start point of
				2529	// the range we're adding.
				2530	CharacterRange left(entry->from(), current.from() - 1);
				2531	CharacterRange right(current.from(), entry->to());
				2532	// The left part of the overlapping range doesn't overlap.
				2533	// Truncate the whole entry to be just the left part.
				2534	entry->set_to(left.to());
				2535	// The right part is the one that overlaps. We add this part
				2536	// to the map and let the next step deal with merging it with
				2537	// the range we're adding.
				2538	ZoneSplayTree<Config>::Locator loc;
				2539	ASSERT_RESULT(tree()->Insert(right.from(), &loc));
				2540	loc.set_value(Entry(right.from(),
				2541	right.to(),
				2542	entry->out_set()));
				2543	}
				2544	}
				2545	while (current.is_valid()) {
				2546	if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
				2547	(loc.value().from() <= current.to()) &&
				2548	(loc.value().to() >= current.from())) {
				2549	Entry* entry = &loc.value();
				2550	// We have overlap. If there is space between the start point of
				2551	// the range we're adding and where the overlapping range starts
				2552	// then we have to add a range covering just that space.
				2553	if (current.from() < entry->from()) {
				2554	ZoneSplayTree<Config>::Locator ins;
				2555	ASSERT_RESULT(tree()->Insert(current.from(), &ins));
				2556	ins.set_value(Entry(current.from(),
				2557	entry->from() - 1,
				2558	empty()->Extend(value)));
				2559	current.set_from(entry->from());
				2560	}
				2561	ASSERT_EQ(current.from(), entry->from());
				2562	// If the overlapping range extends beyond the one we want to add
				2563	// we have to snap the right part off and add it separately.
				2564	if (entry->to() > current.to()) {
				2565	ZoneSplayTree<Config>::Locator ins;
				2566	ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));
				2567	ins.set_value(Entry(current.to() + 1,
				2568	entry->to(),
				2569	entry->out_set()));
				2570	entry->set_to(current.to());
				2571	}
				2572	ASSERT(entry->to() <= current.to());
				2573	// The overlapping range is now completely contained by the range
				2574	// we're adding so we can just update it and move the start point
				2575	// of the range we're adding just past it.
				2576	entry->AddValue(value);
				2577	// Bail out if the last interval ended at 0xFFFF since otherwise
				2578	// adding 1 will wrap around to 0.
				2579	if (entry->to() == 0xFFFF)
				2580	break;
				2581	ASSERT(entry->to() + 1 > current.from());
				2582	current.set_from(entry->to() + 1);
				2583	} else {
				2584	// There is no overlap so we can just add the range
				2585	ZoneSplayTree<Config>::Locator ins;
				2586	ASSERT_RESULT(tree()->Insert(current.from(), &ins));
				2587	ins.set_value(Entry(current.from(),
				2588	current.to(),
				2589	empty()->Extend(value)));
				2590	break;
				2591	}
				2592	}
				2593	}
				2594
				2595
				2596	OutSet* DispatchTable::Get(uc16 value) {
				2597	ZoneSplayTree<Config>::Locator loc;
				2598	if (!tree()->FindGreatestLessThan(value, &loc))
				2599	return empty();
				2600	Entry* entry = &loc.value();
				2601	if (value <= entry->to())
				2602	return entry->out_set();
				2603	else
				2604	return empty();
				2605	}
				2606
				2607
				2608	// -------------------------------------------------------------------
				2609	// Analysis
				2610
				2611
				2612	void Analysis::EnsureAnalyzed(RegExpNode* that) {
				2613	if (that->info()->been_analyzed \|\| that->info()->being_analyzed)
				2614	return;
				2615	that->info()->being_analyzed = true;
				2616	that->Accept(this);
				2617	that->info()->being_analyzed = false;
				2618	that->info()->been_analyzed = true;
				2619	}
				2620
				2621
				2622	void Analysis::VisitEnd(EndNode* that) {
				2623	// nothing to do
				2624	}
				2625
				2626
				2627	void Analysis::VisitText(TextNode* that) {
				2628	if (ignore_case_) {
				2629	that->MakeCaseIndependent();
				2630	}
				2631	EnsureAnalyzed(that->on_success());
				2632	EnsureAnalyzed(that->on_failure());
				2633	NodeInfo* info = that->info();
				2634	NodeInfo* next_info = that->on_success()->info();
				2635	// If the following node is interested in what it follows then this
				2636	// node must determine it.
				2637	info->determine_newline = next_info->follows_newline_interest;
				2638	info->determine_word = next_info->follows_word_interest;
				2639	info->determine_start = next_info->follows_start_interest;
				2640	}
				2641
				2642
				2643	void Analysis::VisitAction(ActionNode* that) {
				2644	EnsureAnalyzed(that->on_success());
				2645	// If the next node is interested in what it follows then this node
				2646	// has to be interested too so it can pass the information on.
				2647	that->info()->AddFromFollowing(that->on_success()->info());
				2648	}
				2649
				2650
				2651	void Analysis::VisitChoice(ChoiceNode* that) {
				2652	NodeInfo* info = that->info();
				2653	for (int i = 0; i < that->alternatives()->length(); i++) {
				2654	RegExpNode* node = that->alternatives()->at(i).node();
				2655	EnsureAnalyzed(node);
				2656	// Anything the following nodes need to know has to be known by
				2657	// this node also, so it can pass it on.
				2658	info->AddFromFollowing(node->info());
				2659	}
				2660	EnsureAnalyzed(that->on_failure());
				2661	}
				2662
				2663
				2664	void Analysis::VisitBackReference(BackReferenceNode* that) {
				2665	EnsureAnalyzed(that->on_success());
				2666	EnsureAnalyzed(that->on_failure());
				2667	}
				2668
				2669
				2670	// -------------------------------------------------------------------
				2671	// Assumption expansion
				2672
				2673
				2674	RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
				2675	siblings_.Ensure(this);
				2676	NodeInfo new_info = *this->info();
				2677	if (new_info.follows_word_interest)
				2678	new_info.follows_word = info->follows_word;
				2679	if (new_info.follows_newline_interest)
				2680	new_info.follows_newline = info->follows_newline;
				2681	// If the following node should determine something we need to get
				2682	// a sibling that determines it.
				2683	new_info.does_determine_newline = new_info.determine_newline;
				2684	new_info.does_determine_word = new_info.determine_word;
				2685	new_info.does_determine_start = new_info.determine_start;
				2686	RegExpNode* sibling = TryGetSibling(&new_info);
				2687	if (sibling == NULL) {
				2688	sibling = ExpandLocal(&new_info);
				2689	siblings_.Add(sibling);
				2690	sibling->info()->being_expanded = true;
				2691	sibling->ExpandChildren();
				2692	sibling->info()->being_expanded = false;
				2693	sibling->info()->been_expanded = true;
				2694	} else {
				2695	NodeInfo* sib_info = sibling->info();
				2696	if (!sib_info->been_expanded && !sib_info->being_expanded) {
				2697	sibling->info()->being_expanded = true;
				2698	sibling->ExpandChildren();
				2699	sibling->info()->being_expanded = false;
				2700	sibling->info()->been_expanded = true;
				2701	}
				2702	}
				2703	return sibling;
				2704	}
				2705
				2706
				2707	RegExpNode* ChoiceNode::ExpandLocal(NodeInfo* info) {
				2708	ChoiceNode* clone = this->Clone();
				2709	clone->info()->ResetCompilationState();
				2710	clone->info()->AddAssumptions(info);
				2711	return clone;
				2712	}
				2713
				2714
				2715	void ChoiceNode::ExpandChildren() {
				2716	ZoneList<GuardedAlternative>* alts = alternatives();
				2717	ZoneList<GuardedAlternative>* new_alts
				2718	= new ZoneList<GuardedAlternative>(alts->length());
				2719	for (int i = 0; i < alts->length(); i++) {
				2720	GuardedAlternative next = alts->at(i);
				2721	next.set_node(next.node()->EnsureExpanded(info()));
				2722	new_alts->Add(next);
				2723	}
				2724	alternatives_ = new_alts;
				2725	}
				2726
				2727
				2728	RegExpNode* TextNode::ExpandLocal(NodeInfo* info) {
				2729	TextElement last = elements()->last();
				2730	if (last.type == TextElement::CHAR_CLASS) {
				2731	RegExpCharacterClass* char_class = last.data.u_char_class;
				2732	if (info->does_determine_word) {
				2733	ZoneList<CharacterRange>* word = NULL;
				2734	ZoneList<CharacterRange>* non_word = NULL;
				2735	CharacterRange::Split(char_class->ranges(),
				2736	CharacterRange::GetWordBounds(),
				2737	&word,
				2738	&non_word);
				2739	if (non_word == NULL) {
				2740	// This node contains no non-word characters so it must be
				2741	// all word.
				2742	this->info()->is_word = NodeInfo::TRUE;
				2743	} else if (word == NULL) {
				2744	// Vice versa.
				2745	this->info()->is_word = NodeInfo::FALSE;
				2746	} else {
				2747	// If this character class contains both word and non-word
				2748	// characters we need to split it into two.
				2749	ChoiceNode* result = new ChoiceNode(2, on_failure());
				2750	// Welcome to the family, son!
				2751	result->set_siblings(this->siblings());
				2752	result->info() = this->info();
				2753	result->info()->ResetCompilationState();
				2754	result->info()->AddAssumptions(info);
				2755	RegExpNode* word_node
				2756	= new TextNode(new RegExpCharacterClass(word, false),
				2757	on_success(),
				2758	on_failure());
				2759	word_node->info()->determine_word = true;
				2760	word_node->info()->does_determine_word = true;
				2761	word_node->info()->is_word = NodeInfo::TRUE;
				2762	result->alternatives()->Add(GuardedAlternative(word_node));
				2763	RegExpNode* non_word_node
				2764	= new TextNode(new RegExpCharacterClass(non_word, false),
				2765	on_success(),
				2766	on_failure());
				2767	non_word_node->info()->determine_word = true;
				2768	non_word_node->info()->does_determine_word = true;
				2769	non_word_node->info()->is_word = NodeInfo::FALSE;
				2770	result->alternatives()->Add(GuardedAlternative(non_word_node));
				2771	return result;
				2772	}
				2773	}
				2774	}
				2775	TextNode* clone = this->Clone();
				2776	clone->info()->ResetCompilationState();
				2777	clone->info()->AddAssumptions(info);
				2778	return clone;
				2779	}
				2780
				2781
				2782	void TextNode::ExpandAtomChildren(RegExpAtom* that) {
				2783	NodeInfo new_info = *info();
				2784	uc16 last = that->data()[that->data().length() - 1];
				2785	if (info()->determine_word) {
				2786	new_info.follows_word = IsRegExpWord(last)
				2787	? NodeInfo::TRUE : NodeInfo::FALSE;
				2788	} else {
				2789	new_info.follows_word = NodeInfo::UNKNOWN;
				2790	}
				2791	if (info()->determine_newline) {
				2792	new_info.follows_newline = IsRegExpNewline(last)
				2793	? NodeInfo::TRUE : NodeInfo::FALSE;
				2794	} else {
				2795	new_info.follows_newline = NodeInfo::UNKNOWN;
				2796	}
				2797	if (info()->determine_start) {
				2798	new_info.follows_start = NodeInfo::FALSE;
				2799	} else {
				2800	new_info.follows_start = NodeInfo::UNKNOWN;
				2801	}
				2802	set_on_success(on_success()->EnsureExpanded(&new_info));
				2803	}
				2804
				2805
				2806	void TextNode::ExpandCharClassChildren(RegExpCharacterClass* that) {
				2807	if (info()->does_determine_word) {
				2808	// ASSERT(info()->is_word != NodeInfo::UNKNOWN);
				2809	NodeInfo next_info = *on_success()->info();
				2810	next_info.follows_word = info()->is_word;
				2811	set_on_success(on_success()->EnsureExpanded(&next_info));
				2812	} else {
				2813	set_on_success(on_success()->EnsureExpanded(info()));
				2814	}
				2815	}
				2816
				2817
				2818	void TextNode::ExpandChildren() {
				2819	TextElement last = elements()->last();
				2820	switch (last.type) {
				2821	case TextElement::ATOM:
				2822	ExpandAtomChildren(last.data.u_atom);
				2823	break;
				2824	case TextElement::CHAR_CLASS:
				2825	ExpandCharClassChildren(last.data.u_char_class);
				2826	break;
				2827	default:
				2828	UNREACHABLE();
				2829	}
				2830	}
				2831
				2832
				2833	RegExpNode* ActionNode::ExpandLocal(NodeInfo* info) {
				2834	ActionNode* clone = this->Clone();
				2835	clone->info()->ResetCompilationState();
				2836	clone->info()->AddAssumptions(info);
				2837	return clone;
				2838	}
				2839
				2840
				2841	void ActionNode::ExpandChildren() {
				2842	set_on_success(on_success()->EnsureExpanded(info()));
				2843	}
				2844
				2845
				2846	RegExpNode* BackReferenceNode::ExpandLocal(NodeInfo* info) {
				2847	BackReferenceNode* clone = this->Clone();
				2848	clone->info()->ResetCompilationState();
				2849	clone->info()->AddAssumptions(info);
				2850	return clone;
				2851	}
				2852
				2853
				2854	void BackReferenceNode::ExpandChildren() {
				2855	set_on_success(on_success()->EnsureExpanded(info()));
				2856	}
				2857
				2858
				2859	RegExpNode* EndNode::ExpandLocal(NodeInfo* info) {
				2860	EndNode* clone = this->Clone();
				2861	clone->info()->ResetCompilationState();
				2862	clone->info()->AddAssumptions(info);
				2863	return clone;
				2864	}
				2865
				2866
				2867	void EndNode::ExpandChildren() {
				2868	// nothing to do
				2869	}
				2870
				2871
				2872	// -------------------------------------------------------------------
				2873	// Dispatch table construction
				2874
				2875
				2876	void DispatchTableConstructor::VisitEnd(EndNode* that) {
				2877	AddRange(CharacterRange::Everything());
				2878	}
				2879
				2880
				2881	void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
				2882	node->set_being_calculated(true);
				2883	ZoneList<GuardedAlternative>* alternatives = node->alternatives();
				2884	for (int i = 0; i < alternatives->length(); i++) {
				2885	set_choice_index(i);
				2886	alternatives->at(i).node()->Accept(this);
				2887	}
				2888	node->set_being_calculated(false);
				2889	}
				2890
				2891
				2892	class AddDispatchRange {
				2893	public:
				2894	explicit AddDispatchRange(DispatchTableConstructor* constructor)
				2895	: constructor_(constructor) { }
				2896	void Call(uc32 from, DispatchTable::Entry entry);
				2897	private:
				2898	DispatchTableConstructor* constructor_;
				2899	};
				2900
				2901
				2902	void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {
				2903	CharacterRange range(from, entry.to());
				2904	constructor_->AddRange(range);
				2905	}
				2906
				2907
				2908	void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
				2909	if (node->being_calculated())
				2910	return;
				2911	DispatchTable* table = node->GetTable(ignore_case_);
				2912	AddDispatchRange adder(this);
				2913	table->ForEach(&adder);
				2914	}
				2915
				2916
				2917	void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
				2918	// TODO(160): Find the node that we refer back to and propagate its start
				2919	// set back to here. For now we just accept anything.
				2920	AddRange(CharacterRange::Everything());
				2921	}
				2922
				2923
				2924
				2925	static int CompareRangeByFrom(const CharacterRange* a,
				2926	const CharacterRange* b) {
				2927	return Compare<uc16>(a->from(), b->from());
				2928	}
				2929
				2930
				2931	void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
				2932	ranges->Sort(CompareRangeByFrom);
				2933	uc16 last = 0;
				2934	for (int i = 0; i < ranges->length(); i++) {
				2935	CharacterRange range = ranges->at(i);
				2936	if (last < range.from())
				2937	AddRange(CharacterRange(last, range.from() - 1));
				2938	if (range.to() >= last) {
				2939	if (range.to() == 0xFFFF) {
				2940	return;
				2941	} else {
				2942	last = range.to() + 1;
				2943	}
				2944	}
				2945	}
				2946	AddRange(CharacterRange(last, 0xFFFF));
				2947	}
				2948
				2949
				2950	void DispatchTableConstructor::VisitText(TextNode* that) {
				2951	TextElement elm = that->elements()->at(0);
				2952	switch (elm.type) {
				2953	case TextElement::ATOM: {
				2954	uc16 c = elm.data.u_atom->data()[0];
				2955	AddRange(CharacterRange(c, c));
				2956	break;
				2957	}
				2958	case TextElement::CHAR_CLASS: {
				2959	RegExpCharacterClass* tree = elm.data.u_char_class;
				2960	ZoneList<CharacterRange>* ranges = tree->ranges();
				2961	if (tree->is_negated()) {
				2962	AddInverse(ranges);
				2963	} else {
				2964	for (int i = 0; i < ranges->length(); i++)
				2965	AddRange(ranges->at(i));
				2966	}
				2967	break;
				2968	}
				2969	default: {
				2970	UNIMPLEMENTED();
				2971	}
				2972	}
				2973	}
				2974
				2975
				2976	void DispatchTableConstructor::VisitAction(ActionNode* that) {
				2977	that->on_success()->Accept(this);
				2978	}
				2979
				2980
				2981	Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
				2982	RegExpNode** node_return,
				2983	bool ignore_case,
				2984	bool is_multiline) {
				2985	RegExpCompiler compiler(input->capture_count, ignore_case);
				2986	// Wrap the body of the regexp in capture #0.
				2987	RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
				2988	0,
				2989	&compiler,
				2990	compiler.accept(),
				2991	compiler.backtrack());
				2992	// Add a .*? at the beginning, outside the body capture.
				2993	// Note: We could choose to not add this if the regexp is anchored at
				2994	// the start of the input but I'm not sure how best to do that and
				2995	// since we don't even handle ^ yet I'm saving that optimization for
				2996	// later.
				2997	RegExpNode* node = RegExpQuantifier::ToNode(0,
				2998	RegExpQuantifier::kInfinity,
				2999	false,
				3000	new RegExpCharacterClass('*'),
				3001	&compiler,
				3002	captured_body,
				3003	compiler.backtrack());
				3004	if (node_return != NULL) *node_return = node;
				3005	Analysis analysis(ignore_case);
				3006	analysis.EnsureAnalyzed(node);
				3007
				3008	NodeInfo info = *node->info();
				3009	node = node->EnsureExpanded(&info);
				3010
				3011	if (!FLAG_irregexp) {
				3012	return Handle<FixedArray>::null();
				3013	}
				3014
				3015	if (is_multiline && !FLAG_attempt_multiline_irregexp) {
				3016	return Handle<FixedArray>::null();
				3017	}
				3018
				3019	if (FLAG_irregexp_native) {
				3020	#ifdef ARM
				3021	// Unimplemented, fall-through to bytecode implementation.
				3022	#else // IA32
				3023	RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,
				3024	(input->capture_count + 1) * 2);
				3025	return compiler.Assemble(&macro_assembler,
				3026	node,
				3027	input->capture_count);
				3028	#endif
				3029	}
				3030	EmbeddedVector<byte, 1024> codes;
				3031	RegExpMacroAssemblerIrregexp macro_assembler(codes);
				3032	return compiler.Assemble(&macro_assembler,
				3033	node,
				3034	input->capture_count);
				3035	}
				3036
				3037
christian.plesner.hansen	43d26ec	2008-07-03 15:10:15 +0000	[diff] [blame]	3038	}} // namespace v8::internal