Blame - src/regexp/jsregexp.cc - fp2-dev/platform/external/v8

blob: 34d20fe781d291b14e785a803e3e0f07e188bcb6 [file] [log] [blame]

Ben Murdoch	4a90d5f	2016-03-22 12:00:34 +0000	[diff] [blame^]	1	// Copyright 2012 the V8 project authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "src/regexp/jsregexp.h"
				6
				7	#include "src/ast/ast.h"
				8	#include "src/base/platform/platform.h"
				9	#include "src/compilation-cache.h"
				10	#include "src/compiler.h"
				11	#include "src/execution.h"
				12	#include "src/factory.h"
				13	#include "src/isolate-inl.h"
				14	#include "src/messages.h"
				15	#include "src/ostreams.h"
				16	#include "src/regexp/interpreter-irregexp.h"
				17	#include "src/regexp/jsregexp-inl.h"
				18	#include "src/regexp/regexp-macro-assembler.h"
				19	#include "src/regexp/regexp-macro-assembler-irregexp.h"
				20	#include "src/regexp/regexp-macro-assembler-tracer.h"
				21	#include "src/regexp/regexp-parser.h"
				22	#include "src/regexp/regexp-stack.h"
				23	#include "src/runtime/runtime.h"
				24	#include "src/splay-tree-inl.h"
				25	#include "src/string-search.h"
				26	#include "src/unicode-decoder.h"
				27
				28	#ifndef V8_INTERPRETED_REGEXP
				29	#if V8_TARGET_ARCH_IA32
				30	#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
				31	#elif V8_TARGET_ARCH_X64
				32	#include "src/regexp/x64/regexp-macro-assembler-x64.h"
				33	#elif V8_TARGET_ARCH_ARM64
				34	#include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
				35	#elif V8_TARGET_ARCH_ARM
				36	#include "src/regexp/arm/regexp-macro-assembler-arm.h"
				37	#elif V8_TARGET_ARCH_PPC
				38	#include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
				39	#elif V8_TARGET_ARCH_MIPS
				40	#include "src/regexp/mips/regexp-macro-assembler-mips.h"
				41	#elif V8_TARGET_ARCH_MIPS64
				42	#include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
				43	#elif V8_TARGET_ARCH_X87
				44	#include "src/regexp/x87/regexp-macro-assembler-x87.h"
				45	#else
				46	#error Unsupported target architecture.
				47	#endif
				48	#endif
				49
				50
				51	namespace v8 {
				52	namespace internal {
				53
				54	MUST_USE_RESULT
				55	static inline MaybeHandle<Object> ThrowRegExpException(
				56	Handle<JSRegExp> re, Handle<String> pattern, Handle<String> error_text) {
				57	Isolate* isolate = re->GetIsolate();
				58	THROW_NEW_ERROR(isolate, NewSyntaxError(MessageTemplate::kMalformedRegExp,
				59	pattern, error_text),
				60	Object);
				61	}
				62
				63
				64	inline void ThrowRegExpException(Handle<JSRegExp> re,
				65	Handle<String> error_text) {
				66	USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text));
				67	}
				68
				69
				70	ContainedInLattice AddRange(ContainedInLattice containment,
				71	const int* ranges,
				72	int ranges_length,
				73	Interval new_range) {
				74	DCHECK((ranges_length & 1) == 1);
				75	DCHECK(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);
				76	if (containment == kLatticeUnknown) return containment;
				77	bool inside = false;
				78	int last = 0;
				79	for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
				80	// Consider the range from last to ranges[i].
				81	// We haven't got to the new range yet.
				82	if (ranges[i] <= new_range.from()) continue;
				83	// New range is wholly inside last-ranges[i]. Note that new_range.to() is
				84	// inclusive, but the values in ranges are not.
				85	if (last <= new_range.from() && new_range.to() < ranges[i]) {
				86	return Combine(containment, inside ? kLatticeIn : kLatticeOut);
				87	}
				88	return kLatticeUnknown;
				89	}
				90	return containment;
				91	}
				92
				93
				94	// More makes code generation slower, less makes V8 benchmark score lower.
				95	const int kMaxLookaheadForBoyerMoore = 8;
				96	// In a 3-character pattern you can maximally step forwards 3 characters
				97	// at a time, which is not always enough to pay for the extra logic.
				98	const int kPatternTooShortForBoyerMoore = 2;
				99
				100
				101	// Identifies the sort of regexps where the regexp engine is faster
				102	// than the code used for atom matches.
				103	static bool HasFewDifferentCharacters(Handle<String> pattern) {
				104	int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
				105	if (length <= kPatternTooShortForBoyerMoore) return false;
				106	const int kMod = 128;
				107	bool character_found[kMod];
				108	int different = 0;
				109	memset(&character_found[0], 0, sizeof(character_found));
				110	for (int i = 0; i < length; i++) {
				111	int ch = (pattern->Get(i) & (kMod - 1));
				112	if (!character_found[ch]) {
				113	character_found[ch] = true;
				114	different++;
				115	// We declare a regexp low-alphabet if it has at least 3 times as many
				116	// characters as it has different characters.
				117	if (different * 3 > length) return false;
				118	}
				119	}
				120	return true;
				121	}
				122
				123
				124	// Generic RegExp methods. Dispatches to implementation specific methods.
				125
				126
				127	MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
				128	Handle<String> pattern,
				129	JSRegExp::Flags flags) {
				130	Isolate* isolate = re->GetIsolate();
				131	Zone zone;
				132	CompilationCache* compilation_cache = isolate->compilation_cache();
				133	MaybeHandle<FixedArray> maybe_cached =
				134	compilation_cache->LookupRegExp(pattern, flags);
				135	Handle<FixedArray> cached;
				136	bool in_cache = maybe_cached.ToHandle(&cached);
				137	LOG(isolate, RegExpCompileEvent(re, in_cache));
				138
				139	Handle<Object> result;
				140	if (in_cache) {
				141	re->set_data(*cached);
				142	return re;
				143	}
				144	pattern = String::Flatten(pattern);
				145	PostponeInterruptsScope postpone(isolate);
				146	RegExpCompileData parse_result;
				147	FlatStringReader reader(isolate, pattern);
				148	if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader,
				149	flags & JSRegExp::kMultiline,
				150	flags & JSRegExp::kUnicode, &parse_result)) {
				151	// Throw an exception if we fail to parse the pattern.
				152	return ThrowRegExpException(re, pattern, parse_result.error);
				153	}
				154
				155	bool has_been_compiled = false;
				156
				157	if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&
				158	!(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {
				159	// Parse-tree is a single atom that is equal to the pattern.
				160	AtomCompile(re, pattern, flags, pattern);
				161	has_been_compiled = true;
				162	} else if (parse_result.tree->IsAtom() && !(flags & JSRegExp::kIgnoreCase) &&
				163	!(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) {
				164	RegExpAtom* atom = parse_result.tree->AsAtom();
				165	Vector<const uc16> atom_pattern = atom->data();
				166	Handle<String> atom_string;
				167	ASSIGN_RETURN_ON_EXCEPTION(
				168	isolate, atom_string,
				169	isolate->factory()->NewStringFromTwoByte(atom_pattern),
				170	Object);
				171	if (!HasFewDifferentCharacters(atom_string)) {
				172	AtomCompile(re, pattern, flags, atom_string);
				173	has_been_compiled = true;
				174	}
				175	}
				176	if (!has_been_compiled) {
				177	IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
				178	}
				179	DCHECK(re->data()->IsFixedArray());
				180	// Compilation succeeded so the data is set on the regexp
				181	// and we can store it in the cache.
				182	Handle<FixedArray> data(FixedArray::cast(re->data()));
				183	compilation_cache->PutRegExp(pattern, flags, data);
				184
				185	return re;
				186	}
				187
				188
				189	MaybeHandle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
				190	Handle<String> subject,
				191	int index,
				192	Handle<JSArray> last_match_info) {
				193	switch (regexp->TypeTag()) {
				194	case JSRegExp::ATOM:
				195	return AtomExec(regexp, subject, index, last_match_info);
				196	case JSRegExp::IRREGEXP: {
				197	return IrregexpExec(regexp, subject, index, last_match_info);
				198	}
				199	default:
				200	UNREACHABLE();
				201	return MaybeHandle<Object>();
				202	}
				203	}
				204
				205
				206	// RegExp Atom implementation: Simple string search using indexOf.
				207
				208
				209	void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
				210	Handle<String> pattern,
				211	JSRegExp::Flags flags,
				212	Handle<String> match_pattern) {
				213	re->GetIsolate()->factory()->SetRegExpAtomData(re,
				214	JSRegExp::ATOM,
				215	pattern,
				216	flags,
				217	match_pattern);
				218	}
				219
				220
				221	static void SetAtomLastCapture(FixedArray* array,
				222	String* subject,
				223	int from,
				224	int to) {
				225	SealHandleScope shs(array->GetIsolate());
				226	RegExpImpl::SetLastCaptureCount(array, 2);
				227	RegExpImpl::SetLastSubject(array, subject);
				228	RegExpImpl::SetLastInput(array, subject);
				229	RegExpImpl::SetCapture(array, 0, from);
				230	RegExpImpl::SetCapture(array, 1, to);
				231	}
				232
				233
				234	int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
				235	Handle<String> subject,
				236	int index,
				237	int32_t* output,
				238	int output_size) {
				239	Isolate* isolate = regexp->GetIsolate();
				240
				241	DCHECK(0 <= index);
				242	DCHECK(index <= subject->length());
				243
				244	subject = String::Flatten(subject);
				245	DisallowHeapAllocation no_gc; // ensure vectors stay valid
				246
				247	String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
				248	int needle_len = needle->length();
				249	DCHECK(needle->IsFlat());
				250	DCHECK_LT(0, needle_len);
				251
				252	if (index + needle_len > subject->length()) {
				253	return RegExpImpl::RE_FAILURE;
				254	}
				255
				256	for (int i = 0; i < output_size; i += 2) {
				257	String::FlatContent needle_content = needle->GetFlatContent();
				258	String::FlatContent subject_content = subject->GetFlatContent();
				259	DCHECK(needle_content.IsFlat());
				260	DCHECK(subject_content.IsFlat());
				261	// dispatch on type of strings
				262	index =
				263	(needle_content.IsOneByte()
				264	? (subject_content.IsOneByte()
				265	? SearchString(isolate, subject_content.ToOneByteVector(),
				266	needle_content.ToOneByteVector(), index)
				267	: SearchString(isolate, subject_content.ToUC16Vector(),
				268	needle_content.ToOneByteVector(), index))
				269	: (subject_content.IsOneByte()
				270	? SearchString(isolate, subject_content.ToOneByteVector(),
				271	needle_content.ToUC16Vector(), index)
				272	: SearchString(isolate, subject_content.ToUC16Vector(),
				273	needle_content.ToUC16Vector(), index)));
				274	if (index == -1) {
				275	return i / 2; // Return number of matches.
				276	} else {
				277	output[i] = index;
				278	output[i+1] = index + needle_len;
				279	index += needle_len;
				280	}
				281	}
				282	return output_size / 2;
				283	}
				284
				285
				286	Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
				287	Handle<String> subject,
				288	int index,
				289	Handle<JSArray> last_match_info) {
				290	Isolate* isolate = re->GetIsolate();
				291
				292	static const int kNumRegisters = 2;
				293	STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
				294	int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
				295
				296	int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
				297
				298	if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
				299
				300	DCHECK_EQ(res, RegExpImpl::RE_SUCCESS);
				301	SealHandleScope shs(isolate);
				302	FixedArray* array = FixedArray::cast(last_match_info->elements());
				303	SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
				304	return last_match_info;
				305	}
				306
				307
				308	// Irregexp implementation.
				309
				310	// Ensures that the regexp object contains a compiled version of the
				311	// source for either one-byte or two-byte subject strings.
				312	// If the compiled version doesn't already exist, it is compiled
				313	// from the source pattern.
				314	// If compilation fails, an exception is thrown and this function
				315	// returns false.
				316	bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
				317	Handle<String> sample_subject,
				318	bool is_one_byte) {
				319	Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
				320	#ifdef V8_INTERPRETED_REGEXP
				321	if (compiled_code->IsByteArray()) return true;
				322	#else // V8_INTERPRETED_REGEXP (RegExp native code)
				323	if (compiled_code->IsCode()) return true;
				324	#endif
				325	// We could potentially have marked this as flushable, but have kept
				326	// a saved version if we did not flush it yet.
				327	Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
				328	if (saved_code->IsCode()) {
				329	// Reinstate the code in the original place.
				330	re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);
				331	DCHECK(compiled_code->IsSmi());
				332	return true;
				333	}
				334	return CompileIrregexp(re, sample_subject, is_one_byte);
				335	}
				336
				337
				338	bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
				339	Handle<String> sample_subject,
				340	bool is_one_byte) {
				341	// Compile the RegExp.
				342	Isolate* isolate = re->GetIsolate();
				343	Zone zone;
				344	PostponeInterruptsScope postpone(isolate);
				345	// If we had a compilation error the last time this is saved at the
				346	// saved code index.
				347	Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));
				348	// When arriving here entry can only be a smi, either representing an
				349	// uncompiled regexp, a previous compilation error, or code that has
				350	// been flushed.
				351	DCHECK(entry->IsSmi());
				352	int entry_value = Smi::cast(entry)->value();
				353	DCHECK(entry_value == JSRegExp::kUninitializedValue \|\|
				354	entry_value == JSRegExp::kCompilationErrorValue \|\|
				355	(entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
				356
				357	if (entry_value == JSRegExp::kCompilationErrorValue) {
				358	// A previous compilation failed and threw an error which we store in
				359	// the saved code index (we store the error message, not the actual
				360	// error). Recreate the error object and throw it.
				361	Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
				362	DCHECK(error_string->IsString());
				363	Handle<String> error_message(String::cast(error_string));
				364	ThrowRegExpException(re, error_message);
				365	return false;
				366	}
				367
				368	JSRegExp::Flags flags = re->GetFlags();
				369
				370	Handle<String> pattern(re->Pattern());
				371	pattern = String::Flatten(pattern);
				372	RegExpCompileData compile_data;
				373	FlatStringReader reader(isolate, pattern);
				374	if (!RegExpParser::ParseRegExp(isolate, &zone, &reader,
				375	flags & JSRegExp::kMultiline,
				376	flags & JSRegExp::kUnicode, &compile_data)) {
				377	// Throw an exception if we fail to parse the pattern.
				378	// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
				379	USE(ThrowRegExpException(re, pattern, compile_data.error));
				380	return false;
				381	}
				382	RegExpEngine::CompilationResult result = RegExpEngine::Compile(
				383	isolate, &zone, &compile_data, flags & JSRegExp::kIgnoreCase,
				384	flags & JSRegExp::kGlobal, flags & JSRegExp::kMultiline,
				385	flags & JSRegExp::kSticky, pattern, sample_subject, is_one_byte);
				386	if (result.error_message != NULL) {
				387	// Unable to compile regexp.
				388	Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
				389	CStrVector(result.error_message)).ToHandleChecked();
				390	ThrowRegExpException(re, error_message);
				391	return false;
				392	}
				393
				394	Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
				395	data->set(JSRegExp::code_index(is_one_byte), result.code);
				396	int register_max = IrregexpMaxRegisterCount(*data);
				397	if (result.num_registers > register_max) {
				398	SetIrregexpMaxRegisterCount(*data, result.num_registers);
				399	}
				400
				401	return true;
				402	}
				403
				404
				405	int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {
				406	return Smi::cast(
				407	re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
				408	}
				409
				410
				411	void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {
				412	re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
				413	}
				414
				415
				416	int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
				417	return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
				418	}
				419
				420
				421	int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
				422	return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
				423	}
				424
				425
				426	ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) {
				427	return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));
				428	}
				429
				430
				431	Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) {
				432	return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
				433	}
				434
				435
				436	void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
				437	Handle<String> pattern,
				438	JSRegExp::Flags flags,
				439	int capture_count) {
				440	// Initialize compiled code entries to null.
				441	re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
				442	JSRegExp::IRREGEXP,
				443	pattern,
				444	flags,
				445	capture_count);
				446	}
				447
				448
				449	int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
				450	Handle<String> subject) {
				451	subject = String::Flatten(subject);
				452
				453	// Check representation of the underlying storage.
				454	bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
				455	if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;
				456
				457	#ifdef V8_INTERPRETED_REGEXP
				458	// Byte-code regexp needs space allocated for all its registers.
				459	// The result captures are copied to the start of the registers array
				460	// if the match succeeds. This way those registers are not clobbered
				461	// when we set the last match info from last successful match.
				462	return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
				463	(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
				464	#else // V8_INTERPRETED_REGEXP
				465	// Native regexp only needs room to output captures. Registers are handled
				466	// internally.
				467	return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
				468	#endif // V8_INTERPRETED_REGEXP
				469	}
				470
				471
				472	int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
				473	Handle<String> subject,
				474	int index,
				475	int32_t* output,
				476	int output_size) {
				477	Isolate* isolate = regexp->GetIsolate();
				478
				479	Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
				480
				481	DCHECK(index >= 0);
				482	DCHECK(index <= subject->length());
				483	DCHECK(subject->IsFlat());
				484
				485	bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
				486
				487	#ifndef V8_INTERPRETED_REGEXP
				488	DCHECK(output_size >= (IrregexpNumberOfCaptures(irregexp) + 1) 2);
				489	do {
				490	EnsureCompiledIrregexp(regexp, subject, is_one_byte);
				491	Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
				492	// The stack is used to allocate registers for the compiled regexp code.
				493	// This means that in case of failure, the output registers array is left
				494	// untouched and contains the capture results from the previous successful
				495	// match. We can use that to set the last match info lazily.
				496	NativeRegExpMacroAssembler::Result res =
				497	NativeRegExpMacroAssembler::Match(code,
				498	subject,
				499	output,
				500	output_size,
				501	index,
				502	isolate);
				503	if (res != NativeRegExpMacroAssembler::RETRY) {
				504	DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION \|\|
				505	isolate->has_pending_exception());
				506	STATIC_ASSERT(
				507	static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
				508	STATIC_ASSERT(
				509	static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
				510	STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
				511	== RE_EXCEPTION);
				512	return static_cast<IrregexpResult>(res);
				513	}
				514	// If result is RETRY, the string has changed representation, and we
				515	// must restart from scratch.
				516	// In this case, it means we must make sure we are prepared to handle
				517	// the, potentially, different subject (the string can switch between
				518	// being internal and external, and even between being Latin1 and UC16,
				519	// but the characters are always the same).
				520	IrregexpPrepare(regexp, subject);
				521	is_one_byte = subject->IsOneByteRepresentationUnderneath();
				522	} while (true);
				523	UNREACHABLE();
				524	return RE_EXCEPTION;
				525	#else // V8_INTERPRETED_REGEXP
				526
				527	DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
				528	// We must have done EnsureCompiledIrregexp, so we can get the number of
				529	// registers.
				530	int number_of_capture_registers =
				531	(IrregexpNumberOfCaptures(irregexp) + 1) 2;
				532	int32_t* raw_output = &output[number_of_capture_registers];
				533	// We do not touch the actual capture result registers until we know there
				534	// has been a match so that we can use those capture results to set the
				535	// last match info.
				536	for (int i = number_of_capture_registers - 1; i >= 0; i--) {
				537	raw_output[i] = -1;
				538	}
				539	Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
				540	isolate);
				541
				542	IrregexpResult result = IrregexpInterpreter::Match(isolate,
				543	byte_codes,
				544	subject,
				545	raw_output,
				546	index);
				547	if (result == RE_SUCCESS) {
				548	// Copy capture results to the start of the registers array.
				549	MemCopy(output, raw_output, number_of_capture_registers * sizeof(int32_t));
				550	}
				551	if (result == RE_EXCEPTION) {
				552	DCHECK(!isolate->has_pending_exception());
				553	isolate->StackOverflow();
				554	}
				555	return result;
				556	#endif // V8_INTERPRETED_REGEXP
				557	}
				558
				559
				560	MaybeHandle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
				561	Handle<String> subject,
				562	int previous_index,
				563	Handle<JSArray> last_match_info) {
				564	Isolate* isolate = regexp->GetIsolate();
				565	DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
				566
				567	// Prepare space for the return values.
				568	#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
				569	if (FLAG_trace_regexp_bytecodes) {
				570	String* pattern = regexp->Pattern();
				571	PrintF("\n\nRegexp match: /%s/\n\n", pattern->ToCString().get());
				572	PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
				573	}
				574	#endif
				575	int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
				576	if (required_registers < 0) {
				577	// Compiling failed with an exception.
				578	DCHECK(isolate->has_pending_exception());
				579	return MaybeHandle<Object>();
				580	}
				581
				582	int32_t* output_registers = NULL;
				583	if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
				584	output_registers = NewArray<int32_t>(required_registers);
				585	}
				586	base::SmartArrayPointer<int32_t> auto_release(output_registers);
				587	if (output_registers == NULL) {
				588	output_registers = isolate->jsregexp_static_offsets_vector();
				589	}
				590
				591	int res = RegExpImpl::IrregexpExecRaw(
				592	regexp, subject, previous_index, output_registers, required_registers);
				593	if (res == RE_SUCCESS) {
				594	int capture_count =
				595	IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
				596	return SetLastMatchInfo(
				597	last_match_info, subject, capture_count, output_registers);
				598	}
				599	if (res == RE_EXCEPTION) {
				600	DCHECK(isolate->has_pending_exception());
				601	return MaybeHandle<Object>();
				602	}
				603	DCHECK(res == RE_FAILURE);
				604	return isolate->factory()->null_value();
				605	}
				606
				607
				608	static void EnsureSize(Handle<JSArray> array, uint32_t minimum_size) {
				609	if (static_cast<uint32_t>(array->elements()->length()) < minimum_size) {
				610	JSArray::SetLength(array, minimum_size);
				611	}
				612	}
				613
				614
				615	Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
				616	Handle<String> subject,
				617	int capture_count,
				618	int32_t* match) {
				619	DCHECK(last_match_info->HasFastObjectElements());
				620	int capture_register_count = (capture_count + 1) * 2;
				621	EnsureSize(last_match_info, capture_register_count + kLastMatchOverhead);
				622	DisallowHeapAllocation no_allocation;
				623	FixedArray* array = FixedArray::cast(last_match_info->elements());
				624	if (match != NULL) {
				625	for (int i = 0; i < capture_register_count; i += 2) {
				626	SetCapture(array, i, match[i]);
				627	SetCapture(array, i + 1, match[i + 1]);
				628	}
				629	}
				630	SetLastCaptureCount(array, capture_register_count);
				631	SetLastSubject(array, *subject);
				632	SetLastInput(array, *subject);
				633	return last_match_info;
				634	}
				635
				636
				637	RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
				638	Handle<String> subject,
				639	bool is_global,
				640	Isolate* isolate)
				641	: register_array_(NULL),
				642	register_array_size_(0),
				643	regexp_(regexp),
				644	subject_(subject) {
				645	#ifdef V8_INTERPRETED_REGEXP
				646	bool interpreted = true;
				647	#else
				648	bool interpreted = false;
				649	#endif // V8_INTERPRETED_REGEXP
				650
				651	if (regexp_->TypeTag() == JSRegExp::ATOM) {
				652	static const int kAtomRegistersPerMatch = 2;
				653	registers_per_match_ = kAtomRegistersPerMatch;
				654	// There is no distinction between interpreted and native for atom regexps.
				655	interpreted = false;
				656	} else {
				657	registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
				658	if (registers_per_match_ < 0) {
				659	num_matches_ = -1; // Signal exception.
				660	return;
				661	}
				662	}
				663
				664	if (is_global && !interpreted) {
				665	register_array_size_ =
				666	Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
				667	max_matches_ = register_array_size_ / registers_per_match_;
				668	} else {
				669	// Global loop in interpreted regexp is not implemented. We choose
				670	// the size of the offsets vector so that it can only store one match.
				671	register_array_size_ = registers_per_match_;
				672	max_matches_ = 1;
				673	}
				674
				675	if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
				676	register_array_ = NewArray<int32_t>(register_array_size_);
				677	} else {
				678	register_array_ = isolate->jsregexp_static_offsets_vector();
				679	}
				680
				681	// Set state so that fetching the results the first time triggers a call
				682	// to the compiled regexp.
				683	current_match_index_ = max_matches_ - 1;
				684	num_matches_ = max_matches_;
				685	DCHECK(registers_per_match_ >= 2); // Each match has at least one capture.
				686	DCHECK_GE(register_array_size_, registers_per_match_);
				687	int32_t* last_match =
				688	&register_array_[current_match_index_ * registers_per_match_];
				689	last_match[0] = -1;
				690	last_match[1] = 0;
				691	}
				692
				693
				694	// -------------------------------------------------------------------
				695	// Implementation of the Irregexp regular expression engine.
				696	//
				697	// The Irregexp regular expression engine is intended to be a complete
				698	// implementation of ECMAScript regular expressions. It generates either
				699	// bytecodes or native code.
				700
				701	// The Irregexp regexp engine is structured in three steps.
				702	// 1) The parser generates an abstract syntax tree. See ast.cc.
				703	// 2) From the AST a node network is created. The nodes are all
				704	// subclasses of RegExpNode. The nodes represent states when
				705	// executing a regular expression. Several optimizations are
				706	// performed on the node network.
				707	// 3) From the nodes we generate either byte codes or native code
				708	// that can actually execute the regular expression (perform
				709	// the search). The code generation step is described in more
				710	// detail below.
				711
				712	// Code generation.
				713	//
				714	// The nodes are divided into four main categories.
				715	// * Choice nodes
				716	// These represent places where the regular expression can
				717	// match in more than one way. For example on entry to an
				718	// alternation (foo\|bar) or a repetition (*, +, ? or {}).
				719	// * Action nodes
				720	// These represent places where some action should be
				721	// performed. Examples include recording the current position
				722	// in the input string to a register (in order to implement
				723	// captures) or other actions on register for example in order
				724	// to implement the counters needed for {} repetitions.
				725	// * Matching nodes
				726	// These attempt to match some element part of the input string.
				727	// Examples of elements include character classes, plain strings
				728	// or back references.
				729	// * End nodes
				730	// These are used to implement the actions required on finding
				731	// a successful match or failing to find a match.
				732	//
				733	// The code generated (whether as byte codes or native code) maintains
				734	// some state as it runs. This consists of the following elements:
				735	//
				736	// * The capture registers. Used for string captures.
				737	// * Other registers. Used for counters etc.
				738	// * The current position.
				739	// * The stack of backtracking information. Used when a matching node
				740	// fails to find a match and needs to try an alternative.
				741	//
				742	// Conceptual regular expression execution model:
				743	//
				744	// There is a simple conceptual model of regular expression execution
				745	// which will be presented first. The actual code generated is a more
				746	// efficient simulation of the simple conceptual model:
				747	//
				748	// * Choice nodes are implemented as follows:
				749	// For each choice except the last {
				750	// push current position
				751	// push backtrack code location
				752	// <generate code to test for choice>
				753	// backtrack code location:
				754	// pop current position
				755	// }
				756	// <generate code to test for last choice>
				757	//
				758	// * Actions nodes are generated as follows
				759	// <push affected registers on backtrack stack>
				760	// <generate code to perform action>
				761	// push backtrack code location
				762	// <generate code to test for following nodes>
				763	// backtrack code location:
				764	// <pop affected registers to restore their state>
				765	// <pop backtrack location from stack and go to it>
				766	//
				767	// * Matching nodes are generated as follows:
				768	// if input string matches at current position
				769	// update current position
				770	// <generate code to test for following nodes>
				771	// else
				772	// <pop backtrack location from stack and go to it>
				773	//
				774	// Thus it can be seen that the current position is saved and restored
				775	// by the choice nodes, whereas the registers are saved and restored by
				776	// by the action nodes that manipulate them.
				777	//
				778	// The other interesting aspect of this model is that nodes are generated
				779	// at the point where they are needed by a recursive call to Emit(). If
				780	// the node has already been code generated then the Emit() call will
				781	// generate a jump to the previously generated code instead. In order to
				782	// limit recursion it is possible for the Emit() function to put the node
				783	// on a work list for later generation and instead generate a jump. The
				784	// destination of the jump is resolved later when the code is generated.
				785	//
				786	// Actual regular expression code generation.
				787	//
				788	// Code generation is actually more complicated than the above. In order
				789	// to improve the efficiency of the generated code some optimizations are
				790	// performed
				791	//
				792	// * Choice nodes have 1-character lookahead.
				793	// A choice node looks at the following character and eliminates some of
				794	// the choices immediately based on that character. This is not yet
				795	// implemented.
				796	// * Simple greedy loops store reduced backtracking information.
				797	// A quantifier like /.*foo/m will greedily match the whole input. It will
				798	// then need to backtrack to a point where it can match "foo". The naive
				799	// implementation of this would push each character position onto the
				800	// backtracking stack, then pop them off one by one. This would use space
				801	// proportional to the length of the input string. However since the "."
				802	// can only match in one way and always has a constant length (in this case
				803	// of 1) it suffices to store the current position on the top of the stack
				804	// once. Matching now becomes merely incrementing the current position and
				805	// backtracking becomes decrementing the current position and checking the
				806	// result against the stored current position. This is faster and saves
				807	// space.
				808	// * The current state is virtualized.
				809	// This is used to defer expensive operations until it is clear that they
				810	// are needed and to generate code for a node more than once, allowing
				811	// specialized an efficient versions of the code to be created. This is
				812	// explained in the section below.
				813	//
				814	// Execution state virtualization.
				815	//
				816	// Instead of emitting code, nodes that manipulate the state can record their
				817	// manipulation in an object called the Trace. The Trace object can record a
				818	// current position offset, an optional backtrack code location on the top of
				819	// the virtualized backtrack stack and some register changes. When a node is
				820	// to be emitted it can flush the Trace or update it. Flushing the Trace
				821	// will emit code to bring the actual state into line with the virtual state.
				822	// Avoiding flushing the state can postpone some work (e.g. updates of capture
				823	// registers). Postponing work can save time when executing the regular
				824	// expression since it may be found that the work never has to be done as a
				825	// failure to match can occur. In addition it is much faster to jump to a
				826	// known backtrack code location than it is to pop an unknown backtrack
				827	// location from the stack and jump there.
				828	//
				829	// The virtual state found in the Trace affects code generation. For example
				830	// the virtual state contains the difference between the actual current
				831	// position and the virtual current position, and matching code needs to use
				832	// this offset to attempt a match in the correct location of the input
				833	// string. Therefore code generated for a non-trivial trace is specialized
				834	// to that trace. The code generator therefore has the ability to generate
				835	// code for each node several times. In order to limit the size of the
				836	// generated code there is an arbitrary limit on how many specialized sets of
				837	// code may be generated for a given node. If the limit is reached, the
				838	// trace is flushed and a generic version of the code for a node is emitted.
				839	// This is subsequently used for that node. The code emitted for non-generic
				840	// trace is not recorded in the node and so it cannot currently be reused in
				841	// the event that code generation is requested for an identical trace.
				842
				843
				844	void RegExpTree::AppendToText(RegExpText* text, Zone* zone) {
				845	UNREACHABLE();
				846	}
				847
				848
				849	void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
				850	text->AddElement(TextElement::Atom(this), zone);
				851	}
				852
				853
				854	void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
				855	text->AddElement(TextElement::CharClass(this), zone);
				856	}
				857
				858
				859	void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
				860	for (int i = 0; i < elements()->length(); i++)
				861	text->AddElement(elements()->at(i), zone);
				862	}
				863
				864
				865	TextElement TextElement::Atom(RegExpAtom* atom) {
				866	return TextElement(ATOM, atom);
				867	}
				868
				869
				870	TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
				871	return TextElement(CHAR_CLASS, char_class);
				872	}
				873
				874
				875	int TextElement::length() const {
				876	switch (text_type()) {
				877	case ATOM:
				878	return atom()->length();
				879
				880	case CHAR_CLASS:
				881	return 1;
				882	}
				883	UNREACHABLE();
				884	return 0;
				885	}
				886
				887
				888	DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
				889	if (table_ == NULL) {
				890	table_ = new(zone()) DispatchTable(zone());
				891	DispatchTableConstructor cons(table_, ignore_case, zone());
				892	cons.BuildTable(this);
				893	}
				894	return table_;
				895	}
				896
				897
				898	class FrequencyCollator {
				899	public:
				900	FrequencyCollator() : total_samples_(0) {
				901	for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
				902	frequencies_[i] = CharacterFrequency(i);
				903	}
				904	}
				905
				906	void CountCharacter(int character) {
				907	int index = (character & RegExpMacroAssembler::kTableMask);
				908	frequencies_[index].Increment();
				909	total_samples_++;
				910	}
				911
				912	// Does not measure in percent, but rather per-128 (the table size from the
				913	// regexp macro assembler).
				914	int Frequency(int in_character) {
				915	DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
				916	if (total_samples_ < 1) return 1; // Division by zero.
				917	int freq_in_per128 =
				918	(frequencies_[in_character].counter() * 128) / total_samples_;
				919	return freq_in_per128;
				920	}
				921
				922	private:
				923	class CharacterFrequency {
				924	public:
				925	CharacterFrequency() : counter_(0), character_(-1) { }
				926	explicit CharacterFrequency(int character)
				927	: counter_(0), character_(character) { }
				928
				929	void Increment() { counter_++; }
				930	int counter() { return counter_; }
				931	int character() { return character_; }
				932
				933	private:
				934	int counter_;
				935	int character_;
				936	};
				937
				938
				939	private:
				940	CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
				941	int total_samples_;
				942	};
				943
				944
				945	class RegExpCompiler {
				946	public:
				947	RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
				948	bool ignore_case, bool is_one_byte);
				949
				950	int AllocateRegister() {
				951	if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
				952	reg_exp_too_big_ = true;
				953	return next_register_;
				954	}
				955	return next_register_++;
				956	}
				957
				958	RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
				959	RegExpNode* start,
				960	int capture_count,
				961	Handle<String> pattern);
				962
				963	inline void AddWork(RegExpNode* node) {
				964	if (!node->on_work_list() && !node->label()->is_bound()) {
				965	node->set_on_work_list(true);
				966	work_list_->Add(node);
				967	}
				968	}
				969
				970	static const int kImplementationOffset = 0;
				971	static const int kNumberOfRegistersOffset = 0;
				972	static const int kCodeOffset = 1;
				973
				974	RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
				975	EndNode* accept() { return accept_; }
				976
				977	static const int kMaxRecursion = 100;
				978	inline int recursion_depth() { return recursion_depth_; }
				979	inline void IncrementRecursionDepth() { recursion_depth_++; }
				980	inline void DecrementRecursionDepth() { recursion_depth_--; }
				981
				982	void SetRegExpTooBig() { reg_exp_too_big_ = true; }
				983
				984	inline bool ignore_case() { return ignore_case_; }
				985	inline bool one_byte() { return one_byte_; }
				986	inline bool optimize() { return optimize_; }
				987	inline void set_optimize(bool value) { optimize_ = value; }
				988	inline bool limiting_recursion() { return limiting_recursion_; }
				989	inline void set_limiting_recursion(bool value) {
				990	limiting_recursion_ = value;
				991	}
				992	bool read_backward() { return read_backward_; }
				993	void set_read_backward(bool value) { read_backward_ = value; }
				994	FrequencyCollator* frequency_collator() { return &frequency_collator_; }
				995
				996	int current_expansion_factor() { return current_expansion_factor_; }
				997	void set_current_expansion_factor(int value) {
				998	current_expansion_factor_ = value;
				999	}
				1000
				1001	Isolate* isolate() const { return isolate_; }
				1002	Zone* zone() const { return zone_; }
				1003
				1004	static const int kNoRegister = -1;
				1005
				1006	private:
				1007	EndNode* accept_;
				1008	int next_register_;
				1009	List<RegExpNode> work_list_;
				1010	int recursion_depth_;
				1011	RegExpMacroAssembler* macro_assembler_;
				1012	bool ignore_case_;
				1013	bool one_byte_;
				1014	bool reg_exp_too_big_;
				1015	bool limiting_recursion_;
				1016	bool optimize_;
				1017	bool read_backward_;
				1018	int current_expansion_factor_;
				1019	FrequencyCollator frequency_collator_;
				1020	Isolate* isolate_;
				1021	Zone* zone_;
				1022	};
				1023
				1024
				1025	class RecursionCheck {
				1026	public:
				1027	explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
				1028	compiler->IncrementRecursionDepth();
				1029	}
				1030	~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
				1031	private:
				1032	RegExpCompiler* compiler_;
				1033	};
				1034
				1035
				1036	static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
				1037	return RegExpEngine::CompilationResult(isolate, "RegExp too big");
				1038	}
				1039
				1040
				1041	// Attempts to compile the regexp using an Irregexp code generator. Returns
				1042	// a fixed array or a null handle depending on whether it succeeded.
				1043	RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
				1044	bool ignore_case, bool one_byte)
				1045	: next_register_(2 * (capture_count + 1)),
				1046	work_list_(NULL),
				1047	recursion_depth_(0),
				1048	ignore_case_(ignore_case),
				1049	one_byte_(one_byte),
				1050	reg_exp_too_big_(false),
				1051	limiting_recursion_(false),
				1052	optimize_(FLAG_regexp_optimization),
				1053	read_backward_(false),
				1054	current_expansion_factor_(1),
				1055	frequency_collator_(),
				1056	isolate_(isolate),
				1057	zone_(zone) {
				1058	accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
				1059	DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
				1060	}
				1061
				1062
				1063	RegExpEngine::CompilationResult RegExpCompiler::Assemble(
				1064	RegExpMacroAssembler* macro_assembler,
				1065	RegExpNode* start,
				1066	int capture_count,
				1067	Handle<String> pattern) {
				1068	Heap* heap = pattern->GetHeap();
				1069
				1070	#ifdef DEBUG
				1071	if (FLAG_trace_regexp_assembler)
				1072	macro_assembler_ =
				1073	new RegExpMacroAssemblerTracer(isolate(), macro_assembler);
				1074	else
				1075	#endif
				1076	macro_assembler_ = macro_assembler;
				1077
				1078	List <RegExpNode*> work_list(0);
				1079	work_list_ = &work_list;
				1080	Label fail;
				1081	macro_assembler_->PushBacktrack(&fail);
				1082	Trace new_trace;
				1083	start->Emit(this, &new_trace);
				1084	macro_assembler_->Bind(&fail);
				1085	macro_assembler_->Fail();
				1086	while (!work_list.is_empty()) {
				1087	RegExpNode* node = work_list.RemoveLast();
				1088	node->set_on_work_list(false);
				1089	if (!node->label()->is_bound()) node->Emit(this, &new_trace);
				1090	}
				1091	if (reg_exp_too_big_) {
				1092	macro_assembler_->AbortedCodeGeneration();
				1093	return IrregexpRegExpTooBig(isolate_);
				1094	}
				1095
				1096	Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
				1097	heap->IncreaseTotalRegexpCodeGenerated(code->Size());
				1098	work_list_ = NULL;
				1099	#ifdef ENABLE_DISASSEMBLER
				1100	if (FLAG_print_code) {
				1101	CodeTracer::Scope trace_scope(heap->isolate()->GetCodeTracer());
				1102	OFStream os(trace_scope.file());
				1103	Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
				1104	}
				1105	#endif
				1106	#ifdef DEBUG
				1107	if (FLAG_trace_regexp_assembler) {
				1108	delete macro_assembler_;
				1109	}
				1110	#endif
				1111	return RegExpEngine::CompilationResult(*code, next_register_);
				1112	}
				1113
				1114
				1115	bool Trace::DeferredAction::Mentions(int that) {
				1116	if (action_type() == ActionNode::CLEAR_CAPTURES) {
				1117	Interval range = static_cast<DeferredClearCaptures*>(this)->range();
				1118	return range.Contains(that);
				1119	} else {
				1120	return reg() == that;
				1121	}
				1122	}
				1123
				1124
				1125	bool Trace::mentions_reg(int reg) {
				1126	for (DeferredAction* action = actions_;
				1127	action != NULL;
				1128	action = action->next()) {
				1129	if (action->Mentions(reg))
				1130	return true;
				1131	}
				1132	return false;
				1133	}
				1134
				1135
				1136	bool Trace::GetStoredPosition(int reg, int* cp_offset) {
				1137	DCHECK_EQ(0, *cp_offset);
				1138	for (DeferredAction* action = actions_;
				1139	action != NULL;
				1140	action = action->next()) {
				1141	if (action->Mentions(reg)) {
				1142	if (action->action_type() == ActionNode::STORE_POSITION) {
				1143	cp_offset = static_cast<DeferredCapture>(action)->cp_offset();
				1144	return true;
				1145	} else {
				1146	return false;
				1147	}
				1148	}
				1149	}
				1150	return false;
				1151	}
				1152
				1153
				1154	int Trace::FindAffectedRegisters(OutSet* affected_registers,
				1155	Zone* zone) {
				1156	int max_register = RegExpCompiler::kNoRegister;
				1157	for (DeferredAction* action = actions_;
				1158	action != NULL;
				1159	action = action->next()) {
				1160	if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
				1161	Interval range = static_cast<DeferredClearCaptures*>(action)->range();
				1162	for (int i = range.from(); i <= range.to(); i++)
				1163	affected_registers->Set(i, zone);
				1164	if (range.to() > max_register) max_register = range.to();
				1165	} else {
				1166	affected_registers->Set(action->reg(), zone);
				1167	if (action->reg() > max_register) max_register = action->reg();
				1168	}
				1169	}
				1170	return max_register;
				1171	}
				1172
				1173
				1174	void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
				1175	int max_register,
				1176	const OutSet& registers_to_pop,
				1177	const OutSet& registers_to_clear) {
				1178	for (int reg = max_register; reg >= 0; reg--) {
				1179	if (registers_to_pop.Get(reg)) {
				1180	assembler->PopRegister(reg);
				1181	} else if (registers_to_clear.Get(reg)) {
				1182	int clear_to = reg;
				1183	while (reg > 0 && registers_to_clear.Get(reg - 1)) {
				1184	reg--;
				1185	}
				1186	assembler->ClearRegisters(reg, clear_to);
				1187	}
				1188	}
				1189	}
				1190
				1191
				1192	void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
				1193	int max_register,
				1194	const OutSet& affected_registers,
				1195	OutSet* registers_to_pop,
				1196	OutSet* registers_to_clear,
				1197	Zone* zone) {
				1198	// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
				1199	const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
				1200
				1201	// Count pushes performed to force a stack limit check occasionally.
				1202	int pushes = 0;
				1203
				1204	for (int reg = 0; reg <= max_register; reg++) {
				1205	if (!affected_registers.Get(reg)) {
				1206	continue;
				1207	}
				1208
				1209	// The chronologically first deferred action in the trace
				1210	// is used to infer the action needed to restore a register
				1211	// to its previous state (or not, if it's safe to ignore it).
				1212	enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
				1213	DeferredActionUndoType undo_action = IGNORE;
				1214
				1215	int value = 0;
				1216	bool absolute = false;
				1217	bool clear = false;
				1218	static const int kNoStore = kMinInt;
				1219	int store_position = kNoStore;
				1220	// This is a little tricky because we are scanning the actions in reverse
				1221	// historical order (newest first).
				1222	for (DeferredAction* action = actions_;
				1223	action != NULL;
				1224	action = action->next()) {
				1225	if (action->Mentions(reg)) {
				1226	switch (action->action_type()) {
				1227	case ActionNode::SET_REGISTER: {
				1228	Trace::DeferredSetRegister* psr =
				1229	static_cast<Trace::DeferredSetRegister*>(action);
				1230	if (!absolute) {
				1231	value += psr->value();
				1232	absolute = true;
				1233	}
				1234	// SET_REGISTER is currently only used for newly introduced loop
				1235	// counters. They can have a significant previous value if they
				1236	// occour in a loop. TODO(lrn): Propagate this information, so
				1237	// we can set undo_action to IGNORE if we know there is no value to
				1238	// restore.
				1239	undo_action = RESTORE;
				1240	DCHECK_EQ(store_position, kNoStore);
				1241	DCHECK(!clear);
				1242	break;
				1243	}
				1244	case ActionNode::INCREMENT_REGISTER:
				1245	if (!absolute) {
				1246	value++;
				1247	}
				1248	DCHECK_EQ(store_position, kNoStore);
				1249	DCHECK(!clear);
				1250	undo_action = RESTORE;
				1251	break;
				1252	case ActionNode::STORE_POSITION: {
				1253	Trace::DeferredCapture* pc =
				1254	static_cast<Trace::DeferredCapture*>(action);
				1255	if (!clear && store_position == kNoStore) {
				1256	store_position = pc->cp_offset();
				1257	}
				1258
				1259	// For captures we know that stores and clears alternate.
				1260	// Other register, are never cleared, and if the occur
				1261	// inside a loop, they might be assigned more than once.
				1262	if (reg <= 1) {
				1263	// Registers zero and one, aka "capture zero", is
				1264	// always set correctly if we succeed. There is no
				1265	// need to undo a setting on backtrack, because we
				1266	// will set it again or fail.
				1267	undo_action = IGNORE;
				1268	} else {
				1269	undo_action = pc->is_capture() ? CLEAR : RESTORE;
				1270	}
				1271	DCHECK(!absolute);
				1272	DCHECK_EQ(value, 0);
				1273	break;
				1274	}
				1275	case ActionNode::CLEAR_CAPTURES: {
				1276	// Since we're scanning in reverse order, if we've already
				1277	// set the position we have to ignore historically earlier
				1278	// clearing operations.
				1279	if (store_position == kNoStore) {
				1280	clear = true;
				1281	}
				1282	undo_action = RESTORE;
				1283	DCHECK(!absolute);
				1284	DCHECK_EQ(value, 0);
				1285	break;
				1286	}
				1287	default:
				1288	UNREACHABLE();
				1289	break;
				1290	}
				1291	}
				1292	}
				1293	// Prepare for the undo-action (e.g., push if it's going to be popped).
				1294	if (undo_action == RESTORE) {
				1295	pushes++;
				1296	RegExpMacroAssembler::StackCheckFlag stack_check =
				1297	RegExpMacroAssembler::kNoStackLimitCheck;
				1298	if (pushes == push_limit) {
				1299	stack_check = RegExpMacroAssembler::kCheckStackLimit;
				1300	pushes = 0;
				1301	}
				1302
				1303	assembler->PushRegister(reg, stack_check);
				1304	registers_to_pop->Set(reg, zone);
				1305	} else if (undo_action == CLEAR) {
				1306	registers_to_clear->Set(reg, zone);
				1307	}
				1308	// Perform the chronologically last action (or accumulated increment)
				1309	// for the register.
				1310	if (store_position != kNoStore) {
				1311	assembler->WriteCurrentPositionToRegister(reg, store_position);
				1312	} else if (clear) {
				1313	assembler->ClearRegisters(reg, reg);
				1314	} else if (absolute) {
				1315	assembler->SetRegister(reg, value);
				1316	} else if (value != 0) {
				1317	assembler->AdvanceRegister(reg, value);
				1318	}
				1319	}
				1320	}
				1321
				1322
				1323	// This is called as we come into a loop choice node and some other tricky
				1324	// nodes. It normalizes the state of the code generator to ensure we can
				1325	// generate generic code.
				1326	void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
				1327	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1328
				1329	DCHECK(!is_trivial());
				1330
				1331	if (actions_ == NULL && backtrack() == NULL) {
				1332	// Here we just have some deferred cp advances to fix and we are back to
				1333	// a normal situation. We may also have to forget some information gained
				1334	// through a quick check that was already performed.
				1335	if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
				1336	// Create a new trivial state and generate the node with that.
				1337	Trace new_state;
				1338	successor->Emit(compiler, &new_state);
				1339	return;
				1340	}
				1341
				1342	// Generate deferred actions here along with code to undo them again.
				1343	OutSet affected_registers;
				1344
				1345	if (backtrack() != NULL) {
				1346	// Here we have a concrete backtrack location. These are set up by choice
				1347	// nodes and so they indicate that we have a deferred save of the current
				1348	// position which we may need to emit here.
				1349	assembler->PushCurrentPosition();
				1350	}
				1351
				1352	int max_register = FindAffectedRegisters(&affected_registers,
				1353	compiler->zone());
				1354	OutSet registers_to_pop;
				1355	OutSet registers_to_clear;
				1356	PerformDeferredActions(assembler,
				1357	max_register,
				1358	affected_registers,
				1359	&registers_to_pop,
				1360	&registers_to_clear,
				1361	compiler->zone());
				1362	if (cp_offset_ != 0) {
				1363	assembler->AdvanceCurrentPosition(cp_offset_);
				1364	}
				1365
				1366	// Create a new trivial state and generate the node with that.
				1367	Label undo;
				1368	assembler->PushBacktrack(&undo);
				1369	if (successor->KeepRecursing(compiler)) {
				1370	Trace new_state;
				1371	successor->Emit(compiler, &new_state);
				1372	} else {
				1373	compiler->AddWork(successor);
				1374	assembler->GoTo(successor->label());
				1375	}
				1376
				1377	// On backtrack we need to restore state.
				1378	assembler->Bind(&undo);
				1379	RestoreAffectedRegisters(assembler,
				1380	max_register,
				1381	registers_to_pop,
				1382	registers_to_clear);
				1383	if (backtrack() == NULL) {
				1384	assembler->Backtrack();
				1385	} else {
				1386	assembler->PopCurrentPosition();
				1387	assembler->GoTo(backtrack());
				1388	}
				1389	}
				1390
				1391
				1392	void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
				1393	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1394
				1395	// Omit flushing the trace. We discard the entire stack frame anyway.
				1396
				1397	if (!label()->is_bound()) {
				1398	// We are completely independent of the trace, since we ignore it,
				1399	// so this code can be used as the generic version.
				1400	assembler->Bind(label());
				1401	}
				1402
				1403	// Throw away everything on the backtrack stack since the start
				1404	// of the negative submatch and restore the character position.
				1405	assembler->ReadCurrentPositionFromRegister(current_position_register_);
				1406	assembler->ReadStackPointerFromRegister(stack_pointer_register_);
				1407	if (clear_capture_count_ > 0) {
				1408	// Clear any captures that might have been performed during the success
				1409	// of the body of the negative look-ahead.
				1410	int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
				1411	assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
				1412	}
				1413	// Now that we have unwound the stack we find at the top of the stack the
				1414	// backtrack that the BeginSubmatch node got.
				1415	assembler->Backtrack();
				1416	}
				1417
				1418
				1419	void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				1420	if (!trace->is_trivial()) {
				1421	trace->Flush(compiler, this);
				1422	return;
				1423	}
				1424	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1425	if (!label()->is_bound()) {
				1426	assembler->Bind(label());
				1427	}
				1428	switch (action_) {
				1429	case ACCEPT:
				1430	assembler->Succeed();
				1431	return;
				1432	case BACKTRACK:
				1433	assembler->GoTo(trace->backtrack());
				1434	return;
				1435	case NEGATIVE_SUBMATCH_SUCCESS:
				1436	// This case is handled in a different virtual method.
				1437	UNREACHABLE();
				1438	}
				1439	UNIMPLEMENTED();
				1440	}
				1441
				1442
				1443	void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
				1444	if (guards_ == NULL)
				1445	guards_ = new(zone) ZoneList<Guard*>(1, zone);
				1446	guards_->Add(guard, zone);
				1447	}
				1448
				1449
				1450	ActionNode* ActionNode::SetRegister(int reg,
				1451	int val,
				1452	RegExpNode* on_success) {
				1453	ActionNode* result =
				1454	new(on_success->zone()) ActionNode(SET_REGISTER, on_success);
				1455	result->data_.u_store_register.reg = reg;
				1456	result->data_.u_store_register.value = val;
				1457	return result;
				1458	}
				1459
				1460
				1461	ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
				1462	ActionNode* result =
				1463	new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
				1464	result->data_.u_increment_register.reg = reg;
				1465	return result;
				1466	}
				1467
				1468
				1469	ActionNode* ActionNode::StorePosition(int reg,
				1470	bool is_capture,
				1471	RegExpNode* on_success) {
				1472	ActionNode* result =
				1473	new(on_success->zone()) ActionNode(STORE_POSITION, on_success);
				1474	result->data_.u_position_register.reg = reg;
				1475	result->data_.u_position_register.is_capture = is_capture;
				1476	return result;
				1477	}
				1478
				1479
				1480	ActionNode* ActionNode::ClearCaptures(Interval range,
				1481	RegExpNode* on_success) {
				1482	ActionNode* result =
				1483	new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
				1484	result->data_.u_clear_captures.range_from = range.from();
				1485	result->data_.u_clear_captures.range_to = range.to();
				1486	return result;
				1487	}
				1488
				1489
				1490	ActionNode* ActionNode::BeginSubmatch(int stack_reg,
				1491	int position_reg,
				1492	RegExpNode* on_success) {
				1493	ActionNode* result =
				1494	new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
				1495	result->data_.u_submatch.stack_pointer_register = stack_reg;
				1496	result->data_.u_submatch.current_position_register = position_reg;
				1497	return result;
				1498	}
				1499
				1500
				1501	ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
				1502	int position_reg,
				1503	int clear_register_count,
				1504	int clear_register_from,
				1505	RegExpNode* on_success) {
				1506	ActionNode* result =
				1507	new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
				1508	result->data_.u_submatch.stack_pointer_register = stack_reg;
				1509	result->data_.u_submatch.current_position_register = position_reg;
				1510	result->data_.u_submatch.clear_register_count = clear_register_count;
				1511	result->data_.u_submatch.clear_register_from = clear_register_from;
				1512	return result;
				1513	}
				1514
				1515
				1516	ActionNode* ActionNode::EmptyMatchCheck(int start_register,
				1517	int repetition_register,
				1518	int repetition_limit,
				1519	RegExpNode* on_success) {
				1520	ActionNode* result =
				1521	new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
				1522	result->data_.u_empty_match_check.start_register = start_register;
				1523	result->data_.u_empty_match_check.repetition_register = repetition_register;
				1524	result->data_.u_empty_match_check.repetition_limit = repetition_limit;
				1525	return result;
				1526	}
				1527
				1528
				1529	#define DEFINE_ACCEPT(Type) \
				1530	void Type##Node::Accept(NodeVisitor* visitor) { \
				1531	visitor->Visit##Type(this); \
				1532	}
				1533	FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
				1534	#undef DEFINE_ACCEPT
				1535
				1536
				1537	void LoopChoiceNode::Accept(NodeVisitor* visitor) {
				1538	visitor->VisitLoopChoice(this);
				1539	}
				1540
				1541
				1542	// -------------------------------------------------------------------
				1543	// Emit code.
				1544
				1545
				1546	void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
				1547	Guard* guard,
				1548	Trace* trace) {
				1549	switch (guard->op()) {
				1550	case Guard::LT:
				1551	DCHECK(!trace->mentions_reg(guard->reg()));
				1552	macro_assembler->IfRegisterGE(guard->reg(),
				1553	guard->value(),
				1554	trace->backtrack());
				1555	break;
				1556	case Guard::GEQ:
				1557	DCHECK(!trace->mentions_reg(guard->reg()));
				1558	macro_assembler->IfRegisterLT(guard->reg(),
				1559	guard->value(),
				1560	trace->backtrack());
				1561	break;
				1562	}
				1563	}
				1564
				1565
				1566	// Returns the number of characters in the equivalence class, omitting those
				1567	// that cannot occur in the source string because it is Latin1.
				1568	static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
				1569	bool one_byte_subject,
				1570	unibrow::uchar* letters) {
				1571	int length =
				1572	isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
				1573	// Unibrow returns 0 or 1 for characters where case independence is
				1574	// trivial.
				1575	if (length == 0) {
				1576	letters[0] = character;
				1577	length = 1;
				1578	}
				1579
				1580	if (one_byte_subject) {
				1581	int new_length = 0;
				1582	for (int i = 0; i < length; i++) {
				1583	if (letters[i] <= String::kMaxOneByteCharCode) {
				1584	letters[new_length++] = letters[i];
				1585	}
				1586	}
				1587	length = new_length;
				1588	}
				1589
				1590	return length;
				1591	}
				1592
				1593
				1594	static inline bool EmitSimpleCharacter(Isolate* isolate,
				1595	RegExpCompiler* compiler,
				1596	uc16 c,
				1597	Label* on_failure,
				1598	int cp_offset,
				1599	bool check,
				1600	bool preloaded) {
				1601	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				1602	bool bound_checked = false;
				1603	if (!preloaded) {
				1604	assembler->LoadCurrentCharacter(
				1605	cp_offset,
				1606	on_failure,
				1607	check);
				1608	bound_checked = true;
				1609	}
				1610	assembler->CheckNotCharacter(c, on_failure);
				1611	return bound_checked;
				1612	}
				1613
				1614
				1615	// Only emits non-letters (things that don't have case). Only used for case
				1616	// independent matches.
				1617	static inline bool EmitAtomNonLetter(Isolate* isolate,
				1618	RegExpCompiler* compiler,
				1619	uc16 c,
				1620	Label* on_failure,
				1621	int cp_offset,
				1622	bool check,
				1623	bool preloaded) {
				1624	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				1625	bool one_byte = compiler->one_byte();
				1626	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				1627	int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
				1628	if (length < 1) {
				1629	// This can't match. Must be an one-byte subject and a non-one-byte
				1630	// character. We do not need to do anything since the one-byte pass
				1631	// already handled this.
				1632	return false; // Bounds not checked.
				1633	}
				1634	bool checked = false;
				1635	// We handle the length > 1 case in a later pass.
				1636	if (length == 1) {
				1637	if (one_byte && c > String::kMaxOneByteCharCodeU) {
				1638	// Can't match - see above.
				1639	return false; // Bounds not checked.
				1640	}
				1641	if (!preloaded) {
				1642	macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
				1643	checked = check;
				1644	}
				1645	macro_assembler->CheckNotCharacter(c, on_failure);
				1646	}
				1647	return checked;
				1648	}
				1649
				1650
				1651	static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
				1652	bool one_byte, uc16 c1, uc16 c2,
				1653	Label* on_failure) {
				1654	uc16 char_mask;
				1655	if (one_byte) {
				1656	char_mask = String::kMaxOneByteCharCode;
				1657	} else {
				1658	char_mask = String::kMaxUtf16CodeUnit;
				1659	}
				1660	uc16 exor = c1 ^ c2;
				1661	// Check whether exor has only one bit set.
				1662	if (((exor - 1) & exor) == 0) {
				1663	// If c1 and c2 differ only by one bit.
				1664	// Ecma262UnCanonicalize always gives the highest number last.
				1665	DCHECK(c2 > c1);
				1666	uc16 mask = char_mask ^ exor;
				1667	macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
				1668	return true;
				1669	}
				1670	DCHECK(c2 > c1);
				1671	uc16 diff = c2 - c1;
				1672	if (((diff - 1) & diff) == 0 && c1 >= diff) {
				1673	// If the characters differ by 2^n but don't differ by one bit then
				1674	// subtract the difference from the found character, then do the or
				1675	// trick. We avoid the theoretical case where negative numbers are
				1676	// involved in order to simplify code generation.
				1677	uc16 mask = char_mask ^ diff;
				1678	macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
				1679	diff,
				1680	mask,
				1681	on_failure);
				1682	return true;
				1683	}
				1684	return false;
				1685	}
				1686
				1687
				1688	typedef bool EmitCharacterFunction(Isolate* isolate,
				1689	RegExpCompiler* compiler,
				1690	uc16 c,
				1691	Label* on_failure,
				1692	int cp_offset,
				1693	bool check,
				1694	bool preloaded);
				1695
				1696	// Only emits letters (things that have case). Only used for case independent
				1697	// matches.
				1698	static inline bool EmitAtomLetter(Isolate* isolate,
				1699	RegExpCompiler* compiler,
				1700	uc16 c,
				1701	Label* on_failure,
				1702	int cp_offset,
				1703	bool check,
				1704	bool preloaded) {
				1705	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				1706	bool one_byte = compiler->one_byte();
				1707	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				1708	int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
				1709	if (length <= 1) return false;
				1710	// We may not need to check against the end of the input string
				1711	// if this character lies before a character that matched.
				1712	if (!preloaded) {
				1713	macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
				1714	}
				1715	Label ok;
				1716	DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
				1717	switch (length) {
				1718	case 2: {
				1719	if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
				1720	chars[1], on_failure)) {
				1721	} else {
				1722	macro_assembler->CheckCharacter(chars[0], &ok);
				1723	macro_assembler->CheckNotCharacter(chars[1], on_failure);
				1724	macro_assembler->Bind(&ok);
				1725	}
				1726	break;
				1727	}
				1728	case 4:
				1729	macro_assembler->CheckCharacter(chars[3], &ok);
				1730	// Fall through!
				1731	case 3:
				1732	macro_assembler->CheckCharacter(chars[0], &ok);
				1733	macro_assembler->CheckCharacter(chars[1], &ok);
				1734	macro_assembler->CheckNotCharacter(chars[2], on_failure);
				1735	macro_assembler->Bind(&ok);
				1736	break;
				1737	default:
				1738	UNREACHABLE();
				1739	break;
				1740	}
				1741	return true;
				1742	}
				1743
				1744
				1745	static void EmitBoundaryTest(RegExpMacroAssembler* masm,
				1746	int border,
				1747	Label* fall_through,
				1748	Label* above_or_equal,
				1749	Label* below) {
				1750	if (below != fall_through) {
				1751	masm->CheckCharacterLT(border, below);
				1752	if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
				1753	} else {
				1754	masm->CheckCharacterGT(border - 1, above_or_equal);
				1755	}
				1756	}
				1757
				1758
				1759	static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
				1760	int first,
				1761	int last,
				1762	Label* fall_through,
				1763	Label* in_range,
				1764	Label* out_of_range) {
				1765	if (in_range == fall_through) {
				1766	if (first == last) {
				1767	masm->CheckNotCharacter(first, out_of_range);
				1768	} else {
				1769	masm->CheckCharacterNotInRange(first, last, out_of_range);
				1770	}
				1771	} else {
				1772	if (first == last) {
				1773	masm->CheckCharacter(first, in_range);
				1774	} else {
				1775	masm->CheckCharacterInRange(first, last, in_range);
				1776	}
				1777	if (out_of_range != fall_through) masm->GoTo(out_of_range);
				1778	}
				1779	}
				1780
				1781
				1782	// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
				1783	// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
				1784	static void EmitUseLookupTable(
				1785	RegExpMacroAssembler* masm,
				1786	ZoneList<int>* ranges,
				1787	int start_index,
				1788	int end_index,
				1789	int min_char,
				1790	Label* fall_through,
				1791	Label* even_label,
				1792	Label* odd_label) {
				1793	static const int kSize = RegExpMacroAssembler::kTableSize;
				1794	static const int kMask = RegExpMacroAssembler::kTableMask;
				1795
				1796	int base = (min_char & ~kMask);
				1797	USE(base);
				1798
				1799	// Assert that everything is on one kTableSize page.
				1800	for (int i = start_index; i <= end_index; i++) {
				1801	DCHECK_EQ(ranges->at(i) & ~kMask, base);
				1802	}
				1803	DCHECK(start_index == 0 \|\| (ranges->at(start_index - 1) & ~kMask) <= base);
				1804
				1805	char templ[kSize];
				1806	Label* on_bit_set;
				1807	Label* on_bit_clear;
				1808	int bit;
				1809	if (even_label == fall_through) {
				1810	on_bit_set = odd_label;
				1811	on_bit_clear = even_label;
				1812	bit = 1;
				1813	} else {
				1814	on_bit_set = even_label;
				1815	on_bit_clear = odd_label;
				1816	bit = 0;
				1817	}
				1818	for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {
				1819	templ[i] = bit;
				1820	}
				1821	int j = 0;
				1822	bit ^= 1;
				1823	for (int i = start_index; i < end_index; i++) {
				1824	for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
				1825	templ[j] = bit;
				1826	}
				1827	bit ^= 1;
				1828	}
				1829	for (int i = j; i < kSize; i++) {
				1830	templ[i] = bit;
				1831	}
				1832	Factory* factory = masm->isolate()->factory();
				1833	// TODO(erikcorry): Cache these.
				1834	Handle<ByteArray> ba = factory->NewByteArray(kSize, TENURED);
				1835	for (int i = 0; i < kSize; i++) {
				1836	ba->set(i, templ[i]);
				1837	}
				1838	masm->CheckBitInTable(ba, on_bit_set);
				1839	if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
				1840	}
				1841
				1842
				1843	static void CutOutRange(RegExpMacroAssembler* masm,
				1844	ZoneList<int>* ranges,
				1845	int start_index,
				1846	int end_index,
				1847	int cut_index,
				1848	Label* even_label,
				1849	Label* odd_label) {
				1850	bool odd = (((cut_index - start_index) & 1) == 1);
				1851	Label* in_range_label = odd ? odd_label : even_label;
				1852	Label dummy;
				1853	EmitDoubleBoundaryTest(masm,
				1854	ranges->at(cut_index),
				1855	ranges->at(cut_index + 1) - 1,
				1856	&dummy,
				1857	in_range_label,
				1858	&dummy);
				1859	DCHECK(!dummy.is_linked());
				1860	// Cut out the single range by rewriting the array. This creates a new
				1861	// range that is a merger of the two ranges on either side of the one we
				1862	// are cutting out. The oddity of the labels is preserved.
				1863	for (int j = cut_index; j > start_index; j--) {
				1864	ranges->at(j) = ranges->at(j - 1);
				1865	}
				1866	for (int j = cut_index + 1; j < end_index; j++) {
				1867	ranges->at(j) = ranges->at(j + 1);
				1868	}
				1869	}
				1870
				1871
				1872	// Unicode case. Split the search space into kSize spaces that are handled
				1873	// with recursion.
				1874	static void SplitSearchSpace(ZoneList<int>* ranges,
				1875	int start_index,
				1876	int end_index,
				1877	int* new_start_index,
				1878	int* new_end_index,
				1879	int* border) {
				1880	static const int kSize = RegExpMacroAssembler::kTableSize;
				1881	static const int kMask = RegExpMacroAssembler::kTableMask;
				1882
				1883	int first = ranges->at(start_index);
				1884	int last = ranges->at(end_index) - 1;
				1885
				1886	*new_start_index = start_index;
				1887	*border = (ranges->at(start_index) & ~kMask) + kSize;
				1888	while (*new_start_index < end_index) {
				1889	if (ranges->at(new_start_index) > border) break;
				1890	(*new_start_index)++;
				1891	}
				1892	// new_start_index is the index of the first edge that is beyond the
				1893	// current kSize space.
				1894
				1895	// For very large search spaces we do a binary chop search of the non-Latin1
				1896	// space instead of just going to the end of the current kSize space. The
				1897	// heuristics are complicated a little by the fact that any 128-character
				1898	// encoding space can be quickly tested with a table lookup, so we don't
				1899	// wish to do binary chop search at a smaller granularity than that. A
				1900	// 128-character space can take up a lot of space in the ranges array if,
				1901	// for example, we only want to match every second character (eg. the lower
				1902	// case characters on some Unicode pages).
				1903	int binary_chop_index = (end_index + start_index) / 2;
				1904	// The first test ensures that we get to the code that handles the Latin1
				1905	// range with a single not-taken branch, speeding up this important
				1906	// character range (even non-Latin1 charset-based text has spaces and
				1907	// punctuation).
				1908	if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
				1909	end_index - start_index > (new_start_index - start_index) 2 &&
				1910	last - first > kSize * 2 && binary_chop_index > *new_start_index &&
				1911	ranges->at(binary_chop_index) >= first + 2 * kSize) {
				1912	int scan_forward_for_section_border = binary_chop_index;;
				1913	int new_border = (ranges->at(binary_chop_index) \| kMask) + 1;
				1914
				1915	while (scan_forward_for_section_border < end_index) {
				1916	if (ranges->at(scan_forward_for_section_border) > new_border) {
				1917	*new_start_index = scan_forward_for_section_border;
				1918	*border = new_border;
				1919	break;
				1920	}
				1921	scan_forward_for_section_border++;
				1922	}
				1923	}
				1924
				1925	DCHECK(*new_start_index > start_index);
				1926	new_end_index = new_start_index - 1;
				1927	if (ranges->at(new_end_index) == border) {
				1928	(*new_end_index)--;
				1929	}
				1930	if (*border >= ranges->at(end_index)) {
				1931	*border = ranges->at(end_index);
				1932	*new_start_index = end_index; // Won't be used.
				1933	*new_end_index = end_index - 1;
				1934	}
				1935	}
				1936
				1937
				1938	// Gets a series of segment boundaries representing a character class. If the
				1939	// character is in the range between an even and an odd boundary (counting from
				1940	// start_index) then go to even_label, otherwise go to odd_label. We already
				1941	// know that the character is in the range of min_char to max_char inclusive.
				1942	// Either label can be NULL indicating backtracking. Either label can also be
				1943	// equal to the fall_through label.
				1944	static void GenerateBranches(RegExpMacroAssembler* masm,
				1945	ZoneList<int>* ranges,
				1946	int start_index,
				1947	int end_index,
				1948	uc16 min_char,
				1949	uc16 max_char,
				1950	Label* fall_through,
				1951	Label* even_label,
				1952	Label* odd_label) {
				1953	int first = ranges->at(start_index);
				1954	int last = ranges->at(end_index) - 1;
				1955
				1956	DCHECK_LT(min_char, first);
				1957
				1958	// Just need to test if the character is before or on-or-after
				1959	// a particular character.
				1960	if (start_index == end_index) {
				1961	EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
				1962	return;
				1963	}
				1964
				1965	// Another almost trivial case: There is one interval in the middle that is
				1966	// different from the end intervals.
				1967	if (start_index + 1 == end_index) {
				1968	EmitDoubleBoundaryTest(
				1969	masm, first, last, fall_through, even_label, odd_label);
				1970	return;
				1971	}
				1972
				1973	// It's not worth using table lookup if there are very few intervals in the
				1974	// character class.
				1975	if (end_index - start_index <= 6) {
				1976	// It is faster to test for individual characters, so we look for those
				1977	// first, then try arbitrary ranges in the second round.
				1978	static int kNoCutIndex = -1;
				1979	int cut = kNoCutIndex;
				1980	for (int i = start_index; i < end_index; i++) {
				1981	if (ranges->at(i) == ranges->at(i + 1) - 1) {
				1982	cut = i;
				1983	break;
				1984	}
				1985	}
				1986	if (cut == kNoCutIndex) cut = start_index;
				1987	CutOutRange(
				1988	masm, ranges, start_index, end_index, cut, even_label, odd_label);
				1989	DCHECK_GE(end_index - start_index, 2);
				1990	GenerateBranches(masm,
				1991	ranges,
				1992	start_index + 1,
				1993	end_index - 1,
				1994	min_char,
				1995	max_char,
				1996	fall_through,
				1997	even_label,
				1998	odd_label);
				1999	return;
				2000	}
				2001
				2002	// If there are a lot of intervals in the regexp, then we will use tables to
				2003	// determine whether the character is inside or outside the character class.
				2004	static const int kBits = RegExpMacroAssembler::kTableSizeBits;
				2005
				2006	if ((max_char >> kBits) == (min_char >> kBits)) {
				2007	EmitUseLookupTable(masm,
				2008	ranges,
				2009	start_index,
				2010	end_index,
				2011	min_char,
				2012	fall_through,
				2013	even_label,
				2014	odd_label);
				2015	return;
				2016	}
				2017
				2018	if ((min_char >> kBits) != (first >> kBits)) {
				2019	masm->CheckCharacterLT(first, odd_label);
				2020	GenerateBranches(masm,
				2021	ranges,
				2022	start_index + 1,
				2023	end_index,
				2024	first,
				2025	max_char,
				2026	fall_through,
				2027	odd_label,
				2028	even_label);
				2029	return;
				2030	}
				2031
				2032	int new_start_index = 0;
				2033	int new_end_index = 0;
				2034	int border = 0;
				2035
				2036	SplitSearchSpace(ranges,
				2037	start_index,
				2038	end_index,
				2039	&new_start_index,
				2040	&new_end_index,
				2041	&border);
				2042
				2043	Label handle_rest;
				2044	Label* above = &handle_rest;
				2045	if (border == last + 1) {
				2046	// We didn't find any section that started after the limit, so everything
				2047	// above the border is one of the terminal labels.
				2048	above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
				2049	DCHECK(new_end_index == end_index - 1);
				2050	}
				2051
				2052	DCHECK_LE(start_index, new_end_index);
				2053	DCHECK_LE(new_start_index, end_index);
				2054	DCHECK_LT(start_index, new_start_index);
				2055	DCHECK_LT(new_end_index, end_index);
				2056	DCHECK(new_end_index + 1 == new_start_index \|\|
				2057	(new_end_index + 2 == new_start_index &&
				2058	border == ranges->at(new_end_index + 1)));
				2059	DCHECK_LT(min_char, border - 1);
				2060	DCHECK_LT(border, max_char);
				2061	DCHECK_LT(ranges->at(new_end_index), border);
				2062	DCHECK(border < ranges->at(new_start_index) \|\|
				2063	(border == ranges->at(new_start_index) &&
				2064	new_start_index == end_index &&
				2065	new_end_index == end_index - 1 &&
				2066	border == last + 1));
				2067	DCHECK(new_start_index == 0 \|\| border >= ranges->at(new_start_index - 1));
				2068
				2069	masm->CheckCharacterGT(border - 1, above);
				2070	Label dummy;
				2071	GenerateBranches(masm,
				2072	ranges,
				2073	start_index,
				2074	new_end_index,
				2075	min_char,
				2076	border - 1,
				2077	&dummy,
				2078	even_label,
				2079	odd_label);
				2080	if (handle_rest.is_linked()) {
				2081	masm->Bind(&handle_rest);
				2082	bool flip = (new_start_index & 1) != (start_index & 1);
				2083	GenerateBranches(masm,
				2084	ranges,
				2085	new_start_index,
				2086	end_index,
				2087	border,
				2088	max_char,
				2089	&dummy,
				2090	flip ? odd_label : even_label,
				2091	flip ? even_label : odd_label);
				2092	}
				2093	}
				2094
				2095
				2096	static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
				2097	RegExpCharacterClass* cc, bool one_byte,
				2098	Label* on_failure, int cp_offset, bool check_offset,
				2099	bool preloaded, Zone* zone) {
				2100	ZoneList<CharacterRange>* ranges = cc->ranges(zone);
				2101	if (!CharacterRange::IsCanonical(ranges)) {
				2102	CharacterRange::Canonicalize(ranges);
				2103	}
				2104
				2105	int max_char;
				2106	if (one_byte) {
				2107	max_char = String::kMaxOneByteCharCode;
				2108	} else {
				2109	max_char = String::kMaxUtf16CodeUnit;
				2110	}
				2111
				2112	int range_count = ranges->length();
				2113
				2114	int last_valid_range = range_count - 1;
				2115	while (last_valid_range >= 0) {
				2116	CharacterRange& range = ranges->at(last_valid_range);
				2117	if (range.from() <= max_char) {
				2118	break;
				2119	}
				2120	last_valid_range--;
				2121	}
				2122
				2123	if (last_valid_range < 0) {
				2124	if (!cc->is_negated()) {
				2125	macro_assembler->GoTo(on_failure);
				2126	}
				2127	if (check_offset) {
				2128	macro_assembler->CheckPosition(cp_offset, on_failure);
				2129	}
				2130	return;
				2131	}
				2132
				2133	if (last_valid_range == 0 &&
				2134	ranges->at(0).IsEverything(max_char)) {
				2135	if (cc->is_negated()) {
				2136	macro_assembler->GoTo(on_failure);
				2137	} else {
				2138	// This is a common case hit by non-anchored expressions.
				2139	if (check_offset) {
				2140	macro_assembler->CheckPosition(cp_offset, on_failure);
				2141	}
				2142	}
				2143	return;
				2144	}
				2145	if (last_valid_range == 0 &&
				2146	!cc->is_negated() &&
				2147	ranges->at(0).IsEverything(max_char)) {
				2148	// This is a common case hit by non-anchored expressions.
				2149	if (check_offset) {
				2150	macro_assembler->CheckPosition(cp_offset, on_failure);
				2151	}
				2152	return;
				2153	}
				2154
				2155	if (!preloaded) {
				2156	macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
				2157	}
				2158
				2159	if (cc->is_standard(zone) &&
				2160	macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
				2161	on_failure)) {
				2162	return;
				2163	}
				2164
				2165
				2166	// A new list with ascending entries. Each entry is a code unit
				2167	// where there is a boundary between code units that are part of
				2168	// the class and code units that are not. Normally we insert an
				2169	// entry at zero which goes to the failure label, but if there
				2170	// was already one there we fall through for success on that entry.
				2171	// Subsequent entries have alternating meaning (success/failure).
				2172	ZoneList<int>* range_boundaries =
				2173	new(zone) ZoneList<int>(last_valid_range, zone);
				2174
				2175	bool zeroth_entry_is_failure = !cc->is_negated();
				2176
				2177	for (int i = 0; i <= last_valid_range; i++) {
				2178	CharacterRange& range = ranges->at(i);
				2179	if (range.from() == 0) {
				2180	DCHECK_EQ(i, 0);
				2181	zeroth_entry_is_failure = !zeroth_entry_is_failure;
				2182	} else {
				2183	range_boundaries->Add(range.from(), zone);
				2184	}
				2185	range_boundaries->Add(range.to() + 1, zone);
				2186	}
				2187	int end_index = range_boundaries->length() - 1;
				2188	if (range_boundaries->at(end_index) > max_char) {
				2189	end_index--;
				2190	}
				2191
				2192	Label fall_through;
				2193	GenerateBranches(macro_assembler,
				2194	range_boundaries,
				2195	0, // start_index.
				2196	end_index,
				2197	0, // min_char.
				2198	max_char,
				2199	&fall_through,
				2200	zeroth_entry_is_failure ? &fall_through : on_failure,
				2201	zeroth_entry_is_failure ? on_failure : &fall_through);
				2202	macro_assembler->Bind(&fall_through);
				2203	}
				2204
				2205
				2206	RegExpNode::~RegExpNode() {
				2207	}
				2208
				2209
				2210	RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
				2211	Trace* trace) {
				2212	// If we are generating a greedy loop then don't stop and don't reuse code.
				2213	if (trace->stop_node() != NULL) {
				2214	return CONTINUE;
				2215	}
				2216
				2217	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				2218	if (trace->is_trivial()) {
				2219	if (label_.is_bound() \|\| on_work_list() \|\| !KeepRecursing(compiler)) {
				2220	// If a generic version is already scheduled to be generated or we have
				2221	// recursed too deeply then just generate a jump to that code.
				2222	macro_assembler->GoTo(&label_);
				2223	// This will queue it up for generation of a generic version if it hasn't
				2224	// already been queued.
				2225	compiler->AddWork(this);
				2226	return DONE;
				2227	}
				2228	// Generate generic version of the node and bind the label for later use.
				2229	macro_assembler->Bind(&label_);
				2230	return CONTINUE;
				2231	}
				2232
				2233	// We are being asked to make a non-generic version. Keep track of how many
				2234	// non-generic versions we generate so as not to overdo it.
				2235	trace_count_++;
				2236	if (KeepRecursing(compiler) && compiler->optimize() &&
				2237	trace_count_ < kMaxCopiesCodeGenerated) {
				2238	return CONTINUE;
				2239	}
				2240
				2241	// If we get here code has been generated for this node too many times or
				2242	// recursion is too deep. Time to switch to a generic version. The code for
				2243	// generic versions above can handle deep recursion properly.
				2244	bool was_limiting = compiler->limiting_recursion();
				2245	compiler->set_limiting_recursion(true);
				2246	trace->Flush(compiler, this);
				2247	compiler->set_limiting_recursion(was_limiting);
				2248	return DONE;
				2249	}
				2250
				2251
				2252	bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
				2253	return !compiler->limiting_recursion() &&
				2254	compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
				2255	}
				2256
				2257
				2258	int ActionNode::EatsAtLeast(int still_to_find,
				2259	int budget,
				2260	bool not_at_start) {
				2261	if (budget <= 0) return 0;
				2262	if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
				2263	return on_success()->EatsAtLeast(still_to_find,
				2264	budget - 1,
				2265	not_at_start);
				2266	}
				2267
				2268
				2269	void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
				2270	BoyerMooreLookahead* bm, bool not_at_start) {
				2271	if (action_type_ == BEGIN_SUBMATCH) {
				2272	bm->SetRest(offset);
				2273	} else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
				2274	on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
				2275	}
				2276	SaveBMInfo(bm, not_at_start, offset);
				2277	}
				2278
				2279
				2280	int AssertionNode::EatsAtLeast(int still_to_find,
				2281	int budget,
				2282	bool not_at_start) {
				2283	if (budget <= 0) return 0;
				2284	// If we know we are not at the start and we are asked "how many characters
				2285	// will you match if you succeed?" then we can answer anything since false
				2286	// implies false. So lets just return the max answer (still_to_find) since
				2287	// that won't prevent us from preloading a lot of characters for the other
				2288	// branches in the node graph.
				2289	if (assertion_type() == AT_START && not_at_start) return still_to_find;
				2290	return on_success()->EatsAtLeast(still_to_find,
				2291	budget - 1,
				2292	not_at_start);
				2293	}
				2294
				2295
				2296	void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
				2297	BoyerMooreLookahead* bm, bool not_at_start) {
				2298	// Match the behaviour of EatsAtLeast on this node.
				2299	if (assertion_type() == AT_START && not_at_start) return;
				2300	on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
				2301	SaveBMInfo(bm, not_at_start, offset);
				2302	}
				2303
				2304
				2305	int BackReferenceNode::EatsAtLeast(int still_to_find,
				2306	int budget,
				2307	bool not_at_start) {
				2308	if (read_backward()) return 0;
				2309	if (budget <= 0) return 0;
				2310	return on_success()->EatsAtLeast(still_to_find,
				2311	budget - 1,
				2312	not_at_start);
				2313	}
				2314
				2315
				2316	int TextNode::EatsAtLeast(int still_to_find,
				2317	int budget,
				2318	bool not_at_start) {
				2319	if (read_backward()) return 0;
				2320	int answer = Length();
				2321	if (answer >= still_to_find) return answer;
				2322	if (budget <= 0) return answer;
				2323	// We are not at start after this node so we set the last argument to 'true'.
				2324	return answer + on_success()->EatsAtLeast(still_to_find - answer,
				2325	budget - 1,
				2326	true);
				2327	}
				2328
				2329
				2330	int NegativeLookaroundChoiceNode::EatsAtLeast(int still_to_find, int budget,
				2331	bool not_at_start) {
				2332	if (budget <= 0) return 0;
				2333	// Alternative 0 is the negative lookahead, alternative 1 is what comes
				2334	// afterwards.
				2335	RegExpNode* node = alternatives_->at(1).node();
				2336	return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
				2337	}
				2338
				2339
				2340	void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
				2341	QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
				2342	bool not_at_start) {
				2343	// Alternative 0 is the negative lookahead, alternative 1 is what comes
				2344	// afterwards.
				2345	RegExpNode* node = alternatives_->at(1).node();
				2346	return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
				2347	}
				2348
				2349
				2350	int ChoiceNode::EatsAtLeastHelper(int still_to_find,
				2351	int budget,
				2352	RegExpNode* ignore_this_node,
				2353	bool not_at_start) {
				2354	if (budget <= 0) return 0;
				2355	int min = 100;
				2356	int choice_count = alternatives_->length();
				2357	budget = (budget - 1) / choice_count;
				2358	for (int i = 0; i < choice_count; i++) {
				2359	RegExpNode* node = alternatives_->at(i).node();
				2360	if (node == ignore_this_node) continue;
				2361	int node_eats_at_least =
				2362	node->EatsAtLeast(still_to_find, budget, not_at_start);
				2363	if (node_eats_at_least < min) min = node_eats_at_least;
				2364	if (min == 0) return 0;
				2365	}
				2366	return min;
				2367	}
				2368
				2369
				2370	int LoopChoiceNode::EatsAtLeast(int still_to_find,
				2371	int budget,
				2372	bool not_at_start) {
				2373	return EatsAtLeastHelper(still_to_find,
				2374	budget - 1,
				2375	loop_node_,
				2376	not_at_start);
				2377	}
				2378
				2379
				2380	int ChoiceNode::EatsAtLeast(int still_to_find,
				2381	int budget,
				2382	bool not_at_start) {
				2383	return EatsAtLeastHelper(still_to_find,
				2384	budget,
				2385	NULL,
				2386	not_at_start);
				2387	}
				2388
				2389
				2390	// Takes the left-most 1-bit and smears it out, setting all bits to its right.
				2391	static inline uint32_t SmearBitsRight(uint32_t v) {
				2392	v \|= v >> 1;
				2393	v \|= v >> 2;
				2394	v \|= v >> 4;
				2395	v \|= v >> 8;
				2396	v \|= v >> 16;
				2397	return v;
				2398	}
				2399
				2400
				2401	bool QuickCheckDetails::Rationalize(bool asc) {
				2402	bool found_useful_op = false;
				2403	uint32_t char_mask;
				2404	if (asc) {
				2405	char_mask = String::kMaxOneByteCharCode;
				2406	} else {
				2407	char_mask = String::kMaxUtf16CodeUnit;
				2408	}
				2409	mask_ = 0;
				2410	value_ = 0;
				2411	int char_shift = 0;
				2412	for (int i = 0; i < characters_; i++) {
				2413	Position* pos = &positions_[i];
				2414	if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
				2415	found_useful_op = true;
				2416	}
				2417	mask_ \|= (pos->mask & char_mask) << char_shift;
				2418	value_ \|= (pos->value & char_mask) << char_shift;
				2419	char_shift += asc ? 8 : 16;
				2420	}
				2421	return found_useful_op;
				2422	}
				2423
				2424
				2425	bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
				2426	Trace* bounds_check_trace,
				2427	Trace* trace,
				2428	bool preload_has_checked_bounds,
				2429	Label* on_possible_success,
				2430	QuickCheckDetails* details,
				2431	bool fall_through_on_failure) {
				2432	if (details->characters() == 0) return false;
				2433	GetQuickCheckDetails(
				2434	details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
				2435	if (details->cannot_match()) return false;
				2436	if (!details->Rationalize(compiler->one_byte())) return false;
				2437	DCHECK(details->characters() == 1 \|\|
				2438	compiler->macro_assembler()->CanReadUnaligned());
				2439	uint32_t mask = details->mask();
				2440	uint32_t value = details->value();
				2441
				2442	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				2443
				2444	if (trace->characters_preloaded() != details->characters()) {
				2445	DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
				2446	// We are attempting to preload the minimum number of characters
				2447	// any choice would eat, so if the bounds check fails, then none of the
				2448	// choices can succeed, so we can just immediately backtrack, rather
				2449	// than go to the next choice.
				2450	assembler->LoadCurrentCharacter(trace->cp_offset(),
				2451	bounds_check_trace->backtrack(),
				2452	!preload_has_checked_bounds,
				2453	details->characters());
				2454	}
				2455
				2456
				2457	bool need_mask = true;
				2458
				2459	if (details->characters() == 1) {
				2460	// If number of characters preloaded is 1 then we used a byte or 16 bit
				2461	// load so the value is already masked down.
				2462	uint32_t char_mask;
				2463	if (compiler->one_byte()) {
				2464	char_mask = String::kMaxOneByteCharCode;
				2465	} else {
				2466	char_mask = String::kMaxUtf16CodeUnit;
				2467	}
				2468	if ((mask & char_mask) == char_mask) need_mask = false;
				2469	mask &= char_mask;
				2470	} else {
				2471	// For 2-character preloads in one-byte mode or 1-character preloads in
				2472	// two-byte mode we also use a 16 bit load with zero extend.
				2473	if (details->characters() == 2 && compiler->one_byte()) {
				2474	if ((mask & 0xffff) == 0xffff) need_mask = false;
				2475	} else if (details->characters() == 1 && !compiler->one_byte()) {
				2476	if ((mask & 0xffff) == 0xffff) need_mask = false;
				2477	} else {
				2478	if (mask == 0xffffffff) need_mask = false;
				2479	}
				2480	}
				2481
				2482	if (fall_through_on_failure) {
				2483	if (need_mask) {
				2484	assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
				2485	} else {
				2486	assembler->CheckCharacter(value, on_possible_success);
				2487	}
				2488	} else {
				2489	if (need_mask) {
				2490	assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
				2491	} else {
				2492	assembler->CheckNotCharacter(value, trace->backtrack());
				2493	}
				2494	}
				2495	return true;
				2496	}
				2497
				2498
				2499	// Here is the meat of GetQuickCheckDetails (see also the comment on the
				2500	// super-class in the .h file).
				2501	//
				2502	// We iterate along the text object, building up for each character a
				2503	// mask and value that can be used to test for a quick failure to match.
				2504	// The masks and values for the positions will be combined into a single
				2505	// machine word for the current character width in order to be used in
				2506	// generating a quick check.
				2507	void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
				2508	RegExpCompiler* compiler,
				2509	int characters_filled_in,
				2510	bool not_at_start) {
				2511	// Do not collect any quick check details if the text node reads backward,
				2512	// since it reads in the opposite direction than we use for quick checks.
				2513	if (read_backward()) return;
				2514	Isolate* isolate = compiler->macro_assembler()->isolate();
				2515	DCHECK(characters_filled_in < details->characters());
				2516	int characters = details->characters();
				2517	int char_mask;
				2518	if (compiler->one_byte()) {
				2519	char_mask = String::kMaxOneByteCharCode;
				2520	} else {
				2521	char_mask = String::kMaxUtf16CodeUnit;
				2522	}
				2523	for (int k = 0; k < elements()->length(); k++) {
				2524	TextElement elm = elements()->at(k);
				2525	if (elm.text_type() == TextElement::ATOM) {
				2526	Vector<const uc16> quarks = elm.atom()->data();
				2527	for (int i = 0; i < characters && i < quarks.length(); i++) {
				2528	QuickCheckDetails::Position* pos =
				2529	details->positions(characters_filled_in);
				2530	uc16 c = quarks[i];
				2531	if (compiler->ignore_case()) {
				2532	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				2533	int length = GetCaseIndependentLetters(isolate, c,
				2534	compiler->one_byte(), chars);
				2535	if (length == 0) {
				2536	// This can happen because all case variants are non-Latin1, but we
				2537	// know the input is Latin1.
				2538	details->set_cannot_match();
				2539	pos->determines_perfectly = false;
				2540	return;
				2541	}
				2542	if (length == 1) {
				2543	// This letter has no case equivalents, so it's nice and simple
				2544	// and the mask-compare will determine definitely whether we have
				2545	// a match at this character position.
				2546	pos->mask = char_mask;
				2547	pos->value = c;
				2548	pos->determines_perfectly = true;
				2549	} else {
				2550	uint32_t common_bits = char_mask;
				2551	uint32_t bits = chars[0];
				2552	for (int j = 1; j < length; j++) {
				2553	uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
				2554	common_bits ^= differing_bits;
				2555	bits &= common_bits;
				2556	}
				2557	// If length is 2 and common bits has only one zero in it then
				2558	// our mask and compare instruction will determine definitely
				2559	// whether we have a match at this character position. Otherwise
				2560	// it can only be an approximate check.
				2561	uint32_t one_zero = (common_bits \| ~char_mask);
				2562	if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
				2563	pos->determines_perfectly = true;
				2564	}
				2565	pos->mask = common_bits;
				2566	pos->value = bits;
				2567	}
				2568	} else {
				2569	// Don't ignore case. Nice simple case where the mask-compare will
				2570	// determine definitely whether we have a match at this character
				2571	// position.
				2572	if (c > char_mask) {
				2573	details->set_cannot_match();
				2574	pos->determines_perfectly = false;
				2575	return;
				2576	}
				2577	pos->mask = char_mask;
				2578	pos->value = c;
				2579	pos->determines_perfectly = true;
				2580	}
				2581	characters_filled_in++;
				2582	DCHECK(characters_filled_in <= details->characters());
				2583	if (characters_filled_in == details->characters()) {
				2584	return;
				2585	}
				2586	}
				2587	} else {
				2588	QuickCheckDetails::Position* pos =
				2589	details->positions(characters_filled_in);
				2590	RegExpCharacterClass* tree = elm.char_class();
				2591	ZoneList<CharacterRange>* ranges = tree->ranges(zone());
				2592	if (tree->is_negated()) {
				2593	// A quick check uses multi-character mask and compare. There is no
				2594	// useful way to incorporate a negative char class into this scheme
				2595	// so we just conservatively create a mask and value that will always
				2596	// succeed.
				2597	pos->mask = 0;
				2598	pos->value = 0;
				2599	} else {
				2600	int first_range = 0;
				2601	while (ranges->at(first_range).from() > char_mask) {
				2602	first_range++;
				2603	if (first_range == ranges->length()) {
				2604	details->set_cannot_match();
				2605	pos->determines_perfectly = false;
				2606	return;
				2607	}
				2608	}
				2609	CharacterRange range = ranges->at(first_range);
				2610	uc16 from = range.from();
				2611	uc16 to = range.to();
				2612	if (to > char_mask) {
				2613	to = char_mask;
				2614	}
				2615	uint32_t differing_bits = (from ^ to);
				2616	// A mask and compare is only perfect if the differing bits form a
				2617	// number like 00011111 with one single block of trailing 1s.
				2618	if ((differing_bits & (differing_bits + 1)) == 0 &&
				2619	from + differing_bits == to) {
				2620	pos->determines_perfectly = true;
				2621	}
				2622	uint32_t common_bits = ~SmearBitsRight(differing_bits);
				2623	uint32_t bits = (from & common_bits);
				2624	for (int i = first_range + 1; i < ranges->length(); i++) {
				2625	CharacterRange range = ranges->at(i);
				2626	uc16 from = range.from();
				2627	uc16 to = range.to();
				2628	if (from > char_mask) continue;
				2629	if (to > char_mask) to = char_mask;
				2630	// Here we are combining more ranges into the mask and compare
				2631	// value. With each new range the mask becomes more sparse and
				2632	// so the chances of a false positive rise. A character class
				2633	// with multiple ranges is assumed never to be equivalent to a
				2634	// mask and compare operation.
				2635	pos->determines_perfectly = false;
				2636	uint32_t new_common_bits = (from ^ to);
				2637	new_common_bits = ~SmearBitsRight(new_common_bits);
				2638	common_bits &= new_common_bits;
				2639	bits &= new_common_bits;
				2640	uint32_t differing_bits = (from & common_bits) ^ bits;
				2641	common_bits ^= differing_bits;
				2642	bits &= common_bits;
				2643	}
				2644	pos->mask = common_bits;
				2645	pos->value = bits;
				2646	}
				2647	characters_filled_in++;
				2648	DCHECK(characters_filled_in <= details->characters());
				2649	if (characters_filled_in == details->characters()) {
				2650	return;
				2651	}
				2652	}
				2653	}
				2654	DCHECK(characters_filled_in != details->characters());
				2655	if (!details->cannot_match()) {
				2656	on_success()-> GetQuickCheckDetails(details,
				2657	compiler,
				2658	characters_filled_in,
				2659	true);
				2660	}
				2661	}
				2662
				2663
				2664	void QuickCheckDetails::Clear() {
				2665	for (int i = 0; i < characters_; i++) {
				2666	positions_[i].mask = 0;
				2667	positions_[i].value = 0;
				2668	positions_[i].determines_perfectly = false;
				2669	}
				2670	characters_ = 0;
				2671	}
				2672
				2673
				2674	void QuickCheckDetails::Advance(int by, bool one_byte) {
				2675	if (by >= characters_ \|\| by < 0) {
				2676	DCHECK_IMPLIES(by < 0, characters_ == 0);
				2677	Clear();
				2678	return;
				2679	}
				2680	DCHECK_LE(characters_ - by, 4);
				2681	DCHECK_LE(characters_, 4);
				2682	for (int i = 0; i < characters_ - by; i++) {
				2683	positions_[i] = positions_[by + i];
				2684	}
				2685	for (int i = characters_ - by; i < characters_; i++) {
				2686	positions_[i].mask = 0;
				2687	positions_[i].value = 0;
				2688	positions_[i].determines_perfectly = false;
				2689	}
				2690	characters_ -= by;
				2691	// We could change mask_ and value_ here but we would never advance unless
				2692	// they had already been used in a check and they won't be used again because
				2693	// it would gain us nothing. So there's no point.
				2694	}
				2695
				2696
				2697	void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
				2698	DCHECK(characters_ == other->characters_);
				2699	if (other->cannot_match_) {
				2700	return;
				2701	}
				2702	if (cannot_match_) {
				2703	this = other;
				2704	return;
				2705	}
				2706	for (int i = from_index; i < characters_; i++) {
				2707	QuickCheckDetails::Position* pos = positions(i);
				2708	QuickCheckDetails::Position* other_pos = other->positions(i);
				2709	if (pos->mask != other_pos->mask \|\|
				2710	pos->value != other_pos->value \|\|
				2711	!other_pos->determines_perfectly) {
				2712	// Our mask-compare operation will be approximate unless we have the
				2713	// exact same operation on both sides of the alternation.
				2714	pos->determines_perfectly = false;
				2715	}
				2716	pos->mask &= other_pos->mask;
				2717	pos->value &= pos->mask;
				2718	other_pos->value &= pos->mask;
				2719	uc16 differing_bits = (pos->value ^ other_pos->value);
				2720	pos->mask &= ~differing_bits;
				2721	pos->value &= pos->mask;
				2722	}
				2723	}
				2724
				2725
				2726	class VisitMarker {
				2727	public:
				2728	explicit VisitMarker(NodeInfo* info) : info_(info) {
				2729	DCHECK(!info->visited);
				2730	info->visited = true;
				2731	}
				2732	~VisitMarker() {
				2733	info_->visited = false;
				2734	}
				2735	private:
				2736	NodeInfo* info_;
				2737	};
				2738
				2739
				2740	RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
				2741	if (info()->replacement_calculated) return replacement();
				2742	if (depth < 0) return this;
				2743	DCHECK(!info()->visited);
				2744	VisitMarker marker(info());
				2745	return FilterSuccessor(depth - 1, ignore_case);
				2746	}
				2747
				2748
				2749	RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
				2750	RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
				2751	if (next == NULL) return set_replacement(NULL);
				2752	on_success_ = next;
				2753	return set_replacement(this);
				2754	}
				2755
				2756
				2757	// We need to check for the following characters: 0x39c 0x3bc 0x178.
				2758	static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
				2759	// TODO(dcarney): this could be a lot more efficient.
				2760	return range.Contains(0x39c) \|\|
				2761	range.Contains(0x3bc) \|\| range.Contains(0x178);
				2762	}
				2763
				2764
				2765	static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
				2766	for (int i = 0; i < ranges->length(); i++) {
				2767	// TODO(dcarney): this could be a lot more efficient.
				2768	if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
				2769	}
				2770	return false;
				2771	}
				2772
				2773
				2774	RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
				2775	if (info()->replacement_calculated) return replacement();
				2776	if (depth < 0) return this;
				2777	DCHECK(!info()->visited);
				2778	VisitMarker marker(info());
				2779	int element_count = elements()->length();
				2780	for (int i = 0; i < element_count; i++) {
				2781	TextElement elm = elements()->at(i);
				2782	if (elm.text_type() == TextElement::ATOM) {
				2783	Vector<const uc16> quarks = elm.atom()->data();
				2784	for (int j = 0; j < quarks.length(); j++) {
				2785	uint16_t c = quarks[j];
				2786	if (c <= String::kMaxOneByteCharCode) continue;
				2787	if (!ignore_case) return set_replacement(NULL);
				2788	// Here, we need to check for characters whose upper and lower cases
				2789	// are outside the Latin-1 range.
				2790	uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
				2791	// Character is outside Latin-1 completely
				2792	if (converted == 0) return set_replacement(NULL);
				2793	// Convert quark to Latin-1 in place.
				2794	uint16_t* copy = const_cast<uint16_t*>(quarks.start());
				2795	copy[j] = converted;
				2796	}
				2797	} else {
				2798	DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
				2799	RegExpCharacterClass* cc = elm.char_class();
				2800	ZoneList<CharacterRange>* ranges = cc->ranges(zone());
				2801	if (!CharacterRange::IsCanonical(ranges)) {
				2802	CharacterRange::Canonicalize(ranges);
				2803	}
				2804	// Now they are in order so we only need to look at the first.
				2805	int range_count = ranges->length();
				2806	if (cc->is_negated()) {
				2807	if (range_count != 0 &&
				2808	ranges->at(0).from() == 0 &&
				2809	ranges->at(0).to() >= String::kMaxOneByteCharCode) {
				2810	// This will be handled in a later filter.
				2811	if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
				2812	return set_replacement(NULL);
				2813	}
				2814	} else {
				2815	if (range_count == 0 \|\|
				2816	ranges->at(0).from() > String::kMaxOneByteCharCode) {
				2817	// This will be handled in a later filter.
				2818	if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
				2819	return set_replacement(NULL);
				2820	}
				2821	}
				2822	}
				2823	}
				2824	return FilterSuccessor(depth - 1, ignore_case);
				2825	}
				2826
				2827
				2828	RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
				2829	if (info()->replacement_calculated) return replacement();
				2830	if (depth < 0) return this;
				2831	if (info()->visited) return this;
				2832	{
				2833	VisitMarker marker(info());
				2834
				2835	RegExpNode* continue_replacement =
				2836	continue_node_->FilterOneByte(depth - 1, ignore_case);
				2837	// If we can't continue after the loop then there is no sense in doing the
				2838	// loop.
				2839	if (continue_replacement == NULL) return set_replacement(NULL);
				2840	}
				2841
				2842	return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
				2843	}
				2844
				2845
				2846	RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
				2847	if (info()->replacement_calculated) return replacement();
				2848	if (depth < 0) return this;
				2849	if (info()->visited) return this;
				2850	VisitMarker marker(info());
				2851	int choice_count = alternatives_->length();
				2852
				2853	for (int i = 0; i < choice_count; i++) {
				2854	GuardedAlternative alternative = alternatives_->at(i);
				2855	if (alternative.guards() != NULL && alternative.guards()->length() != 0) {
				2856	set_replacement(this);
				2857	return this;
				2858	}
				2859	}
				2860
				2861	int surviving = 0;
				2862	RegExpNode* survivor = NULL;
				2863	for (int i = 0; i < choice_count; i++) {
				2864	GuardedAlternative alternative = alternatives_->at(i);
				2865	RegExpNode* replacement =
				2866	alternative.node()->FilterOneByte(depth - 1, ignore_case);
				2867	DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
				2868	if (replacement != NULL) {
				2869	alternatives_->at(i).set_node(replacement);
				2870	surviving++;
				2871	survivor = replacement;
				2872	}
				2873	}
				2874	if (surviving < 2) return set_replacement(survivor);
				2875
				2876	set_replacement(this);
				2877	if (surviving == choice_count) {
				2878	return this;
				2879	}
				2880	// Only some of the nodes survived the filtering. We need to rebuild the
				2881	// alternatives list.
				2882	ZoneList<GuardedAlternative>* new_alternatives =
				2883	new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
				2884	for (int i = 0; i < choice_count; i++) {
				2885	RegExpNode* replacement =
				2886	alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
				2887	if (replacement != NULL) {
				2888	alternatives_->at(i).set_node(replacement);
				2889	new_alternatives->Add(alternatives_->at(i), zone());
				2890	}
				2891	}
				2892	alternatives_ = new_alternatives;
				2893	return this;
				2894	}
				2895
				2896
				2897	RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
				2898	bool ignore_case) {
				2899	if (info()->replacement_calculated) return replacement();
				2900	if (depth < 0) return this;
				2901	if (info()->visited) return this;
				2902	VisitMarker marker(info());
				2903	// Alternative 0 is the negative lookahead, alternative 1 is what comes
				2904	// afterwards.
				2905	RegExpNode* node = alternatives_->at(1).node();
				2906	RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
				2907	if (replacement == NULL) return set_replacement(NULL);
				2908	alternatives_->at(1).set_node(replacement);
				2909
				2910	RegExpNode* neg_node = alternatives_->at(0).node();
				2911	RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
				2912	// If the negative lookahead is always going to fail then
				2913	// we don't need to check it.
				2914	if (neg_replacement == NULL) return set_replacement(replacement);
				2915	alternatives_->at(0).set_node(neg_replacement);
				2916	return set_replacement(this);
				2917	}
				2918
				2919
				2920	void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
				2921	RegExpCompiler* compiler,
				2922	int characters_filled_in,
				2923	bool not_at_start) {
				2924	if (body_can_be_zero_length_ \|\| info()->visited) return;
				2925	VisitMarker marker(info());
				2926	return ChoiceNode::GetQuickCheckDetails(details,
				2927	compiler,
				2928	characters_filled_in,
				2929	not_at_start);
				2930	}
				2931
				2932
				2933	void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
				2934	BoyerMooreLookahead* bm, bool not_at_start) {
				2935	if (body_can_be_zero_length_ \|\| budget <= 0) {
				2936	bm->SetRest(offset);
				2937	SaveBMInfo(bm, not_at_start, offset);
				2938	return;
				2939	}
				2940	ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
				2941	SaveBMInfo(bm, not_at_start, offset);
				2942	}
				2943
				2944
				2945	void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
				2946	RegExpCompiler* compiler,
				2947	int characters_filled_in,
				2948	bool not_at_start) {
				2949	not_at_start = (not_at_start \|\| not_at_start_);
				2950	int choice_count = alternatives_->length();
				2951	DCHECK(choice_count > 0);
				2952	alternatives_->at(0).node()->GetQuickCheckDetails(details,
				2953	compiler,
				2954	characters_filled_in,
				2955	not_at_start);
				2956	for (int i = 1; i < choice_count; i++) {
				2957	QuickCheckDetails new_details(details->characters());
				2958	RegExpNode* node = alternatives_->at(i).node();
				2959	node->GetQuickCheckDetails(&new_details, compiler,
				2960	characters_filled_in,
				2961	not_at_start);
				2962	// Here we merge the quick match details of the two branches.
				2963	details->Merge(&new_details, characters_filled_in);
				2964	}
				2965	}
				2966
				2967
				2968	// Check for [0-9A-Z_a-z].
				2969	static void EmitWordCheck(RegExpMacroAssembler* assembler,
				2970	Label* word,
				2971	Label* non_word,
				2972	bool fall_through_on_word) {
				2973	if (assembler->CheckSpecialCharacterClass(
				2974	fall_through_on_word ? 'w' : 'W',
				2975	fall_through_on_word ? non_word : word)) {
				2976	// Optimized implementation available.
				2977	return;
				2978	}
				2979	assembler->CheckCharacterGT('z', non_word);
				2980	assembler->CheckCharacterLT('0', non_word);
				2981	assembler->CheckCharacterGT('a' - 1, word);
				2982	assembler->CheckCharacterLT('9' + 1, word);
				2983	assembler->CheckCharacterLT('A', non_word);
				2984	assembler->CheckCharacterLT('Z' + 1, word);
				2985	if (fall_through_on_word) {
				2986	assembler->CheckNotCharacter('_', non_word);
				2987	} else {
				2988	assembler->CheckCharacter('_', word);
				2989	}
				2990	}
				2991
				2992
				2993	// Emit the code to check for a ^ in multiline mode (1-character lookbehind
				2994	// that matches newline or the start of input).
				2995	static void EmitHat(RegExpCompiler* compiler,
				2996	RegExpNode* on_success,
				2997	Trace* trace) {
				2998	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				2999	// We will be loading the previous character into the current character
				3000	// register.
				3001	Trace new_trace(*trace);
				3002	new_trace.InvalidateCurrentCharacter();
				3003
				3004	Label ok;
				3005	if (new_trace.cp_offset() == 0) {
				3006	// The start of input counts as a newline in this context, so skip to
				3007	// ok if we are at the start.
				3008	assembler->CheckAtStart(&ok);
				3009	}
				3010	// We already checked that we are not at the start of input so it must be
				3011	// OK to load the previous character.
				3012	assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
				3013	new_trace.backtrack(),
				3014	false);
				3015	if (!assembler->CheckSpecialCharacterClass('n',
				3016	new_trace.backtrack())) {
				3017	// Newline means \n, \r, 0x2028 or 0x2029.
				3018	if (!compiler->one_byte()) {
				3019	assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
				3020	}
				3021	assembler->CheckCharacter('\n', &ok);
				3022	assembler->CheckNotCharacter('\r', new_trace.backtrack());
				3023	}
				3024	assembler->Bind(&ok);
				3025	on_success->Emit(compiler, &new_trace);
				3026	}
				3027
				3028
				3029	// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
				3030	void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
				3031	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				3032	Isolate* isolate = assembler->isolate();
				3033	Trace::TriBool next_is_word_character = Trace::UNKNOWN;
				3034	bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
				3035	BoyerMooreLookahead* lookahead = bm_info(not_at_start);
				3036	if (lookahead == NULL) {
				3037	int eats_at_least =
				3038	Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
				3039	kRecursionBudget,
				3040	not_at_start));
				3041	if (eats_at_least >= 1) {
				3042	BoyerMooreLookahead* bm =
				3043	new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
				3044	FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
				3045	if (bm->at(0)->is_non_word())
				3046	next_is_word_character = Trace::FALSE_VALUE;
				3047	if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
				3048	}
				3049	} else {
				3050	if (lookahead->at(0)->is_non_word())
				3051	next_is_word_character = Trace::FALSE_VALUE;
				3052	if (lookahead->at(0)->is_word())
				3053	next_is_word_character = Trace::TRUE_VALUE;
				3054	}
				3055	bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
				3056	if (next_is_word_character == Trace::UNKNOWN) {
				3057	Label before_non_word;
				3058	Label before_word;
				3059	if (trace->characters_preloaded() != 1) {
				3060	assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
				3061	}
				3062	// Fall through on non-word.
				3063	EmitWordCheck(assembler, &before_word, &before_non_word, false);
				3064	// Next character is not a word character.
				3065	assembler->Bind(&before_non_word);
				3066	Label ok;
				3067	BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
				3068	assembler->GoTo(&ok);
				3069
				3070	assembler->Bind(&before_word);
				3071	BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
				3072	assembler->Bind(&ok);
				3073	} else if (next_is_word_character == Trace::TRUE_VALUE) {
				3074	BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
				3075	} else {
				3076	DCHECK(next_is_word_character == Trace::FALSE_VALUE);
				3077	BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
				3078	}
				3079	}
				3080
				3081
				3082	void AssertionNode::BacktrackIfPrevious(
				3083	RegExpCompiler* compiler,
				3084	Trace* trace,
				3085	AssertionNode::IfPrevious backtrack_if_previous) {
				3086	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				3087	Trace new_trace(*trace);
				3088	new_trace.InvalidateCurrentCharacter();
				3089
				3090	Label fall_through, dummy;
				3091
				3092	Label* non_word = backtrack_if_previous == kIsNonWord ?
				3093	new_trace.backtrack() :
				3094	&fall_through;
				3095	Label* word = backtrack_if_previous == kIsNonWord ?
				3096	&fall_through :
				3097	new_trace.backtrack();
				3098
				3099	if (new_trace.cp_offset() == 0) {
				3100	// The start of input counts as a non-word character, so the question is
				3101	// decided if we are at the start.
				3102	assembler->CheckAtStart(non_word);
				3103	}
				3104	// We already checked that we are not at the start of input so it must be
				3105	// OK to load the previous character.
				3106	assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
				3107	EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
				3108
				3109	assembler->Bind(&fall_through);
				3110	on_success()->Emit(compiler, &new_trace);
				3111	}
				3112
				3113
				3114	void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
				3115	RegExpCompiler* compiler,
				3116	int filled_in,
				3117	bool not_at_start) {
				3118	if (assertion_type_ == AT_START && not_at_start) {
				3119	details->set_cannot_match();
				3120	return;
				3121	}
				3122	return on_success()->GetQuickCheckDetails(details,
				3123	compiler,
				3124	filled_in,
				3125	not_at_start);
				3126	}
				3127
				3128
				3129	void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				3130	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				3131	switch (assertion_type_) {
				3132	case AT_END: {
				3133	Label ok;
				3134	assembler->CheckPosition(trace->cp_offset(), &ok);
				3135	assembler->GoTo(trace->backtrack());
				3136	assembler->Bind(&ok);
				3137	break;
				3138	}
				3139	case AT_START: {
				3140	if (trace->at_start() == Trace::FALSE_VALUE) {
				3141	assembler->GoTo(trace->backtrack());
				3142	return;
				3143	}
				3144	if (trace->at_start() == Trace::UNKNOWN) {
				3145	assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
				3146	Trace at_start_trace = *trace;
				3147	at_start_trace.set_at_start(Trace::TRUE_VALUE);
				3148	on_success()->Emit(compiler, &at_start_trace);
				3149	return;
				3150	}
				3151	}
				3152	break;
				3153	case AFTER_NEWLINE:
				3154	EmitHat(compiler, on_success(), trace);
				3155	return;
				3156	case AT_BOUNDARY:
				3157	case AT_NON_BOUNDARY: {
				3158	EmitBoundaryCheck(compiler, trace);
				3159	return;
				3160	}
				3161	}
				3162	on_success()->Emit(compiler, trace);
				3163	}
				3164
				3165
				3166	static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
				3167	if (quick_check == NULL) return false;
				3168	if (offset >= quick_check->characters()) return false;
				3169	return quick_check->positions(offset)->determines_perfectly;
				3170	}
				3171
				3172
				3173	static void UpdateBoundsCheck(int index, int* checked_up_to) {
				3174	if (index > *checked_up_to) {
				3175	*checked_up_to = index;
				3176	}
				3177	}
				3178
				3179
				3180	// We call this repeatedly to generate code for each pass over the text node.
				3181	// The passes are in increasing order of difficulty because we hope one
				3182	// of the first passes will fail in which case we are saved the work of the
				3183	// later passes. for example for the case independent regexp /%[asdfghjkl]a/
				3184	// we will check the '%' in the first pass, the case independent 'a' in the
				3185	// second pass and the character class in the last pass.
				3186	//
				3187	// The passes are done from right to left, so for example to test for /bar/
				3188	// we will first test for an 'r' with offset 2, then an 'a' with offset 1
				3189	// and then a 'b' with offset 0. This means we can avoid the end-of-input
				3190	// bounds check most of the time. In the example we only need to check for
				3191	// end-of-input when loading the putative 'r'.
				3192	//
				3193	// A slight complication involves the fact that the first character may already
				3194	// be fetched into a register by the previous node. In this case we want to
				3195	// do the test for that character first. We do this in separate passes. The
				3196	// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
				3197	// pass has been performed then subsequent passes will have true in
				3198	// first_element_checked to indicate that that character does not need to be
				3199	// checked again.
				3200	//
				3201	// In addition to all this we are passed a Trace, which can
				3202	// contain an AlternativeGeneration object. In this AlternativeGeneration
				3203	// object we can see details of any quick check that was already passed in
				3204	// order to get to the code we are now generating. The quick check can involve
				3205	// loading characters, which means we do not need to recheck the bounds
				3206	// up to the limit the quick check already checked. In addition the quick
				3207	// check can have involved a mask and compare operation which may simplify
				3208	// or obviate the need for further checks at some character positions.
				3209	void TextNode::TextEmitPass(RegExpCompiler* compiler,
				3210	TextEmitPassType pass,
				3211	bool preloaded,
				3212	Trace* trace,
				3213	bool first_element_checked,
				3214	int* checked_up_to) {
				3215	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				3216	Isolate* isolate = assembler->isolate();
				3217	bool one_byte = compiler->one_byte();
				3218	Label* backtrack = trace->backtrack();
				3219	QuickCheckDetails* quick_check = trace->quick_check_performed();
				3220	int element_count = elements()->length();
				3221	int backward_offset = read_backward() ? -Length() : 0;
				3222	for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
				3223	TextElement elm = elements()->at(i);
				3224	int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
				3225	if (elm.text_type() == TextElement::ATOM) {
				3226	Vector<const uc16> quarks = elm.atom()->data();
				3227	for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
				3228	if (first_element_checked && i == 0 && j == 0) continue;
				3229	if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
				3230	EmitCharacterFunction* emit_function = NULL;
				3231	switch (pass) {
				3232	case NON_LATIN1_MATCH:
				3233	DCHECK(one_byte);
				3234	if (quarks[j] > String::kMaxOneByteCharCode) {
				3235	assembler->GoTo(backtrack);
				3236	return;
				3237	}
				3238	break;
				3239	case NON_LETTER_CHARACTER_MATCH:
				3240	emit_function = &EmitAtomNonLetter;
				3241	break;
				3242	case SIMPLE_CHARACTER_MATCH:
				3243	emit_function = &EmitSimpleCharacter;
				3244	break;
				3245	case CASE_CHARACTER_MATCH:
				3246	emit_function = &EmitAtomLetter;
				3247	break;
				3248	default:
				3249	break;
				3250	}
				3251	if (emit_function != NULL) {
				3252	bool bounds_check = *checked_up_to < cp_offset + j \|\| read_backward();
				3253	bool bound_checked =
				3254	emit_function(isolate, compiler, quarks[j], backtrack,
				3255	cp_offset + j, bounds_check, preloaded);
				3256	if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
				3257	}
				3258	}
				3259	} else {
				3260	DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
				3261	if (pass == CHARACTER_CLASS_MATCH) {
				3262	if (first_element_checked && i == 0) continue;
				3263	if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
				3264	RegExpCharacterClass* cc = elm.char_class();
				3265	bool bounds_check = *checked_up_to < cp_offset \|\| read_backward();
				3266	EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
				3267	bounds_check, preloaded, zone());
				3268	UpdateBoundsCheck(cp_offset, checked_up_to);
				3269	}
				3270	}
				3271	}
				3272	}
				3273
				3274
				3275	int TextNode::Length() {
				3276	TextElement elm = elements()->last();
				3277	DCHECK(elm.cp_offset() >= 0);
				3278	return elm.cp_offset() + elm.length();
				3279	}
				3280
				3281
				3282	bool TextNode::SkipPass(int int_pass, bool ignore_case) {
				3283	TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
				3284	if (ignore_case) {
				3285	return pass == SIMPLE_CHARACTER_MATCH;
				3286	} else {
				3287	return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;
				3288	}
				3289	}
				3290
				3291
				3292	// This generates the code to match a text node. A text node can contain
				3293	// straight character sequences (possibly to be matched in a case-independent
				3294	// way) and character classes. For efficiency we do not do this in a single
				3295	// pass from left to right. Instead we pass over the text node several times,
				3296	// emitting code for some character positions every time. See the comment on
				3297	// TextEmitPass for details.
				3298	void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				3299	LimitResult limit_result = LimitVersions(compiler, trace);
				3300	if (limit_result == DONE) return;
				3301	DCHECK(limit_result == CONTINUE);
				3302
				3303	if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
				3304	compiler->SetRegExpTooBig();
				3305	return;
				3306	}
				3307
				3308	if (compiler->one_byte()) {
				3309	int dummy = 0;
				3310	TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
				3311	}
				3312
				3313	bool first_elt_done = false;
				3314	int bound_checked_to = trace->cp_offset() - 1;
				3315	bound_checked_to += trace->bound_checked_up_to();
				3316
				3317	// If a character is preloaded into the current character register then
				3318	// check that now.
				3319	if (trace->characters_preloaded() == 1) {
				3320	for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
				3321	if (!SkipPass(pass, compiler->ignore_case())) {
				3322	TextEmitPass(compiler,
				3323	static_cast<TextEmitPassType>(pass),
				3324	true,
				3325	trace,
				3326	false,
				3327	&bound_checked_to);
				3328	}
				3329	}
				3330	first_elt_done = true;
				3331	}
				3332
				3333	for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
				3334	if (!SkipPass(pass, compiler->ignore_case())) {
				3335	TextEmitPass(compiler,
				3336	static_cast<TextEmitPassType>(pass),
				3337	false,
				3338	trace,
				3339	first_elt_done,
				3340	&bound_checked_to);
				3341	}
				3342	}
				3343
				3344	Trace successor_trace(*trace);
				3345	// If we advance backward, we may end up at the start.
				3346	successor_trace.AdvanceCurrentPositionInTrace(
				3347	read_backward() ? -Length() : Length(), compiler);
				3348	successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
				3349	: Trace::FALSE_VALUE);
				3350	RecursionCheck rc(compiler);
				3351	on_success()->Emit(compiler, &successor_trace);
				3352	}
				3353
				3354
				3355	void Trace::InvalidateCurrentCharacter() {
				3356	characters_preloaded_ = 0;
				3357	}
				3358
				3359
				3360	void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
				3361	// We don't have an instruction for shifting the current character register
				3362	// down or for using a shifted value for anything so lets just forget that
				3363	// we preloaded any characters into it.
				3364	characters_preloaded_ = 0;
				3365	// Adjust the offsets of the quick check performed information. This
				3366	// information is used to find out what we already determined about the
				3367	// characters by means of mask and compare.
				3368	quick_check_performed_.Advance(by, compiler->one_byte());
				3369	cp_offset_ += by;
				3370	if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
				3371	compiler->SetRegExpTooBig();
				3372	cp_offset_ = 0;
				3373	}
				3374	bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
				3375	}
				3376
				3377
				3378	void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
				3379	int element_count = elements()->length();
				3380	for (int i = 0; i < element_count; i++) {
				3381	TextElement elm = elements()->at(i);
				3382	if (elm.text_type() == TextElement::CHAR_CLASS) {
				3383	RegExpCharacterClass* cc = elm.char_class();
				3384	// None of the standard character classes is different in the case
				3385	// independent case and it slows us down if we don't know that.
				3386	if (cc->is_standard(zone())) continue;
				3387	ZoneList<CharacterRange>* ranges = cc->ranges(zone());
				3388	int range_count = ranges->length();
				3389	for (int j = 0; j < range_count; j++) {
				3390	ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
				3391	}
				3392	}
				3393	}
				3394	}
				3395
				3396
				3397	int TextNode::GreedyLoopTextLength() { return Length(); }
				3398
				3399
				3400	RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
				3401	RegExpCompiler* compiler) {
				3402	if (read_backward()) return NULL;
				3403	if (elements()->length() != 1) return NULL;
				3404	TextElement elm = elements()->at(0);
				3405	if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
				3406	RegExpCharacterClass* node = elm.char_class();
				3407	ZoneList<CharacterRange>* ranges = node->ranges(zone());
				3408	if (!CharacterRange::IsCanonical(ranges)) {
				3409	CharacterRange::Canonicalize(ranges);
				3410	}
				3411	if (node->is_negated()) {
				3412	return ranges->length() == 0 ? on_success() : NULL;
				3413	}
				3414	if (ranges->length() != 1) return NULL;
				3415	uint32_t max_char;
				3416	if (compiler->one_byte()) {
				3417	max_char = String::kMaxOneByteCharCode;
				3418	} else {
				3419	max_char = String::kMaxUtf16CodeUnit;
				3420	}
				3421	return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
				3422	}
				3423
				3424
				3425	// Finds the fixed match length of a sequence of nodes that goes from
				3426	// this alternative and back to this choice node. If there are variable
				3427	// length nodes or other complications in the way then return a sentinel
				3428	// value indicating that a greedy loop cannot be constructed.
				3429	int ChoiceNode::GreedyLoopTextLengthForAlternative(
				3430	GuardedAlternative* alternative) {
				3431	int length = 0;
				3432	RegExpNode* node = alternative->node();
				3433	// Later we will generate code for all these text nodes using recursion
				3434	// so we have to limit the max number.
				3435	int recursion_depth = 0;
				3436	while (node != this) {
				3437	if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
				3438	return kNodeIsTooComplexForGreedyLoops;
				3439	}
				3440	int node_length = node->GreedyLoopTextLength();
				3441	if (node_length == kNodeIsTooComplexForGreedyLoops) {
				3442	return kNodeIsTooComplexForGreedyLoops;
				3443	}
				3444	length += node_length;
				3445	SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
				3446	node = seq_node->on_success();
				3447	}
				3448	return read_backward() ? -length : length;
				3449	}
				3450
				3451
				3452	void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
				3453	DCHECK_NULL(loop_node_);
				3454	AddAlternative(alt);
				3455	loop_node_ = alt.node();
				3456	}
				3457
				3458
				3459	void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
				3460	DCHECK_NULL(continue_node_);
				3461	AddAlternative(alt);
				3462	continue_node_ = alt.node();
				3463	}
				3464
				3465
				3466	void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				3467	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				3468	if (trace->stop_node() == this) {
				3469	// Back edge of greedy optimized loop node graph.
				3470	int text_length =
				3471	GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
				3472	DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
				3473	// Update the counter-based backtracking info on the stack. This is an
				3474	// optimization for greedy loops (see below).
				3475	DCHECK(trace->cp_offset() == text_length);
				3476	macro_assembler->AdvanceCurrentPosition(text_length);
				3477	macro_assembler->GoTo(trace->loop_label());
				3478	return;
				3479	}
				3480	DCHECK_NULL(trace->stop_node());
				3481	if (!trace->is_trivial()) {
				3482	trace->Flush(compiler, this);
				3483	return;
				3484	}
				3485	ChoiceNode::Emit(compiler, trace);
				3486	}
				3487
				3488
				3489	int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
				3490	int eats_at_least) {
				3491	int preload_characters = Min(4, eats_at_least);
				3492	if (compiler->macro_assembler()->CanReadUnaligned()) {
				3493	bool one_byte = compiler->one_byte();
				3494	if (one_byte) {
				3495	if (preload_characters > 4) preload_characters = 4;
				3496	// We can't preload 3 characters because there is no machine instruction
				3497	// to do that. We can't just load 4 because we could be reading
				3498	// beyond the end of the string, which could cause a memory fault.
				3499	if (preload_characters == 3) preload_characters = 2;
				3500	} else {
				3501	if (preload_characters > 2) preload_characters = 2;
				3502	}
				3503	} else {
				3504	if (preload_characters > 1) preload_characters = 1;
				3505	}
				3506	return preload_characters;
				3507	}
				3508
				3509
				3510	// This class is used when generating the alternatives in a choice node. It
				3511	// records the way the alternative is being code generated.
				3512	class AlternativeGeneration: public Malloced {
				3513	public:
				3514	AlternativeGeneration()
				3515	: possible_success(),
				3516	expects_preload(false),
				3517	after(),
				3518	quick_check_details() { }
				3519	Label possible_success;
				3520	bool expects_preload;
				3521	Label after;
				3522	QuickCheckDetails quick_check_details;
				3523	};
				3524
				3525
				3526	// Creates a list of AlternativeGenerations. If the list has a reasonable
				3527	// size then it is on the stack, otherwise the excess is on the heap.
				3528	class AlternativeGenerationList {
				3529	public:
				3530	AlternativeGenerationList(int count, Zone* zone)
				3531	: alt_gens_(count, zone) {
				3532	for (int i = 0; i < count && i < kAFew; i++) {
				3533	alt_gens_.Add(a_few_alt_gens_ + i, zone);
				3534	}
				3535	for (int i = kAFew; i < count; i++) {
				3536	alt_gens_.Add(new AlternativeGeneration(), zone);
				3537	}
				3538	}
				3539	~AlternativeGenerationList() {
				3540	for (int i = kAFew; i < alt_gens_.length(); i++) {
				3541	delete alt_gens_[i];
				3542	alt_gens_[i] = NULL;
				3543	}
				3544	}
				3545
				3546	AlternativeGeneration* at(int i) {
				3547	return alt_gens_[i];
				3548	}
				3549
				3550	private:
				3551	static const int kAFew = 10;
				3552	ZoneList<AlternativeGeneration*> alt_gens_;
				3553	AlternativeGeneration a_few_alt_gens_[kAFew];
				3554	};
				3555
				3556
				3557	// The '2' variant is has inclusive from and exclusive to.
				3558	// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
				3559	// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
				3560	static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
				3561	0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
				3562	0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
				3563	0xFEFF, 0xFF00, 0x10000 };
				3564	static const int kSpaceRangeCount = arraysize(kSpaceRanges);
				3565
				3566	static const int kWordRanges[] = {
				3567	'0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
				3568	static const int kWordRangeCount = arraysize(kWordRanges);
				3569	static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
				3570	static const int kDigitRangeCount = arraysize(kDigitRanges);
				3571	static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
				3572	static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
				3573	static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
				3574	0x2028, 0x202A, 0x10000 };
				3575	static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
				3576
				3577
				3578	void BoyerMoorePositionInfo::Set(int character) {
				3579	SetInterval(Interval(character, character));
				3580	}
				3581
				3582
				3583	void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
				3584	s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
				3585	w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
				3586	d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
				3587	surrogate_ =
				3588	AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
				3589	if (interval.to() - interval.from() >= kMapSize - 1) {
				3590	if (map_count_ != kMapSize) {
				3591	map_count_ = kMapSize;
				3592	for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
				3593	}
				3594	return;
				3595	}
				3596	for (int i = interval.from(); i <= interval.to(); i++) {
				3597	int mod_character = (i & kMask);
				3598	if (!map_->at(mod_character)) {
				3599	map_count_++;
				3600	map_->at(mod_character) = true;
				3601	}
				3602	if (map_count_ == kMapSize) return;
				3603	}
				3604	}
				3605
				3606
				3607	void BoyerMoorePositionInfo::SetAll() {
				3608	s_ = w_ = d_ = kLatticeUnknown;
				3609	if (map_count_ != kMapSize) {
				3610	map_count_ = kMapSize;
				3611	for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
				3612	}
				3613	}
				3614
				3615
				3616	BoyerMooreLookahead::BoyerMooreLookahead(
				3617	int length, RegExpCompiler* compiler, Zone* zone)
				3618	: length_(length),
				3619	compiler_(compiler) {
				3620	if (compiler->one_byte()) {
				3621	max_char_ = String::kMaxOneByteCharCode;
				3622	} else {
				3623	max_char_ = String::kMaxUtf16CodeUnit;
				3624	}
				3625	bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
				3626	for (int i = 0; i < length; i++) {
				3627	bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);
				3628	}
				3629	}
				3630
				3631
				3632	// Find the longest range of lookahead that has the fewest number of different
				3633	// characters that can occur at a given position. Since we are optimizing two
				3634	// different parameters at once this is a tradeoff.
				3635	bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
				3636	int biggest_points = 0;
				3637	// If more than 32 characters out of 128 can occur it is unlikely that we can
				3638	// be lucky enough to step forwards much of the time.
				3639	const int kMaxMax = 32;
				3640	for (int max_number_of_chars = 4;
				3641	max_number_of_chars < kMaxMax;
				3642	max_number_of_chars *= 2) {
				3643	biggest_points =
				3644	FindBestInterval(max_number_of_chars, biggest_points, from, to);
				3645	}
				3646	if (biggest_points == 0) return false;
				3647	return true;
				3648	}
				3649
				3650
				3651	// Find the highest-points range between 0 and length_ where the character
				3652	// information is not too vague. 'Too vague' means that there are more than
				3653	// max_number_of_chars that can occur at this position. Calculates the number
				3654	// of points as the product of width-of-the-range and
				3655	// probability-of-finding-one-of-the-characters, where the probability is
				3656	// calculated using the frequency distribution of the sample subject string.
				3657	int BoyerMooreLookahead::FindBestInterval(
				3658	int max_number_of_chars, int old_biggest_points, int* from, int* to) {
				3659	int biggest_points = old_biggest_points;
				3660	static const int kSize = RegExpMacroAssembler::kTableSize;
				3661	for (int i = 0; i < length_; ) {
				3662	while (i < length_ && Count(i) > max_number_of_chars) i++;
				3663	if (i == length_) break;
				3664	int remembered_from = i;
				3665	bool union_map[kSize];
				3666	for (int j = 0; j < kSize; j++) union_map[j] = false;
				3667	while (i < length_ && Count(i) <= max_number_of_chars) {
				3668	BoyerMoorePositionInfo* map = bitmaps_->at(i);
				3669	for (int j = 0; j < kSize; j++) union_map[j] \|= map->at(j);
				3670	i++;
				3671	}
				3672	int frequency = 0;
				3673	for (int j = 0; j < kSize; j++) {
				3674	if (union_map[j]) {
				3675	// Add 1 to the frequency to give a small per-character boost for
				3676	// the cases where our sampling is not good enough and many
				3677	// characters have a frequency of zero. This means the frequency
				3678	// can theoretically be up to 2*kSize though we treat it mostly as
				3679	// a fraction of kSize.
				3680	frequency += compiler_->frequency_collator()->Frequency(j) + 1;
				3681	}
				3682	}
				3683	// We use the probability of skipping times the distance we are skipping to
				3684	// judge the effectiveness of this. Actually we have a cut-off: By
				3685	// dividing by 2 we switch off the skipping if the probability of skipping
				3686	// is less than 50%. This is because the multibyte mask-and-compare
				3687	// skipping in quickcheck is more likely to do well on this case.
				3688	bool in_quickcheck_range =
				3689	((i - remembered_from < 4) \|\|
				3690	(compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
				3691	// Called 'probability' but it is only a rough estimate and can actually
				3692	// be outside the 0-kSize range.
				3693	int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
				3694	int points = (i - remembered_from) * probability;
				3695	if (points > biggest_points) {
				3696	*from = remembered_from;
				3697	*to = i - 1;
				3698	biggest_points = points;
				3699	}
				3700	}
				3701	return biggest_points;
				3702	}
				3703
				3704
				3705	// Take all the characters that will not prevent a successful match if they
				3706	// occur in the subject string in the range between min_lookahead and
				3707	// max_lookahead (inclusive) measured from the current position. If the
				3708	// character at max_lookahead offset is not one of these characters, then we
				3709	// can safely skip forwards by the number of characters in the range.
				3710	int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
				3711	int max_lookahead,
				3712	Handle<ByteArray> boolean_skip_table) {
				3713	const int kSize = RegExpMacroAssembler::kTableSize;
				3714
				3715	const int kSkipArrayEntry = 0;
				3716	const int kDontSkipArrayEntry = 1;
				3717
				3718	for (int i = 0; i < kSize; i++) {
				3719	boolean_skip_table->set(i, kSkipArrayEntry);
				3720	}
				3721	int skip = max_lookahead + 1 - min_lookahead;
				3722
				3723	for (int i = max_lookahead; i >= min_lookahead; i--) {
				3724	BoyerMoorePositionInfo* map = bitmaps_->at(i);
				3725	for (int j = 0; j < kSize; j++) {
				3726	if (map->at(j)) {
				3727	boolean_skip_table->set(j, kDontSkipArrayEntry);
				3728	}
				3729	}
				3730	}
				3731
				3732	return skip;
				3733	}
				3734
				3735
				3736	// See comment above on the implementation of GetSkipTable.
				3737	void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
				3738	const int kSize = RegExpMacroAssembler::kTableSize;
				3739
				3740	int min_lookahead = 0;
				3741	int max_lookahead = 0;
				3742
				3743	if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
				3744
				3745	bool found_single_character = false;
				3746	int single_character = 0;
				3747	for (int i = max_lookahead; i >= min_lookahead; i--) {
				3748	BoyerMoorePositionInfo* map = bitmaps_->at(i);
				3749	if (map->map_count() > 1 \|\|
				3750	(found_single_character && map->map_count() != 0)) {
				3751	found_single_character = false;
				3752	break;
				3753	}
				3754	for (int j = 0; j < kSize; j++) {
				3755	if (map->at(j)) {
				3756	found_single_character = true;
				3757	single_character = j;
				3758	break;
				3759	}
				3760	}
				3761	}
				3762
				3763	int lookahead_width = max_lookahead + 1 - min_lookahead;
				3764
				3765	if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
				3766	// The mask-compare can probably handle this better.
				3767	return;
				3768	}
				3769
				3770	if (found_single_character) {
				3771	Label cont, again;
				3772	masm->Bind(&again);
				3773	masm->LoadCurrentCharacter(max_lookahead, &cont, true);
				3774	if (max_char_ > kSize) {
				3775	masm->CheckCharacterAfterAnd(single_character,
				3776	RegExpMacroAssembler::kTableMask,
				3777	&cont);
				3778	} else {
				3779	masm->CheckCharacter(single_character, &cont);
				3780	}
				3781	masm->AdvanceCurrentPosition(lookahead_width);
				3782	masm->GoTo(&again);
				3783	masm->Bind(&cont);
				3784	return;
				3785	}
				3786
				3787	Factory* factory = masm->isolate()->factory();
				3788	Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED);
				3789	int skip_distance = GetSkipTable(
				3790	min_lookahead, max_lookahead, boolean_skip_table);
				3791	DCHECK(skip_distance != 0);
				3792
				3793	Label cont, again;
				3794	masm->Bind(&again);
				3795	masm->LoadCurrentCharacter(max_lookahead, &cont, true);
				3796	masm->CheckBitInTable(boolean_skip_table, &cont);
				3797	masm->AdvanceCurrentPosition(skip_distance);
				3798	masm->GoTo(&again);
				3799	masm->Bind(&cont);
				3800	}
				3801
				3802
				3803	/* Code generation for choice nodes.
				3804	*
				3805	* We generate quick checks that do a mask and compare to eliminate a
				3806	* choice. If the quick check succeeds then it jumps to the continuation to
				3807	* do slow checks and check subsequent nodes. If it fails (the common case)
				3808	* it falls through to the next choice.
				3809	*
				3810	* Here is the desired flow graph. Nodes directly below each other imply
				3811	* fallthrough. Alternatives 1 and 2 have quick checks. Alternative
				3812	* 3 doesn't have a quick check so we have to call the slow check.
				3813	* Nodes are marked Qn for quick checks and Sn for slow checks. The entire
				3814	* regexp continuation is generated directly after the Sn node, up to the
				3815	* next GoTo if we decide to reuse some already generated code. Some
				3816	* nodes expect preload_characters to be preloaded into the current
				3817	* character register. R nodes do this preloading. Vertices are marked
				3818	* F for failures and S for success (possible success in the case of quick
				3819	* nodes). L, V, < and > are used as arrow heads.
				3820	*
				3821	* ----------> R
				3822	* \|
				3823	* V
				3824	* Q1 -----> S1
				3825	* \| S /
				3826	* F\| /
				3827	* \| F/
				3828	* \| /
				3829	* \| R
				3830	* \| /
				3831	* V L
				3832	* Q2 -----> S2
				3833	* \| S /
				3834	* F\| /
				3835	* \| F/
				3836	* \| /
				3837	* \| R
				3838	* \| /
				3839	* V L
				3840	* S3
				3841	* \|
				3842	* F\|
				3843	* \|
				3844	* R
				3845	* \|
				3846	* backtrack V
				3847	* <----------Q4
				3848	* \ F \|
				3849	* \ \|S
				3850	* \ F V
				3851	* \-----S4
				3852	*
				3853	* For greedy loops we push the current position, then generate the code that
				3854	* eats the input specially in EmitGreedyLoop. The other choice (the
				3855	* continuation) is generated by the normal code in EmitChoices, and steps back
				3856	* in the input to the starting position when it fails to match. The loop code
				3857	* looks like this (U is the unwind code that steps back in the greedy loop).
				3858	*
				3859	* _____
				3860	* / \
				3861	* V \|
				3862	* ----------> S1 \|
				3863	* /\| \|
				3864	* / \|S \|
				3865	* F/ \_____/
				3866	* /
				3867	* \|<-----
				3868	* \| \
				3869	* V \|S
				3870	* Q2 ---> U----->backtrack
				3871	* \| F /
				3872	* S\| /
				3873	* V F /
				3874	* S2--/
				3875	*/
				3876
				3877	GreedyLoopState::GreedyLoopState(bool not_at_start) {
				3878	counter_backtrack_trace_.set_backtrack(&label_);
				3879	if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
				3880	}
				3881
				3882
				3883	void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
				3884	#ifdef DEBUG
				3885	int choice_count = alternatives_->length();
				3886	for (int i = 0; i < choice_count - 1; i++) {
				3887	GuardedAlternative alternative = alternatives_->at(i);
				3888	ZoneList<Guard> guards = alternative.guards();
				3889	int guard_count = (guards == NULL) ? 0 : guards->length();
				3890	for (int j = 0; j < guard_count; j++) {
				3891	DCHECK(!trace->mentions_reg(guards->at(j)->reg()));
				3892	}
				3893	}
				3894	#endif
				3895	}
				3896
				3897
				3898	void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
				3899	Trace* current_trace,
				3900	PreloadState* state) {
				3901	if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
				3902	// Save some time by looking at most one machine word ahead.
				3903	state->eats_at_least_ =
				3904	EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
				3905	current_trace->at_start() == Trace::FALSE_VALUE);
				3906	}
				3907	state->preload_characters_ =
				3908	CalculatePreloadCharacters(compiler, state->eats_at_least_);
				3909
				3910	state->preload_is_current_ =
				3911	(current_trace->characters_preloaded() == state->preload_characters_);
				3912	state->preload_has_checked_bounds_ = state->preload_is_current_;
				3913	}
				3914
				3915
				3916	void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				3917	int choice_count = alternatives_->length();
				3918
				3919	AssertGuardsMentionRegisters(trace);
				3920
				3921	LimitResult limit_result = LimitVersions(compiler, trace);
				3922	if (limit_result == DONE) return;
				3923	DCHECK(limit_result == CONTINUE);
				3924
				3925	// For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
				3926	// other choice nodes we only flush if we are out of code size budget.
				3927	if (trace->flush_budget() == 0 && trace->actions() != NULL) {
				3928	trace->Flush(compiler, this);
				3929	return;
				3930	}
				3931
				3932	RecursionCheck rc(compiler);
				3933
				3934	PreloadState preload;
				3935	preload.init();
				3936	GreedyLoopState greedy_loop_state(not_at_start());
				3937
				3938	int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
				3939	AlternativeGenerationList alt_gens(choice_count, zone());
				3940
				3941	if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
				3942	trace = EmitGreedyLoop(compiler,
				3943	trace,
				3944	&alt_gens,
				3945	&preload,
				3946	&greedy_loop_state,
				3947	text_length);
				3948	} else {
				3949	// TODO(erikcorry): Delete this. We don't need this label, but it makes us
				3950	// match the traces produced pre-cleanup.
				3951	Label second_choice;
				3952	compiler->macro_assembler()->Bind(&second_choice);
				3953
				3954	preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
				3955
				3956	EmitChoices(compiler,
				3957	&alt_gens,
				3958	0,
				3959	trace,
				3960	&preload);
				3961	}
				3962
				3963	// At this point we need to generate slow checks for the alternatives where
				3964	// the quick check was inlined. We can recognize these because the associated
				3965	// label was bound.
				3966	int new_flush_budget = trace->flush_budget() / choice_count;
				3967	for (int i = 0; i < choice_count; i++) {
				3968	AlternativeGeneration* alt_gen = alt_gens.at(i);
				3969	Trace new_trace(*trace);
				3970	// If there are actions to be flushed we have to limit how many times
				3971	// they are flushed. Take the budget of the parent trace and distribute
				3972	// it fairly amongst the children.
				3973	if (new_trace.actions() != NULL) {
				3974	new_trace.set_flush_budget(new_flush_budget);
				3975	}
				3976	bool next_expects_preload =
				3977	i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
				3978	EmitOutOfLineContinuation(compiler,
				3979	&new_trace,
				3980	alternatives_->at(i),
				3981	alt_gen,
				3982	preload.preload_characters_,
				3983	next_expects_preload);
				3984	}
				3985	}
				3986
				3987
				3988	Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
				3989	Trace* trace,
				3990	AlternativeGenerationList* alt_gens,
				3991	PreloadState* preload,
				3992	GreedyLoopState* greedy_loop_state,
				3993	int text_length) {
				3994	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				3995	// Here we have special handling for greedy loops containing only text nodes
				3996	// and other simple nodes. These are handled by pushing the current
				3997	// position on the stack and then incrementing the current position each
				3998	// time around the switch. On backtrack we decrement the current position
				3999	// and check it against the pushed value. This avoids pushing backtrack
				4000	// information for each iteration of the loop, which could take up a lot of
				4001	// space.
				4002	DCHECK(trace->stop_node() == NULL);
				4003	macro_assembler->PushCurrentPosition();
				4004	Label greedy_match_failed;
				4005	Trace greedy_match_trace;
				4006	if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
				4007	greedy_match_trace.set_backtrack(&greedy_match_failed);
				4008	Label loop_label;
				4009	macro_assembler->Bind(&loop_label);
				4010	greedy_match_trace.set_stop_node(this);
				4011	greedy_match_trace.set_loop_label(&loop_label);
				4012	alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
				4013	macro_assembler->Bind(&greedy_match_failed);
				4014
				4015	Label second_choice; // For use in greedy matches.
				4016	macro_assembler->Bind(&second_choice);
				4017
				4018	Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
				4019
				4020	EmitChoices(compiler,
				4021	alt_gens,
				4022	1,
				4023	new_trace,
				4024	preload);
				4025
				4026	macro_assembler->Bind(greedy_loop_state->label());
				4027	// If we have unwound to the bottom then backtrack.
				4028	macro_assembler->CheckGreedyLoop(trace->backtrack());
				4029	// Otherwise try the second priority at an earlier position.
				4030	macro_assembler->AdvanceCurrentPosition(-text_length);
				4031	macro_assembler->GoTo(&second_choice);
				4032	return new_trace;
				4033	}
				4034
				4035	int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
				4036	Trace* trace) {
				4037	int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
				4038	if (alternatives_->length() != 2) return eats_at_least;
				4039
				4040	GuardedAlternative alt1 = alternatives_->at(1);
				4041	if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
				4042	return eats_at_least;
				4043	}
				4044	RegExpNode* eats_anything_node = alt1.node();
				4045	if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
				4046	return eats_at_least;
				4047	}
				4048
				4049	// Really we should be creating a new trace when we execute this function,
				4050	// but there is no need, because the code it generates cannot backtrack, and
				4051	// we always arrive here with a trivial trace (since it's the entry to a
				4052	// loop. That also implies that there are no preloaded characters, which is
				4053	// good, because it means we won't be violating any assumptions by
				4054	// overwriting those characters with new load instructions.
				4055	DCHECK(trace->is_trivial());
				4056
				4057	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				4058	Isolate* isolate = macro_assembler->isolate();
				4059	// At this point we know that we are at a non-greedy loop that will eat
				4060	// any character one at a time. Any non-anchored regexp has such a
				4061	// loop prepended to it in order to find where it starts. We look for
				4062	// a pattern of the form ...abc... where we can look 6 characters ahead
				4063	// and step forwards 3 if the character is not one of abc. Abc need
				4064	// not be atoms, they can be any reasonably limited character class or
				4065	// small alternation.
				4066	BoyerMooreLookahead* bm = bm_info(false);
				4067	if (bm == NULL) {
				4068	eats_at_least = Min(kMaxLookaheadForBoyerMoore,
				4069	EatsAtLeast(kMaxLookaheadForBoyerMoore,
				4070	kRecursionBudget,
				4071	false));
				4072	if (eats_at_least >= 1) {
				4073	bm = new(zone()) BoyerMooreLookahead(eats_at_least,
				4074	compiler,
				4075	zone());
				4076	GuardedAlternative alt0 = alternatives_->at(0);
				4077	alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
				4078	}
				4079	}
				4080	if (bm != NULL) {
				4081	bm->EmitSkipInstructions(macro_assembler);
				4082	}
				4083	return eats_at_least;
				4084	}
				4085
				4086
				4087	void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
				4088	AlternativeGenerationList* alt_gens,
				4089	int first_choice,
				4090	Trace* trace,
				4091	PreloadState* preload) {
				4092	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				4093	SetUpPreLoad(compiler, trace, preload);
				4094
				4095	// For now we just call all choices one after the other. The idea ultimately
				4096	// is to use the Dispatch table to try only the relevant ones.
				4097	int choice_count = alternatives_->length();
				4098
				4099	int new_flush_budget = trace->flush_budget() / choice_count;
				4100
				4101	for (int i = first_choice; i < choice_count; i++) {
				4102	bool is_last = i == choice_count - 1;
				4103	bool fall_through_on_failure = !is_last;
				4104	GuardedAlternative alternative = alternatives_->at(i);
				4105	AlternativeGeneration* alt_gen = alt_gens->at(i);
				4106	alt_gen->quick_check_details.set_characters(preload->preload_characters_);
				4107	ZoneList<Guard> guards = alternative.guards();
				4108	int guard_count = (guards == NULL) ? 0 : guards->length();
				4109	Trace new_trace(*trace);
				4110	new_trace.set_characters_preloaded(preload->preload_is_current_ ?
				4111	preload->preload_characters_ :
				4112	0);
				4113	if (preload->preload_has_checked_bounds_) {
				4114	new_trace.set_bound_checked_up_to(preload->preload_characters_);
				4115	}
				4116	new_trace.quick_check_performed()->Clear();
				4117	if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
				4118	if (!is_last) {
				4119	new_trace.set_backtrack(&alt_gen->after);
				4120	}
				4121	alt_gen->expects_preload = preload->preload_is_current_;
				4122	bool generate_full_check_inline = false;
				4123	if (compiler->optimize() &&
				4124	try_to_emit_quick_check_for_alternative(i == 0) &&
				4125	alternative.node()->EmitQuickCheck(
				4126	compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
				4127	&alt_gen->possible_success, &alt_gen->quick_check_details,
				4128	fall_through_on_failure)) {
				4129	// Quick check was generated for this choice.
				4130	preload->preload_is_current_ = true;
				4131	preload->preload_has_checked_bounds_ = true;
				4132	// If we generated the quick check to fall through on possible success,
				4133	// we now need to generate the full check inline.
				4134	if (!fall_through_on_failure) {
				4135	macro_assembler->Bind(&alt_gen->possible_success);
				4136	new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
				4137	new_trace.set_characters_preloaded(preload->preload_characters_);
				4138	new_trace.set_bound_checked_up_to(preload->preload_characters_);
				4139	generate_full_check_inline = true;
				4140	}
				4141	} else if (alt_gen->quick_check_details.cannot_match()) {
				4142	if (!fall_through_on_failure) {
				4143	macro_assembler->GoTo(trace->backtrack());
				4144	}
				4145	continue;
				4146	} else {
				4147	// No quick check was generated. Put the full code here.
				4148	// If this is not the first choice then there could be slow checks from
				4149	// previous cases that go here when they fail. There's no reason to
				4150	// insist that they preload characters since the slow check we are about
				4151	// to generate probably can't use it.
				4152	if (i != first_choice) {
				4153	alt_gen->expects_preload = false;
				4154	new_trace.InvalidateCurrentCharacter();
				4155	}
				4156	generate_full_check_inline = true;
				4157	}
				4158	if (generate_full_check_inline) {
				4159	if (new_trace.actions() != NULL) {
				4160	new_trace.set_flush_budget(new_flush_budget);
				4161	}
				4162	for (int j = 0; j < guard_count; j++) {
				4163	GenerateGuard(macro_assembler, guards->at(j), &new_trace);
				4164	}
				4165	alternative.node()->Emit(compiler, &new_trace);
				4166	preload->preload_is_current_ = false;
				4167	}
				4168	macro_assembler->Bind(&alt_gen->after);
				4169	}
				4170	}
				4171
				4172
				4173	void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
				4174	Trace* trace,
				4175	GuardedAlternative alternative,
				4176	AlternativeGeneration* alt_gen,
				4177	int preload_characters,
				4178	bool next_expects_preload) {
				4179	if (!alt_gen->possible_success.is_linked()) return;
				4180
				4181	RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
				4182	macro_assembler->Bind(&alt_gen->possible_success);
				4183	Trace out_of_line_trace(*trace);
				4184	out_of_line_trace.set_characters_preloaded(preload_characters);
				4185	out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
				4186	if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
				4187	ZoneList<Guard> guards = alternative.guards();
				4188	int guard_count = (guards == NULL) ? 0 : guards->length();
				4189	if (next_expects_preload) {
				4190	Label reload_current_char;
				4191	out_of_line_trace.set_backtrack(&reload_current_char);
				4192	for (int j = 0; j < guard_count; j++) {
				4193	GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
				4194	}
				4195	alternative.node()->Emit(compiler, &out_of_line_trace);
				4196	macro_assembler->Bind(&reload_current_char);
				4197	// Reload the current character, since the next quick check expects that.
				4198	// We don't need to check bounds here because we only get into this
				4199	// code through a quick check which already did the checked load.
				4200	macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
				4201	NULL,
				4202	false,
				4203	preload_characters);
				4204	macro_assembler->GoTo(&(alt_gen->after));
				4205	} else {
				4206	out_of_line_trace.set_backtrack(&(alt_gen->after));
				4207	for (int j = 0; j < guard_count; j++) {
				4208	GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
				4209	}
				4210	alternative.node()->Emit(compiler, &out_of_line_trace);
				4211	}
				4212	}
				4213
				4214
				4215	void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				4216	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				4217	LimitResult limit_result = LimitVersions(compiler, trace);
				4218	if (limit_result == DONE) return;
				4219	DCHECK(limit_result == CONTINUE);
				4220
				4221	RecursionCheck rc(compiler);
				4222
				4223	switch (action_type_) {
				4224	case STORE_POSITION: {
				4225	Trace::DeferredCapture
				4226	new_capture(data_.u_position_register.reg,
				4227	data_.u_position_register.is_capture,
				4228	trace);
				4229	Trace new_trace = *trace;
				4230	new_trace.add_action(&new_capture);
				4231	on_success()->Emit(compiler, &new_trace);
				4232	break;
				4233	}
				4234	case INCREMENT_REGISTER: {
				4235	Trace::DeferredIncrementRegister
				4236	new_increment(data_.u_increment_register.reg);
				4237	Trace new_trace = *trace;
				4238	new_trace.add_action(&new_increment);
				4239	on_success()->Emit(compiler, &new_trace);
				4240	break;
				4241	}
				4242	case SET_REGISTER: {
				4243	Trace::DeferredSetRegister
				4244	new_set(data_.u_store_register.reg, data_.u_store_register.value);
				4245	Trace new_trace = *trace;
				4246	new_trace.add_action(&new_set);
				4247	on_success()->Emit(compiler, &new_trace);
				4248	break;
				4249	}
				4250	case CLEAR_CAPTURES: {
				4251	Trace::DeferredClearCaptures
				4252	new_capture(Interval(data_.u_clear_captures.range_from,
				4253	data_.u_clear_captures.range_to));
				4254	Trace new_trace = *trace;
				4255	new_trace.add_action(&new_capture);
				4256	on_success()->Emit(compiler, &new_trace);
				4257	break;
				4258	}
				4259	case BEGIN_SUBMATCH:
				4260	if (!trace->is_trivial()) {
				4261	trace->Flush(compiler, this);
				4262	} else {
				4263	assembler->WriteCurrentPositionToRegister(
				4264	data_.u_submatch.current_position_register, 0);
				4265	assembler->WriteStackPointerToRegister(
				4266	data_.u_submatch.stack_pointer_register);
				4267	on_success()->Emit(compiler, trace);
				4268	}
				4269	break;
				4270	case EMPTY_MATCH_CHECK: {
				4271	int start_pos_reg = data_.u_empty_match_check.start_register;
				4272	int stored_pos = 0;
				4273	int rep_reg = data_.u_empty_match_check.repetition_register;
				4274	bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
				4275	bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
				4276	if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
				4277	// If we know we haven't advanced and there is no minimum we
				4278	// can just backtrack immediately.
				4279	assembler->GoTo(trace->backtrack());
				4280	} else if (know_dist && stored_pos < trace->cp_offset()) {
				4281	// If we know we've advanced we can generate the continuation
				4282	// immediately.
				4283	on_success()->Emit(compiler, trace);
				4284	} else if (!trace->is_trivial()) {
				4285	trace->Flush(compiler, this);
				4286	} else {
				4287	Label skip_empty_check;
				4288	// If we have a minimum number of repetitions we check the current
				4289	// number first and skip the empty check if it's not enough.
				4290	if (has_minimum) {
				4291	int limit = data_.u_empty_match_check.repetition_limit;
				4292	assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
				4293	}
				4294	// If the match is empty we bail out, otherwise we fall through
				4295	// to the on-success continuation.
				4296	assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
				4297	trace->backtrack());
				4298	assembler->Bind(&skip_empty_check);
				4299	on_success()->Emit(compiler, trace);
				4300	}
				4301	break;
				4302	}
				4303	case POSITIVE_SUBMATCH_SUCCESS: {
				4304	if (!trace->is_trivial()) {
				4305	trace->Flush(compiler, this);
				4306	return;
				4307	}
				4308	assembler->ReadCurrentPositionFromRegister(
				4309	data_.u_submatch.current_position_register);
				4310	assembler->ReadStackPointerFromRegister(
				4311	data_.u_submatch.stack_pointer_register);
				4312	int clear_register_count = data_.u_submatch.clear_register_count;
				4313	if (clear_register_count == 0) {
				4314	on_success()->Emit(compiler, trace);
				4315	return;
				4316	}
				4317	int clear_registers_from = data_.u_submatch.clear_register_from;
				4318	Label clear_registers_backtrack;
				4319	Trace new_trace = *trace;
				4320	new_trace.set_backtrack(&clear_registers_backtrack);
				4321	on_success()->Emit(compiler, &new_trace);
				4322
				4323	assembler->Bind(&clear_registers_backtrack);
				4324	int clear_registers_to = clear_registers_from + clear_register_count - 1;
				4325	assembler->ClearRegisters(clear_registers_from, clear_registers_to);
				4326
				4327	DCHECK(trace->backtrack() == NULL);
				4328	assembler->Backtrack();
				4329	return;
				4330	}
				4331	default:
				4332	UNREACHABLE();
				4333	}
				4334	}
				4335
				4336
				4337	void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
				4338	RegExpMacroAssembler* assembler = compiler->macro_assembler();
				4339	if (!trace->is_trivial()) {
				4340	trace->Flush(compiler, this);
				4341	return;
				4342	}
				4343
				4344	LimitResult limit_result = LimitVersions(compiler, trace);
				4345	if (limit_result == DONE) return;
				4346	DCHECK(limit_result == CONTINUE);
				4347
				4348	RecursionCheck rc(compiler);
				4349
				4350	DCHECK_EQ(start_reg_ + 1, end_reg_);
				4351	if (compiler->ignore_case()) {
				4352	assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
				4353	trace->backtrack());
				4354	} else {
				4355	assembler->CheckNotBackReference(start_reg_, read_backward(),
				4356	trace->backtrack());
				4357	}
				4358	// We are going to advance backward, so we may end up at the start.
				4359	if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
				4360	on_success()->Emit(compiler, trace);
				4361	}
				4362
				4363
				4364	// -------------------------------------------------------------------
				4365	// Dot/dotty output
				4366
				4367
				4368	#ifdef DEBUG
				4369
				4370
				4371	class DotPrinter: public NodeVisitor {
				4372	public:
				4373	DotPrinter(std::ostream& os, bool ignore_case) // NOLINT
				4374	: os_(os),
				4375	ignore_case_(ignore_case) {}
				4376	void PrintNode(const char* label, RegExpNode* node);
				4377	void Visit(RegExpNode* node);
				4378	void PrintAttributes(RegExpNode* from);
				4379	void PrintOnFailure(RegExpNode* from, RegExpNode* to);
				4380	#define DECLARE_VISIT(Type) \
				4381	virtual void Visit##Type(Type##Node* that);
				4382	FOR_EACH_NODE_TYPE(DECLARE_VISIT)
				4383	#undef DECLARE_VISIT
				4384	private:
				4385	std::ostream& os_;
				4386	bool ignore_case_;
				4387	};
				4388
				4389
				4390	void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
				4391	os_ << "digraph G {\n graph [label=\"";
				4392	for (int i = 0; label[i]; i++) {
				4393	switch (label[i]) {
				4394	case '\\':
				4395	os_ << "\\\\";
				4396	break;
				4397	case '"':
				4398	os_ << "\"";
				4399	break;
				4400	default:
				4401	os_ << label[i];
				4402	break;
				4403	}
				4404	}
				4405	os_ << "\"];\n";
				4406	Visit(node);
				4407	os_ << "}" << std::endl;
				4408	}
				4409
				4410
				4411	void DotPrinter::Visit(RegExpNode* node) {
				4412	if (node->info()->visited) return;
				4413	node->info()->visited = true;
				4414	node->Accept(this);
				4415	}
				4416
				4417
				4418	void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
				4419	os_ << " n" << from << " -> n" << on_failure << " [style=dotted];\n";
				4420	Visit(on_failure);
				4421	}
				4422
				4423
				4424	class TableEntryBodyPrinter {
				4425	public:
				4426	TableEntryBodyPrinter(std::ostream& os, ChoiceNode* choice) // NOLINT
				4427	: os_(os),
				4428	choice_(choice) {}
				4429	void Call(uc16 from, DispatchTable::Entry entry) {
				4430	OutSet* out_set = entry.out_set();
				4431	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				4432	if (out_set->Get(i)) {
				4433	os_ << " n" << choice() << ":s" << from << "o" << i << " -> n"
				4434	<< choice()->alternatives()->at(i).node() << ";\n";
				4435	}
				4436	}
				4437	}
				4438	private:
				4439	ChoiceNode* choice() { return choice_; }
				4440	std::ostream& os_;
				4441	ChoiceNode* choice_;
				4442	};
				4443
				4444
				4445	class TableEntryHeaderPrinter {
				4446	public:
				4447	explicit TableEntryHeaderPrinter(std::ostream& os) // NOLINT
				4448	: first_(true),
				4449	os_(os) {}
				4450	void Call(uc16 from, DispatchTable::Entry entry) {
				4451	if (first_) {
				4452	first_ = false;
				4453	} else {
				4454	os_ << "\|";
				4455	}
				4456	os_ << "{\\" << AsUC16(from) << "-\\" << AsUC16(entry.to()) << "\|{";
				4457	OutSet* out_set = entry.out_set();
				4458	int priority = 0;
				4459	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				4460	if (out_set->Get(i)) {
				4461	if (priority > 0) os_ << "\|";
				4462	os_ << "<s" << from << "o" << i << "> " << priority;
				4463	priority++;
				4464	}
				4465	}
				4466	os_ << "}}";
				4467	}
				4468
				4469	private:
				4470	bool first_;
				4471	std::ostream& os_;
				4472	};
				4473
				4474
				4475	class AttributePrinter {
				4476	public:
				4477	explicit AttributePrinter(std::ostream& os) // NOLINT
				4478	: os_(os),
				4479	first_(true) {}
				4480	void PrintSeparator() {
				4481	if (first_) {
				4482	first_ = false;
				4483	} else {
				4484	os_ << "\|";
				4485	}
				4486	}
				4487	void PrintBit(const char* name, bool value) {
				4488	if (!value) return;
				4489	PrintSeparator();
				4490	os_ << "{" << name << "}";
				4491	}
				4492	void PrintPositive(const char* name, int value) {
				4493	if (value < 0) return;
				4494	PrintSeparator();
				4495	os_ << "{" << name << "\|" << value << "}";
				4496	}
				4497
				4498	private:
				4499	std::ostream& os_;
				4500	bool first_;
				4501	};
				4502
				4503
				4504	void DotPrinter::PrintAttributes(RegExpNode* that) {
				4505	os_ << " a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, "
				4506	<< "margin=0.1, fontsize=10, label=\"{";
				4507	AttributePrinter printer(os_);
				4508	NodeInfo* info = that->info();
				4509	printer.PrintBit("NI", info->follows_newline_interest);
				4510	printer.PrintBit("WI", info->follows_word_interest);
				4511	printer.PrintBit("SI", info->follows_start_interest);
				4512	Label* label = that->label();
				4513	if (label->is_bound())
				4514	printer.PrintPositive("@", label->pos());
				4515	os_ << "}\"];\n"
				4516	<< " a" << that << " -> n" << that
				4517	<< " [style=dashed, color=grey, arrowhead=none];\n";
				4518	}
				4519
				4520
				4521	static const bool kPrintDispatchTable = false;
				4522	void DotPrinter::VisitChoice(ChoiceNode* that) {
				4523	if (kPrintDispatchTable) {
				4524	os_ << " n" << that << " [shape=Mrecord, label=\"";
				4525	TableEntryHeaderPrinter header_printer(os_);
				4526	that->GetTable(ignore_case_)->ForEach(&header_printer);
				4527	os_ << "\"]\n";
				4528	PrintAttributes(that);
				4529	TableEntryBodyPrinter body_printer(os_, that);
				4530	that->GetTable(ignore_case_)->ForEach(&body_printer);
				4531	} else {
				4532	os_ << " n" << that << " [shape=Mrecord, label=\"?\"];\n";
				4533	for (int i = 0; i < that->alternatives()->length(); i++) {
				4534	GuardedAlternative alt = that->alternatives()->at(i);
				4535	os_ << " n" << that << " -> n" << alt.node();
				4536	}
				4537	}
				4538	for (int i = 0; i < that->alternatives()->length(); i++) {
				4539	GuardedAlternative alt = that->alternatives()->at(i);
				4540	alt.node()->Accept(this);
				4541	}
				4542	}
				4543
				4544
				4545	void DotPrinter::VisitText(TextNode* that) {
				4546	Zone* zone = that->zone();
				4547	os_ << " n" << that << " [label=\"";
				4548	for (int i = 0; i < that->elements()->length(); i++) {
				4549	if (i > 0) os_ << " ";
				4550	TextElement elm = that->elements()->at(i);
				4551	switch (elm.text_type()) {
				4552	case TextElement::ATOM: {
				4553	Vector<const uc16> data = elm.atom()->data();
				4554	for (int i = 0; i < data.length(); i++) {
				4555	os_ << static_cast<char>(data[i]);
				4556	}
				4557	break;
				4558	}
				4559	case TextElement::CHAR_CLASS: {
				4560	RegExpCharacterClass* node = elm.char_class();
				4561	os_ << "[";
				4562	if (node->is_negated()) os_ << "^";
				4563	for (int j = 0; j < node->ranges(zone)->length(); j++) {
				4564	CharacterRange range = node->ranges(zone)->at(j);
				4565	os_ << AsUC16(range.from()) << "-" << AsUC16(range.to());
				4566	}
				4567	os_ << "]";
				4568	break;
				4569	}
				4570	default:
				4571	UNREACHABLE();
				4572	}
				4573	}
				4574	os_ << "\", shape=box, peripheries=2];\n";
				4575	PrintAttributes(that);
				4576	os_ << " n" << that << " -> n" << that->on_success() << ";\n";
				4577	Visit(that->on_success());
				4578	}
				4579
				4580
				4581	void DotPrinter::VisitBackReference(BackReferenceNode* that) {
				4582	os_ << " n" << that << " [label=\"$" << that->start_register() << "..$"
				4583	<< that->end_register() << "\", shape=doubleoctagon];\n";
				4584	PrintAttributes(that);
				4585	os_ << " n" << that << " -> n" << that->on_success() << ";\n";
				4586	Visit(that->on_success());
				4587	}
				4588
				4589
				4590	void DotPrinter::VisitEnd(EndNode* that) {
				4591	os_ << " n" << that << " [style=bold, shape=point];\n";
				4592	PrintAttributes(that);
				4593	}
				4594
				4595
				4596	void DotPrinter::VisitAssertion(AssertionNode* that) {
				4597	os_ << " n" << that << " [";
				4598	switch (that->assertion_type()) {
				4599	case AssertionNode::AT_END:
				4600	os_ << "label=\"$\", shape=septagon";
				4601	break;
				4602	case AssertionNode::AT_START:
				4603	os_ << "label=\"^\", shape=septagon";
				4604	break;
				4605	case AssertionNode::AT_BOUNDARY:
				4606	os_ << "label=\"\\b\", shape=septagon";
				4607	break;
				4608	case AssertionNode::AT_NON_BOUNDARY:
				4609	os_ << "label=\"\\B\", shape=septagon";
				4610	break;
				4611	case AssertionNode::AFTER_NEWLINE:
				4612	os_ << "label=\"(?<=\\n)\", shape=septagon";
				4613	break;
				4614	}
				4615	os_ << "];\n";
				4616	PrintAttributes(that);
				4617	RegExpNode* successor = that->on_success();
				4618	os_ << " n" << that << " -> n" << successor << ";\n";
				4619	Visit(successor);
				4620	}
				4621
				4622
				4623	void DotPrinter::VisitAction(ActionNode* that) {
				4624	os_ << " n" << that << " [";
				4625	switch (that->action_type_) {
				4626	case ActionNode::SET_REGISTER:
				4627	os_ << "label=\"$" << that->data_.u_store_register.reg
				4628	<< ":=" << that->data_.u_store_register.value << "\", shape=octagon";
				4629	break;
				4630	case ActionNode::INCREMENT_REGISTER:
				4631	os_ << "label=\"$" << that->data_.u_increment_register.reg
				4632	<< "++\", shape=octagon";
				4633	break;
				4634	case ActionNode::STORE_POSITION:
				4635	os_ << "label=\"$" << that->data_.u_position_register.reg
				4636	<< ":=$pos\", shape=octagon";
				4637	break;
				4638	case ActionNode::BEGIN_SUBMATCH:
				4639	os_ << "label=\"$" << that->data_.u_submatch.current_position_register
				4640	<< ":=$pos,begin\", shape=septagon";
				4641	break;
				4642	case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
				4643	os_ << "label=\"escape\", shape=septagon";
				4644	break;
				4645	case ActionNode::EMPTY_MATCH_CHECK:
				4646	os_ << "label=\"$" << that->data_.u_empty_match_check.start_register
				4647	<< "=$pos?,$" << that->data_.u_empty_match_check.repetition_register
				4648	<< "<" << that->data_.u_empty_match_check.repetition_limit
				4649	<< "?\", shape=septagon";
				4650	break;
				4651	case ActionNode::CLEAR_CAPTURES: {
				4652	os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from
				4653	<< " to $" << that->data_.u_clear_captures.range_to
				4654	<< "\", shape=septagon";
				4655	break;
				4656	}
				4657	}
				4658	os_ << "];\n";
				4659	PrintAttributes(that);
				4660	RegExpNode* successor = that->on_success();
				4661	os_ << " n" << that << " -> n" << successor << ";\n";
				4662	Visit(successor);
				4663	}
				4664
				4665
				4666	class DispatchTableDumper {
				4667	public:
				4668	explicit DispatchTableDumper(std::ostream& os) : os_(os) {}
				4669	void Call(uc16 key, DispatchTable::Entry entry);
				4670	private:
				4671	std::ostream& os_;
				4672	};
				4673
				4674
				4675	void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
				4676	os_ << "[" << AsUC16(key) << "-" << AsUC16(entry.to()) << "]: {";
				4677	OutSet* set = entry.out_set();
				4678	bool first = true;
				4679	for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
				4680	if (set->Get(i)) {
				4681	if (first) {
				4682	first = false;
				4683	} else {
				4684	os_ << ", ";
				4685	}
				4686	os_ << i;
				4687	}
				4688	}
				4689	os_ << "}\n";
				4690	}
				4691
				4692
				4693	void DispatchTable::Dump() {
				4694	OFStream os(stderr);
				4695	DispatchTableDumper dumper(os);
				4696	tree()->ForEach(&dumper);
				4697	}
				4698
				4699
				4700	void RegExpEngine::DotPrint(const char* label,
				4701	RegExpNode* node,
				4702	bool ignore_case) {
				4703	OFStream os(stdout);
				4704	DotPrinter printer(os, ignore_case);
				4705	printer.PrintNode(label, node);
				4706	}
				4707
				4708
				4709	#endif // DEBUG
				4710
				4711
				4712	// -------------------------------------------------------------------
				4713	// Tree to graph conversion
				4714
				4715	RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
				4716	RegExpNode* on_success) {
				4717	ZoneList<TextElement>* elms =
				4718	new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
				4719	elms->Add(TextElement::Atom(this), compiler->zone());
				4720	return new (compiler->zone())
				4721	TextNode(elms, compiler->read_backward(), on_success);
				4722	}
				4723
				4724
				4725	RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
				4726	RegExpNode* on_success) {
				4727	return new (compiler->zone())
				4728	TextNode(elements(), compiler->read_backward(), on_success);
				4729	}
				4730
				4731
				4732	static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
				4733	const int* special_class,
				4734	int length) {
				4735	length--; // Remove final 0x10000.
				4736	DCHECK(special_class[length] == 0x10000);
				4737	DCHECK(ranges->length() != 0);
				4738	DCHECK(length != 0);
				4739	DCHECK(special_class[0] != 0);
				4740	if (ranges->length() != (length >> 1) + 1) {
				4741	return false;
				4742	}
				4743	CharacterRange range = ranges->at(0);
				4744	if (range.from() != 0) {
				4745	return false;
				4746	}
				4747	for (int i = 0; i < length; i += 2) {
				4748	if (special_class[i] != (range.to() + 1)) {
				4749	return false;
				4750	}
				4751	range = ranges->at((i >> 1) + 1);
				4752	if (special_class[i+1] != range.from()) {
				4753	return false;
				4754	}
				4755	}
				4756	if (range.to() != 0xffff) {
				4757	return false;
				4758	}
				4759	return true;
				4760	}
				4761
				4762
				4763	static bool CompareRanges(ZoneList<CharacterRange>* ranges,
				4764	const int* special_class,
				4765	int length) {
				4766	length--; // Remove final 0x10000.
				4767	DCHECK(special_class[length] == 0x10000);
				4768	if (ranges->length() * 2 != length) {
				4769	return false;
				4770	}
				4771	for (int i = 0; i < length; i += 2) {
				4772	CharacterRange range = ranges->at(i >> 1);
				4773	if (range.from() != special_class[i] \|\|
				4774	range.to() != special_class[i + 1] - 1) {
				4775	return false;
				4776	}
				4777	}
				4778	return true;
				4779	}
				4780
				4781
				4782	bool RegExpCharacterClass::is_standard(Zone* zone) {
				4783	// TODO(lrn): Remove need for this function, by not throwing away information
				4784	// along the way.
				4785	if (is_negated_) {
				4786	return false;
				4787	}
				4788	if (set_.is_standard()) {
				4789	return true;
				4790	}
				4791	if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
				4792	set_.set_standard_set_type('s');
				4793	return true;
				4794	}
				4795	if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
				4796	set_.set_standard_set_type('S');
				4797	return true;
				4798	}
				4799	if (CompareInverseRanges(set_.ranges(zone),
				4800	kLineTerminatorRanges,
				4801	kLineTerminatorRangeCount)) {
				4802	set_.set_standard_set_type('.');
				4803	return true;
				4804	}
				4805	if (CompareRanges(set_.ranges(zone),
				4806	kLineTerminatorRanges,
				4807	kLineTerminatorRangeCount)) {
				4808	set_.set_standard_set_type('n');
				4809	return true;
				4810	}
				4811	if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
				4812	set_.set_standard_set_type('w');
				4813	return true;
				4814	}
				4815	if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
				4816	set_.set_standard_set_type('W');
				4817	return true;
				4818	}
				4819	return false;
				4820	}
				4821
				4822
				4823	RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
				4824	RegExpNode* on_success) {
				4825	return new (compiler->zone())
				4826	TextNode(this, compiler->read_backward(), on_success);
				4827	}
				4828
				4829
				4830	int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
				4831	RegExpAtom* atom1 = (*a)->AsAtom();
				4832	RegExpAtom* atom2 = (*b)->AsAtom();
				4833	uc16 character1 = atom1->data().at(0);
				4834	uc16 character2 = atom2->data().at(0);
				4835	if (character1 < character2) return -1;
				4836	if (character1 > character2) return 1;
				4837	return 0;
				4838	}
				4839
				4840
				4841	static unibrow::uchar Canonical(
				4842	unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
				4843	unibrow::uchar c) {
				4844	unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
				4845	int length = canonicalize->get(c, '\0', chars);
				4846	DCHECK_LE(length, 1);
				4847	unibrow::uchar canonical = c;
				4848	if (length == 1) canonical = chars[0];
				4849	return canonical;
				4850	}
				4851
				4852
				4853	int CompareFirstCharCaseIndependent(
				4854	unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
				4855	RegExpTree* const* a, RegExpTree* const* b) {
				4856	RegExpAtom* atom1 = (*a)->AsAtom();
				4857	RegExpAtom* atom2 = (*b)->AsAtom();
				4858	unibrow::uchar character1 = atom1->data().at(0);
				4859	unibrow::uchar character2 = atom2->data().at(0);
				4860	if (character1 == character2) return 0;
				4861	if (character1 >= 'a' \|\| character2 >= 'a') {
				4862	character1 = Canonical(canonicalize, character1);
				4863	character2 = Canonical(canonicalize, character2);
				4864	}
				4865	return static_cast<int>(character1) - static_cast<int>(character2);
				4866	}
				4867
				4868
				4869	// We can stable sort runs of atoms, since the order does not matter if they
				4870	// start with different characters.
				4871	// Returns true if any consecutive atoms were found.
				4872	bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
				4873	ZoneList<RegExpTree> alternatives = this->alternatives();
				4874	int length = alternatives->length();
				4875	bool found_consecutive_atoms = false;
				4876	for (int i = 0; i < length; i++) {
				4877	while (i < length) {
				4878	RegExpTree* alternative = alternatives->at(i);
				4879	if (alternative->IsAtom()) break;
				4880	i++;
				4881	}
				4882	// i is length or it is the index of an atom.
				4883	if (i == length) break;
				4884	int first_atom = i;
				4885	i++;
				4886	while (i < length) {
				4887	RegExpTree* alternative = alternatives->at(i);
				4888	if (!alternative->IsAtom()) break;
				4889	i++;
				4890	}
				4891	// Sort atoms to get ones with common prefixes together.
				4892	// This step is more tricky if we are in a case-independent regexp,
				4893	// because it would change /is\|I/ to /I\|is/, and order matters when
				4894	// the regexp parts don't match only disjoint starting points. To fix
				4895	// this we have a version of CompareFirstChar that uses case-
				4896	// independent character classes for comparison.
				4897	DCHECK_LT(first_atom, alternatives->length());
				4898	DCHECK_LE(i, alternatives->length());
				4899	DCHECK_LE(first_atom, i);
				4900	if (compiler->ignore_case()) {
				4901	unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
				4902	compiler->isolate()->regexp_macro_assembler_canonicalize();
				4903	auto compare_closure =
				4904	[canonicalize](RegExpTree* const* a, RegExpTree* const* b) {
				4905	return CompareFirstCharCaseIndependent(canonicalize, a, b);
				4906	};
				4907	alternatives->StableSort(compare_closure, first_atom, i - first_atom);
				4908	} else {
				4909	alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
				4910	}
				4911	if (i - first_atom > 1) found_consecutive_atoms = true;
				4912	}
				4913	return found_consecutive_atoms;
				4914	}
				4915
				4916
				4917	// Optimizes ab\|ac\|az to a(?:b\|c\|d).
				4918	void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
				4919	Zone* zone = compiler->zone();
				4920	ZoneList<RegExpTree> alternatives = this->alternatives();
				4921	int length = alternatives->length();
				4922
				4923	int write_posn = 0;
				4924	int i = 0;
				4925	while (i < length) {
				4926	RegExpTree* alternative = alternatives->at(i);
				4927	if (!alternative->IsAtom()) {
				4928	alternatives->at(write_posn++) = alternatives->at(i);
				4929	i++;
				4930	continue;
				4931	}
				4932	RegExpAtom* atom = alternative->AsAtom();
				4933	unibrow::uchar common_prefix = atom->data().at(0);
				4934	int first_with_prefix = i;
				4935	int prefix_length = atom->length();
				4936	i++;
				4937	while (i < length) {
				4938	alternative = alternatives->at(i);
				4939	if (!alternative->IsAtom()) break;
				4940	atom = alternative->AsAtom();
				4941	unibrow::uchar new_prefix = atom->data().at(0);
				4942	if (new_prefix != common_prefix) {
				4943	if (!compiler->ignore_case()) break;
				4944	unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
				4945	compiler->isolate()->regexp_macro_assembler_canonicalize();
				4946	new_prefix = Canonical(canonicalize, new_prefix);
				4947	common_prefix = Canonical(canonicalize, common_prefix);
				4948	if (new_prefix != common_prefix) break;
				4949	}
				4950	prefix_length = Min(prefix_length, atom->length());
				4951	i++;
				4952	}
				4953	if (i > first_with_prefix + 2) {
				4954	// Found worthwhile run of alternatives with common prefix of at least one
				4955	// character. The sorting function above did not sort on more than one
				4956	// character for reasons of correctness, but there may still be a longer
				4957	// common prefix if the terms were similar or presorted in the input.
				4958	// Find out how long the common prefix is.
				4959	int run_length = i - first_with_prefix;
				4960	atom = alternatives->at(first_with_prefix)->AsAtom();
				4961	for (int j = 1; j < run_length && prefix_length > 1; j++) {
				4962	RegExpAtom* old_atom =
				4963	alternatives->at(j + first_with_prefix)->AsAtom();
				4964	for (int k = 1; k < prefix_length; k++) {
				4965	if (atom->data().at(k) != old_atom->data().at(k)) {
				4966	prefix_length = k;
				4967	break;
				4968	}
				4969	}
				4970	}
				4971	RegExpAtom* prefix =
				4972	new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length));
				4973	ZoneList<RegExpTree> pair = new (zone) ZoneList<RegExpTree*>(2, zone);
				4974	pair->Add(prefix, zone);
				4975	ZoneList<RegExpTree> suffixes =
				4976	new (zone) ZoneList<RegExpTree*>(run_length, zone);
				4977	for (int j = 0; j < run_length; j++) {
				4978	RegExpAtom* old_atom =
				4979	alternatives->at(j + first_with_prefix)->AsAtom();
				4980	int len = old_atom->length();
				4981	if (len == prefix_length) {
				4982	suffixes->Add(new (zone) RegExpEmpty(), zone);
				4983	} else {
				4984	RegExpTree* suffix = new (zone) RegExpAtom(
				4985	old_atom->data().SubVector(prefix_length, old_atom->length()));
				4986	suffixes->Add(suffix, zone);
				4987	}
				4988	}
				4989	pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
				4990	alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);
				4991	} else {
				4992	// Just copy any non-worthwhile alternatives.
				4993	for (int j = first_with_prefix; j < i; j++) {
				4994	alternatives->at(write_posn++) = alternatives->at(j);
				4995	}
				4996	}
				4997	}
				4998	alternatives->Rewind(write_posn); // Trim end of array.
				4999	}
				5000
				5001
				5002	// Optimizes b\|c\|z to [bcz].
				5003	void RegExpDisjunction::FixSingleCharacterDisjunctions(
				5004	RegExpCompiler* compiler) {
				5005	Zone* zone = compiler->zone();
				5006	ZoneList<RegExpTree> alternatives = this->alternatives();
				5007	int length = alternatives->length();
				5008
				5009	int write_posn = 0;
				5010	int i = 0;
				5011	while (i < length) {
				5012	RegExpTree* alternative = alternatives->at(i);
				5013	if (!alternative->IsAtom()) {
				5014	alternatives->at(write_posn++) = alternatives->at(i);
				5015	i++;
				5016	continue;
				5017	}
				5018	RegExpAtom* atom = alternative->AsAtom();
				5019	if (atom->length() != 1) {
				5020	alternatives->at(write_posn++) = alternatives->at(i);
				5021	i++;
				5022	continue;
				5023	}
				5024	int first_in_run = i;
				5025	i++;
				5026	while (i < length) {
				5027	alternative = alternatives->at(i);
				5028	if (!alternative->IsAtom()) break;
				5029	atom = alternative->AsAtom();
				5030	if (atom->length() != 1) break;
				5031	i++;
				5032	}
				5033	if (i > first_in_run + 1) {
				5034	// Found non-trivial run of single-character alternatives.
				5035	int run_length = i - first_in_run;
				5036	ZoneList<CharacterRange>* ranges =
				5037	new (zone) ZoneList<CharacterRange>(2, zone);
				5038	for (int j = 0; j < run_length; j++) {
				5039	RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
				5040	DCHECK_EQ(old_atom->length(), 1);
				5041	ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
				5042	}
				5043	alternatives->at(write_posn++) =
				5044	new (zone) RegExpCharacterClass(ranges, false);
				5045	} else {
				5046	// Just copy any trivial alternatives.
				5047	for (int j = first_in_run; j < i; j++) {
				5048	alternatives->at(write_posn++) = alternatives->at(j);
				5049	}
				5050	}
				5051	}
				5052	alternatives->Rewind(write_posn); // Trim end of array.
				5053	}
				5054
				5055
				5056	RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
				5057	RegExpNode* on_success) {
				5058	ZoneList<RegExpTree> alternatives = this->alternatives();
				5059
				5060	if (alternatives->length() > 2) {
				5061	bool found_consecutive_atoms = SortConsecutiveAtoms(compiler);
				5062	if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler);
				5063	FixSingleCharacterDisjunctions(compiler);
				5064	if (alternatives->length() == 1) {
				5065	return alternatives->at(0)->ToNode(compiler, on_success);
				5066	}
				5067	}
				5068
				5069	int length = alternatives->length();
				5070
				5071	ChoiceNode* result =
				5072	new(compiler->zone()) ChoiceNode(length, compiler->zone());
				5073	for (int i = 0; i < length; i++) {
				5074	GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
				5075	on_success));
				5076	result->AddAlternative(alternative);
				5077	}
				5078	return result;
				5079	}
				5080
				5081
				5082	RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
				5083	RegExpNode* on_success) {
				5084	return ToNode(min(),
				5085	max(),
				5086	is_greedy(),
				5087	body(),
				5088	compiler,
				5089	on_success);
				5090	}
				5091
				5092
				5093	// Scoped object to keep track of how much we unroll quantifier loops in the
				5094	// regexp graph generator.
				5095	class RegExpExpansionLimiter {
				5096	public:
				5097	static const int kMaxExpansionFactor = 6;
				5098	RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
				5099	: compiler_(compiler),
				5100	saved_expansion_factor_(compiler->current_expansion_factor()),
				5101	ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
				5102	DCHECK(factor > 0);
				5103	if (ok_to_expand_) {
				5104	if (factor > kMaxExpansionFactor) {
				5105	// Avoid integer overflow of the current expansion factor.
				5106	ok_to_expand_ = false;
				5107	compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
				5108	} else {
				5109	int new_factor = saved_expansion_factor_ * factor;
				5110	ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
				5111	compiler->set_current_expansion_factor(new_factor);
				5112	}
				5113	}
				5114	}
				5115
				5116	~RegExpExpansionLimiter() {
				5117	compiler_->set_current_expansion_factor(saved_expansion_factor_);
				5118	}
				5119
				5120	bool ok_to_expand() { return ok_to_expand_; }
				5121
				5122	private:
				5123	RegExpCompiler* compiler_;
				5124	int saved_expansion_factor_;
				5125	bool ok_to_expand_;
				5126
				5127	DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
				5128	};
				5129
				5130
				5131	RegExpNode* RegExpQuantifier::ToNode(int min,
				5132	int max,
				5133	bool is_greedy,
				5134	RegExpTree* body,
				5135	RegExpCompiler* compiler,
				5136	RegExpNode* on_success,
				5137	bool not_at_start) {
				5138	// x{f, t} becomes this:
				5139	//
				5140	// (r++)<-.
				5141	// \| `
				5142	// \| (x)
				5143	// v ^
				5144	// (r=0)-->(?)---/ [if r < t]
				5145	// \|
				5146	// [if r >= f] \----> ...
				5147	//
				5148
				5149	// 15.10.2.5 RepeatMatcher algorithm.
				5150	// The parser has already eliminated the case where max is 0. In the case
				5151	// where max_match is zero the parser has removed the quantifier if min was
				5152	// > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
				5153
				5154	// If we know that we cannot match zero length then things are a little
				5155	// simpler since we don't need to make the special zero length match check
				5156	// from step 2.1. If the min and max are small we can unroll a little in
				5157	// this case.
				5158	static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
				5159	static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
				5160	if (max == 0) return on_success; // This can happen due to recursion.
				5161	bool body_can_be_empty = (body->min_match() == 0);
				5162	int body_start_reg = RegExpCompiler::kNoRegister;
				5163	Interval capture_registers = body->CaptureRegisters();
				5164	bool needs_capture_clearing = !capture_registers.is_empty();
				5165	Zone* zone = compiler->zone();
				5166
				5167	if (body_can_be_empty) {
				5168	body_start_reg = compiler->AllocateRegister();
				5169	} else if (compiler->optimize() && !needs_capture_clearing) {
				5170	// Only unroll if there are no captures and the body can't be
				5171	// empty.
				5172	{
				5173	RegExpExpansionLimiter limiter(
				5174	compiler, min + ((max != min) ? 1 : 0));
				5175	if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
				5176	int new_max = (max == kInfinity) ? max : max - min;
				5177	// Recurse once to get the loop or optional matches after the fixed
				5178	// ones.
				5179	RegExpNode* answer = ToNode(
				5180	0, new_max, is_greedy, body, compiler, on_success, true);
				5181	// Unroll the forced matches from 0 to min. This can cause chains of
				5182	// TextNodes (which the parser does not generate). These should be
				5183	// combined if it turns out they hinder good code generation.
				5184	for (int i = 0; i < min; i++) {
				5185	answer = body->ToNode(compiler, answer);
				5186	}
				5187	return answer;
				5188	}
				5189	}
				5190	if (max <= kMaxUnrolledMaxMatches && min == 0) {
				5191	DCHECK(max > 0); // Due to the 'if' above.
				5192	RegExpExpansionLimiter limiter(compiler, max);
				5193	if (limiter.ok_to_expand()) {
				5194	// Unroll the optional matches up to max.
				5195	RegExpNode* answer = on_success;
				5196	for (int i = 0; i < max; i++) {
				5197	ChoiceNode* alternation = new(zone) ChoiceNode(2, zone);
				5198	if (is_greedy) {
				5199	alternation->AddAlternative(
				5200	GuardedAlternative(body->ToNode(compiler, answer)));
				5201	alternation->AddAlternative(GuardedAlternative(on_success));
				5202	} else {
				5203	alternation->AddAlternative(GuardedAlternative(on_success));
				5204	alternation->AddAlternative(
				5205	GuardedAlternative(body->ToNode(compiler, answer)));
				5206	}
				5207	answer = alternation;
				5208	if (not_at_start && !compiler->read_backward()) {
				5209	alternation->set_not_at_start();
				5210	}
				5211	}
				5212	return answer;
				5213	}
				5214	}
				5215	}
				5216	bool has_min = min > 0;
				5217	bool has_max = max < RegExpTree::kInfinity;
				5218	bool needs_counter = has_min \|\| has_max;
				5219	int reg_ctr = needs_counter
				5220	? compiler->AllocateRegister()
				5221	: RegExpCompiler::kNoRegister;
				5222	LoopChoiceNode* center = new (zone)
				5223	LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
				5224	if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
				5225	RegExpNode* loop_return = needs_counter
				5226	? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
				5227	: static_cast<RegExpNode*>(center);
				5228	if (body_can_be_empty) {
				5229	// If the body can be empty we need to check if it was and then
				5230	// backtrack.
				5231	loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
				5232	reg_ctr,
				5233	min,
				5234	loop_return);
				5235	}
				5236	RegExpNode* body_node = body->ToNode(compiler, loop_return);
				5237	if (body_can_be_empty) {
				5238	// If the body can be empty we need to store the start position
				5239	// so we can bail out if it was empty.
				5240	body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
				5241	}
				5242	if (needs_capture_clearing) {
				5243	// Before entering the body of this loop we need to clear captures.
				5244	body_node = ActionNode::ClearCaptures(capture_registers, body_node);
				5245	}
				5246	GuardedAlternative body_alt(body_node);
				5247	if (has_max) {
				5248	Guard* body_guard =
				5249	new(zone) Guard(reg_ctr, Guard::LT, max);
				5250	body_alt.AddGuard(body_guard, zone);
				5251	}
				5252	GuardedAlternative rest_alt(on_success);
				5253	if (has_min) {
				5254	Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
				5255	rest_alt.AddGuard(rest_guard, zone);
				5256	}
				5257	if (is_greedy) {
				5258	center->AddLoopAlternative(body_alt);
				5259	center->AddContinueAlternative(rest_alt);
				5260	} else {
				5261	center->AddContinueAlternative(rest_alt);
				5262	center->AddLoopAlternative(body_alt);
				5263	}
				5264	if (needs_counter) {
				5265	return ActionNode::SetRegister(reg_ctr, 0, center);
				5266	} else {
				5267	return center;
				5268	}
				5269	}
				5270
				5271
				5272	RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
				5273	RegExpNode* on_success) {
				5274	NodeInfo info;
				5275	Zone* zone = compiler->zone();
				5276
				5277	switch (assertion_type()) {
				5278	case START_OF_LINE:
				5279	return AssertionNode::AfterNewline(on_success);
				5280	case START_OF_INPUT:
				5281	return AssertionNode::AtStart(on_success);
				5282	case BOUNDARY:
				5283	return AssertionNode::AtBoundary(on_success);
				5284	case NON_BOUNDARY:
				5285	return AssertionNode::AtNonBoundary(on_success);
				5286	case END_OF_INPUT:
				5287	return AssertionNode::AtEnd(on_success);
				5288	case END_OF_LINE: {
				5289	// Compile $ in multiline regexps as an alternation with a positive
				5290	// lookahead in one side and an end-of-input on the other side.
				5291	// We need two registers for the lookahead.
				5292	int stack_pointer_register = compiler->AllocateRegister();
				5293	int position_register = compiler->AllocateRegister();
				5294	// The ChoiceNode to distinguish between a newline and end-of-input.
				5295	ChoiceNode* result = new(zone) ChoiceNode(2, zone);
				5296	// Create a newline atom.
				5297	ZoneList<CharacterRange>* newline_ranges =
				5298	new(zone) ZoneList<CharacterRange>(3, zone);
				5299	CharacterRange::AddClassEscape('n', newline_ranges, zone);
				5300	RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n');
				5301	TextNode* newline_matcher = new (zone) TextNode(
				5302	newline_atom, false, ActionNode::PositiveSubmatchSuccess(
				5303	stack_pointer_register, position_register,
				5304	0, // No captures inside.
				5305	-1, // Ignored if no captures.
				5306	on_success));
				5307	// Create an end-of-input matcher.
				5308	RegExpNode* end_of_line = ActionNode::BeginSubmatch(
				5309	stack_pointer_register,
				5310	position_register,
				5311	newline_matcher);
				5312	// Add the two alternatives to the ChoiceNode.
				5313	GuardedAlternative eol_alternative(end_of_line);
				5314	result->AddAlternative(eol_alternative);
				5315	GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
				5316	result->AddAlternative(end_alternative);
				5317	return result;
				5318	}
				5319	default:
				5320	UNREACHABLE();
				5321	}
				5322	return on_success;
				5323	}
				5324
				5325
				5326	RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
				5327	RegExpNode* on_success) {
				5328	return new (compiler->zone())
				5329	BackReferenceNode(RegExpCapture::StartRegister(index()),
				5330	RegExpCapture::EndRegister(index()),
				5331	compiler->read_backward(), on_success);
				5332	}
				5333
				5334
				5335	RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
				5336	RegExpNode* on_success) {
				5337	return on_success;
				5338	}
				5339
				5340
				5341	RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
				5342	RegExpNode* on_success) {
				5343	int stack_pointer_register = compiler->AllocateRegister();
				5344	int position_register = compiler->AllocateRegister();
				5345
				5346	const int registers_per_capture = 2;
				5347	const int register_of_first_capture = 2;
				5348	int register_count = capture_count_ * registers_per_capture;
				5349	int register_start =
				5350	register_of_first_capture + capture_from_ * registers_per_capture;
				5351
				5352	RegExpNode* result;
				5353	bool was_reading_backward = compiler->read_backward();
				5354	compiler->set_read_backward(type() == LOOKBEHIND);
				5355	if (is_positive()) {
				5356	result = ActionNode::BeginSubmatch(
				5357	stack_pointer_register, position_register,
				5358	body()->ToNode(compiler,
				5359	ActionNode::PositiveSubmatchSuccess(
				5360	stack_pointer_register, position_register,
				5361	register_count, register_start, on_success)));
				5362	} else {
				5363	// We use a ChoiceNode for a negative lookahead because it has most of
				5364	// the characteristics we need. It has the body of the lookahead as its
				5365	// first alternative and the expression after the lookahead of the second
				5366	// alternative. If the first alternative succeeds then the
				5367	// NegativeSubmatchSuccess will unwind the stack including everything the
				5368	// choice node set up and backtrack. If the first alternative fails then
				5369	// the second alternative is tried, which is exactly the desired result
				5370	// for a negative lookahead. The NegativeLookaheadChoiceNode is a special
				5371	// ChoiceNode that knows to ignore the first exit when calculating quick
				5372	// checks.
				5373	Zone* zone = compiler->zone();
				5374
				5375	GuardedAlternative body_alt(
				5376	body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(
				5377	stack_pointer_register, position_register,
				5378	register_count, register_start, zone)));
				5379	ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
				5380	body_alt, GuardedAlternative(on_success), zone);
				5381	result = ActionNode::BeginSubmatch(stack_pointer_register,
				5382	position_register, choice_node);
				5383	}
				5384	compiler->set_read_backward(was_reading_backward);
				5385	return result;
				5386	}
				5387
				5388
				5389	RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
				5390	RegExpNode* on_success) {
				5391	return ToNode(body(), index(), compiler, on_success);
				5392	}
				5393
				5394
				5395	RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
				5396	int index,
				5397	RegExpCompiler* compiler,
				5398	RegExpNode* on_success) {
				5399	DCHECK_NOT_NULL(body);
				5400	int start_reg = RegExpCapture::StartRegister(index);
				5401	int end_reg = RegExpCapture::EndRegister(index);
				5402	if (compiler->read_backward()) std::swap(start_reg, end_reg);
				5403	RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
				5404	RegExpNode* body_node = body->ToNode(compiler, store_end);
				5405	return ActionNode::StorePosition(start_reg, true, body_node);
				5406	}
				5407
				5408
				5409	RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
				5410	RegExpNode* on_success) {
				5411	ZoneList<RegExpTree> children = nodes();
				5412	RegExpNode* current = on_success;
				5413	if (compiler->read_backward()) {
				5414	for (int i = 0; i < children->length(); i++) {
				5415	current = children->at(i)->ToNode(compiler, current);
				5416	}
				5417	} else {
				5418	for (int i = children->length() - 1; i >= 0; i--) {
				5419	current = children->at(i)->ToNode(compiler, current);
				5420	}
				5421	}
				5422	return current;
				5423	}
				5424
				5425
				5426	static void AddClass(const int* elmv,
				5427	int elmc,
				5428	ZoneList<CharacterRange>* ranges,
				5429	Zone* zone) {
				5430	elmc--;
				5431	DCHECK(elmv[elmc] == 0x10000);
				5432	for (int i = 0; i < elmc; i += 2) {
				5433	DCHECK(elmv[i] < elmv[i + 1]);
				5434	ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);
				5435	}
				5436	}
				5437
				5438
				5439	static void AddClassNegated(const int *elmv,
				5440	int elmc,
				5441	ZoneList<CharacterRange>* ranges,
				5442	Zone* zone) {
				5443	elmc--;
				5444	DCHECK(elmv[elmc] == 0x10000);
				5445	DCHECK(elmv[0] != 0x0000);
				5446	DCHECK(elmv[elmc-1] != String::kMaxUtf16CodeUnit);
				5447	uc16 last = 0x0000;
				5448	for (int i = 0; i < elmc; i += 2) {
				5449	DCHECK(last <= elmv[i] - 1);
				5450	DCHECK(elmv[i] < elmv[i + 1]);
				5451	ranges->Add(CharacterRange(last, elmv[i] - 1), zone);
				5452	last = elmv[i + 1];
				5453	}
				5454	ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone);
				5455	}
				5456
				5457
				5458	void CharacterRange::AddClassEscape(uc16 type,
				5459	ZoneList<CharacterRange>* ranges,
				5460	Zone* zone) {
				5461	switch (type) {
				5462	case 's':
				5463	AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
				5464	break;
				5465	case 'S':
				5466	AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
				5467	break;
				5468	case 'w':
				5469	AddClass(kWordRanges, kWordRangeCount, ranges, zone);
				5470	break;
				5471	case 'W':
				5472	AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
				5473	break;
				5474	case 'd':
				5475	AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
				5476	break;
				5477	case 'D':
				5478	AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
				5479	break;
				5480	case '.':
				5481	AddClassNegated(kLineTerminatorRanges,
				5482	kLineTerminatorRangeCount,
				5483	ranges,
				5484	zone);
				5485	break;
				5486	// This is not a character range as defined by the spec but a
				5487	// convenient shorthand for a character class that matches any
				5488	// character.
				5489	case '*':
				5490	ranges->Add(CharacterRange::Everything(), zone);
				5491	break;
				5492	// This is the set of characters matched by the $ and ^ symbols
				5493	// in multiline mode.
				5494	case 'n':
				5495	AddClass(kLineTerminatorRanges,
				5496	kLineTerminatorRangeCount,
				5497	ranges,
				5498	zone);
				5499	break;
				5500	default:
				5501	UNREACHABLE();
				5502	}
				5503	}
				5504
				5505
				5506	Vector<const int> CharacterRange::GetWordBounds() {
				5507	return Vector<const int>(kWordRanges, kWordRangeCount - 1);
				5508	}
				5509
				5510
				5511	class CharacterRangeSplitter {
				5512	public:
				5513	CharacterRangeSplitter(ZoneList<CharacterRange>** included,
				5514	ZoneList<CharacterRange>** excluded,
				5515	Zone* zone)
				5516	: included_(included),
				5517	excluded_(excluded),
				5518	zone_(zone) { }
				5519	void Call(uc16 from, DispatchTable::Entry entry);
				5520
				5521	static const int kInBase = 0;
				5522	static const int kInOverlay = 1;
				5523
				5524	private:
				5525	ZoneList<CharacterRange>** included_;
				5526	ZoneList<CharacterRange>** excluded_;
				5527	Zone* zone_;
				5528	};
				5529
				5530
				5531	void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
				5532	if (!entry.out_set()->Get(kInBase)) return;
				5533	ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
				5534	? included_
				5535	: excluded_;
				5536	if (target == NULL) target = new(zone_) ZoneList<CharacterRange>(2, zone_);
				5537	(*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);
				5538	}
				5539
				5540
				5541	void CharacterRange::Split(ZoneList<CharacterRange>* base,
				5542	Vector<const int> overlay,
				5543	ZoneList<CharacterRange>** included,
				5544	ZoneList<CharacterRange>** excluded,
				5545	Zone* zone) {
				5546	DCHECK_NULL(*included);
				5547	DCHECK_NULL(*excluded);
				5548	DispatchTable table(zone);
				5549	for (int i = 0; i < base->length(); i++)
				5550	table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);
				5551	for (int i = 0; i < overlay.length(); i += 2) {
				5552	table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
				5553	CharacterRangeSplitter::kInOverlay, zone);
				5554	}
				5555	CharacterRangeSplitter callback(included, excluded, zone);
				5556	table.ForEach(&callback);
				5557	}
				5558
				5559
				5560	void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
				5561	ZoneList<CharacterRange>* ranges,
				5562	bool is_one_byte) {
				5563	uc16 bottom = from();
				5564	uc16 top = to();
				5565	if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
				5566	if (bottom > String::kMaxOneByteCharCode) return;
				5567	if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
				5568	}
				5569	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				5570	if (top == bottom) {
				5571	// If this is a singleton we just expand the one character.
				5572	int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
				5573	for (int i = 0; i < length; i++) {
				5574	uc32 chr = chars[i];
				5575	if (chr != bottom) {
				5576	ranges->Add(CharacterRange::Singleton(chars[i]), zone);
				5577	}
				5578	}
				5579	} else {
				5580	// If this is a range we expand the characters block by block,
				5581	// expanding contiguous subranges (blocks) one at a time.
				5582	// The approach is as follows. For a given start character we
				5583	// look up the remainder of the block that contains it (represented
				5584	// by the end point), for instance we find 'z' if the character
				5585	// is 'c'. A block is characterized by the property
				5586	// that all characters uncanonicalize in the same way, except that
				5587	// each entry in the result is incremented by the distance from the first
				5588	// element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
				5589	// the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
				5590	// Once we've found the end point we look up its uncanonicalization
				5591	// and produce a range for each element. For instance for [c-f]
				5592	// we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
				5593	// add a range if it is not already contained in the input, so [c-f]
				5594	// will be skipped but [C-F] will be added. If this range is not
				5595	// completely contained in a block we do this for all the blocks
				5596	// covered by the range (handling characters that is not in a block
				5597	// as a "singleton block").
				5598	unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				5599	int pos = bottom;
				5600	while (pos <= top) {
				5601	int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
				5602	uc16 block_end;
				5603	if (length == 0) {
				5604	block_end = pos;
				5605	} else {
				5606	DCHECK_EQ(1, length);
				5607	block_end = range[0];
				5608	}
				5609	int end = (block_end > top) ? top : block_end;
				5610	length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
				5611	for (int i = 0; i < length; i++) {
				5612	uc32 c = range[i];
				5613	uc16 range_from = c - (block_end - pos);
				5614	uc16 range_to = c - (block_end - end);
				5615	if (!(bottom <= range_from && range_to <= top)) {
				5616	ranges->Add(CharacterRange(range_from, range_to), zone);
				5617	}
				5618	}
				5619	pos = end + 1;
				5620	}
				5621	}
				5622	}
				5623
				5624
				5625	bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
				5626	DCHECK_NOT_NULL(ranges);
				5627	int n = ranges->length();
				5628	if (n <= 1) return true;
				5629	int max = ranges->at(0).to();
				5630	for (int i = 1; i < n; i++) {
				5631	CharacterRange next_range = ranges->at(i);
				5632	if (next_range.from() <= max + 1) return false;
				5633	max = next_range.to();
				5634	}
				5635	return true;
				5636	}
				5637
				5638
				5639	ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
				5640	if (ranges_ == NULL) {
				5641	ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);
				5642	CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone);
				5643	}
				5644	return ranges_;
				5645	}
				5646
				5647
				5648	// Move a number of elements in a zonelist to another position
				5649	// in the same list. Handles overlapping source and target areas.
				5650	static void MoveRanges(ZoneList<CharacterRange>* list,
				5651	int from,
				5652	int to,
				5653	int count) {
				5654	// Ranges are potentially overlapping.
				5655	if (from < to) {
				5656	for (int i = count - 1; i >= 0; i--) {
				5657	list->at(to + i) = list->at(from + i);
				5658	}
				5659	} else {
				5660	for (int i = 0; i < count; i++) {
				5661	list->at(to + i) = list->at(from + i);
				5662	}
				5663	}
				5664	}
				5665
				5666
				5667	static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
				5668	int count,
				5669	CharacterRange insert) {
				5670	// Inserts a range into list[0..count[, which must be sorted
				5671	// by from value and non-overlapping and non-adjacent, using at most
				5672	// list[0..count] for the result. Returns the number of resulting
				5673	// canonicalized ranges. Inserting a range may collapse existing ranges into
				5674	// fewer ranges, so the return value can be anything in the range 1..count+1.
				5675	uc16 from = insert.from();
				5676	uc16 to = insert.to();
				5677	int start_pos = 0;
				5678	int end_pos = count;
				5679	for (int i = count - 1; i >= 0; i--) {
				5680	CharacterRange current = list->at(i);
				5681	if (current.from() > to + 1) {
				5682	end_pos = i;
				5683	} else if (current.to() + 1 < from) {
				5684	start_pos = i + 1;
				5685	break;
				5686	}
				5687	}
				5688
				5689	// Inserted range overlaps, or is adjacent to, ranges at positions
				5690	// [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
				5691	// not affected by the insertion.
				5692	// If start_pos == end_pos, the range must be inserted before start_pos.
				5693	// if start_pos < end_pos, the entire range from start_pos to end_pos
				5694	// must be merged with the insert range.
				5695
				5696	if (start_pos == end_pos) {
				5697	// Insert between existing ranges at position start_pos.
				5698	if (start_pos < count) {
				5699	MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
				5700	}
				5701	list->at(start_pos) = insert;
				5702	return count + 1;
				5703	}
				5704	if (start_pos + 1 == end_pos) {
				5705	// Replace single existing range at position start_pos.
				5706	CharacterRange to_replace = list->at(start_pos);
				5707	int new_from = Min(to_replace.from(), from);
				5708	int new_to = Max(to_replace.to(), to);
				5709	list->at(start_pos) = CharacterRange(new_from, new_to);
				5710	return count;
				5711	}
				5712	// Replace a number of existing ranges from start_pos to end_pos - 1.
				5713	// Move the remaining ranges down.
				5714
				5715	int new_from = Min(list->at(start_pos).from(), from);
				5716	int new_to = Max(list->at(end_pos - 1).to(), to);
				5717	if (end_pos < count) {
				5718	MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
				5719	}
				5720	list->at(start_pos) = CharacterRange(new_from, new_to);
				5721	return count - (end_pos - start_pos) + 1;
				5722	}
				5723
				5724
				5725	void CharacterSet::Canonicalize() {
				5726	// Special/default classes are always considered canonical. The result
				5727	// of calling ranges() will be sorted.
				5728	if (ranges_ == NULL) return;
				5729	CharacterRange::Canonicalize(ranges_);
				5730	}
				5731
				5732
				5733	void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
				5734	if (character_ranges->length() <= 1) return;
				5735	// Check whether ranges are already canonical (increasing, non-overlapping,
				5736	// non-adjacent).
				5737	int n = character_ranges->length();
				5738	int max = character_ranges->at(0).to();
				5739	int i = 1;
				5740	while (i < n) {
				5741	CharacterRange current = character_ranges->at(i);
				5742	if (current.from() <= max + 1) {
				5743	break;
				5744	}
				5745	max = current.to();
				5746	i++;
				5747	}
				5748	// Canonical until the i'th range. If that's all of them, we are done.
				5749	if (i == n) return;
				5750
				5751	// The ranges at index i and forward are not canonicalized. Make them so by
				5752	// doing the equivalent of insertion sort (inserting each into the previous
				5753	// list, in order).
				5754	// Notice that inserting a range can reduce the number of ranges in the
				5755	// result due to combining of adjacent and overlapping ranges.
				5756	int read = i; // Range to insert.
				5757	int num_canonical = i; // Length of canonicalized part of list.
				5758	do {
				5759	num_canonical = InsertRangeInCanonicalList(character_ranges,
				5760	num_canonical,
				5761	character_ranges->at(read));
				5762	read++;
				5763	} while (read < n);
				5764	character_ranges->Rewind(num_canonical);
				5765
				5766	DCHECK(CharacterRange::IsCanonical(character_ranges));
				5767	}
				5768
				5769
				5770	void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
				5771	ZoneList<CharacterRange>* negated_ranges,
				5772	Zone* zone) {
				5773	DCHECK(CharacterRange::IsCanonical(ranges));
				5774	DCHECK_EQ(0, negated_ranges->length());
				5775	int range_count = ranges->length();
				5776	uc16 from = 0;
				5777	int i = 0;
				5778	if (range_count > 0 && ranges->at(0).from() == 0) {
				5779	from = ranges->at(0).to();
				5780	i = 1;
				5781	}
				5782	while (i < range_count) {
				5783	CharacterRange range = ranges->at(i);
				5784	negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);
				5785	from = range.to();
				5786	i++;
				5787	}
				5788	if (from < String::kMaxUtf16CodeUnit) {
				5789	negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),
				5790	zone);
				5791	}
				5792	}
				5793
				5794
				5795	// -------------------------------------------------------------------
				5796	// Splay tree
				5797
				5798
				5799	OutSet* OutSet::Extend(unsigned value, Zone* zone) {
				5800	if (Get(value))
				5801	return this;
				5802	if (successors(zone) != NULL) {
				5803	for (int i = 0; i < successors(zone)->length(); i++) {
				5804	OutSet* successor = successors(zone)->at(i);
				5805	if (successor->Get(value))
				5806	return successor;
				5807	}
				5808	} else {
				5809	successors_ = new(zone) ZoneList<OutSet*>(2, zone);
				5810	}
				5811	OutSet* result = new(zone) OutSet(first_, remaining_);
				5812	result->Set(value, zone);
				5813	successors(zone)->Add(result, zone);
				5814	return result;
				5815	}
				5816
				5817
				5818	void OutSet::Set(unsigned value, Zone *zone) {
				5819	if (value < kFirstLimit) {
				5820	first_ \|= (1 << value);
				5821	} else {
				5822	if (remaining_ == NULL)
				5823	remaining_ = new(zone) ZoneList<unsigned>(1, zone);
				5824	if (remaining_->is_empty() \|\| !remaining_->Contains(value))
				5825	remaining_->Add(value, zone);
				5826	}
				5827	}
				5828
				5829
				5830	bool OutSet::Get(unsigned value) const {
				5831	if (value < kFirstLimit) {
				5832	return (first_ & (1 << value)) != 0;
				5833	} else if (remaining_ == NULL) {
				5834	return false;
				5835	} else {
				5836	return remaining_->Contains(value);
				5837	}
				5838	}
				5839
				5840
				5841	const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
				5842
				5843
				5844	void DispatchTable::AddRange(CharacterRange full_range, int value,
				5845	Zone* zone) {
				5846	CharacterRange current = full_range;
				5847	if (tree()->is_empty()) {
				5848	// If this is the first range we just insert into the table.
				5849	ZoneSplayTree<Config>::Locator loc;
				5850	bool inserted = tree()->Insert(current.from(), &loc);
				5851	DCHECK(inserted);
				5852	USE(inserted);
				5853	loc.set_value(Entry(current.from(), current.to(),
				5854	empty()->Extend(value, zone)));
				5855	return;
				5856	}
				5857	// First see if there is a range to the left of this one that
				5858	// overlaps.
				5859	ZoneSplayTree<Config>::Locator loc;
				5860	if (tree()->FindGreatestLessThan(current.from(), &loc)) {
				5861	Entry* entry = &loc.value();
				5862	// If we've found a range that overlaps with this one, and it
				5863	// starts strictly to the left of this one, we have to fix it
				5864	// because the following code only handles ranges that start on
				5865	// or after the start point of the range we're adding.
				5866	if (entry->from() < current.from() && entry->to() >= current.from()) {
				5867	// Snap the overlapping range in half around the start point of
				5868	// the range we're adding.
				5869	CharacterRange left(entry->from(), current.from() - 1);
				5870	CharacterRange right(current.from(), entry->to());
				5871	// The left part of the overlapping range doesn't overlap.
				5872	// Truncate the whole entry to be just the left part.
				5873	entry->set_to(left.to());
				5874	// The right part is the one that overlaps. We add this part
				5875	// to the map and let the next step deal with merging it with
				5876	// the range we're adding.
				5877	ZoneSplayTree<Config>::Locator loc;
				5878	bool inserted = tree()->Insert(right.from(), &loc);
				5879	DCHECK(inserted);
				5880	USE(inserted);
				5881	loc.set_value(Entry(right.from(),
				5882	right.to(),
				5883	entry->out_set()));
				5884	}
				5885	}
				5886	while (current.is_valid()) {
				5887	if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
				5888	(loc.value().from() <= current.to()) &&
				5889	(loc.value().to() >= current.from())) {
				5890	Entry* entry = &loc.value();
				5891	// We have overlap. If there is space between the start point of
				5892	// the range we're adding and where the overlapping range starts
				5893	// then we have to add a range covering just that space.
				5894	if (current.from() < entry->from()) {
				5895	ZoneSplayTree<Config>::Locator ins;
				5896	bool inserted = tree()->Insert(current.from(), &ins);
				5897	DCHECK(inserted);
				5898	USE(inserted);
				5899	ins.set_value(Entry(current.from(),
				5900	entry->from() - 1,
				5901	empty()->Extend(value, zone)));
				5902	current.set_from(entry->from());
				5903	}
				5904	DCHECK_EQ(current.from(), entry->from());
				5905	// If the overlapping range extends beyond the one we want to add
				5906	// we have to snap the right part off and add it separately.
				5907	if (entry->to() > current.to()) {
				5908	ZoneSplayTree<Config>::Locator ins;
				5909	bool inserted = tree()->Insert(current.to() + 1, &ins);
				5910	DCHECK(inserted);
				5911	USE(inserted);
				5912	ins.set_value(Entry(current.to() + 1,
				5913	entry->to(),
				5914	entry->out_set()));
				5915	entry->set_to(current.to());
				5916	}
				5917	DCHECK(entry->to() <= current.to());
				5918	// The overlapping range is now completely contained by the range
				5919	// we're adding so we can just update it and move the start point
				5920	// of the range we're adding just past it.
				5921	entry->AddValue(value, zone);
				5922	// Bail out if the last interval ended at 0xFFFF since otherwise
				5923	// adding 1 will wrap around to 0.
				5924	if (entry->to() == String::kMaxUtf16CodeUnit)
				5925	break;
				5926	DCHECK(entry->to() + 1 > current.from());
				5927	current.set_from(entry->to() + 1);
				5928	} else {
				5929	// There is no overlap so we can just add the range
				5930	ZoneSplayTree<Config>::Locator ins;
				5931	bool inserted = tree()->Insert(current.from(), &ins);
				5932	DCHECK(inserted);
				5933	USE(inserted);
				5934	ins.set_value(Entry(current.from(),
				5935	current.to(),
				5936	empty()->Extend(value, zone)));
				5937	break;
				5938	}
				5939	}
				5940	}
				5941
				5942
				5943	OutSet* DispatchTable::Get(uc16 value) {
				5944	ZoneSplayTree<Config>::Locator loc;
				5945	if (!tree()->FindGreatestLessThan(value, &loc))
				5946	return empty();
				5947	Entry* entry = &loc.value();
				5948	if (value <= entry->to())
				5949	return entry->out_set();
				5950	else
				5951	return empty();
				5952	}
				5953
				5954
				5955	// -------------------------------------------------------------------
				5956	// Analysis
				5957
				5958
				5959	void Analysis::EnsureAnalyzed(RegExpNode* that) {
				5960	StackLimitCheck check(isolate());
				5961	if (check.HasOverflowed()) {
				5962	fail("Stack overflow");
				5963	return;
				5964	}
				5965	if (that->info()->been_analyzed \|\| that->info()->being_analyzed)
				5966	return;
				5967	that->info()->being_analyzed = true;
				5968	that->Accept(this);
				5969	that->info()->being_analyzed = false;
				5970	that->info()->been_analyzed = true;
				5971	}
				5972
				5973
				5974	void Analysis::VisitEnd(EndNode* that) {
				5975	// nothing to do
				5976	}
				5977
				5978
				5979	void TextNode::CalculateOffsets() {
				5980	int element_count = elements()->length();
				5981	// Set up the offsets of the elements relative to the start. This is a fixed
				5982	// quantity since a TextNode can only contain fixed-width things.
				5983	int cp_offset = 0;
				5984	for (int i = 0; i < element_count; i++) {
				5985	TextElement& elm = elements()->at(i);
				5986	elm.set_cp_offset(cp_offset);
				5987	cp_offset += elm.length();
				5988	}
				5989	}
				5990
				5991
				5992	void Analysis::VisitText(TextNode* that) {
				5993	if (ignore_case_) {
				5994	that->MakeCaseIndependent(isolate(), is_one_byte_);
				5995	}
				5996	EnsureAnalyzed(that->on_success());
				5997	if (!has_failed()) {
				5998	that->CalculateOffsets();
				5999	}
				6000	}
				6001
				6002
				6003	void Analysis::VisitAction(ActionNode* that) {
				6004	RegExpNode* target = that->on_success();
				6005	EnsureAnalyzed(target);
				6006	if (!has_failed()) {
				6007	// If the next node is interested in what it follows then this node
				6008	// has to be interested too so it can pass the information on.
				6009	that->info()->AddFromFollowing(target->info());
				6010	}
				6011	}
				6012
				6013
				6014	void Analysis::VisitChoice(ChoiceNode* that) {
				6015	NodeInfo* info = that->info();
				6016	for (int i = 0; i < that->alternatives()->length(); i++) {
				6017	RegExpNode* node = that->alternatives()->at(i).node();
				6018	EnsureAnalyzed(node);
				6019	if (has_failed()) return;
				6020	// Anything the following nodes need to know has to be known by
				6021	// this node also, so it can pass it on.
				6022	info->AddFromFollowing(node->info());
				6023	}
				6024	}
				6025
				6026
				6027	void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
				6028	NodeInfo* info = that->info();
				6029	for (int i = 0; i < that->alternatives()->length(); i++) {
				6030	RegExpNode* node = that->alternatives()->at(i).node();
				6031	if (node != that->loop_node()) {
				6032	EnsureAnalyzed(node);
				6033	if (has_failed()) return;
				6034	info->AddFromFollowing(node->info());
				6035	}
				6036	}
				6037	// Check the loop last since it may need the value of this node
				6038	// to get a correct result.
				6039	EnsureAnalyzed(that->loop_node());
				6040	if (!has_failed()) {
				6041	info->AddFromFollowing(that->loop_node()->info());
				6042	}
				6043	}
				6044
				6045
				6046	void Analysis::VisitBackReference(BackReferenceNode* that) {
				6047	EnsureAnalyzed(that->on_success());
				6048	}
				6049
				6050
				6051	void Analysis::VisitAssertion(AssertionNode* that) {
				6052	EnsureAnalyzed(that->on_success());
				6053	}
				6054
				6055
				6056	void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
				6057	BoyerMooreLookahead* bm,
				6058	bool not_at_start) {
				6059	// Working out the set of characters that a backreference can match is too
				6060	// hard, so we just say that any character can match.
				6061	bm->SetRest(offset);
				6062	SaveBMInfo(bm, not_at_start, offset);
				6063	}
				6064
				6065
				6066	STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
				6067	RegExpMacroAssembler::kTableSize);
				6068
				6069
				6070	void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
				6071	BoyerMooreLookahead* bm, bool not_at_start) {
				6072	ZoneList<GuardedAlternative>* alts = alternatives();
				6073	budget = (budget - 1) / alts->length();
				6074	for (int i = 0; i < alts->length(); i++) {
				6075	GuardedAlternative& alt = alts->at(i);
				6076	if (alt.guards() != NULL && alt.guards()->length() != 0) {
				6077	bm->SetRest(offset); // Give up trying to fill in info.
				6078	SaveBMInfo(bm, not_at_start, offset);
				6079	return;
				6080	}
				6081	alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
				6082	}
				6083	SaveBMInfo(bm, not_at_start, offset);
				6084	}
				6085
				6086
				6087	void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
				6088	BoyerMooreLookahead* bm, bool not_at_start) {
				6089	if (initial_offset >= bm->length()) return;
				6090	int offset = initial_offset;
				6091	int max_char = bm->max_char();
				6092	for (int i = 0; i < elements()->length(); i++) {
				6093	if (offset >= bm->length()) {
				6094	if (initial_offset == 0) set_bm_info(not_at_start, bm);
				6095	return;
				6096	}
				6097	TextElement text = elements()->at(i);
				6098	if (text.text_type() == TextElement::ATOM) {
				6099	RegExpAtom* atom = text.atom();
				6100	for (int j = 0; j < atom->length(); j++, offset++) {
				6101	if (offset >= bm->length()) {
				6102	if (initial_offset == 0) set_bm_info(not_at_start, bm);
				6103	return;
				6104	}
				6105	uc16 character = atom->data()[j];
				6106	if (bm->compiler()->ignore_case()) {
				6107	unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
				6108	int length = GetCaseIndependentLetters(
				6109	isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
				6110	chars);
				6111	for (int j = 0; j < length; j++) {
				6112	bm->Set(offset, chars[j]);
				6113	}
				6114	} else {
				6115	if (character <= max_char) bm->Set(offset, character);
				6116	}
				6117	}
				6118	} else {
				6119	DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type());
				6120	RegExpCharacterClass* char_class = text.char_class();
				6121	ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
				6122	if (char_class->is_negated()) {
				6123	bm->SetAll(offset);
				6124	} else {
				6125	for (int k = 0; k < ranges->length(); k++) {
				6126	CharacterRange& range = ranges->at(k);
				6127	if (range.from() > max_char) continue;
				6128	int to = Min(max_char, static_cast<int>(range.to()));
				6129	bm->SetInterval(offset, Interval(range.from(), to));
				6130	}
				6131	}
				6132	offset++;
				6133	}
				6134	}
				6135	if (offset >= bm->length()) {
				6136	if (initial_offset == 0) set_bm_info(not_at_start, bm);
				6137	return;
				6138	}
				6139	on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
				6140	true); // Not at start after a text node.
				6141	if (initial_offset == 0) set_bm_info(not_at_start, bm);
				6142	}
				6143
				6144
				6145	// -------------------------------------------------------------------
				6146	// Dispatch table construction
				6147
				6148
				6149	void DispatchTableConstructor::VisitEnd(EndNode* that) {
				6150	AddRange(CharacterRange::Everything());
				6151	}
				6152
				6153
				6154	void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
				6155	node->set_being_calculated(true);
				6156	ZoneList<GuardedAlternative>* alternatives = node->alternatives();
				6157	for (int i = 0; i < alternatives->length(); i++) {
				6158	set_choice_index(i);
				6159	alternatives->at(i).node()->Accept(this);
				6160	}
				6161	node->set_being_calculated(false);
				6162	}
				6163
				6164
				6165	class AddDispatchRange {
				6166	public:
				6167	explicit AddDispatchRange(DispatchTableConstructor* constructor)
				6168	: constructor_(constructor) { }
				6169	void Call(uc32 from, DispatchTable::Entry entry);
				6170	private:
				6171	DispatchTableConstructor* constructor_;
				6172	};
				6173
				6174
				6175	void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {
				6176	CharacterRange range(from, entry.to());
				6177	constructor_->AddRange(range);
				6178	}
				6179
				6180
				6181	void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
				6182	if (node->being_calculated())
				6183	return;
				6184	DispatchTable* table = node->GetTable(ignore_case_);
				6185	AddDispatchRange adder(this);
				6186	table->ForEach(&adder);
				6187	}
				6188
				6189
				6190	void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
				6191	// TODO(160): Find the node that we refer back to and propagate its start
				6192	// set back to here. For now we just accept anything.
				6193	AddRange(CharacterRange::Everything());
				6194	}
				6195
				6196
				6197	void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
				6198	RegExpNode* target = that->on_success();
				6199	target->Accept(this);
				6200	}
				6201
				6202
				6203	static int CompareRangeByFrom(const CharacterRange* a,
				6204	const CharacterRange* b) {
				6205	return Compare<uc16>(a->from(), b->from());
				6206	}
				6207
				6208
				6209	void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
				6210	ranges->Sort(CompareRangeByFrom);
				6211	uc16 last = 0;
				6212	for (int i = 0; i < ranges->length(); i++) {
				6213	CharacterRange range = ranges->at(i);
				6214	if (last < range.from())
				6215	AddRange(CharacterRange(last, range.from() - 1));
				6216	if (range.to() >= last) {
				6217	if (range.to() == String::kMaxUtf16CodeUnit) {
				6218	return;
				6219	} else {
				6220	last = range.to() + 1;
				6221	}
				6222	}
				6223	}
				6224	AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit));
				6225	}
				6226
				6227
				6228	void DispatchTableConstructor::VisitText(TextNode* that) {
				6229	TextElement elm = that->elements()->at(0);
				6230	switch (elm.text_type()) {
				6231	case TextElement::ATOM: {
				6232	uc16 c = elm.atom()->data()[0];
				6233	AddRange(CharacterRange(c, c));
				6234	break;
				6235	}
				6236	case TextElement::CHAR_CLASS: {
				6237	RegExpCharacterClass* tree = elm.char_class();
				6238	ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
				6239	if (tree->is_negated()) {
				6240	AddInverse(ranges);
				6241	} else {
				6242	for (int i = 0; i < ranges->length(); i++)
				6243	AddRange(ranges->at(i));
				6244	}
				6245	break;
				6246	}
				6247	default: {
				6248	UNIMPLEMENTED();
				6249	}
				6250	}
				6251	}
				6252
				6253
				6254	void DispatchTableConstructor::VisitAction(ActionNode* that) {
				6255	RegExpNode* target = that->on_success();
				6256	target->Accept(this);
				6257	}
				6258
				6259
				6260	RegExpEngine::CompilationResult RegExpEngine::Compile(
				6261	Isolate* isolate, Zone* zone, RegExpCompileData* data, bool ignore_case,
				6262	bool is_global, bool is_multiline, bool is_sticky, Handle<String> pattern,
				6263	Handle<String> sample_subject, bool is_one_byte) {
				6264	if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
				6265	return IrregexpRegExpTooBig(isolate);
				6266	}
				6267	RegExpCompiler compiler(isolate, zone, data->capture_count, ignore_case,
				6268	is_one_byte);
				6269
				6270	if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
				6271
				6272	// Sample some characters from the middle of the string.
				6273	static const int kSampleSize = 128;
				6274
				6275	sample_subject = String::Flatten(sample_subject);
				6276	int chars_sampled = 0;
				6277	int half_way = (sample_subject->length() - kSampleSize) / 2;
				6278	for (int i = Max(0, half_way);
				6279	i < sample_subject->length() && chars_sampled < kSampleSize;
				6280	i++, chars_sampled++) {
				6281	compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
				6282	}
				6283
				6284	// Wrap the body of the regexp in capture #0.
				6285	RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
				6286	0,
				6287	&compiler,
				6288	compiler.accept());
				6289	RegExpNode* node = captured_body;
				6290	bool is_end_anchored = data->tree->IsAnchoredAtEnd();
				6291	bool is_start_anchored = data->tree->IsAnchoredAtStart();
				6292	int max_length = data->tree->max_match();
				6293	if (!is_start_anchored && !is_sticky) {
				6294	// Add a .*? at the beginning, outside the body capture, unless
				6295	// this expression is anchored at the beginning or sticky.
				6296	RegExpNode* loop_node = RegExpQuantifier::ToNode(
				6297	0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
				6298	&compiler, captured_body, data->contains_anchor);
				6299
				6300	if (data->contains_anchor) {
				6301	// Unroll loop once, to take care of the case that might start
				6302	// at the start of input.
				6303	ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
				6304	first_step_node->AddAlternative(GuardedAlternative(captured_body));
				6305	first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
				6306	new (zone) RegExpCharacterClass('*'), false, loop_node)));
				6307	node = first_step_node;
				6308	} else {
				6309	node = loop_node;
				6310	}
				6311	}
				6312	if (is_one_byte) {
				6313	node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
				6314	// Do it again to propagate the new nodes to places where they were not
				6315	// put because they had not been calculated yet.
				6316	if (node != NULL) {
				6317	node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
				6318	}
				6319	}
				6320
				6321	if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
				6322	data->node = node;
				6323	Analysis analysis(isolate, ignore_case, is_one_byte);
				6324	analysis.EnsureAnalyzed(node);
				6325	if (analysis.has_failed()) {
				6326	const char* error_message = analysis.error_message();
				6327	return CompilationResult(isolate, error_message);
				6328	}
				6329
				6330	// Create the correct assembler for the architecture.
				6331	#ifndef V8_INTERPRETED_REGEXP
				6332	// Native regexp implementation.
				6333
				6334	NativeRegExpMacroAssembler::Mode mode =
				6335	is_one_byte ? NativeRegExpMacroAssembler::LATIN1
				6336	: NativeRegExpMacroAssembler::UC16;
				6337
				6338	#if V8_TARGET_ARCH_IA32
				6339	RegExpMacroAssemblerIA32 macro_assembler(isolate, zone, mode,
				6340	(data->capture_count + 1) * 2);
				6341	#elif V8_TARGET_ARCH_X64
				6342	RegExpMacroAssemblerX64 macro_assembler(isolate, zone, mode,
				6343	(data->capture_count + 1) * 2);
				6344	#elif V8_TARGET_ARCH_ARM
				6345	RegExpMacroAssemblerARM macro_assembler(isolate, zone, mode,
				6346	(data->capture_count + 1) * 2);
				6347	#elif V8_TARGET_ARCH_ARM64
				6348	RegExpMacroAssemblerARM64 macro_assembler(isolate, zone, mode,
				6349	(data->capture_count + 1) * 2);
				6350	#elif V8_TARGET_ARCH_PPC
				6351	RegExpMacroAssemblerPPC macro_assembler(isolate, zone, mode,
				6352	(data->capture_count + 1) * 2);
				6353	#elif V8_TARGET_ARCH_MIPS
				6354	RegExpMacroAssemblerMIPS macro_assembler(isolate, zone, mode,
				6355	(data->capture_count + 1) * 2);
				6356	#elif V8_TARGET_ARCH_MIPS64
				6357	RegExpMacroAssemblerMIPS macro_assembler(isolate, zone, mode,
				6358	(data->capture_count + 1) * 2);
				6359	#elif V8_TARGET_ARCH_X87
				6360	RegExpMacroAssemblerX87 macro_assembler(isolate, zone, mode,
				6361	(data->capture_count + 1) * 2);
				6362	#else
				6363	#error "Unsupported architecture"
				6364	#endif
				6365
				6366	#else // V8_INTERPRETED_REGEXP
				6367	// Interpreted regexp implementation.
				6368	EmbeddedVector<byte, 1024> codes;
				6369	RegExpMacroAssemblerIrregexp macro_assembler(isolate, codes, zone);
				6370	#endif // V8_INTERPRETED_REGEXP
				6371
				6372	macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern));
				6373
				6374	// Inserted here, instead of in Assembler, because it depends on information
				6375	// in the AST that isn't replicated in the Node structure.
				6376	static const int kMaxBacksearchLimit = 1024;
				6377	if (is_end_anchored &&
				6378	!is_start_anchored &&
				6379	max_length < kMaxBacksearchLimit) {
				6380	macro_assembler.SetCurrentPositionFromEnd(max_length);
				6381	}
				6382
				6383	if (is_global) {
				6384	macro_assembler.set_global_mode(
				6385	(data->tree->min_match() > 0)
				6386	? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
				6387	: RegExpMacroAssembler::GLOBAL);
				6388	}
				6389
				6390	return compiler.Assemble(&macro_assembler,
				6391	node,
				6392	data->capture_count,
				6393	pattern);
				6394	}
				6395
				6396
				6397	bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
				6398	Heap* heap = pattern->GetHeap();
				6399	bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
				6400	if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit &&
				6401	heap->isolate()->memory_allocator()->SizeExecutable() >
				6402	RegExpImpl::kRegExpExecutableMemoryLimit) {
				6403	too_much = true;
				6404	}
				6405	return too_much;
				6406	}
				6407
				6408
				6409	Object* RegExpResultsCache::Lookup(Heap* heap, String* key_string,
				6410	Object* key_pattern,
				6411	FixedArray** last_match_cache,
				6412	ResultsCacheType type) {
				6413	FixedArray* cache;
				6414	if (!key_string->IsInternalizedString()) return Smi::FromInt(0);
				6415	if (type == STRING_SPLIT_SUBSTRINGS) {
				6416	DCHECK(key_pattern->IsString());
				6417	if (!key_pattern->IsInternalizedString()) return Smi::FromInt(0);
				6418	cache = heap->string_split_cache();
				6419	} else {
				6420	DCHECK(type == REGEXP_MULTIPLE_INDICES);
				6421	DCHECK(key_pattern->IsFixedArray());
				6422	cache = heap->regexp_multiple_cache();
				6423	}
				6424
				6425	uint32_t hash = key_string->Hash();
				6426	uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
				6427	~(kArrayEntriesPerCacheEntry - 1));
				6428	if (cache->get(index + kStringOffset) != key_string \|\|
				6429	cache->get(index + kPatternOffset) != key_pattern) {
				6430	index =
				6431	((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
				6432	if (cache->get(index + kStringOffset) != key_string \|\|
				6433	cache->get(index + kPatternOffset) != key_pattern) {
				6434	return Smi::FromInt(0);
				6435	}
				6436	}
				6437
				6438	*last_match_cache = FixedArray::cast(cache->get(index + kLastMatchOffset));
				6439	return cache->get(index + kArrayOffset);
				6440	}
				6441
				6442
				6443	void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string,
				6444	Handle<Object> key_pattern,
				6445	Handle<FixedArray> value_array,
				6446	Handle<FixedArray> last_match_cache,
				6447	ResultsCacheType type) {
				6448	Factory* factory = isolate->factory();
				6449	Handle<FixedArray> cache;
				6450	if (!key_string->IsInternalizedString()) return;
				6451	if (type == STRING_SPLIT_SUBSTRINGS) {
				6452	DCHECK(key_pattern->IsString());
				6453	if (!key_pattern->IsInternalizedString()) return;
				6454	cache = factory->string_split_cache();
				6455	} else {
				6456	DCHECK(type == REGEXP_MULTIPLE_INDICES);
				6457	DCHECK(key_pattern->IsFixedArray());
				6458	cache = factory->regexp_multiple_cache();
				6459	}
				6460
				6461	uint32_t hash = key_string->Hash();
				6462	uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
				6463	~(kArrayEntriesPerCacheEntry - 1));
				6464	if (cache->get(index + kStringOffset) == Smi::FromInt(0)) {
				6465	cache->set(index + kStringOffset, *key_string);
				6466	cache->set(index + kPatternOffset, *key_pattern);
				6467	cache->set(index + kArrayOffset, *value_array);
				6468	cache->set(index + kLastMatchOffset, *last_match_cache);
				6469	} else {
				6470	uint32_t index2 =
				6471	((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
				6472	if (cache->get(index2 + kStringOffset) == Smi::FromInt(0)) {
				6473	cache->set(index2 + kStringOffset, *key_string);
				6474	cache->set(index2 + kPatternOffset, *key_pattern);
				6475	cache->set(index2 + kArrayOffset, *value_array);
				6476	cache->set(index2 + kLastMatchOffset, *last_match_cache);
				6477	} else {
				6478	cache->set(index2 + kStringOffset, Smi::FromInt(0));
				6479	cache->set(index2 + kPatternOffset, Smi::FromInt(0));
				6480	cache->set(index2 + kArrayOffset, Smi::FromInt(0));
				6481	cache->set(index2 + kLastMatchOffset, Smi::FromInt(0));
				6482	cache->set(index + kStringOffset, *key_string);
				6483	cache->set(index + kPatternOffset, *key_pattern);
				6484	cache->set(index + kArrayOffset, *value_array);
				6485	cache->set(index + kLastMatchOffset, *last_match_cache);
				6486	}
				6487	}
				6488	// If the array is a reasonably short list of substrings, convert it into a
				6489	// list of internalized strings.
				6490	if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) {
				6491	for (int i = 0; i < value_array->length(); i++) {
				6492	Handle<String> str(String::cast(value_array->get(i)), isolate);
				6493	Handle<String> internalized_str = factory->InternalizeString(str);
				6494	value_array->set(i, *internalized_str);
				6495	}
				6496	}
				6497	// Convert backing store to a copy-on-write array.
				6498	value_array->set_map_no_write_barrier(*factory->fixed_cow_array_map());
				6499	}
				6500
				6501
				6502	void RegExpResultsCache::Clear(FixedArray* cache) {
				6503	for (int i = 0; i < kRegExpResultsCacheSize; i++) {
				6504	cache->set(i, Smi::FromInt(0));
				6505	}
				6506	}
				6507
				6508	} // namespace internal
				6509	} // namespace v8