Blame - src/exec.rs - platform/external/rust/crates/regex

blob: acca2dccb66f4d2ee547fe4a5f0c738ce2310a43 [file] [log] [blame]

Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame^]	1	use std::cell::RefCell;
				2	use std::collections::HashMap;
				3	use std::sync::Arc;
				4
				5	#[cfg(feature = "perf-literal")]
				6	use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
				7	use syntax::hir::literal::Literals;
				8	use syntax::hir::Hir;
				9	use syntax::ParserBuilder;
				10
				11	use backtrack;
				12	use cache::{Cached, CachedGuard};
				13	use compile::Compiler;
				14	#[cfg(feature = "perf-dfa")]
				15	use dfa;
				16	use error::Error;
				17	use input::{ByteInput, CharInput};
				18	use literal::LiteralSearcher;
				19	use pikevm;
				20	use prog::Program;
				21	use re_builder::RegexOptions;
				22	use re_bytes;
				23	use re_set;
				24	use re_trait::{Locations, RegularExpression, Slot};
				25	use re_unicode;
				26	use utf8::next_utf8;
				27
				28	/// `Exec` manages the execution of a regular expression.
				29	///
				30	/// In particular, this manages the various compiled forms of a single regular
				31	/// expression and the choice of which matching engine to use to execute a
				32	/// regular expression.
				33	pub struct Exec {
				34	/// All read only state.
				35	ro: Arc<ExecReadOnly>,
				36	/// Caches for the various matching engines.
				37	cache: Cached<ProgramCache>,
				38	}
				39
				40	/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
				41	/// means it is no longer Sync, but we can now avoid the overhead of
				42	/// synchronization to fetch the cache.
				43	#[derive(Debug)]
				44	pub struct ExecNoSync<'c> {
				45	/// All read only state.
				46	ro: &'c Arc<ExecReadOnly>,
				47	/// Caches for the various matching engines.
				48	cache: CachedGuard<'c, ProgramCache>,
				49	}
				50
				51	/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
				52	pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
				53
				54	/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
				55	/// state is determined at compile time and never changes during search.
				56	#[derive(Debug)]
				57	struct ExecReadOnly {
				58	/// The original regular expressions given by the caller to compile.
				59	res: Vec<String>,
				60	/// A compiled program that is used in the NFA simulation and backtracking.
				61	/// It can be byte-based or Unicode codepoint based.
				62	///
				63	/// N.B. It is not possibly to make this byte-based from the public API.
				64	/// It is only used for testing byte based programs in the NFA simulations.
				65	nfa: Program,
				66	/// A compiled byte based program for DFA execution. This is only used
				67	/// if a DFA can be executed. (Currently, only word boundary assertions are
				68	/// not supported.) Note that this program contains an embedded `.*?`
				69	/// preceding the first capture group, unless the regex is anchored at the
				70	/// beginning.
				71	dfa: Program,
				72	/// The same as above, except the program is reversed (and there is no
				73	/// preceding `.*?`). This is used by the DFA to find the starting location
				74	/// of matches.
				75	dfa_reverse: Program,
				76	/// A set of suffix literals extracted from the regex.
				77	///
				78	/// Prefix literals are stored on the `Program`, since they are used inside
				79	/// the matching engines.
				80	suffixes: LiteralSearcher,
				81	/// An Aho-Corasick automaton with leftmost-first match semantics.
				82	///
				83	/// This is only set when the entire regex is a simple unanchored
				84	/// alternation of literals. We could probably use it more circumstances,
				85	/// but this is already hacky enough in this architecture.
				86	///
				87	/// N.B. We use u32 as a state ID representation under the assumption that
				88	/// if we were to exhaust the ID space, we probably would have long
				89	/// surpassed the compilation size limit.
				90	#[cfg(feature = "perf-literal")]
				91	ac: Option<AhoCorasick<u32>>,
				92	/// match_type encodes as much upfront knowledge about how we're going to
				93	/// execute a search as possible.
				94	match_type: MatchType,
				95	}
				96
				97	/// Facilitates the construction of an executor by exposing various knobs
				98	/// to control how a regex is executed and what kinds of resources it's
				99	/// permitted to use.
				100	pub struct ExecBuilder {
				101	options: RegexOptions,
				102	match_type: Option<MatchType>,
				103	bytes: bool,
				104	only_utf8: bool,
				105	}
				106
				107	/// Parsed represents a set of parsed regular expressions and their detected
				108	/// literals.
				109	struct Parsed {
				110	exprs: Vec<Hir>,
				111	prefixes: Literals,
				112	suffixes: Literals,
				113	bytes: bool,
				114	}
				115
				116	impl ExecBuilder {
				117	/// Create a regex execution builder.
				118	///
				119	/// This uses default settings for everything except the regex itself,
				120	/// which must be provided. Further knobs can be set by calling methods,
				121	/// and then finally, `build` to actually create the executor.
				122	pub fn new(re: &str) -> Self {
				123	Self::new_many(&[re])
				124	}
				125
				126	/// Like new, but compiles the union of the given regular expressions.
				127	///
				128	/// Note that when compiling 2 or more regular expressions, capture groups
				129	/// are completely unsupported. (This means both `find` and `captures`
				130	/// wont work.)
				131	pub fn new_many<I, S>(res: I) -> Self
				132	where
				133	S: AsRef<str>,
				134	I: IntoIterator<Item = S>,
				135	{
				136	let mut opts = RegexOptions::default();
				137	opts.pats = res.into_iter().map(\|s\| s.as_ref().to_owned()).collect();
				138	Self::new_options(opts)
				139	}
				140
				141	/// Create a regex execution builder.
				142	pub fn new_options(opts: RegexOptions) -> Self {
				143	ExecBuilder {
				144	options: opts,
				145	match_type: None,
				146	bytes: false,
				147	only_utf8: true,
				148	}
				149	}
				150
				151	/// Set the matching engine to be automatically determined.
				152	///
				153	/// This is the default state and will apply whatever optimizations are
				154	/// possible, such as running a DFA.
				155	///
				156	/// This overrides whatever was previously set via the `nfa` or
				157	/// `bounded_backtracking` methods.
				158	pub fn automatic(mut self) -> Self {
				159	self.match_type = None;
				160	self
				161	}
				162
				163	/// Sets the matching engine to use the NFA algorithm no matter what
				164	/// optimizations are possible.
				165	///
				166	/// This overrides whatever was previously set via the `automatic` or
				167	/// `bounded_backtracking` methods.
				168	pub fn nfa(mut self) -> Self {
				169	self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM));
				170	self
				171	}
				172
				173	/// Sets the matching engine to use a bounded backtracking engine no
				174	/// matter what optimizations are possible.
				175	///
				176	/// One must use this with care, since the bounded backtracking engine
				177	/// uses memory proportion to `len(regex) * len(text)`.
				178	///
				179	/// This overrides whatever was previously set via the `automatic` or
				180	/// `nfa` methods.
				181	pub fn bounded_backtracking(mut self) -> Self {
				182	self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack));
				183	self
				184	}
				185
				186	/// Compiles byte based programs for use with the NFA matching engines.
				187	///
				188	/// By default, the NFA engines match on Unicode scalar values. They can
				189	/// be made to use byte based programs instead. In general, the byte based
				190	/// programs are slower because of a less efficient encoding of character
				191	/// classes.
				192	///
				193	/// Note that this does not impact DFA matching engines, which always
				194	/// execute on bytes.
				195	pub fn bytes(mut self, yes: bool) -> Self {
				196	self.bytes = yes;
				197	self
				198	}
				199
				200	/// When disabled, the program compiled may match arbitrary bytes.
				201	///
				202	/// When enabled (the default), all compiled programs exclusively match
				203	/// valid UTF-8 bytes.
				204	pub fn only_utf8(mut self, yes: bool) -> Self {
				205	self.only_utf8 = yes;
				206	self
				207	}
				208
				209	/// Set the Unicode flag.
				210	pub fn unicode(mut self, yes: bool) -> Self {
				211	self.options.unicode = yes;
				212	self
				213	}
				214
				215	/// Parse the current set of patterns into their AST and extract literals.
				216	fn parse(&self) -> Result<Parsed, Error> {
				217	let mut exprs = Vec::with_capacity(self.options.pats.len());
				218	let mut prefixes = Some(Literals::empty());
				219	let mut suffixes = Some(Literals::empty());
				220	let mut bytes = false;
				221	let is_set = self.options.pats.len() > 1;
				222	// If we're compiling a regex set and that set has any anchored
				223	// expressions, then disable all literal optimizations.
				224	for pat in &self.options.pats {
				225	let mut parser = ParserBuilder::new()
				226	.octal(self.options.octal)
				227	.case_insensitive(self.options.case_insensitive)
				228	.multi_line(self.options.multi_line)
				229	.dot_matches_new_line(self.options.dot_matches_new_line)
				230	.swap_greed(self.options.swap_greed)
				231	.ignore_whitespace(self.options.ignore_whitespace)
				232	.unicode(self.options.unicode)
				233	.allow_invalid_utf8(!self.only_utf8)
				234	.nest_limit(self.options.nest_limit)
				235	.build();
				236	let expr =
				237	parser.parse(pat).map_err(\|e\| Error::Syntax(e.to_string()))?;
				238	bytes = bytes \|\| !expr.is_always_utf8();
				239
				240	if cfg!(feature = "perf-literal") {
				241	if !expr.is_anchored_start() && expr.is_any_anchored_start() {
				242	// Partial anchors unfortunately make it hard to use
				243	// prefixes, so disable them.
				244	prefixes = None;
				245	} else if is_set && expr.is_anchored_start() {
				246	// Regex sets with anchors do not go well with literal
				247	// optimizations.
				248	prefixes = None;
				249	}
				250	prefixes = prefixes.and_then(\|mut prefixes\| {
				251	if !prefixes.union_prefixes(&expr) {
				252	None
				253	} else {
				254	Some(prefixes)
				255	}
				256	});
				257
				258	if !expr.is_anchored_end() && expr.is_any_anchored_end() {
				259	// Partial anchors unfortunately make it hard to use
				260	// suffixes, so disable them.
				261	suffixes = None;
				262	} else if is_set && expr.is_anchored_end() {
				263	// Regex sets with anchors do not go well with literal
				264	// optimizations.
				265	suffixes = None;
				266	}
				267	suffixes = suffixes.and_then(\|mut suffixes\| {
				268	if !suffixes.union_suffixes(&expr) {
				269	None
				270	} else {
				271	Some(suffixes)
				272	}
				273	});
				274	}
				275	exprs.push(expr);
				276	}
				277	Ok(Parsed {
				278	exprs: exprs,
				279	prefixes: prefixes.unwrap_or_else(Literals::empty),
				280	suffixes: suffixes.unwrap_or_else(Literals::empty),
				281	bytes: bytes,
				282	})
				283	}
				284
				285	/// Build an executor that can run a regular expression.
				286	pub fn build(self) -> Result<Exec, Error> {
				287	// Special case when we have no patterns to compile.
				288	// This can happen when compiling a regex set.
				289	if self.options.pats.is_empty() {
				290	let ro = Arc::new(ExecReadOnly {
				291	res: vec![],
				292	nfa: Program::new(),
				293	dfa: Program::new(),
				294	dfa_reverse: Program::new(),
				295	suffixes: LiteralSearcher::empty(),
				296	#[cfg(feature = "perf-literal")]
				297	ac: None,
				298	match_type: MatchType::Nothing,
				299	});
				300	return Ok(Exec { ro: ro, cache: Cached::new() });
				301	}
				302	let parsed = self.parse()?;
				303	let mut nfa = Compiler::new()
				304	.size_limit(self.options.size_limit)
				305	.bytes(self.bytes \|\| parsed.bytes)
				306	.only_utf8(self.only_utf8)
				307	.compile(&parsed.exprs)?;
				308	let mut dfa = Compiler::new()
				309	.size_limit(self.options.size_limit)
				310	.dfa(true)
				311	.only_utf8(self.only_utf8)
				312	.compile(&parsed.exprs)?;
				313	let mut dfa_reverse = Compiler::new()
				314	.size_limit(self.options.size_limit)
				315	.dfa(true)
				316	.only_utf8(self.only_utf8)
				317	.reverse(true)
				318	.compile(&parsed.exprs)?;
				319
				320	#[cfg(feature = "perf-literal")]
				321	let ac = self.build_aho_corasick(&parsed);
				322	nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
				323	dfa.prefixes = nfa.prefixes.clone();
				324	dfa.dfa_size_limit = self.options.dfa_size_limit;
				325	dfa_reverse.dfa_size_limit = self.options.dfa_size_limit;
				326
				327	let mut ro = ExecReadOnly {
				328	res: self.options.pats,
				329	nfa: nfa,
				330	dfa: dfa,
				331	dfa_reverse: dfa_reverse,
				332	suffixes: LiteralSearcher::suffixes(parsed.suffixes),
				333	#[cfg(feature = "perf-literal")]
				334	ac: ac,
				335	match_type: MatchType::Nothing,
				336	};
				337	ro.match_type = ro.choose_match_type(self.match_type);
				338
				339	let ro = Arc::new(ro);
				340	Ok(Exec { ro: ro, cache: Cached::new() })
				341	}
				342
				343	#[cfg(feature = "perf-literal")]
				344	fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick<u32>> {
				345	if parsed.exprs.len() != 1 {
				346	return None;
				347	}
				348	let lits = match alternation_literals(&parsed.exprs[0]) {
				349	None => return None,
				350	Some(lits) => lits,
				351	};
				352	// If we have a small number of literals, then let Teddy handle
				353	// things (see literal/mod.rs).
				354	if lits.len() <= 32 {
				355	return None;
				356	}
				357	Some(
				358	AhoCorasickBuilder::new()
				359	.match_kind(MatchKind::LeftmostFirst)
				360	.auto_configure(&lits)
				361	// We always want this to reduce size, regardless
				362	// of what auto-configure does.
				363	.byte_classes(true)
				364	.build_with_size::<u32, _, _>(&lits)
				365	// This should never happen because we'd long exceed the
				366	// compilation limit for regexes first.
				367	.expect("AC automaton too big"),
				368	)
				369	}
				370	}
				371
				372	impl<'c> RegularExpression for ExecNoSyncStr<'c> {
				373	type Text = str;
				374
				375	fn slots_len(&self) -> usize {
				376	self.0.slots_len()
				377	}
				378
				379	fn next_after_empty(&self, text: &str, i: usize) -> usize {
				380	next_utf8(text.as_bytes(), i)
				381	}
				382
				383	#[cfg_attr(feature = "perf-inline", inline(always))]
				384	fn shortest_match_at(&self, text: &str, start: usize) -> Option<usize> {
				385	self.0.shortest_match_at(text.as_bytes(), start)
				386	}
				387
				388	#[cfg_attr(feature = "perf-inline", inline(always))]
				389	fn is_match_at(&self, text: &str, start: usize) -> bool {
				390	self.0.is_match_at(text.as_bytes(), start)
				391	}
				392
				393	#[cfg_attr(feature = "perf-inline", inline(always))]
				394	fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
				395	self.0.find_at(text.as_bytes(), start)
				396	}
				397
				398	#[cfg_attr(feature = "perf-inline", inline(always))]
				399	fn captures_read_at(
				400	&self,
				401	locs: &mut Locations,
				402	text: &str,
				403	start: usize,
				404	) -> Option<(usize, usize)> {
				405	self.0.captures_read_at(locs, text.as_bytes(), start)
				406	}
				407	}
				408
				409	impl<'c> RegularExpression for ExecNoSync<'c> {
				410	type Text = [u8];
				411
				412	/// Returns the number of capture slots in the regular expression. (There
				413	/// are two slots for every capture group, corresponding to possibly empty
				414	/// start and end locations of the capture.)
				415	fn slots_len(&self) -> usize {
				416	self.ro.nfa.captures.len() * 2
				417	}
				418
				419	fn next_after_empty(&self, _text: &[u8], i: usize) -> usize {
				420	i + 1
				421	}
				422
				423	/// Returns the end of a match location, possibly occurring before the
				424	/// end location of the correct leftmost-first match.
				425	#[cfg_attr(feature = "perf-inline", inline(always))]
				426	fn shortest_match_at(&self, text: &[u8], start: usize) -> Option<usize> {
				427	if !self.is_anchor_end_match(text) {
				428	return None;
				429	}
				430	match self.ro.match_type {
				431	#[cfg(feature = "perf-literal")]
				432	MatchType::Literal(ty) => {
				433	self.find_literals(ty, text, start).map(\|(_, e)\| e)
				434	}
				435	#[cfg(feature = "perf-dfa")]
				436	MatchType::Dfa \| MatchType::DfaMany => {
				437	match self.shortest_dfa(text, start) {
				438	dfa::Result::Match(end) => Some(end),
				439	dfa::Result::NoMatch(_) => None,
				440	dfa::Result::Quit => self.shortest_nfa(text, start),
				441	}
				442	}
				443	#[cfg(feature = "perf-dfa")]
				444	MatchType::DfaAnchoredReverse => {
				445	match dfa::Fsm::reverse(
				446	&self.ro.dfa_reverse,
				447	self.cache.value(),
				448	true,
				449	&text[start..],
				450	text.len(),
				451	) {
				452	dfa::Result::Match(_) => Some(text.len()),
				453	dfa::Result::NoMatch(_) => None,
				454	dfa::Result::Quit => self.shortest_nfa(text, start),
				455	}
				456	}
				457	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				458	MatchType::DfaSuffix => {
				459	match self.shortest_dfa_reverse_suffix(text, start) {
				460	dfa::Result::Match(e) => Some(e),
				461	dfa::Result::NoMatch(_) => None,
				462	dfa::Result::Quit => self.shortest_nfa(text, start),
				463	}
				464	}
				465	MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start),
				466	MatchType::Nothing => None,
				467	}
				468	}
				469
				470	/// Returns true if and only if the regex matches text.
				471	///
				472	/// For single regular expressions, this is equivalent to calling
				473	/// shortest_match(...).is_some().
				474	#[cfg_attr(feature = "perf-inline", inline(always))]
				475	fn is_match_at(&self, text: &[u8], start: usize) -> bool {
				476	if !self.is_anchor_end_match(text) {
				477	return false;
				478	}
				479	// We need to do this dance because shortest_match relies on the NFA
				480	// filling in captures[1], but a RegexSet has no captures. In other
				481	// words, a RegexSet can't (currently) use shortest_match. ---AG
				482	match self.ro.match_type {
				483	#[cfg(feature = "perf-literal")]
				484	MatchType::Literal(ty) => {
				485	self.find_literals(ty, text, start).is_some()
				486	}
				487	#[cfg(feature = "perf-dfa")]
				488	MatchType::Dfa \| MatchType::DfaMany => {
				489	match self.shortest_dfa(text, start) {
				490	dfa::Result::Match(_) => true,
				491	dfa::Result::NoMatch(_) => false,
				492	dfa::Result::Quit => self.match_nfa(text, start),
				493	}
				494	}
				495	#[cfg(feature = "perf-dfa")]
				496	MatchType::DfaAnchoredReverse => {
				497	match dfa::Fsm::reverse(
				498	&self.ro.dfa_reverse,
				499	self.cache.value(),
				500	true,
				501	&text[start..],
				502	text.len(),
				503	) {
				504	dfa::Result::Match(_) => true,
				505	dfa::Result::NoMatch(_) => false,
				506	dfa::Result::Quit => self.match_nfa(text, start),
				507	}
				508	}
				509	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				510	MatchType::DfaSuffix => {
				511	match self.shortest_dfa_reverse_suffix(text, start) {
				512	dfa::Result::Match(_) => true,
				513	dfa::Result::NoMatch(_) => false,
				514	dfa::Result::Quit => self.match_nfa(text, start),
				515	}
				516	}
				517	MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start),
				518	MatchType::Nothing => false,
				519	}
				520	}
				521
				522	/// Finds the start and end location of the leftmost-first match, starting
				523	/// at the given location.
				524	#[cfg_attr(feature = "perf-inline", inline(always))]
				525	fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> {
				526	if !self.is_anchor_end_match(text) {
				527	return None;
				528	}
				529	match self.ro.match_type {
				530	#[cfg(feature = "perf-literal")]
				531	MatchType::Literal(ty) => self.find_literals(ty, text, start),
				532	#[cfg(feature = "perf-dfa")]
				533	MatchType::Dfa => match self.find_dfa_forward(text, start) {
				534	dfa::Result::Match((s, e)) => Some((s, e)),
				535	dfa::Result::NoMatch(_) => None,
				536	dfa::Result::Quit => {
				537	self.find_nfa(MatchNfaType::Auto, text, start)
				538	}
				539	},
				540	#[cfg(feature = "perf-dfa")]
				541	MatchType::DfaAnchoredReverse => {
				542	match self.find_dfa_anchored_reverse(text, start) {
				543	dfa::Result::Match((s, e)) => Some((s, e)),
				544	dfa::Result::NoMatch(_) => None,
				545	dfa::Result::Quit => {
				546	self.find_nfa(MatchNfaType::Auto, text, start)
				547	}
				548	}
				549	}
				550	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				551	MatchType::DfaSuffix => {
				552	match self.find_dfa_reverse_suffix(text, start) {
				553	dfa::Result::Match((s, e)) => Some((s, e)),
				554	dfa::Result::NoMatch(_) => None,
				555	dfa::Result::Quit => {
				556	self.find_nfa(MatchNfaType::Auto, text, start)
				557	}
				558	}
				559	}
				560	MatchType::Nfa(ty) => self.find_nfa(ty, text, start),
				561	MatchType::Nothing => None,
				562	#[cfg(feature = "perf-dfa")]
				563	MatchType::DfaMany => {
				564	unreachable!("BUG: RegexSet cannot be used with find")
				565	}
				566	}
				567	}
				568
				569	/// Finds the start and end location of the leftmost-first match and also
				570	/// fills in all matching capture groups.
				571	///
				572	/// The number of capture slots given should be equal to the total number
				573	/// of capture slots in the compiled program.
				574	///
				575	/// Note that the first two slots always correspond to the start and end
				576	/// locations of the overall match.
				577	fn captures_read_at(
				578	&self,
				579	locs: &mut Locations,
				580	text: &[u8],
				581	start: usize,
				582	) -> Option<(usize, usize)> {
				583	let slots = locs.as_slots();
				584	for slot in slots.iter_mut() {
				585	*slot = None;
				586	}
				587	// If the caller unnecessarily uses this, then we try to save them
				588	// from themselves.
				589	match slots.len() {
				590	0 => return self.find_at(text, start),
				591	2 => {
				592	return self.find_at(text, start).map(\|(s, e)\| {
				593	slots[0] = Some(s);
				594	slots[1] = Some(e);
				595	(s, e)
				596	});
				597	}
				598	_ => {} // fallthrough
				599	}
				600	if !self.is_anchor_end_match(text) {
				601	return None;
				602	}
				603	match self.ro.match_type {
				604	#[cfg(feature = "perf-literal")]
				605	MatchType::Literal(ty) => {
				606	self.find_literals(ty, text, start).and_then(\|(s, e)\| {
				607	self.captures_nfa_type(
				608	MatchNfaType::Auto,
				609	slots,
				610	text,
				611	s,
				612	e,
				613	)
				614	})
				615	}
				616	#[cfg(feature = "perf-dfa")]
				617	MatchType::Dfa => {
				618	if self.ro.nfa.is_anchored_start {
				619	self.captures_nfa(slots, text, start)
				620	} else {
				621	match self.find_dfa_forward(text, start) {
				622	dfa::Result::Match((s, e)) => self.captures_nfa_type(
				623	MatchNfaType::Auto,
				624	slots,
				625	text,
				626	s,
				627	e,
				628	),
				629	dfa::Result::NoMatch(_) => None,
				630	dfa::Result::Quit => {
				631	self.captures_nfa(slots, text, start)
				632	}
				633	}
				634	}
				635	}
				636	#[cfg(feature = "perf-dfa")]
				637	MatchType::DfaAnchoredReverse => {
				638	match self.find_dfa_anchored_reverse(text, start) {
				639	dfa::Result::Match((s, e)) => self.captures_nfa_type(
				640	MatchNfaType::Auto,
				641	slots,
				642	text,
				643	s,
				644	e,
				645	),
				646	dfa::Result::NoMatch(_) => None,
				647	dfa::Result::Quit => self.captures_nfa(slots, text, start),
				648	}
				649	}
				650	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				651	MatchType::DfaSuffix => {
				652	match self.find_dfa_reverse_suffix(text, start) {
				653	dfa::Result::Match((s, e)) => self.captures_nfa_type(
				654	MatchNfaType::Auto,
				655	slots,
				656	text,
				657	s,
				658	e,
				659	),
				660	dfa::Result::NoMatch(_) => None,
				661	dfa::Result::Quit => self.captures_nfa(slots, text, start),
				662	}
				663	}
				664	MatchType::Nfa(ty) => {
				665	self.captures_nfa_type(ty, slots, text, start, text.len())
				666	}
				667	MatchType::Nothing => None,
				668	#[cfg(feature = "perf-dfa")]
				669	MatchType::DfaMany => {
				670	unreachable!("BUG: RegexSet cannot be used with captures")
				671	}
				672	}
				673	}
				674	}
				675
				676	impl<'c> ExecNoSync<'c> {
				677	/// Finds the leftmost-first match using only literal search.
				678	#[cfg(feature = "perf-literal")]
				679	#[cfg_attr(feature = "perf-inline", inline(always))]
				680	fn find_literals(
				681	&self,
				682	ty: MatchLiteralType,
				683	text: &[u8],
				684	start: usize,
				685	) -> Option<(usize, usize)> {
				686	use self::MatchLiteralType::*;
				687	match ty {
				688	Unanchored => {
				689	let lits = &self.ro.nfa.prefixes;
				690	lits.find(&text[start..]).map(\|(s, e)\| (start + s, start + e))
				691	}
				692	AnchoredStart => {
				693	let lits = &self.ro.nfa.prefixes;
				694	if start == 0 \|\| !self.ro.nfa.is_anchored_start {
				695	lits.find_start(&text[start..])
				696	.map(\|(s, e)\| (start + s, start + e))
				697	} else {
				698	None
				699	}
				700	}
				701	AnchoredEnd => {
				702	let lits = &self.ro.suffixes;
				703	lits.find_end(&text[start..])
				704	.map(\|(s, e)\| (start + s, start + e))
				705	}
				706	AhoCorasick => self
				707	.ro
				708	.ac
				709	.as_ref()
				710	.unwrap()
				711	.find(&text[start..])
				712	.map(\|m\| (start + m.start(), start + m.end())),
				713	}
				714	}
				715
				716	/// Finds the leftmost-first match (start and end) using only the DFA.
				717	///
				718	/// If the result returned indicates that the DFA quit, then another
				719	/// matching engine should be used.
				720	#[cfg(feature = "perf-dfa")]
				721	#[cfg_attr(feature = "perf-inline", inline(always))]
				722	fn find_dfa_forward(
				723	&self,
				724	text: &[u8],
				725	start: usize,
				726	) -> dfa::Result<(usize, usize)> {
				727	use dfa::Result::*;
				728	let end = match dfa::Fsm::forward(
				729	&self.ro.dfa,
				730	self.cache.value(),
				731	false,
				732	text,
				733	start,
				734	) {
				735	NoMatch(i) => return NoMatch(i),
				736	Quit => return Quit,
				737	Match(end) if start == end => return Match((start, start)),
				738	Match(end) => end,
				739	};
				740	// Now run the DFA in reverse to find the start of the match.
				741	match dfa::Fsm::reverse(
				742	&self.ro.dfa_reverse,
				743	self.cache.value(),
				744	false,
				745	&text[start..],
				746	end - start,
				747	) {
				748	Match(s) => Match((start + s, end)),
				749	NoMatch(i) => NoMatch(i),
				750	Quit => Quit,
				751	}
				752	}
				753
				754	/// Finds the leftmost-first match (start and end) using only the DFA,
				755	/// but assumes the regex is anchored at the end and therefore starts at
				756	/// the end of the regex and matches in reverse.
				757	///
				758	/// If the result returned indicates that the DFA quit, then another
				759	/// matching engine should be used.
				760	#[cfg(feature = "perf-dfa")]
				761	#[cfg_attr(feature = "perf-inline", inline(always))]
				762	fn find_dfa_anchored_reverse(
				763	&self,
				764	text: &[u8],
				765	start: usize,
				766	) -> dfa::Result<(usize, usize)> {
				767	use dfa::Result::*;
				768	match dfa::Fsm::reverse(
				769	&self.ro.dfa_reverse,
				770	self.cache.value(),
				771	false,
				772	&text[start..],
				773	text.len() - start,
				774	) {
				775	Match(s) => Match((start + s, text.len())),
				776	NoMatch(i) => NoMatch(i),
				777	Quit => Quit,
				778	}
				779	}
				780
				781	/// Finds the end of the shortest match using only the DFA.
				782	#[cfg(feature = "perf-dfa")]
				783	#[cfg_attr(feature = "perf-inline", inline(always))]
				784	fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
				785	dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
				786	}
				787
				788	/// Finds the end of the shortest match using only the DFA by scanning for
				789	/// suffix literals.
				790	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				791	#[cfg_attr(feature = "perf-inline", inline(always))]
				792	fn shortest_dfa_reverse_suffix(
				793	&self,
				794	text: &[u8],
				795	start: usize,
				796	) -> dfa::Result<usize> {
				797	match self.exec_dfa_reverse_suffix(text, start) {
				798	None => self.shortest_dfa(text, start),
				799	Some(r) => r.map(\|(_, end)\| end),
				800	}
				801	}
				802
				803	/// Finds the end of the shortest match using only the DFA by scanning for
				804	/// suffix literals. It also reports the start of the match.
				805	///
				806	/// Note that if None is returned, then the optimization gave up to avoid
				807	/// worst case quadratic behavior. A forward scanning DFA should be tried
				808	/// next.
				809	///
				810	/// If a match is returned and the full leftmost-first match is desired,
				811	/// then a forward scan starting from the beginning of the match must be
				812	/// done.
				813	///
				814	/// If the result returned indicates that the DFA quit, then another
				815	/// matching engine should be used.
				816	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				817	#[cfg_attr(feature = "perf-inline", inline(always))]
				818	fn exec_dfa_reverse_suffix(
				819	&self,
				820	text: &[u8],
				821	original_start: usize,
				822	) -> Option<dfa::Result<(usize, usize)>> {
				823	use dfa::Result::*;
				824
				825	let lcs = self.ro.suffixes.lcs();
				826	debug_assert!(lcs.len() >= 1);
				827	let mut start = original_start;
				828	let mut end = start;
				829	let mut last_literal = start;
				830	while end <= text.len() {
				831	last_literal += match lcs.find(&text[last_literal..]) {
				832	None => return Some(NoMatch(text.len())),
				833	Some(i) => i,
				834	};
				835	end = last_literal + lcs.len();
				836	match dfa::Fsm::reverse(
				837	&self.ro.dfa_reverse,
				838	self.cache.value(),
				839	false,
				840	&text[start..end],
				841	end - start,
				842	) {
				843	Match(0) \| NoMatch(0) => return None,
				844	Match(i) => return Some(Match((start + i, end))),
				845	NoMatch(i) => {
				846	start += i;
				847	last_literal += 1;
				848	continue;
				849	}
				850	Quit => return Some(Quit),
				851	};
				852	}
				853	Some(NoMatch(text.len()))
				854	}
				855
				856	/// Finds the leftmost-first match (start and end) using only the DFA
				857	/// by scanning for suffix literals.
				858	///
				859	/// If the result returned indicates that the DFA quit, then another
				860	/// matching engine should be used.
				861	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				862	#[cfg_attr(feature = "perf-inline", inline(always))]
				863	fn find_dfa_reverse_suffix(
				864	&self,
				865	text: &[u8],
				866	start: usize,
				867	) -> dfa::Result<(usize, usize)> {
				868	use dfa::Result::*;
				869
				870	let match_start = match self.exec_dfa_reverse_suffix(text, start) {
				871	None => return self.find_dfa_forward(text, start),
				872	Some(Match((start, _))) => start,
				873	Some(r) => return r,
				874	};
				875	// At this point, we've found a match. The only way to quit now
				876	// without a match is if the DFA gives up (seems unlikely).
				877	//
				878	// Now run the DFA forwards to find the proper end of the match.
				879	// (The suffix literal match can only indicate the earliest
				880	// possible end location, which may appear before the end of the
				881	// leftmost-first match.)
				882	match dfa::Fsm::forward(
				883	&self.ro.dfa,
				884	self.cache.value(),
				885	false,
				886	text,
				887	match_start,
				888	) {
				889	NoMatch(_) => panic!("BUG: reverse match implies forward match"),
				890	Quit => Quit,
				891	Match(e) => Match((match_start, e)),
				892	}
				893	}
				894
				895	/// Executes the NFA engine to return whether there is a match or not.
				896	///
				897	/// Ideally, we could use shortest_nfa(...).is_some() and get the same
				898	/// performance characteristics, but regex sets don't have captures, which
				899	/// shortest_nfa depends on.
				900	#[cfg(feature = "perf-dfa")]
				901	fn match_nfa(&self, text: &[u8], start: usize) -> bool {
				902	self.match_nfa_type(MatchNfaType::Auto, text, start)
				903	}
				904
				905	/// Like match_nfa, but allows specification of the type of NFA engine.
				906	fn match_nfa_type(
				907	&self,
				908	ty: MatchNfaType,
				909	text: &[u8],
				910	start: usize,
				911	) -> bool {
				912	self.exec_nfa(
				913	ty,
				914	&mut [false],
				915	&mut [],
				916	true,
				917	false,
				918	text,
				919	start,
				920	text.len(),
				921	)
				922	}
				923
				924	/// Finds the shortest match using an NFA.
				925	#[cfg(feature = "perf-dfa")]
				926	fn shortest_nfa(&self, text: &[u8], start: usize) -> Option<usize> {
				927	self.shortest_nfa_type(MatchNfaType::Auto, text, start)
				928	}
				929
				930	/// Like shortest_nfa, but allows specification of the type of NFA engine.
				931	fn shortest_nfa_type(
				932	&self,
				933	ty: MatchNfaType,
				934	text: &[u8],
				935	start: usize,
				936	) -> Option<usize> {
				937	let mut slots = [None, None];
				938	if self.exec_nfa(
				939	ty,
				940	&mut [false],
				941	&mut slots,
				942	true,
				943	true,
				944	text,
				945	start,
				946	text.len(),
				947	) {
				948	slots[1]
				949	} else {
				950	None
				951	}
				952	}
				953
				954	/// Like find, but executes an NFA engine.
				955	fn find_nfa(
				956	&self,
				957	ty: MatchNfaType,
				958	text: &[u8],
				959	start: usize,
				960	) -> Option<(usize, usize)> {
				961	let mut slots = [None, None];
				962	if self.exec_nfa(
				963	ty,
				964	&mut [false],
				965	&mut slots,
				966	false,
				967	false,
				968	text,
				969	start,
				970	text.len(),
				971	) {
				972	match (slots[0], slots[1]) {
				973	(Some(s), Some(e)) => Some((s, e)),
				974	_ => None,
				975	}
				976	} else {
				977	None
				978	}
				979	}
				980
				981	/// Like find_nfa, but fills in captures.
				982	///
				983	/// `slots` should have length equal to `2 * nfa.captures.len()`.
				984	#[cfg(feature = "perf-dfa")]
				985	fn captures_nfa(
				986	&self,
				987	slots: &mut [Slot],
				988	text: &[u8],
				989	start: usize,
				990	) -> Option<(usize, usize)> {
				991	self.captures_nfa_type(
				992	MatchNfaType::Auto,
				993	slots,
				994	text,
				995	start,
				996	text.len(),
				997	)
				998	}
				999
				1000	/// Like captures_nfa, but allows specification of type of NFA engine.
				1001	fn captures_nfa_type(
				1002	&self,
				1003	ty: MatchNfaType,
				1004	slots: &mut [Slot],
				1005	text: &[u8],
				1006	start: usize,
				1007	end: usize,
				1008	) -> Option<(usize, usize)> {
				1009	if self.exec_nfa(
				1010	ty,
				1011	&mut [false],
				1012	slots,
				1013	false,
				1014	false,
				1015	text,
				1016	start,
				1017	end,
				1018	) {
				1019	match (slots[0], slots[1]) {
				1020	(Some(s), Some(e)) => Some((s, e)),
				1021	_ => None,
				1022	}
				1023	} else {
				1024	None
				1025	}
				1026	}
				1027
				1028	fn exec_nfa(
				1029	&self,
				1030	mut ty: MatchNfaType,
				1031	matches: &mut [bool],
				1032	slots: &mut [Slot],
				1033	quit_after_match: bool,
				1034	quit_after_match_with_pos: bool,
				1035	text: &[u8],
				1036	start: usize,
				1037	end: usize,
				1038	) -> bool {
				1039	use self::MatchNfaType::*;
				1040	if let Auto = ty {
				1041	if backtrack::should_exec(self.ro.nfa.len(), text.len()) {
				1042	ty = Backtrack;
				1043	} else {
				1044	ty = PikeVM;
				1045	}
				1046	}
				1047	// The backtracker can't return the shortest match position as it is
				1048	// implemented today. So if someone calls `shortest_match` and we need
				1049	// to run an NFA, then use the PikeVM.
				1050	if quit_after_match_with_pos \|\| ty == PikeVM {
				1051	self.exec_pikevm(
				1052	matches,
				1053	slots,
				1054	quit_after_match,
				1055	text,
				1056	start,
				1057	end,
				1058	)
				1059	} else {
				1060	self.exec_backtrack(matches, slots, text, start, end)
				1061	}
				1062	}
				1063
				1064	/// Always run the NFA algorithm.
				1065	fn exec_pikevm(
				1066	&self,
				1067	matches: &mut [bool],
				1068	slots: &mut [Slot],
				1069	quit_after_match: bool,
				1070	text: &[u8],
				1071	start: usize,
				1072	end: usize,
				1073	) -> bool {
				1074	if self.ro.nfa.uses_bytes() {
				1075	pikevm::Fsm::exec(
				1076	&self.ro.nfa,
				1077	self.cache.value(),
				1078	matches,
				1079	slots,
				1080	quit_after_match,
				1081	ByteInput::new(text, self.ro.nfa.only_utf8),
				1082	start,
				1083	end,
				1084	)
				1085	} else {
				1086	pikevm::Fsm::exec(
				1087	&self.ro.nfa,
				1088	self.cache.value(),
				1089	matches,
				1090	slots,
				1091	quit_after_match,
				1092	CharInput::new(text),
				1093	start,
				1094	end,
				1095	)
				1096	}
				1097	}
				1098
				1099	/// Always runs the NFA using bounded backtracking.
				1100	fn exec_backtrack(
				1101	&self,
				1102	matches: &mut [bool],
				1103	slots: &mut [Slot],
				1104	text: &[u8],
				1105	start: usize,
				1106	end: usize,
				1107	) -> bool {
				1108	if self.ro.nfa.uses_bytes() {
				1109	backtrack::Bounded::exec(
				1110	&self.ro.nfa,
				1111	self.cache.value(),
				1112	matches,
				1113	slots,
				1114	ByteInput::new(text, self.ro.nfa.only_utf8),
				1115	start,
				1116	end,
				1117	)
				1118	} else {
				1119	backtrack::Bounded::exec(
				1120	&self.ro.nfa,
				1121	self.cache.value(),
				1122	matches,
				1123	slots,
				1124	CharInput::new(text),
				1125	start,
				1126	end,
				1127	)
				1128	}
				1129	}
				1130
				1131	/// Finds which regular expressions match the given text.
				1132	///
				1133	/// `matches` should have length equal to the number of regexes being
				1134	/// searched.
				1135	///
				1136	/// This is only useful when one wants to know which regexes in a set
				1137	/// match some text.
				1138	pub fn many_matches_at(
				1139	&self,
				1140	matches: &mut [bool],
				1141	text: &[u8],
				1142	start: usize,
				1143	) -> bool {
				1144	use self::MatchType::*;
				1145	if !self.is_anchor_end_match(text) {
				1146	return false;
				1147	}
				1148	match self.ro.match_type {
				1149	#[cfg(feature = "perf-literal")]
				1150	Literal(ty) => {
				1151	debug_assert_eq!(matches.len(), 1);
				1152	matches[0] = self.find_literals(ty, text, start).is_some();
				1153	matches[0]
				1154	}
				1155	#[cfg(feature = "perf-dfa")]
				1156	Dfa \| DfaAnchoredReverse \| DfaMany => {
				1157	match dfa::Fsm::forward_many(
				1158	&self.ro.dfa,
				1159	self.cache.value(),
				1160	matches,
				1161	text,
				1162	start,
				1163	) {
				1164	dfa::Result::Match(_) => true,
				1165	dfa::Result::NoMatch(_) => false,
				1166	dfa::Result::Quit => self.exec_nfa(
				1167	MatchNfaType::Auto,
				1168	matches,
				1169	&mut [],
				1170	false,
				1171	false,
				1172	text,
				1173	start,
				1174	text.len(),
				1175	),
				1176	}
				1177	}
				1178	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				1179	DfaSuffix => {
				1180	match dfa::Fsm::forward_many(
				1181	&self.ro.dfa,
				1182	self.cache.value(),
				1183	matches,
				1184	text,
				1185	start,
				1186	) {
				1187	dfa::Result::Match(_) => true,
				1188	dfa::Result::NoMatch(_) => false,
				1189	dfa::Result::Quit => self.exec_nfa(
				1190	MatchNfaType::Auto,
				1191	matches,
				1192	&mut [],
				1193	false,
				1194	false,
				1195	text,
				1196	start,
				1197	text.len(),
				1198	),
				1199	}
				1200	}
				1201	Nfa(ty) => self.exec_nfa(
				1202	ty,
				1203	matches,
				1204	&mut [],
				1205	false,
				1206	false,
				1207	text,
				1208	start,
				1209	text.len(),
				1210	),
				1211	Nothing => false,
				1212	}
				1213	}
				1214
				1215	#[cfg_attr(feature = "perf-inline", inline(always))]
				1216	fn is_anchor_end_match(&self, text: &[u8]) -> bool {
				1217	#[cfg(not(feature = "perf-literal"))]
				1218	fn imp(_: &ExecReadOnly, _: &[u8]) -> bool {
				1219	true
				1220	}
				1221
				1222	#[cfg(feature = "perf-literal")]
				1223	fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool {
				1224	// Only do this check if the haystack is big (>1MB).
				1225	if text.len() > (1 << 20) && ro.nfa.is_anchored_end {
				1226	let lcs = ro.suffixes.lcs();
				1227	if lcs.len() >= 1 && !lcs.is_suffix(text) {
				1228	return false;
				1229	}
				1230	}
				1231	true
				1232	}
				1233
				1234	imp(&self.ro, text)
				1235	}
				1236
				1237	pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
				1238	&self.ro.nfa.capture_name_idx
				1239	}
				1240	}
				1241
				1242	impl<'c> ExecNoSyncStr<'c> {
				1243	pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
				1244	self.0.capture_name_idx()
				1245	}
				1246	}
				1247
				1248	impl Exec {
				1249	/// Get a searcher that isn't Sync.
				1250	#[cfg_attr(feature = "perf-inline", inline(always))]
				1251	pub fn searcher(&self) -> ExecNoSync {
				1252	let create = \|\| RefCell::new(ProgramCacheInner::new(&self.ro));
				1253	ExecNoSync {
				1254	ro: &self.ro, // a clone is too expensive here! (and not needed)
				1255	cache: self.cache.get_or(create),
				1256	}
				1257	}
				1258
				1259	/// Get a searcher that isn't Sync and can match on &str.
				1260	#[cfg_attr(feature = "perf-inline", inline(always))]
				1261	pub fn searcher_str(&self) -> ExecNoSyncStr {
				1262	ExecNoSyncStr(self.searcher())
				1263	}
				1264
				1265	/// Build a Regex from this executor.
				1266	pub fn into_regex(self) -> re_unicode::Regex {
				1267	re_unicode::Regex::from(self)
				1268	}
				1269
				1270	/// Build a RegexSet from this executor.
				1271	pub fn into_regex_set(self) -> re_set::unicode::RegexSet {
				1272	re_set::unicode::RegexSet::from(self)
				1273	}
				1274
				1275	/// Build a Regex from this executor that can match arbitrary bytes.
				1276	pub fn into_byte_regex(self) -> re_bytes::Regex {
				1277	re_bytes::Regex::from(self)
				1278	}
				1279
				1280	/// Build a RegexSet from this executor that can match arbitrary bytes.
				1281	pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet {
				1282	re_set::bytes::RegexSet::from(self)
				1283	}
				1284
				1285	/// The original regular expressions given by the caller that were
				1286	/// compiled.
				1287	pub fn regex_strings(&self) -> &[String] {
				1288	&self.ro.res
				1289	}
				1290
				1291	/// Return a slice of capture names.
				1292	///
				1293	/// Any capture that isn't named is None.
				1294	pub fn capture_names(&self) -> &[Option<String>] {
				1295	&self.ro.nfa.captures
				1296	}
				1297
				1298	/// Return a reference to named groups mapping (from group name to
				1299	/// group position).
				1300	pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
				1301	&self.ro.nfa.capture_name_idx
				1302	}
				1303	}
				1304
				1305	impl Clone for Exec {
				1306	fn clone(&self) -> Exec {
				1307	Exec { ro: self.ro.clone(), cache: Cached::new() }
				1308	}
				1309	}
				1310
				1311	impl ExecReadOnly {
				1312	fn choose_match_type(&self, hint: Option<MatchType>) -> MatchType {
				1313	if let Some(MatchType::Nfa(_)) = hint {
				1314	return hint.unwrap();
				1315	}
				1316	// If the NFA is empty, then we'll never match anything.
				1317	if self.nfa.insts.is_empty() {
				1318	return MatchType::Nothing;
				1319	}
				1320	if let Some(literalty) = self.choose_literal_match_type() {
				1321	return literalty;
				1322	}
				1323	if let Some(dfaty) = self.choose_dfa_match_type() {
				1324	return dfaty;
				1325	}
				1326	// We're so totally hosed.
				1327	MatchType::Nfa(MatchNfaType::Auto)
				1328	}
				1329
				1330	/// If a plain literal scan can be used, then a corresponding literal
				1331	/// search type is returned.
				1332	fn choose_literal_match_type(&self) -> Option<MatchType> {
				1333	#[cfg(not(feature = "perf-literal"))]
				1334	fn imp(_: &ExecReadOnly) -> Option<MatchType> {
				1335	None
				1336	}
				1337
				1338	#[cfg(feature = "perf-literal")]
				1339	fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
				1340	// If our set of prefixes is complete, then we can use it to find
				1341	// a match in lieu of a regex engine. This doesn't quite work well
				1342	// in the presence of multiple regexes, so only do it when there's
				1343	// one.
				1344	//
				1345	// TODO(burntsushi): Also, don't try to match literals if the regex
				1346	// is partially anchored. We could technically do it, but we'd need
				1347	// to create two sets of literals: all of them and then the subset
				1348	// that aren't anchored. We would then only search for all of them
				1349	// when at the beginning of the input and use the subset in all
				1350	// other cases.
				1351	if ro.res.len() != 1 {
				1352	return None;
				1353	}
				1354	if ro.ac.is_some() {
				1355	return Some(MatchType::Literal(
				1356	MatchLiteralType::AhoCorasick,
				1357	));
				1358	}
				1359	if ro.nfa.prefixes.complete() {
				1360	return if ro.nfa.is_anchored_start {
				1361	Some(MatchType::Literal(MatchLiteralType::AnchoredStart))
				1362	} else {
				1363	Some(MatchType::Literal(MatchLiteralType::Unanchored))
				1364	};
				1365	}
				1366	if ro.suffixes.complete() {
				1367	return if ro.nfa.is_anchored_end {
				1368	Some(MatchType::Literal(MatchLiteralType::AnchoredEnd))
				1369	} else {
				1370	// This case shouldn't happen. When the regex isn't
				1371	// anchored, then complete prefixes should imply complete
				1372	// suffixes.
				1373	Some(MatchType::Literal(MatchLiteralType::Unanchored))
				1374	};
				1375	}
				1376	None
				1377	}
				1378
				1379	imp(self)
				1380	}
				1381
				1382	/// If a DFA scan can be used, then choose the appropriate DFA strategy.
				1383	fn choose_dfa_match_type(&self) -> Option<MatchType> {
				1384	#[cfg(not(feature = "perf-dfa"))]
				1385	fn imp(_: &ExecReadOnly) -> Option<MatchType> {
				1386	None
				1387	}
				1388
				1389	#[cfg(feature = "perf-dfa")]
				1390	fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
				1391	if !dfa::can_exec(&ro.dfa) {
				1392	return None;
				1393	}
				1394	// Regex sets require a slightly specialized path.
				1395	if ro.res.len() >= 2 {
				1396	return Some(MatchType::DfaMany);
				1397	}
				1398	// If the regex is anchored at the end but not the start, then
				1399	// just match in reverse from the end of the haystack.
				1400	if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end {
				1401	return Some(MatchType::DfaAnchoredReverse);
				1402	}
				1403	#[cfg(feature = "perf-literal")]
				1404	{
				1405	// If there's a longish suffix literal, then it might be faster
				1406	// to look for that first.
				1407	if ro.should_suffix_scan() {
				1408	return Some(MatchType::DfaSuffix);
				1409	}
				1410	}
				1411	// Fall back to your garden variety forward searching lazy DFA.
				1412	Some(MatchType::Dfa)
				1413	}
				1414
				1415	imp(self)
				1416	}
				1417
				1418	/// Returns true if the program is amenable to suffix scanning.
				1419	///
				1420	/// When this is true, as a heuristic, we assume it is OK to quickly scan
				1421	/// for suffix literals and then do a reverse DFA match from any matches
				1422	/// produced by the literal scan. (And then followed by a forward DFA
				1423	/// search, since the previously found suffix literal maybe not actually be
				1424	/// the end of a match.)
				1425	///
				1426	/// This is a bit of a specialized optimization, but can result in pretty
				1427	/// big performance wins if 1) there are no prefix literals and 2) the
				1428	/// suffix literals are pretty rare in the text. (1) is obviously easy to
				1429	/// account for but (2) is harder. As a proxy, we assume that longer
				1430	/// strings are generally rarer, so we only enable this optimization when
				1431	/// we have a meaty suffix.
				1432	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				1433	fn should_suffix_scan(&self) -> bool {
				1434	if self.suffixes.is_empty() {
				1435	return false;
				1436	}
				1437	let lcs_len = self.suffixes.lcs().char_len();
				1438	lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
				1439	}
				1440	}
				1441
				1442	#[derive(Clone, Copy, Debug)]
				1443	enum MatchType {
				1444	/// A single or multiple literal search. This is only used when the regex
				1445	/// can be decomposed into a literal search.
				1446	#[cfg(feature = "perf-literal")]
				1447	Literal(MatchLiteralType),
				1448	/// A normal DFA search.
				1449	#[cfg(feature = "perf-dfa")]
				1450	Dfa,
				1451	/// A reverse DFA search starting from the end of a haystack.
				1452	#[cfg(feature = "perf-dfa")]
				1453	DfaAnchoredReverse,
				1454	/// A reverse DFA search with suffix literal scanning.
				1455	#[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
				1456	DfaSuffix,
				1457	/// Use the DFA on two or more regular expressions.
				1458	#[cfg(feature = "perf-dfa")]
				1459	DfaMany,
				1460	/// An NFA variant.
				1461	Nfa(MatchNfaType),
				1462	/// No match is ever possible, so don't ever try to search.
				1463	Nothing,
				1464	}
				1465
				1466	#[derive(Clone, Copy, Debug)]
				1467	#[cfg(feature = "perf-literal")]
				1468	enum MatchLiteralType {
				1469	/// Match literals anywhere in text.
				1470	Unanchored,
				1471	/// Match literals only at the start of text.
				1472	AnchoredStart,
				1473	/// Match literals only at the end of text.
				1474	AnchoredEnd,
				1475	/// Use an Aho-Corasick automaton. This requires `ac` to be Some on
				1476	/// ExecReadOnly.
				1477	AhoCorasick,
				1478	}
				1479
				1480	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
				1481	enum MatchNfaType {
				1482	/// Choose between Backtrack and PikeVM.
				1483	Auto,
				1484	/// NFA bounded backtracking.
				1485	///
				1486	/// (This is only set by tests, since it never makes sense to always want
				1487	/// backtracking.)
				1488	Backtrack,
				1489	/// The Pike VM.
				1490	///
				1491	/// (This is only set by tests, since it never makes sense to always want
				1492	/// the Pike VM.)
				1493	PikeVM,
				1494	}
				1495
				1496	/// `ProgramCache` maintains reusable allocations for each matching engine
				1497	/// available to a particular program.
				1498	pub type ProgramCache = RefCell<ProgramCacheInner>;
				1499
				1500	#[derive(Debug)]
				1501	pub struct ProgramCacheInner {
				1502	pub pikevm: pikevm::Cache,
				1503	pub backtrack: backtrack::Cache,
				1504	#[cfg(feature = "perf-dfa")]
				1505	pub dfa: dfa::Cache,
				1506	#[cfg(feature = "perf-dfa")]
				1507	pub dfa_reverse: dfa::Cache,
				1508	}
				1509
				1510	impl ProgramCacheInner {
				1511	fn new(ro: &ExecReadOnly) -> Self {
				1512	ProgramCacheInner {
				1513	pikevm: pikevm::Cache::new(&ro.nfa),
				1514	backtrack: backtrack::Cache::new(&ro.nfa),
				1515	#[cfg(feature = "perf-dfa")]
				1516	dfa: dfa::Cache::new(&ro.dfa),
				1517	#[cfg(feature = "perf-dfa")]
				1518	dfa_reverse: dfa::Cache::new(&ro.dfa_reverse),
				1519	}
				1520	}
				1521	}
				1522
				1523	/// Alternation literals checks if the given HIR is a simple alternation of
				1524	/// literals, and if so, returns them. Otherwise, this returns None.
				1525	#[cfg(feature = "perf-literal")]
				1526	fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
				1527	use syntax::hir::{HirKind, Literal};
				1528
				1529	// This is pretty hacky, but basically, if `is_alternation_literal` is
				1530	// true, then we can make several assumptions about the structure of our
				1531	// HIR. This is what justifies the `unreachable!` statements below.
				1532	//
				1533	// This code should be refactored once we overhaul this crate's
				1534	// optimization pipeline, because this is a terribly inflexible way to go
				1535	// about things.
				1536
				1537	if !expr.is_alternation_literal() {
				1538	return None;
				1539	}
				1540	let alts = match *expr.kind() {
				1541	HirKind::Alternation(ref alts) => alts,
				1542	_ => return None, // one literal isn't worth it
				1543	};
				1544
				1545	let extendlit = \|lit: &Literal, dst: &mut Vec<u8>\| match *lit {
				1546	Literal::Unicode(c) => {
				1547	let mut buf = [0; 4];
				1548	dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
				1549	}
				1550	Literal::Byte(b) => {
				1551	dst.push(b);
				1552	}
				1553	};
				1554
				1555	let mut lits = vec![];
				1556	for alt in alts {
				1557	let mut lit = vec![];
				1558	match *alt.kind() {
				1559	HirKind::Literal(ref x) => extendlit(x, &mut lit),
				1560	HirKind::Concat(ref exprs) => {
				1561	for e in exprs {
				1562	match *e.kind() {
				1563	HirKind::Literal(ref x) => extendlit(x, &mut lit),
				1564	_ => unreachable!("expected literal, got {:?}", e),
				1565	}
				1566	}
				1567	}
				1568	_ => unreachable!("expected literal or concat, got {:?}", alt),
				1569	}
				1570	lits.push(lit);
				1571	}
				1572	Some(lits)
				1573	}
				1574
				1575	#[cfg(test)]
				1576	mod test {
				1577	#[test]
				1578	fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
				1579	use internal::ExecBuilder;
				1580
				1581	let backtrack_bytes_re = ExecBuilder::new("^S")
				1582	.bounded_backtracking()
				1583	.only_utf8(false)
				1584	.build()
				1585	.map(\|exec\| exec.into_byte_regex())
				1586	.map_err(\|err\| format!("{}", err))
				1587	.unwrap();
				1588
				1589	let default_bytes_re = ExecBuilder::new("^S")
				1590	.only_utf8(false)
				1591	.build()
				1592	.map(\|exec\| exec.into_byte_regex())
				1593	.map_err(\|err\| format!("{}", err))
				1594	.unwrap();
				1595
				1596	let input = vec![83, 83];
				1597
				1598	let s1 = backtrack_bytes_re.split(&input);
				1599	let s2 = default_bytes_re.split(&input);
				1600	for (chunk1, chunk2) in s1.zip(s2) {
				1601	assert_eq!(chunk1, chunk2);
				1602	}
				1603	}
				1604
				1605	#[test]
				1606	fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
				1607	use internal::ExecBuilder;
				1608
				1609	let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
				1610	.bounded_backtracking()
				1611	.bytes(true)
				1612	.build()
				1613	.map(\|exec\| exec.into_regex())
				1614	.map_err(\|err\| format!("{}", err))
				1615	.unwrap();
				1616
				1617	let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
				1618	.bytes(true)
				1619	.build()
				1620	.map(\|exec\| exec.into_regex())
				1621	.map_err(\|err\| format!("{}", err))
				1622	.unwrap();
				1623
				1624	let input = "**";
				1625
				1626	let s1 = backtrack_bytes_re.split(input);
				1627	let s2 = default_bytes_re.split(input);
				1628	for (chunk1, chunk2) in s1.zip(s2) {
				1629	assert_eq!(chunk1, chunk2);
				1630	}
				1631	}
				1632	}