Import 'regex' package vesion 1.3.6 * Add OWNERS * No Android.bp yet Bug: 152884384 Test: make Change-Id: I455caf7833b6c437c1c133bc7b2f47b83da9cbce

commit: e42c505f54ac2e7b2ca7d0197304cac1b4f605e9 [log] [tgz]
author: Chih-Hung Hsieh <chh@google.com> Thu Apr 16 10:44:21 2020 -0700
committer: Chih-Hung Hsieh <chh@google.com> Thu Apr 16 10:45:53 2020 -0700
tree: 7ca22ebdbdb7d0c9dc5c66d31f564aeaf51cece4
parent: 815d0a2344b5321fd47f0cb7dba96e4cb4e84615 [diff] [blame]
diff --git a/src/exec.rs b/src/exec.rs
new file mode 100644
index 0000000..acca2dc
--- /dev/null
+++ b/src/exec.rs

@@ -0,0 +1,1632 @@
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+#[cfg(feature = "perf-literal")]
+use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
+use syntax::hir::literal::Literals;
+use syntax::hir::Hir;
+use syntax::ParserBuilder;
+
+use backtrack;
+use cache::{Cached, CachedGuard};
+use compile::Compiler;
+#[cfg(feature = "perf-dfa")]
+use dfa;
+use error::Error;
+use input::{ByteInput, CharInput};
+use literal::LiteralSearcher;
+use pikevm;
+use prog::Program;
+use re_builder::RegexOptions;
+use re_bytes;
+use re_set;
+use re_trait::{Locations, RegularExpression, Slot};
+use re_unicode;
+use utf8::next_utf8;
+
+/// `Exec` manages the execution of a regular expression.
+///
+/// In particular, this manages the various compiled forms of a single regular
+/// expression and the choice of which matching engine to use to execute a
+/// regular expression.
+pub struct Exec {
+    /// All read only state.
+    ro: Arc<ExecReadOnly>,
+    /// Caches for the various matching engines.
+    cache: Cached<ProgramCache>,
+}
+
+/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
+/// means it is no longer Sync, but we can now avoid the overhead of
+/// synchronization to fetch the cache.
+#[derive(Debug)]
+pub struct ExecNoSync<'c> {
+    /// All read only state.
+    ro: &'c Arc<ExecReadOnly>,
+    /// Caches for the various matching engines.
+    cache: CachedGuard<'c, ProgramCache>,
+}
+
+/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
+pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
+
+/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
+/// state is determined at compile time and never changes during search.
+#[derive(Debug)]
+struct ExecReadOnly {
+    /// The original regular expressions given by the caller to compile.
+    res: Vec<String>,
+    /// A compiled program that is used in the NFA simulation and backtracking.
+    /// It can be byte-based or Unicode codepoint based.
+    ///
+    /// N.B. It is not possibly to make this byte-based from the public API.
+    /// It is only used for testing byte based programs in the NFA simulations.
+    nfa: Program,
+    /// A compiled byte based program for DFA execution. This is only used
+    /// if a DFA can be executed. (Currently, only word boundary assertions are
+    /// not supported.) Note that this program contains an embedded `.*?`
+    /// preceding the first capture group, unless the regex is anchored at the
+    /// beginning.
+    dfa: Program,
+    /// The same as above, except the program is reversed (and there is no
+    /// preceding `.*?`). This is used by the DFA to find the starting location
+    /// of matches.
+    dfa_reverse: Program,
+    /// A set of suffix literals extracted from the regex.
+    ///
+    /// Prefix literals are stored on the `Program`, since they are used inside
+    /// the matching engines.
+    suffixes: LiteralSearcher,
+    /// An Aho-Corasick automaton with leftmost-first match semantics.
+    ///
+    /// This is only set when the entire regex is a simple unanchored
+    /// alternation of literals. We could probably use it more circumstances,
+    /// but this is already hacky enough in this architecture.
+    ///
+    /// N.B. We use u32 as a state ID representation under the assumption that
+    /// if we were to exhaust the ID space, we probably would have long
+    /// surpassed the compilation size limit.
+    #[cfg(feature = "perf-literal")]
+    ac: Option<AhoCorasick<u32>>,
+    /// match_type encodes as much upfront knowledge about how we're going to
+    /// execute a search as possible.
+    match_type: MatchType,
+}
+
+/// Facilitates the construction of an executor by exposing various knobs
+/// to control how a regex is executed and what kinds of resources it's
+/// permitted to use.
+pub struct ExecBuilder {
+    options: RegexOptions,
+    match_type: Option<MatchType>,
+    bytes: bool,
+    only_utf8: bool,
+}
+
+/// Parsed represents a set of parsed regular expressions and their detected
+/// literals.
+struct Parsed {
+    exprs: Vec<Hir>,
+    prefixes: Literals,
+    suffixes: Literals,
+    bytes: bool,
+}
+
+impl ExecBuilder {
+    /// Create a regex execution builder.
+    ///
+    /// This uses default settings for everything except the regex itself,
+    /// which must be provided. Further knobs can be set by calling methods,
+    /// and then finally, `build` to actually create the executor.
+    pub fn new(re: &str) -> Self {
+        Self::new_many(&[re])
+    }
+
+    /// Like new, but compiles the union of the given regular expressions.
+    ///
+    /// Note that when compiling 2 or more regular expressions, capture groups
+    /// are completely unsupported. (This means both `find` and `captures`
+    /// wont work.)
+    pub fn new_many<I, S>(res: I) -> Self
+    where
+        S: AsRef<str>,
+        I: IntoIterator<Item = S>,
+    {
+        let mut opts = RegexOptions::default();
+        opts.pats = res.into_iter().map(|s| s.as_ref().to_owned()).collect();
+        Self::new_options(opts)
+    }
+
+    /// Create a regex execution builder.
+    pub fn new_options(opts: RegexOptions) -> Self {
+        ExecBuilder {
+            options: opts,
+            match_type: None,
+            bytes: false,
+            only_utf8: true,
+        }
+    }
+
+    /// Set the matching engine to be automatically determined.
+    ///
+    /// This is the default state and will apply whatever optimizations are
+    /// possible, such as running a DFA.
+    ///
+    /// This overrides whatever was previously set via the `nfa` or
+    /// `bounded_backtracking` methods.
+    pub fn automatic(mut self) -> Self {
+        self.match_type = None;
+        self
+    }
+
+    /// Sets the matching engine to use the NFA algorithm no matter what
+    /// optimizations are possible.
+    ///
+    /// This overrides whatever was previously set via the `automatic` or
+    /// `bounded_backtracking` methods.
+    pub fn nfa(mut self) -> Self {
+        self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM));
+        self
+    }
+
+    /// Sets the matching engine to use a bounded backtracking engine no
+    /// matter what optimizations are possible.
+    ///
+    /// One must use this with care, since the bounded backtracking engine
+    /// uses memory proportion to `len(regex) * len(text)`.
+    ///
+    /// This overrides whatever was previously set via the `automatic` or
+    /// `nfa` methods.
+    pub fn bounded_backtracking(mut self) -> Self {
+        self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack));
+        self
+    }
+
+    /// Compiles byte based programs for use with the NFA matching engines.
+    ///
+    /// By default, the NFA engines match on Unicode scalar values. They can
+    /// be made to use byte based programs instead. In general, the byte based
+    /// programs are slower because of a less efficient encoding of character
+    /// classes.
+    ///
+    /// Note that this does not impact DFA matching engines, which always
+    /// execute on bytes.
+    pub fn bytes(mut self, yes: bool) -> Self {
+        self.bytes = yes;
+        self
+    }
+
+    /// When disabled, the program compiled may match arbitrary bytes.
+    ///
+    /// When enabled (the default), all compiled programs exclusively match
+    /// valid UTF-8 bytes.
+    pub fn only_utf8(mut self, yes: bool) -> Self {
+        self.only_utf8 = yes;
+        self
+    }
+
+    /// Set the Unicode flag.
+    pub fn unicode(mut self, yes: bool) -> Self {
+        self.options.unicode = yes;
+        self
+    }
+
+    /// Parse the current set of patterns into their AST and extract literals.
+    fn parse(&self) -> Result<Parsed, Error> {
+        let mut exprs = Vec::with_capacity(self.options.pats.len());
+        let mut prefixes = Some(Literals::empty());
+        let mut suffixes = Some(Literals::empty());
+        let mut bytes = false;
+        let is_set = self.options.pats.len() > 1;
+        // If we're compiling a regex set and that set has any anchored
+        // expressions, then disable all literal optimizations.
+        for pat in &self.options.pats {
+            let mut parser = ParserBuilder::new()
+                .octal(self.options.octal)
+                .case_insensitive(self.options.case_insensitive)
+                .multi_line(self.options.multi_line)
+                .dot_matches_new_line(self.options.dot_matches_new_line)
+                .swap_greed(self.options.swap_greed)
+                .ignore_whitespace(self.options.ignore_whitespace)
+                .unicode(self.options.unicode)
+                .allow_invalid_utf8(!self.only_utf8)
+                .nest_limit(self.options.nest_limit)
+                .build();
+            let expr =
+                parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?;
+            bytes = bytes || !expr.is_always_utf8();
+
+            if cfg!(feature = "perf-literal") {
+                if !expr.is_anchored_start() && expr.is_any_anchored_start() {
+                    // Partial anchors unfortunately make it hard to use
+                    // prefixes, so disable them.
+                    prefixes = None;
+                } else if is_set && expr.is_anchored_start() {
+                    // Regex sets with anchors do not go well with literal
+                    // optimizations.
+                    prefixes = None;
+                }
+                prefixes = prefixes.and_then(|mut prefixes| {
+                    if !prefixes.union_prefixes(&expr) {
+                        None
+                    } else {
+                        Some(prefixes)
+                    }
+                });
+
+                if !expr.is_anchored_end() && expr.is_any_anchored_end() {
+                    // Partial anchors unfortunately make it hard to use
+                    // suffixes, so disable them.
+                    suffixes = None;
+                } else if is_set && expr.is_anchored_end() {
+                    // Regex sets with anchors do not go well with literal
+                    // optimizations.
+                    suffixes = None;
+                }
+                suffixes = suffixes.and_then(|mut suffixes| {
+                    if !suffixes.union_suffixes(&expr) {
+                        None
+                    } else {
+                        Some(suffixes)
+                    }
+                });
+            }
+            exprs.push(expr);
+        }
+        Ok(Parsed {
+            exprs: exprs,
+            prefixes: prefixes.unwrap_or_else(Literals::empty),
+            suffixes: suffixes.unwrap_or_else(Literals::empty),
+            bytes: bytes,
+        })
+    }
+
+    /// Build an executor that can run a regular expression.
+    pub fn build(self) -> Result<Exec, Error> {
+        // Special case when we have no patterns to compile.
+        // This can happen when compiling a regex set.
+        if self.options.pats.is_empty() {
+            let ro = Arc::new(ExecReadOnly {
+                res: vec![],
+                nfa: Program::new(),
+                dfa: Program::new(),
+                dfa_reverse: Program::new(),
+                suffixes: LiteralSearcher::empty(),
+                #[cfg(feature = "perf-literal")]
+                ac: None,
+                match_type: MatchType::Nothing,
+            });
+            return Ok(Exec { ro: ro, cache: Cached::new() });
+        }
+        let parsed = self.parse()?;
+        let mut nfa = Compiler::new()
+            .size_limit(self.options.size_limit)
+            .bytes(self.bytes || parsed.bytes)
+            .only_utf8(self.only_utf8)
+            .compile(&parsed.exprs)?;
+        let mut dfa = Compiler::new()
+            .size_limit(self.options.size_limit)
+            .dfa(true)
+            .only_utf8(self.only_utf8)
+            .compile(&parsed.exprs)?;
+        let mut dfa_reverse = Compiler::new()
+            .size_limit(self.options.size_limit)
+            .dfa(true)
+            .only_utf8(self.only_utf8)
+            .reverse(true)
+            .compile(&parsed.exprs)?;
+
+        #[cfg(feature = "perf-literal")]
+        let ac = self.build_aho_corasick(&parsed);
+        nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
+        dfa.prefixes = nfa.prefixes.clone();
+        dfa.dfa_size_limit = self.options.dfa_size_limit;
+        dfa_reverse.dfa_size_limit = self.options.dfa_size_limit;
+
+        let mut ro = ExecReadOnly {
+            res: self.options.pats,
+            nfa: nfa,
+            dfa: dfa,
+            dfa_reverse: dfa_reverse,
+            suffixes: LiteralSearcher::suffixes(parsed.suffixes),
+            #[cfg(feature = "perf-literal")]
+            ac: ac,
+            match_type: MatchType::Nothing,
+        };
+        ro.match_type = ro.choose_match_type(self.match_type);
+
+        let ro = Arc::new(ro);
+        Ok(Exec { ro: ro, cache: Cached::new() })
+    }
+
+    #[cfg(feature = "perf-literal")]
+    fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick<u32>> {
+        if parsed.exprs.len() != 1 {
+            return None;
+        }
+        let lits = match alternation_literals(&parsed.exprs[0]) {
+            None => return None,
+            Some(lits) => lits,
+        };
+        // If we have a small number of literals, then let Teddy handle
+        // things (see literal/mod.rs).
+        if lits.len() <= 32 {
+            return None;
+        }
+        Some(
+            AhoCorasickBuilder::new()
+                .match_kind(MatchKind::LeftmostFirst)
+                .auto_configure(&lits)
+                // We always want this to reduce size, regardless
+                // of what auto-configure does.
+                .byte_classes(true)
+                .build_with_size::<u32, _, _>(&lits)
+                // This should never happen because we'd long exceed the
+                // compilation limit for regexes first.
+                .expect("AC automaton too big"),
+        )
+    }
+}
+
+impl<'c> RegularExpression for ExecNoSyncStr<'c> {
+    type Text = str;
+
+    fn slots_len(&self) -> usize {
+        self.0.slots_len()
+    }
+
+    fn next_after_empty(&self, text: &str, i: usize) -> usize {
+        next_utf8(text.as_bytes(), i)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_match_at(&self, text: &str, start: usize) -> Option<usize> {
+        self.0.shortest_match_at(text.as_bytes(), start)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_match_at(&self, text: &str, start: usize) -> bool {
+        self.0.is_match_at(text.as_bytes(), start)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
+        self.0.find_at(text.as_bytes(), start)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn captures_read_at(
+        &self,
+        locs: &mut Locations,
+        text: &str,
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        self.0.captures_read_at(locs, text.as_bytes(), start)
+    }
+}
+
+impl<'c> RegularExpression for ExecNoSync<'c> {
+    type Text = [u8];
+
+    /// Returns the number of capture slots in the regular expression. (There
+    /// are two slots for every capture group, corresponding to possibly empty
+    /// start and end locations of the capture.)
+    fn slots_len(&self) -> usize {
+        self.ro.nfa.captures.len() * 2
+    }
+
+    fn next_after_empty(&self, _text: &[u8], i: usize) -> usize {
+        i + 1
+    }
+
+    /// Returns the end of a match location, possibly occurring before the
+    /// end location of the correct leftmost-first match.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_match_at(&self, text: &[u8], start: usize) -> Option<usize> {
+        if !self.is_anchor_end_match(text) {
+            return None;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => {
+                self.find_literals(ty, text, start).map(|(_, e)| e)
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa | MatchType::DfaMany => {
+                match self.shortest_dfa(text, start) {
+                    dfa::Result::Match(end) => Some(end),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.shortest_nfa(text, start),
+                }
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match dfa::Fsm::reverse(
+                    &self.ro.dfa_reverse,
+                    self.cache.value(),
+                    true,
+                    &text[start..],
+                    text.len(),
+                ) {
+                    dfa::Result::Match(_) => Some(text.len()),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.shortest_nfa(text, start),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.shortest_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match(e) => Some(e),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.shortest_nfa(text, start),
+                }
+            }
+            MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start),
+            MatchType::Nothing => None,
+        }
+    }
+
+    /// Returns true if and only if the regex matches text.
+    ///
+    /// For single regular expressions, this is equivalent to calling
+    /// shortest_match(...).is_some().
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_match_at(&self, text: &[u8], start: usize) -> bool {
+        if !self.is_anchor_end_match(text) {
+            return false;
+        }
+        // We need to do this dance because shortest_match relies on the NFA
+        // filling in captures[1], but a RegexSet has no captures. In other
+        // words, a RegexSet can't (currently) use shortest_match. ---AG
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => {
+                self.find_literals(ty, text, start).is_some()
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa | MatchType::DfaMany => {
+                match self.shortest_dfa(text, start) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.match_nfa(text, start),
+                }
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match dfa::Fsm::reverse(
+                    &self.ro.dfa_reverse,
+                    self.cache.value(),
+                    true,
+                    &text[start..],
+                    text.len(),
+                ) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.match_nfa(text, start),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.shortest_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.match_nfa(text, start),
+                }
+            }
+            MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start),
+            MatchType::Nothing => false,
+        }
+    }
+
+    /// Finds the start and end location of the leftmost-first match, starting
+    /// at the given location.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> {
+        if !self.is_anchor_end_match(text) {
+            return None;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => self.find_literals(ty, text, start),
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa => match self.find_dfa_forward(text, start) {
+                dfa::Result::Match((s, e)) => Some((s, e)),
+                dfa::Result::NoMatch(_) => None,
+                dfa::Result::Quit => {
+                    self.find_nfa(MatchNfaType::Auto, text, start)
+                }
+            },
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match self.find_dfa_anchored_reverse(text, start) {
+                    dfa::Result::Match((s, e)) => Some((s, e)),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => {
+                        self.find_nfa(MatchNfaType::Auto, text, start)
+                    }
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.find_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match((s, e)) => Some((s, e)),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => {
+                        self.find_nfa(MatchNfaType::Auto, text, start)
+                    }
+                }
+            }
+            MatchType::Nfa(ty) => self.find_nfa(ty, text, start),
+            MatchType::Nothing => None,
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaMany => {
+                unreachable!("BUG: RegexSet cannot be used with find")
+            }
+        }
+    }
+
+    /// Finds the start and end location of the leftmost-first match and also
+    /// fills in all matching capture groups.
+    ///
+    /// The number of capture slots given should be equal to the total number
+    /// of capture slots in the compiled program.
+    ///
+    /// Note that the first two slots always correspond to the start and end
+    /// locations of the overall match.
+    fn captures_read_at(
+        &self,
+        locs: &mut Locations,
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        let slots = locs.as_slots();
+        for slot in slots.iter_mut() {
+            *slot = None;
+        }
+        // If the caller unnecessarily uses this, then we try to save them
+        // from themselves.
+        match slots.len() {
+            0 => return self.find_at(text, start),
+            2 => {
+                return self.find_at(text, start).map(|(s, e)| {
+                    slots[0] = Some(s);
+                    slots[1] = Some(e);
+                    (s, e)
+                });
+            }
+            _ => {} // fallthrough
+        }
+        if !self.is_anchor_end_match(text) {
+            return None;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            MatchType::Literal(ty) => {
+                self.find_literals(ty, text, start).and_then(|(s, e)| {
+                    self.captures_nfa_type(
+                        MatchNfaType::Auto,
+                        slots,
+                        text,
+                        s,
+                        e,
+                    )
+                })
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::Dfa => {
+                if self.ro.nfa.is_anchored_start {
+                    self.captures_nfa(slots, text, start)
+                } else {
+                    match self.find_dfa_forward(text, start) {
+                        dfa::Result::Match((s, e)) => self.captures_nfa_type(
+                            MatchNfaType::Auto,
+                            slots,
+                            text,
+                            s,
+                            e,
+                        ),
+                        dfa::Result::NoMatch(_) => None,
+                        dfa::Result::Quit => {
+                            self.captures_nfa(slots, text, start)
+                        }
+                    }
+                }
+            }
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaAnchoredReverse => {
+                match self.find_dfa_anchored_reverse(text, start) {
+                    dfa::Result::Match((s, e)) => self.captures_nfa_type(
+                        MatchNfaType::Auto,
+                        slots,
+                        text,
+                        s,
+                        e,
+                    ),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.captures_nfa(slots, text, start),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            MatchType::DfaSuffix => {
+                match self.find_dfa_reverse_suffix(text, start) {
+                    dfa::Result::Match((s, e)) => self.captures_nfa_type(
+                        MatchNfaType::Auto,
+                        slots,
+                        text,
+                        s,
+                        e,
+                    ),
+                    dfa::Result::NoMatch(_) => None,
+                    dfa::Result::Quit => self.captures_nfa(slots, text, start),
+                }
+            }
+            MatchType::Nfa(ty) => {
+                self.captures_nfa_type(ty, slots, text, start, text.len())
+            }
+            MatchType::Nothing => None,
+            #[cfg(feature = "perf-dfa")]
+            MatchType::DfaMany => {
+                unreachable!("BUG: RegexSet cannot be used with captures")
+            }
+        }
+    }
+}
+
+impl<'c> ExecNoSync<'c> {
+    /// Finds the leftmost-first match using only literal search.
+    #[cfg(feature = "perf-literal")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_literals(
+        &self,
+        ty: MatchLiteralType,
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        use self::MatchLiteralType::*;
+        match ty {
+            Unanchored => {
+                let lits = &self.ro.nfa.prefixes;
+                lits.find(&text[start..]).map(|(s, e)| (start + s, start + e))
+            }
+            AnchoredStart => {
+                let lits = &self.ro.nfa.prefixes;
+                if start == 0 || !self.ro.nfa.is_anchored_start {
+                    lits.find_start(&text[start..])
+                        .map(|(s, e)| (start + s, start + e))
+                } else {
+                    None
+                }
+            }
+            AnchoredEnd => {
+                let lits = &self.ro.suffixes;
+                lits.find_end(&text[start..])
+                    .map(|(s, e)| (start + s, start + e))
+            }
+            AhoCorasick => self
+                .ro
+                .ac
+                .as_ref()
+                .unwrap()
+                .find(&text[start..])
+                .map(|m| (start + m.start(), start + m.end())),
+        }
+    }
+
+    /// Finds the leftmost-first match (start and end) using only the DFA.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(feature = "perf-dfa")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_dfa_forward(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<(usize, usize)> {
+        use dfa::Result::*;
+        let end = match dfa::Fsm::forward(
+            &self.ro.dfa,
+            self.cache.value(),
+            false,
+            text,
+            start,
+        ) {
+            NoMatch(i) => return NoMatch(i),
+            Quit => return Quit,
+            Match(end) if start == end => return Match((start, start)),
+            Match(end) => end,
+        };
+        // Now run the DFA in reverse to find the start of the match.
+        match dfa::Fsm::reverse(
+            &self.ro.dfa_reverse,
+            self.cache.value(),
+            false,
+            &text[start..],
+            end - start,
+        ) {
+            Match(s) => Match((start + s, end)),
+            NoMatch(i) => NoMatch(i),
+            Quit => Quit,
+        }
+    }
+
+    /// Finds the leftmost-first match (start and end) using only the DFA,
+    /// but assumes the regex is anchored at the end and therefore starts at
+    /// the end of the regex and matches in reverse.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(feature = "perf-dfa")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_dfa_anchored_reverse(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<(usize, usize)> {
+        use dfa::Result::*;
+        match dfa::Fsm::reverse(
+            &self.ro.dfa_reverse,
+            self.cache.value(),
+            false,
+            &text[start..],
+            text.len() - start,
+        ) {
+            Match(s) => Match((start + s, text.len())),
+            NoMatch(i) => NoMatch(i),
+            Quit => Quit,
+        }
+    }
+
+    /// Finds the end of the shortest match using only the DFA.
+    #[cfg(feature = "perf-dfa")]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
+        dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
+    }
+
+    /// Finds the end of the shortest match using only the DFA by scanning for
+    /// suffix literals.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn shortest_dfa_reverse_suffix(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<usize> {
+        match self.exec_dfa_reverse_suffix(text, start) {
+            None => self.shortest_dfa(text, start),
+            Some(r) => r.map(|(_, end)| end),
+        }
+    }
+
+    /// Finds the end of the shortest match using only the DFA by scanning for
+    /// suffix literals. It also reports the start of the match.
+    ///
+    /// Note that if None is returned, then the optimization gave up to avoid
+    /// worst case quadratic behavior. A forward scanning DFA should be tried
+    /// next.
+    ///
+    /// If a match is returned and the full leftmost-first match is desired,
+    /// then a forward scan starting from the beginning of the match must be
+    /// done.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn exec_dfa_reverse_suffix(
+        &self,
+        text: &[u8],
+        original_start: usize,
+    ) -> Option<dfa::Result<(usize, usize)>> {
+        use dfa::Result::*;
+
+        let lcs = self.ro.suffixes.lcs();
+        debug_assert!(lcs.len() >= 1);
+        let mut start = original_start;
+        let mut end = start;
+        let mut last_literal = start;
+        while end <= text.len() {
+            last_literal += match lcs.find(&text[last_literal..]) {
+                None => return Some(NoMatch(text.len())),
+                Some(i) => i,
+            };
+            end = last_literal + lcs.len();
+            match dfa::Fsm::reverse(
+                &self.ro.dfa_reverse,
+                self.cache.value(),
+                false,
+                &text[start..end],
+                end - start,
+            ) {
+                Match(0) | NoMatch(0) => return None,
+                Match(i) => return Some(Match((start + i, end))),
+                NoMatch(i) => {
+                    start += i;
+                    last_literal += 1;
+                    continue;
+                }
+                Quit => return Some(Quit),
+            };
+        }
+        Some(NoMatch(text.len()))
+    }
+
+    /// Finds the leftmost-first match (start and end) using only the DFA
+    /// by scanning for suffix literals.
+    ///
+    /// If the result returned indicates that the DFA quit, then another
+    /// matching engine should be used.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find_dfa_reverse_suffix(
+        &self,
+        text: &[u8],
+        start: usize,
+    ) -> dfa::Result<(usize, usize)> {
+        use dfa::Result::*;
+
+        let match_start = match self.exec_dfa_reverse_suffix(text, start) {
+            None => return self.find_dfa_forward(text, start),
+            Some(Match((start, _))) => start,
+            Some(r) => return r,
+        };
+        // At this point, we've found a match. The only way to quit now
+        // without a match is if the DFA gives up (seems unlikely).
+        //
+        // Now run the DFA forwards to find the proper end of the match.
+        // (The suffix literal match can only indicate the earliest
+        // possible end location, which may appear before the end of the
+        // leftmost-first match.)
+        match dfa::Fsm::forward(
+            &self.ro.dfa,
+            self.cache.value(),
+            false,
+            text,
+            match_start,
+        ) {
+            NoMatch(_) => panic!("BUG: reverse match implies forward match"),
+            Quit => Quit,
+            Match(e) => Match((match_start, e)),
+        }
+    }
+
+    /// Executes the NFA engine to return whether there is a match or not.
+    ///
+    /// Ideally, we could use shortest_nfa(...).is_some() and get the same
+    /// performance characteristics, but regex sets don't have captures, which
+    /// shortest_nfa depends on.
+    #[cfg(feature = "perf-dfa")]
+    fn match_nfa(&self, text: &[u8], start: usize) -> bool {
+        self.match_nfa_type(MatchNfaType::Auto, text, start)
+    }
+
+    /// Like match_nfa, but allows specification of the type of NFA engine.
+    fn match_nfa_type(
+        &self,
+        ty: MatchNfaType,
+        text: &[u8],
+        start: usize,
+    ) -> bool {
+        self.exec_nfa(
+            ty,
+            &mut [false],
+            &mut [],
+            true,
+            false,
+            text,
+            start,
+            text.len(),
+        )
+    }
+
+    /// Finds the shortest match using an NFA.
+    #[cfg(feature = "perf-dfa")]
+    fn shortest_nfa(&self, text: &[u8], start: usize) -> Option<usize> {
+        self.shortest_nfa_type(MatchNfaType::Auto, text, start)
+    }
+
+    /// Like shortest_nfa, but allows specification of the type of NFA engine.
+    fn shortest_nfa_type(
+        &self,
+        ty: MatchNfaType,
+        text: &[u8],
+        start: usize,
+    ) -> Option<usize> {
+        let mut slots = [None, None];
+        if self.exec_nfa(
+            ty,
+            &mut [false],
+            &mut slots,
+            true,
+            true,
+            text,
+            start,
+            text.len(),
+        ) {
+            slots[1]
+        } else {
+            None
+        }
+    }
+
+    /// Like find, but executes an NFA engine.
+    fn find_nfa(
+        &self,
+        ty: MatchNfaType,
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        let mut slots = [None, None];
+        if self.exec_nfa(
+            ty,
+            &mut [false],
+            &mut slots,
+            false,
+            false,
+            text,
+            start,
+            text.len(),
+        ) {
+            match (slots[0], slots[1]) {
+                (Some(s), Some(e)) => Some((s, e)),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    }
+
+    /// Like find_nfa, but fills in captures.
+    ///
+    /// `slots` should have length equal to `2 * nfa.captures.len()`.
+    #[cfg(feature = "perf-dfa")]
+    fn captures_nfa(
+        &self,
+        slots: &mut [Slot],
+        text: &[u8],
+        start: usize,
+    ) -> Option<(usize, usize)> {
+        self.captures_nfa_type(
+            MatchNfaType::Auto,
+            slots,
+            text,
+            start,
+            text.len(),
+        )
+    }
+
+    /// Like captures_nfa, but allows specification of type of NFA engine.
+    fn captures_nfa_type(
+        &self,
+        ty: MatchNfaType,
+        slots: &mut [Slot],
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> Option<(usize, usize)> {
+        if self.exec_nfa(
+            ty,
+            &mut [false],
+            slots,
+            false,
+            false,
+            text,
+            start,
+            end,
+        ) {
+            match (slots[0], slots[1]) {
+                (Some(s), Some(e)) => Some((s, e)),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    }
+
+    fn exec_nfa(
+        &self,
+        mut ty: MatchNfaType,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        quit_after_match: bool,
+        quit_after_match_with_pos: bool,
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> bool {
+        use self::MatchNfaType::*;
+        if let Auto = ty {
+            if backtrack::should_exec(self.ro.nfa.len(), text.len()) {
+                ty = Backtrack;
+            } else {
+                ty = PikeVM;
+            }
+        }
+        // The backtracker can't return the shortest match position as it is
+        // implemented today. So if someone calls `shortest_match` and we need
+        // to run an NFA, then use the PikeVM.
+        if quit_after_match_with_pos || ty == PikeVM {
+            self.exec_pikevm(
+                matches,
+                slots,
+                quit_after_match,
+                text,
+                start,
+                end,
+            )
+        } else {
+            self.exec_backtrack(matches, slots, text, start, end)
+        }
+    }
+
+    /// Always run the NFA algorithm.
+    fn exec_pikevm(
+        &self,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        quit_after_match: bool,
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> bool {
+        if self.ro.nfa.uses_bytes() {
+            pikevm::Fsm::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                quit_after_match,
+                ByteInput::new(text, self.ro.nfa.only_utf8),
+                start,
+                end,
+            )
+        } else {
+            pikevm::Fsm::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                quit_after_match,
+                CharInput::new(text),
+                start,
+                end,
+            )
+        }
+    }
+
+    /// Always runs the NFA using bounded backtracking.
+    fn exec_backtrack(
+        &self,
+        matches: &mut [bool],
+        slots: &mut [Slot],
+        text: &[u8],
+        start: usize,
+        end: usize,
+    ) -> bool {
+        if self.ro.nfa.uses_bytes() {
+            backtrack::Bounded::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                ByteInput::new(text, self.ro.nfa.only_utf8),
+                start,
+                end,
+            )
+        } else {
+            backtrack::Bounded::exec(
+                &self.ro.nfa,
+                self.cache.value(),
+                matches,
+                slots,
+                CharInput::new(text),
+                start,
+                end,
+            )
+        }
+    }
+
+    /// Finds which regular expressions match the given text.
+    ///
+    /// `matches` should have length equal to the number of regexes being
+    /// searched.
+    ///
+    /// This is only useful when one wants to know which regexes in a set
+    /// match some text.
+    pub fn many_matches_at(
+        &self,
+        matches: &mut [bool],
+        text: &[u8],
+        start: usize,
+    ) -> bool {
+        use self::MatchType::*;
+        if !self.is_anchor_end_match(text) {
+            return false;
+        }
+        match self.ro.match_type {
+            #[cfg(feature = "perf-literal")]
+            Literal(ty) => {
+                debug_assert_eq!(matches.len(), 1);
+                matches[0] = self.find_literals(ty, text, start).is_some();
+                matches[0]
+            }
+            #[cfg(feature = "perf-dfa")]
+            Dfa | DfaAnchoredReverse | DfaMany => {
+                match dfa::Fsm::forward_many(
+                    &self.ro.dfa,
+                    self.cache.value(),
+                    matches,
+                    text,
+                    start,
+                ) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.exec_nfa(
+                        MatchNfaType::Auto,
+                        matches,
+                        &mut [],
+                        false,
+                        false,
+                        text,
+                        start,
+                        text.len(),
+                    ),
+                }
+            }
+            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+            DfaSuffix => {
+                match dfa::Fsm::forward_many(
+                    &self.ro.dfa,
+                    self.cache.value(),
+                    matches,
+                    text,
+                    start,
+                ) {
+                    dfa::Result::Match(_) => true,
+                    dfa::Result::NoMatch(_) => false,
+                    dfa::Result::Quit => self.exec_nfa(
+                        MatchNfaType::Auto,
+                        matches,
+                        &mut [],
+                        false,
+                        false,
+                        text,
+                        start,
+                        text.len(),
+                    ),
+                }
+            }
+            Nfa(ty) => self.exec_nfa(
+                ty,
+                matches,
+                &mut [],
+                false,
+                false,
+                text,
+                start,
+                text.len(),
+            ),
+            Nothing => false,
+        }
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_anchor_end_match(&self, text: &[u8]) -> bool {
+        #[cfg(not(feature = "perf-literal"))]
+        fn imp(_: &ExecReadOnly, _: &[u8]) -> bool {
+            true
+        }
+
+        #[cfg(feature = "perf-literal")]
+        fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool {
+            // Only do this check if the haystack is big (>1MB).
+            if text.len() > (1 << 20) && ro.nfa.is_anchored_end {
+                let lcs = ro.suffixes.lcs();
+                if lcs.len() >= 1 && !lcs.is_suffix(text) {
+                    return false;
+                }
+            }
+            true
+        }
+
+        imp(&self.ro, text)
+    }
+
+    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
+        &self.ro.nfa.capture_name_idx
+    }
+}
+
+impl<'c> ExecNoSyncStr<'c> {
+    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
+        self.0.capture_name_idx()
+    }
+}
+
+impl Exec {
+    /// Get a searcher that isn't Sync.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn searcher(&self) -> ExecNoSync {
+        let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
+        ExecNoSync {
+            ro: &self.ro, // a clone is too expensive here! (and not needed)
+            cache: self.cache.get_or(create),
+        }
+    }
+
+    /// Get a searcher that isn't Sync and can match on &str.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn searcher_str(&self) -> ExecNoSyncStr {
+        ExecNoSyncStr(self.searcher())
+    }
+
+    /// Build a Regex from this executor.
+    pub fn into_regex(self) -> re_unicode::Regex {
+        re_unicode::Regex::from(self)
+    }
+
+    /// Build a RegexSet from this executor.
+    pub fn into_regex_set(self) -> re_set::unicode::RegexSet {
+        re_set::unicode::RegexSet::from(self)
+    }
+
+    /// Build a Regex from this executor that can match arbitrary bytes.
+    pub fn into_byte_regex(self) -> re_bytes::Regex {
+        re_bytes::Regex::from(self)
+    }
+
+    /// Build a RegexSet from this executor that can match arbitrary bytes.
+    pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet {
+        re_set::bytes::RegexSet::from(self)
+    }
+
+    /// The original regular expressions given by the caller that were
+    /// compiled.
+    pub fn regex_strings(&self) -> &[String] {
+        &self.ro.res
+    }
+
+    /// Return a slice of capture names.
+    ///
+    /// Any capture that isn't named is None.
+    pub fn capture_names(&self) -> &[Option<String>] {
+        &self.ro.nfa.captures
+    }
+
+    /// Return a reference to named groups mapping (from group name to
+    /// group position).
+    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
+        &self.ro.nfa.capture_name_idx
+    }
+}
+
+impl Clone for Exec {
+    fn clone(&self) -> Exec {
+        Exec { ro: self.ro.clone(), cache: Cached::new() }
+    }
+}
+
+impl ExecReadOnly {
+    fn choose_match_type(&self, hint: Option<MatchType>) -> MatchType {
+        if let Some(MatchType::Nfa(_)) = hint {
+            return hint.unwrap();
+        }
+        // If the NFA is empty, then we'll never match anything.
+        if self.nfa.insts.is_empty() {
+            return MatchType::Nothing;
+        }
+        if let Some(literalty) = self.choose_literal_match_type() {
+            return literalty;
+        }
+        if let Some(dfaty) = self.choose_dfa_match_type() {
+            return dfaty;
+        }
+        // We're so totally hosed.
+        MatchType::Nfa(MatchNfaType::Auto)
+    }
+
+    /// If a plain literal scan can be used, then a corresponding literal
+    /// search type is returned.
+    fn choose_literal_match_type(&self) -> Option<MatchType> {
+        #[cfg(not(feature = "perf-literal"))]
+        fn imp(_: &ExecReadOnly) -> Option<MatchType> {
+            None
+        }
+
+        #[cfg(feature = "perf-literal")]
+        fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
+            // If our set of prefixes is complete, then we can use it to find
+            // a match in lieu of a regex engine. This doesn't quite work well
+            // in the presence of multiple regexes, so only do it when there's
+            // one.
+            //
+            // TODO(burntsushi): Also, don't try to match literals if the regex
+            // is partially anchored. We could technically do it, but we'd need
+            // to create two sets of literals: all of them and then the subset
+            // that aren't anchored. We would then only search for all of them
+            // when at the beginning of the input and use the subset in all
+            // other cases.
+            if ro.res.len() != 1 {
+                return None;
+            }
+            if ro.ac.is_some() {
+                return Some(MatchType::Literal(
+                    MatchLiteralType::AhoCorasick,
+                ));
+            }
+            if ro.nfa.prefixes.complete() {
+                return if ro.nfa.is_anchored_start {
+                    Some(MatchType::Literal(MatchLiteralType::AnchoredStart))
+                } else {
+                    Some(MatchType::Literal(MatchLiteralType::Unanchored))
+                };
+            }
+            if ro.suffixes.complete() {
+                return if ro.nfa.is_anchored_end {
+                    Some(MatchType::Literal(MatchLiteralType::AnchoredEnd))
+                } else {
+                    // This case shouldn't happen. When the regex isn't
+                    // anchored, then complete prefixes should imply complete
+                    // suffixes.
+                    Some(MatchType::Literal(MatchLiteralType::Unanchored))
+                };
+            }
+            None
+        }
+
+        imp(self)
+    }
+
+    /// If a DFA scan can be used, then choose the appropriate DFA strategy.
+    fn choose_dfa_match_type(&self) -> Option<MatchType> {
+        #[cfg(not(feature = "perf-dfa"))]
+        fn imp(_: &ExecReadOnly) -> Option<MatchType> {
+            None
+        }
+
+        #[cfg(feature = "perf-dfa")]
+        fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
+            if !dfa::can_exec(&ro.dfa) {
+                return None;
+            }
+            // Regex sets require a slightly specialized path.
+            if ro.res.len() >= 2 {
+                return Some(MatchType::DfaMany);
+            }
+            // If the regex is anchored at the end but not the start, then
+            // just match in reverse from the end of the haystack.
+            if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end {
+                return Some(MatchType::DfaAnchoredReverse);
+            }
+            #[cfg(feature = "perf-literal")]
+            {
+                // If there's a longish suffix literal, then it might be faster
+                // to look for that first.
+                if ro.should_suffix_scan() {
+                    return Some(MatchType::DfaSuffix);
+                }
+            }
+            // Fall back to your garden variety forward searching lazy DFA.
+            Some(MatchType::Dfa)
+        }
+
+        imp(self)
+    }
+
+    /// Returns true if the program is amenable to suffix scanning.
+    ///
+    /// When this is true, as a heuristic, we assume it is OK to quickly scan
+    /// for suffix literals and then do a *reverse* DFA match from any matches
+    /// produced by the literal scan. (And then followed by a forward DFA
+    /// search, since the previously found suffix literal maybe not actually be
+    /// the end of a match.)
+    ///
+    /// This is a bit of a specialized optimization, but can result in pretty
+    /// big performance wins if 1) there are no prefix literals and 2) the
+    /// suffix literals are pretty rare in the text. (1) is obviously easy to
+    /// account for but (2) is harder. As a proxy, we assume that longer
+    /// strings are generally rarer, so we only enable this optimization when
+    /// we have a meaty suffix.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    fn should_suffix_scan(&self) -> bool {
+        if self.suffixes.is_empty() {
+            return false;
+        }
+        let lcs_len = self.suffixes.lcs().char_len();
+        lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+enum MatchType {
+    /// A single or multiple literal search. This is only used when the regex
+    /// can be decomposed into a literal search.
+    #[cfg(feature = "perf-literal")]
+    Literal(MatchLiteralType),
+    /// A normal DFA search.
+    #[cfg(feature = "perf-dfa")]
+    Dfa,
+    /// A reverse DFA search starting from the end of a haystack.
+    #[cfg(feature = "perf-dfa")]
+    DfaAnchoredReverse,
+    /// A reverse DFA search with suffix literal scanning.
+    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
+    DfaSuffix,
+    /// Use the DFA on two or more regular expressions.
+    #[cfg(feature = "perf-dfa")]
+    DfaMany,
+    /// An NFA variant.
+    Nfa(MatchNfaType),
+    /// No match is ever possible, so don't ever try to search.
+    Nothing,
+}
+
+#[derive(Clone, Copy, Debug)]
+#[cfg(feature = "perf-literal")]
+enum MatchLiteralType {
+    /// Match literals anywhere in text.
+    Unanchored,
+    /// Match literals only at the start of text.
+    AnchoredStart,
+    /// Match literals only at the end of text.
+    AnchoredEnd,
+    /// Use an Aho-Corasick automaton. This requires `ac` to be Some on
+    /// ExecReadOnly.
+    AhoCorasick,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum MatchNfaType {
+    /// Choose between Backtrack and PikeVM.
+    Auto,
+    /// NFA bounded backtracking.
+    ///
+    /// (This is only set by tests, since it never makes sense to always want
+    /// backtracking.)
+    Backtrack,
+    /// The Pike VM.
+    ///
+    /// (This is only set by tests, since it never makes sense to always want
+    /// the Pike VM.)
+    PikeVM,
+}
+
+/// `ProgramCache` maintains reusable allocations for each matching engine
+/// available to a particular program.
+pub type ProgramCache = RefCell<ProgramCacheInner>;
+
+#[derive(Debug)]
+pub struct ProgramCacheInner {
+    pub pikevm: pikevm::Cache,
+    pub backtrack: backtrack::Cache,
+    #[cfg(feature = "perf-dfa")]
+    pub dfa: dfa::Cache,
+    #[cfg(feature = "perf-dfa")]
+    pub dfa_reverse: dfa::Cache,
+}
+
+impl ProgramCacheInner {
+    fn new(ro: &ExecReadOnly) -> Self {
+        ProgramCacheInner {
+            pikevm: pikevm::Cache::new(&ro.nfa),
+            backtrack: backtrack::Cache::new(&ro.nfa),
+            #[cfg(feature = "perf-dfa")]
+            dfa: dfa::Cache::new(&ro.dfa),
+            #[cfg(feature = "perf-dfa")]
+            dfa_reverse: dfa::Cache::new(&ro.dfa_reverse),
+        }
+    }
+}
+
+/// Alternation literals checks if the given HIR is a simple alternation of
+/// literals, and if so, returns them. Otherwise, this returns None.
+#[cfg(feature = "perf-literal")]
+fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
+    use syntax::hir::{HirKind, Literal};
+
+    // This is pretty hacky, but basically, if `is_alternation_literal` is
+    // true, then we can make several assumptions about the structure of our
+    // HIR. This is what justifies the `unreachable!` statements below.
+    //
+    // This code should be refactored once we overhaul this crate's
+    // optimization pipeline, because this is a terribly inflexible way to go
+    // about things.
+
+    if !expr.is_alternation_literal() {
+        return None;
+    }
+    let alts = match *expr.kind() {
+        HirKind::Alternation(ref alts) => alts,
+        _ => return None, // one literal isn't worth it
+    };
+
+    let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
+        Literal::Unicode(c) => {
+            let mut buf = [0; 4];
+            dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
+        }
+        Literal::Byte(b) => {
+            dst.push(b);
+        }
+    };
+
+    let mut lits = vec![];
+    for alt in alts {
+        let mut lit = vec![];
+        match *alt.kind() {
+            HirKind::Literal(ref x) => extendlit(x, &mut lit),
+            HirKind::Concat(ref exprs) => {
+                for e in exprs {
+                    match *e.kind() {
+                        HirKind::Literal(ref x) => extendlit(x, &mut lit),
+                        _ => unreachable!("expected literal, got {:?}", e),
+                    }
+                }
+            }
+            _ => unreachable!("expected literal or concat, got {:?}", alt),
+        }
+        lits.push(lit);
+    }
+    Some(lits)
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
+        use internal::ExecBuilder;
+
+        let backtrack_bytes_re = ExecBuilder::new("^S")
+            .bounded_backtracking()
+            .only_utf8(false)
+            .build()
+            .map(|exec| exec.into_byte_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let default_bytes_re = ExecBuilder::new("^S")
+            .only_utf8(false)
+            .build()
+            .map(|exec| exec.into_byte_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let input = vec![83, 83];
+
+        let s1 = backtrack_bytes_re.split(&input);
+        let s2 = default_bytes_re.split(&input);
+        for (chunk1, chunk2) in s1.zip(s2) {
+            assert_eq!(chunk1, chunk2);
+        }
+    }
+
+    #[test]
+    fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
+        use internal::ExecBuilder;
+
+        let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
+            .bounded_backtracking()
+            .bytes(true)
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
+            .bytes(true)
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let input = "**";
+
+        let s1 = backtrack_bytes_re.split(input);
+        let s2 = default_bytes_re.split(input);
+        for (chunk1, chunk2) in s1.zip(s2) {
+            assert_eq!(chunk1, chunk2);
+        }
+    }
+}
commit	e42c505f54ac2e7b2ca7d0197304cac1b4f605e9	[log] [tgz]
author	Chih-Hung Hsieh <chh@google.com>	Thu Apr 16 10:44:21 2020 -0700
committer	Chih-Hung Hsieh <chh@google.com>	Thu Apr 16 10:45:53 2020 -0700
tree	7ca22ebdbdb7d0c9dc5c66d31f564aeaf51cece4
parent	815d0a2344b5321fd47f0cb7dba96e4cb4e84615 [diff] [blame]