Import 'regex' package version 1.3.6 * Add OWNERS * No Android.bp yet Bug: 152884384 Test: make Change-Id: I455caf7833b6c437c1c133bc7b2f47b83da9cbce

commit: e42c505f54ac2e7b2ca7d0197304cac1b4f605e9 [log] [tgz]
author: Chih-Hung Hsieh <chh@google.com> Thu Apr 16 10:44:21 2020 -0700
committer: Chih-Hung Hsieh <chh@google.com> Thu Apr 16 10:45:53 2020 -0700
tree: 7ca22ebdbdb7d0c9dc5c66d31f564aeaf51cece4
parent: 815d0a2344b5321fd47f0cb7dba96e4cb4e84615 [diff] [blame]
diff --git a/src/re_trait.rs b/src/re_trait.rs
new file mode 100644
index 0000000..b56804e
--- /dev/null
+++ b/src/re_trait.rs

@@ -0,0 +1,261 @@
+/// Slot is a single saved capture location: `Some(offset)` once a position
+/// has been recorded, or `None` while it is unset. Note that there are two
+/// slots for every capture in a regular expression (one slot each for the
+/// start and end of the capture).
+pub type Slot = Option<usize>;
+
+/// Locations represents the offsets of each capturing group in a regex for
+/// a single match.
+///
+/// Unlike `Captures`, a `Locations` value only stores offsets.
+///
+/// Internally this is a flat vector of slots in which capture group `i`
+/// occupies index `2 * i` (start) and index `2 * i + 1` (end).
+#[doc(hidden)]
+#[derive(Clone, Debug)]
+pub struct Locations(Vec<Slot>);
+
+impl Locations {
+    /// Returns the start and end positions of the Nth capture group. Returns
+    /// `None` if `i` is not a valid capture group or if the capture group did
+    /// not match anything. The positions returned are *always* byte indices
+    /// with respect to the original string matched.
+    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+        // Reject out-of-range groups before doing any index arithmetic.
+        // Without this guard, `i * 2` overflows for very large `i`: a panic
+        // in debug builds, and in release builds a wrapped index that can
+        // alias the slots of a real group.
+        if i >= self.len() {
+            return None;
+        }
+        // `i < self.len()` guarantees `2 * i + 1 < self.0.len()`, so neither
+        // expression below can overflow.
+        let (s, e) = (2 * i, 2 * i + 1);
+        match (self.0.get(s), self.0.get(e)) {
+            (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
+            // At least one of the two slots is unset: the group did not
+            // participate in the match.
+            _ => None,
+        }
+    }
+
+    /// Creates an iterator of all the capture group positions in order of
+    /// appearance in the regular expression. Positions are byte indices
+    /// in terms of the original string matched.
+    pub fn iter(&self) -> SubCapturesPosIter {
+        SubCapturesPosIter { idx: 0, locs: self }
+    }
+
+    /// Returns the total number of capturing groups.
+    ///
+    /// This is always at least `1` since every regex has at least `1`
+    /// capturing group that corresponds to the entire match.
+    pub fn len(&self) -> usize {
+        // Two slots (start and end) are stored per capture group.
+        self.0.len() / 2
+    }
+
+    /// Return the individual slots as a mutable slice.
+    pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
+        &mut self.0
+    }
+}
+
+/// An iterator over capture group positions for a particular match of a
+/// regular expression.
+///
+/// Positions are byte indices in terms of the original string matched.
+///
+/// `'c` is the lifetime of the captures.
+#[derive(Clone, Debug)]
+pub struct SubCapturesPosIter<'c> {
+    /// Index of the capture group that the next call to `next` will yield.
+    idx: usize,
+    /// The capture locations being iterated over.
+    locs: &'c Locations,
+}
+
+impl<'c> Iterator for SubCapturesPosIter<'c> {
+    type Item = Option<(usize, usize)>;
+
+    /// Yields the position of the next capture group, in group order.
+    ///
+    /// The inner value is `None` for a group that has no recorded position.
+    /// The outer value is `None` once every group has been visited.
+    fn next(&mut self) -> Option<Option<(usize, usize)>> {
+        let group = self.idx;
+        if group < self.locs.len() {
+            self.idx = group + 1;
+            Some(self.locs.pos(group))
+        } else {
+            None
+        }
+    }
+}
+
+/// `RegularExpression` describes types that can implement regex searching.
+///
+/// The trait exists to cut down on code duplication and to standardize the
+/// internal API. In particular, it lets the slightly tricky `find` and
+/// `capture` iterators be written only once.
+///
+/// It's not clear whether this trait is worth it, nor whether it would be
+/// useful as a public trait. Methods like `next_after_empty` reek of bad
+/// design, but the remaining methods seem somewhat reasonable. One particular
+/// thing this trait would expose is the ability to start a regex search
+/// anywhere in a haystack, which isn't possible in the current public API.
+pub trait RegularExpression: Sized {
+    /// The type of the haystack.
+    type Text: ?Sized;
+
+    /// The number of capture slots in the compiled regular expression. This is
+    /// always two times the number of capture groups (two slots per group).
+    fn slots_len(&self) -> usize;
+
+    /// Allocates fresh space for all capturing groups in this regex.
+    ///
+    /// Every slot in the returned `Locations` starts out as `None`.
+    fn locations(&self) -> Locations {
+        let slots = vec![None; self.slots_len()];
+        Locations(slots)
+    }
+
+    /// Returns the position of the next character after `i`.
+    ///
+    /// For example, a haystack with type `&[u8]` probably returns `i+1`,
+    /// whereas a haystack with type `&str` probably returns `i` plus the
+    /// length of the next UTF-8 sequence.
+    fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
+
+    /// Returns the location of the shortest match.
+    fn shortest_match_at(
+        &self,
+        text: &Self::Text,
+        start: usize,
+    ) -> Option<usize>;
+
+    /// Returns whether the regex matches the text given.
+    fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;
+
+    /// Returns the leftmost-first match location if one exists.
+    fn find_at(
+        &self,
+        text: &Self::Text,
+        start: usize,
+    ) -> Option<(usize, usize)>;
+
+    /// Returns the leftmost-first match location if one exists, and also
+    /// fills in any matching capture slot locations.
+    fn captures_read_at(
+        &self,
+        locs: &mut Locations,
+        text: &Self::Text,
+        start: usize,
+    ) -> Option<(usize, usize)>;
+
+    /// Returns an iterator over all non-overlapping successive leftmost-first
+    /// matches.
+    ///
+    /// The iterator takes ownership of the regex and starts searching at
+    /// offset `0` with no previous match recorded.
+    fn find_iter(self, text: &Self::Text) -> Matches<Self> {
+        Matches { last_match: None, last_end: 0, text, re: self }
+    }
+
+    /// Returns an iterator over all non-overlapping successive leftmost-first
+    /// matches with captures.
+    fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
+        let matches = self.find_iter(text);
+        CaptureMatches(matches)
+    }
+}
+
+/// An iterator over all non-overlapping successive leftmost-first matches.
+///
+/// `'t` is the lifetime of the haystack and `R` is the regex implementation
+/// that performs the searches.
+pub struct Matches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't,
+{
+    // The regex used to run each search.
+    re: R,
+    // The haystack being searched.
+    text: &'t R::Text,
+    // Offset at which the next search begins.
+    last_end: usize,
+    // End offset of the most recently yielded match, if any. Used to skip an
+    // empty match that immediately follows a previous match.
+    last_match: Option<usize>,
+}
+
+impl<'t, R> Matches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't,
+{
+    /// Return the text being searched.
+    ///
+    /// The reference carries the haystack lifetime `'t`, not the lifetime of
+    /// the borrow of this iterator.
+    pub fn text(&self) -> &'t R::Text {
+        let haystack: &'t R::Text = self.text;
+        haystack
+    }
+
+    /// Return the underlying regex.
+    pub fn regex(&self) -> &R {
+        let regex = &self.re;
+        regex
+    }
+}
+
+impl<'t, R> Iterator for Matches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't + AsRef<[u8]>,
+{
+    type Item = (usize, usize);
+
+    /// Yields the `(start, end)` offsets of the next match, or `None` once
+    /// the haystack is exhausted or no further match exists.
+    fn next(&mut self) -> Option<(usize, usize)> {
+        loop {
+            // The search position has moved past the end of the haystack, so
+            // there is nothing left to find.
+            if self.last_end > self.text.as_ref().len() {
+                return None;
+            }
+            let (start, end) = self.re.find_at(self.text, self.last_end)?;
+            if start != end {
+                self.last_end = end;
+            } else {
+                // An empty match: to guarantee progress, resume the search
+                // at the smallest possible starting position of a match
+                // following this one.
+                self.last_end = self.re.next_after_empty(self.text, end);
+                // An empty match directly after the previous match is not
+                // reported; look for the next match instead.
+                if self.last_match == Some(end) {
+                    continue;
+                }
+            }
+            self.last_match = Some(end);
+            return Some((start, end));
+        }
+    }
+}
+
+/// An iterator over all non-overlapping successive leftmost-first matches with
+/// captures.
+///
+/// This wraps a `Matches` iterator and reuses its regex, haystack and search
+/// position state.
+pub struct CaptureMatches<'t, R>(Matches<'t, R>)
+where
+    R: RegularExpression,
+    R::Text: 't;
+
+impl<'t, R> CaptureMatches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't,
+{
+    /// Return the text being searched.
+    ///
+    /// This is the haystack held by the wrapped `Matches` iterator.
+    pub fn text(&self) -> &'t R::Text {
+        self.0.text
+    }
+
+    /// Return the underlying regex.
+    ///
+    /// This is the regex held by the wrapped `Matches` iterator.
+    pub fn regex(&self) -> &R {
+        &self.0.re
+    }
+}
+
+impl<'t, R> Iterator for CaptureMatches<'t, R>
+where
+    R: RegularExpression,
+    R::Text: 't + AsRef<[u8]>,
+{
+    type Item = Locations;
+
+    /// Yields the capture locations of the next match, or `None` once the
+    /// haystack is exhausted or no further match exists.
+    fn next(&mut self) -> Option<Locations> {
+        let inner = &mut self.0;
+        loop {
+            // The search position has moved past the end of the haystack, so
+            // there is nothing left to find.
+            if inner.last_end > inner.text.as_ref().len() {
+                return None;
+            }
+            // Fresh slot storage for this search attempt.
+            let mut locs = inner.re.locations();
+            let (start, end) = inner.re.captures_read_at(
+                &mut locs,
+                inner.text,
+                inner.last_end,
+            )?;
+            if start != end {
+                inner.last_end = end;
+            } else {
+                // An empty match: step past it so the search makes progress.
+                inner.last_end = inner.re.next_after_empty(inner.text, end);
+                // An empty match directly after the previous match is not
+                // reported; look for the next match instead.
+                if inner.last_match == Some(end) {
+                    continue;
+                }
+            }
+            inner.last_match = Some(end);
+            return Some(locs);
+        }
+    }
+}
commit	e42c505f54ac2e7b2ca7d0197304cac1b4f605e9	[log] [tgz]
author	Chih-Hung Hsieh <chh@google.com>	Thu Apr 16 10:44:21 2020 -0700
committer	Chih-Hung Hsieh <chh@google.com>	Thu Apr 16 10:45:53 2020 -0700
tree	7ca22ebdbdb7d0c9dc5c66d31f564aeaf51cece4
parent	815d0a2344b5321fd47f0cb7dba96e4cb4e84615 [diff] [blame]