src/re_trait.rs - platform/external/rust/crates/regex - Gitiles

 /// Slot is a single saved capture location. Note that there are two slots for
 /// every capture in a regular expression (one slot each for the start and end
 /// of the capture).
 pub type Slot = Option<usize>;

 /// Locations represents the offsets of each capturing group in a regex for
 /// a single match.
 ///
 /// Unlike `Captures`, a `Locations` value only stores offsets.
 #[doc(hidden)]
 #[derive(Clone, Debug)]
 pub struct Locations(Vec<Slot>);

 impl Locations {
     /// Returns the start and end positions of the Nth capture group. Returns
     /// `None` if `i` is not a valid capture group or if the capture group did
     /// not match anything. The positions returned are *always* byte indices
     /// with respect to the original string matched.
     pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
         let (s, e) = (i * 2, i * 2 + 1);
         match (self.0.get(s), self.0.get(e)) {
             (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
             _ => None,
         }
     }

     /// Creates an iterator of all the capture group positions in order of
     /// appearance in the regular expression. Positions are byte indices
     /// in terms of the original string matched.
     pub fn iter(&self) -> SubCapturesPosIter {
         SubCapturesPosIter { idx: 0, locs: self }
     }

     /// Returns the total number of capturing groups.
     ///
     /// This is always at least `1` since every regex has at least `1`
     /// capturing group that corresponds to the entire match.
     pub fn len(&self) -> usize {
         self.0.len() / 2
     }

     /// Return the individual slots as a slice.
     pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
         &mut self.0
     }
 }

 /// An iterator over capture group positions for a particular match of a
 /// regular expression.
 ///
 /// Positions are byte indices in terms of the original string matched.
 ///
 /// `'c` is the lifetime of the captures.
 pub struct SubCapturesPosIter<'c> {
     idx: usize,
     locs: &'c Locations,
 }

 impl<'c> Iterator for SubCapturesPosIter<'c> {
     type Item = Option<(usize, usize)>;

     fn next(&mut self) -> Option<Option<(usize, usize)>> {
         if self.idx >= self.locs.len() {
             return None;
         }
         let x = match self.locs.pos(self.idx) {
             None => Some(None),
             Some((s, e)) => Some(Some((s, e))),
         };
         self.idx += 1;
         x
     }
 }

 /// `RegularExpression` describes types that can implement regex searching.
 ///
 /// This trait is my attempt at reducing code duplication and to standardize
 /// the internal API. Specific duplication that is avoided are the `find`
 /// and `capture` iterators, which are slightly tricky.
 ///
 /// It's not clear whether this trait is worth it, and it also isn't
 /// clear whether it's useful as a public trait or not. Methods like
 /// `next_after_empty` reak of bad design, but the rest of the methods seem
 /// somewhat reasonable. One particular thing this trait would expose would be
 /// the ability to start the search of a regex anywhere in a haystack, which
 /// isn't possible in the current public API.
 pub trait RegularExpression: Sized {
     /// The type of the haystack.
     type Text: ?Sized;

     /// The number of capture slots in the compiled regular expression. This is
     /// always two times the number of capture groups (two slots per group).
     fn slots_len(&self) -> usize;

     /// Allocates fresh space for all capturing groups in this regex.
     fn locations(&self) -> Locations {
         Locations(vec![None; self.slots_len()])
     }

     /// Returns the position of the next character after `i`.
     ///
     /// For example, a haystack with type `&[u8]` probably returns `i+1`,
     /// whereas a haystack with type `&str` probably returns `i` plus the
     /// length of the next UTF-8 sequence.
     fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;

     /// Returns the location of the shortest match.
     fn shortest_match_at(
         &self,
         text: &Self::Text,
         start: usize,
     ) -> Option<usize>;

     /// Returns whether the regex matches the text given.
     fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;

     /// Returns the leftmost-first match location if one exists.
     fn find_at(
         &self,
         text: &Self::Text,
         start: usize,
     ) -> Option<(usize, usize)>;

     /// Returns the leftmost-first match location if one exists, and also
     /// fills in any matching capture slot locations.
     fn captures_read_at(
         &self,
         locs: &mut Locations,
         text: &Self::Text,
         start: usize,
     ) -> Option<(usize, usize)>;

     /// Returns an iterator over all non-overlapping successive leftmost-first
     /// matches.
     fn find_iter(self, text: &Self::Text) -> Matches<Self> {
         Matches { re: self, text: text, last_end: 0, last_match: None }
     }

     /// Returns an iterator over all non-overlapping successive leftmost-first
     /// matches with captures.
     fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
         CaptureMatches(self.find_iter(text))
     }
 }

 /// An iterator over all non-overlapping successive leftmost-first matches.
 pub struct Matches<'t, R>
 where
     R: RegularExpression,
     R::Text: 't,
 {
     re: R,
     text: &'t R::Text,
     last_end: usize,
     last_match: Option<usize>,
 }

 impl<'t, R> Matches<'t, R>
 where
     R: RegularExpression,
     R::Text: 't,
 {
     /// Return the text being searched.
     pub fn text(&self) -> &'t R::Text {
         self.text
     }

     /// Return the underlying regex.
     pub fn regex(&self) -> &R {
         &self.re
     }
 }

 impl<'t, R> Iterator for Matches<'t, R>
 where
     R: RegularExpression,
     R::Text: 't + AsRef<[u8]>,
 {
     type Item = (usize, usize);

     fn next(&mut self) -> Option<(usize, usize)> {
         if self.last_end > self.text.as_ref().len() {
             return None;
         }
         let (s, e) = match self.re.find_at(self.text, self.last_end) {
             None => return None,
             Some((s, e)) => (s, e),
         };
         if s == e {
             // This is an empty match. To ensure we make progress, start
             // the next search at the smallest possible starting position
             // of the next match following this one.
             self.last_end = self.re.next_after_empty(self.text, e);
             // Don't accept empty matches immediately following a match.
             // Just move on to the next match.
             if Some(e) == self.last_match {
                 return self.next();
             }
         } else {
             self.last_end = e;
         }
         self.last_match = Some(e);
         Some((s, e))
     }
 }

 /// An iterator over all non-overlapping successive leftmost-first matches with
 /// captures.
 pub struct CaptureMatches<'t, R>(Matches<'t, R>)
 where
     R: RegularExpression,
     R::Text: 't;

 impl<'t, R> CaptureMatches<'t, R>
 where
     R: RegularExpression,
     R::Text: 't,
 {
     /// Return the text being searched.
     pub fn text(&self) -> &'t R::Text {
         self.0.text()
     }

     /// Return the underlying regex.
     pub fn regex(&self) -> &R {
         self.0.regex()
     }
 }

 impl<'t, R> Iterator for CaptureMatches<'t, R>
 where
     R: RegularExpression,
     R::Text: 't + AsRef<[u8]>,
 {
     type Item = Locations;

     fn next(&mut self) -> Option<Locations> {
         if self.0.last_end > self.0.text.as_ref().len() {
             return None;
         }
         let mut locs = self.0.re.locations();
         let (s, e) = match self.0.re.captures_read_at(
             &mut locs,
             self.0.text,
             self.0.last_end,
         ) {
             None => return None,
             Some((s, e)) => (s, e),
         };
         if s == e {
             self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
             if Some(e) == self.0.last_match {
                 return self.next();
             }
         } else {
             self.0.last_end = e;
         }
         self.0.last_match = Some(e);
         Some(locs)
     }
 }
	/// Slot is a single saved capture location. Note that there are two slots for
	/// every capture in a regular expression (one slot each for the start and end
	/// of the capture).
	pub type Slot = Option<usize>;

	/// Locations represents the offsets of each capturing group in a regex for
	/// a single match.
	///
	/// Unlike `Captures`, a `Locations` value only stores offsets.
	#[doc(hidden)]
	#[derive(Clone, Debug)]
	pub struct Locations(Vec<Slot>);

	impl Locations {
	/// Returns the start and end positions of the Nth capture group. Returns
	/// `None` if `i` is not a valid capture group or if the capture group did
	/// not match anything. The positions returned are always byte indices
	/// with respect to the original string matched.
	pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
	let (s, e) = (i * 2, i * 2 + 1);
	match (self.0.get(s), self.0.get(e)) {
	(Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
	_ => None,
	}
	}

	/// Creates an iterator of all the capture group positions in order of
	/// appearance in the regular expression. Positions are byte indices
	/// in terms of the original string matched.
	pub fn iter(&self) -> SubCapturesPosIter {
	SubCapturesPosIter { idx: 0, locs: self }
	}

	/// Returns the total number of capturing groups.
	///
	/// This is always at least `1` since every regex has at least `1`
	/// capturing group that corresponds to the entire match.
	pub fn len(&self) -> usize {
	self.0.len() / 2
	}

	/// Return the individual slots as a slice.
	pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
	&mut self.0
	}
	}

	/// An iterator over capture group positions for a particular match of a
	/// regular expression.
	///
	/// Positions are byte indices in terms of the original string matched.
	///
	/// `'c` is the lifetime of the captures.
	pub struct SubCapturesPosIter<'c> {
	idx: usize,
	locs: &'c Locations,
	}

	impl<'c> Iterator for SubCapturesPosIter<'c> {
	type Item = Option<(usize, usize)>;

	fn next(&mut self) -> Option<Option<(usize, usize)>> {
	if self.idx >= self.locs.len() {
	return None;
	}
	let x = match self.locs.pos(self.idx) {
	None => Some(None),
	Some((s, e)) => Some(Some((s, e))),
	};
	self.idx += 1;
	x
	}
	}

	/// `RegularExpression` describes types that can implement regex searching.
	///
	/// This trait is my attempt at reducing code duplication and to standardize
	/// the internal API. Specific duplication that is avoided are the `find`
	/// and `capture` iterators, which are slightly tricky.
	///
	/// It's not clear whether this trait is worth it, and it also isn't
	/// clear whether it's useful as a public trait or not. Methods like
	/// `next_after_empty` reak of bad design, but the rest of the methods seem
	/// somewhat reasonable. One particular thing this trait would expose would be
	/// the ability to start the search of a regex anywhere in a haystack, which
	/// isn't possible in the current public API.
	pub trait RegularExpression: Sized {
	/// The type of the haystack.
	type Text: ?Sized;

	/// The number of capture slots in the compiled regular expression. This is
	/// always two times the number of capture groups (two slots per group).
	fn slots_len(&self) -> usize;

	/// Allocates fresh space for all capturing groups in this regex.
	fn locations(&self) -> Locations {
	Locations(vec![None; self.slots_len()])
	}

	/// Returns the position of the next character after `i`.
	///
	/// For example, a haystack with type `&[u8]` probably returns `i+1`,
	/// whereas a haystack with type `&str` probably returns `i` plus the
	/// length of the next UTF-8 sequence.
	fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;

	/// Returns the location of the shortest match.
	fn shortest_match_at(
	&self,
	text: &Self::Text,
	start: usize,
	) -> Option<usize>;

	/// Returns whether the regex matches the text given.
	fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;

	/// Returns the leftmost-first match location if one exists.
	fn find_at(
	&self,
	text: &Self::Text,
	start: usize,
	) -> Option<(usize, usize)>;

	/// Returns the leftmost-first match location if one exists, and also
	/// fills in any matching capture slot locations.
	fn captures_read_at(
	&self,
	locs: &mut Locations,
	text: &Self::Text,
	start: usize,
	) -> Option<(usize, usize)>;

	/// Returns an iterator over all non-overlapping successive leftmost-first
	/// matches.
	fn find_iter(self, text: &Self::Text) -> Matches<Self> {
	Matches { re: self, text: text, last_end: 0, last_match: None }
	}

	/// Returns an iterator over all non-overlapping successive leftmost-first
	/// matches with captures.
	fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
	CaptureMatches(self.find_iter(text))
	}
	}

	/// An iterator over all non-overlapping successive leftmost-first matches.
	pub struct Matches<'t, R>
	where
	R: RegularExpression,
	R::Text: 't,
	{
	re: R,
	text: &'t R::Text,
	last_end: usize,
	last_match: Option<usize>,
	}

	impl<'t, R> Matches<'t, R>
	where
	R: RegularExpression,
	R::Text: 't,
	{
	/// Return the text being searched.
	pub fn text(&self) -> &'t R::Text {
	self.text
	}

	/// Return the underlying regex.
	pub fn regex(&self) -> &R {
	&self.re
	}
	}

	impl<'t, R> Iterator for Matches<'t, R>
	where
	R: RegularExpression,
	R::Text: 't + AsRef<[u8]>,
	{
	type Item = (usize, usize);

	fn next(&mut self) -> Option<(usize, usize)> {
	if self.last_end > self.text.as_ref().len() {
	return None;
	}
	let (s, e) = match self.re.find_at(self.text, self.last_end) {
	None => return None,
	Some((s, e)) => (s, e),
	};
	if s == e {
	// This is an empty match. To ensure we make progress, start
	// the next search at the smallest possible starting position
	// of the next match following this one.
	self.last_end = self.re.next_after_empty(self.text, e);
	// Don't accept empty matches immediately following a match.
	// Just move on to the next match.
	if Some(e) == self.last_match {
	return self.next();
	}
	} else {
	self.last_end = e;
	}
	self.last_match = Some(e);
	Some((s, e))
	}
	}

	/// An iterator over all non-overlapping successive leftmost-first matches with
	/// captures.
	pub struct CaptureMatches<'t, R>(Matches<'t, R>)
	where
	R: RegularExpression,
	R::Text: 't;

	impl<'t, R> CaptureMatches<'t, R>
	where
	R: RegularExpression,
	R::Text: 't,
	{
	/// Return the text being searched.
	pub fn text(&self) -> &'t R::Text {
	self.0.text()
	}

	/// Return the underlying regex.
	pub fn regex(&self) -> &R {
	self.0.regex()
	}
	}

	impl<'t, R> Iterator for CaptureMatches<'t, R>
	where
	R: RegularExpression,
	R::Text: 't + AsRef<[u8]>,
	{
	type Item = Locations;

	fn next(&mut self) -> Option<Locations> {
	if self.0.last_end > self.0.text.as_ref().len() {
	return None;
	}
	let mut locs = self.0.re.locations();
	let (s, e) = match self.0.re.captures_read_at(
	&mut locs,
	self.0.text,
	self.0.last_end,
	) {
	None => return None,
	Some((s, e)) => (s, e),
	};
	if s == e {
	self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
	if Some(e) == self.0.last_match {
	return self.next();
	}
	} else {
	self.0.last_end = e;
	}
	self.0.last_match = Some(e);
	Some(locs)
	}
	}