Blame - src/re_trait.rs - platform/external/rust/crates/regex

blob: 680aa54594547f44a864ca5e4d0712a820a9ae82 [file] [log] [blame]

Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1	use std::fmt;
				2	use std::iter::FusedIterator;
				3
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	4	/// Slot is a single saved capture location. Note that there are two slots for
				5	/// every capture in a regular expression (one slot each for the start and end
				6	/// of the capture).
				7	pub type Slot = Option<usize>;
				8
				9	/// Locations represents the offsets of each capturing group in a regex for
				10	/// a single match.
				11	///
				12	/// Unlike `Captures`, a `Locations` value only stores offsets.
				13	#[doc(hidden)]
				14	#[derive(Clone, Debug)]
				15	pub struct Locations(Vec<Slot>);
				16
				17	impl Locations {
				18	/// Returns the start and end positions of the Nth capture group. Returns
				19	/// `None` if `i` is not a valid capture group or if the capture group did
				20	/// not match anything. The positions returned are always byte indices
				21	/// with respect to the original string matched.
				22	pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
				23	let (s, e) = (i * 2, i * 2 + 1);
				24	match (self.0.get(s), self.0.get(e)) {
				25	(Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
				26	_ => None,
				27	}
				28	}
				29
				30	/// Creates an iterator of all the capture group positions in order of
				31	/// appearance in the regular expression. Positions are byte indices
				32	/// in terms of the original string matched.
Joel Galenson	3874808	2021-05-19 16:51:51 -0700	[diff] [blame^]	33	pub fn iter(&self) -> SubCapturesPosIter<'_> {
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	34	SubCapturesPosIter { idx: 0, locs: self }
				35	}
				36
				37	/// Returns the total number of capturing groups.
				38	///
				39	/// This is always at least `1` since every regex has at least `1`
				40	/// capturing group that corresponds to the entire match.
				41	pub fn len(&self) -> usize {
				42	self.0.len() / 2
				43	}
				44
				45	/// Return the individual slots as a slice.
				46	pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
				47	&mut self.0
				48	}
				49	}
				50
				51	/// An iterator over capture group positions for a particular match of a
				52	/// regular expression.
				53	///
				54	/// Positions are byte indices in terms of the original string matched.
				55	///
				56	/// `'c` is the lifetime of the captures.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	57	#[derive(Clone, Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	58	pub struct SubCapturesPosIter<'c> {
				59	idx: usize,
				60	locs: &'c Locations,
				61	}
				62
				63	impl<'c> Iterator for SubCapturesPosIter<'c> {
				64	type Item = Option<(usize, usize)>;
				65
				66	fn next(&mut self) -> Option<Option<(usize, usize)>> {
				67	if self.idx >= self.locs.len() {
				68	return None;
				69	}
				70	let x = match self.locs.pos(self.idx) {
				71	None => Some(None),
				72	Some((s, e)) => Some(Some((s, e))),
				73	};
				74	self.idx += 1;
				75	x
				76	}
				77	}
				78
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	79	impl<'c> FusedIterator for SubCapturesPosIter<'c> {}
				80
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	81	/// `RegularExpression` describes types that can implement regex searching.
				82	///
				83	/// This trait is my attempt at reducing code duplication and to standardize
				84	/// the internal API. Specific duplication that is avoided are the `find`
				85	/// and `capture` iterators, which are slightly tricky.
				86	///
				87	/// It's not clear whether this trait is worth it, and it also isn't
				88	/// clear whether it's useful as a public trait or not. Methods like
				89	/// `next_after_empty` reak of bad design, but the rest of the methods seem
				90	/// somewhat reasonable. One particular thing this trait would expose would be
				91	/// the ability to start the search of a regex anywhere in a haystack, which
				92	/// isn't possible in the current public API.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	93	pub trait RegularExpression: Sized + fmt::Debug {
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	94	/// The type of the haystack.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	95	type Text: ?Sized + fmt::Debug;
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	96
				97	/// The number of capture slots in the compiled regular expression. This is
				98	/// always two times the number of capture groups (two slots per group).
				99	fn slots_len(&self) -> usize;
				100
				101	/// Allocates fresh space for all capturing groups in this regex.
				102	fn locations(&self) -> Locations {
				103	Locations(vec![None; self.slots_len()])
				104	}
				105
				106	/// Returns the position of the next character after `i`.
				107	///
				108	/// For example, a haystack with type `&[u8]` probably returns `i+1`,
				109	/// whereas a haystack with type `&str` probably returns `i` plus the
				110	/// length of the next UTF-8 sequence.
				111	fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
				112
				113	/// Returns the location of the shortest match.
				114	fn shortest_match_at(
				115	&self,
				116	text: &Self::Text,
				117	start: usize,
				118	) -> Option<usize>;
				119
				120	/// Returns whether the regex matches the text given.
				121	fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;
				122
				123	/// Returns the leftmost-first match location if one exists.
				124	fn find_at(
				125	&self,
				126	text: &Self::Text,
				127	start: usize,
				128	) -> Option<(usize, usize)>;
				129
				130	/// Returns the leftmost-first match location if one exists, and also
				131	/// fills in any matching capture slot locations.
				132	fn captures_read_at(
				133	&self,
				134	locs: &mut Locations,
				135	text: &Self::Text,
				136	start: usize,
				137	) -> Option<(usize, usize)>;
				138
				139	/// Returns an iterator over all non-overlapping successive leftmost-first
				140	/// matches.
Joel Galenson	3874808	2021-05-19 16:51:51 -0700	[diff] [blame^]	141	fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> {
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	142	Matches { re: self, text: text, last_end: 0, last_match: None }
				143	}
				144
				145	/// Returns an iterator over all non-overlapping successive leftmost-first
				146	/// matches with captures.
Joel Galenson	3874808	2021-05-19 16:51:51 -0700	[diff] [blame^]	147	fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> {
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	148	CaptureMatches(self.find_iter(text))
				149	}
				150	}
				151
				152	/// An iterator over all non-overlapping successive leftmost-first matches.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	153	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	154	pub struct Matches<'t, R>
				155	where
				156	R: RegularExpression,
				157	R::Text: 't,
				158	{
				159	re: R,
				160	text: &'t R::Text,
				161	last_end: usize,
				162	last_match: Option<usize>,
				163	}
				164
				165	impl<'t, R> Matches<'t, R>
				166	where
				167	R: RegularExpression,
				168	R::Text: 't,
				169	{
				170	/// Return the text being searched.
				171	pub fn text(&self) -> &'t R::Text {
				172	self.text
				173	}
				174
				175	/// Return the underlying regex.
				176	pub fn regex(&self) -> &R {
				177	&self.re
				178	}
				179	}
				180
				181	impl<'t, R> Iterator for Matches<'t, R>
				182	where
				183	R: RegularExpression,
				184	R::Text: 't + AsRef<[u8]>,
				185	{
				186	type Item = (usize, usize);
				187
				188	fn next(&mut self) -> Option<(usize, usize)> {
				189	if self.last_end > self.text.as_ref().len() {
				190	return None;
				191	}
				192	let (s, e) = match self.re.find_at(self.text, self.last_end) {
				193	None => return None,
				194	Some((s, e)) => (s, e),
				195	};
				196	if s == e {
				197	// This is an empty match. To ensure we make progress, start
				198	// the next search at the smallest possible starting position
				199	// of the next match following this one.
				200	self.last_end = self.re.next_after_empty(self.text, e);
				201	// Don't accept empty matches immediately following a match.
				202	// Just move on to the next match.
				203	if Some(e) == self.last_match {
				204	return self.next();
				205	}
				206	} else {
				207	self.last_end = e;
				208	}
				209	self.last_match = Some(e);
				210	Some((s, e))
				211	}
				212	}
				213
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	214	impl<'t, R> FusedIterator for Matches<'t, R>
				215	where
				216	R: RegularExpression,
				217	R::Text: 't + AsRef<[u8]>,
				218	{
				219	}
				220
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	221	/// An iterator over all non-overlapping successive leftmost-first matches with
				222	/// captures.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	223	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	224	pub struct CaptureMatches<'t, R>(Matches<'t, R>)
				225	where
				226	R: RegularExpression,
				227	R::Text: 't;
				228
				229	impl<'t, R> CaptureMatches<'t, R>
				230	where
				231	R: RegularExpression,
				232	R::Text: 't,
				233	{
				234	/// Return the text being searched.
				235	pub fn text(&self) -> &'t R::Text {
				236	self.0.text()
				237	}
				238
				239	/// Return the underlying regex.
				240	pub fn regex(&self) -> &R {
				241	self.0.regex()
				242	}
				243	}
				244
				245	impl<'t, R> Iterator for CaptureMatches<'t, R>
				246	where
				247	R: RegularExpression,
				248	R::Text: 't + AsRef<[u8]>,
				249	{
				250	type Item = Locations;
				251
				252	fn next(&mut self) -> Option<Locations> {
				253	if self.0.last_end > self.0.text.as_ref().len() {
				254	return None;
				255	}
				256	let mut locs = self.0.re.locations();
				257	let (s, e) = match self.0.re.captures_read_at(
				258	&mut locs,
				259	self.0.text,
				260	self.0.last_end,
				261	) {
				262	None => return None,
				263	Some((s, e)) => (s, e),
				264	};
				265	if s == e {
				266	self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
				267	if Some(e) == self.0.last_match {
				268	return self.next();
				269	}
				270	} else {
				271	self.0.last_end = e;
				272	}
				273	self.0.last_match = Some(e);
				274	Some(locs)
				275	}
				276	}
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	277
				278	impl<'t, R> FusedIterator for CaptureMatches<'t, R>
				279	where
				280	R: RegularExpression,
				281	R::Text: 't + AsRef<[u8]>,
				282	{
				283	}