Blame - src/re_bytes.rs - platform/external/rust/crates/regex

blob: a091436f14a276310abdff71c5f0b555e98a7f4e [file] [log] [blame]

Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1	use std::borrow::Cow;
				2	use std::collections::HashMap;
				3	use std::fmt;
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	4	use std::iter::FusedIterator;
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	5	use std::ops::{Index, Range};
				6	use std::str::FromStr;
				7	use std::sync::Arc;
				8
				9	use find_byte::find_byte;
				10
				11	use error::Error;
				12	use exec::{Exec, ExecNoSync};
				13	use expand::expand_bytes;
				14	use re_builder::bytes::RegexBuilder;
				15	use re_trait::{self, RegularExpression, SubCapturesPosIter};
				16
				17	/// Match represents a single match of a regex in a haystack.
				18	///
				19	/// The lifetime parameter `'t` refers to the lifetime of the matched text.
				20	#[derive(Copy, Clone, Debug, Eq, PartialEq)]
				21	pub struct Match<'t> {
				22	text: &'t [u8],
				23	start: usize,
				24	end: usize,
				25	}
				26
				27	impl<'t> Match<'t> {
				28	/// Returns the starting byte offset of the match in the haystack.
				29	#[inline]
				30	pub fn start(&self) -> usize {
				31	self.start
				32	}
				33
				34	/// Returns the ending byte offset of the match in the haystack.
				35	#[inline]
				36	pub fn end(&self) -> usize {
				37	self.end
				38	}
				39
				40	/// Returns the range over the starting and ending byte offsets of the
				41	/// match in the haystack.
				42	#[inline]
				43	pub fn range(&self) -> Range<usize> {
				44	self.start..self.end
				45	}
				46
				47	/// Returns the matched text.
				48	#[inline]
				49	pub fn as_bytes(&self) -> &'t [u8] {
				50	&self.text[self.range()]
				51	}
				52
				53	/// Creates a new match from the given haystack and byte offsets.
				54	#[inline]
				55	fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> {
				56	Match { text: haystack, start: start, end: end }
				57	}
				58	}
				59
				60	impl<'t> From<Match<'t>> for Range<usize> {
				61	fn from(m: Match<'t>) -> Range<usize> {
				62	m.range()
				63	}
				64	}
				65
				66	/// A compiled regular expression for matching arbitrary bytes.
				67	///
				68	/// It can be used to search, split or replace text. All searching is done with
				69	/// an implicit `.*?` at the beginning and end of an expression. To force an
				70	/// expression to match the whole string (or a prefix or a suffix), you must
				71	/// use an anchor like `^` or `$` (or `\A` and `\z`).
				72	///
				73	/// Like the `Regex` type in the parent module, matches with this regex return
				74	/// byte offsets into the search text. Unlike the parent `Regex` type,
				75	/// these byte offsets may not correspond to UTF-8 sequence boundaries since
				76	/// the regexes in this module can match arbitrary bytes.
				77	#[derive(Clone)]
				78	pub struct Regex(Exec);
				79
				80	impl fmt::Display for Regex {
				81	/// Shows the original regular expression.
				82	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				83	write!(f, "{}", self.as_str())
				84	}
				85	}
				86
				87	impl fmt::Debug for Regex {
				88	/// Shows the original regular expression.
				89	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				90	fmt::Display::fmt(self, f)
				91	}
				92	}
				93
				94	/// A constructor for Regex from an Exec.
				95	///
				96	/// This is hidden because Exec isn't actually part of the public API.
				97	#[doc(hidden)]
				98	impl From<Exec> for Regex {
				99	fn from(exec: Exec) -> Regex {
				100	Regex(exec)
				101	}
				102	}
				103
				104	impl FromStr for Regex {
				105	type Err = Error;
				106
				107	/// Attempts to parse a string into a regular expression
				108	fn from_str(s: &str) -> Result<Regex, Error> {
				109	Regex::new(s)
				110	}
				111	}
				112
				113	/// Core regular expression methods.
				114	impl Regex {
				115	/// Compiles a regular expression. Once compiled, it can be used repeatedly
				116	/// to search, split or replace text in a string.
				117	///
				118	/// If an invalid expression is given, then an error is returned.
				119	pub fn new(re: &str) -> Result<Regex, Error> {
				120	RegexBuilder::new(re).build()
				121	}
				122
Chih-Hung Hsieh	849e445	2020-10-26 13:16:47 -0700	[diff] [blame]	123	/// Returns true if and only if there is a match for the regex in the
				124	/// string given.
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	125	///
				126	/// It is recommended to use this method if all you need to do is test
				127	/// a match, since the underlying matching engine may be able to do less
				128	/// work.
				129	///
				130	/// # Example
				131	///
				132	/// Test if some text contains at least one word with exactly 13 ASCII word
				133	/// bytes:
				134	///
				135	/// ```rust
				136	/// # extern crate regex; use regex::bytes::Regex;
				137	/// # fn main() {
				138	/// let text = b"I categorically deny having triskaidekaphobia.";
				139	/// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
				140	/// # }
				141	/// ```
				142	pub fn is_match(&self, text: &[u8]) -> bool {
				143	self.is_match_at(text, 0)
				144	}
				145
				146	/// Returns the start and end byte range of the leftmost-first match in
				147	/// `text`. If no match exists, then `None` is returned.
				148	///
				149	/// Note that this should only be used if you want to discover the position
				150	/// of the match. Testing the existence of a match is faster if you use
				151	/// `is_match`.
				152	///
				153	/// # Example
				154	///
				155	/// Find the start and end location of the first word with exactly 13
				156	/// ASCII word bytes:
				157	///
				158	/// ```rust
				159	/// # extern crate regex; use regex::bytes::Regex;
				160	/// # fn main() {
				161	/// let text = b"I categorically deny having triskaidekaphobia.";
				162	/// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
				163	/// assert_eq!((mat.start(), mat.end()), (2, 15));
				164	/// # }
				165	/// ```
				166	pub fn find<'t>(&self, text: &'t [u8]) -> Option<Match<'t>> {
				167	self.find_at(text, 0)
				168	}
				169
				170	/// Returns an iterator for each successive non-overlapping match in
				171	/// `text`, returning the start and end byte indices with respect to
				172	/// `text`.
				173	///
				174	/// # Example
				175	///
				176	/// Find the start and end location of every word with exactly 13 ASCII
				177	/// word bytes:
				178	///
				179	/// ```rust
				180	/// # extern crate regex; use regex::bytes::Regex;
				181	/// # fn main() {
				182	/// let text = b"Retroactively relinquishing remunerations is reprehensible.";
				183	/// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
				184	/// println!("{:?}", mat);
				185	/// }
				186	/// # }
				187	/// ```
				188	pub fn find_iter<'r, 't>(&'r self, text: &'t [u8]) -> Matches<'r, 't> {
				189	Matches(self.0.searcher().find_iter(text))
				190	}
				191
				192	/// Returns the capture groups corresponding to the leftmost-first
				193	/// match in `text`. Capture group `0` always corresponds to the entire
				194	/// match. If no match is found, then `None` is returned.
				195	///
				196	/// You should only use `captures` if you need access to the location of
				197	/// capturing group matches. Otherwise, `find` is faster for discovering
				198	/// the location of the overall match.
				199	///
				200	/// # Examples
				201	///
				202	/// Say you have some text with movie names and their release years,
				203	/// like "'Citizen Kane' (1941)". It'd be nice if we could search for text
				204	/// looking like that, while also extracting the movie name and its release
				205	/// year separately.
				206	///
				207	/// ```rust
				208	/// # extern crate regex; use regex::bytes::Regex;
				209	/// # fn main() {
				210	/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
				211	/// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
				212	/// let caps = re.captures(text).unwrap();
				213	/// assert_eq!(caps.get(1).unwrap().as_bytes(), &b"Citizen Kane"[..]);
				214	/// assert_eq!(caps.get(2).unwrap().as_bytes(), &b"1941"[..]);
				215	/// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]);
				216	/// // You can also access the groups by index using the Index notation.
				217	/// // Note that this will panic on an invalid index.
				218	/// assert_eq!(&caps[1], b"Citizen Kane");
				219	/// assert_eq!(&caps[2], b"1941");
				220	/// assert_eq!(&caps[0], b"'Citizen Kane' (1941)");
				221	/// # }
				222	/// ```
				223	///
				224	/// Note that the full match is at capture group `0`. Each subsequent
				225	/// capture group is indexed by the order of its opening `(`.
				226	///
				227	/// We can make this example a bit clearer by using named capture groups:
				228	///
				229	/// ```rust
				230	/// # extern crate regex; use regex::bytes::Regex;
				231	/// # fn main() {
				232	/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
				233	/// .unwrap();
				234	/// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
				235	/// let caps = re.captures(text).unwrap();
				236	/// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane");
				237	/// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941");
				238	/// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]);
				239	/// // You can also access the groups by name using the Index notation.
				240	/// // Note that this will panic on an invalid group name.
				241	/// assert_eq!(&caps["title"], b"Citizen Kane");
				242	/// assert_eq!(&caps["year"], b"1941");
				243	/// assert_eq!(&caps[0], b"'Citizen Kane' (1941)");
				244	///
				245	/// # }
				246	/// ```
				247	///
				248	/// Here we name the capture groups, which we can access with the `name`
				249	/// method or the `Index` notation with a `&str`. Note that the named
				250	/// capture groups are still accessible with `get` or the `Index` notation
				251	/// with a `usize`.
				252	///
				253	/// The `0`th capture group is always unnamed, so it must always be
				254	/// accessed with `get(0)` or `[0]`.
				255	pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> {
				256	let mut locs = self.capture_locations();
				257	self.captures_read_at(&mut locs, text, 0).map(move \|_\| Captures {
				258	text: text,
				259	locs: locs.0,
				260	named_groups: self.0.capture_name_idx().clone(),
				261	})
				262	}
				263
				264	/// Returns an iterator over all the non-overlapping capture groups matched
				265	/// in `text`. This is operationally the same as `find_iter`, except it
				266	/// yields information about capturing group matches.
				267	///
				268	/// # Example
				269	///
				270	/// We can use this to find all movie titles and their release years in
				271	/// some text, where the movie is formatted like "'Title' (xxxx)":
				272	///
				273	/// ```rust
				274	/// # extern crate regex; use std::str; use regex::bytes::Regex;
				275	/// # fn main() {
				276	/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
				277	/// .unwrap();
				278	/// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
				279	/// for caps in re.captures_iter(text) {
				280	/// let title = str::from_utf8(&caps["title"]).unwrap();
				281	/// let year = str::from_utf8(&caps["year"]).unwrap();
				282	/// println!("Movie: {:?}, Released: {:?}", title, year);
				283	/// }
				284	/// // Output:
				285	/// // Movie: Citizen Kane, Released: 1941
				286	/// // Movie: The Wizard of Oz, Released: 1939
				287	/// // Movie: M, Released: 1931
				288	/// # }
				289	/// ```
				290	pub fn captures_iter<'r, 't>(
				291	&'r self,
				292	text: &'t [u8],
				293	) -> CaptureMatches<'r, 't> {
				294	CaptureMatches(self.0.searcher().captures_iter(text))
				295	}
				296
				297	/// Returns an iterator of substrings of `text` delimited by a match of the
				298	/// regular expression. Namely, each element of the iterator corresponds to
				299	/// text that isn't matched by the regular expression.
				300	///
				301	/// This method will not copy the text given.
				302	///
				303	/// # Example
				304	///
				305	/// To split a string delimited by arbitrary amounts of spaces or tabs:
				306	///
				307	/// ```rust
				308	/// # extern crate regex; use regex::bytes::Regex;
				309	/// # fn main() {
				310	/// let re = Regex::new(r"[ \t]+").unwrap();
				311	/// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect();
				312	/// assert_eq!(fields, vec![
				313	/// &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..],
				314	/// ]);
				315	/// # }
				316	/// ```
				317	pub fn split<'r, 't>(&'r self, text: &'t [u8]) -> Split<'r, 't> {
				318	Split { finder: self.find_iter(text), last: 0 }
				319	}
				320
				321	/// Returns an iterator of at most `limit` substrings of `text` delimited
				322	/// by a match of the regular expression. (A `limit` of `0` will return no
				323	/// substrings.) Namely, each element of the iterator corresponds to text
				324	/// that isn't matched by the regular expression. The remainder of the
				325	/// string that is not split will be the last element in the iterator.
				326	///
				327	/// This method will not copy the text given.
				328	///
				329	/// # Example
				330	///
				331	/// Get the first two words in some text:
				332	///
				333	/// ```rust
				334	/// # extern crate regex; use regex::bytes::Regex;
				335	/// # fn main() {
				336	/// let re = Regex::new(r"\W+").unwrap();
				337	/// let fields: Vec<&[u8]> = re.splitn(b"Hey! How are you?", 3).collect();
				338	/// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]);
				339	/// # }
				340	/// ```
				341	pub fn splitn<'r, 't>(
				342	&'r self,
				343	text: &'t [u8],
				344	limit: usize,
				345	) -> SplitN<'r, 't> {
				346	SplitN { splits: self.split(text), n: limit }
				347	}
				348
				349	/// Replaces the leftmost-first match with the replacement provided. The
				350	/// replacement can be a regular byte string (where `$N` and `$name` are
				351	/// expanded to match capture groups) or a function that takes the matches'
				352	/// `Captures` and returns the replaced byte string.
				353	///
				354	/// If no match is found, then a copy of the byte string is returned
				355	/// unchanged.
				356	///
				357	/// # Replacement string syntax
				358	///
				359	/// All instances of `$name` in the replacement text are replaced with the
				360	/// corresponding capture group `name`.
				361	///
				362	/// `name` may be an integer corresponding to the index of the
				363	/// capture group (counted by order of opening parenthesis where `0` is the
				364	/// entire match) or it can be a name (consisting of letters, digits or
				365	/// underscores) corresponding to a named capture group.
				366	///
				367	/// If `name` isn't a valid capture group (whether the name doesn't exist
				368	/// or isn't a valid index), then it is replaced with the empty string.
				369	///
				370	/// The longest possible name is used. e.g., `$1a` looks up the capture
				371	/// group named `1a` and not the capture group at index `1`. To exert more
				372	/// precise control over the name, use braces, e.g., `${1}a`.
				373	///
				374	/// To write a literal `$` use `$$`.
				375	///
				376	/// # Examples
				377	///
				378	/// Note that this function is polymorphic with respect to the replacement.
				379	/// In typical usage, this can just be a normal byte string:
				380	///
				381	/// ```rust
				382	/// # extern crate regex; use regex::bytes::Regex;
				383	/// # fn main() {
				384	/// let re = Regex::new("[^01]+").unwrap();
				385	/// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]);
				386	/// # }
				387	/// ```
				388	///
				389	/// But anything satisfying the `Replacer` trait will work. For example, a
				390	/// closure of type `|&Captures| -> Vec<u8>` provides direct access to the
				391	/// captures corresponding to a match. This allows one to access capturing
				392	/// group matches easily:
				393	///
				394	/// ```rust
				395	/// # extern crate regex; use regex::bytes::Regex;
				396	/// # use regex::bytes::Captures; fn main() {
				397	/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
				398	/// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| {
				399	/// let mut replacement = caps[2].to_owned();
				400	/// replacement.push(b' ');
				401	/// replacement.extend(&caps[1]);
				402	/// replacement
				403	/// });
				404	/// assert_eq!(result, &b"Bruce Springsteen"[..]);
				405	/// # }
				406	/// ```
				407	///
				408	/// But this is a bit cumbersome to use all the time. Instead, a simple
				409	/// syntax is supported that expands `$name` into the corresponding capture
				410	/// group. Here's the last example, but using this expansion technique
				411	/// with named capture groups:
				412	///
				413	/// ```rust
				414	/// # extern crate regex; use regex::bytes::Regex;
				415	/// # fn main() {
				416	/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
				417	/// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]);
				418	/// assert_eq!(result, &b"Bruce Springsteen"[..]);
				419	/// # }
				420	/// ```
				421	///
				422	/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
				423	/// would produce the same result. To write a literal `$` use `$$`.
				424	///
				425	/// Sometimes the replacement string requires use of curly braces to
				426	/// delineate a capture group replacement and surrounding literal text.
				427	/// For example, if we wanted to join two words together with an
				428	/// underscore:
				429	///
				430	/// ```rust
				431	/// # extern crate regex; use regex::bytes::Regex;
				432	/// # fn main() {
				433	/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
				434	/// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
				435	/// assert_eq!(result, &b"deep_fried"[..]);
				436	/// # }
				437	/// ```
				438	///
				439	/// Without the curly braces, the capture group name `first_` would be
				440	/// used, and since it doesn't exist, it would be replaced with the empty
				441	/// string.
				442	///
				443	/// Finally, sometimes you just want to replace a literal string with no
				444	/// regard for capturing group expansion. This can be done by wrapping a
				445	/// byte string with `NoExpand`:
				446	///
				447	/// ```rust
				448	/// # extern crate regex; use regex::bytes::Regex;
				449	/// # fn main() {
				450	/// use regex::bytes::NoExpand;
				451	///
				452	/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
				453	/// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last"));
				454	/// assert_eq!(result, &b"$2 $last"[..]);
				455	/// # }
				456	/// ```
				457	pub fn replace<'t, R: Replacer>(
				458	&self,
				459	text: &'t [u8],
				460	rep: R,
				461	) -> Cow<'t, [u8]> {
				462	self.replacen(text, 1, rep)
				463	}
				464
				465	/// Replaces all non-overlapping matches in `text` with the replacement
				466	/// provided. This is the same as calling `replacen` with `limit` set to
				467	/// `0`.
				468	///
				469	/// See the documentation for `replace` for details on how to access
				470	/// capturing group matches in the replacement text.
				471	pub fn replace_all<'t, R: Replacer>(
				472	&self,
				473	text: &'t [u8],
				474	rep: R,
				475	) -> Cow<'t, [u8]> {
				476	self.replacen(text, 0, rep)
				477	}
				478
				479	/// Replaces at most `limit` non-overlapping matches in `text` with the
				480	/// replacement provided. If `limit` is 0, then all non-overlapping matches
				481	/// are replaced.
				482	///
				483	/// See the documentation for `replace` for details on how to access
				484	/// capturing group matches in the replacement text.
				485	pub fn replacen<'t, R: Replacer>(
				486	&self,
				487	text: &'t [u8],
				488	limit: usize,
				489	mut rep: R,
				490	) -> Cow<'t, [u8]> {
				491	if let Some(rep) = rep.no_expansion() {
				492	let mut it = self.find_iter(text).enumerate().peekable();
				493	if it.peek().is_none() {
				494	return Cow::Borrowed(text);
				495	}
				496	let mut new = Vec::with_capacity(text.len());
				497	let mut last_match = 0;
				498	for (i, m) in it {
				499	if limit > 0 && i >= limit {
				500	break;
				501	}
				502	new.extend_from_slice(&text[last_match..m.start()]);
				503	new.extend_from_slice(&rep);
				504	last_match = m.end();
				505	}
				506	new.extend_from_slice(&text[last_match..]);
				507	return Cow::Owned(new);
				508	}
				509
				510	// The slower path, which we use if the replacement needs access to
				511	// capture groups.
				512	let mut it = self.captures_iter(text).enumerate().peekable();
				513	if it.peek().is_none() {
				514	return Cow::Borrowed(text);
				515	}
				516	let mut new = Vec::with_capacity(text.len());
				517	let mut last_match = 0;
				518	for (i, cap) in it {
				519	if limit > 0 && i >= limit {
				520	break;
				521	}
				522	// unwrap on 0 is OK because captures only reports matches
				523	let m = cap.get(0).unwrap();
				524	new.extend_from_slice(&text[last_match..m.start()]);
				525	rep.replace_append(&cap, &mut new);
				526	last_match = m.end();
				527	}
				528	new.extend_from_slice(&text[last_match..]);
				529	Cow::Owned(new)
				530	}
				531	}
				532
				533	/// Advanced or "lower level" search methods.
				534	impl Regex {
				535	/// Returns the end location of a match in the text given.
				536	///
				537	/// This method may have the same performance characteristics as
				538	/// `is_match`, except it provides an end location for a match. In
				539	/// particular, the location returned may be shorter than the proper end
				540	/// of the leftmost-first match.
				541	///
				542	/// # Example
				543	///
				544	/// Typically, `a+` would match the entire first sequence of `a` in some
				545	/// text, but `shortest_match` can give up as soon as it sees the first
				546	/// `a`.
				547	///
				548	/// ```rust
				549	/// # extern crate regex; use regex::bytes::Regex;
				550	/// # fn main() {
				551	/// let text = b"aaaaa";
				552	/// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
				553	/// assert_eq!(pos, Some(1));
				554	/// # }
				555	/// ```
				556	pub fn shortest_match(&self, text: &[u8]) -> Option<usize> {
				557	self.shortest_match_at(text, 0)
				558	}
				559
				560	/// Returns the same as shortest_match, but starts the search at the given
				561	/// offset.
				562	///
				563	/// The significance of the starting point is that it takes the surrounding
				564	/// context into consideration. For example, the `\A` anchor can only
				565	/// match when `start == 0`.
				566	pub fn shortest_match_at(
				567	&self,
				568	text: &[u8],
				569	start: usize,
				570	) -> Option<usize> {
				571	self.0.searcher().shortest_match_at(text, start)
				572	}
				573
				574	/// Returns the same as is_match, but starts the search at the given
				575	/// offset.
				576	///
				577	/// The significance of the starting point is that it takes the surrounding
				578	/// context into consideration. For example, the `\A` anchor can only
				579	/// match when `start == 0`.
				580	pub fn is_match_at(&self, text: &[u8], start: usize) -> bool {
				581	self.shortest_match_at(text, start).is_some()
				582	}
				583
				584	/// Returns the same as find, but starts the search at the given
				585	/// offset.
				586	///
				587	/// The significance of the starting point is that it takes the surrounding
				588	/// context into consideration. For example, the `\A` anchor can only
				589	/// match when `start == 0`.
				590	pub fn find_at<'t>(
				591	&self,
				592	text: &'t [u8],
				593	start: usize,
				594	) -> Option<Match<'t>> {
				595	self.0
				596	.searcher()
				597	.find_at(text, start)
				598	.map(\|(s, e)\| Match::new(text, s, e))
				599	}
				600
				601	/// This is like `captures`, but uses
				602	/// [`CaptureLocations`](struct.CaptureLocations.html)
				603	/// instead of
				604	/// [`Captures`](struct.Captures.html) in order to amortize allocations.
				605	///
				606	/// To create a `CaptureLocations` value, use the
				607	/// `Regex::capture_locations` method.
				608	///
				609	/// This returns the overall match if this was successful, which is always
				610	/// equivalent to the `0`th capture group.
				611	pub fn captures_read<'t>(
				612	&self,
				613	locs: &mut CaptureLocations,
				614	text: &'t [u8],
				615	) -> Option<Match<'t>> {
				616	self.captures_read_at(locs, text, 0)
				617	}
				618
				619	/// Returns the same as `captures_read`, but starts the search at the given
				620	/// offset and populates the capture locations given.
				621	///
				622	/// The significance of the starting point is that it takes the surrounding
				623	/// context into consideration. For example, the `\A` anchor can only
				624	/// match when `start == 0`.
				625	pub fn captures_read_at<'t>(
				626	&self,
				627	locs: &mut CaptureLocations,
				628	text: &'t [u8],
				629	start: usize,
				630	) -> Option<Match<'t>> {
				631	self.0
				632	.searcher()
				633	.captures_read_at(&mut locs.0, text, start)
				634	.map(\|(s, e)\| Match::new(text, s, e))
				635	}
				636
				637	/// An undocumented alias for `captures_read_at`.
				638	///
				639	/// The `regex-capi` crate previously used this routine, so to avoid
				640	/// breaking that crate, we continue to provide the name as an undocumented
				641	/// alias.
				642	#[doc(hidden)]
				643	pub fn read_captures_at<'t>(
				644	&self,
				645	locs: &mut CaptureLocations,
				646	text: &'t [u8],
				647	start: usize,
				648	) -> Option<Match<'t>> {
				649	self.captures_read_at(locs, text, start)
				650	}
				651	}
				652
				653	/// Auxiliary methods.
				654	impl Regex {
				655	/// Returns the original string of this regex.
				656	pub fn as_str(&self) -> &str {
				657	&self.0.regex_strings()[0]
				658	}
				659
				660	/// Returns an iterator over the capture names.
				661	pub fn capture_names(&self) -> CaptureNames {
				662	CaptureNames(self.0.capture_names().iter())
				663	}
				664
				665	/// Returns the number of captures.
				666	pub fn captures_len(&self) -> usize {
				667	self.0.capture_names().len()
				668	}
				669
				670	/// Returns an empty set of capture locations that can be reused in
				671	/// multiple calls to `captures_read` or `captures_read_at`.
				672	pub fn capture_locations(&self) -> CaptureLocations {
				673	CaptureLocations(self.0.searcher().locations())
				674	}
				675
				676	/// An alias for `capture_locations` to preserve backward compatibility.
				677	///
				678	/// The `regex-capi` crate uses this method, so to avoid breaking that
				679	/// crate, we continue to export it as an undocumented API.
				680	#[doc(hidden)]
				681	pub fn locations(&self) -> CaptureLocations {
				682	CaptureLocations(self.0.searcher().locations())
				683	}
				684	}
				685
				686	/// An iterator over all non-overlapping matches for a particular string.
				687	///
				688	/// The iterator yields a tuple of integers corresponding to the start and end
				689	/// of the match. The indices are byte offsets. The iterator stops when no more
				690	/// matches can be found.
				691	///
				692	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				693	/// lifetime of the matched byte string.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	694	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	695	pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>);
				696
				697	impl<'r, 't> Iterator for Matches<'r, 't> {
				698	type Item = Match<'t>;
				699
				700	fn next(&mut self) -> Option<Match<'t>> {
				701	let text = self.0.text();
				702	self.0.next().map(\|(s, e)\| Match::new(text, s, e))
				703	}
				704	}
				705
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	706	impl<'r, 't> FusedIterator for Matches<'r, 't> {}
				707
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	708	/// An iterator that yields all non-overlapping capture groups matching a
				709	/// particular regular expression.
				710	///
				711	/// The iterator stops when no more matches can be found.
				712	///
				713	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				714	/// lifetime of the matched byte string.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	715	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	716	pub struct CaptureMatches<'r, 't>(
				717	re_trait::CaptureMatches<'t, ExecNoSync<'r>>,
				718	);
				719
				720	impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
				721	type Item = Captures<'t>;
				722
				723	fn next(&mut self) -> Option<Captures<'t>> {
				724	self.0.next().map(\|locs\| Captures {
				725	text: self.0.text(),
				726	locs: locs,
				727	named_groups: self.0.regex().capture_name_idx().clone(),
				728	})
				729	}
				730	}
				731
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	732	impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {}
				733
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	734	/// Yields all substrings delimited by a regular expression match.
				735	///
				736	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				737	/// lifetime of the byte string being split.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	738	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	739	pub struct Split<'r, 't> {
				740	finder: Matches<'r, 't>,
				741	last: usize,
				742	}
				743
				744	impl<'r, 't> Iterator for Split<'r, 't> {
				745	type Item = &'t [u8];
				746
				747	fn next(&mut self) -> Option<&'t [u8]> {
				748	let text = self.finder.0.text();
				749	match self.finder.next() {
				750	None => {
				751	if self.last > text.len() {
				752	None
				753	} else {
				754	let s = &text[self.last..];
				755	self.last = text.len() + 1; // Next call will return None
				756	Some(s)
				757	}
				758	}
				759	Some(m) => {
				760	let matched = &text[self.last..m.start()];
				761	self.last = m.end();
				762	Some(matched)
				763	}
				764	}
				765	}
				766	}
				767
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	768	impl<'r, 't> FusedIterator for Split<'r, 't> {}
				769
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	770	/// Yields at most `N` substrings delimited by a regular expression match.
				771	///
				772	/// The last substring will be whatever remains after splitting.
				773	///
				774	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				775	/// lifetime of the byte string being split.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	776	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	777	pub struct SplitN<'r, 't> {
				778	splits: Split<'r, 't>,
				779	n: usize,
				780	}
				781
				782	impl<'r, 't> Iterator for SplitN<'r, 't> {
				783	type Item = &'t [u8];
				784
				785	fn next(&mut self) -> Option<&'t [u8]> {
				786	if self.n == 0 {
				787	return None;
				788	}
				789
				790	self.n -= 1;
				791	if self.n > 0 {
				792	return self.splits.next();
				793	}
				794
				795	let text = self.splits.finder.0.text();
				796	if self.splits.last > text.len() {
				797	// We've already returned all substrings.
				798	None
				799	} else {
				800	// self.n == 0, so future calls will return None immediately
				801	Some(&text[self.splits.last..])
				802	}
				803	}
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	804
				805	fn size_hint(&self) -> (usize, Option<usize>) {
				806	(0, Some(self.n))
				807	}
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	808	}
				809
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	810	impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
				811
/// An iterator over the names of every capture group in a regex.
///
/// Unnamed groups are reported as `None`. The first item (capture 0, the
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    fn next(&mut self) -> Option<Option<&'r str>> {
        // Borrow the stored name (if the group has one) as a `&str` that
        // lives as long as the regex itself.
        self.0.next().map(|slot| slot.as_ref().map(String::as_str))
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }

    fn count(self) -> usize {
        self.0.count()
    }
}

impl<'r> ExactSizeIterator for CaptureNames<'r> {}

impl<'r> FusedIterator for CaptureNames<'r> {}
				843
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	844	/// CaptureLocations is a low level representation of the raw offsets of each
				845	/// submatch.
				846	///
				847	/// You can think of this as a lower level
				848	/// [`Captures`](struct.Captures.html), where this type does not support
				849	/// named capturing groups directly and it does not borrow the text that these
				850	/// offsets were matched on.
				851	///
				852	/// Primarily, this type is useful when using the lower level `Regex` APIs
				853	/// such as `read_captures`, which permits amortizing the allocation in which
				854	/// capture match locations are stored.
				855	///
				856	/// In order to build a value of this type, you'll need to call the
				857	/// `capture_locations` method on the `Regex` being used to execute the search.
				858	/// The value returned can then be reused in subsequent searches.
				859	#[derive(Clone, Debug)]
				860	pub struct CaptureLocations(re_trait::Locations);
				861
				862	/// A type alias for `CaptureLocations` for backwards compatibility.
				863	///
				864	/// Previously, we exported `CaptureLocations` as `Locations` in an
				865	/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`),
				866	/// we continue re-exporting the same undocumented API.
				867	#[doc(hidden)]
				868	pub type Locations = CaptureLocations;
				869
				870	impl CaptureLocations {
				871	/// Returns the start and end positions of the Nth capture group. Returns
				872	/// `None` if `i` is not a valid capture group or if the capture group did
				873	/// not match anything. The positions returned are always byte indices
				874	/// with respect to the original string matched.
				875	#[inline]
				876	pub fn get(&self, i: usize) -> Option<(usize, usize)> {
				877	self.0.pos(i)
				878	}
				879
				880	/// Returns the total number of capturing groups.
				881	///
				882	/// This is always at least `1` since every regex has at least `1`
				883	/// capturing group that corresponds to the entire match.
				884	#[inline]
				885	pub fn len(&self) -> usize {
				886	self.0.len()
				887	}
				888
				889	/// An alias for the `get` method for backwards compatibility.
				890	///
				891	/// Previously, we exported `get` as `pos` in an undocumented API. To
				892	/// prevent breaking that code (e.g., in `regex-capi`), we continue
				893	/// re-exporting the same undocumented API.
				894	#[doc(hidden)]
				895	#[inline]
				896	pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
				897	self.get(i)
				898	}
				899	}
				900
				901	/// Captures represents a group of captured byte strings for a single match.
				902	///
				903	/// The 0th capture always corresponds to the entire match. Each subsequent
				904	/// index corresponds to the next capture group in the regex. If a capture
				905	/// group is named, then the matched byte string is also available via the
				906	/// `name` method. (Note that the 0th capture is always unnamed and so must be
				907	/// accessed with the `get` method.)
				908	///
				909	/// Positions returned from a capture group are always byte indices.
				910	///
				911	/// `'t` is the lifetime of the matched text.
				912	pub struct Captures<'t> {
				913	text: &'t [u8],
				914	locs: re_trait::Locations,
				915	named_groups: Arc<HashMap<String, usize>>,
				916	}
				917
				918	impl<'t> Captures<'t> {
				919	/// Returns the match associated with the capture group at index `i`. If
				920	/// `i` does not correspond to a capture group, or if the capture group
				921	/// did not participate in the match, then `None` is returned.
				922	///
				923	/// # Examples
				924	///
				925	/// Get the text of the match with a default of an empty string if this
				926	/// group didn't participate in the match:
				927	///
				928	/// ```rust
				929	/// # use regex::bytes::Regex;
				930	/// let re = Regex::new(r"[a-z]+(?:([0-9]+)\|([A-Z]+))").unwrap();
				931	/// let caps = re.captures(b"abc123").unwrap();
				932	///
				933	/// let text1 = caps.get(1).map_or(&b""[..], \|m\| m.as_bytes());
				934	/// let text2 = caps.get(2).map_or(&b""[..], \|m\| m.as_bytes());
				935	/// assert_eq!(text1, &b"123"[..]);
				936	/// assert_eq!(text2, &b""[..]);
				937	/// ```
				938	pub fn get(&self, i: usize) -> Option<Match<'t>> {
				939	self.locs.pos(i).map(\|(s, e)\| Match::new(self.text, s, e))
				940	}
				941
				942	/// Returns the match for the capture group named `name`. If `name` isn't a
				943	/// valid capture group or didn't match anything, then `None` is returned.
				944	pub fn name(&self, name: &str) -> Option<Match<'t>> {
				945	self.named_groups.get(name).and_then(\|&i\| self.get(i))
				946	}
				947
				948	/// An iterator that yields all capturing matches in the order in which
				949	/// they appear in the regex. If a particular capture group didn't
				950	/// participate in the match, then `None` is yielded for that capture.
				951	///
				952	/// The first match always corresponds to the overall match of the regex.
				953	pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
				954	SubCaptureMatches { caps: self, it: self.locs.iter() }
				955	}
				956
				957	/// Expands all instances of `$name` in `replacement` to the corresponding
				958	/// capture group `name`, and writes them to the `dst` buffer given.
				959	///
Chih-Hung Hsieh	849e445	2020-10-26 13:16:47 -0700	[diff] [blame]	960	/// `name` may be an integer corresponding to the index of the capture
				961	/// group (counted by order of opening parenthesis where `0` is the
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	962	/// entire match) or it can be a name (consisting of letters, digits or
				963	/// underscores) corresponding to a named capture group.
				964	///
				965	/// If `name` isn't a valid capture group (whether the name doesn't exist
				966	/// or isn't a valid index), then it is replaced with the empty string.
				967	///
Chih-Hung Hsieh	849e445	2020-10-26 13:16:47 -0700	[diff] [blame]	968	/// The longest possible name consisting of the characters `[_0-9A-Za-z]`
				969	/// is used. e.g., `$1a` looks up the capture group named `1a` and not the
				970	/// capture group at index `1`. To exert more precise control over the
				971	/// name, or to refer to a capture group name that uses characters outside
				972	/// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
				973	/// using braces, any sequence of valid UTF-8 bytes is permitted. If the
				974	/// sequence does not refer to a capture group name in the corresponding
				975	/// regex, then it is replaced with an empty string.
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	976	///
				977	/// To write a literal `$` use `$$`.
				978	pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
				979	expand_bytes(self, replacement, dst)
				980	}
				981
				982	/// Returns the number of captured groups.
				983	///
				984	/// This is always at least `1`, since every regex has at least one capture
				985	/// group that corresponds to the full match.
				986	#[inline]
				987	pub fn len(&self) -> usize {
				988	self.locs.len()
				989	}
				990	}
				991
				992	impl<'t> fmt::Debug for Captures<'t> {
				993	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				994	f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
				995	}
				996	}
				997
				998	struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
				999
				1000	impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
				1001	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				1002	fn escape_bytes(bytes: &[u8]) -> String {
				1003	let mut s = String::new();
				1004	for &b in bytes {
				1005	s.push_str(&escape_byte(b));
				1006	}
				1007	s
				1008	}
				1009
				1010	fn escape_byte(byte: u8) -> String {
				1011	use std::ascii::escape_default;
				1012
				1013	let escaped: Vec<u8> = escape_default(byte).collect();
				1014	String::from_utf8_lossy(&escaped).into_owned()
				1015	}
				1016
				1017	// We'd like to show something nice here, even if it means an
				1018	// allocation to build a reverse index.
				1019	let slot_to_name: HashMap<&usize, &String> =
				1020	self.0.named_groups.iter().map(\|(a, b)\| (b, a)).collect();
				1021	let mut map = f.debug_map();
				1022	for (slot, m) in self.0.locs.iter().enumerate() {
				1023	let m = m.map(\|(s, e)\| escape_bytes(&self.0.text[s..e]));
				1024	if let Some(name) = slot_to_name.get(&slot) {
				1025	map.entry(&name, &m);
				1026	} else {
				1027	map.entry(&slot, &m);
				1028	}
				1029	}
				1030	map.finish()
				1031	}
				1032	}
				1033
				1034	/// Get a group by index.
				1035	///
				1036	/// `'t` is the lifetime of the matched text.
				1037	///
				1038	/// The text can't outlive the `Captures` object if this method is
				1039	/// used, because of how `Index` is defined (normally `a[i]` is part
				1040	/// of `a` and can't outlive it); to do that, use `get()` instead.
				1041	///
				1042	/// # Panics
				1043	///
				1044	/// If there is no group at the given index.
				1045	impl<'t> Index<usize> for Captures<'t> {
				1046	type Output = [u8];
				1047
				1048	fn index(&self, i: usize) -> &[u8] {
				1049	self.get(i)
				1050	.map(\|m\| m.as_bytes())
				1051	.unwrap_or_else(\|\| panic!("no group at index '{}'", i))
				1052	}
				1053	}
				1054
				1055	/// Get a group by name.
				1056	///
				1057	/// `'t` is the lifetime of the matched text and `'i` is the lifetime
				1058	/// of the group name (the index).
				1059	///
				1060	/// The text can't outlive the `Captures` object if this method is
				1061	/// used, because of how `Index` is defined (normally `a[i]` is part
				1062	/// of `a` and can't outlive it); to do that, use `name` instead.
				1063	///
				1064	/// # Panics
				1065	///
				1066	/// If there is no group named by the given value.
				1067	impl<'t, 'i> Index<&'i str> for Captures<'t> {
				1068	type Output = [u8];
				1069
				1070	fn index<'a>(&'a self, name: &'i str) -> &'a [u8] {
				1071	self.name(name)
				1072	.map(\|m\| m.as_bytes())
				1073	.unwrap_or_else(\|\| panic!("no group named '{}'", name))
				1074	}
				1075	}
				1076
				1077	/// An iterator that yields all capturing matches in the order in which they
				1078	/// appear in the regex.
				1079	///
				1080	/// If a particular capture group didn't participate in the match, then `None`
				1081	/// is yielded for that capture. The first match always corresponds to the
				1082	/// overall match of the regex.
				1083	///
				1084	/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
				1085	/// the lifetime `'t` corresponds to the originally matched text.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1086	#[derive(Clone, Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1087	pub struct SubCaptureMatches<'c, 't: 'c> {
				1088	caps: &'c Captures<'t>,
				1089	it: SubCapturesPosIter<'c>,
				1090	}
				1091
				1092	impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
				1093	type Item = Option<Match<'t>>;
				1094
				1095	fn next(&mut self) -> Option<Option<Match<'t>>> {
				1096	self.it
				1097	.next()
				1098	.map(\|cap\| cap.map(\|(s, e)\| Match::new(self.caps.text, s, e)))
				1099	}
				1100	}
				1101
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1102	impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
				1103
/// Replacer describes types that can be used to replace matches in a byte
/// string.
///
/// In general, users of this crate shouldn't need to implement this trait,
/// since implementations are already provided for `&[u8]` and
/// `FnMut(&Captures) -> Vec<u8>` (or any `FnMut(&Captures) -> T`
/// where `T: AsRef<[u8]>`), which covers most use cases.
pub trait Replacer {
    /// Appends text to `dst` to replace the current match.
    ///
    /// The current match is represented by `caps`, which is guaranteed to
    /// have a match at capture group `0`.
    ///
    /// For example, a no-op replacement would be
    /// `dst.extend(&caps[0])`.
    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>);

    /// Return a fixed unchanging replacement byte string.
    ///
    /// When doing replacements, if access to `Captures` is not needed (e.g.,
    /// the replacement byte string does not need `$` expansion), then it can
    /// be beneficial to avoid finding sub-captures.
    ///
    /// In general, this is called once for every call to `replacen`.
    ///
    /// The default implementation returns `None`, i.e. it reports that no
    /// fixed replacement is available.
    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
        None
    }

    /// Return a `Replacer` that borrows and wraps this `Replacer`.
    ///
    /// This is useful when you want to take a generic `Replacer` (which might
    /// not be cloneable) and use it without consuming it, so it can be used
    /// more than once.
    ///
    /// # Example
    ///
    /// ```
    /// use regex::bytes::{Regex, Replacer};
    ///
    /// fn replace_all_twice<R: Replacer>(
    ///     re: Regex,
    ///     src: &[u8],
    ///     mut rep: R,
    /// ) -> Vec<u8> {
    ///     let dst = re.replace_all(src, rep.by_ref());
    ///     let dst = re.replace_all(&dst, rep.by_ref());
    ///     dst.into_owned()
    /// }
    /// ```
    fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
        ReplacerRef(self)
    }
}
				1157
				1158	/// By-reference adaptor for a `Replacer`
				1159	///
				1160	/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
				1161	#[derive(Debug)]
				1162	pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
				1163
				1164	impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
				1165	fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
				1166	self.0.replace_append(caps, dst)
				1167	}
				1168	fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
				1169	self.0.no_expansion()
				1170	}
				1171	}
				1172
				1173	impl<'a> Replacer for &'a [u8] {
				1174	fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
				1175	caps.expand(*self, dst);
				1176	}
				1177
				1178	fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
				1179	match find_byte(b'$', *self) {
				1180	Some(_) => None,
				1181	None => Some(Cow::Borrowed(*self)),
				1182	}
				1183	}
				1184	}
				1185
				1186	impl<F, T> Replacer for F
				1187	where
				1188	F: FnMut(&Captures) -> T,
				1189	T: AsRef<[u8]>,
				1190	{
				1191	fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
				1192	dst.extend_from_slice((*self)(caps).as_ref());
				1193	}
				1194	}
				1195
				1196	/// `NoExpand` indicates literal byte string replacement.
				1197	///
				1198	/// It can be used with `replace` and `replace_all` to do a literal byte string
				1199	/// replacement without expanding `$name` to their corresponding capture
				1200	/// groups. This can be both convenient (to avoid escaping `$`, for example)
				1201	/// and performant (since capture groups don't need to be found).
				1202	///
				1203	/// `'t` is the lifetime of the literal text.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1204	#[derive(Clone, Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1205	pub struct NoExpand<'t>(pub &'t [u8]);
				1206
				1207	impl<'t> Replacer for NoExpand<'t> {
				1208	fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) {
				1209	dst.extend_from_slice(self.0);
				1210	}
				1211
				1212	fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
				1213	Some(Cow::Borrowed(self.0))
				1214	}
				1215	}