Blame - src/re_unicode.rs - platform/external/rust/crates/regex

blob: 1b478cdbad9258499fb653c87eb60f1ada1be1b9 [file] [log] [blame]

Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1	use std::borrow::Cow;
				2	use std::collections::HashMap;
				3	use std::fmt;
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	4	use std::iter::FusedIterator;
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	5	use std::ops::{Index, Range};
				6	use std::str::FromStr;
				7	use std::sync::Arc;
				8
				9	use find_byte::find_byte;
				10	use syntax;
				11
				12	use error::Error;
				13	use exec::{Exec, ExecNoSyncStr};
				14	use expand::expand_str;
				15	use re_builder::unicode::RegexBuilder;
				16	use re_trait::{self, RegularExpression, SubCapturesPosIter};
				17
				18	/// Escapes all regular expression meta characters in `text`.
				19	///
				20	/// The string returned may be safely used as a literal in a regular
				21	/// expression.
				22	pub fn escape(text: &str) -> String {
				23	syntax::escape(text)
				24	}
				25
				26	/// Match represents a single match of a regex in a haystack.
				27	///
				28	/// The lifetime parameter `'t` refers to the lifetime of the matched text.
				29	#[derive(Copy, Clone, Debug, Eq, PartialEq)]
				30	pub struct Match<'t> {
				31	text: &'t str,
				32	start: usize,
				33	end: usize,
				34	}
				35
				36	impl<'t> Match<'t> {
				37	/// Returns the starting byte offset of the match in the haystack.
				38	#[inline]
				39	pub fn start(&self) -> usize {
				40	self.start
				41	}
				42
				43	/// Returns the ending byte offset of the match in the haystack.
				44	#[inline]
				45	pub fn end(&self) -> usize {
				46	self.end
				47	}
				48
				49	/// Returns the range over the starting and ending byte offsets of the
				50	/// match in the haystack.
				51	#[inline]
				52	pub fn range(&self) -> Range<usize> {
				53	self.start..self.end
				54	}
				55
				56	/// Returns the matched text.
				57	#[inline]
				58	pub fn as_str(&self) -> &'t str {
				59	&self.text[self.range()]
				60	}
				61
				62	/// Creates a new match from the given haystack and byte offsets.
				63	#[inline]
				64	fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> {
				65	Match { text: haystack, start: start, end: end }
				66	}
				67	}
				68
				69	impl<'t> From<Match<'t>> for &'t str {
				70	fn from(m: Match<'t>) -> &'t str {
				71	m.as_str()
				72	}
				73	}
				74
				75	impl<'t> From<Match<'t>> for Range<usize> {
				76	fn from(m: Match<'t>) -> Range<usize> {
				77	m.range()
				78	}
				79	}
				80
				81	/// A compiled regular expression for matching Unicode strings.
				82	///
				83	/// It is represented as either a sequence of bytecode instructions (dynamic)
				84	/// or as a specialized Rust function (native). It can be used to search, split
				85	/// or replace text. All searching is done with an implicit `.*?` at the
				86	/// beginning and end of an expression. To force an expression to match the
				87	/// whole string (or a prefix or a suffix), you must use an anchor like `^` or
				88	/// `$` (or `\A` and `\z`).
				89	///
				90	/// While this crate will handle Unicode strings (whether in the regular
				91	/// expression or in the search text), all positions returned are **byte
				92	/// indices**. Every byte index is guaranteed to be at a Unicode code point
				93	/// boundary.
				94	///
				95	/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a
				96	/// compiled regular expression and text to search, respectively.
				97	///
				98	/// The only methods that allocate new strings are the string replacement
				99	/// methods. All other methods (searching and splitting) return borrowed
				100	/// pointers into the string given.
				101	///
				102	/// # Examples
				103	///
				104	/// Find the location of a US phone number:
				105	///
				106	/// ```rust
				107	/// # use regex::Regex;
				108	/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
				109	/// let mat = re.find("phone: 111-222-3333").unwrap();
				110	/// assert_eq!((mat.start(), mat.end()), (7, 19));
				111	/// ```
				112	///
				113	/// # Using the `std::str::pattern` methods with `Regex`
				114	///
				115	/// > Note: This section requires that this crate is compiled with the
				116	/// > `pattern` Cargo feature enabled, which requires nightly Rust.
				117	///
				118	/// Since `Regex` implements `Pattern`, you can use regexes with methods
				119	/// defined on `&str`. For example, `is_match`, `find`, `find_iter`
				120	/// and `split` can be replaced with `str::contains`, `str::find`,
				121	/// `str::match_indices` and `str::split`.
				122	///
				123	/// Here are some examples:
				124	///
				125	/// ```rust,ignore
				126	/// # use regex::Regex;
				127	/// let re = Regex::new(r"\d+").unwrap();
				128	/// let haystack = "a111b222c";
				129	///
				130	/// assert!(haystack.contains(&re));
				131	/// assert_eq!(haystack.find(&re), Some(1));
				132	/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(),
				133	/// vec![(1, 4), (5, 8)]);
				134	/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
				135	/// ```
				136	#[derive(Clone)]
				137	pub struct Regex(Exec);
				138
				139	impl fmt::Display for Regex {
				140	/// Shows the original regular expression.
				141	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				142	write!(f, "{}", self.as_str())
				143	}
				144	}
				145
				146	impl fmt::Debug for Regex {
				147	/// Shows the original regular expression.
				148	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				149	fmt::Display::fmt(self, f)
				150	}
				151	}
				152
				153	#[doc(hidden)]
				154	impl From<Exec> for Regex {
				155	fn from(exec: Exec) -> Regex {
				156	Regex(exec)
				157	}
				158	}
				159
				160	impl FromStr for Regex {
				161	type Err = Error;
				162
				163	/// Attempts to parse a string into a regular expression
				164	fn from_str(s: &str) -> Result<Regex, Error> {
				165	Regex::new(s)
				166	}
				167	}
				168
				169	/// Core regular expression methods.
				170	impl Regex {
				171	/// Compiles a regular expression. Once compiled, it can be used repeatedly
				172	/// to search, split or replace text in a string.
				173	///
				174	/// If an invalid expression is given, then an error is returned.
				175	pub fn new(re: &str) -> Result<Regex, Error> {
				176	RegexBuilder::new(re).build()
				177	}
				178
Chih-Hung Hsieh	849e445	2020-10-26 13:16:47 -0700	[diff] [blame]	179	/// Returns true if and only if there is a match for the regex in the
				180	/// string given.
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	181	///
				182	/// It is recommended to use this method if all you need to do is test
				183	/// a match, since the underlying matching engine may be able to do less
				184	/// work.
				185	///
				186	/// # Example
				187	///
				188	/// Test if some text contains at least one word with exactly 13
				189	/// Unicode word characters:
				190	///
				191	/// ```rust
				192	/// # extern crate regex; use regex::Regex;
				193	/// # fn main() {
				194	/// let text = "I categorically deny having triskaidekaphobia.";
				195	/// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
				196	/// # }
				197	/// ```
				198	pub fn is_match(&self, text: &str) -> bool {
				199	self.is_match_at(text, 0)
				200	}
				201
				202	/// Returns the start and end byte range of the leftmost-first match in
				203	/// `text`. If no match exists, then `None` is returned.
				204	///
				205	/// Note that this should only be used if you want to discover the position
				206	/// of the match. Testing the existence of a match is faster if you use
				207	/// `is_match`.
				208	///
				209	/// # Example
				210	///
				211	/// Find the start and end location of the first word with exactly 13
				212	/// Unicode word characters:
				213	///
				214	/// ```rust
				215	/// # extern crate regex; use regex::Regex;
				216	/// # fn main() {
				217	/// let text = "I categorically deny having triskaidekaphobia.";
				218	/// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
				219	/// assert_eq!(mat.start(), 2);
				220	/// assert_eq!(mat.end(), 15);
				221	/// # }
				222	/// ```
				223	pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> {
				224	self.find_at(text, 0)
				225	}
				226
				227	/// Returns an iterator for each successive non-overlapping match in
				228	/// `text`, returning the start and end byte indices with respect to
				229	/// `text`.
				230	///
				231	/// # Example
				232	///
				233	/// Find the start and end location of every word with exactly 13 Unicode
				234	/// word characters:
				235	///
				236	/// ```rust
				237	/// # extern crate regex; use regex::Regex;
				238	/// # fn main() {
				239	/// let text = "Retroactively relinquishing remunerations is reprehensible.";
				240	/// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
				241	/// println!("{:?}", mat);
				242	/// }
				243	/// # }
				244	/// ```
				245	pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
				246	Matches(self.0.searcher_str().find_iter(text))
				247	}
				248
				249	/// Returns the capture groups corresponding to the leftmost-first
				250	/// match in `text`. Capture group `0` always corresponds to the entire
				251	/// match. If no match is found, then `None` is returned.
				252	///
				253	/// You should only use `captures` if you need access to the location of
				254	/// capturing group matches. Otherwise, `find` is faster for discovering
				255	/// the location of the overall match.
				256	///
				257	/// # Examples
				258	///
				259	/// Say you have some text with movie names and their release years,
				260	/// like "'Citizen Kane' (1941)". It'd be nice if we could search for text
				261	/// looking like that, while also extracting the movie name and its release
				262	/// year separately.
				263	///
				264	/// ```rust
				265	/// # extern crate regex; use regex::Regex;
				266	/// # fn main() {
				267	/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
				268	/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
				269	/// let caps = re.captures(text).unwrap();
				270	/// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane");
				271	/// assert_eq!(caps.get(2).unwrap().as_str(), "1941");
				272	/// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
				273	/// // You can also access the groups by index using the Index notation.
				274	/// // Note that this will panic on an invalid index.
				275	/// assert_eq!(&caps[1], "Citizen Kane");
				276	/// assert_eq!(&caps[2], "1941");
				277	/// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
				278	/// # }
				279	/// ```
				280	///
				281	/// Note that the full match is at capture group `0`. Each subsequent
				282	/// capture group is indexed by the order of its opening `(`.
				283	///
				284	/// We can make this example a bit clearer by using named capture groups:
				285	///
				286	/// ```rust
				287	/// # extern crate regex; use regex::Regex;
				288	/// # fn main() {
				289	/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
				290	/// .unwrap();
				291	/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
				292	/// let caps = re.captures(text).unwrap();
				293	/// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane");
				294	/// assert_eq!(caps.name("year").unwrap().as_str(), "1941");
				295	/// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
				296	/// // You can also access the groups by name using the Index notation.
				297	/// // Note that this will panic on an invalid group name.
				298	/// assert_eq!(&caps["title"], "Citizen Kane");
				299	/// assert_eq!(&caps["year"], "1941");
				300	/// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
				301	///
				302	/// # }
				303	/// ```
				304	///
				305	/// Here we name the capture groups, which we can access with the `name`
				306	/// method or the `Index` notation with a `&str`. Note that the named
				307	/// capture groups are still accessible with `get` or the `Index` notation
				308	/// with a `usize`.
				309	///
				310	/// The `0`th capture group is always unnamed, so it must always be
				311	/// accessed with `get(0)` or `[0]`.
				312	pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
				313	let mut locs = self.capture_locations();
				314	self.captures_read_at(&mut locs, text, 0).map(move \|_\| Captures {
				315	text: text,
				316	locs: locs.0,
				317	named_groups: self.0.capture_name_idx().clone(),
				318	})
				319	}
				320
				321	/// Returns an iterator over all the non-overlapping capture groups matched
				322	/// in `text`. This is operationally the same as `find_iter`, except it
				323	/// yields information about capturing group matches.
				324	///
				325	/// # Example
				326	///
				327	/// We can use this to find all movie titles and their release years in
				328	/// some text, where the movie is formatted like "'Title' (xxxx)":
				329	///
				330	/// ```rust
				331	/// # extern crate regex; use regex::Regex;
				332	/// # fn main() {
				333	/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
				334	/// .unwrap();
				335	/// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
				336	/// for caps in re.captures_iter(text) {
				337	/// println!("Movie: {:?}, Released: {:?}",
				338	/// &caps["title"], &caps["year"]);
				339	/// }
				340	/// // Output:
				341	/// // Movie: Citizen Kane, Released: 1941
				342	/// // Movie: The Wizard of Oz, Released: 1939
				343	/// // Movie: M, Released: 1931
				344	/// # }
				345	/// ```
				346	pub fn captures_iter<'r, 't>(
				347	&'r self,
				348	text: &'t str,
				349	) -> CaptureMatches<'r, 't> {
				350	CaptureMatches(self.0.searcher_str().captures_iter(text))
				351	}
				352
				353	/// Returns an iterator of substrings of `text` delimited by a match of the
				354	/// regular expression. Namely, each element of the iterator corresponds to
				355	/// text that isn't matched by the regular expression.
				356	///
				357	/// This method will not copy the text given.
				358	///
				359	/// # Example
				360	///
				361	/// To split a string delimited by arbitrary amounts of spaces or tabs:
				362	///
				363	/// ```rust
				364	/// # extern crate regex; use regex::Regex;
				365	/// # fn main() {
				366	/// let re = Regex::new(r"[ \t]+").unwrap();
				367	/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
				368	/// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
				369	/// # }
				370	/// ```
				371	pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> {
				372	Split { finder: self.find_iter(text), last: 0 }
				373	}
				374
				375	/// Returns an iterator of at most `limit` substrings of `text` delimited
				376	/// by a match of the regular expression. (A `limit` of `0` will return no
				377	/// substrings.) Namely, each element of the iterator corresponds to text
				378	/// that isn't matched by the regular expression. The remainder of the
				379	/// string that is not split will be the last element in the iterator.
				380	///
				381	/// This method will not copy the text given.
				382	///
				383	/// # Example
				384	///
				385	/// Get the first two words in some text:
				386	///
				387	/// ```rust
				388	/// # extern crate regex; use regex::Regex;
				389	/// # fn main() {
				390	/// let re = Regex::new(r"\W+").unwrap();
				391	/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
				392	/// assert_eq!(fields, vec!("Hey", "How", "are you?"));
				393	/// # }
				394	/// ```
				395	pub fn splitn<'r, 't>(
				396	&'r self,
				397	text: &'t str,
				398	limit: usize,
				399	) -> SplitN<'r, 't> {
				400	SplitN { splits: self.split(text), n: limit }
				401	}
				402
				403	/// Replaces the leftmost-first match with the replacement provided.
				404	/// The replacement can be a regular string (where `$N` and `$name` are
				405	/// expanded to match capture groups) or a function that takes the matches'
				406	/// `Captures` and returns the replaced string.
				407	///
				408	/// If no match is found, then a copy of the string is returned unchanged.
				409	///
				410	/// # Replacement string syntax
				411	///
				412	/// All instances of `$name` in the replacement text are replaced with the
				413	/// corresponding capture group `name`.
				414	///
				415	/// `name` may be an integer corresponding to the index of the
				416	/// capture group (counted by order of opening parenthesis where `0` is the
				417	/// entire match) or it can be a name (consisting of letters, digits or
				418	/// underscores) corresponding to a named capture group.
				419	///
				420	/// If `name` isn't a valid capture group (whether the name doesn't exist
				421	/// or isn't a valid index), then it is replaced with the empty string.
				422	///
				423	/// The longest possible name is used. e.g., `$1a` looks up the capture
				424	/// group named `1a` and not the capture group at index `1`. To exert more
				425	/// precise control over the name, use braces, e.g., `${1}a`.
				426	///
				427	/// To write a literal `$` use `$$`.
				428	///
				429	/// # Examples
				430	///
				431	/// Note that this function is polymorphic with respect to the replacement.
				432	/// In typical usage, this can just be a normal string:
				433	///
				434	/// ```rust
				435	/// # extern crate regex; use regex::Regex;
				436	/// # fn main() {
				437	/// let re = Regex::new("[^01]+").unwrap();
				438	/// assert_eq!(re.replace("1078910", ""), "1010");
				439	/// # }
				440	/// ```
				441	///
				442	/// But anything satisfying the `Replacer` trait will work. For example,
				443	/// a closure of type `|&Captures| -> String` provides direct access to the
				444	/// captures corresponding to a match. This allows one to access
				445	/// capturing group matches easily:
				446	///
				447	/// ```rust
				448	/// # extern crate regex; use regex::Regex;
				449	/// # use regex::Captures; fn main() {
				450	/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
				451	/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
				452	/// format!("{} {}", &caps[2], &caps[1])
				453	/// });
				454	/// assert_eq!(result, "Bruce Springsteen");
				455	/// # }
				456	/// ```
				457	///
				458	/// But this is a bit cumbersome to use all the time. Instead, a simple
				459	/// syntax is supported that expands `$name` into the corresponding capture
				460	/// group. Here's the last example, but using this expansion technique
				461	/// with named capture groups:
				462	///
				463	/// ```rust
				464	/// # extern crate regex; use regex::Regex;
				465	/// # fn main() {
				466	/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
				467	/// let result = re.replace("Springsteen, Bruce", "$first $last");
				468	/// assert_eq!(result, "Bruce Springsteen");
				469	/// # }
				470	/// ```
				471	///
				472	/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
				473	/// would produce the same result. To write a literal `$` use `$$`.
				474	///
				475	/// Sometimes the replacement string requires use of curly braces to
				476	/// delineate a capture group replacement and surrounding literal text.
				477	/// For example, if we wanted to join two words together with an
				478	/// underscore:
				479	///
				480	/// ```rust
				481	/// # extern crate regex; use regex::Regex;
				482	/// # fn main() {
				483	/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
				484	/// let result = re.replace("deep fried", "${first}_$second");
				485	/// assert_eq!(result, "deep_fried");
				486	/// # }
				487	/// ```
				488	///
				489	/// Without the curly braces, the capture group name `first_` would be
				490	/// used, and since it doesn't exist, it would be replaced with the empty
				491	/// string.
				492	///
				493	/// Finally, sometimes you just want to replace a literal string with no
				494	/// regard for capturing group expansion. This can be done by wrapping a
				495	/// string with `NoExpand`:
				496	///
				497	/// ```rust
				498	/// # extern crate regex; use regex::Regex;
				499	/// # fn main() {
				500	/// use regex::NoExpand;
				501	///
				502	/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
				503	/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
				504	/// assert_eq!(result, "$2 $last");
				505	/// # }
				506	/// ```
				507	pub fn replace<'t, R: Replacer>(
				508	&self,
				509	text: &'t str,
				510	rep: R,
				511	) -> Cow<'t, str> {
				512	self.replacen(text, 1, rep)
				513	}
				514
				515	/// Replaces all non-overlapping matches in `text` with the replacement
				516	/// provided. This is the same as calling `replacen` with `limit` set to
				517	/// `0`.
				518	///
				519	/// See the documentation for `replace` for details on how to access
				520	/// capturing group matches in the replacement string.
				521	pub fn replace_all<'t, R: Replacer>(
				522	&self,
				523	text: &'t str,
				524	rep: R,
				525	) -> Cow<'t, str> {
				526	self.replacen(text, 0, rep)
				527	}
				528
				529	/// Replaces at most `limit` non-overlapping matches in `text` with the
				530	/// replacement provided. If `limit` is 0, then all non-overlapping matches
				531	/// are replaced.
				532	///
				533	/// See the documentation for `replace` for details on how to access
				534	/// capturing group matches in the replacement string.
				535	pub fn replacen<'t, R: Replacer>(
				536	&self,
				537	text: &'t str,
				538	limit: usize,
				539	mut rep: R,
				540	) -> Cow<'t, str> {
				541	// If we know that the replacement doesn't have any capture expansions,
				542	// then we can fast path. The fast path can make a tremendous
				543	// difference:
				544	//
				545	// 1) We use `find_iter` instead of `captures_iter`. Not asking for
				546	// captures generally makes the regex engines faster.
				547	// 2) We don't need to look up all of the capture groups and do
				548	// replacements inside the replacement string. We just push it
				549	// at each match and be done with it.
				550	if let Some(rep) = rep.no_expansion() {
				551	let mut it = self.find_iter(text).enumerate().peekable();
				552	if it.peek().is_none() {
				553	return Cow::Borrowed(text);
				554	}
				555	let mut new = String::with_capacity(text.len());
				556	let mut last_match = 0;
				557	for (i, m) in it {
				558	if limit > 0 && i >= limit {
				559	break;
				560	}
				561	new.push_str(&text[last_match..m.start()]);
				562	new.push_str(&rep);
				563	last_match = m.end();
				564	}
				565	new.push_str(&text[last_match..]);
				566	return Cow::Owned(new);
				567	}
				568
				569	// The slower path, which we use if the replacement needs access to
				570	// capture groups.
				571	let mut it = self.captures_iter(text).enumerate().peekable();
				572	if it.peek().is_none() {
				573	return Cow::Borrowed(text);
				574	}
				575	let mut new = String::with_capacity(text.len());
				576	let mut last_match = 0;
				577	for (i, cap) in it {
				578	if limit > 0 && i >= limit {
				579	break;
				580	}
				581	// unwrap on 0 is OK because captures only reports matches
				582	let m = cap.get(0).unwrap();
				583	new.push_str(&text[last_match..m.start()]);
				584	rep.replace_append(&cap, &mut new);
				585	last_match = m.end();
				586	}
				587	new.push_str(&text[last_match..]);
				588	Cow::Owned(new)
				589	}
				590	}
				591
				592	/// Advanced or "lower level" search methods.
				593	impl Regex {
				594	/// Returns the end location of a match in the text given.
				595	///
				596	/// This method may have the same performance characteristics as
				597	/// `is_match`, except it provides an end location for a match. In
				598	/// particular, the location returned may be shorter than the proper end
				599	/// of the leftmost-first match.
				600	///
				601	/// # Example
				602	///
				603	/// Typically, `a+` would match the entire first sequence of `a` in some
				604	/// text, but `shortest_match` can give up as soon as it sees the first
				605	/// `a`.
				606	///
				607	/// ```rust
				608	/// # extern crate regex; use regex::Regex;
				609	/// # fn main() {
				610	/// let text = "aaaaa";
				611	/// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
				612	/// assert_eq!(pos, Some(1));
				613	/// # }
				614	/// ```
				615	pub fn shortest_match(&self, text: &str) -> Option<usize> {
				616	self.shortest_match_at(text, 0)
				617	}
				618
				619	/// Returns the same as shortest_match, but starts the search at the given
				620	/// offset.
				621	///
				622	/// The significance of the starting point is that it takes the surrounding
				623	/// context into consideration. For example, the `\A` anchor can only
				624	/// match when `start == 0`.
				625	pub fn shortest_match_at(
				626	&self,
				627	text: &str,
				628	start: usize,
				629	) -> Option<usize> {
				630	self.0.searcher_str().shortest_match_at(text, start)
				631	}
				632
				633	/// Returns the same as is_match, but starts the search at the given
				634	/// offset.
				635	///
				636	/// The significance of the starting point is that it takes the surrounding
				637	/// context into consideration. For example, the `\A` anchor can only
				638	/// match when `start == 0`.
				639	pub fn is_match_at(&self, text: &str, start: usize) -> bool {
				640	self.shortest_match_at(text, start).is_some()
				641	}
				642
				643	/// Returns the same as find, but starts the search at the given
				644	/// offset.
				645	///
				646	/// The significance of the starting point is that it takes the surrounding
				647	/// context into consideration. For example, the `\A` anchor can only
				648	/// match when `start == 0`.
				649	pub fn find_at<'t>(
				650	&self,
				651	text: &'t str,
				652	start: usize,
				653	) -> Option<Match<'t>> {
				654	self.0
				655	.searcher_str()
				656	.find_at(text, start)
				657	.map(\|(s, e)\| Match::new(text, s, e))
				658	}
				659
				660	/// This is like `captures`, but uses
				661	/// [`CaptureLocations`](struct.CaptureLocations.html)
				662	/// instead of
				663	/// [`Captures`](struct.Captures.html) in order to amortize allocations.
				664	///
				665	/// To create a `CaptureLocations` value, use the
				666	/// `Regex::capture_locations` method.
				667	///
				668	/// This returns the overall match if this was successful, which is always
				669	/// equivalent to the `0`th capture group.
				670	pub fn captures_read<'t>(
				671	&self,
				672	locs: &mut CaptureLocations,
				673	text: &'t str,
				674	) -> Option<Match<'t>> {
				675	self.captures_read_at(locs, text, 0)
				676	}
				677
				678	/// Returns the same as captures, but starts the search at the given
				679	/// offset and populates the capture locations given.
				680	///
				681	/// The significance of the starting point is that it takes the surrounding
				682	/// context into consideration. For example, the `\A` anchor can only
				683	/// match when `start == 0`.
				684	pub fn captures_read_at<'t>(
				685	&self,
				686	locs: &mut CaptureLocations,
				687	text: &'t str,
				688	start: usize,
				689	) -> Option<Match<'t>> {
				690	self.0
				691	.searcher_str()
				692	.captures_read_at(&mut locs.0, text, start)
				693	.map(\|(s, e)\| Match::new(text, s, e))
				694	}
				695
				696	/// An undocumented alias for `captures_read_at`.
				697	///
				698	/// The `regex-capi` crate previously used this routine, so to avoid
				699	/// breaking that crate, we continue to provide the name as an undocumented
				700	/// alias.
				701	#[doc(hidden)]
				702	pub fn read_captures_at<'t>(
				703	&self,
				704	locs: &mut CaptureLocations,
				705	text: &'t str,
				706	start: usize,
				707	) -> Option<Match<'t>> {
				708	self.captures_read_at(locs, text, start)
				709	}
				710	}
				711
				712	/// Auxiliary methods.
				713	impl Regex {
				714	/// Returns the original string of this regex.
				715	pub fn as_str(&self) -> &str {
				716	&self.0.regex_strings()[0]
				717	}
				718
				719	/// Returns an iterator over the capture names.
				720	pub fn capture_names(&self) -> CaptureNames {
				721	CaptureNames(self.0.capture_names().iter())
				722	}
				723
				724	/// Returns the number of captures.
				725	pub fn captures_len(&self) -> usize {
				726	self.0.capture_names().len()
				727	}
				728
				729	/// Returns an empty set of capture locations that can be reused in
				730	/// multiple calls to `captures_read` or `captures_read_at`.
				731	pub fn capture_locations(&self) -> CaptureLocations {
				732	CaptureLocations(self.0.searcher_str().locations())
				733	}
				734
				735	/// An alias for `capture_locations` to preserve backward compatibility.
				736	///
				737	/// The `regex-capi` crate uses this method, so to avoid breaking that
				738	/// crate, we continue to export it as an undocumented API.
				739	#[doc(hidden)]
				740	pub fn locations(&self) -> CaptureLocations {
				741	CaptureLocations(self.0.searcher_str().locations())
				742	}
				743	}
				744
				745	/// An iterator over the names of all possible captures.
				746	///
				747	/// `None` indicates an unnamed capture; the first element (capture 0, the
				748	/// whole matched region) is always unnamed.
				749	///
				750	/// `'r` is the lifetime of the compiled regular expression.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	751	#[derive(Clone, Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	752	pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
				753
				754	impl<'r> Iterator for CaptureNames<'r> {
				755	type Item = Option<&'r str>;
				756
				757	fn next(&mut self) -> Option<Option<&'r str>> {
				758	self.0
				759	.next()
				760	.as_ref()
				761	.map(\|slot\| slot.as_ref().map(\|name\| name.as_ref()))
				762	}
				763
				764	fn size_hint(&self) -> (usize, Option<usize>) {
				765	self.0.size_hint()
				766	}
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	767
				768	fn count(self) -> usize {
				769	self.0.count()
				770	}
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	771	}
				772
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	773	impl<'r> ExactSizeIterator for CaptureNames<'r> {}
				774
				775	impl<'r> FusedIterator for CaptureNames<'r> {}
				776
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	777	/// Yields all substrings delimited by a regular expression match.
				778	///
				779	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				780	/// lifetime of the string being split.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	781	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	782	pub struct Split<'r, 't> {
				783	finder: Matches<'r, 't>,
				784	last: usize,
				785	}
				786
				787	impl<'r, 't> Iterator for Split<'r, 't> {
				788	type Item = &'t str;
				789
				790	fn next(&mut self) -> Option<&'t str> {
				791	let text = self.finder.0.text();
				792	match self.finder.next() {
				793	None => {
				794	if self.last > text.len() {
				795	None
				796	} else {
				797	let s = &text[self.last..];
				798	self.last = text.len() + 1; // Next call will return None
				799	Some(s)
				800	}
				801	}
				802	Some(m) => {
				803	let matched = &text[self.last..m.start()];
				804	self.last = m.end();
				805	Some(matched)
				806	}
				807	}
				808	}
				809	}
				810
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	811	impl<'r, 't> FusedIterator for Split<'r, 't> {}
				812
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	813	/// Yields at most `N` substrings delimited by a regular expression match.
				814	///
				815	/// The last substring will be whatever remains after splitting.
				816	///
				817	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				818	/// lifetime of the string being split.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	819	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	820	pub struct SplitN<'r, 't> {
				821	splits: Split<'r, 't>,
				822	n: usize,
				823	}
				824
				825	impl<'r, 't> Iterator for SplitN<'r, 't> {
				826	type Item = &'t str;
				827
				828	fn next(&mut self) -> Option<&'t str> {
				829	if self.n == 0 {
				830	return None;
				831	}
				832
				833	self.n -= 1;
				834	if self.n > 0 {
				835	return self.splits.next();
				836	}
				837
				838	let text = self.splits.finder.0.text();
				839	if self.splits.last > text.len() {
				840	// We've already returned all substrings.
				841	None
				842	} else {
				843	// self.n == 0, so future calls will return None immediately
				844	Some(&text[self.splits.last..])
				845	}
				846	}
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	847
				848	fn size_hint(&self) -> (usize, Option<usize>) {
				849	(0, Some(self.n))
				850	}
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	851	}
				852
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	853	impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
				854
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	855	/// CaptureLocations is a low level representation of the raw offsets of each
				856	/// submatch.
				857	///
				858	/// You can think of this as a lower level
				859	/// [`Captures`](struct.Captures.html), where this type does not support
				860	/// named capturing groups directly and it does not borrow the text that these
				861	/// offsets were matched on.
				862	///
				863	/// Primarily, this type is useful when using the lower level `Regex` APIs
				864	/// such as `read_captures`, which permits amortizing the allocation in which
				865	/// capture match locations are stored.
				866	///
				867	/// In order to build a value of this type, you'll need to call the
				868	/// `capture_locations` method on the `Regex` being used to execute the search.
				869	/// The value returned can then be reused in subsequent searches.
				870	#[derive(Clone, Debug)]
				871	pub struct CaptureLocations(re_trait::Locations);
				872
				873	/// A type alias for `CaptureLocations` for backwards compatibility.
				874	///
				875	/// Previously, we exported `CaptureLocations` as `Locations` in an
				876	/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`),
				877	/// we continue re-exporting the same undocumented API.
				878	#[doc(hidden)]
				879	pub type Locations = CaptureLocations;
				880
				881	impl CaptureLocations {
				882	/// Returns the start and end positions of the Nth capture group. Returns
				883	/// `None` if `i` is not a valid capture group or if the capture group did
				884	/// not match anything. The positions returned are always byte indices
				885	/// with respect to the original string matched.
				886	#[inline]
				887	pub fn get(&self, i: usize) -> Option<(usize, usize)> {
				888	self.0.pos(i)
				889	}
				890
				891	/// Returns the total number of capturing groups.
				892	///
				893	/// This is always at least `1` since every regex has at least `1`
				894	/// capturing group that corresponds to the entire match.
				895	#[inline]
				896	pub fn len(&self) -> usize {
				897	self.0.len()
				898	}
				899
				900	/// An alias for the `get` method for backwards compatibility.
				901	///
				902	/// Previously, we exported `get` as `pos` in an undocumented API. To
				903	/// prevent breaking that code (e.g., in `regex-capi`), we continue
				904	/// re-exporting the same undocumented API.
				905	#[doc(hidden)]
				906	#[inline]
				907	pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
				908	self.get(i)
				909	}
				910	}
				911
				912	/// Captures represents a group of captured strings for a single match.
				913	///
				914	/// The 0th capture always corresponds to the entire match. Each subsequent
				915	/// index corresponds to the next capture group in the regex. If a capture
				916	/// group is named, then the matched string is also available via the `name`
				917	/// method. (Note that the 0th capture is always unnamed and so must be
				918	/// accessed with the `get` method.)
				919	///
				920	/// Positions returned from a capture group are always byte indices.
				921	///
				922	/// `'t` is the lifetime of the matched text.
				923	pub struct Captures<'t> {
				924	text: &'t str,
				925	locs: re_trait::Locations,
				926	named_groups: Arc<HashMap<String, usize>>,
				927	}
				928
				929	impl<'t> Captures<'t> {
				930	/// Returns the match associated with the capture group at index `i`. If
				931	/// `i` does not correspond to a capture group, or if the capture group
				932	/// did not participate in the match, then `None` is returned.
				933	///
				934	/// # Examples
				935	///
				936	/// Get the text of the match with a default of an empty string if this
				937	/// group didn't participate in the match:
				938	///
				939	/// ```rust
				940	/// # use regex::Regex;
				941	/// let re = Regex::new(r"[a-z]+(?:([0-9]+)\|([A-Z]+))").unwrap();
				942	/// let caps = re.captures("abc123").unwrap();
				943	///
				944	/// let text1 = caps.get(1).map_or("", \|m\| m.as_str());
				945	/// let text2 = caps.get(2).map_or("", \|m\| m.as_str());
				946	/// assert_eq!(text1, "123");
				947	/// assert_eq!(text2, "");
				948	/// ```
				949	pub fn get(&self, i: usize) -> Option<Match<'t>> {
				950	self.locs.pos(i).map(\|(s, e)\| Match::new(self.text, s, e))
				951	}
				952
				953	/// Returns the match for the capture group named `name`. If `name` isn't a
				954	/// valid capture group or didn't match anything, then `None` is returned.
				955	pub fn name(&self, name: &str) -> Option<Match<'t>> {
				956	self.named_groups.get(name).and_then(\|&i\| self.get(i))
				957	}
				958
				959	/// An iterator that yields all capturing matches in the order in which
				960	/// they appear in the regex. If a particular capture group didn't
				961	/// participate in the match, then `None` is yielded for that capture.
				962	///
				963	/// The first match always corresponds to the overall match of the regex.
				964	pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
				965	SubCaptureMatches { caps: self, it: self.locs.iter() }
				966	}
				967
				968	/// Expands all instances of `$name` in `replacement` to the corresponding
				969	/// capture group `name`, and writes them to the `dst` buffer given.
				970	///
Chih-Hung Hsieh	849e445	2020-10-26 13:16:47 -0700	[diff] [blame]	971	/// `name` may be an integer corresponding to the index of the capture
				972	/// group (counted by order of opening parenthesis where `0` is the
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	973	/// entire match) or it can be a name (consisting of letters, digits or
				974	/// underscores) corresponding to a named capture group.
				975	///
				976	/// If `name` isn't a valid capture group (whether the name doesn't exist
				977	/// or isn't a valid index), then it is replaced with the empty string.
				978	///
Chih-Hung Hsieh	849e445	2020-10-26 13:16:47 -0700	[diff] [blame]	979	/// The longest possible name consisting of the characters `[_0-9A-Za-z]`
				980	/// is used. e.g., `$1a` looks up the capture group named `1a` and not the
				981	/// capture group at index `1`. To exert more precise control over the
				982	/// name, or to refer to a capture group name that uses characters outside
				983	/// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
				984	/// using braces, any sequence of characters is permitted. If the sequence
				985	/// does not refer to a capture group name in the corresponding regex, then
				986	/// it is replaced with an empty string.
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	987	///
				988	/// To write a literal `$` use `$$`.
				989	pub fn expand(&self, replacement: &str, dst: &mut String) {
				990	expand_str(self, replacement, dst)
				991	}
				992
				993	/// Returns the number of captured groups.
				994	///
				995	/// This is always at least `1`, since every regex has at least one capture
				996	/// group that corresponds to the full match.
				997	#[inline]
				998	pub fn len(&self) -> usize {
				999	self.locs.len()
				1000	}
				1001	}
				1002
				1003	impl<'t> fmt::Debug for Captures<'t> {
				1004	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				1005	f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
				1006	}
				1007	}
				1008
				1009	struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
				1010
				1011	impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
				1012	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				1013	// We'd like to show something nice here, even if it means an
				1014	// allocation to build a reverse index.
				1015	let slot_to_name: HashMap<&usize, &String> =
				1016	self.0.named_groups.iter().map(\|(a, b)\| (b, a)).collect();
				1017	let mut map = f.debug_map();
				1018	for (slot, m) in self.0.locs.iter().enumerate() {
				1019	let m = m.map(\|(s, e)\| &self.0.text[s..e]);
				1020	if let Some(name) = slot_to_name.get(&slot) {
				1021	map.entry(&name, &m);
				1022	} else {
				1023	map.entry(&slot, &m);
				1024	}
				1025	}
				1026	map.finish()
				1027	}
				1028	}
				1029
				1030	/// Get a group by index.
				1031	///
				1032	/// `'t` is the lifetime of the matched text.
				1033	///
				1034	/// The text can't outlive the `Captures` object if this method is
				1035	/// used, because of how `Index` is defined (normally `a[i]` is part
				1036	/// of `a` and can't outlive it); to do that, use `get()` instead.
				1037	///
				1038	/// # Panics
				1039	///
				1040	/// If there is no group at the given index.
				1041	impl<'t> Index<usize> for Captures<'t> {
				1042	type Output = str;
				1043
				1044	fn index(&self, i: usize) -> &str {
				1045	self.get(i)
				1046	.map(\|m\| m.as_str())
				1047	.unwrap_or_else(\|\| panic!("no group at index '{}'", i))
				1048	}
				1049	}
				1050
				1051	/// Get a group by name.
				1052	///
				1053	/// `'t` is the lifetime of the matched text and `'i` is the lifetime
				1054	/// of the group name (the index).
				1055	///
				1056	/// The text can't outlive the `Captures` object if this method is
				1057	/// used, because of how `Index` is defined (normally `a[i]` is part
				1058	/// of `a` and can't outlive it); to do that, use `name` instead.
				1059	///
				1060	/// # Panics
				1061	///
				1062	/// If there is no group named by the given value.
				1063	impl<'t, 'i> Index<&'i str> for Captures<'t> {
				1064	type Output = str;
				1065
				1066	fn index<'a>(&'a self, name: &'i str) -> &'a str {
				1067	self.name(name)
				1068	.map(\|m\| m.as_str())
				1069	.unwrap_or_else(\|\| panic!("no group named '{}'", name))
				1070	}
				1071	}
				1072
				1073	/// An iterator that yields all capturing matches in the order in which they
				1074	/// appear in the regex.
				1075	///
				1076	/// If a particular capture group didn't participate in the match, then `None`
				1077	/// is yielded for that capture. The first match always corresponds to the
				1078	/// overall match of the regex.
				1079	///
				1080	/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
				1081	/// the lifetime `'t` corresponds to the originally matched text.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1082	#[derive(Clone, Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1083	pub struct SubCaptureMatches<'c, 't: 'c> {
				1084	caps: &'c Captures<'t>,
				1085	it: SubCapturesPosIter<'c>,
				1086	}
				1087
				1088	impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
				1089	type Item = Option<Match<'t>>;
				1090
				1091	fn next(&mut self) -> Option<Option<Match<'t>>> {
				1092	self.it
				1093	.next()
				1094	.map(\|cap\| cap.map(\|(s, e)\| Match::new(self.caps.text, s, e)))
				1095	}
				1096	}
				1097
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1098	impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
				1099
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1100	/// An iterator that yields all non-overlapping capture groups matching a
				1101	/// particular regular expression.
				1102	///
				1103	/// The iterator stops when no more matches can be found.
				1104	///
				1105	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				1106	/// lifetime of the matched string.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1107	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1108	pub struct CaptureMatches<'r, 't>(
				1109	re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>,
				1110	);
				1111
				1112	impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
				1113	type Item = Captures<'t>;
				1114
				1115	fn next(&mut self) -> Option<Captures<'t>> {
				1116	self.0.next().map(\|locs\| Captures {
				1117	text: self.0.text(),
				1118	locs: locs,
				1119	named_groups: self.0.regex().capture_name_idx().clone(),
				1120	})
				1121	}
				1122	}
				1123
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1124	impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {}
				1125
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1126	/// An iterator over all non-overlapping matches for a particular string.
				1127	///
				1128	/// The iterator yields a `Match` value. The iterator stops when no more
				1129	/// matches can be found.
				1130	///
				1131	/// `'r` is the lifetime of the compiled regular expression and `'t` is the
				1132	/// lifetime of the matched string.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1133	#[derive(Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1134	pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>);
				1135
				1136	impl<'r, 't> Iterator for Matches<'r, 't> {
				1137	type Item = Match<'t>;
				1138
				1139	fn next(&mut self) -> Option<Match<'t>> {
				1140	let text = self.0.text();
				1141	self.0.next().map(\|(s, e)\| Match::new(text, s, e))
				1142	}
				1143	}
				1144
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1145	impl<'r, 't> FusedIterator for Matches<'r, 't> {}
				1146
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1147	/// Replacer describes types that can be used to replace matches in a string.
				1148	///
				1149	/// In general, users of this crate shouldn't need to implement this trait,
Elliott Hughes	ffb6030	2021-04-01 17:11:40 -0700	[diff] [blame^]	1150	/// since implementations are already provided for `&str` along with other
				1151	/// variants of string types and `FnMut(&Captures) -> String` (or any
				1152	/// `FnMut(&Captures) -> T` where `T: AsRef<str>`), which covers most use cases.
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1153	pub trait Replacer {
				1154	/// Appends text to `dst` to replace the current match.
				1155	///
				1156	/// The current match is represented by `caps`, which is guaranteed to
				1157	/// have a match at capture group `0`.
				1158	///
				1159	/// For example, a no-op replacement would be
Chih-Hung Hsieh	849e445	2020-10-26 13:16:47 -0700	[diff] [blame]	1160	/// `dst.push_str(caps.get(0).unwrap().as_str())`.
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1161	fn replace_append(&mut self, caps: &Captures, dst: &mut String);
				1162
				1163	/// Return a fixed unchanging replacement string.
				1164	///
				1165	/// When doing replacements, if access to `Captures` is not needed (e.g.,
				1166	/// the replacement byte string does not need `$` expansion), then it can
				1167	/// be beneficial to avoid finding sub-captures.
				1168	///
				1169	/// In general, this is called once for every call to `replacen`.
				1170	fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
				1171	None
				1172	}
				1173
				1174	/// Return a `Replacer` that borrows and wraps this `Replacer`.
				1175	///
				1176	/// This is useful when you want to take a generic `Replacer` (which might
				1177	/// not be cloneable) and use it without consuming it, so it can be used
				1178	/// more than once.
				1179	///
				1180	/// # Example
				1181	///
				1182	/// ```
				1183	/// use regex::{Regex, Replacer};
				1184	///
				1185	/// fn replace_all_twice<R: Replacer>(
				1186	/// re: Regex,
				1187	/// src: &str,
				1188	/// mut rep: R,
				1189	/// ) -> String {
				1190	/// let dst = re.replace_all(src, rep.by_ref());
				1191	/// let dst = re.replace_all(&dst, rep.by_ref());
				1192	/// dst.into_owned()
				1193	/// }
				1194	/// ```
				1195	fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
				1196	ReplacerRef(self)
				1197	}
				1198	}
				1199
				1200	/// By-reference adaptor for a `Replacer`
				1201	///
				1202	/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
				1203	#[derive(Debug)]
				1204	pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
				1205
				1206	impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
				1207	fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
				1208	self.0.replace_append(caps, dst)
				1209	}
				1210	fn no_expansion(&mut self) -> Option<Cow<str>> {
				1211	self.0.no_expansion()
				1212	}
				1213	}
				1214
				1215	impl<'a> Replacer for &'a str {
				1216	fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
				1217	caps.expand(*self, dst);
				1218	}
				1219
				1220	fn no_expansion(&mut self) -> Option<Cow<str>> {
Elliott Hughes	ffb6030	2021-04-01 17:11:40 -0700	[diff] [blame^]	1221	no_expansion(self)
				1222	}
				1223	}
				1224
				1225	impl<'a> Replacer for &'a String {
				1226	fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
				1227	self.as_str().replace_append(caps, dst)
				1228	}
				1229
				1230	fn no_expansion(&mut self) -> Option<Cow<str>> {
				1231	no_expansion(self)
				1232	}
				1233	}
				1234
				1235	impl Replacer for String {
				1236	fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
				1237	self.as_str().replace_append(caps, dst)
				1238	}
				1239
				1240	fn no_expansion(&mut self) -> Option<Cow<str>> {
				1241	no_expansion(self)
				1242	}
				1243	}
				1244
				1245	impl<'a> Replacer for Cow<'a, str> {
				1246	fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
				1247	self.as_ref().replace_append(caps, dst)
				1248	}
				1249
				1250	fn no_expansion(&mut self) -> Option<Cow<str>> {
				1251	no_expansion(self)
				1252	}
				1253	}
				1254
				1255	impl<'a> Replacer for &'a Cow<'a, str> {
				1256	fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
				1257	self.as_ref().replace_append(caps, dst)
				1258	}
				1259
				1260	fn no_expansion(&mut self) -> Option<Cow<str>> {
				1261	no_expansion(self)
				1262	}
				1263	}
				1264
				1265	fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<str>> {
				1266	let s = t.as_ref();
				1267	match find_byte(b'$', s.as_bytes()) {
				1268	Some(_) => None,
				1269	None => Some(Cow::Borrowed(s)),
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1270	}
				1271	}
				1272
				1273	impl<F, T> Replacer for F
				1274	where
				1275	F: FnMut(&Captures) -> T,
				1276	T: AsRef<str>,
				1277	{
				1278	fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
				1279	dst.push_str((*self)(caps).as_ref());
				1280	}
				1281	}
				1282
				1283	/// `NoExpand` indicates literal string replacement.
				1284	///
				1285	/// It can be used with `replace` and `replace_all` to do a literal string
				1286	/// replacement without expanding `$name` to their corresponding capture
				1287	/// groups. This can be both convenient (to avoid escaping `$`, for example)
				1288	/// and performant (since capture groups don't need to be found).
				1289	///
				1290	/// `'t` is the lifetime of the literal text.
Haibo Huang	47619dd	2021-01-08 17:05:43 -0800	[diff] [blame]	1291	#[derive(Clone, Debug)]
Chih-Hung Hsieh	e42c505	2020-04-16 10:44:21 -0700	[diff] [blame]	1292	pub struct NoExpand<'t>(pub &'t str);
				1293
				1294	impl<'t> Replacer for NoExpand<'t> {
				1295	fn replace_append(&mut self, _: &Captures, dst: &mut String) {
				1296	dst.push_str(self.0);
				1297	}
				1298
				1299	fn no_expansion(&mut self) -> Option<Cow<str>> {
				1300	Some(Cow::Borrowed(self.0))
				1301	}
				1302	}