Blame - src/hir/mod.rs - platform/external/rust/crates/regex-syntax

blob: 53d90b842522a991da8777891f9feb7169f0ba6e [file] [log] [blame]

Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	1	/*!
				2	Defines a high-level intermediate representation for regular expressions.
				3	*/
				4	use std::char;
				5	use std::cmp;
				6	use std::error;
				7	use std::fmt;
				8	use std::result;
				9	use std::u8;
				10
				11	use ast::Span;
				12	use hir::interval::{Interval, IntervalSet, IntervalSetIter};
				13	use unicode;
				14
				15	pub use hir::visitor::{visit, Visitor};
				16	pub use unicode::CaseFoldError;
				17
				18	mod interval;
				19	pub mod literal;
				20	pub mod print;
				21	pub mod translate;
				22	mod visitor;
				23
				24	/// An error that can occur while translating an `Ast` to a `Hir`.
				25	#[derive(Clone, Debug, Eq, PartialEq)]
				26	pub struct Error {
				27	/// The kind of error.
				28	kind: ErrorKind,
				29	/// The original pattern that the translator's Ast was parsed from. Every
				30	/// span in an error is a valid range into this string.
				31	pattern: String,
				32	/// The span of this error, derived from the Ast given to the translator.
				33	span: Span,
				34	}
				35
				36	impl Error {
				37	/// Return the type of this error.
				38	pub fn kind(&self) -> &ErrorKind {
				39	&self.kind
				40	}
				41
				42	/// The original pattern string in which this error occurred.
				43	///
				44	/// Every span reported by this error is reported in terms of this string.
				45	pub fn pattern(&self) -> &str {
				46	&self.pattern
				47	}
				48
				49	/// Return the span at which this error occurred.
				50	pub fn span(&self) -> &Span {
				51	&self.span
				52	}
				53	}
				54
				55	/// The type of an error that occurred while building an `Hir`.
				56	#[derive(Clone, Debug, Eq, PartialEq)]
				57	pub enum ErrorKind {
				58	/// This error occurs when a Unicode feature is used when Unicode
				59	/// support is disabled. For example `(?-u:\pL)` would trigger this error.
				60	UnicodeNotAllowed,
				61	/// This error occurs when translating a pattern that could match a byte
				62	/// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
				63	InvalidUtf8,
				64	/// This occurs when an unrecognized Unicode property name could not
				65	/// be found.
				66	UnicodePropertyNotFound,
				67	/// This occurs when an unrecognized Unicode property value could not
				68	/// be found.
				69	UnicodePropertyValueNotFound,
				70	/// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or
				71	/// `\d`) could not be found. This can occur when the `unicode-perl`
				72	/// crate feature is not enabled.
				73	UnicodePerlClassNotFound,
				74	/// This occurs when the Unicode simple case mapping tables are not
				75	/// available, and the regular expression required Unicode aware case
				76	/// insensitivity.
				77	UnicodeCaseUnavailable,
				78	/// This occurs when the translator attempts to construct a character class
				79	/// that is empty.
				80	///
				81	/// Note that this restriction in the translator may be removed in the
				82	/// future.
				83	EmptyClassNotAllowed,
				84	/// Hints that destructuring should not be exhaustive.
				85	///
				86	/// This enum may grow additional variants, so this makes sure clients
				87	/// don't count on exhaustive matching. (Otherwise, adding a new variant
				88	/// could break existing code.)
				89	#[doc(hidden)]
				90	__Nonexhaustive,
				91	}
				92
				93	impl ErrorKind {
				94	// TODO: Remove this method entirely on the next breaking semver release.
				95	#[allow(deprecated)]
				96	fn description(&self) -> &str {
				97	use self::ErrorKind::*;
				98	match *self {
				99	UnicodeNotAllowed => "Unicode not allowed here",
				100	InvalidUtf8 => "pattern can match invalid UTF-8",
				101	UnicodePropertyNotFound => "Unicode property not found",
				102	UnicodePropertyValueNotFound => "Unicode property value not found",
				103	UnicodePerlClassNotFound => {
				104	"Unicode-aware Perl class not found \
				105	(make sure the unicode-perl feature is enabled)"
				106	}
				107	UnicodeCaseUnavailable => {
				108	"Unicode-aware case insensitivity matching is not available \
				109	(make sure the unicode-case feature is enabled)"
				110	}
				111	EmptyClassNotAllowed => "empty character classes are not allowed",
				112	__Nonexhaustive => unreachable!(),
				113	}
				114	}
				115	}
				116
				117	impl error::Error for Error {
				118	// TODO: Remove this method entirely on the next breaking semver release.
				119	#[allow(deprecated)]
				120	fn description(&self) -> &str {
				121	self.kind.description()
				122	}
				123	}
				124
				125	impl fmt::Display for Error {
				126	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				127	::error::Formatter::from(self).fmt(f)
				128	}
				129	}
				130
				131	impl fmt::Display for ErrorKind {
				132	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				133	// TODO: Remove this on the next breaking semver release.
				134	#[allow(deprecated)]
				135	f.write_str(self.description())
				136	}
				137	}
				138
				139	/// A high-level intermediate representation (HIR) for a regular expression.
				140	///
				141	/// The HIR of a regular expression represents an intermediate step between its
				142	/// abstract syntax (a structured description of the concrete syntax) and
				143	/// compiled byte codes. The purpose of HIR is to make regular expressions
				144	/// easier to analyze. In particular, the AST is much more complex than the
				145	/// HIR. For example, while an AST supports arbitrarily nested character
				146	/// classes, the HIR will flatten all nested classes into a single set. The HIR
				147	/// will also "compile away" every flag present in the concrete syntax. For
				148	/// example, users of HIR expressions never need to worry about case folding;
				149	/// it is handled automatically by the translator (e.g., by translating `(?i)A`
				150	/// to `[aA]`).
				151	///
				152	/// If the HIR was produced by a translator that disallows invalid UTF-8, then
				153	/// the HIR is guaranteed to match UTF-8 exclusively.
				154	///
				155	/// This type defines its own destructor that uses constant stack space and
				156	/// heap space proportional to the size of the HIR.
				157	///
				158	/// The specific type of an HIR expression can be accessed via its `kind`
				159	/// or `into_kind` methods. This extra level of indirection exists for two
				160	/// reasons:
				161	///
				162	/// 1. Construction of an HIR expression must use the constructor methods
				163	/// on this `Hir` type instead of building the `HirKind` values directly.
				164	/// This permits construction to enforce invariants like "concatenations
				165	/// always consist of two or more sub-expressions."
				166	/// 2. Every HIR expression contains attributes that are defined inductively,
				167	/// and can be computed cheaply during the construction process. For
				168	/// example, one such attribute is whether the expression must match at the
				169	/// beginning of the text.
				170	///
				171	/// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
				172	/// expression pattern string, and uses constant stack space and heap space
				173	/// proportional to the size of the `Hir`.
				174	#[derive(Clone, Debug, Eq, PartialEq)]
				175	pub struct Hir {
				176	/// The underlying HIR kind.
				177	kind: HirKind,
				178	/// Analysis info about this HIR, computed during construction.
				179	info: HirInfo,
				180	}
				181
				182	/// The kind of an arbitrary `Hir` expression.
				183	#[derive(Clone, Debug, Eq, PartialEq)]
				184	pub enum HirKind {
				185	/// The empty regular expression, which matches everything, including the
				186	/// empty string.
				187	Empty,
				188	/// A single literal character that matches exactly this character.
				189	Literal(Literal),
				190	/// A single character class that matches any of the characters in the
				191	/// class. A class can either consist of Unicode scalar values as
				192	/// characters, or it can use bytes.
				193	Class(Class),
				194	/// An anchor assertion. An anchor assertion match always has zero length.
				195	Anchor(Anchor),
				196	/// A word boundary assertion, which may or may not be Unicode aware. A
				197	/// word boundary assertion match always has zero length.
				198	WordBoundary(WordBoundary),
				199	/// A repetition operation applied to a child expression.
				200	Repetition(Repetition),
				201	/// A possibly capturing group, which contains a child expression.
				202	Group(Group),
				203	/// A concatenation of expressions. A concatenation always has at least two
				204	/// child expressions.
				205	///
				206	/// A concatenation matches only if each of its child expression matches
				207	/// one after the other.
				208	Concat(Vec<Hir>),
				209	/// An alternation of expressions. An alternation always has at least two
				210	/// child expressions.
				211	///
				212	/// An alternation matches only if at least one of its child expression
				213	/// matches. If multiple expressions match, then the leftmost is preferred.
				214	Alternation(Vec<Hir>),
				215	}
				216
				217	impl Hir {
				218	/// Returns a reference to the underlying HIR kind.
				219	pub fn kind(&self) -> &HirKind {
				220	&self.kind
				221	}
				222
				223	/// Consumes ownership of this HIR expression and returns its underlying
				224	/// `HirKind`.
				225	pub fn into_kind(mut self) -> HirKind {
				226	use std::mem;
				227	mem::replace(&mut self.kind, HirKind::Empty)
				228	}
				229
				230	/// Returns an empty HIR expression.
				231	///
				232	/// An empty HIR expression always matches, including the empty string.
				233	pub fn empty() -> Hir {
				234	let mut info = HirInfo::new();
				235	info.set_always_utf8(true);
				236	info.set_all_assertions(true);
				237	info.set_anchored_start(false);
				238	info.set_anchored_end(false);
				239	info.set_line_anchored_start(false);
				240	info.set_line_anchored_end(false);
				241	info.set_any_anchored_start(false);
				242	info.set_any_anchored_end(false);
				243	info.set_match_empty(true);
Haibo Huang	5b6434b	2020-05-28 20:14:37 -0700	[diff] [blame^]	244	info.set_literal(false);
				245	info.set_alternation_literal(false);
Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	246	Hir { kind: HirKind::Empty, info: info }
				247	}
				248
				249	/// Creates a literal HIR expression.
				250	///
				251	/// If the given literal has a `Byte` variant with an ASCII byte, then this
				252	/// method panics. This enforces the invariant that `Byte` variants are
				253	/// only used to express matching of invalid UTF-8.
				254	pub fn literal(lit: Literal) -> Hir {
				255	if let Literal::Byte(b) = lit {
				256	assert!(b > 0x7F);
				257	}
				258
				259	let mut info = HirInfo::new();
				260	info.set_always_utf8(lit.is_unicode());
				261	info.set_all_assertions(false);
				262	info.set_anchored_start(false);
				263	info.set_anchored_end(false);
				264	info.set_line_anchored_start(false);
				265	info.set_line_anchored_end(false);
				266	info.set_any_anchored_start(false);
				267	info.set_any_anchored_end(false);
				268	info.set_match_empty(false);
				269	info.set_literal(true);
				270	info.set_alternation_literal(true);
				271	Hir { kind: HirKind::Literal(lit), info: info }
				272	}
				273
				274	/// Creates a class HIR expression.
				275	pub fn class(class: Class) -> Hir {
				276	let mut info = HirInfo::new();
				277	info.set_always_utf8(class.is_always_utf8());
				278	info.set_all_assertions(false);
				279	info.set_anchored_start(false);
				280	info.set_anchored_end(false);
				281	info.set_line_anchored_start(false);
				282	info.set_line_anchored_end(false);
				283	info.set_any_anchored_start(false);
				284	info.set_any_anchored_end(false);
				285	info.set_match_empty(false);
				286	info.set_literal(false);
				287	info.set_alternation_literal(false);
				288	Hir { kind: HirKind::Class(class), info: info }
				289	}
				290
				291	/// Creates an anchor assertion HIR expression.
				292	pub fn anchor(anchor: Anchor) -> Hir {
				293	let mut info = HirInfo::new();
				294	info.set_always_utf8(true);
				295	info.set_all_assertions(true);
				296	info.set_anchored_start(false);
				297	info.set_anchored_end(false);
				298	info.set_line_anchored_start(false);
				299	info.set_line_anchored_end(false);
				300	info.set_any_anchored_start(false);
				301	info.set_any_anchored_end(false);
				302	info.set_match_empty(true);
				303	info.set_literal(false);
				304	info.set_alternation_literal(false);
				305	if let Anchor::StartText = anchor {
				306	info.set_anchored_start(true);
				307	info.set_line_anchored_start(true);
				308	info.set_any_anchored_start(true);
				309	}
				310	if let Anchor::EndText = anchor {
				311	info.set_anchored_end(true);
				312	info.set_line_anchored_end(true);
				313	info.set_any_anchored_end(true);
				314	}
				315	if let Anchor::StartLine = anchor {
				316	info.set_line_anchored_start(true);
				317	}
				318	if let Anchor::EndLine = anchor {
				319	info.set_line_anchored_end(true);
				320	}
				321	Hir { kind: HirKind::Anchor(anchor), info: info }
				322	}
				323
				324	/// Creates a word boundary assertion HIR expression.
				325	pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
				326	let mut info = HirInfo::new();
				327	info.set_always_utf8(true);
				328	info.set_all_assertions(true);
				329	info.set_anchored_start(false);
				330	info.set_anchored_end(false);
				331	info.set_line_anchored_start(false);
				332	info.set_line_anchored_end(false);
				333	info.set_any_anchored_start(false);
				334	info.set_any_anchored_end(false);
				335	info.set_literal(false);
				336	info.set_alternation_literal(false);
				337	// A negated word boundary matches the empty string, but a normal
				338	// word boundary does not!
				339	info.set_match_empty(word_boundary.is_negated());
				340	// Negated ASCII word boundaries can match invalid UTF-8.
				341	if let WordBoundary::AsciiNegate = word_boundary {
				342	info.set_always_utf8(false);
				343	}
				344	Hir { kind: HirKind::WordBoundary(word_boundary), info: info }
				345	}
				346
				347	/// Creates a repetition HIR expression.
				348	pub fn repetition(rep: Repetition) -> Hir {
				349	let mut info = HirInfo::new();
				350	info.set_always_utf8(rep.hir.is_always_utf8());
				351	info.set_all_assertions(rep.hir.is_all_assertions());
				352	// If this operator can match the empty string, then it can never
				353	// be anchored.
				354	info.set_anchored_start(
				355	!rep.is_match_empty() && rep.hir.is_anchored_start(),
				356	);
				357	info.set_anchored_end(
				358	!rep.is_match_empty() && rep.hir.is_anchored_end(),
				359	);
				360	info.set_line_anchored_start(
				361	!rep.is_match_empty() && rep.hir.is_anchored_start(),
				362	);
				363	info.set_line_anchored_end(
				364	!rep.is_match_empty() && rep.hir.is_anchored_end(),
				365	);
				366	info.set_any_anchored_start(rep.hir.is_any_anchored_start());
				367	info.set_any_anchored_end(rep.hir.is_any_anchored_end());
				368	info.set_match_empty(rep.is_match_empty() \|\| rep.hir.is_match_empty());
				369	info.set_literal(false);
				370	info.set_alternation_literal(false);
				371	Hir { kind: HirKind::Repetition(rep), info: info }
				372	}
				373
				374	/// Creates a group HIR expression.
				375	pub fn group(group: Group) -> Hir {
				376	let mut info = HirInfo::new();
				377	info.set_always_utf8(group.hir.is_always_utf8());
				378	info.set_all_assertions(group.hir.is_all_assertions());
				379	info.set_anchored_start(group.hir.is_anchored_start());
				380	info.set_anchored_end(group.hir.is_anchored_end());
				381	info.set_line_anchored_start(group.hir.is_line_anchored_start());
				382	info.set_line_anchored_end(group.hir.is_line_anchored_end());
				383	info.set_any_anchored_start(group.hir.is_any_anchored_start());
				384	info.set_any_anchored_end(group.hir.is_any_anchored_end());
				385	info.set_match_empty(group.hir.is_match_empty());
				386	info.set_literal(false);
				387	info.set_alternation_literal(false);
				388	Hir { kind: HirKind::Group(group), info: info }
				389	}
				390
				391	/// Returns the concatenation of the given expressions.
				392	///
				393	/// This flattens the concatenation as appropriate.
				394	pub fn concat(mut exprs: Vec<Hir>) -> Hir {
				395	match exprs.len() {
				396	0 => Hir::empty(),
				397	1 => exprs.pop().unwrap(),
				398	_ => {
				399	let mut info = HirInfo::new();
				400	info.set_always_utf8(true);
				401	info.set_all_assertions(true);
				402	info.set_any_anchored_start(false);
				403	info.set_any_anchored_end(false);
				404	info.set_match_empty(true);
				405	info.set_literal(true);
				406	info.set_alternation_literal(true);
				407
				408	// Some attributes require analyzing all sub-expressions.
				409	for e in &exprs {
				410	let x = info.is_always_utf8() && e.is_always_utf8();
				411	info.set_always_utf8(x);
				412
				413	let x = info.is_all_assertions() && e.is_all_assertions();
				414	info.set_all_assertions(x);
				415
				416	let x = info.is_any_anchored_start()
				417	\|\| e.is_any_anchored_start();
				418	info.set_any_anchored_start(x);
				419
				420	let x =
				421	info.is_any_anchored_end() \|\| e.is_any_anchored_end();
				422	info.set_any_anchored_end(x);
				423
				424	let x = info.is_match_empty() && e.is_match_empty();
				425	info.set_match_empty(x);
				426
				427	let x = info.is_literal() && e.is_literal();
				428	info.set_literal(x);
				429
				430	let x = info.is_alternation_literal()
				431	&& e.is_alternation_literal();
				432	info.set_alternation_literal(x);
				433	}
				434	// Anchored attributes require something slightly more
				435	// sophisticated. Normally, WLOG, to determine whether an
				436	// expression is anchored to the start, we'd only need to check
				437	// the first expression of a concatenation. However,
				438	// expressions like `$\b^` are still anchored to the start,
				439	// but the first expression in the concatenation isn't
				440	// anchored to the start. So the "first" expression to look at
				441	// is actually one that is either not an assertion or is
				442	// specifically the StartText assertion.
				443	info.set_anchored_start(
				444	exprs
				445	.iter()
				446	.take_while(\|e\| {
				447	e.is_anchored_start() \|\| e.is_all_assertions()
				448	})
				449	.any(\|e\| e.is_anchored_start()),
				450	);
				451	// Similarly for the end anchor, but in reverse.
				452	info.set_anchored_end(
				453	exprs
				454	.iter()
				455	.rev()
				456	.take_while(\|e\| {
				457	e.is_anchored_end() \|\| e.is_all_assertions()
				458	})
				459	.any(\|e\| e.is_anchored_end()),
				460	);
				461	// Repeat the process for line anchors.
				462	info.set_line_anchored_start(
				463	exprs
				464	.iter()
				465	.take_while(\|e\| {
				466	e.is_line_anchored_start() \|\| e.is_all_assertions()
				467	})
				468	.any(\|e\| e.is_line_anchored_start()),
				469	);
				470	info.set_line_anchored_end(
				471	exprs
				472	.iter()
				473	.rev()
				474	.take_while(\|e\| {
				475	e.is_line_anchored_end() \|\| e.is_all_assertions()
				476	})
				477	.any(\|e\| e.is_line_anchored_end()),
				478	);
				479	Hir { kind: HirKind::Concat(exprs), info: info }
				480	}
				481	}
				482	}
				483
				484	/// Returns the alternation of the given expressions.
				485	///
				486	/// This flattens the alternation as appropriate.
				487	pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
				488	match exprs.len() {
				489	0 => Hir::empty(),
				490	1 => exprs.pop().unwrap(),
				491	_ => {
				492	let mut info = HirInfo::new();
				493	info.set_always_utf8(true);
				494	info.set_all_assertions(true);
				495	info.set_anchored_start(true);
				496	info.set_anchored_end(true);
				497	info.set_line_anchored_start(true);
				498	info.set_line_anchored_end(true);
				499	info.set_any_anchored_start(false);
				500	info.set_any_anchored_end(false);
				501	info.set_match_empty(false);
				502	info.set_literal(false);
				503	info.set_alternation_literal(true);
				504
				505	// Some attributes require analyzing all sub-expressions.
				506	for e in &exprs {
				507	let x = info.is_always_utf8() && e.is_always_utf8();
				508	info.set_always_utf8(x);
				509
				510	let x = info.is_all_assertions() && e.is_all_assertions();
				511	info.set_all_assertions(x);
				512
				513	let x = info.is_anchored_start() && e.is_anchored_start();
				514	info.set_anchored_start(x);
				515
				516	let x = info.is_anchored_end() && e.is_anchored_end();
				517	info.set_anchored_end(x);
				518
				519	let x = info.is_line_anchored_start()
				520	&& e.is_line_anchored_start();
				521	info.set_line_anchored_start(x);
				522
				523	let x = info.is_line_anchored_end()
				524	&& e.is_line_anchored_end();
				525	info.set_line_anchored_end(x);
				526
				527	let x = info.is_any_anchored_start()
				528	\|\| e.is_any_anchored_start();
				529	info.set_any_anchored_start(x);
				530
				531	let x =
				532	info.is_any_anchored_end() \|\| e.is_any_anchored_end();
				533	info.set_any_anchored_end(x);
				534
				535	let x = info.is_match_empty() \|\| e.is_match_empty();
				536	info.set_match_empty(x);
				537
				538	let x = info.is_alternation_literal() && e.is_literal();
				539	info.set_alternation_literal(x);
				540	}
				541	Hir { kind: HirKind::Alternation(exprs), info: info }
				542	}
				543	}
				544	}
				545
				546	/// Build an HIR expression for `.`.
				547	///
				548	/// A `.` expression matches any character except for `\n`. To build an
				549	/// expression that matches any character, including `\n`, use the `any`
				550	/// method.
				551	///
				552	/// If `bytes` is `true`, then this assumes characters are limited to a
				553	/// single byte.
				554	pub fn dot(bytes: bool) -> Hir {
				555	if bytes {
				556	let mut cls = ClassBytes::empty();
				557	cls.push(ClassBytesRange::new(b'\0', b'\x09'));
				558	cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
				559	Hir::class(Class::Bytes(cls))
				560	} else {
				561	let mut cls = ClassUnicode::empty();
				562	cls.push(ClassUnicodeRange::new('\0', '\x09'));
				563	cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
				564	Hir::class(Class::Unicode(cls))
				565	}
				566	}
				567
				568	/// Build an HIR expression for `(?s).`.
				569	///
				570	/// A `(?s).` expression matches any character, including `\n`. To build an
				571	/// expression that matches any character except for `\n`, then use the
				572	/// `dot` method.
				573	///
				574	/// If `bytes` is `true`, then this assumes characters are limited to a
				575	/// single byte.
				576	pub fn any(bytes: bool) -> Hir {
				577	if bytes {
				578	let mut cls = ClassBytes::empty();
				579	cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
				580	Hir::class(Class::Bytes(cls))
				581	} else {
				582	let mut cls = ClassUnicode::empty();
				583	cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
				584	Hir::class(Class::Unicode(cls))
				585	}
				586	}
				587
				588	/// Return true if and only if this HIR will always match valid UTF-8.
				589	///
				590	/// When this returns false, then it is possible for this HIR expression
				591	/// to match invalid UTF-8.
				592	pub fn is_always_utf8(&self) -> bool {
				593	self.info.is_always_utf8()
				594	}
				595
				596	/// Returns true if and only if this entire HIR expression is made up of
				597	/// zero-width assertions.
				598	///
				599	/// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
				600	/// not `^a`.
				601	pub fn is_all_assertions(&self) -> bool {
				602	self.info.is_all_assertions()
				603	}
				604
				605	/// Return true if and only if this HIR is required to match from the
				606	/// beginning of text. This includes expressions like `^foo`, `^(foo\|bar)`,
				607	/// `^foo\|^bar` but not `^foo\|bar`.
				608	pub fn is_anchored_start(&self) -> bool {
				609	self.info.is_anchored_start()
				610	}
				611
				612	/// Return true if and only if this HIR is required to match at the end
				613	/// of text. This includes expressions like `foo$`, `(foo\|bar)$`,
				614	/// `foo$\|bar$` but not `foo$\|bar`.
				615	pub fn is_anchored_end(&self) -> bool {
				616	self.info.is_anchored_end()
				617	}
				618
				619	/// Return true if and only if this HIR is required to match from the
				620	/// beginning of text or the beginning of a line. This includes expressions
				621	/// like `^foo`, `(?m)^foo`, `^(foo\|bar)`, `^(foo\|bar)`, `(?m)^foo\|^bar`
				622	/// but not `^foo\|bar` or `(?m)^foo\|bar`.
				623	///
				624	/// Note that if `is_anchored_start` is `true`, then
				625	/// `is_line_anchored_start` will also be `true`. The reverse implication
				626	/// is not true. For example, `(?m)^foo` is line anchored, but not
				627	/// `is_anchored_start`.
				628	pub fn is_line_anchored_start(&self) -> bool {
				629	self.info.is_line_anchored_start()
				630	}
				631
				632	/// Return true if and only if this HIR is required to match at the
				633	/// end of text or the end of a line. This includes expressions like
				634	/// `foo$`, `(?m)foo$`, `(foo\|bar)$`, `(?m)(foo\|bar)$`, `foo$\|bar$`,
				635	/// `(?m)(foo\|bar)$`, but not `foo$\|bar` or `(?m)foo$\|bar`.
				636	///
				637	/// Note that if `is_anchored_end` is `true`, then
				638	/// `is_line_anchored_end` will also be `true`. The reverse implication
				639	/// is not true. For example, `(?m)foo$` is line anchored, but not
				640	/// `is_anchored_end`.
				641	pub fn is_line_anchored_end(&self) -> bool {
				642	self.info.is_line_anchored_end()
				643	}
				644
				645	/// Return true if and only if this HIR contains any sub-expression that
				646	/// is required to match at the beginning of text. Specifically, this
				647	/// returns true if the `^` symbol (when multiline mode is disabled) or the
				648	/// `\A` escape appear anywhere in the regex.
				649	pub fn is_any_anchored_start(&self) -> bool {
				650	self.info.is_any_anchored_start()
				651	}
				652
				653	/// Return true if and only if this HIR contains any sub-expression that is
				654	/// required to match at the end of text. Specifically, this returns true
				655	/// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
				656	/// appear anywhere in the regex.
				657	pub fn is_any_anchored_end(&self) -> bool {
				658	self.info.is_any_anchored_end()
				659	}
				660
				661	/// Return true if and only if the empty string is part of the language
				662	/// matched by this regular expression.
				663	///
				664	/// This includes `a`, `a?b`, `a{0}`, `()`, `()+`, `^$`, `a\|b?`, `\B`,
				665	/// but not `a`, `a+` or `\b`.
				666	pub fn is_match_empty(&self) -> bool {
				667	self.info.is_match_empty()
				668	}
				669
				670	/// Return true if and only if this HIR is a simple literal. This is only
				671	/// true when this HIR expression is either itself a `Literal` or a
				672	/// concatenation of only `Literal`s.
				673	///
Haibo Huang	5b6434b	2020-05-28 20:14:37 -0700	[diff] [blame^]	674	/// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`,
				675	/// `` are not (even though that contain sub-expressions that are literals).
Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	676	pub fn is_literal(&self) -> bool {
				677	self.info.is_literal()
				678	}
				679
				680	/// Return true if and only if this HIR is either a simple literal or an
				681	/// alternation of simple literals. This is only
				682	/// true when this HIR expression is either itself a `Literal` or a
				683	/// concatenation of only `Literal`s or an alternation of only `Literal`s.
				684	///
Haibo Huang	5b6434b	2020-05-28 20:14:37 -0700	[diff] [blame^]	685	/// For example, `f`, `foo`, `a\|b\|c`, and `foo\|bar\|baz` are alternation
				686	/// literals, but `f+`, `(foo)`, `foo()`, ``
Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	687	/// are not (even though that contain sub-expressions that are literals).
				688	pub fn is_alternation_literal(&self) -> bool {
				689	self.info.is_alternation_literal()
				690	}
				691	}
				692
				693	impl HirKind {
				694	/// Return true if and only if this HIR is the empty regular expression.
				695	///
				696	/// Note that this is not defined inductively. That is, it only tests if
				697	/// this kind is the `Empty` variant. To get the inductive definition,
				698	/// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
				699	pub fn is_empty(&self) -> bool {
				700	match *self {
				701	HirKind::Empty => true,
				702	_ => false,
				703	}
				704	}
				705
				706	/// Returns true if and only if this kind has any (including possibly
				707	/// empty) subexpressions.
				708	pub fn has_subexprs(&self) -> bool {
				709	match *self {
				710	HirKind::Empty
				711	\| HirKind::Literal(_)
				712	\| HirKind::Class(_)
				713	\| HirKind::Anchor(_)
				714	\| HirKind::WordBoundary(_) => false,
				715	HirKind::Group(_)
				716	\| HirKind::Repetition(_)
				717	\| HirKind::Concat(_)
				718	\| HirKind::Alternation(_) => true,
				719	}
				720	}
				721	}
				722
				723	/// Print a display representation of this Hir.
				724	///
				725	/// The result of this is a valid regular expression pattern string.
				726	///
				727	/// This implementation uses constant stack space and heap space proportional
				728	/// to the size of the `Hir`.
				729	impl fmt::Display for Hir {
				730	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				731	use hir::print::Printer;
				732	Printer::new().print(self, f)
				733	}
				734	}
				735
				736	/// The high-level intermediate representation of a literal.
				737	///
				738	/// A literal corresponds to a single character, where a character is either
				739	/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
				740	/// are preferred whenever possible. In particular, a `Byte` variant is only
				741	/// ever produced when it could match invalid UTF-8.
				742	#[derive(Clone, Debug, Eq, PartialEq)]
				743	pub enum Literal {
				744	/// A single character represented by a Unicode scalar value.
				745	Unicode(char),
				746	/// A single character represented by an arbitrary byte.
				747	Byte(u8),
				748	}
				749
				750	impl Literal {
				751	/// Returns true if and only if this literal corresponds to a Unicode
				752	/// scalar value.
				753	pub fn is_unicode(&self) -> bool {
				754	match *self {
				755	Literal::Unicode(_) => true,
				756	Literal::Byte(b) if b <= 0x7F => true,
				757	Literal::Byte(_) => false,
				758	}
				759	}
				760	}
				761
				762	/// The high-level intermediate representation of a character class.
				763	///
				764	/// A character class corresponds to a set of characters. A character is either
				765	/// defined by a Unicode scalar value or a byte. Unicode characters are used
				766	/// by default, while bytes are used when Unicode mode (via the `u` flag) is
				767	/// disabled.
				768	///
				769	/// A character class, regardless of its character type, is represented by a
				770	/// sequence of non-overlapping non-adjacent ranges of characters.
				771	///
				772	/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
				773	/// be produced even when it exclusively matches valid UTF-8. This is because
				774	/// a `Bytes` variant represents an intention by the author of the regular
				775	/// expression to disable Unicode mode, which in turn impacts the semantics of
				776	/// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
				777	/// match the same set of strings.
				778	#[derive(Clone, Debug, Eq, PartialEq)]
				779	pub enum Class {
				780	/// A set of characters represented by Unicode scalar values.
				781	Unicode(ClassUnicode),
				782	/// A set of characters represented by arbitrary bytes (one byte per
				783	/// character).
				784	Bytes(ClassBytes),
				785	}
				786
				787	impl Class {
				788	/// Apply Unicode simple case folding to this character class, in place.
				789	/// The character class will be expanded to include all simple case folded
				790	/// character variants.
				791	///
				792	/// If this is a byte oriented character class, then this will be limited
				793	/// to the ASCII ranges `A-Z` and `a-z`.
				794	pub fn case_fold_simple(&mut self) {
				795	match *self {
				796	Class::Unicode(ref mut x) => x.case_fold_simple(),
				797	Class::Bytes(ref mut x) => x.case_fold_simple(),
				798	}
				799	}
				800
				801	/// Negate this character class in place.
				802	///
				803	/// After completion, this character class will contain precisely the
				804	/// characters that weren't previously in the class.
				805	pub fn negate(&mut self) {
				806	match *self {
				807	Class::Unicode(ref mut x) => x.negate(),
				808	Class::Bytes(ref mut x) => x.negate(),
				809	}
				810	}
				811
				812	/// Returns true if and only if this character class will only ever match
				813	/// valid UTF-8.
				814	///
				815	/// A character class can match invalid UTF-8 only when the following
				816	/// conditions are met:
				817	///
				818	/// 1. The translator was configured to permit generating an expression
				819	/// that can match invalid UTF-8. (By default, this is disabled.)
				820	/// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
				821	/// syntax or in the parser builder. By default, Unicode mode is
				822	/// enabled.
				823	pub fn is_always_utf8(&self) -> bool {
				824	match *self {
				825	Class::Unicode(_) => true,
				826	Class::Bytes(ref x) => x.is_all_ascii(),
				827	}
				828	}
				829	}
				830
				831	/// A set of characters represented by Unicode scalar values.
				832	#[derive(Clone, Debug, Eq, PartialEq)]
				833	pub struct ClassUnicode {
				834	set: IntervalSet<ClassUnicodeRange>,
				835	}
				836
				837	impl ClassUnicode {
				838	/// Create a new class from a sequence of ranges.
				839	///
				840	/// The given ranges do not need to be in any specific order, and ranges
				841	/// may overlap.
				842	pub fn new<I>(ranges: I) -> ClassUnicode
				843	where
				844	I: IntoIterator<Item = ClassUnicodeRange>,
				845	{
				846	ClassUnicode { set: IntervalSet::new(ranges) }
				847	}
				848
				849	/// Create a new class with no ranges.
				850	pub fn empty() -> ClassUnicode {
				851	ClassUnicode::new(vec![])
				852	}
				853
				854	/// Add a new range to this set.
				855	pub fn push(&mut self, range: ClassUnicodeRange) {
				856	self.set.push(range);
				857	}
				858
				859	/// Return an iterator over all ranges in this class.
				860	///
				861	/// The iterator yields ranges in ascending order.
				862	pub fn iter(&self) -> ClassUnicodeIter {
				863	ClassUnicodeIter(self.set.iter())
				864	}
				865
				866	/// Return the underlying ranges as a slice.
				867	pub fn ranges(&self) -> &[ClassUnicodeRange] {
				868	self.set.intervals()
				869	}
				870
				871	/// Expand this character class such that it contains all case folded
				872	/// characters, according to Unicode's "simple" mapping. For example, if
				873	/// this class consists of the range `a-z`, then applying case folding will
				874	/// result in the class containing both the ranges `a-z` and `A-Z`.
				875	///
				876	/// # Panics
				877	///
				878	/// This routine panics when the case mapping data necessary for this
				879	/// routine to complete is unavailable. This occurs when the `unicode-case`
				880	/// feature is not enabled.
				881	///
				882	/// Callers should prefer using `try_case_fold_simple` instead, which will
				883	/// return an error instead of panicking.
				884	pub fn case_fold_simple(&mut self) {
				885	self.set
				886	.case_fold_simple()
				887	.expect("unicode-case feature must be enabled");
				888	}
				889
				890	/// Expand this character class such that it contains all case folded
				891	/// characters, according to Unicode's "simple" mapping. For example, if
				892	/// this class consists of the range `a-z`, then applying case folding will
				893	/// result in the class containing both the ranges `a-z` and `A-Z`.
				894	///
				895	/// # Error
				896	///
				897	/// This routine returns an error when the case mapping data necessary
				898	/// for this routine to complete is unavailable. This occurs when the
				899	/// `unicode-case` feature is not enabled.
				900	pub fn try_case_fold_simple(
				901	&mut self,
				902	) -> result::Result<(), CaseFoldError> {
				903	self.set.case_fold_simple()
				904	}
				905
				906	/// Negate this character class.
				907	///
				908	/// For all `c` where `c` is a Unicode scalar value, if `c` was in this
				909	/// set, then it will not be in this set after negation.
				910	pub fn negate(&mut self) {
				911	self.set.negate();
				912	}
				913
				914	/// Union this character class with the given character class, in place.
				915	pub fn union(&mut self, other: &ClassUnicode) {
				916	self.set.union(&other.set);
				917	}
				918
				919	/// Intersect this character class with the given character class, in
				920	/// place.
				921	pub fn intersect(&mut self, other: &ClassUnicode) {
				922	self.set.intersect(&other.set);
				923	}
				924
				925	/// Subtract the given character class from this character class, in place.
				926	pub fn difference(&mut self, other: &ClassUnicode) {
				927	self.set.difference(&other.set);
				928	}
				929
				930	/// Compute the symmetric difference of the given character classes, in
				931	/// place.
				932	///
				933	/// This computes the symmetric difference of two character classes. This
				934	/// removes all elements in this class that are also in the given class,
				935	/// but all adds all elements from the given class that aren't in this
				936	/// class. That is, the class will contain all elements in either class,
				937	/// but will not contain any elements that are in both classes.
				938	pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
				939	self.set.symmetric_difference(&other.set);
				940	}
				941
				942	/// Returns true if and only if this character class will either match
				943	/// nothing or only ASCII bytes. Stated differently, this returns false
				944	/// if and only if this class contains a non-ASCII codepoint.
				945	pub fn is_all_ascii(&self) -> bool {
				946	self.set.intervals().last().map_or(true, \|r\| r.end <= '\x7F')
				947	}
				948	}
				949
				950	/// An iterator over all ranges in a Unicode character class.
				951	///
				952	/// The lifetime `'a` refers to the lifetime of the underlying class.
				953	#[derive(Debug)]
				954	pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
				955
				956	impl<'a> Iterator for ClassUnicodeIter<'a> {
				957	type Item = &'a ClassUnicodeRange;
				958
				959	fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
				960	self.0.next()
				961	}
				962	}
				963
				964	/// A single range of characters represented by Unicode scalar values.
				965	///
				966	/// The range is closed. That is, the start and end of the range are included
				967	/// in the range.
				968	#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
				969	pub struct ClassUnicodeRange {
				970	start: char,
				971	end: char,
				972	}
				973
				974	impl fmt::Debug for ClassUnicodeRange {
				975	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				976	let start = if !self.start.is_whitespace() && !self.start.is_control()
				977	{
				978	self.start.to_string()
				979	} else {
				980	format!("0x{:X}", self.start as u32)
				981	};
				982	let end = if !self.end.is_whitespace() && !self.end.is_control() {
				983	self.end.to_string()
				984	} else {
				985	format!("0x{:X}", self.end as u32)
				986	};
				987	f.debug_struct("ClassUnicodeRange")
				988	.field("start", &start)
				989	.field("end", &end)
				990	.finish()
				991	}
				992	}
				993
				994	impl Interval for ClassUnicodeRange {
				995	type Bound = char;
				996
				997	#[inline]
				998	fn lower(&self) -> char {
				999	self.start
				1000	}
				1001	#[inline]
				1002	fn upper(&self) -> char {
				1003	self.end
				1004	}
				1005	#[inline]
				1006	fn set_lower(&mut self, bound: char) {
				1007	self.start = bound;
				1008	}
				1009	#[inline]
				1010	fn set_upper(&mut self, bound: char) {
				1011	self.end = bound;
				1012	}
				1013
				1014	/// Apply simple case folding to this Unicode scalar value range.
				1015	///
				1016	/// Additional ranges are appended to the given vector. Canonical ordering
				1017	/// is not maintained in the given vector.
				1018	fn case_fold_simple(
				1019	&self,
				1020	ranges: &mut Vec<ClassUnicodeRange>,
				1021	) -> Result<(), unicode::CaseFoldError> {
				1022	if !unicode::contains_simple_case_mapping(self.start, self.end)? {
				1023	return Ok(());
				1024	}
				1025	let start = self.start as u32;
				1026	let end = (self.end as u32).saturating_add(1);
				1027	let mut next_simple_cp = None;
				1028	for cp in (start..end).filter_map(char::from_u32) {
				1029	if next_simple_cp.map_or(false, \|next\| cp < next) {
				1030	continue;
				1031	}
				1032	let it = match unicode::simple_fold(cp)? {
				1033	Ok(it) => it,
				1034	Err(next) => {
				1035	next_simple_cp = next;
				1036	continue;
				1037	}
				1038	};
				1039	for cp_folded in it {
				1040	ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
				1041	}
				1042	}
				1043	Ok(())
				1044	}
				1045	}
				1046
				1047	impl ClassUnicodeRange {
				1048	/// Create a new Unicode scalar value range for a character class.
				1049	///
				1050	/// The returned range is always in a canonical form. That is, the range
				1051	/// returned always satisfies the invariant that `start <= end`.
				1052	pub fn new(start: char, end: char) -> ClassUnicodeRange {
				1053	ClassUnicodeRange::create(start, end)
				1054	}
				1055
				1056	/// Return the start of this range.
				1057	///
				1058	/// The start of a range is always less than or equal to the end of the
				1059	/// range.
				1060	pub fn start(&self) -> char {
				1061	self.start
				1062	}
				1063
				1064	/// Return the end of this range.
				1065	///
				1066	/// The end of a range is always greater than or equal to the start of the
				1067	/// range.
				1068	pub fn end(&self) -> char {
				1069	self.end
				1070	}
				1071	}
				1072
				1073	/// A set of characters represented by arbitrary bytes (where one byte
				1074	/// corresponds to one character).
				1075	#[derive(Clone, Debug, Eq, PartialEq)]
				1076	pub struct ClassBytes {
				1077	set: IntervalSet<ClassBytesRange>,
				1078	}
				1079
				1080	impl ClassBytes {
				1081	/// Create a new class from a sequence of ranges.
				1082	///
				1083	/// The given ranges do not need to be in any specific order, and ranges
				1084	/// may overlap.
				1085	pub fn new<I>(ranges: I) -> ClassBytes
				1086	where
				1087	I: IntoIterator<Item = ClassBytesRange>,
				1088	{
				1089	ClassBytes { set: IntervalSet::new(ranges) }
				1090	}
				1091
				1092	/// Create a new class with no ranges.
				1093	pub fn empty() -> ClassBytes {
				1094	ClassBytes::new(vec![])
				1095	}
				1096
				1097	/// Add a new range to this set.
				1098	pub fn push(&mut self, range: ClassBytesRange) {
				1099	self.set.push(range);
				1100	}
				1101
				1102	/// Return an iterator over all ranges in this class.
				1103	///
				1104	/// The iterator yields ranges in ascending order.
				1105	pub fn iter(&self) -> ClassBytesIter {
				1106	ClassBytesIter(self.set.iter())
				1107	}
				1108
				1109	/// Return the underlying ranges as a slice.
				1110	pub fn ranges(&self) -> &[ClassBytesRange] {
				1111	self.set.intervals()
				1112	}
				1113
				1114	/// Expand this character class such that it contains all case folded
				1115	/// characters. For example, if this class consists of the range `a-z`,
				1116	/// then applying case folding will result in the class containing both the
				1117	/// ranges `a-z` and `A-Z`.
				1118	///
				1119	/// Note that this only applies ASCII case folding, which is limited to the
				1120	/// characters `a-z` and `A-Z`.
				1121	pub fn case_fold_simple(&mut self) {
				1122	self.set.case_fold_simple().expect("ASCII case folding never fails");
				1123	}
				1124
				1125	/// Negate this byte class.
				1126	///
				1127	/// For all `b` where `b` is a any byte, if `b` was in this set, then it
				1128	/// will not be in this set after negation.
				1129	pub fn negate(&mut self) {
				1130	self.set.negate();
				1131	}
				1132
				1133	/// Union this byte class with the given byte class, in place.
				1134	pub fn union(&mut self, other: &ClassBytes) {
				1135	self.set.union(&other.set);
				1136	}
				1137
				1138	/// Intersect this byte class with the given byte class, in place.
				1139	pub fn intersect(&mut self, other: &ClassBytes) {
				1140	self.set.intersect(&other.set);
				1141	}
				1142
				1143	/// Subtract the given byte class from this byte class, in place.
				1144	pub fn difference(&mut self, other: &ClassBytes) {
				1145	self.set.difference(&other.set);
				1146	}
				1147
				1148	/// Compute the symmetric difference of the given byte classes, in place.
				1149	///
				1150	/// This computes the symmetric difference of two byte classes. This
				1151	/// removes all elements in this class that are also in the given class,
				1152	/// but all adds all elements from the given class that aren't in this
				1153	/// class. That is, the class will contain all elements in either class,
				1154	/// but will not contain any elements that are in both classes.
				1155	pub fn symmetric_difference(&mut self, other: &ClassBytes) {
				1156	self.set.symmetric_difference(&other.set);
				1157	}
				1158
				1159	/// Returns true if and only if this character class will either match
				1160	/// nothing or only ASCII bytes. Stated differently, this returns false
				1161	/// if and only if this class contains a non-ASCII byte.
				1162	pub fn is_all_ascii(&self) -> bool {
				1163	self.set.intervals().last().map_or(true, \|r\| r.end <= 0x7F)
				1164	}
				1165	}
				1166
				1167	/// An iterator over all ranges in a byte character class.
				1168	///
				1169	/// The lifetime `'a` refers to the lifetime of the underlying class.
				1170	#[derive(Debug)]
				1171	pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
				1172
				1173	impl<'a> Iterator for ClassBytesIter<'a> {
				1174	type Item = &'a ClassBytesRange;
				1175
				1176	fn next(&mut self) -> Option<&'a ClassBytesRange> {
				1177	self.0.next()
				1178	}
				1179	}
				1180
				1181	/// A single range of characters represented by arbitrary bytes.
				1182	///
				1183	/// The range is closed. That is, the start and end of the range are included
				1184	/// in the range.
				1185	#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
				1186	pub struct ClassBytesRange {
				1187	start: u8,
				1188	end: u8,
				1189	}
				1190
				1191	impl Interval for ClassBytesRange {
				1192	type Bound = u8;
				1193
				1194	#[inline]
				1195	fn lower(&self) -> u8 {
				1196	self.start
				1197	}
				1198	#[inline]
				1199	fn upper(&self) -> u8 {
				1200	self.end
				1201	}
				1202	#[inline]
				1203	fn set_lower(&mut self, bound: u8) {
				1204	self.start = bound;
				1205	}
				1206	#[inline]
				1207	fn set_upper(&mut self, bound: u8) {
				1208	self.end = bound;
				1209	}
				1210
				1211	/// Apply simple case folding to this byte range. Only ASCII case mappings
				1212	/// (for a-z) are applied.
				1213	///
				1214	/// Additional ranges are appended to the given vector. Canonical ordering
				1215	/// is not maintained in the given vector.
				1216	fn case_fold_simple(
				1217	&self,
				1218	ranges: &mut Vec<ClassBytesRange>,
				1219	) -> Result<(), unicode::CaseFoldError> {
				1220	if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
				1221	let lower = cmp::max(self.start, b'a');
				1222	let upper = cmp::min(self.end, b'z');
				1223	ranges.push(ClassBytesRange::new(lower - 32, upper - 32));
				1224	}
				1225	if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
				1226	let lower = cmp::max(self.start, b'A');
				1227	let upper = cmp::min(self.end, b'Z');
				1228	ranges.push(ClassBytesRange::new(lower + 32, upper + 32));
				1229	}
				1230	Ok(())
				1231	}
				1232	}
				1233
				1234	impl ClassBytesRange {
				1235	/// Create a new byte range for a character class.
				1236	///
				1237	/// The returned range is always in a canonical form. That is, the range
				1238	/// returned always satisfies the invariant that `start <= end`.
				1239	pub fn new(start: u8, end: u8) -> ClassBytesRange {
				1240	ClassBytesRange::create(start, end)
				1241	}
				1242
				1243	/// Return the start of this range.
				1244	///
				1245	/// The start of a range is always less than or equal to the end of the
				1246	/// range.
				1247	pub fn start(&self) -> u8 {
				1248	self.start
				1249	}
				1250
				1251	/// Return the end of this range.
				1252	///
				1253	/// The end of a range is always greater than or equal to the start of the
				1254	/// range.
				1255	pub fn end(&self) -> u8 {
				1256	self.end
				1257	}
				1258	}
				1259
				1260	impl fmt::Debug for ClassBytesRange {
				1261	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				1262	let mut debug = f.debug_struct("ClassBytesRange");
				1263	if self.start <= 0x7F {
				1264	debug.field("start", &(self.start as char));
				1265	} else {
				1266	debug.field("start", &self.start);
				1267	}
				1268	if self.end <= 0x7F {
				1269	debug.field("end", &(self.end as char));
				1270	} else {
				1271	debug.field("end", &self.end);
				1272	}
				1273	debug.finish()
				1274	}
				1275	}
				1276
				1277	/// The high-level intermediate representation for an anchor assertion.
				1278	///
				1279	/// A matching anchor assertion is always zero-length.
				1280	#[derive(Clone, Debug, Eq, PartialEq)]
				1281	pub enum Anchor {
				1282	/// Match the beginning of a line or the beginning of text. Specifically,
				1283	/// this matches at the starting position of the input, or at the position
				1284	/// immediately following a `\n` character.
				1285	StartLine,
				1286	/// Match the end of a line or the end of text. Specifically,
				1287	/// this matches at the end position of the input, or at the position
				1288	/// immediately preceding a `\n` character.
				1289	EndLine,
				1290	/// Match the beginning of text. Specifically, this matches at the starting
				1291	/// position of the input.
				1292	StartText,
				1293	/// Match the end of text. Specifically, this matches at the ending
				1294	/// position of the input.
				1295	EndText,
				1296	}
				1297
				1298	/// The high-level intermediate representation for a word-boundary assertion.
				1299	///
				1300	/// A matching word boundary assertion is always zero-length.
				1301	#[derive(Clone, Debug, Eq, PartialEq)]
				1302	pub enum WordBoundary {
				1303	/// Match a Unicode-aware word boundary. That is, this matches a position
				1304	/// where the left adjacent character and right adjacent character
				1305	/// correspond to a word and non-word or a non-word and word character.
				1306	Unicode,
				1307	/// Match a Unicode-aware negation of a word boundary.
				1308	UnicodeNegate,
				1309	/// Match an ASCII-only word boundary. That is, this matches a position
				1310	/// where the left adjacent character and right adjacent character
				1311	/// correspond to a word and non-word or a non-word and word character.
				1312	Ascii,
				1313	/// Match an ASCII-only negation of a word boundary.
				1314	AsciiNegate,
				1315	}
				1316
				1317	impl WordBoundary {
				1318	/// Returns true if and only if this word boundary assertion is negated.
				1319	pub fn is_negated(&self) -> bool {
				1320	match *self {
				1321	WordBoundary::Unicode \| WordBoundary::Ascii => false,
				1322	WordBoundary::UnicodeNegate \| WordBoundary::AsciiNegate => true,
				1323	}
				1324	}
				1325	}
				1326
				1327	/// The high-level intermediate representation for a group.
				1328	///
				1329	/// This represents one of three possible group types:
				1330	///
				1331	/// 1. A non-capturing group (e.g., `(?:expr)`).
				1332	/// 2. A capturing group (e.g., `(expr)`).
				1333	/// 3. A named capturing group (e.g., `(?P<name>expr)`).
				1334	#[derive(Clone, Debug, Eq, PartialEq)]
				1335	pub struct Group {
				1336	/// The kind of this group. If it is a capturing group, then the kind
				1337	/// contains the capture group index (and the name, if it is a named
				1338	/// group).
				1339	pub kind: GroupKind,
				1340	/// The expression inside the capturing group, which may be empty.
				1341	pub hir: Box<Hir>,
				1342	}
				1343
				1344	/// The kind of group.
				1345	#[derive(Clone, Debug, Eq, PartialEq)]
				1346	pub enum GroupKind {
				1347	/// A normal unnamed capturing group.
				1348	///
				1349	/// The value is the capture index of the group.
				1350	CaptureIndex(u32),
				1351	/// A named capturing group.
				1352	CaptureName {
				1353	/// The name of the group.
				1354	name: String,
				1355	/// The capture index of the group.
				1356	index: u32,
				1357	},
				1358	/// A non-capturing group.
				1359	NonCapturing,
				1360	}
				1361
				1362	/// The high-level intermediate representation of a repetition operator.
				1363	///
				1364	/// A repetition operator permits the repetition of an arbitrary
				1365	/// sub-expression.
				1366	#[derive(Clone, Debug, Eq, PartialEq)]
				1367	pub struct Repetition {
				1368	/// The kind of this repetition operator.
				1369	pub kind: RepetitionKind,
				1370	/// Whether this repetition operator is greedy or not. A greedy operator
				1371	/// will match as much as it can. A non-greedy operator will match as
				1372	/// little as it can.
				1373	///
				1374	/// Typically, operators are greedy by default and are only non-greedy when
				1375	/// a `?` suffix is used, e.g., `(expr)` is greedy while `(expr)?` is
				1376	/// not. However, this can be inverted via the `U` "ungreedy" flag.
				1377	pub greedy: bool,
				1378	/// The expression being repeated.
				1379	pub hir: Box<Hir>,
				1380	}
				1381
				1382	impl Repetition {
				1383	/// Returns true if and only if this repetition operator makes it possible
				1384	/// to match the empty string.
				1385	///
				1386	/// Note that this is not defined inductively. For example, while `a*`
				1387	/// will report `true`, `()+` will not, even though `()` matches the empty
				1388	/// string and one or more occurrences of something that matches the empty
				1389	/// string will always match the empty string. In order to get the
				1390	/// inductive definition, see the corresponding method on
				1391	/// [`Hir`](struct.Hir.html).
				1392	pub fn is_match_empty(&self) -> bool {
				1393	match self.kind {
				1394	RepetitionKind::ZeroOrOne => true,
				1395	RepetitionKind::ZeroOrMore => true,
				1396	RepetitionKind::OneOrMore => false,
				1397	RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0,
				1398	RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0,
				1399	RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0,
				1400	}
				1401	}
				1402	}
				1403
				1404	/// The kind of a repetition operator.
				1405	#[derive(Clone, Debug, Eq, PartialEq)]
				1406	pub enum RepetitionKind {
				1407	/// Matches a sub-expression zero or one times.
				1408	ZeroOrOne,
				1409	/// Matches a sub-expression zero or more times.
				1410	ZeroOrMore,
				1411	/// Matches a sub-expression one or more times.
				1412	OneOrMore,
				1413	/// Matches a sub-expression within a bounded range of times.
				1414	Range(RepetitionRange),
				1415	}
				1416
				1417	/// The kind of a counted repetition operator.
				1418	#[derive(Clone, Debug, Eq, PartialEq)]
				1419	pub enum RepetitionRange {
				1420	/// Matches a sub-expression exactly this many times.
				1421	Exactly(u32),
				1422	/// Matches a sub-expression at least this many times.
				1423	AtLeast(u32),
				1424	/// Matches a sub-expression at least `m` times and at most `n` times.
				1425	Bounded(u32, u32),
				1426	}
				1427
				1428	/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
				1429	/// space but heap space proportional to the depth of the total `Hir`.
				1430	impl Drop for Hir {
				1431	fn drop(&mut self) {
				1432	use std::mem;
				1433
				1434	match *self.kind() {
				1435	HirKind::Empty
				1436	\| HirKind::Literal(_)
				1437	\| HirKind::Class(_)
				1438	\| HirKind::Anchor(_)
				1439	\| HirKind::WordBoundary(_) => return,
				1440	HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
				1441	HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
				1442	HirKind::Concat(ref x) if x.is_empty() => return,
				1443	HirKind::Alternation(ref x) if x.is_empty() => return,
				1444	_ => {}
				1445	}
				1446
				1447	let mut stack = vec![mem::replace(self, Hir::empty())];
				1448	while let Some(mut expr) = stack.pop() {
				1449	match expr.kind {
				1450	HirKind::Empty
				1451	\| HirKind::Literal(_)
				1452	\| HirKind::Class(_)
				1453	\| HirKind::Anchor(_)
				1454	\| HirKind::WordBoundary(_) => {}
				1455	HirKind::Group(ref mut x) => {
				1456	stack.push(mem::replace(&mut x.hir, Hir::empty()));
				1457	}
				1458	HirKind::Repetition(ref mut x) => {
				1459	stack.push(mem::replace(&mut x.hir, Hir::empty()));
				1460	}
				1461	HirKind::Concat(ref mut x) => {
				1462	stack.extend(x.drain(..));
				1463	}
				1464	HirKind::Alternation(ref mut x) => {
				1465	stack.extend(x.drain(..));
				1466	}
				1467	}
				1468	}
				1469	}
				1470	}
				1471
				1472	/// A type that documents various attributes of an HIR expression.
				1473	///
				1474	/// These attributes are typically defined inductively on the HIR.
				1475	#[derive(Clone, Debug, Eq, PartialEq)]
				1476	struct HirInfo {
				1477	/// Represent yes/no questions by a bitfield to conserve space, since
				1478	/// this is included in every HIR expression.
				1479	///
				1480	/// If more attributes need to be added, it is OK to increase the size of
				1481	/// this as appropriate.
				1482	bools: u16,
				1483	}
				1484
				1485	// A simple macro for defining bitfield accessors/mutators.
				1486	macro_rules! define_bool {
				1487	($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
				1488	fn $is_fn_name(&self) -> bool {
				1489	self.bools & (0b1 << $bit) > 0
				1490	}
				1491
				1492	fn $set_fn_name(&mut self, yes: bool) {
				1493	if yes {
				1494	self.bools \|= 1 << $bit;
				1495	} else {
				1496	self.bools &= !(1 << $bit);
				1497	}
				1498	}
Haibo Huang	5b6434b	2020-05-28 20:14:37 -0700	[diff] [blame^]	1499	};
Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	1500	}
				1501
				1502	impl HirInfo {
				1503	fn new() -> HirInfo {
				1504	HirInfo { bools: 0 }
				1505	}
				1506
				1507	define_bool!(0, is_always_utf8, set_always_utf8);
				1508	define_bool!(1, is_all_assertions, set_all_assertions);
				1509	define_bool!(2, is_anchored_start, set_anchored_start);
				1510	define_bool!(3, is_anchored_end, set_anchored_end);
				1511	define_bool!(4, is_line_anchored_start, set_line_anchored_start);
				1512	define_bool!(5, is_line_anchored_end, set_line_anchored_end);
				1513	define_bool!(6, is_any_anchored_start, set_any_anchored_start);
				1514	define_bool!(7, is_any_anchored_end, set_any_anchored_end);
				1515	define_bool!(8, is_match_empty, set_match_empty);
				1516	define_bool!(9, is_literal, set_literal);
				1517	define_bool!(10, is_alternation_literal, set_alternation_literal);
				1518	}
				1519
				1520	#[cfg(test)]
				1521	mod tests {
				1522	use super::*;
				1523
				1524	fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
				1525	let ranges: Vec<ClassUnicodeRange> = ranges
				1526	.iter()
				1527	.map(\|&(s, e)\| ClassUnicodeRange::new(s, e))
				1528	.collect();
				1529	ClassUnicode::new(ranges)
				1530	}
				1531
				1532	fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
				1533	let ranges: Vec<ClassBytesRange> =
				1534	ranges.iter().map(\|&(s, e)\| ClassBytesRange::new(s, e)).collect();
				1535	ClassBytes::new(ranges)
				1536	}
				1537
				1538	fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
				1539	cls.iter().map(\|x\| (x.start(), x.end())).collect()
				1540	}
				1541
				1542	#[cfg(feature = "unicode-case")]
				1543	fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
				1544	let mut cls_ = cls.clone();
				1545	cls_.case_fold_simple();
				1546	cls_
				1547	}
				1548
				1549	fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
				1550	let mut cls_ = cls1.clone();
				1551	cls_.union(cls2);
				1552	cls_
				1553	}
				1554
				1555	fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
				1556	let mut cls_ = cls1.clone();
				1557	cls_.intersect(cls2);
				1558	cls_
				1559	}
				1560
				1561	fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
				1562	let mut cls_ = cls1.clone();
				1563	cls_.difference(cls2);
				1564	cls_
				1565	}
				1566
				1567	fn usymdifference(
				1568	cls1: &ClassUnicode,
				1569	cls2: &ClassUnicode,
				1570	) -> ClassUnicode {
				1571	let mut cls_ = cls1.clone();
				1572	cls_.symmetric_difference(cls2);
				1573	cls_
				1574	}
				1575
				1576	fn unegate(cls: &ClassUnicode) -> ClassUnicode {
				1577	let mut cls_ = cls.clone();
				1578	cls_.negate();
				1579	cls_
				1580	}
				1581
				1582	fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
				1583	cls.iter().map(\|x\| (x.start(), x.end())).collect()
				1584	}
				1585
				1586	fn bcasefold(cls: &ClassBytes) -> ClassBytes {
				1587	let mut cls_ = cls.clone();
				1588	cls_.case_fold_simple();
				1589	cls_
				1590	}
				1591
				1592	fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
				1593	let mut cls_ = cls1.clone();
				1594	cls_.union(cls2);
				1595	cls_
				1596	}
				1597
				1598	fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
				1599	let mut cls_ = cls1.clone();
				1600	cls_.intersect(cls2);
				1601	cls_
				1602	}
				1603
				1604	fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
				1605	let mut cls_ = cls1.clone();
				1606	cls_.difference(cls2);
				1607	cls_
				1608	}
				1609
				1610	fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
				1611	let mut cls_ = cls1.clone();
				1612	cls_.symmetric_difference(cls2);
				1613	cls_
				1614	}
				1615
				1616	fn bnegate(cls: &ClassBytes) -> ClassBytes {
				1617	let mut cls_ = cls.clone();
				1618	cls_.negate();
				1619	cls_
				1620	}
				1621
				1622	#[test]
				1623	fn class_range_canonical_unicode() {
				1624	let range = ClassUnicodeRange::new('\u{00FF}', '\0');
				1625	assert_eq!('\0', range.start());
				1626	assert_eq!('\u{00FF}', range.end());
				1627	}
				1628
				1629	#[test]
				1630	fn class_range_canonical_bytes() {
				1631	let range = ClassBytesRange::new(b'\xFF', b'\0');
				1632	assert_eq!(b'\0', range.start());
				1633	assert_eq!(b'\xFF', range.end());
				1634	}
				1635
				1636	#[test]
				1637	fn class_canonicalize_unicode() {
				1638	let cls = uclass(&[('a', 'c'), ('x', 'z')]);
				1639	let expected = vec![('a', 'c'), ('x', 'z')];
				1640	assert_eq!(expected, uranges(&cls));
				1641
				1642	let cls = uclass(&[('x', 'z'), ('a', 'c')]);
				1643	let expected = vec![('a', 'c'), ('x', 'z')];
				1644	assert_eq!(expected, uranges(&cls));
				1645
				1646	let cls = uclass(&[('x', 'z'), ('w', 'y')]);
				1647	let expected = vec![('w', 'z')];
				1648	assert_eq!(expected, uranges(&cls));
				1649
				1650	let cls = uclass(&[
				1651	('c', 'f'),
				1652	('a', 'g'),
				1653	('d', 'j'),
				1654	('a', 'c'),
				1655	('m', 'p'),
				1656	('l', 's'),
				1657	]);
				1658	let expected = vec![('a', 'j'), ('l', 's')];
				1659	assert_eq!(expected, uranges(&cls));
				1660
				1661	let cls = uclass(&[('x', 'z'), ('u', 'w')]);
				1662	let expected = vec![('u', 'z')];
				1663	assert_eq!(expected, uranges(&cls));
				1664
				1665	let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
				1666	let expected = vec![('\x00', '\u{10FFFF}')];
				1667	assert_eq!(expected, uranges(&cls));
				1668
				1669	let cls = uclass(&[('a', 'a'), ('b', 'b')]);
				1670	let expected = vec![('a', 'b')];
				1671	assert_eq!(expected, uranges(&cls));
				1672	}
				1673
				1674	#[test]
				1675	fn class_canonicalize_bytes() {
				1676	let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
				1677	let expected = vec![(b'a', b'c'), (b'x', b'z')];
				1678	assert_eq!(expected, branges(&cls));
				1679
				1680	let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]);
				1681	let expected = vec![(b'a', b'c'), (b'x', b'z')];
				1682	assert_eq!(expected, branges(&cls));
				1683
				1684	let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]);
				1685	let expected = vec![(b'w', b'z')];
				1686	assert_eq!(expected, branges(&cls));
				1687
				1688	let cls = bclass(&[
				1689	(b'c', b'f'),
				1690	(b'a', b'g'),
				1691	(b'd', b'j'),
				1692	(b'a', b'c'),
				1693	(b'm', b'p'),
				1694	(b'l', b's'),
				1695	]);
				1696	let expected = vec![(b'a', b'j'), (b'l', b's')];
				1697	assert_eq!(expected, branges(&cls));
				1698
				1699	let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]);
				1700	let expected = vec![(b'u', b'z')];
				1701	assert_eq!(expected, branges(&cls));
				1702
				1703	let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]);
				1704	let expected = vec![(b'\x00', b'\xFF')];
				1705	assert_eq!(expected, branges(&cls));
				1706
				1707	let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
				1708	let expected = vec![(b'a', b'b')];
				1709	assert_eq!(expected, branges(&cls));
				1710	}
				1711
				1712	#[test]
				1713	#[cfg(feature = "unicode-case")]
				1714	fn class_case_fold_unicode() {
				1715	let cls = uclass(&[
				1716	('C', 'F'),
				1717	('A', 'G'),
				1718	('D', 'J'),
				1719	('A', 'C'),
				1720	('M', 'P'),
				1721	('L', 'S'),
				1722	('c', 'f'),
				1723	]);
				1724	let expected = uclass(&[
				1725	('A', 'J'),
				1726	('L', 'S'),
				1727	('a', 'j'),
				1728	('l', 's'),
				1729	('\u{17F}', '\u{17F}'),
				1730	]);
				1731	assert_eq!(expected, ucasefold(&cls));
				1732
				1733	let cls = uclass(&[('A', 'Z')]);
				1734	let expected = uclass(&[
				1735	('A', 'Z'),
				1736	('a', 'z'),
				1737	('\u{17F}', '\u{17F}'),
				1738	('\u{212A}', '\u{212A}'),
				1739	]);
				1740	assert_eq!(expected, ucasefold(&cls));
				1741
				1742	let cls = uclass(&[('a', 'z')]);
				1743	let expected = uclass(&[
				1744	('A', 'Z'),
				1745	('a', 'z'),
				1746	('\u{17F}', '\u{17F}'),
				1747	('\u{212A}', '\u{212A}'),
				1748	]);
				1749	assert_eq!(expected, ucasefold(&cls));
				1750
				1751	let cls = uclass(&[('A', 'A'), ('_', '_')]);
				1752	let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]);
				1753	assert_eq!(expected, ucasefold(&cls));
				1754
				1755	let cls = uclass(&[('A', 'A'), ('=', '=')]);
				1756	let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]);
				1757	assert_eq!(expected, ucasefold(&cls));
				1758
				1759	let cls = uclass(&[('\x00', '\x10')]);
				1760	assert_eq!(cls, ucasefold(&cls));
				1761
				1762	let cls = uclass(&[('k', 'k')]);
				1763	let expected =
				1764	uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]);
				1765	assert_eq!(expected, ucasefold(&cls));
				1766
				1767	let cls = uclass(&[('@', '@')]);
				1768	assert_eq!(cls, ucasefold(&cls));
				1769	}
				1770
				1771	#[test]
				1772	#[cfg(not(feature = "unicode-case"))]
				1773	fn class_case_fold_unicode_disabled() {
				1774	let mut cls = uclass(&[
				1775	('C', 'F'),
				1776	('A', 'G'),
				1777	('D', 'J'),
				1778	('A', 'C'),
				1779	('M', 'P'),
				1780	('L', 'S'),
				1781	('c', 'f'),
				1782	]);
				1783	assert!(cls.try_case_fold_simple().is_err());
				1784	}
				1785
				1786	#[test]
				1787	#[should_panic]
				1788	#[cfg(not(feature = "unicode-case"))]
				1789	fn class_case_fold_unicode_disabled_panics() {
				1790	let mut cls = uclass(&[
				1791	('C', 'F'),
				1792	('A', 'G'),
				1793	('D', 'J'),
				1794	('A', 'C'),
				1795	('M', 'P'),
				1796	('L', 'S'),
				1797	('c', 'f'),
				1798	]);
				1799	cls.case_fold_simple();
				1800	}
				1801
				1802	#[test]
				1803	fn class_case_fold_bytes() {
				1804	let cls = bclass(&[
				1805	(b'C', b'F'),
				1806	(b'A', b'G'),
				1807	(b'D', b'J'),
				1808	(b'A', b'C'),
				1809	(b'M', b'P'),
				1810	(b'L', b'S'),
				1811	(b'c', b'f'),
				1812	]);
				1813	let expected =
				1814	bclass(&[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')]);
				1815	assert_eq!(expected, bcasefold(&cls));
				1816
				1817	let cls = bclass(&[(b'A', b'Z')]);
				1818	let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
				1819	assert_eq!(expected, bcasefold(&cls));
				1820
				1821	let cls = bclass(&[(b'a', b'z')]);
				1822	let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
				1823	assert_eq!(expected, bcasefold(&cls));
				1824
				1825	let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
				1826	let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]);
				1827	assert_eq!(expected, bcasefold(&cls));
				1828
				1829	let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
				1830	let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]);
				1831	assert_eq!(expected, bcasefold(&cls));
				1832
				1833	let cls = bclass(&[(b'\x00', b'\x10')]);
				1834	assert_eq!(cls, bcasefold(&cls));
				1835
				1836	let cls = bclass(&[(b'k', b'k')]);
				1837	let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]);
				1838	assert_eq!(expected, bcasefold(&cls));
				1839
				1840	let cls = bclass(&[(b'@', b'@')]);
				1841	assert_eq!(cls, bcasefold(&cls));
				1842	}
				1843
				1844	#[test]
				1845	fn class_negate_unicode() {
				1846	let cls = uclass(&[('a', 'a')]);
				1847	let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
				1848	assert_eq!(expected, unegate(&cls));
				1849
				1850	let cls = uclass(&[('a', 'a'), ('b', 'b')]);
				1851	let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]);
				1852	assert_eq!(expected, unegate(&cls));
				1853
				1854	let cls = uclass(&[('a', 'c'), ('x', 'z')]);
				1855	let expected = uclass(&[
				1856	('\x00', '\x60'),
				1857	('\x64', '\x77'),
				1858	('\x7B', '\u{10FFFF}'),
				1859	]);
				1860	assert_eq!(expected, unegate(&cls));
				1861
				1862	let cls = uclass(&[('\x00', 'a')]);
				1863	let expected = uclass(&[('\x62', '\u{10FFFF}')]);
				1864	assert_eq!(expected, unegate(&cls));
				1865
				1866	let cls = uclass(&[('a', '\u{10FFFF}')]);
				1867	let expected = uclass(&[('\x00', '\x60')]);
				1868	assert_eq!(expected, unegate(&cls));
				1869
				1870	let cls = uclass(&[('\x00', '\u{10FFFF}')]);
				1871	let expected = uclass(&[]);
				1872	assert_eq!(expected, unegate(&cls));
				1873
				1874	let cls = uclass(&[]);
				1875	let expected = uclass(&[('\x00', '\u{10FFFF}')]);
				1876	assert_eq!(expected, unegate(&cls));
				1877
				1878	let cls =
				1879	uclass(&[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')]);
				1880	let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]);
				1881	assert_eq!(expected, unegate(&cls));
				1882
				1883	let cls = uclass(&[('\x00', '\u{D7FF}')]);
				1884	let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]);
				1885	assert_eq!(expected, unegate(&cls));
				1886
				1887	let cls = uclass(&[('\x00', '\u{D7FE}')]);
				1888	let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]);
				1889	assert_eq!(expected, unegate(&cls));
				1890
				1891	let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]);
				1892	let expected = uclass(&[('\x00', '\u{D7FF}')]);
				1893	assert_eq!(expected, unegate(&cls));
				1894
				1895	let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]);
				1896	let expected = uclass(&[('\x00', '\u{E000}')]);
				1897	assert_eq!(expected, unegate(&cls));
				1898	}
				1899
				1900	#[test]
				1901	fn class_negate_bytes() {
				1902	let cls = bclass(&[(b'a', b'a')]);
				1903	let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]);
				1904	assert_eq!(expected, bnegate(&cls));
				1905
				1906	let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
				1907	let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]);
				1908	assert_eq!(expected, bnegate(&cls));
				1909
				1910	let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
				1911	let expected = bclass(&[
				1912	(b'\x00', b'\x60'),
				1913	(b'\x64', b'\x77'),
				1914	(b'\x7B', b'\xFF'),
				1915	]);
				1916	assert_eq!(expected, bnegate(&cls));
				1917
				1918	let cls = bclass(&[(b'\x00', b'a')]);
				1919	let expected = bclass(&[(b'\x62', b'\xFF')]);
				1920	assert_eq!(expected, bnegate(&cls));
				1921
				1922	let cls = bclass(&[(b'a', b'\xFF')]);
				1923	let expected = bclass(&[(b'\x00', b'\x60')]);
				1924	assert_eq!(expected, bnegate(&cls));
				1925
				1926	let cls = bclass(&[(b'\x00', b'\xFF')]);
				1927	let expected = bclass(&[]);
				1928	assert_eq!(expected, bnegate(&cls));
				1929
				1930	let cls = bclass(&[]);
				1931	let expected = bclass(&[(b'\x00', b'\xFF')]);
				1932	assert_eq!(expected, bnegate(&cls));
				1933
				1934	let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]);
				1935	let expected = bclass(&[(b'\xFE', b'\xFE')]);
				1936	assert_eq!(expected, bnegate(&cls));
				1937	}
				1938
				1939	#[test]
				1940	fn class_union_unicode() {
				1941	let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
				1942	let cls2 = uclass(&[('a', 'z')]);
				1943	let expected = uclass(&[('a', 'z'), ('A', 'C')]);
				1944	assert_eq!(expected, uunion(&cls1, &cls2));
				1945	}
				1946
				1947	#[test]
				1948	fn class_union_bytes() {
				1949	let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
				1950	let cls2 = bclass(&[(b'a', b'z')]);
				1951	let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]);
				1952	assert_eq!(expected, bunion(&cls1, &cls2));
				1953	}
				1954
				1955	#[test]
				1956	fn class_intersect_unicode() {
				1957	let cls1 = uclass(&[]);
				1958	let cls2 = uclass(&[('a', 'a')]);
				1959	let expected = uclass(&[]);
				1960	assert_eq!(expected, uintersect(&cls1, &cls2));
				1961
				1962	let cls1 = uclass(&[('a', 'a')]);
				1963	let cls2 = uclass(&[('a', 'a')]);
				1964	let expected = uclass(&[('a', 'a')]);
				1965	assert_eq!(expected, uintersect(&cls1, &cls2));
				1966
				1967	let cls1 = uclass(&[('a', 'a')]);
				1968	let cls2 = uclass(&[('b', 'b')]);
				1969	let expected = uclass(&[]);
				1970	assert_eq!(expected, uintersect(&cls1, &cls2));
				1971
				1972	let cls1 = uclass(&[('a', 'a')]);
				1973	let cls2 = uclass(&[('a', 'c')]);
				1974	let expected = uclass(&[('a', 'a')]);
				1975	assert_eq!(expected, uintersect(&cls1, &cls2));
				1976
				1977	let cls1 = uclass(&[('a', 'b')]);
				1978	let cls2 = uclass(&[('a', 'c')]);
				1979	let expected = uclass(&[('a', 'b')]);
				1980	assert_eq!(expected, uintersect(&cls1, &cls2));
				1981
				1982	let cls1 = uclass(&[('a', 'b')]);
				1983	let cls2 = uclass(&[('b', 'c')]);
				1984	let expected = uclass(&[('b', 'b')]);
				1985	assert_eq!(expected, uintersect(&cls1, &cls2));
				1986
				1987	let cls1 = uclass(&[('a', 'b')]);
				1988	let cls2 = uclass(&[('c', 'd')]);
				1989	let expected = uclass(&[]);
				1990	assert_eq!(expected, uintersect(&cls1, &cls2));
				1991
				1992	let cls1 = uclass(&[('b', 'c')]);
				1993	let cls2 = uclass(&[('a', 'd')]);
				1994	let expected = uclass(&[('b', 'c')]);
				1995	assert_eq!(expected, uintersect(&cls1, &cls2));
				1996
				1997	let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
				1998	let cls2 = uclass(&[('a', 'h')]);
				1999	let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
				2000	assert_eq!(expected, uintersect(&cls1, &cls2));
				2001
				2002	let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
				2003	let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
				2004	let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
				2005	assert_eq!(expected, uintersect(&cls1, &cls2));
				2006
				2007	let cls1 = uclass(&[('a', 'b'), ('g', 'h')]);
				2008	let cls2 = uclass(&[('d', 'e'), ('k', 'l')]);
				2009	let expected = uclass(&[]);
				2010	assert_eq!(expected, uintersect(&cls1, &cls2));
				2011
				2012	let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
				2013	let cls2 = uclass(&[('h', 'h')]);
				2014	let expected = uclass(&[('h', 'h')]);
				2015	assert_eq!(expected, uintersect(&cls1, &cls2));
				2016
				2017	let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
				2018	let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
				2019	let expected = uclass(&[]);
				2020	assert_eq!(expected, uintersect(&cls1, &cls2));
				2021
				2022	let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
				2023	let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
				2024	let expected = uclass(&[('b', 'f')]);
				2025	assert_eq!(expected, uintersect(&cls1, &cls2));
				2026	}
				2027
				2028	#[test]
				2029	fn class_intersect_bytes() {
				2030	let cls1 = bclass(&[]);
				2031	let cls2 = bclass(&[(b'a', b'a')]);
				2032	let expected = bclass(&[]);
				2033	assert_eq!(expected, bintersect(&cls1, &cls2));
				2034
				2035	let cls1 = bclass(&[(b'a', b'a')]);
				2036	let cls2 = bclass(&[(b'a', b'a')]);
				2037	let expected = bclass(&[(b'a', b'a')]);
				2038	assert_eq!(expected, bintersect(&cls1, &cls2));
				2039
				2040	let cls1 = bclass(&[(b'a', b'a')]);
				2041	let cls2 = bclass(&[(b'b', b'b')]);
				2042	let expected = bclass(&[]);
				2043	assert_eq!(expected, bintersect(&cls1, &cls2));
				2044
				2045	let cls1 = bclass(&[(b'a', b'a')]);
				2046	let cls2 = bclass(&[(b'a', b'c')]);
				2047	let expected = bclass(&[(b'a', b'a')]);
				2048	assert_eq!(expected, bintersect(&cls1, &cls2));
				2049
				2050	let cls1 = bclass(&[(b'a', b'b')]);
				2051	let cls2 = bclass(&[(b'a', b'c')]);
				2052	let expected = bclass(&[(b'a', b'b')]);
				2053	assert_eq!(expected, bintersect(&cls1, &cls2));
				2054
				2055	let cls1 = bclass(&[(b'a', b'b')]);
				2056	let cls2 = bclass(&[(b'b', b'c')]);
				2057	let expected = bclass(&[(b'b', b'b')]);
				2058	assert_eq!(expected, bintersect(&cls1, &cls2));
				2059
				2060	let cls1 = bclass(&[(b'a', b'b')]);
				2061	let cls2 = bclass(&[(b'c', b'd')]);
				2062	let expected = bclass(&[]);
				2063	assert_eq!(expected, bintersect(&cls1, &cls2));
				2064
				2065	let cls1 = bclass(&[(b'b', b'c')]);
				2066	let cls2 = bclass(&[(b'a', b'd')]);
				2067	let expected = bclass(&[(b'b', b'c')]);
				2068	assert_eq!(expected, bintersect(&cls1, &cls2));
				2069
				2070	let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
				2071	let cls2 = bclass(&[(b'a', b'h')]);
				2072	let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
				2073	assert_eq!(expected, bintersect(&cls1, &cls2));
				2074
				2075	let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
				2076	let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
				2077	let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
				2078	assert_eq!(expected, bintersect(&cls1, &cls2));
				2079
				2080	let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]);
				2081	let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]);
				2082	let expected = bclass(&[]);
				2083	assert_eq!(expected, bintersect(&cls1, &cls2));
				2084
				2085	let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
				2086	let cls2 = bclass(&[(b'h', b'h')]);
				2087	let expected = bclass(&[(b'h', b'h')]);
				2088	assert_eq!(expected, bintersect(&cls1, &cls2));
				2089
				2090	let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]);
				2091	let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]);
				2092	let expected = bclass(&[]);
				2093	assert_eq!(expected, bintersect(&cls1, &cls2));
				2094
				2095	let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]);
				2096	let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]);
				2097	let expected = bclass(&[(b'b', b'f')]);
				2098	assert_eq!(expected, bintersect(&cls1, &cls2));
				2099	}
				2100
				2101	#[test]
				2102	fn class_difference_unicode() {
				2103	let cls1 = uclass(&[('a', 'a')]);
				2104	let cls2 = uclass(&[('a', 'a')]);
				2105	let expected = uclass(&[]);
				2106	assert_eq!(expected, udifference(&cls1, &cls2));
				2107
				2108	let cls1 = uclass(&[('a', 'a')]);
				2109	let cls2 = uclass(&[]);
				2110	let expected = uclass(&[('a', 'a')]);
				2111	assert_eq!(expected, udifference(&cls1, &cls2));
				2112
				2113	let cls1 = uclass(&[]);
				2114	let cls2 = uclass(&[('a', 'a')]);
				2115	let expected = uclass(&[]);
				2116	assert_eq!(expected, udifference(&cls1, &cls2));
				2117
				2118	let cls1 = uclass(&[('a', 'z')]);
				2119	let cls2 = uclass(&[('a', 'a')]);
				2120	let expected = uclass(&[('b', 'z')]);
				2121	assert_eq!(expected, udifference(&cls1, &cls2));
				2122
				2123	let cls1 = uclass(&[('a', 'z')]);
				2124	let cls2 = uclass(&[('z', 'z')]);
				2125	let expected = uclass(&[('a', 'y')]);
				2126	assert_eq!(expected, udifference(&cls1, &cls2));
				2127
				2128	let cls1 = uclass(&[('a', 'z')]);
				2129	let cls2 = uclass(&[('m', 'm')]);
				2130	let expected = uclass(&[('a', 'l'), ('n', 'z')]);
				2131	assert_eq!(expected, udifference(&cls1, &cls2));
				2132
				2133	let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
				2134	let cls2 = uclass(&[('a', 'z')]);
				2135	let expected = uclass(&[]);
				2136	assert_eq!(expected, udifference(&cls1, &cls2));
				2137
				2138	let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
				2139	let cls2 = uclass(&[('d', 'v')]);
				2140	let expected = uclass(&[('a', 'c')]);
				2141	assert_eq!(expected, udifference(&cls1, &cls2));
				2142
				2143	let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
				2144	let cls2 = uclass(&[('b', 'g'), ('s', 'u')]);
				2145	let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
				2146	assert_eq!(expected, udifference(&cls1, &cls2));
				2147
				2148	let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
				2149	let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]);
				2150	let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
				2151	assert_eq!(expected, udifference(&cls1, &cls2));
				2152
				2153	let cls1 = uclass(&[('x', 'z')]);
				2154	let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
				2155	let expected = uclass(&[('x', 'z')]);
				2156	assert_eq!(expected, udifference(&cls1, &cls2));
				2157
				2158	let cls1 = uclass(&[('a', 'z')]);
				2159	let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
				2160	let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]);
				2161	assert_eq!(expected, udifference(&cls1, &cls2));
				2162	}
				2163
				2164	#[test]
				2165	fn class_difference_bytes() {
				2166	let cls1 = bclass(&[(b'a', b'a')]);
				2167	let cls2 = bclass(&[(b'a', b'a')]);
				2168	let expected = bclass(&[]);
				2169	assert_eq!(expected, bdifference(&cls1, &cls2));
				2170
				2171	let cls1 = bclass(&[(b'a', b'a')]);
				2172	let cls2 = bclass(&[]);
				2173	let expected = bclass(&[(b'a', b'a')]);
				2174	assert_eq!(expected, bdifference(&cls1, &cls2));
				2175
				2176	let cls1 = bclass(&[]);
				2177	let cls2 = bclass(&[(b'a', b'a')]);
				2178	let expected = bclass(&[]);
				2179	assert_eq!(expected, bdifference(&cls1, &cls2));
				2180
				2181	let cls1 = bclass(&[(b'a', b'z')]);
				2182	let cls2 = bclass(&[(b'a', b'a')]);
				2183	let expected = bclass(&[(b'b', b'z')]);
				2184	assert_eq!(expected, bdifference(&cls1, &cls2));
				2185
				2186	let cls1 = bclass(&[(b'a', b'z')]);
				2187	let cls2 = bclass(&[(b'z', b'z')]);
				2188	let expected = bclass(&[(b'a', b'y')]);
				2189	assert_eq!(expected, bdifference(&cls1, &cls2));
				2190
				2191	let cls1 = bclass(&[(b'a', b'z')]);
				2192	let cls2 = bclass(&[(b'm', b'm')]);
				2193	let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]);
				2194	assert_eq!(expected, bdifference(&cls1, &cls2));
				2195
				2196	let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
				2197	let cls2 = bclass(&[(b'a', b'z')]);
				2198	let expected = bclass(&[]);
				2199	assert_eq!(expected, bdifference(&cls1, &cls2));
				2200
				2201	let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
				2202	let cls2 = bclass(&[(b'd', b'v')]);
				2203	let expected = bclass(&[(b'a', b'c')]);
				2204	assert_eq!(expected, bdifference(&cls1, &cls2));
				2205
				2206	let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
				2207	let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]);
				2208	let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
				2209	assert_eq!(expected, bdifference(&cls1, &cls2));
				2210
				2211	let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
				2212	let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]);
				2213	let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
				2214	assert_eq!(expected, bdifference(&cls1, &cls2));
				2215
				2216	let cls1 = bclass(&[(b'x', b'z')]);
				2217	let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
				2218	let expected = bclass(&[(b'x', b'z')]);
				2219	assert_eq!(expected, bdifference(&cls1, &cls2));
				2220
				2221	let cls1 = bclass(&[(b'a', b'z')]);
				2222	let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
				2223	let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]);
				2224	assert_eq!(expected, bdifference(&cls1, &cls2));
				2225	}
				2226
				2227	#[test]
				2228	fn class_symmetric_difference_unicode() {
				2229	let cls1 = uclass(&[('a', 'm')]);
				2230	let cls2 = uclass(&[('g', 't')]);
				2231	let expected = uclass(&[('a', 'f'), ('n', 't')]);
				2232	assert_eq!(expected, usymdifference(&cls1, &cls2));
				2233	}
				2234
				2235	#[test]
				2236	fn class_symmetric_difference_bytes() {
				2237	let cls1 = bclass(&[(b'a', b'm')]);
				2238	let cls2 = bclass(&[(b'g', b't')]);
				2239	let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
				2240	assert_eq!(expected, bsymdifference(&cls1, &cls2));
				2241	}
				2242
				2243	#[test]
				2244	#[should_panic]
				2245	fn hir_byte_literal_non_ascii() {
				2246	Hir::literal(Literal::Byte(b'a'));
				2247	}
				2248
				2249	// We use a thread with an explicit stack size to test that our destructor
				2250	// for Hir can handle arbitrarily sized expressions in constant stack
				2251	// space. In case we run on a platform without threads (WASM?), we limit
				2252	// this test to Windows/Unix.
				2253	#[test]
				2254	#[cfg(any(unix, windows))]
				2255	fn no_stack_overflow_on_drop() {
				2256	use std::thread;
				2257
				2258	let run = \|\| {
				2259	let mut expr = Hir::empty();
				2260	for _ in 0..100 {
				2261	expr = Hir::group(Group {
				2262	kind: GroupKind::NonCapturing,
				2263	hir: Box::new(expr),
				2264	});
				2265	expr = Hir::repetition(Repetition {
				2266	kind: RepetitionKind::ZeroOrOne,
				2267	greedy: true,
				2268	hir: Box::new(expr),
				2269	});
				2270
				2271	expr = Hir {
				2272	kind: HirKind::Concat(vec![expr]),
				2273	info: HirInfo::new(),
				2274	};
				2275	expr = Hir {
				2276	kind: HirKind::Alternation(vec![expr]),
				2277	info: HirInfo::new(),
				2278	};
				2279	}
				2280	assert!(!expr.kind.is_empty());
				2281	};
				2282
				2283	// We run our test on a thread with a small stack size so we can
				2284	// force the issue more easily.
				2285	thread::Builder::new()
				2286	.stack_size(1 << 10)
				2287	.spawn(run)
				2288	.unwrap()
				2289	.join()
				2290	.unwrap();
				2291	}
				2292	}