Blame - src/ast/mod.rs - platform/external/rust/crates/regex-syntax

blob: 7179f2d4038b755fd1b6696718edac61a62d1c78 [file] [log] [blame]

Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	1	/*!
				2	Defines an abstract syntax for regular expressions.
				3	*/
				4
				5	use std::cmp::Ordering;
				6	use std::error;
				7	use std::fmt;
				8
				9	pub use ast::visitor::{visit, Visitor};
				10
				11	pub mod parse;
				12	pub mod print;
				13	mod visitor;
				14
				15	/// An error that occurred while parsing a regular expression into an abstract
				16	/// syntax tree.
				17	///
				18	/// Note that note all ASTs represents a valid regular expression. For example,
				19	/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
				20	/// valid Unicode property name. That particular error is reported when
				21	/// translating an AST to the high-level intermediate representation (`HIR`).
				22	#[derive(Clone, Debug, Eq, PartialEq)]
				23	pub struct Error {
				24	/// The kind of error.
				25	kind: ErrorKind,
				26	/// The original pattern that the parser generated the error from. Every
				27	/// span in an error is a valid range into this string.
				28	pattern: String,
				29	/// The span of this error.
				30	span: Span,
				31	}
				32
				33	impl Error {
				34	/// Return the type of this error.
				35	pub fn kind(&self) -> &ErrorKind {
				36	&self.kind
				37	}
				38
				39	/// The original pattern string in which this error occurred.
				40	///
				41	/// Every span reported by this error is reported in terms of this string.
				42	pub fn pattern(&self) -> &str {
				43	&self.pattern
				44	}
				45
				46	/// Return the span at which this error occurred.
				47	pub fn span(&self) -> &Span {
				48	&self.span
				49	}
				50
				51	/// Return an auxiliary span. This span exists only for some errors that
				52	/// benefit from being able to point to two locations in the original
				53	/// regular expression. For example, "duplicate" errors will have the
				54	/// main error position set to the duplicate occurrence while its
				55	/// auxiliary span will be set to the initial occurrence.
				56	pub fn auxiliary_span(&self) -> Option<&Span> {
				57	use self::ErrorKind::*;
				58	match self.kind {
				59	FlagDuplicate { ref original } => Some(original),
				60	FlagRepeatedNegation { ref original, .. } => Some(original),
				61	GroupNameDuplicate { ref original, .. } => Some(original),
				62	_ => None,
				63	}
				64	}
				65	}
				66
				67	/// The type of an error that occurred while building an AST.
				68	#[derive(Clone, Debug, Eq, PartialEq)]
				69	pub enum ErrorKind {
				70	/// The capturing group limit was exceeded.
				71	///
				72	/// Note that this represents a limit on the total number of capturing
				73	/// groups in a regex and not necessarily the number of nested capturing
				74	/// groups. That is, the nest limit can be low and it is still possible for
				75	/// this error to occur.
				76	CaptureLimitExceeded,
				77	/// An invalid escape sequence was found in a character class set.
				78	ClassEscapeInvalid,
				79	/// An invalid character class range was found. An invalid range is any
				80	/// range where the start is greater than the end.
				81	ClassRangeInvalid,
				82	/// An invalid range boundary was found in a character class. Range
				83	/// boundaries must be a single literal codepoint, but this error indicates
				84	/// that something else was found, such as a nested class.
				85	ClassRangeLiteral,
				86	/// An opening `[` was found with no corresponding closing `]`.
				87	ClassUnclosed,
				88	/// Note that this error variant is no longer used. Namely, a decimal
				89	/// number can only appear as a repetition quantifier. When the number
				90	/// in a repetition quantifier is empty, then it gets its own specialized
				91	/// error, `RepetitionCountDecimalEmpty`.
				92	DecimalEmpty,
				93	/// An invalid decimal number was given where one was expected.
				94	DecimalInvalid,
				95	/// A bracketed hex literal was empty.
				96	EscapeHexEmpty,
				97	/// A bracketed hex literal did not correspond to a Unicode scalar value.
				98	EscapeHexInvalid,
				99	/// An invalid hexadecimal digit was found.
				100	EscapeHexInvalidDigit,
				101	/// EOF was found before an escape sequence was completed.
				102	EscapeUnexpectedEof,
				103	/// An unrecognized escape sequence.
				104	EscapeUnrecognized,
				105	/// A dangling negation was used when setting flags, e.g., `i-`.
				106	FlagDanglingNegation,
				107	/// A flag was used twice, e.g., `i-i`.
				108	FlagDuplicate {
				109	/// The position of the original flag. The error position
				110	/// points to the duplicate flag.
				111	original: Span,
				112	},
				113	/// The negation operator was used twice, e.g., `-i-s`.
				114	FlagRepeatedNegation {
				115	/// The position of the original negation operator. The error position
				116	/// points to the duplicate negation operator.
				117	original: Span,
				118	},
				119	/// Expected a flag but got EOF, e.g., `(?`.
				120	FlagUnexpectedEof,
				121	/// Unrecognized flag, e.g., `a`.
				122	FlagUnrecognized,
				123	/// A duplicate capture name was found.
				124	GroupNameDuplicate {
				125	/// The position of the initial occurrence of the capture name. The
				126	/// error position itself points to the duplicate occurrence.
				127	original: Span,
				128	},
				129	/// A capture group name is empty, e.g., `(?P<>abc)`.
				130	GroupNameEmpty,
				131	/// An invalid character was seen for a capture group name. This includes
				132	/// errors where the first character is a digit (even though subsequent
				133	/// characters are allowed to be digits).
				134	GroupNameInvalid,
				135	/// A closing `>` could not be found for a capture group name.
				136	GroupNameUnexpectedEof,
				137	/// An unclosed group, e.g., `(ab`.
				138	///
				139	/// The span of this error corresponds to the unclosed parenthesis.
				140	GroupUnclosed,
				141	/// An unopened group, e.g., `ab)`.
				142	GroupUnopened,
				143	/// The nest limit was exceeded. The limit stored here is the limit
				144	/// configured in the parser.
				145	NestLimitExceeded(u32),
				146	/// The range provided in a counted repetition operator is invalid. The
				147	/// range is invalid if the start is greater than the end.
				148	RepetitionCountInvalid,
				149	/// An opening `{` was not followed by a valid decimal value.
				150	/// For example, `x{}` or `x{]}` would fail.
				151	RepetitionCountDecimalEmpty,
				152	/// An opening `{` was found with no corresponding closing `}`.
				153	RepetitionCountUnclosed,
				154	/// A repetition operator was applied to a missing sub-expression. This
				155	/// occurs, for example, in the regex consisting of just a `*` or even
				156	/// `(?i)*`. It is, however, possible to create a repetition operating on
				157	/// an empty sub-expression. For example, `()*` is still considered valid.
				158	RepetitionMissing,
				159	/// The Unicode class is not valid. This typically occurs when a `\p` is
				160	/// followed by something other than a `{`.
				161	UnicodeClassInvalid,
				162	/// When octal support is disabled, this error is produced when an octal
				163	/// escape is used. The octal escape is assumed to be an invocation of
				164	/// a backreference, which is the common case.
				165	UnsupportedBackreference,
				166	/// When syntax similar to PCRE's look-around is used, this error is
				167	/// returned. Some example syntaxes that are rejected include, but are
				168	/// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
				169	/// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
				170	/// error is used to improve the user experience.
				171	UnsupportedLookAround,
				172	/// Hints that destructuring should not be exhaustive.
				173	///
				174	/// This enum may grow additional variants, so this makes sure clients
				175	/// don't count on exhaustive matching. (Otherwise, adding a new variant
				176	/// could break existing code.)
				177	#[doc(hidden)]
				178	__Nonexhaustive,
				179	}
				180
				181	impl error::Error for Error {
				182	// TODO: Remove this method entirely on the next breaking semver release.
				183	#[allow(deprecated)]
				184	fn description(&self) -> &str {
				185	use self::ErrorKind::*;
				186	match self.kind {
				187	CaptureLimitExceeded => "capture group limit exceeded",
				188	ClassEscapeInvalid => "invalid escape sequence in character class",
				189	ClassRangeInvalid => "invalid character class range",
				190	ClassRangeLiteral => "invalid range boundary, must be a literal",
				191	ClassUnclosed => "unclosed character class",
				192	DecimalEmpty => "empty decimal literal",
				193	DecimalInvalid => "invalid decimal literal",
				194	EscapeHexEmpty => "empty hexadecimal literal",
				195	EscapeHexInvalid => "invalid hexadecimal literal",
				196	EscapeHexInvalidDigit => "invalid hexadecimal digit",
				197	EscapeUnexpectedEof => "unexpected eof (escape sequence)",
				198	EscapeUnrecognized => "unrecognized escape sequence",
				199	FlagDanglingNegation => "dangling flag negation operator",
				200	FlagDuplicate { .. } => "duplicate flag",
				201	FlagRepeatedNegation { .. } => "repeated negation",
				202	FlagUnexpectedEof => "unexpected eof (flag)",
				203	FlagUnrecognized => "unrecognized flag",
				204	GroupNameDuplicate { .. } => "duplicate capture group name",
				205	GroupNameEmpty => "empty capture group name",
				206	GroupNameInvalid => "invalid capture group name",
				207	GroupNameUnexpectedEof => "unclosed capture group name",
				208	GroupUnclosed => "unclosed group",
				209	GroupUnopened => "unopened group",
				210	NestLimitExceeded(_) => "nest limit exceeded",
				211	RepetitionCountInvalid => "invalid repetition count range",
				212	RepetitionCountUnclosed => "unclosed counted repetition",
				213	RepetitionMissing => "repetition operator missing expression",
				214	UnicodeClassInvalid => "invalid Unicode character class",
				215	UnsupportedBackreference => "backreferences are not supported",
				216	UnsupportedLookAround => "look-around is not supported",
				217	_ => unreachable!(),
				218	}
				219	}
				220	}
				221
				222	impl fmt::Display for Error {
				223	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				224	::error::Formatter::from(self).fmt(f)
				225	}
				226	}
				227
				228	impl fmt::Display for ErrorKind {
				229	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				230	use self::ErrorKind::*;
				231	match *self {
				232	CaptureLimitExceeded => write!(
				233	f,
				234	"exceeded the maximum number of \
				235	capturing groups ({})",
				236	::std::u32::MAX
				237	),
				238	ClassEscapeInvalid => {
				239	write!(f, "invalid escape sequence found in character class")
				240	}
				241	ClassRangeInvalid => write!(
				242	f,
				243	"invalid character class range, \
				244	the start must be <= the end"
				245	),
				246	ClassRangeLiteral => {
				247	write!(f, "invalid range boundary, must be a literal")
				248	}
				249	ClassUnclosed => write!(f, "unclosed character class"),
				250	DecimalEmpty => write!(f, "decimal literal empty"),
				251	DecimalInvalid => write!(f, "decimal literal invalid"),
				252	EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
				253	EscapeHexInvalid => {
				254	write!(f, "hexadecimal literal is not a Unicode scalar value")
				255	}
				256	EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
				257	EscapeUnexpectedEof => write!(
				258	f,
				259	"incomplete escape sequence, \
				260	reached end of pattern prematurely"
				261	),
				262	EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
				263	FlagDanglingNegation => {
				264	write!(f, "dangling flag negation operator")
				265	}
				266	FlagDuplicate { .. } => write!(f, "duplicate flag"),
				267	FlagRepeatedNegation { .. } => {
				268	write!(f, "flag negation operator repeated")
				269	}
				270	FlagUnexpectedEof => {
				271	write!(f, "expected flag but got end of regex")
				272	}
				273	FlagUnrecognized => write!(f, "unrecognized flag"),
				274	GroupNameDuplicate { .. } => {
				275	write!(f, "duplicate capture group name")
				276	}
				277	GroupNameEmpty => write!(f, "empty capture group name"),
				278	GroupNameInvalid => write!(f, "invalid capture group character"),
				279	GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
				280	GroupUnclosed => write!(f, "unclosed group"),
				281	GroupUnopened => write!(f, "unopened group"),
				282	NestLimitExceeded(limit) => write!(
				283	f,
				284	"exceed the maximum number of \
				285	nested parentheses/brackets ({})",
				286	limit
				287	),
				288	RepetitionCountInvalid => write!(
				289	f,
				290	"invalid repetition count range, \
				291	the start must be <= the end"
				292	),
				293	RepetitionCountDecimalEmpty => {
				294	write!(f, "repetition quantifier expects a valid decimal")
				295	}
				296	RepetitionCountUnclosed => {
				297	write!(f, "unclosed counted repetition")
				298	}
				299	RepetitionMissing => {
				300	write!(f, "repetition operator missing expression")
				301	}
				302	UnicodeClassInvalid => {
				303	write!(f, "invalid Unicode character class")
				304	}
				305	UnsupportedBackreference => {
				306	write!(f, "backreferences are not supported")
				307	}
				308	UnsupportedLookAround => write!(
				309	f,
				310	"look-around, including look-ahead and look-behind, \
				311	is not supported"
				312	),
				313	_ => unreachable!(),
				314	}
				315	}
				316	}
				317
				318	/// Span represents the position information of a single AST item.
				319	///
				320	/// All span positions are absolute byte offsets that can be used on the
				321	/// original regular expression that was parsed.
				322	#[derive(Clone, Copy, Eq, PartialEq)]
				323	pub struct Span {
				324	/// The start byte offset.
				325	pub start: Position,
				326	/// The end byte offset.
				327	pub end: Position,
				328	}
				329
				330	impl fmt::Debug for Span {
				331	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				332	write!(f, "Span({:?}, {:?})", self.start, self.end)
				333	}
				334	}
				335
				336	impl Ord for Span {
				337	fn cmp(&self, other: &Span) -> Ordering {
				338	(&self.start, &self.end).cmp(&(&other.start, &other.end))
				339	}
				340	}
				341
				342	impl PartialOrd for Span {
				343	fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
				344	Some(self.cmp(other))
				345	}
				346	}
				347
				348	/// A single position in a regular expression.
				349	///
				350	/// A position encodes one half of a span, and include the byte offset, line
				351	/// number and column number.
				352	#[derive(Clone, Copy, Eq, PartialEq)]
				353	pub struct Position {
				354	/// The absolute offset of this position, starting at `0` from the
				355	/// beginning of the regular expression pattern string.
				356	pub offset: usize,
				357	/// The line number, starting at `1`.
				358	pub line: usize,
				359	/// The approximate column number, starting at `1`.
				360	pub column: usize,
				361	}
				362
				363	impl fmt::Debug for Position {
				364	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				365	write!(
				366	f,
				367	"Position(o: {:?}, l: {:?}, c: {:?})",
				368	self.offset, self.line, self.column
				369	)
				370	}
				371	}
				372
				373	impl Ord for Position {
				374	fn cmp(&self, other: &Position) -> Ordering {
				375	self.offset.cmp(&other.offset)
				376	}
				377	}
				378
				379	impl PartialOrd for Position {
				380	fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
				381	Some(self.cmp(other))
				382	}
				383	}
				384
				385	impl Span {
				386	/// Create a new span with the given positions.
				387	pub fn new(start: Position, end: Position) -> Span {
				388	Span { start: start, end: end }
				389	}
				390
				391	/// Create a new span using the given position as the start and end.
				392	pub fn splat(pos: Position) -> Span {
				393	Span::new(pos, pos)
				394	}
				395
				396	/// Create a new span by replacing the starting the position with the one
				397	/// given.
				398	pub fn with_start(self, pos: Position) -> Span {
				399	Span { start: pos, ..self }
				400	}
				401
				402	/// Create a new span by replacing the ending the position with the one
				403	/// given.
				404	pub fn with_end(self, pos: Position) -> Span {
				405	Span { end: pos, ..self }
				406	}
				407
				408	/// Returns true if and only if this span occurs on a single line.
				409	pub fn is_one_line(&self) -> bool {
				410	self.start.line == self.end.line
				411	}
				412
				413	/// Returns true if and only if this span is empty. That is, it points to
				414	/// a single position in the concrete syntax of a regular expression.
				415	pub fn is_empty(&self) -> bool {
				416	self.start.offset == self.end.offset
				417	}
				418	}
				419
				420	impl Position {
				421	/// Create a new position with the given information.
				422	///
				423	/// `offset` is the absolute offset of the position, starting at `0` from
				424	/// the beginning of the regular expression pattern string.
				425	///
				426	/// `line` is the line number, starting at `1`.
				427	///
				428	/// `column` is the approximate column number, starting at `1`.
				429	pub fn new(offset: usize, line: usize, column: usize) -> Position {
				430	Position { offset: offset, line: line, column: column }
				431	}
				432	}
				433
				434	/// An abstract syntax tree for a singular expression along with comments
				435	/// found.
				436	///
				437	/// Comments are not stored in the tree itself to avoid complexity. Each
				438	/// comment contains a span of precisely where it occurred in the original
				439	/// regular expression.
				440	#[derive(Clone, Debug, Eq, PartialEq)]
				441	pub struct WithComments {
				442	/// The actual ast.
				443	pub ast: Ast,
				444	/// All comments found in the original regular expression.
				445	pub comments: Vec<Comment>,
				446	}
				447
				448	/// A comment from a regular expression with an associated span.
				449	///
				450	/// A regular expression can only contain comments when the `x` flag is
				451	/// enabled.
				452	#[derive(Clone, Debug, Eq, PartialEq)]
				453	pub struct Comment {
				454	/// The span of this comment, including the beginning `#` and ending `\n`.
				455	pub span: Span,
				456	/// The comment text, starting with the first character following the `#`
				457	/// and ending with the last character preceding the `\n`.
				458	pub comment: String,
				459	}
				460
				461	/// An abstract syntax tree for a single regular expression.
				462	///
				463	/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
				464	/// space proportional to the size of the `Ast`.
				465	///
				466	/// This type defines its own destructor that uses constant stack space and
				467	/// heap space proportional to the size of the `Ast`.
				468	#[derive(Clone, Debug, Eq, PartialEq)]
				469	pub enum Ast {
				470	/// An empty regex that matches everything.
				471	Empty(Span),
				472	/// A set of flags, e.g., `(?is)`.
				473	Flags(SetFlags),
				474	/// A single character literal, which includes escape sequences.
				475	Literal(Literal),
				476	/// The "any character" class.
				477	Dot(Span),
				478	/// A single zero-width assertion.
				479	Assertion(Assertion),
				480	/// A single character class. This includes all forms of character classes
				481	/// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
				482	Class(Class),
				483	/// A repetition operator applied to an arbitrary regular expression.
				484	Repetition(Repetition),
				485	/// A grouped regular expression.
				486	Group(Group),
				487	/// An alternation of regular expressions.
				488	Alternation(Alternation),
				489	/// A concatenation of regular expressions.
				490	Concat(Concat),
				491	}
				492
				493	impl Ast {
				494	/// Return the span of this abstract syntax tree.
				495	pub fn span(&self) -> &Span {
				496	match *self {
				497	Ast::Empty(ref span) => span,
				498	Ast::Flags(ref x) => &x.span,
				499	Ast::Literal(ref x) => &x.span,
				500	Ast::Dot(ref span) => span,
				501	Ast::Assertion(ref x) => &x.span,
				502	Ast::Class(ref x) => x.span(),
				503	Ast::Repetition(ref x) => &x.span,
				504	Ast::Group(ref x) => &x.span,
				505	Ast::Alternation(ref x) => &x.span,
				506	Ast::Concat(ref x) => &x.span,
				507	}
				508	}
				509
				510	/// Return true if and only if this Ast is empty.
				511	pub fn is_empty(&self) -> bool {
				512	match *self {
				513	Ast::Empty(_) => true,
				514	_ => false,
				515	}
				516	}
				517
				518	/// Returns true if and only if this AST has any (including possibly empty)
				519	/// subexpressions.
				520	fn has_subexprs(&self) -> bool {
				521	match *self {
				522	Ast::Empty(_)
				523	\| Ast::Flags(_)
				524	\| Ast::Literal(_)
				525	\| Ast::Dot(_)
				526	\| Ast::Assertion(_) => false,
				527	Ast::Class(_)
				528	\| Ast::Repetition(_)
				529	\| Ast::Group(_)
				530	\| Ast::Alternation(_)
				531	\| Ast::Concat(_) => true,
				532	}
				533	}
				534	}
				535
				536	/// Print a display representation of this Ast.
				537	///
				538	/// This does not preserve any of the original whitespace formatting that may
				539	/// have originally been present in the concrete syntax from which this Ast
				540	/// was generated.
				541	///
				542	/// This implementation uses constant stack space and heap space proportional
				543	/// to the size of the `Ast`.
				544	impl fmt::Display for Ast {
				545	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
				546	use ast::print::Printer;
				547	Printer::new().print(self, f)
				548	}
				549	}
				550
				551	/// An alternation of regular expressions.
				552	#[derive(Clone, Debug, Eq, PartialEq)]
				553	pub struct Alternation {
				554	/// The span of this alternation.
				555	pub span: Span,
				556	/// The alternate regular expressions.
				557	pub asts: Vec<Ast>,
				558	}
				559
				560	impl Alternation {
				561	/// Return this alternation as an AST.
				562	///
				563	/// If this alternation contains zero ASTs, then Ast::Empty is
				564	/// returned. If this alternation contains exactly 1 AST, then the
				565	/// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
				566	pub fn into_ast(mut self) -> Ast {
				567	match self.asts.len() {
				568	0 => Ast::Empty(self.span),
				569	1 => self.asts.pop().unwrap(),
				570	_ => Ast::Alternation(self),
				571	}
				572	}
				573	}
				574
				575	/// A concatenation of regular expressions.
				576	#[derive(Clone, Debug, Eq, PartialEq)]
				577	pub struct Concat {
				578	/// The span of this concatenation.
				579	pub span: Span,
				580	/// The concatenation regular expressions.
				581	pub asts: Vec<Ast>,
				582	}
				583
				584	impl Concat {
				585	/// Return this concatenation as an AST.
				586	///
				587	/// If this concatenation contains zero ASTs, then Ast::Empty is
				588	/// returned. If this concatenation contains exactly 1 AST, then the
				589	/// corresponding AST is returned. Otherwise, Ast::Concat is returned.
				590	pub fn into_ast(mut self) -> Ast {
				591	match self.asts.len() {
				592	0 => Ast::Empty(self.span),
				593	1 => self.asts.pop().unwrap(),
				594	_ => Ast::Concat(self),
				595	}
				596	}
				597	}
				598
				599	/// A single literal expression.
				600	///
				601	/// A literal corresponds to a single Unicode scalar value. Literals may be
				602	/// represented in their literal form, e.g., `a` or in their escaped form,
				603	/// e.g., `\x61`.
				604	#[derive(Clone, Debug, Eq, PartialEq)]
				605	pub struct Literal {
				606	/// The span of this literal.
				607	pub span: Span,
				608	/// The kind of this literal.
				609	pub kind: LiteralKind,
				610	/// The Unicode scalar value corresponding to this literal.
				611	pub c: char,
				612	}
				613
				614	impl Literal {
				615	/// If this literal was written as a `\x` hex escape, then this returns
				616	/// the corresponding byte value. Otherwise, this returns `None`.
				617	pub fn byte(&self) -> Option<u8> {
				618	let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
				619	if self.c as u32 <= 255 && self.kind == short_hex {
				620	Some(self.c as u8)
				621	} else {
				622	None
				623	}
				624	}
				625	}
				626
				627	/// The kind of a single literal expression.
				628	#[derive(Clone, Debug, Eq, PartialEq)]
				629	pub enum LiteralKind {
				630	/// The literal is written verbatim, e.g., `a` or `☃`.
				631	Verbatim,
				632	/// The literal is written as an escape because it is punctuation, e.g.,
				633	/// `\*` or `\[`.
				634	Punctuation,
				635	/// The literal is written as an octal escape, e.g., `\141`.
				636	Octal,
				637	/// The literal is written as a hex code with a fixed number of digits
				638	/// depending on the type of the escape, e.g., `\x61` or or `\u0061` or
				639	/// `\U00000061`.
				640	HexFixed(HexLiteralKind),
				641	/// The literal is written as a hex code with a bracketed number of
				642	/// digits. The only restriction is that the bracketed hex code must refer
				643	/// to a valid Unicode scalar value.
				644	HexBrace(HexLiteralKind),
				645	/// The literal is written as a specially recognized escape, e.g., `\f`
				646	/// or `\n`.
				647	Special(SpecialLiteralKind),
				648	}
				649
				650	/// The type of a special literal.
				651	///
				652	/// A special literal is a special escape sequence recognized by the regex
				653	/// parser, e.g., `\f` or `\n`.
				654	#[derive(Clone, Debug, Eq, PartialEq)]
				655	pub enum SpecialLiteralKind {
				656	/// Bell, spelled `\a` (`\x07`).
				657	Bell,
				658	/// Form feed, spelled `\f` (`\x0C`).
				659	FormFeed,
				660	/// Tab, spelled `\t` (`\x09`).
				661	Tab,
				662	/// Line feed, spelled `\n` (`\x0A`).
				663	LineFeed,
				664	/// Carriage return, spelled `\r` (`\x0D`).
				665	CarriageReturn,
				666	/// Vertical tab, spelled `\v` (`\x0B`).
				667	VerticalTab,
				668	/// Space, spelled `\ ` (`\x20`). Note that this can only appear when
				669	/// parsing in verbose mode.
				670	Space,
				671	}
				672
				673	/// The type of a Unicode hex literal.
				674	///
				675	/// Note that all variants behave the same when used with brackets. They only
				676	/// differ when used without brackets in the number of hex digits that must
				677	/// follow.
				678	#[derive(Clone, Debug, Eq, PartialEq)]
				679	pub enum HexLiteralKind {
				680	/// A `\x` prefix. When used without brackets, this form is limited to
				681	/// two digits.
				682	X,
				683	/// A `\u` prefix. When used without brackets, this form is limited to
				684	/// four digits.
				685	UnicodeShort,
				686	/// A `\U` prefix. When used without brackets, this form is limited to
				687	/// eight digits.
				688	UnicodeLong,
				689	}
				690
				691	impl HexLiteralKind {
				692	/// The number of digits that must be used with this literal form when
				693	/// used without brackets. When used with brackets, there is no
				694	/// restriction on the number of digits.
				695	pub fn digits(&self) -> u32 {
				696	match *self {
				697	HexLiteralKind::X => 2,
				698	HexLiteralKind::UnicodeShort => 4,
				699	HexLiteralKind::UnicodeLong => 8,
				700	}
				701	}
				702	}
				703
				704	/// A single character class expression.
				705	#[derive(Clone, Debug, Eq, PartialEq)]
				706	pub enum Class {
				707	/// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
				708	Unicode(ClassUnicode),
				709	/// A perl character class, e.g., `\d` or `\W`.
				710	Perl(ClassPerl),
				711	/// A bracketed character class set, which may contain zero or more
				712	/// character ranges and/or zero or more nested classes. e.g.,
				713	/// `[a-zA-Z\pL]`.
				714	Bracketed(ClassBracketed),
				715	}
				716
				717	impl Class {
				718	/// Return the span of this character class.
				719	pub fn span(&self) -> &Span {
				720	match *self {
				721	Class::Perl(ref x) => &x.span,
				722	Class::Unicode(ref x) => &x.span,
				723	Class::Bracketed(ref x) => &x.span,
				724	}
				725	}
				726	}
				727
				728	/// A Perl character class.
				729	#[derive(Clone, Debug, Eq, PartialEq)]
				730	pub struct ClassPerl {
				731	/// The span of this class.
				732	pub span: Span,
				733	/// The kind of Perl class.
				734	pub kind: ClassPerlKind,
				735	/// Whether the class is negated or not. e.g., `\d` is not negated but
				736	/// `\D` is.
				737	pub negated: bool,
				738	}
				739
				740	/// The available Perl character classes.
				741	#[derive(Clone, Debug, Eq, PartialEq)]
				742	pub enum ClassPerlKind {
				743	/// Decimal numbers.
				744	Digit,
				745	/// Whitespace.
				746	Space,
				747	/// Word characters.
				748	Word,
				749	}
				750
				751	/// An ASCII character class.
				752	#[derive(Clone, Debug, Eq, PartialEq)]
				753	pub struct ClassAscii {
				754	/// The span of this class.
				755	pub span: Span,
				756	/// The kind of ASCII class.
				757	pub kind: ClassAsciiKind,
				758	/// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
				759	/// but `[[:^alpha:]]` is.
				760	pub negated: bool,
				761	}
				762
				763	/// The available ASCII character classes.
				764	#[derive(Clone, Debug, Eq, PartialEq)]
				765	pub enum ClassAsciiKind {
				766	/// `[0-9A-Za-z]`
				767	Alnum,
				768	/// `[A-Za-z]`
				769	Alpha,
				770	/// `[\x00-\x7F]`
				771	Ascii,
				772	/// `[ \t]`
				773	Blank,
				774	/// `[\x00-\x1F\x7F]`
				775	Cntrl,
				776	/// `[0-9]`
				777	Digit,
				778	/// `[!-~]`
				779	Graph,
				780	/// `[a-z]`
				781	Lower,
				782	/// `[ -~]`
				783	Print,
				784	/// `[!-/:-@\[-`{-~]`
				785	Punct,
				786	/// `[\t\n\v\f\r ]`
				787	Space,
				788	/// `[A-Z]`
				789	Upper,
				790	/// `[0-9A-Za-z_]`
				791	Word,
				792	/// `[0-9A-Fa-f]`
				793	Xdigit,
				794	}
				795
				796	impl ClassAsciiKind {
				797	/// Return the corresponding ClassAsciiKind variant for the given name.
				798	///
				799	/// The name given should correspond to the lowercase version of the
				800	/// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
				801	///
				802	/// If no variant with the corresponding name exists, then `None` is
				803	/// returned.
				804	pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
				805	use self::ClassAsciiKind::*;
				806	match name {
				807	"alnum" => Some(Alnum),
				808	"alpha" => Some(Alpha),
				809	"ascii" => Some(Ascii),
				810	"blank" => Some(Blank),
				811	"cntrl" => Some(Cntrl),
				812	"digit" => Some(Digit),
				813	"graph" => Some(Graph),
				814	"lower" => Some(Lower),
				815	"print" => Some(Print),
				816	"punct" => Some(Punct),
				817	"space" => Some(Space),
				818	"upper" => Some(Upper),
				819	"word" => Some(Word),
				820	"xdigit" => Some(Xdigit),
				821	_ => None,
				822	}
				823	}
				824	}
				825
/// A Unicode character class.
///
/// This records how the class was written: where it is, whether `\p` or `\P`
/// was used, and which form of class name followed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassUnicode {
    /// The span of this class.
    pub span: Span,
    /// Whether this class is negated or not.
    ///
    /// Note: be careful when using this attribute. This specifically refers
    /// to whether the class is written as `\p` or `\P`, where the latter
    /// is `negated = true`. However, it is also possible to write something
    /// like `\P{scx!=Katakana}` which is actually equivalent to
    /// `\p{scx=Katakana}` and is therefore not actually negated even though
    /// `negated = true` here. To test whether this class is truly negated
    /// or not, use the `is_negated` method.
    pub negated: bool,
    /// The kind of Unicode class.
    pub kind: ClassUnicodeKind,
}
				844
				845	impl ClassUnicode {
				846	/// Returns true if this class has been negated.
				847	///
				848	/// Note that this takes the Unicode op into account, if it's present.
				849	/// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
				850	pub fn is_negated(&self) -> bool {
				851	match self.kind {
				852	ClassUnicodeKind::NamedValue {
				853	op: ClassUnicodeOpKind::NotEqual,
				854	..
				855	} => !self.negated,
				856	_ => self.negated,
				857	}
				858	}
				859	}
				860
/// The available forms of Unicode character classes.
///
/// The strings stored here may be empty, so holding one of these values
/// does not by itself mean the name or value is meaningful.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ClassUnicodeKind {
    /// A one letter abbreviated class, e.g., `\pN`.
    OneLetter(char),
    /// A binary property, general category or script, e.g., the `Greek` in
    /// `\p{Greek}`. The string may be empty.
    Named(String),
    /// A property name and an associated value, e.g., `\p{scx=Katakana}`.
    NamedValue {
        /// The type of Unicode op used to associate `name` with `value`.
        op: ClassUnicodeOpKind,
        /// The property name (which may be empty).
        name: String,
        /// The property value (which may be empty).
        value: String,
    },
}
				879
/// The type of op used in a Unicode character class.
///
/// `Equal` and `Colon` are both equality ops (see `is_equal`); `NotEqual`
/// is the only op that inverts the class.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ClassUnicodeOpKind {
    /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
    Equal,
    /// A property set to a specific value using a colon, e.g.,
    /// `\p{scx:Katakana}`.
    Colon,
    /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
    NotEqual,
}
				891
				892	impl ClassUnicodeOpKind {
				893	/// Whether the op is an equality op or not.
				894	pub fn is_equal(&self) -> bool {
				895	match *self {
				896	ClassUnicodeOpKind::Equal \| ClassUnicodeOpKind::Colon => true,
				897	_ => false,
				898	}
				899	}
				900	}
				901
/// A bracketed character class, e.g., `[a-z0-9]`.
///
/// The contents between the brackets are described by `kind`; the leading
/// `^`, if any, is recorded separately in `negated`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassBracketed {
    /// The span of this class.
    pub span: Span,
    /// Whether this class is negated or not. e.g., `[a]` is not negated but
    /// `[^a]` is.
    pub negated: bool,
    /// The type of this set. A set is either a normal union of things, e.g.,
    /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
    pub kind: ClassSet,
}
				914
/// A character class set.
///
/// This type corresponds to the internal structure of a bracketed character
/// class. That is, every bracketed character class is one of two types: a
/// union of items (literals, ranges, other bracketed classes) or a tree of
/// binary set operations.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ClassSet {
    /// An item, which can be a single literal, range, nested character class
    /// or a union of items.
    Item(ClassSetItem),
    /// A single binary operation (i.e., &&, -- or ~~).
    BinaryOp(ClassSetBinaryOp),
}
				929
				930	impl ClassSet {
				931	/// Build a set from a union.
				932	pub fn union(ast: ClassSetUnion) -> ClassSet {
				933	ClassSet::Item(ClassSetItem::Union(ast))
				934	}
				935
				936	/// Return the span of this character class set.
				937	pub fn span(&self) -> &Span {
				938	match *self {
				939	ClassSet::Item(ref x) => x.span(),
				940	ClassSet::BinaryOp(ref x) => &x.span,
				941	}
				942	}
				943
				944	/// Return true if and only if this class set is empty.
				945	fn is_empty(&self) -> bool {
				946	match *self {
				947	ClassSet::Item(ClassSetItem::Empty(_)) => true,
				948	_ => false,
				949	}
				950	}
				951	}
				952
/// A single component of a character class set.
///
/// Every variant carries a span, which is available uniformly through the
/// `span` method.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ClassSetItem {
    /// An empty item.
    ///
    /// Note that a bracketed character class cannot contain a single empty
    /// item. Empty items can appear when using one of the binary operators.
    /// For example, `[&&]` is the intersection of two empty classes.
    Empty(Span),
    /// A single literal.
    Literal(Literal),
    /// A range between two literals.
    Range(ClassSetRange),
    /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
    Ascii(ClassAscii),
    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
    Unicode(ClassUnicode),
    /// A perl character class, e.g., `\d` or `\W`.
    Perl(ClassPerl),
    /// A bracketed character class set, which may contain zero or more
    /// character ranges and/or zero or more nested classes. e.g.,
    /// `[a-zA-Z\pL]`.
    ///
    /// Boxed because a bracketed class itself contains a `ClassSet`.
    Bracketed(Box<ClassBracketed>),
    /// A union of items.
    Union(ClassSetUnion),
}
				979
				980	impl ClassSetItem {
				981	/// Return the span of this character class set item.
				982	pub fn span(&self) -> &Span {
				983	match *self {
				984	ClassSetItem::Empty(ref span) => span,
				985	ClassSetItem::Literal(ref x) => &x.span,
				986	ClassSetItem::Range(ref x) => &x.span,
				987	ClassSetItem::Ascii(ref x) => &x.span,
				988	ClassSetItem::Perl(ref x) => &x.span,
				989	ClassSetItem::Unicode(ref x) => &x.span,
				990	ClassSetItem::Bracketed(ref x) => &x.span,
				991	ClassSetItem::Union(ref x) => &x.span,
				992	}
				993	}
				994	}
				995
/// A single character class range in a set.
///
/// Constructing a value does not check the order of the endpoints; use
/// `is_valid` to find out whether `start` comes no later than `end`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassSetRange {
    /// The span of this range.
    pub span: Span,
    /// The start of this range.
    pub start: Literal,
    /// The end of this range.
    pub end: Literal,
}
				1006
				1007	impl ClassSetRange {
				1008	/// Returns true if and only if this character class range is valid.
				1009	///
				1010	/// The only case where a range is invalid is if its start is greater than
				1011	/// its end.
				1012	pub fn is_valid(&self) -> bool {
				1013	self.start.c <= self.end.c
				1014	}
				1015	}
				1016
/// A union of items inside a character class set.
///
/// Items added through `push` keep `span` up to date automatically.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassSetUnion {
    /// The span of the items in this operation. e.g., the `a-z0-9` in
    /// `[^a-z0-9]`
    pub span: Span,
    /// The sequence of items that make up this union.
    pub items: Vec<ClassSetItem>,
}
				1026
				1027	impl ClassSetUnion {
				1028	/// Push a new item in this union.
				1029	///
				1030	/// The ending position of this union's span is updated to the ending
				1031	/// position of the span of the item given. If the union is empty, then
				1032	/// the starting position of this union is set to the starting position
				1033	/// of this item.
				1034	///
				1035	/// In other words, if you only use this method to add items to a union
				1036	/// and you set the spans on each item correctly, then you should never
				1037	/// need to adjust the span of the union directly.
				1038	pub fn push(&mut self, item: ClassSetItem) {
				1039	if self.items.is_empty() {
				1040	self.span.start = item.span().start;
				1041	}
				1042	self.span.end = item.span().end;
				1043	self.items.push(item);
				1044	}
				1045
				1046	/// Return this union as a character class set item.
				1047	///
				1048	/// If this union contains zero items, then an empty union is
				1049	/// returned. If this concatenation contains exactly 1 item, then the
				1050	/// corresponding item is returned. Otherwise, ClassSetItem::Union is
				1051	/// returned.
				1052	pub fn into_item(mut self) -> ClassSetItem {
				1053	match self.items.len() {
				1054	0 => ClassSetItem::Empty(self.span),
				1055	1 => self.items.pop().unwrap(),
				1056	_ => ClassSetItem::Union(self),
				1057	}
				1058	}
				1059	}
				1060
/// A Unicode character class set operation.
///
/// Both operands are boxed because each is itself a `ClassSet`, which may
/// in turn be another binary operation.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassSetBinaryOp {
    /// The span of this operation. e.g., the `a-z--[h-p]` in
    /// `[a-z--[h-p]]`.
    pub span: Span,
    /// The type of this set operation.
    pub kind: ClassSetBinaryOpKind,
    /// The left hand side of the operation.
    pub lhs: Box<ClassSet>,
    /// The right hand side of the operation.
    pub rhs: Box<ClassSet>,
}
				1073
/// The type of a Unicode character class set operation.
///
/// Note that this doesn't explicitly represent union since there is no
/// explicit union operator. Concatenation inside a character class corresponds
/// to the union operation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ClassSetBinaryOpKind {
    /// The intersection of two sets (written `&&`), e.g., `\pN&&[a-z]`.
    Intersection,
    /// The difference of two sets (written `--`), e.g., `\pN--[0-9]`.
    Difference,
    /// The symmetric difference of two sets (written `~~`). The symmetric
    /// difference is the set of elements belonging to one but not both sets.
    /// e.g., `[\pL~~[:ascii:]]`.
    SymmetricDifference,
}
				1090
/// A single zero-width assertion.
///
/// Pairs the kind of assertion with the place it was written.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Assertion {
    /// The span of this assertion.
    pub span: Span,
    /// The assertion kind, e.g., `\b` or `^`.
    pub kind: AssertionKind,
}
				1099
/// An assertion kind.
///
/// Each variant is documented with the pattern syntax it stands for.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AssertionKind {
    /// `^`
    StartLine,
    /// `$`
    EndLine,
    /// `\A`
    StartText,
    /// `\z`
    EndText,
    /// `\b`
    WordBoundary,
    /// `\B`
    NotWordBoundary,
}
				1116
/// A repetition operation applied to a regular expression.
///
/// The operator and the expression it applies to are stored separately, in
/// `op` and `ast` respectively.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Repetition {
    /// The span of this operation.
    pub span: Span,
    /// The actual operation.
    pub op: RepetitionOp,
    /// Whether this operation was applied greedily or not.
    pub greedy: bool,
    /// The regular expression under repetition.
    pub ast: Box<Ast>,
}
				1129
/// The repetition operator itself.
///
/// This describes only the operator, not the expression being repeated.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RepetitionOp {
    /// The span of this operator. This includes things like `+`, `*?` and
    /// `{m,n}`.
    pub span: Span,
    /// The type of operation.
    pub kind: RepetitionKind,
}
				1139
/// The kind of a repetition operator.
///
/// Each variant is documented with the pattern syntax it stands for.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionKind {
    /// `?`
    ZeroOrOne,
    /// `*`
    ZeroOrMore,
    /// `+`
    OneOrMore,
    /// `{m,n}`, or one of the other counted forms described by
    /// `RepetitionRange`.
    Range(RepetitionRange),
}
				1152
/// A range repetition operator.
///
/// Constructing a value does not check the bounds; use `is_valid` to find
/// out whether a bounded range has its bounds in order.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionRange {
    /// `{m}`
    Exactly(u32),
    /// `{m,}`
    AtLeast(u32),
    /// `{m,n}`
    Bounded(u32, u32),
}
				1163
				1164	impl RepetitionRange {
				1165	/// Returns true if and only if this repetition range is valid.
				1166	///
				1167	/// The only case where a repetition range is invalid is if it is bounded
				1168	/// and its start is greater than its end.
				1169	pub fn is_valid(&self) -> bool {
				1170	match *self {
				1171	RepetitionRange::Bounded(s, e) if s > e => false,
				1172	_ => true,
				1173	}
				1174	}
				1175	}
				1176
/// A grouped regular expression.
///
/// This includes both capturing and non-capturing groups. This does not
/// include flag-only groups like `(?is)`, but does contain any group that
/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
/// `(?is:a)`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Group {
    /// The span of this group.
    pub span: Span,
    /// The kind of this group: capturing by index, capturing by name, or
    /// non-capturing.
    pub kind: GroupKind,
    /// The regular expression in this group.
    pub ast: Box<Ast>,
}
				1192
				1193	impl Group {
				1194	/// If this group is non-capturing, then this returns the (possibly empty)
				1195	/// set of flags. Otherwise, `None` is returned.
				1196	pub fn flags(&self) -> Option<&Flags> {
				1197	match self.kind {
				1198	GroupKind::NonCapturing(ref flags) => Some(flags),
				1199	_ => None,
				1200	}
				1201	}
				1202
				1203	/// Returns true if and only if this group is capturing.
				1204	pub fn is_capturing(&self) -> bool {
				1205	match self.kind {
				1206	GroupKind::CaptureIndex(_) \| GroupKind::CaptureName(_) => true,
				1207	GroupKind::NonCapturing(_) => false,
				1208	}
				1209	}
				1210
				1211	/// Returns the capture index of this group, if this is a capturing group.
				1212	///
				1213	/// This returns a capture index precisely when `is_capturing` is `true`.
				1214	pub fn capture_index(&self) -> Option<u32> {
				1215	match self.kind {
				1216	GroupKind::CaptureIndex(i) => Some(i),
				1217	GroupKind::CaptureName(ref x) => Some(x.index),
				1218	GroupKind::NonCapturing(_) => None,
				1219	}
				1220	}
				1221	}
				1222
/// The kind of a group.
///
/// Each variant is documented with the pattern syntax it stands for.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GroupKind {
    /// `(a)`; the payload is the capture index.
    CaptureIndex(u32),
    /// `(?P<name>a)`
    CaptureName(CaptureName),
    /// `(?:a)` and `(?i:a)`; the payload is the (possibly empty) set of
    /// flags written in the group.
    NonCapturing(Flags),
}
				1233
/// A capture name.
///
/// This corresponds to the name itself between the angle brackets in, e.g.,
/// `(?P<foo>expr)`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CaptureName {
    /// The span of this capture name.
    pub span: Span,
    /// The capture name.
    pub name: String,
    /// The capture index. This is the value `Group::capture_index` reports
    /// for a named group.
    pub index: u32,
}
				1247
/// A group of flags that is not applied to a particular regular expression.
///
/// This is the flag-only form, e.g., `(?is)`, as opposed to a group such as
/// `(?is:a)` that also contains a sub-expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SetFlags {
    /// The span of these flags, including the grouping parentheses.
    pub span: Span,
    /// The actual sequence of flags.
    pub flags: Flags,
}
				1256
/// A group of flags.
///
/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Flags {
    /// The span of this group of flags.
    pub span: Span,
    /// A sequence of flag items. Each item is either a flag or a negation
    /// operator. Order matters: a negation applies to the flags after it.
    pub items: Vec<FlagsItem>,
}
				1268
				1269	impl Flags {
				1270	/// Add the given item to this sequence of flags.
				1271	///
				1272	/// If the item was added successfully, then `None` is returned. If the
				1273	/// given item is a duplicate, then `Some(i)` is returned, where
				1274	/// `items[i].kind == item.kind`.
				1275	pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
				1276	for (i, x) in self.items.iter().enumerate() {
				1277	if x.kind == item.kind {
				1278	return Some(i);
				1279	}
				1280	}
				1281	self.items.push(item);
				1282	None
				1283	}
				1284
				1285	/// Returns the state of the given flag in this set.
				1286	///
				1287	/// If the given flag is in the set but is negated, then `Some(false)` is
				1288	/// returned.
				1289	///
				1290	/// If the given flag is in the set and is not negated, then `Some(true)`
				1291	/// is returned.
				1292	///
				1293	/// Otherwise, `None` is returned.
				1294	pub fn flag_state(&self, flag: Flag) -> Option<bool> {
				1295	let mut negated = false;
				1296	for x in &self.items {
				1297	match x.kind {
				1298	FlagsItemKind::Negation => {
				1299	negated = true;
				1300	}
				1301	FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
				1302	return Some(!negated);
				1303	}
				1304	_ => {}
				1305	}
				1306	}
				1307	None
				1308	}
				1309	}
				1310
/// A single item in a group of flags.
///
/// Two items are treated as duplicates by `Flags::add_item` when their
/// kinds are equal, regardless of span.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FlagsItem {
    /// The span of this item.
    pub span: Span,
    /// The kind of this item.
    pub kind: FlagsItemKind,
}
				1319
/// The kind of an item in a group of flags.
///
/// For example, the sequence `is-u` consists of two flags, a negation
/// operator and one more flag.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum FlagsItemKind {
    /// A negation operator applied to all subsequent flags in the enclosing
    /// group.
    Negation,
    /// A single flag in a group.
    Flag(Flag),
}
				1329
				1330	impl FlagsItemKind {
				1331	/// Returns true if and only if this item is a negation operator.
				1332	pub fn is_negation(&self) -> bool {
				1333	match *self {
				1334	FlagsItemKind::Negation => true,
				1335	_ => false,
				1336	}
				1337	}
				1338	}
				1339
/// A single flag.
///
/// Each variant is documented with the letter that spells it in a pattern.
/// Note that `U` and `u` are distinct flags.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Flag {
    /// `i`
    CaseInsensitive,
    /// `m`
    MultiLine,
    /// `s`
    DotMatchesNewLine,
    /// `U`
    SwapGreed,
    /// `u`
    Unicode,
    /// `x`
    IgnoreWhitespace,
}
				1356
/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
/// space but heap space proportional to the depth of the `Ast`.
///
/// Rather than letting nested nodes be dropped recursively, the children of
/// each node are moved onto an explicit `Vec` that serves as a work list, and
/// nodes are then dropped one at a time from that list.
impl Drop for Ast {
    fn drop(&mut self) {
        use std::mem;

        // Fast path: return right away when there is nothing nested to take
        // apart. This also ends the nested `drop` call made for each
        // hollowed-out node at the end of a loop iteration below.
        match *self {
            Ast::Empty(_)
            | Ast::Flags(_)
            | Ast::Literal(_)
            | Ast::Dot(_)
            | Ast::Assertion(_)
            // Classes are recursive, so they get their own Drop impl.
            | Ast::Class(_) => return,
            // NOTE(review): `has_subexprs` is defined outside this view;
            // presumably it is false for nodes without child expressions,
            // including the `Ast::Empty` placeholder used below. Confirm.
            Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
            Ast::Group(ref x) if !x.ast.has_subexprs() => return,
            Ast::Alternation(ref x) if x.asts.is_empty() => return,
            Ast::Concat(ref x) if x.asts.is_empty() => return,
            _ => {}
        }

        // Placeholder values swapped in wherever a child is taken out.
        let empty_span = || Span::splat(Position::new(0, 0, 0));
        let empty_ast = || Ast::Empty(empty_span());
        // Move the whole tree out of `self` and onto the work list, leaving
        // an empty node behind.
        let mut stack = vec![mem::replace(self, empty_ast())];
        while let Some(mut ast) = stack.pop() {
            // Detach the children of `ast` and queue them. `ast` itself is
            // dropped at the end of this iteration, by which point it no
            // longer owns any queued child.
            match ast {
                Ast::Empty(_)
                | Ast::Flags(_)
                | Ast::Literal(_)
                | Ast::Dot(_)
                | Ast::Assertion(_)
                // Classes are recursive, so they get their own Drop impl.
                | Ast::Class(_) => {}
                Ast::Repetition(ref mut x) => {
                    stack.push(mem::replace(&mut x.ast, empty_ast()));
                }
                Ast::Group(ref mut x) => {
                    stack.push(mem::replace(&mut x.ast, empty_ast()));
                }
                Ast::Alternation(ref mut x) => {
                    stack.extend(x.asts.drain(..));
                }
                Ast::Concat(ref mut x) => {
                    stack.extend(x.asts.drain(..));
                }
            }
        }
    }
}
				1406
/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
/// stack space but heap space proportional to the depth of the `ClassSet`.
///
/// Rather than letting nested sets be dropped recursively, the nested sets of
/// each node are moved onto an explicit `Vec` that serves as a work list, and
/// sets are then dropped one at a time from that list.
impl Drop for ClassSet {
    fn drop(&mut self) {
        use std::mem;

        // Fast path: return right away when there is nothing nested to take
        // apart. This also ends the nested `drop` call made for each
        // hollowed-out set at the end of a loop iteration below.
        match *self {
            ClassSet::Item(ref item) => match *item {
                ClassSetItem::Empty(_)
                | ClassSetItem::Literal(_)
                | ClassSetItem::Range(_)
                | ClassSetItem::Ascii(_)
                | ClassSetItem::Unicode(_)
                | ClassSetItem::Perl(_) => return,
                ClassSetItem::Bracketed(ref x) => {
                    if x.kind.is_empty() {
                        return;
                    }
                }
                ClassSetItem::Union(ref x) => {
                    if x.items.is_empty() {
                        return;
                    }
                }
            },
            ClassSet::BinaryOp(ref op) => {
                if op.lhs.is_empty() && op.rhs.is_empty() {
                    return;
                }
            }
        }

        // Placeholder values swapped in wherever a nested set is taken out.
        let empty_span = || Span::splat(Position::new(0, 0, 0));
        let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
        // Move the whole set out of `self` and onto the work list, leaving
        // an empty item behind.
        let mut stack = vec![mem::replace(self, empty_set())];
        while let Some(mut set) = stack.pop() {
            // Detach whatever is nested inside `set` and queue it. `set`
            // itself is dropped at the end of this iteration, by which
            // point it no longer owns anything that was queued.
            match set {
                ClassSet::Item(ref mut item) => match *item {
                    ClassSetItem::Empty(_)
                    | ClassSetItem::Literal(_)
                    | ClassSetItem::Range(_)
                    | ClassSetItem::Ascii(_)
                    | ClassSetItem::Unicode(_)
                    | ClassSetItem::Perl(_) => {}
                    ClassSetItem::Bracketed(ref mut x) => {
                        stack.push(mem::replace(&mut x.kind, empty_set()));
                    }
                    ClassSetItem::Union(ref mut x) => {
                        // Items are re-wrapped as sets so that they fit on
                        // the same work list.
                        stack.extend(x.items.drain(..).map(ClassSet::Item));
                    }
                },
                ClassSet::BinaryOp(ref mut op) => {
                    stack.push(mem::replace(&mut op.lhs, empty_set()));
                    stack.push(mem::replace(&mut op.rhs, empty_set()));
                }
            }
        }
    }
}
				1466
				1467	#[cfg(test)]
				1468	mod tests {
				1469	use super::*;
				1470
				1471	// We use a thread with an explicit stack size to test that our destructor
				1472	// for Ast can handle arbitrarily sized expressions in constant stack
				1473	// space. In case we run on a platform without threads (WASM?), we limit
				1474	// this test to Windows/Unix.
				1475	#[test]
				1476	#[cfg(any(unix, windows))]
				1477	fn no_stack_overflow_on_drop() {
				1478	use std::thread;
				1479
				1480	let run = \|\| {
				1481	let span = \|\| Span::splat(Position::new(0, 0, 0));
				1482	let mut ast = Ast::Empty(span());
				1483	for i in 0..200 {
				1484	ast = Ast::Group(Group {
				1485	span: span(),
				1486	kind: GroupKind::CaptureIndex(i),
				1487	ast: Box::new(ast),
				1488	});
				1489	}
				1490	assert!(!ast.is_empty());
				1491	};
				1492
				1493	// We run our test on a thread with a small stack size so we can
				1494	// force the issue more easily.
				1495	thread::Builder::new()
				1496	.stack_size(1 << 10)
				1497	.spawn(run)
				1498	.unwrap()
				1499	.join()
				1500	.unwrap();
				1501	}
				1502	}