Blame - src/ast/parse.rs - platform/external/rust/crates/regex-syntax

blob: 55c5f79898dc4976caddfd7f6358aa4315190f9a [file] [log] [blame]

Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	1	/*!
				2	This module provides a regular expression parser.
				3	*/
				4
				5	use std::borrow::Borrow;
				6	use std::cell::{Cell, RefCell};
				7	use std::mem;
				8	use std::result;
				9
				10	use ast::{self, Ast, Position, Span};
				11	use either::Either;
				12
				13	use is_meta_character;
				14
				15	type Result<T> = result::Result<T, ast::Error>;
				16
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal; converts to `Ast::Literal` or `ClassSetItem::Literal`.
    Literal(ast::Literal),
    /// An assertion; converts to `Ast::Assertion`. Not a legal class item.
    Assertion(ast::Assertion),
    /// A dot; only its span is stored. Not a legal class item.
    Dot(Span),
    /// A Perl class; converts to `ast::Class::Perl` or `ClassSetItem::Perl`.
    Perl(ast::ClassPerl),
    /// A Unicode class; converts to `ast::Class::Unicode` or
    /// `ClassSetItem::Unicode`.
    Unicode(ast::ClassUnicode),
}
				31
				32	impl Primitive {
				33	/// Return the span of this primitive.
				34	fn span(&self) -> &Span {
				35	match *self {
				36	Primitive::Literal(ref x) => &x.span,
				37	Primitive::Assertion(ref x) => &x.span,
				38	Primitive::Dot(ref span) => span,
				39	Primitive::Perl(ref x) => &x.span,
				40	Primitive::Unicode(ref x) => &x.span,
				41	}
				42	}
				43
				44	/// Convert this primitive into a proper AST.
				45	fn into_ast(self) -> Ast {
				46	match self {
				47	Primitive::Literal(lit) => Ast::Literal(lit),
				48	Primitive::Assertion(assert) => Ast::Assertion(assert),
				49	Primitive::Dot(span) => Ast::Dot(span),
				50	Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
				51	Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
				52	}
				53	}
				54
				55	/// Convert this primitive into an item in a character class.
				56	///
				57	/// If this primitive is not a legal item (i.e., an assertion or a dot),
				58	/// then return an error.
				59	fn into_class_set_item<P: Borrow<Parser>>(
				60	self,
				61	p: &ParserI<P>,
				62	) -> Result<ast::ClassSetItem> {
				63	use self::Primitive::*;
				64	use ast::ClassSetItem;
				65
				66	match self {
				67	Literal(lit) => Ok(ClassSetItem::Literal(lit)),
				68	Perl(cls) => Ok(ClassSetItem::Perl(cls)),
				69	Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
				70	x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
				71	}
				72	}
				73
				74	/// Convert this primitive into a literal in a character class. In
				75	/// particular, literals are the only valid items that can appear in
				76	/// ranges.
				77	///
				78	/// If this primitive is not a legal item (i.e., a class, assertion or a
				79	/// dot), then return an error.
				80	fn into_class_literal<P: Borrow<Parser>>(
				81	self,
				82	p: &ParserI<P>,
				83	) -> Result<ast::Literal> {
				84	use self::Primitive::*;
				85
				86	match self {
				87	Literal(lit) => Ok(lit),
				88	x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
				89	}
				90	}
				91	}
				92
				93	/// Returns true if the given character is a hexadecimal digit.
				94	fn is_hex(c: char) -> bool {
				95	('0' <= c && c <= '9') \|\| ('a' <= c && c <= 'f') \|\| ('A' <= c && c <= 'F')
				96	}
				97
				98	/// Returns true if the given character is a valid in a capture group name.
				99	///
				100	/// If `first` is true, then `c` is treated as the first character in the
Chih-Hung Hsieh	31dfd7f	2020-10-26 13:16:58 -0700	[diff] [blame]	101	/// group name (which must be alphabetic or underscore).
Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	102	fn is_capture_char(c: char, first: bool) -> bool {
				103	c == '_'
Chih-Hung Hsieh	31dfd7f	2020-10-26 13:16:58 -0700	[diff] [blame]	104	\|\| (!first
				105	&& (('0' <= c && c <= '9') \|\| c == '.' \|\| c == '[' \|\| c == ']'))
				106	\|\| ('A' <= c && c <= 'Z')
				107	\|\| ('a' <= c && c <= 'z')
Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	108	}
				109
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Initial whitespace-insensitive setting; `build` copies it into both
    /// `initial_ignore_whitespace` and `ignore_whitespace` of the parser.
    ignore_whitespace: bool,
    /// Nesting limit handed to the built parser.
    nest_limit: u32,
    /// Whether the built parser supports octal syntax.
    octal: bool,
}
				119
				120	impl Default for ParserBuilder {
				121	fn default() -> ParserBuilder {
				122	ParserBuilder::new()
				123	}
				124	}
				125
				126	impl ParserBuilder {
				127	/// Create a new parser builder with a default configuration.
				128	pub fn new() -> ParserBuilder {
				129	ParserBuilder {
				130	ignore_whitespace: false,
				131	nest_limit: 250,
				132	octal: false,
				133	}
				134	}
				135
				136	/// Build a parser from this configuration with the given pattern.
				137	pub fn build(&self) -> Parser {
				138	Parser {
				139	pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
				140	capture_index: Cell::new(0),
				141	nest_limit: self.nest_limit,
				142	octal: self.octal,
				143	initial_ignore_whitespace: self.ignore_whitespace,
				144	ignore_whitespace: Cell::new(self.ignore_whitespace),
				145	comments: RefCell::new(vec![]),
				146	stack_group: RefCell::new(vec![]),
				147	stack_class: RefCell::new(vec![]),
				148	capture_names: RefCell::new(vec![]),
				149	scratch: RefCell::new(String::new()),
				150	}
				151	}
				152
				153	/// Set the nesting limit for this parser.
				154	///
				155	/// The nesting limit controls how deep the abstract syntax tree is allowed
				156	/// to be. If the AST exceeds the given limit (e.g., with too many nested
				157	/// groups), then an error is returned by the parser.
				158	///
				159	/// The purpose of this limit is to act as a heuristic to prevent stack
				160	/// overflow for consumers that do structural induction on an `Ast` using
				161	/// explicit recursion. While this crate never does this (instead using
				162	/// constant stack space and moving the call stack to the heap), other
				163	/// crates may.
				164	///
				165	/// This limit is not checked until the entire Ast is parsed. Therefore,
				166	/// if callers want to put a limit on the amount of heap space used, then
				167	/// they should impose a limit on the length, in bytes, of the concrete
				168	/// pattern string. In particular, this is viable since this parser
				169	/// implementation will limit itself to heap space proportional to the
				170	/// lenth of the pattern string.
				171	///
				172	/// Note that a nest limit of `0` will return a nest limit error for most
				173	/// patterns but not all. For example, a nest limit of `0` permits `a` but
				174	/// not `ab`, since `ab` requires a concatenation, which results in a nest
				175	/// depth of `1`. In general, a nest limit is not something that manifests
				176	/// in an obvious way in the concrete syntax, therefore, it should not be
				177	/// used in a granular way.
				178	pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
				179	self.nest_limit = limit;
				180	self
				181	}
				182
				183	/// Whether to support octal syntax or not.
				184	///
				185	/// Octal syntax is a little-known way of uttering Unicode codepoints in
				186	/// a regular expression. For example, `a`, `\x61`, `\u0061` and
				187	/// `\141` are all equivalent regular expressions, where the last example
				188	/// shows octal syntax.
				189	///
				190	/// While supporting octal syntax isn't in and of itself a problem, it does
				191	/// make good error messages harder. That is, in PCRE based regex engines,
				192	/// syntax like `\0` invokes a backreference, which is explicitly
				193	/// unsupported in Rust's regex engine. However, many users expect it to
				194	/// be supported. Therefore, when octal support is disabled, the error
				195	/// message will explicitly mention that backreferences aren't supported.
				196	///
				197	/// Octal syntax is disabled by default.
				198	pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
				199	self.octal = yes;
				200	self
				201	}
				202
				203	/// Enable verbose mode in the regular expression.
				204	///
				205	/// When enabled, verbose mode permits insigificant whitespace in many
				206	/// places in the regular expression, as well as comments. Comments are
				207	/// started using `#` and continue until the end of the line.
				208	///
				209	/// By default, this is disabled. It may be selectively enabled in the
				210	/// regular expression by using the `x` flag regardless of this setting.
				211	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
				212	self.ignore_whitespace = yes;
				213	self
				214	}
				215	}
				216
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a
/// [`ParserBuilder`](struct.ParserBuilder.html).
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser.
    pos: Cell<Position>,
    /// The current capture index.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. This is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sorted sequence of capture names. This is used to detect duplicate
    /// capture names and report an error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
				258
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration.
    parser: P,
    /// The full regular expression provided by the user.
    pattern: &'s str,
}
				275
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening parenthesis
/// or an alternation bar `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// Whether whitespace was being ignored (the `x` flag) just before
        /// this group was opened; restored when the group is popped.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
}
				296
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the stored
    /// set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
				323
				324	impl Parser {
				325	/// Create a new parser with a default configuration.
				326	///
				327	/// The parser can be run with either the `parse` or `parse_with_comments`
				328	/// methods. The parse methods return an abstract syntax tree.
				329	///
				330	/// To set configuration options on the parser, use
				331	/// [`ParserBuilder`](struct.ParserBuilder.html).
				332	pub fn new() -> Parser {
				333	ParserBuilder::new().build()
				334	}
				335
				336	/// Parse the regular expression into an abstract syntax tree.
				337	pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
				338	ParserI::new(self, pattern).parse()
				339	}
				340
				341	/// Parse the regular expression and return an abstract syntax tree with
				342	/// all of the comments found in the pattern.
				343	pub fn parse_with_comments(
				344	&mut self,
				345	pattern: &str,
				346	) -> Result<ast::WithComments> {
				347	ParserI::new(self, pattern).parse_with_comments()
				348	}
				349
				350	/// Reset the internal state of a parser.
				351	///
				352	/// This is called at the beginning of every parse. This prevents the
				353	/// parser from running with inconsistent state (say, if a previous
				354	/// invocation returned an error and the parser is reused).
				355	fn reset(&self) {
				356	// These settings should be in line with the construction
				357	// in `ParserBuilder::build`.
				358	self.pos.set(Position { offset: 0, line: 1, column: 1 });
				359	self.ignore_whitespace.set(self.initial_ignore_whitespace);
				360	self.comments.borrow_mut().clear();
				361	self.stack_group.borrow_mut().clear();
				362	self.stack_class.borrow_mut().clear();
				363	}
				364	}
				365
				366	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
				367	/// Build an internal parser from a parser configuration and a pattern.
				368	fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
				369	ParserI { parser: parser, pattern: pattern }
				370	}
				371
				372	/// Return a reference to the parser state.
				373	fn parser(&self) -> &Parser {
				374	self.parser.borrow()
				375	}
				376
				377	/// Return a reference to the pattern being parsed.
				378	fn pattern(&self) -> &str {
				379	self.pattern.borrow()
				380	}
				381
				382	/// Create a new error with the given span and error type.
				383	fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
				384	ast::Error {
				385	kind: kind,
				386	pattern: self.pattern().to_string(),
				387	span: span,
				388	}
				389	}
				390
				391	/// Return the current offset of the parser.
				392	///
				393	/// The offset starts at `0` from the beginning of the regular expression
				394	/// pattern string.
				395	fn offset(&self) -> usize {
				396	self.parser().pos.get().offset
				397	}
				398
				399	/// Return the current line number of the parser.
				400	///
				401	/// The line number starts at `1`.
				402	fn line(&self) -> usize {
				403	self.parser().pos.get().line
				404	}
				405
				406	/// Return the current column of the parser.
				407	///
				408	/// The column number starts at `1` and is reset whenever a `\n` is seen.
				409	fn column(&self) -> usize {
				410	self.parser().pos.get().column
				411	}
				412
				413	/// Return the next capturing index. Each subsequent call increments the
				414	/// internal index.
				415	///
				416	/// The span given should correspond to the location of the opening
				417	/// parenthesis.
				418	///
				419	/// If the capture limit is exceeded, then an error is returned.
				420	fn next_capture_index(&self, span: Span) -> Result<u32> {
				421	let current = self.parser().capture_index.get();
				422	let i = current.checked_add(1).ok_or_else(\|\| {
				423	self.error(span, ast::ErrorKind::CaptureLimitExceeded)
				424	})?;
				425	self.parser().capture_index.set(i);
				426	Ok(i)
				427	}
				428
				429	/// Adds the given capture name to this parser. If this capture name has
				430	/// already been used, then an error is returned.
				431	fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
				432	let mut names = self.parser().capture_names.borrow_mut();
				433	match names
				434	.binary_search_by_key(&cap.name.as_str(), \|c\| c.name.as_str())
				435	{
				436	Err(i) => {
				437	names.insert(i, cap.clone());
				438	Ok(())
				439	}
				440	Ok(i) => Err(self.error(
				441	cap.span,
				442	ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
				443	)),
				444	}
				445	}
				446
				447	/// Return whether the parser should ignore whitespace or not.
				448	fn ignore_whitespace(&self) -> bool {
				449	self.parser().ignore_whitespace.get()
				450	}
				451
				452	/// Return the character at the current position of the parser.
				453	///
				454	/// This panics if the current position does not point to a valid char.
				455	fn char(&self) -> char {
				456	self.char_at(self.offset())
				457	}
				458
				459	/// Return the character at the given position.
				460	///
				461	/// This panics if the given position does not point to a valid char.
				462	fn char_at(&self, i: usize) -> char {
				463	self.pattern()[i..]
				464	.chars()
				465	.next()
				466	.unwrap_or_else(\|\| panic!("expected char at offset {}", i))
				467	}
				468
				469	/// Bump the parser to the next Unicode scalar value.
				470	///
				471	/// If the end of the input has been reached, then `false` is returned.
				472	fn bump(&self) -> bool {
				473	if self.is_eof() {
				474	return false;
				475	}
				476	let Position { mut offset, mut line, mut column } = self.pos();
				477	if self.char() == '\n' {
				478	line = line.checked_add(1).unwrap();
				479	column = 1;
				480	} else {
				481	column = column.checked_add(1).unwrap();
				482	}
				483	offset += self.char().len_utf8();
				484	self.parser().pos.set(Position {
				485	offset: offset,
				486	line: line,
				487	column: column,
				488	});
				489	self.pattern()[self.offset()..].chars().next().is_some()
				490	}
				491
				492	/// If the substring starting at the current position of the parser has
				493	/// the given prefix, then bump the parser to the character immediately
				494	/// following the prefix and return true. Otherwise, don't bump the parser
				495	/// and return false.
				496	fn bump_if(&self, prefix: &str) -> bool {
				497	if self.pattern()[self.offset()..].starts_with(prefix) {
				498	for _ in 0..prefix.chars().count() {
				499	self.bump();
				500	}
				501	true
				502	} else {
				503	false
				504	}
				505	}
				506
				507	/// Returns true if and only if the parser is positioned at a look-around
				508	/// prefix. The conditions under which this returns true must always
				509	/// correspond to a regular expression that would otherwise be consider
				510	/// invalid.
				511	///
				512	/// This should only be called immediately after parsing the opening of
				513	/// a group or a set of flags.
				514	fn is_lookaround_prefix(&self) -> bool {
				515	self.bump_if("?=")
				516	\|\| self.bump_if("?!")
				517	\|\| self.bump_if("?<=")
				518	\|\| self.bump_if("?<!")
				519	}
				520
				521	/// Bump the parser, and if the `x` flag is enabled, bump through any
				522	/// subsequent spaces. Return true if and only if the parser is not at
				523	/// EOF.
				524	fn bump_and_bump_space(&self) -> bool {
				525	if !self.bump() {
				526	return false;
				527	}
				528	self.bump_space();
				529	!self.is_eof()
				530	}
				531
				532	/// If the `x` flag is enabled (i.e., whitespace insensitivity with
				533	/// comments), then this will advance the parser through all whitespace
				534	/// and comments to the next non-whitespace non-comment byte.
				535	///
				536	/// If the `x` flag is disabled, then this is a no-op.
				537	///
				538	/// This should be used selectively throughout the parser where
				539	/// arbitrary whitespace is permitted when the `x` flag is enabled. For
				540	/// example, `{ 5 , 6}` is equivalent to `{5,6}`.
				541	fn bump_space(&self) {
				542	if !self.ignore_whitespace() {
				543	return;
				544	}
				545	while !self.is_eof() {
				546	if self.char().is_whitespace() {
				547	self.bump();
				548	} else if self.char() == '#' {
				549	let start = self.pos();
				550	let mut comment_text = String::new();
				551	self.bump();
				552	while !self.is_eof() {
				553	let c = self.char();
				554	self.bump();
				555	if c == '\n' {
				556	break;
				557	}
				558	comment_text.push(c);
				559	}
				560	let comment = ast::Comment {
				561	span: Span::new(start, self.pos()),
				562	comment: comment_text,
				563	};
				564	self.parser().comments.borrow_mut().push(comment);
				565	} else {
				566	break;
				567	}
				568	}
				569	}
				570
				571	/// Peek at the next character in the input without advancing the parser.
				572	///
				573	/// If the input has been exhausted, then this returns `None`.
				574	fn peek(&self) -> Option<char> {
				575	if self.is_eof() {
				576	return None;
				577	}
				578	self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
				579	}
				580
				581	/// Like peek, but will ignore spaces when the parser is in whitespace
				582	/// insensitive mode.
				583	fn peek_space(&self) -> Option<char> {
				584	if !self.ignore_whitespace() {
				585	return self.peek();
				586	}
				587	if self.is_eof() {
				588	return None;
				589	}
				590	let mut start = self.offset() + self.char().len_utf8();
				591	let mut in_comment = false;
				592	for (i, c) in self.pattern()[start..].char_indices() {
				593	if c.is_whitespace() {
				594	continue;
				595	} else if !in_comment && c == '#' {
				596	in_comment = true;
				597	} else if in_comment && c == '\n' {
				598	in_comment = false;
				599	} else {
				600	start += i;
				601	break;
				602	}
				603	}
				604	self.pattern()[start..].chars().next()
				605	}
				606
				607	/// Returns true if the next call to `bump` would return false.
				608	fn is_eof(&self) -> bool {
				609	self.offset() == self.pattern().len()
				610	}
				611
				612	/// Return the current position of the parser, which includes the offset,
				613	/// line and column.
				614	fn pos(&self) -> Position {
				615	self.parser().pos.get()
				616	}
				617
				618	/// Create a span at the current position of the parser. Both the start
				619	/// and end of the span are set.
				620	fn span(&self) -> Span {
				621	Span::splat(self.pos())
				622	}
				623
				624	/// Create a span that covers the current character.
				625	fn span_char(&self) -> Span {
				626	let mut next = Position {
				627	offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
				628	line: self.line(),
				629	column: self.column().checked_add(1).unwrap(),
				630	};
				631	if self.char() == '\n' {
				632	next.line += 1;
				633	next.column = 1;
				634	}
				635	Span::new(self.pos(), next)
				636	}
				637
				638	/// Parse and push a single alternation on to the parser's internal stack.
				639	/// If the top of the stack already has an alternation, then add to that
				640	/// instead of pushing a new one.
				641	///
				642	/// The concatenation given corresponds to a single alternation branch.
				643	/// The concatenation returned starts the next branch and is empty.
				644	///
				645	/// This assumes the parser is currently positioned at `\|` and will advance
				646	/// the parser to the character following `\|`.
				647	#[inline(never)]
				648	fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
				649	assert_eq!(self.char(), '\|');
				650	concat.span.end = self.pos();
				651	self.push_or_add_alternation(concat);
				652	self.bump();
				653	Ok(ast::Concat { span: self.span(), asts: vec![] })
				654	}
				655
				656	/// Pushes or adds the given branch of an alternation to the parser's
				657	/// internal stack of state.
				658	fn push_or_add_alternation(&self, concat: ast::Concat) {
				659	use self::GroupState::*;
				660
				661	let mut stack = self.parser().stack_group.borrow_mut();
				662	if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
				663	alts.asts.push(concat.into_ast());
				664	return;
				665	}
				666	stack.push(Alternation(ast::Alternation {
				667	span: Span::new(concat.span.start, self.pos()),
				668	asts: vec![concat.into_ast()],
				669	}));
				670	}
				671
				672	/// Parse and push a group AST (and its parent concatenation) on to the
				673	/// parser's internal stack. Return a fresh concatenation corresponding
				674	/// to the group's sub-AST.
				675	///
				676	/// If a set of flags was found (with no group), then the concatenation
				677	/// is returned with that set of flags added.
				678	///
				679	/// This assumes that the parser is currently positioned on the opening
				680	/// parenthesis. It advances the parser to the character at the start
				681	/// of the sub-expression (or adjoining expression).
				682	///
				683	/// If there was a problem parsing the start of the group, then an error
				684	/// is returned.
				685	#[inline(never)]
				686	fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
				687	assert_eq!(self.char(), '(');
				688	match self.parse_group()? {
				689	Either::Left(set) => {
				690	let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
				691	if let Some(v) = ignore {
				692	self.parser().ignore_whitespace.set(v);
				693	}
				694
				695	concat.asts.push(Ast::Flags(set));
				696	Ok(concat)
				697	}
				698	Either::Right(group) => {
				699	let old_ignore_whitespace = self.ignore_whitespace();
				700	let new_ignore_whitespace = group
				701	.flags()
				702	.and_then(\|f\| f.flag_state(ast::Flag::IgnoreWhitespace))
				703	.unwrap_or(old_ignore_whitespace);
				704	self.parser().stack_group.borrow_mut().push(
				705	GroupState::Group {
				706	concat: concat,
				707	group: group,
				708	ignore_whitespace: old_ignore_whitespace,
				709	},
				710	);
				711	self.parser().ignore_whitespace.set(new_ignore_whitespace);
				712	Ok(ast::Concat { span: self.span(), asts: vec![] })
				713	}
				714	}
				715	}
				716
				717	/// Pop a group AST from the parser's internal stack and set the group's
				718	/// AST to the given concatenation. Return the concatenation containing
				719	/// the group.
				720	///
				721	/// This assumes that the parser is currently positioned on the closing
				722	/// parenthesis and advances the parser to the character following the `)`.
				723	///
				724	/// If no such group could be popped, then an unopened group error is
				725	/// returned.
				726	#[inline(never)]
				727	fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
				728	use self::GroupState::*;
				729
				730	assert_eq!(self.char(), ')');
				731	let mut stack = self.parser().stack_group.borrow_mut();
				732	let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
				733	.pop()
				734	{
				735	Some(Group { concat, group, ignore_whitespace }) => {
				736	(concat, group, ignore_whitespace, None)
				737	}
				738	Some(Alternation(alt)) => match stack.pop() {
				739	Some(Group { concat, group, ignore_whitespace }) => {
				740	(concat, group, ignore_whitespace, Some(alt))
				741	}
				742	None \| Some(Alternation(_)) => {
				743	return Err(self.error(
				744	self.span_char(),
				745	ast::ErrorKind::GroupUnopened,
				746	));
				747	}
				748	},
				749	None => {
				750	return Err(self
				751	.error(self.span_char(), ast::ErrorKind::GroupUnopened));
				752	}
				753	};
				754	self.parser().ignore_whitespace.set(ignore_whitespace);
				755	group_concat.span.end = self.pos();
				756	self.bump();
				757	group.span.end = self.pos();
				758	match alt {
				759	Some(mut alt) => {
				760	alt.span.end = group_concat.span.end;
				761	alt.asts.push(group_concat.into_ast());
				762	group.ast = Box::new(alt.into_ast());
				763	}
				764	None => {
				765	group.ast = Box::new(group_concat.into_ast());
				766	}
				767	}
				768	prior_concat.asts.push(Ast::Group(group));
				769	Ok(prior_concat)
				770	}
				771
				772	/// Pop the last state from the parser's internal stack, if it exists, and
				773	/// add the given concatenation to it. There either must be no state or a
				774	/// single alternation item on the stack. Any other scenario produces an
				775	/// error.
				776	///
				777	/// This assumes that the parser has advanced to the end.
				778	#[inline(never)]
				779	fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
				780	concat.span.end = self.pos();
				781	let mut stack = self.parser().stack_group.borrow_mut();
				782	let ast = match stack.pop() {
				783	None => Ok(concat.into_ast()),
				784	Some(GroupState::Alternation(mut alt)) => {
				785	alt.span.end = self.pos();
				786	alt.asts.push(concat.into_ast());
				787	Ok(Ast::Alternation(alt))
				788	}
				789	Some(GroupState::Group { group, .. }) => {
				790	return Err(
				791	self.error(group.span, ast::ErrorKind::GroupUnclosed)
				792	);
				793	}
				794	};
				795	// If we try to pop again, there should be nothing.
				796	match stack.pop() {
				797	None => ast,
				798	Some(GroupState::Alternation(_)) => {
				799	// This unreachable is unfortunate. This case can't happen
				800	// because the only way we can be here is if there were two
				801	// `GroupState::Alternation`s adjacent in the parser's stack,
				802	// which we guarantee to never happen because we never push a
				803	// `GroupState::Alternation` if one is already at the top of
				804	// the stack.
				805	unreachable!()
				806	}
				807	Some(GroupState::Group { group, .. }) => {
				808	Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
				809	}
				810	}
				811	}
				812
				813	/// Parse the opening of a character class and push the current class
				814	/// parsing context onto the parser's stack. This assumes that the parser
				815	/// is positioned at an opening `[`. The given union should correspond to
				816	/// the union of set items built up before seeing the `[`.
				817	///
				818	/// If there was a problem parsing the opening of the class, then an error
				819	/// is returned. Otherwise, a new union of set items for the class is
				820	/// returned (which may be populated with either a `]` or a `-`).
				821	#[inline(never)]
				822	fn push_class_open(
				823	&self,
				824	parent_union: ast::ClassSetUnion,
				825	) -> Result<ast::ClassSetUnion> {
				826	assert_eq!(self.char(), '[');
				827
				828	let (nested_set, nested_union) = self.parse_set_class_open()?;
				829	self.parser()
				830	.stack_class
				831	.borrow_mut()
				832	.push(ClassState::Open { union: parent_union, set: nested_set });
				833	Ok(nested_union)
				834	}
				835
				836	/// Parse the end of a character class set and pop the character class
				837	/// parser stack. The union given corresponds to the last union built
				838	/// before seeing the closing `]`. The union returned corresponds to the
				839	/// parent character class set with the nested class added to it.
				840	///
				841	/// This assumes that the parser is positioned at a `]` and will advance
				842	/// the parser to the byte immediately following the `]`.
				843	///
				844	/// If the stack is empty after popping, then this returns the final
				845	/// "top-level" character class AST (where a "top-level" character class
				846	/// is one that is not nested inside any other character class).
				847	///
				848	/// If there is no corresponding opening bracket on the parser's stack,
				849	/// then an error is returned.
				850	#[inline(never)]
				851	fn pop_class(
				852	&self,
				853	nested_union: ast::ClassSetUnion,
				854	) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
				855	assert_eq!(self.char(), ']');
				856
				857	let item = ast::ClassSet::Item(nested_union.into_item());
				858	let prevset = self.pop_class_op(item);
				859	let mut stack = self.parser().stack_class.borrow_mut();
				860	match stack.pop() {
				861	None => {
				862	// We can never observe an empty stack:
				863	//
				864	// 1) We are guaranteed to start with a non-empty stack since
				865	// the character class parser is only initiated when it sees
				866	// a `[`.
				867	// 2) If we ever observe an empty stack while popping after
				868	// seeing a `]`, then we signal the character class parser
				869	// to terminate.
				870	panic!("unexpected empty character class stack")
				871	}
				872	Some(ClassState::Op { .. }) => {
				873	// This panic is unfortunate, but this case is impossible
				874	// since we already popped the Op state if one exists above.
				875	// Namely, every push to the class parser stack is guarded by
				876	// whether an existing Op is already on the top of the stack.
				877	// If it is, the existing Op is modified. That is, the stack
				878	// can never have consecutive Op states.
				879	panic!("unexpected ClassState::Op")
				880	}
				881	Some(ClassState::Open { mut union, mut set }) => {
				882	self.bump();
				883	set.span.end = self.pos();
				884	set.kind = prevset;
				885	if stack.is_empty() {
				886	Ok(Either::Right(ast::Class::Bracketed(set)))
				887	} else {
				888	union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
				889	Ok(Either::Left(union))
				890	}
				891	}
				892	}
				893	}
				894
				895	/// Return an "unclosed class" error whose span points to the most
				896	/// recently opened class.
				897	///
				898	/// This should only be called while parsing a character class.
				899	#[inline(never)]
				900	fn unclosed_class_error(&self) -> ast::Error {
				901	for state in self.parser().stack_class.borrow().iter().rev() {
				902	match *state {
				903	ClassState::Open { ref set, .. } => {
				904	return self
				905	.error(set.span, ast::ErrorKind::ClassUnclosed);
				906	}
				907	_ => {}
				908	}
				909	}
				910	// We are guaranteed to have a non-empty stack with at least
				911	// one open bracket, so we should never get here.
				912	panic!("no open character class found")
				913	}
				914
				915	/// Push the current set of class items on to the class parser's stack as
				916	/// the left hand side of the given operator.
				917	///
				918	/// A fresh set union is returned, which should be used to build the right
				919	/// hand side of this operator.
				920	#[inline(never)]
				921	fn push_class_op(
				922	&self,
				923	next_kind: ast::ClassSetBinaryOpKind,
				924	next_union: ast::ClassSetUnion,
				925	) -> ast::ClassSetUnion {
				926	let item = ast::ClassSet::Item(next_union.into_item());
				927	let new_lhs = self.pop_class_op(item);
				928	self.parser()
				929	.stack_class
				930	.borrow_mut()
				931	.push(ClassState::Op { kind: next_kind, lhs: new_lhs });
				932	ast::ClassSetUnion { span: self.span(), items: vec![] }
				933	}
				934
				935	/// Pop a character class set from the character class parser stack. If the
				936	/// top of the stack is just an item (not an operation), then return the
				937	/// given set unchanged. If the top of the stack is an operation, then the
				938	/// given set will be used as the rhs of the operation on the top of the
				939	/// stack. In that case, the binary operation is returned as a set.
				940	#[inline(never)]
				941	fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
				942	let mut stack = self.parser().stack_class.borrow_mut();
				943	let (kind, lhs) = match stack.pop() {
				944	Some(ClassState::Op { kind, lhs }) => (kind, lhs),
				945	Some(state @ ClassState::Open { .. }) => {
				946	stack.push(state);
				947	return rhs;
				948	}
				949	None => unreachable!(),
				950	};
				951	let span = Span::new(lhs.span().start, rhs.span().end);
				952	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
				953	span: span,
				954	kind: kind,
				955	lhs: Box::new(lhs),
				956	rhs: Box::new(rhs),
				957	})
				958	}
				959	}
				960
				961	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
				962	/// Parse the regular expression into an abstract syntax tree.
				963	fn parse(&self) -> Result<Ast> {
				964	self.parse_with_comments().map(\|astc\| astc.ast)
				965	}
				966
				967	/// Parse the regular expression and return an abstract syntax tree with
				968	/// all of the comments found in the pattern.
				969	fn parse_with_comments(&self) -> Result<ast::WithComments> {
				970	assert_eq!(self.offset(), 0, "parser can only be used once");
				971	self.parser().reset();
				972	let mut concat = ast::Concat { span: self.span(), asts: vec![] };
				973	loop {
				974	self.bump_space();
				975	if self.is_eof() {
				976	break;
				977	}
				978	match self.char() {
				979	'(' => concat = self.push_group(concat)?,
				980	')' => concat = self.pop_group(concat)?,
				981	'\|' => concat = self.push_alternate(concat)?,
				982	'[' => {
				983	let class = self.parse_set_class()?;
				984	concat.asts.push(Ast::Class(class));
				985	}
				986	'?' => {
				987	concat = self.parse_uncounted_repetition(
				988	concat,
				989	ast::RepetitionKind::ZeroOrOne,
				990	)?;
				991	}
				992	'*' => {
				993	concat = self.parse_uncounted_repetition(
				994	concat,
				995	ast::RepetitionKind::ZeroOrMore,
				996	)?;
				997	}
				998	'+' => {
				999	concat = self.parse_uncounted_repetition(
				1000	concat,
				1001	ast::RepetitionKind::OneOrMore,
				1002	)?;
				1003	}
				1004	'{' => {
				1005	concat = self.parse_counted_repetition(concat)?;
				1006	}
				1007	_ => concat.asts.push(self.parse_primitive()?.into_ast()),
				1008	}
				1009	}
				1010	let ast = self.pop_group_end(concat)?;
				1011	NestLimiter::new(self).check(&ast)?;
				1012	Ok(ast::WithComments {
				1013	ast: ast,
				1014	comments: mem::replace(
				1015	&mut *self.parser().comments.borrow_mut(),
				1016	vec![],
				1017	),
				1018	})
				1019	}
				1020
				1021	/// Parses an uncounted repetition operation. An uncounted repetition
				1022	/// operator includes ?, * and +, but does not include the {m,n} syntax.
				1023	/// The given `kind` should correspond to the operator observed by the
				1024	/// caller.
				1025	///
				1026	/// This assumes that the paser is currently positioned at the repetition
				1027	/// operator and advances the parser to the first character after the
				1028	/// operator. (Note that the operator may include a single additional `?`,
				1029	/// which makes the operator ungreedy.)
				1030	///
				1031	/// The caller should include the concatenation that is being built. The
				1032	/// concatenation returned includes the repetition operator applied to the
				1033	/// last expression in the given concatenation.
				1034	#[inline(never)]
				1035	fn parse_uncounted_repetition(
				1036	&self,
				1037	mut concat: ast::Concat,
				1038	kind: ast::RepetitionKind,
				1039	) -> Result<ast::Concat> {
				1040	assert!(
				1041	self.char() == '?' \|\| self.char() == '*' \|\| self.char() == '+'
				1042	);
				1043	let op_start = self.pos();
				1044	let ast = match concat.asts.pop() {
				1045	Some(ast) => ast,
				1046	None => {
				1047	return Err(
				1048	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
				1049	)
				1050	}
				1051	};
				1052	match ast {
				1053	Ast::Empty(_) \| Ast::Flags(_) => {
				1054	return Err(
				1055	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
				1056	)
				1057	}
				1058	_ => {}
				1059	}
				1060	let mut greedy = true;
				1061	if self.bump() && self.char() == '?' {
				1062	greedy = false;
				1063	self.bump();
				1064	}
				1065	concat.asts.push(Ast::Repetition(ast::Repetition {
				1066	span: ast.span().with_end(self.pos()),
				1067	op: ast::RepetitionOp {
				1068	span: Span::new(op_start, self.pos()),
				1069	kind: kind,
				1070	},
				1071	greedy: greedy,
				1072	ast: Box::new(ast),
				1073	}));
				1074	Ok(concat)
				1075	}
				1076
				1077	/// Parses a counted repetition operation. A counted repetition operator
				1078	/// corresponds to the {m,n} syntax, and does not include the ?, * or +
				1079	/// operators.
				1080	///
				1081	/// This assumes that the paser is currently positioned at the opening `{`
				1082	/// and advances the parser to the first character after the operator.
				1083	/// (Note that the operator may include a single additional `?`, which
				1084	/// makes the operator ungreedy.)
				1085	///
				1086	/// The caller should include the concatenation that is being built. The
				1087	/// concatenation returned includes the repetition operator applied to the
				1088	/// last expression in the given concatenation.
				1089	#[inline(never)]
				1090	fn parse_counted_repetition(
				1091	&self,
				1092	mut concat: ast::Concat,
				1093	) -> Result<ast::Concat> {
				1094	assert!(self.char() == '{');
				1095	let start = self.pos();
				1096	let ast = match concat.asts.pop() {
				1097	Some(ast) => ast,
				1098	None => {
				1099	return Err(
				1100	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
				1101	)
				1102	}
				1103	};
				1104	match ast {
				1105	Ast::Empty(_) \| Ast::Flags(_) => {
				1106	return Err(
				1107	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
				1108	)
				1109	}
				1110	_ => {}
				1111	}
				1112	if !self.bump_and_bump_space() {
				1113	return Err(self.error(
				1114	Span::new(start, self.pos()),
				1115	ast::ErrorKind::RepetitionCountUnclosed,
				1116	));
				1117	}
				1118	let count_start = specialize_err(
				1119	self.parse_decimal(),
				1120	ast::ErrorKind::DecimalEmpty,
				1121	ast::ErrorKind::RepetitionCountDecimalEmpty,
				1122	)?;
				1123	let mut range = ast::RepetitionRange::Exactly(count_start);
				1124	if self.is_eof() {
				1125	return Err(self.error(
				1126	Span::new(start, self.pos()),
				1127	ast::ErrorKind::RepetitionCountUnclosed,
				1128	));
				1129	}
				1130	if self.char() == ',' {
				1131	if !self.bump_and_bump_space() {
				1132	return Err(self.error(
				1133	Span::new(start, self.pos()),
				1134	ast::ErrorKind::RepetitionCountUnclosed,
				1135	));
				1136	}
				1137	if self.char() != '}' {
				1138	let count_end = specialize_err(
				1139	self.parse_decimal(),
				1140	ast::ErrorKind::DecimalEmpty,
				1141	ast::ErrorKind::RepetitionCountDecimalEmpty,
				1142	)?;
				1143	range = ast::RepetitionRange::Bounded(count_start, count_end);
				1144	} else {
				1145	range = ast::RepetitionRange::AtLeast(count_start);
				1146	}
				1147	}
				1148	if self.is_eof() \|\| self.char() != '}' {
				1149	return Err(self.error(
				1150	Span::new(start, self.pos()),
				1151	ast::ErrorKind::RepetitionCountUnclosed,
				1152	));
				1153	}
				1154
				1155	let mut greedy = true;
				1156	if self.bump_and_bump_space() && self.char() == '?' {
				1157	greedy = false;
				1158	self.bump();
				1159	}
				1160
				1161	let op_span = Span::new(start, self.pos());
				1162	if !range.is_valid() {
				1163	return Err(
				1164	self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
				1165	);
				1166	}
				1167	concat.asts.push(Ast::Repetition(ast::Repetition {
				1168	span: ast.span().with_end(self.pos()),
				1169	op: ast::RepetitionOp {
				1170	span: op_span,
				1171	kind: ast::RepetitionKind::Range(range),
				1172	},
				1173	greedy: greedy,
				1174	ast: Box::new(ast),
				1175	}));
				1176	Ok(concat)
				1177	}
				1178
				1179	/// Parse a group (which contains a sub-expression) or a set of flags.
				1180	///
				1181	/// If a group was found, then it is returned with an empty AST. If a set
				1182	/// of flags is found, then that set is returned.
				1183	///
				1184	/// The parser should be positioned at the opening parenthesis.
				1185	///
				1186	/// This advances the parser to the character before the start of the
				1187	/// sub-expression (in the case of a group) or to the closing parenthesis
				1188	/// immediately following the set of flags.
				1189	///
				1190	/// # Errors
				1191	///
				1192	/// If flags are given and incorrectly specified, then a corresponding
				1193	/// error is returned.
				1194	///
				1195	/// If a capture name is given and it is incorrectly specified, then a
				1196	/// corresponding error is returned.
				1197	#[inline(never)]
				1198	fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
				1199	assert_eq!(self.char(), '(');
				1200	let open_span = self.span_char();
				1201	self.bump();
				1202	self.bump_space();
				1203	if self.is_lookaround_prefix() {
				1204	return Err(self.error(
				1205	Span::new(open_span.start, self.span().end),
				1206	ast::ErrorKind::UnsupportedLookAround,
				1207	));
				1208	}
				1209	let inner_span = self.span();
				1210	if self.bump_if("?P<") {
				1211	let capture_index = self.next_capture_index(open_span)?;
				1212	let cap = self.parse_capture_name(capture_index)?;
				1213	Ok(Either::Right(ast::Group {
				1214	span: open_span,
				1215	kind: ast::GroupKind::CaptureName(cap),
				1216	ast: Box::new(Ast::Empty(self.span())),
				1217	}))
				1218	} else if self.bump_if("?") {
				1219	if self.is_eof() {
				1220	return Err(
				1221	self.error(open_span, ast::ErrorKind::GroupUnclosed)
				1222	);
				1223	}
				1224	let flags = self.parse_flags()?;
				1225	let char_end = self.char();
				1226	self.bump();
				1227	if char_end == ')' {
				1228	// We don't allow empty flags, e.g., `(?)`. We instead
				1229	// interpret it as a repetition operator missing its argument.
				1230	if flags.items.is_empty() {
				1231	return Err(self.error(
				1232	inner_span,
				1233	ast::ErrorKind::RepetitionMissing,
				1234	));
				1235	}
				1236	Ok(Either::Left(ast::SetFlags {
				1237	span: Span { end: self.pos(), ..open_span },
				1238	flags: flags,
				1239	}))
				1240	} else {
				1241	assert_eq!(char_end, ':');
				1242	Ok(Either::Right(ast::Group {
				1243	span: open_span,
				1244	kind: ast::GroupKind::NonCapturing(flags),
				1245	ast: Box::new(Ast::Empty(self.span())),
				1246	}))
				1247	}
				1248	} else {
				1249	let capture_index = self.next_capture_index(open_span)?;
				1250	Ok(Either::Right(ast::Group {
				1251	span: open_span,
				1252	kind: ast::GroupKind::CaptureIndex(capture_index),
				1253	ast: Box::new(Ast::Empty(self.span())),
				1254	}))
				1255	}
				1256	}
				1257
				1258	/// Parses a capture group name. Assumes that the parser is positioned at
				1259	/// the first character in the name following the opening `<` (and may
				1260	/// possibly be EOF). This advances the parser to the first character
				1261	/// following the closing `>`.
				1262	///
				1263	/// The caller must provide the capture index of the group for this name.
				1264	#[inline(never)]
				1265	fn parse_capture_name(
				1266	&self,
				1267	capture_index: u32,
				1268	) -> Result<ast::CaptureName> {
				1269	if self.is_eof() {
				1270	return Err(self
				1271	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
				1272	}
				1273	let start = self.pos();
				1274	loop {
				1275	if self.char() == '>' {
				1276	break;
				1277	}
				1278	if !is_capture_char(self.char(), self.pos() == start) {
				1279	return Err(self.error(
				1280	self.span_char(),
				1281	ast::ErrorKind::GroupNameInvalid,
				1282	));
				1283	}
				1284	if !self.bump() {
				1285	break;
				1286	}
				1287	}
				1288	let end = self.pos();
				1289	if self.is_eof() {
				1290	return Err(self
				1291	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
				1292	}
				1293	assert_eq!(self.char(), '>');
				1294	self.bump();
				1295	let name = &self.pattern()[start.offset..end.offset];
				1296	if name.is_empty() {
				1297	return Err(self.error(
				1298	Span::new(start, start),
				1299	ast::ErrorKind::GroupNameEmpty,
				1300	));
				1301	}
				1302	let capname = ast::CaptureName {
				1303	span: Span::new(start, end),
				1304	name: name.to_string(),
				1305	index: capture_index,
				1306	};
				1307	self.add_capture_name(&capname)?;
				1308	Ok(capname)
				1309	}
				1310
				1311	/// Parse a sequence of flags starting at the current character.
				1312	///
				1313	/// This advances the parser to the character immediately following the
				1314	/// flags, which is guaranteed to be either `:` or `)`.
				1315	///
				1316	/// # Errors
				1317	///
				1318	/// If any flags are duplicated, then an error is returned.
				1319	///
				1320	/// If the negation operator is used more than once, then an error is
				1321	/// returned.
				1322	///
				1323	/// If no flags could be found or if the negation operation is not followed
				1324	/// by any flags, then an error is returned.
				1325	#[inline(never)]
				1326	fn parse_flags(&self) -> Result<ast::Flags> {
				1327	let mut flags = ast::Flags { span: self.span(), items: vec![] };
				1328	let mut last_was_negation = None;
				1329	while self.char() != ':' && self.char() != ')' {
				1330	if self.char() == '-' {
				1331	last_was_negation = Some(self.span_char());
				1332	let item = ast::FlagsItem {
				1333	span: self.span_char(),
				1334	kind: ast::FlagsItemKind::Negation,
				1335	};
				1336	if let Some(i) = flags.add_item(item) {
				1337	return Err(self.error(
				1338	self.span_char(),
				1339	ast::ErrorKind::FlagRepeatedNegation {
				1340	original: flags.items[i].span,
				1341	},
				1342	));
				1343	}
				1344	} else {
				1345	last_was_negation = None;
				1346	let item = ast::FlagsItem {
				1347	span: self.span_char(),
				1348	kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
				1349	};
				1350	if let Some(i) = flags.add_item(item) {
				1351	return Err(self.error(
				1352	self.span_char(),
				1353	ast::ErrorKind::FlagDuplicate {
				1354	original: flags.items[i].span,
				1355	},
				1356	));
				1357	}
				1358	}
				1359	if !self.bump() {
				1360	return Err(
				1361	self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
				1362	);
				1363	}
				1364	}
				1365	if let Some(span) = last_was_negation {
				1366	return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
				1367	}
				1368	flags.span.end = self.pos();
				1369	Ok(flags)
				1370	}
				1371
				1372	/// Parse the current character as a flag. Do not advance the parser.
				1373	///
				1374	/// # Errors
				1375	///
				1376	/// If the flag is not recognized, then an error is returned.
				1377	#[inline(never)]
				1378	fn parse_flag(&self) -> Result<ast::Flag> {
				1379	match self.char() {
				1380	'i' => Ok(ast::Flag::CaseInsensitive),
				1381	'm' => Ok(ast::Flag::MultiLine),
				1382	's' => Ok(ast::Flag::DotMatchesNewLine),
				1383	'U' => Ok(ast::Flag::SwapGreed),
				1384	'u' => Ok(ast::Flag::Unicode),
				1385	'x' => Ok(ast::Flag::IgnoreWhitespace),
				1386	_ => {
				1387	Err(self
				1388	.error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
				1389	}
				1390	}
				1391	}
				1392
				1393	/// Parse a primitive AST. e.g., A literal, non-set character class or
				1394	/// assertion.
				1395	///
				1396	/// This assumes that the parser expects a primitive at the current
				1397	/// location. i.e., All other non-primitive cases have been handled.
				1398	/// For example, if the parser's position is at `\|`, then `\|` will be
				1399	/// treated as a literal (e.g., inside a character class).
				1400	///
				1401	/// This advances the parser to the first character immediately following
				1402	/// the primitive.
				1403	fn parse_primitive(&self) -> Result<Primitive> {
				1404	match self.char() {
				1405	'\\' => self.parse_escape(),
				1406	'.' => {
				1407	let ast = Primitive::Dot(self.span_char());
				1408	self.bump();
				1409	Ok(ast)
				1410	}
				1411	'^' => {
				1412	let ast = Primitive::Assertion(ast::Assertion {
				1413	span: self.span_char(),
				1414	kind: ast::AssertionKind::StartLine,
				1415	});
				1416	self.bump();
				1417	Ok(ast)
				1418	}
				1419	'$' => {
				1420	let ast = Primitive::Assertion(ast::Assertion {
				1421	span: self.span_char(),
				1422	kind: ast::AssertionKind::EndLine,
				1423	});
				1424	self.bump();
				1425	Ok(ast)
				1426	}
				1427	c => {
				1428	let ast = Primitive::Literal(ast::Literal {
				1429	span: self.span_char(),
				1430	kind: ast::LiteralKind::Verbatim,
				1431	c: c,
				1432	});
				1433	self.bump();
				1434	Ok(ast)
				1435	}
				1436	}
				1437	}
				1438
				1439	/// Parse an escape sequence as a primitive AST.
				1440	///
				1441	/// This assumes the parser is positioned at the start of the escape
				1442	/// sequence, i.e., `\`. It advances the parser to the first position
				1443	/// immediately following the escape sequence.
				1444	#[inline(never)]
				1445	fn parse_escape(&self) -> Result<Primitive> {
				1446	assert_eq!(self.char(), '\\');
				1447	let start = self.pos();
				1448	if !self.bump() {
				1449	return Err(self.error(
				1450	Span::new(start, self.pos()),
				1451	ast::ErrorKind::EscapeUnexpectedEof,
				1452	));
				1453	}
				1454	let c = self.char();
				1455	// Put some of the more complicated routines into helpers.
				1456	match c {
				1457	'0'..='7' => {
				1458	if !self.parser().octal {
				1459	return Err(self.error(
				1460	Span::new(start, self.span_char().end),
				1461	ast::ErrorKind::UnsupportedBackreference,
				1462	));
				1463	}
				1464	let mut lit = self.parse_octal();
				1465	lit.span.start = start;
				1466	return Ok(Primitive::Literal(lit));
				1467	}
				1468	'8'..='9' if !self.parser().octal => {
				1469	return Err(self.error(
				1470	Span::new(start, self.span_char().end),
				1471	ast::ErrorKind::UnsupportedBackreference,
				1472	));
				1473	}
				1474	'x' \| 'u' \| 'U' => {
				1475	let mut lit = self.parse_hex()?;
				1476	lit.span.start = start;
				1477	return Ok(Primitive::Literal(lit));
				1478	}
				1479	'p' \| 'P' => {
				1480	let mut cls = self.parse_unicode_class()?;
				1481	cls.span.start = start;
				1482	return Ok(Primitive::Unicode(cls));
				1483	}
				1484	'd' \| 's' \| 'w' \| 'D' \| 'S' \| 'W' => {
				1485	let mut cls = self.parse_perl_class();
				1486	cls.span.start = start;
				1487	return Ok(Primitive::Perl(cls));
				1488	}
				1489	_ => {}
				1490	}
				1491
				1492	// Handle all of the one letter sequences inline.
				1493	self.bump();
				1494	let span = Span::new(start, self.pos());
				1495	if is_meta_character(c) {
				1496	return Ok(Primitive::Literal(ast::Literal {
				1497	span: span,
				1498	kind: ast::LiteralKind::Punctuation,
				1499	c: c,
				1500	}));
				1501	}
				1502	let special = \|kind, c\| {
				1503	Ok(Primitive::Literal(ast::Literal {
				1504	span: span,
				1505	kind: ast::LiteralKind::Special(kind),
				1506	c: c,
				1507	}))
				1508	};
				1509	match c {
				1510	'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
				1511	'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
				1512	't' => special(ast::SpecialLiteralKind::Tab, '\t'),
				1513	'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
				1514	'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
				1515	'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
				1516	' ' if self.ignore_whitespace() => {
				1517	special(ast::SpecialLiteralKind::Space, ' ')
				1518	}
				1519	'A' => Ok(Primitive::Assertion(ast::Assertion {
				1520	span: span,
				1521	kind: ast::AssertionKind::StartText,
				1522	})),
				1523	'z' => Ok(Primitive::Assertion(ast::Assertion {
				1524	span: span,
				1525	kind: ast::AssertionKind::EndText,
				1526	})),
				1527	'b' => Ok(Primitive::Assertion(ast::Assertion {
				1528	span: span,
				1529	kind: ast::AssertionKind::WordBoundary,
				1530	})),
				1531	'B' => Ok(Primitive::Assertion(ast::Assertion {
				1532	span: span,
				1533	kind: ast::AssertionKind::NotWordBoundary,
				1534	})),
				1535	_ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
				1536	}
				1537	}
				1538
				1539	/// Parse an octal representation of a Unicode codepoint up to 3 digits
				1540	/// long. This expects the parser to be positioned at the first octal
				1541	/// digit and advances the parser to the first character immediately
				1542	/// following the octal number. This also assumes that parsing octal
				1543	/// escapes is enabled.
				1544	///
				1545	/// Assuming the preconditions are met, this routine can never fail.
				1546	#[inline(never)]
				1547	fn parse_octal(&self) -> ast::Literal {
				1548	use std::char;
				1549	use std::u32;
				1550
				1551	assert!(self.parser().octal);
				1552	assert!('0' <= self.char() && self.char() <= '7');
				1553	let start = self.pos();
				1554	// Parse up to two more digits.
				1555	while self.bump()
				1556	&& '0' <= self.char()
				1557	&& self.char() <= '7'
				1558	&& self.pos().offset - start.offset <= 2
				1559	{}
				1560	let end = self.pos();
				1561	let octal = &self.pattern()[start.offset..end.offset];
				1562	// Parsing the octal should never fail since the above guarantees a
				1563	// valid number.
				1564	let codepoint =
				1565	u32::from_str_radix(octal, 8).expect("valid octal number");
				1566	// The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
				1567	// invalid Unicode scalar values.
				1568	let c = char::from_u32(codepoint).expect("Unicode scalar value");
				1569	ast::Literal {
				1570	span: Span::new(start, end),
				1571	kind: ast::LiteralKind::Octal,
				1572	c: c,
				1573	}
				1574	}
				1575
				1576	/// Parse a hex representation of a Unicode codepoint. This handles both
				1577	/// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
				1578	/// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
				1579	/// the first character immediately following the hexadecimal literal.
				1580	#[inline(never)]
				1581	fn parse_hex(&self) -> Result<ast::Literal> {
				1582	assert!(
				1583	self.char() == 'x' \|\| self.char() == 'u' \|\| self.char() == 'U'
				1584	);
				1585
				1586	let hex_kind = match self.char() {
				1587	'x' => ast::HexLiteralKind::X,
				1588	'u' => ast::HexLiteralKind::UnicodeShort,
				1589	_ => ast::HexLiteralKind::UnicodeLong,
				1590	};
				1591	if !self.bump_and_bump_space() {
				1592	return Err(
				1593	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
				1594	);
				1595	}
				1596	if self.char() == '{' {
				1597	self.parse_hex_brace(hex_kind)
				1598	} else {
				1599	self.parse_hex_digits(hex_kind)
				1600	}
				1601	}
				1602
				1603	/// Parse an N-digit hex representation of a Unicode codepoint. This
				1604	/// expects the parser to be positioned at the first digit and will advance
				1605	/// the parser to the first character immediately following the escape
				1606	/// sequence.
				1607	///
				1608	/// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
				1609	/// or 8 (for `\UNNNNNNNN`).
				1610	#[inline(never)]
				1611	fn parse_hex_digits(
				1612	&self,
				1613	kind: ast::HexLiteralKind,
				1614	) -> Result<ast::Literal> {
				1615	use std::char;
				1616	use std::u32;
				1617
				1618	let mut scratch = self.parser().scratch.borrow_mut();
				1619	scratch.clear();
				1620
				1621	let start = self.pos();
				1622	for i in 0..kind.digits() {
				1623	if i > 0 && !self.bump_and_bump_space() {
				1624	return Err(self
				1625	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
				1626	}
				1627	if !is_hex(self.char()) {
				1628	return Err(self.error(
				1629	self.span_char(),
				1630	ast::ErrorKind::EscapeHexInvalidDigit,
				1631	));
				1632	}
				1633	scratch.push(self.char());
				1634	}
				1635	// The final bump just moves the parser past the literal, which may
				1636	// be EOF.
				1637	self.bump_and_bump_space();
				1638	let end = self.pos();
				1639	let hex = scratch.as_str();
				1640	match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
				1641	None => Err(self.error(
				1642	Span::new(start, end),
				1643	ast::ErrorKind::EscapeHexInvalid,
				1644	)),
				1645	Some(c) => Ok(ast::Literal {
				1646	span: Span::new(start, end),
				1647	kind: ast::LiteralKind::HexFixed(kind),
				1648	c: c,
				1649	}),
				1650	}
				1651	}
				1652
				1653	/// Parse a hex representation of any Unicode scalar value. This expects
				1654	/// the parser to be positioned at the opening brace `{` and will advance
				1655	/// the parser to the first character following the closing brace `}`.
				1656	#[inline(never)]
				1657	fn parse_hex_brace(
				1658	&self,
				1659	kind: ast::HexLiteralKind,
				1660	) -> Result<ast::Literal> {
				1661	use std::char;
				1662	use std::u32;
				1663
				1664	let mut scratch = self.parser().scratch.borrow_mut();
				1665	scratch.clear();
				1666
				1667	let brace_pos = self.pos();
				1668	let start = self.span_char().end;
				1669	while self.bump_and_bump_space() && self.char() != '}' {
				1670	if !is_hex(self.char()) {
				1671	return Err(self.error(
				1672	self.span_char(),
				1673	ast::ErrorKind::EscapeHexInvalidDigit,
				1674	));
				1675	}
				1676	scratch.push(self.char());
				1677	}
				1678	if self.is_eof() {
				1679	return Err(self.error(
				1680	Span::new(brace_pos, self.pos()),
				1681	ast::ErrorKind::EscapeUnexpectedEof,
				1682	));
				1683	}
				1684	let end = self.pos();
				1685	let hex = scratch.as_str();
				1686	assert_eq!(self.char(), '}');
				1687	self.bump_and_bump_space();
				1688
				1689	if hex.is_empty() {
				1690	return Err(self.error(
				1691	Span::new(brace_pos, self.pos()),
				1692	ast::ErrorKind::EscapeHexEmpty,
				1693	));
				1694	}
				1695	match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
				1696	None => Err(self.error(
				1697	Span::new(start, end),
				1698	ast::ErrorKind::EscapeHexInvalid,
				1699	)),
				1700	Some(c) => Ok(ast::Literal {
				1701	span: Span::new(start, self.pos()),
				1702	kind: ast::LiteralKind::HexBrace(kind),
				1703	c: c,
				1704	}),
				1705	}
				1706	}
				1707
				1708	/// Parse a decimal number into a u32 while trimming leading and trailing
				1709	/// whitespace.
				1710	///
				1711	/// This expects the parser to be positioned at the first position where
				1712	/// a decimal digit could occur. This will advance the parser to the byte
				1713	/// immediately following the last contiguous decimal digit.
				1714	///
				1715	/// If no decimal digit could be found or if there was a problem parsing
				1716	/// the complete set of digits into a u32, then an error is returned.
				1717	fn parse_decimal(&self) -> Result<u32> {
				1718	let mut scratch = self.parser().scratch.borrow_mut();
				1719	scratch.clear();
				1720
				1721	while !self.is_eof() && self.char().is_whitespace() {
				1722	self.bump();
				1723	}
				1724	let start = self.pos();
				1725	while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
				1726	scratch.push(self.char());
				1727	self.bump_and_bump_space();
				1728	}
				1729	let span = Span::new(start, self.pos());
				1730	while !self.is_eof() && self.char().is_whitespace() {
				1731	self.bump_and_bump_space();
				1732	}
				1733	let digits = scratch.as_str();
				1734	if digits.is_empty() {
				1735	return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
				1736	}
				1737	match u32::from_str_radix(digits, 10).ok() {
				1738	Some(n) => Ok(n),
				1739	None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
				1740	}
				1741	}
				1742
				1743	/// Parse a standard character class consisting primarily of characters or
				1744	/// character ranges, but can also contain nested character classes of
				1745	/// any type (sans `.`).
				1746	///
				1747	/// This assumes the parser is positioned at the opening `[`. If parsing
				1748	/// is successful, then the parser is advanced to the position immediately
				1749	/// following the closing `]`.
				1750	#[inline(never)]
				1751	fn parse_set_class(&self) -> Result<ast::Class> {
				1752	assert_eq!(self.char(), '[');
				1753
				1754	let mut union =
				1755	ast::ClassSetUnion { span: self.span(), items: vec![] };
				1756	loop {
				1757	self.bump_space();
				1758	if self.is_eof() {
				1759	return Err(self.unclosed_class_error());
				1760	}
				1761	match self.char() {
				1762	'[' => {
				1763	// If we've already parsed the opening bracket, then
				1764	// attempt to treat this as the beginning of an ASCII
				1765	// class. If ASCII class parsing fails, then the parser
				1766	// backs up to `[`.
				1767	if !self.parser().stack_class.borrow().is_empty() {
				1768	if let Some(cls) = self.maybe_parse_ascii_class() {
				1769	union.push(ast::ClassSetItem::Ascii(cls));
				1770	continue;
				1771	}
				1772	}
				1773	union = self.push_class_open(union)?;
				1774	}
				1775	']' => match self.pop_class(union)? {
				1776	Either::Left(nested_union) => {
				1777	union = nested_union;
				1778	}
				1779	Either::Right(class) => return Ok(class),
				1780	},
				1781	'&' if self.peek() == Some('&') => {
				1782	assert!(self.bump_if("&&"));
				1783	union = self.push_class_op(
				1784	ast::ClassSetBinaryOpKind::Intersection,
				1785	union,
				1786	);
				1787	}
				1788	'-' if self.peek() == Some('-') => {
				1789	assert!(self.bump_if("--"));
				1790	union = self.push_class_op(
				1791	ast::ClassSetBinaryOpKind::Difference,
				1792	union,
				1793	);
				1794	}
				1795	'~' if self.peek() == Some('~') => {
				1796	assert!(self.bump_if("~~"));
				1797	union = self.push_class_op(
				1798	ast::ClassSetBinaryOpKind::SymmetricDifference,
				1799	union,
				1800	);
				1801	}
				1802	_ => {
				1803	union.push(self.parse_set_class_range()?);
				1804	}
				1805	}
				1806	}
				1807	}
				1808
				1809	/// Parse a single primitive item in a character class set. The item to
				1810	/// be parsed can either be one of a simple literal character, a range
				1811	/// between two simple literal characters or a "primitive" character
				1812	/// class like \w or \p{Greek}.
				1813	///
				1814	/// If an invalid escape is found, or if a character class is found where
				1815	/// a simple literal is expected (e.g., in a range), then an error is
				1816	/// returned.
				1817	#[inline(never)]
				1818	fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
				1819	let prim1 = self.parse_set_class_item()?;
				1820	self.bump_space();
				1821	if self.is_eof() {
				1822	return Err(self.unclosed_class_error());
				1823	}
				1824	// If the next char isn't a `-`, then we don't have a range.
				1825	// There are two exceptions. If the char after a `-` is a `]`, then
				1826	// `-` is interpreted as a literal `-`. Alternatively, if the char
				1827	// after a `-` is a `-`, then `--` corresponds to a "difference"
				1828	// operation.
				1829	if self.char() != '-'
				1830	\|\| self.peek_space() == Some(']')
				1831	\|\| self.peek_space() == Some('-')
				1832	{
				1833	return prim1.into_class_set_item(self);
				1834	}
				1835	// OK, now we're parsing a range, so bump past the `-` and parse the
				1836	// second half of the range.
				1837	if !self.bump_and_bump_space() {
				1838	return Err(self.unclosed_class_error());
				1839	}
				1840	let prim2 = self.parse_set_class_item()?;
				1841	let range = ast::ClassSetRange {
				1842	span: Span::new(prim1.span().start, prim2.span().end),
				1843	start: prim1.into_class_literal(self)?,
				1844	end: prim2.into_class_literal(self)?,
				1845	};
				1846	if !range.is_valid() {
				1847	return Err(
				1848	self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
				1849	);
				1850	}
				1851	Ok(ast::ClassSetItem::Range(range))
				1852	}
				1853
				1854	/// Parse a single item in a character class as a primitive, where the
				1855	/// primitive either consists of a verbatim literal or a single escape
				1856	/// sequence.
				1857	///
				1858	/// This assumes the parser is positioned at the beginning of a primitive,
				1859	/// and advances the parser to the first position after the primitive if
				1860	/// successful.
				1861	///
				1862	/// Note that it is the caller's responsibility to report an error if an
				1863	/// illegal primitive was parsed.
				1864	#[inline(never)]
				1865	fn parse_set_class_item(&self) -> Result<Primitive> {
				1866	if self.char() == '\\' {
				1867	self.parse_escape()
				1868	} else {
				1869	let x = Primitive::Literal(ast::Literal {
				1870	span: self.span_char(),
				1871	kind: ast::LiteralKind::Verbatim,
				1872	c: self.char(),
				1873	});
				1874	self.bump();
				1875	Ok(x)
				1876	}
				1877	}
				1878
				1879	/// Parses the opening of a character class set. This includes the opening
				1880	/// bracket along with `^` if present to indicate negation. This also
				1881	/// starts parsing the opening set of unioned items if applicable, since
				1882	/// there are special rules applied to certain characters in the opening
				1883	/// of a character class. For example, `[^]]` is the class of all
				1884	/// characters not equal to `]`. (`]` would need to be escaped in any other
				1885	/// position.) Similarly for `-`.
				1886	///
				1887	/// In all cases, the op inside the returned `ast::ClassBracketed` is an
				1888	/// empty union. This empty union should be replaced with the actual item
				1889	/// when it is popped from the parser's stack.
				1890	///
				1891	/// This assumes the parser is positioned at the opening `[` and advances
				1892	/// the parser to the first non-special byte of the character class.
				1893	///
				1894	/// An error is returned if EOF is found.
				1895	#[inline(never)]
				1896	fn parse_set_class_open(
				1897	&self,
				1898	) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
				1899	assert_eq!(self.char(), '[');
				1900	let start = self.pos();
				1901	if !self.bump_and_bump_space() {
				1902	return Err(self.error(
				1903	Span::new(start, self.pos()),
				1904	ast::ErrorKind::ClassUnclosed,
				1905	));
				1906	}
				1907
				1908	let negated = if self.char() != '^' {
				1909	false
				1910	} else {
				1911	if !self.bump_and_bump_space() {
				1912	return Err(self.error(
				1913	Span::new(start, self.pos()),
				1914	ast::ErrorKind::ClassUnclosed,
				1915	));
				1916	}
				1917	true
				1918	};
				1919	// Accept any number of `-` as literal `-`.
				1920	let mut union =
				1921	ast::ClassSetUnion { span: self.span(), items: vec![] };
				1922	while self.char() == '-' {
				1923	union.push(ast::ClassSetItem::Literal(ast::Literal {
				1924	span: self.span_char(),
				1925	kind: ast::LiteralKind::Verbatim,
				1926	c: '-',
				1927	}));
				1928	if !self.bump_and_bump_space() {
				1929	return Err(self.error(
				1930	Span::new(start, self.pos()),
				1931	ast::ErrorKind::ClassUnclosed,
				1932	));
				1933	}
				1934	}
				1935	// If `]` is the first char in a set, then interpret it as a literal
				1936	// `]`. That is, an empty class is impossible to write.
				1937	if union.items.is_empty() && self.char() == ']' {
				1938	union.push(ast::ClassSetItem::Literal(ast::Literal {
				1939	span: self.span_char(),
				1940	kind: ast::LiteralKind::Verbatim,
				1941	c: ']',
				1942	}));
				1943	if !self.bump_and_bump_space() {
				1944	return Err(self.error(
				1945	Span::new(start, self.pos()),
				1946	ast::ErrorKind::ClassUnclosed,
				1947	));
				1948	}
				1949	}
				1950	let set = ast::ClassBracketed {
				1951	span: Span::new(start, self.pos()),
				1952	negated: negated,
				1953	kind: ast::ClassSet::union(ast::ClassSetUnion {
				1954	span: Span::new(union.span.start, union.span.start),
				1955	items: vec![],
				1956	}),
				1957	};
				1958	Ok((set, union))
				1959	}
				1960
				1961	/// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
				1962	///
				1963	/// This assumes the parser is positioned at the opening `[`.
				1964	///
				1965	/// If no valid ASCII character class could be found, then this does not
				1966	/// advance the parser and `None` is returned. Otherwise, the parser is
				1967	/// advanced to the first byte following the closing `]` and the
				1968	/// corresponding ASCII class is returned.
				1969	#[inline(never)]
				1970	fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
				1971	// ASCII character classes are interesting from a parsing perspective
				1972	// because parsing cannot fail with any interesting error. For example,
				1973	// in order to use an ASCII character class, it must be enclosed in
				1974	// double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
				1975	// of it as "ASCII character characters have the syntax `[:NAME:]`
				1976	// which can only appear within character brackets." This means that
				1977	// things like `[[:lower:]A]` are legal constructs.
				1978	//
				1979	// However, if one types an incorrect ASCII character class, e.g.,
				1980	// `[[:loower:]]`, then we treat that as a normal nested character
				1981	// class containing the characters `:elorw`. One might argue that we
				1982	// should return an error instead since the repeated colons give away
				1983	// the intent to write an ASCII class. But what if the user typed
				1984	// `[[:lower]]` instead? How can we tell that was intended to be an
				1985	// ASCII class and not just a normal nested class?
				1986	//
				1987	// Reasonable people can probably disagree over this, but for better
				1988	// or worse, we implement semantics that never fails at the expense
				1989	// of better failure modes.
				1990	assert_eq!(self.char(), '[');
				1991	// If parsing fails, then we back up the parser to this starting point.
				1992	let start = self.pos();
				1993	let mut negated = false;
				1994	if !self.bump() \|\| self.char() != ':' {
				1995	self.parser().pos.set(start);
				1996	return None;
				1997	}
				1998	if !self.bump() {
				1999	self.parser().pos.set(start);
				2000	return None;
				2001	}
				2002	if self.char() == '^' {
				2003	negated = true;
				2004	if !self.bump() {
				2005	self.parser().pos.set(start);
				2006	return None;
				2007	}
				2008	}
				2009	let name_start = self.offset();
				2010	while self.char() != ':' && self.bump() {}
				2011	if self.is_eof() {
				2012	self.parser().pos.set(start);
				2013	return None;
				2014	}
				2015	let name = &self.pattern()[name_start..self.offset()];
				2016	if !self.bump_if(":]") {
				2017	self.parser().pos.set(start);
				2018	return None;
				2019	}
				2020	let kind = match ast::ClassAsciiKind::from_name(name) {
				2021	Some(kind) => kind,
				2022	None => {
				2023	self.parser().pos.set(start);
				2024	return None;
				2025	}
				2026	};
				2027	Some(ast::ClassAscii {
				2028	span: Span::new(start, self.pos()),
				2029	kind: kind,
				2030	negated: negated,
				2031	})
				2032	}
				2033
				2034	/// Parse a Unicode class in either the single character notation, `\pN`
				2035	/// or the multi-character bracketed notation, `\p{Greek}`. This assumes
				2036	/// the parser is positioned at the `p` (or `P` for negation) and will
				2037	/// advance the parser to the character immediately following the class.
				2038	///
				2039	/// Note that this does not check whether the class name is valid or not.
				2040	#[inline(never)]
				2041	fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
				2042	assert!(self.char() == 'p' \|\| self.char() == 'P');
				2043
				2044	let mut scratch = self.parser().scratch.borrow_mut();
				2045	scratch.clear();
				2046
				2047	let negated = self.char() == 'P';
				2048	if !self.bump_and_bump_space() {
				2049	return Err(
				2050	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
				2051	);
				2052	}
				2053	let (start, kind) = if self.char() == '{' {
				2054	let start = self.span_char().end;
				2055	while self.bump_and_bump_space() && self.char() != '}' {
				2056	scratch.push(self.char());
				2057	}
				2058	if self.is_eof() {
				2059	return Err(self
				2060	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
				2061	}
				2062	assert_eq!(self.char(), '}');
				2063	self.bump();
				2064
				2065	let name = scratch.as_str();
				2066	if let Some(i) = name.find("!=") {
				2067	(
				2068	start,
				2069	ast::ClassUnicodeKind::NamedValue {
				2070	op: ast::ClassUnicodeOpKind::NotEqual,
				2071	name: name[..i].to_string(),
				2072	value: name[i + 2..].to_string(),
				2073	},
				2074	)
				2075	} else if let Some(i) = name.find(':') {
				2076	(
				2077	start,
				2078	ast::ClassUnicodeKind::NamedValue {
				2079	op: ast::ClassUnicodeOpKind::Colon,
				2080	name: name[..i].to_string(),
				2081	value: name[i + 1..].to_string(),
				2082	},
				2083	)
				2084	} else if let Some(i) = name.find('=') {
				2085	(
				2086	start,
				2087	ast::ClassUnicodeKind::NamedValue {
				2088	op: ast::ClassUnicodeOpKind::Equal,
				2089	name: name[..i].to_string(),
				2090	value: name[i + 1..].to_string(),
				2091	},
				2092	)
				2093	} else {
				2094	(start, ast::ClassUnicodeKind::Named(name.to_string()))
				2095	}
				2096	} else {
				2097	let start = self.pos();
				2098	let c = self.char();
				2099	if c == '\\' {
				2100	return Err(self.error(
				2101	self.span_char(),
				2102	ast::ErrorKind::UnicodeClassInvalid,
				2103	));
				2104	}
				2105	self.bump_and_bump_space();
				2106	let kind = ast::ClassUnicodeKind::OneLetter(c);
				2107	(start, kind)
				2108	};
				2109	Ok(ast::ClassUnicode {
				2110	span: Span::new(start, self.pos()),
				2111	negated: negated,
				2112	kind: kind,
				2113	})
				2114	}
				2115
				2116	/// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
				2117	/// parser is currently at a valid character class name and will be
				2118	/// advanced to the character immediately following the class.
				2119	#[inline(never)]
				2120	fn parse_perl_class(&self) -> ast::ClassPerl {
				2121	let c = self.char();
				2122	let span = self.span_char();
				2123	self.bump();
				2124	let (negated, kind) = match c {
				2125	'd' => (false, ast::ClassPerlKind::Digit),
				2126	'D' => (true, ast::ClassPerlKind::Digit),
				2127	's' => (false, ast::ClassPerlKind::Space),
				2128	'S' => (true, ast::ClassPerlKind::Space),
				2129	'w' => (false, ast::ClassPerlKind::Word),
				2130	'W' => (true, ast::ClassPerlKind::Word),
				2131	c => panic!("expected valid Perl class but got '{}'", c),
				2132	};
				2133	ast::ClassPerl { span: span, kind: kind, negated: negated }
				2134	}
				2135	}
				2136
				2137	/// A type that traverses a fully parsed Ast and checks whether its depth
				2138	/// exceeds the specified nesting limit. If it does, then an error is returned.
				2139	#[derive(Debug)]
				2140	struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
				2141	/// The parser that is checking the nest limit.
				2142	p: &'p ParserI<'s, P>,
				2143	/// The current depth while walking an Ast.
				2144	depth: u32,
				2145	}
				2146
				2147	impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
				2148	fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
				2149	NestLimiter { p: p, depth: 0 }
				2150	}
				2151
				2152	#[inline(never)]
				2153	fn check(self, ast: &Ast) -> Result<()> {
				2154	ast::visit(ast, self)
				2155	}
				2156
				2157	fn increment_depth(&mut self, span: &Span) -> Result<()> {
				2158	let new = self.depth.checked_add(1).ok_or_else(\|\| {
				2159	self.p.error(
				2160	span.clone(),
				2161	ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
				2162	)
				2163	})?;
				2164	let limit = self.p.parser().nest_limit;
				2165	if new > limit {
				2166	return Err(self.p.error(
				2167	span.clone(),
				2168	ast::ErrorKind::NestLimitExceeded(limit),
				2169	));
				2170	}
				2171	self.depth = new;
				2172	Ok(())
				2173	}
				2174
				2175	fn decrement_depth(&mut self) {
				2176	// Assuming the correctness of the visitor, this should never drop
				2177	// below 0.
				2178	self.depth = self.depth.checked_sub(1).unwrap();
				2179	}
				2180	}
				2181
				2182	impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
				2183	type Output = ();
				2184	type Err = ast::Error;
				2185
				2186	fn finish(self) -> Result<()> {
				2187	Ok(())
				2188	}
				2189
				2190	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
				2191	let span = match *ast {
				2192	Ast::Empty(_)
				2193	\| Ast::Flags(_)
				2194	\| Ast::Literal(_)
				2195	\| Ast::Dot(_)
				2196	\| Ast::Assertion(_)
				2197	\| Ast::Class(ast::Class::Unicode(_))
				2198	\| Ast::Class(ast::Class::Perl(_)) => {
				2199	// These are all base cases, so we don't increment depth.
				2200	return Ok(());
				2201	}
				2202	Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
				2203	Ast::Repetition(ref x) => &x.span,
				2204	Ast::Group(ref x) => &x.span,
				2205	Ast::Alternation(ref x) => &x.span,
				2206	Ast::Concat(ref x) => &x.span,
				2207	};
				2208	self.increment_depth(span)
				2209	}
				2210
				2211	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
				2212	match *ast {
				2213	Ast::Empty(_)
				2214	\| Ast::Flags(_)
				2215	\| Ast::Literal(_)
				2216	\| Ast::Dot(_)
				2217	\| Ast::Assertion(_)
				2218	\| Ast::Class(ast::Class::Unicode(_))
				2219	\| Ast::Class(ast::Class::Perl(_)) => {
				2220	// These are all base cases, so we don't decrement depth.
				2221	Ok(())
				2222	}
				2223	Ast::Class(ast::Class::Bracketed(_))
				2224	\| Ast::Repetition(_)
				2225	\| Ast::Group(_)
				2226	\| Ast::Alternation(_)
				2227	\| Ast::Concat(_) => {
				2228	self.decrement_depth();
				2229	Ok(())
				2230	}
				2231	}
				2232	}
				2233
				2234	fn visit_class_set_item_pre(
				2235	&mut self,
				2236	ast: &ast::ClassSetItem,
				2237	) -> Result<()> {
				2238	let span = match *ast {
				2239	ast::ClassSetItem::Empty(_)
				2240	\| ast::ClassSetItem::Literal(_)
				2241	\| ast::ClassSetItem::Range(_)
				2242	\| ast::ClassSetItem::Ascii(_)
				2243	\| ast::ClassSetItem::Unicode(_)
				2244	\| ast::ClassSetItem::Perl(_) => {
				2245	// These are all base cases, so we don't increment depth.
				2246	return Ok(());
				2247	}
				2248	ast::ClassSetItem::Bracketed(ref x) => &x.span,
				2249	ast::ClassSetItem::Union(ref x) => &x.span,
				2250	};
				2251	self.increment_depth(span)
				2252	}
				2253
				2254	fn visit_class_set_item_post(
				2255	&mut self,
				2256	ast: &ast::ClassSetItem,
				2257	) -> Result<()> {
				2258	match *ast {
				2259	ast::ClassSetItem::Empty(_)
				2260	\| ast::ClassSetItem::Literal(_)
				2261	\| ast::ClassSetItem::Range(_)
				2262	\| ast::ClassSetItem::Ascii(_)
				2263	\| ast::ClassSetItem::Unicode(_)
				2264	\| ast::ClassSetItem::Perl(_) => {
				2265	// These are all base cases, so we don't decrement depth.
				2266	Ok(())
				2267	}
				2268	ast::ClassSetItem::Bracketed(_) \| ast::ClassSetItem::Union(_) => {
				2269	self.decrement_depth();
				2270	Ok(())
				2271	}
				2272	}
				2273	}
				2274
				2275	fn visit_class_set_binary_op_pre(
				2276	&mut self,
				2277	ast: &ast::ClassSetBinaryOp,
				2278	) -> Result<()> {
				2279	self.increment_depth(&ast.span)
				2280	}
				2281
				2282	fn visit_class_set_binary_op_post(
				2283	&mut self,
				2284	_ast: &ast::ClassSetBinaryOp,
				2285	) -> Result<()> {
				2286	self.decrement_depth();
				2287	Ok(())
				2288	}
				2289	}
				2290
				2291	/// When the result is an error, transforms the ast::ErrorKind from the source
				2292	/// Result into another one. This function is used to return clearer error
				2293	/// messages when possible.
				2294	fn specialize_err<T>(
				2295	result: Result<T>,
				2296	from: ast::ErrorKind,
				2297	to: ast::ErrorKind,
				2298	) -> Result<T> {
				2299	if let Err(e) = result {
				2300	if e.kind == from {
				2301	Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
				2302	} else {
				2303	Err(e)
				2304	}
				2305	} else {
				2306	result
				2307	}
				2308	}
				2309
				2310	#[cfg(test)]
				2311	mod tests {
				2312	use std::ops::Range;
				2313
				2314	use super::{Parser, ParserBuilder, ParserI, Primitive};
				2315	use ast::{self, Ast, Position, Span};
				2316
				// A local replacement for the standard assert_eq, which prints each
				// side on its own line (slightly nicer formatting, though honestly
				// still kind of crappy). Only the two-argument form is supported.
				macro_rules! assert_eq {
				    ($left:expr, $right:expr) => {{
				        // Matching on the tuple of references keeps temporaries in the
				        // operands alive for the duration of the comparison.
				        match (&$left, &$right) {
				            (left_val, right_val) if left_val == right_val => {}
				            (left_val, right_val) => panic!(
				                "assertion failed: `(left == right)`\n\n\
				                 left: `{:?}`\nright: `{:?}`\n\n",
				                left_val, right_val
				            ),
				        }
				    }};
				}
				2334
				2335	// We create these errors to compare with real ast::Errors in the tests.
				2336	// We define equality between TestError and ast::Error to disregard the
				2337	// pattern string in ast::Error, which is annoying to provide in tests.
				2338	#[derive(Clone, Debug)]
				2339	struct TestError {
				2340	span: Span,
				2341	kind: ast::ErrorKind,
				2342	}
				2343
				2344	impl PartialEq<ast::Error> for TestError {
				2345	fn eq(&self, other: &ast::Error) -> bool {
				2346	self.span == other.span && self.kind == other.kind
				2347	}
				2348	}
				2349
				2350	impl PartialEq<TestError> for ast::Error {
				2351	fn eq(&self, other: &TestError) -> bool {
				2352	self.span == other.span && self.kind == other.kind
				2353	}
				2354	}
				2355
				/// Short alias for turning a string slice into an owned `String`.
				fn s(str: &str) -> String {
				    String::from(str)
				}
				2359
				2360	fn parser(pattern: &str) -> ParserI<Parser> {
				2361	ParserI::new(Parser::new(), pattern)
				2362	}
				2363
				2364	fn parser_octal(pattern: &str) -> ParserI<Parser> {
				2365	let parser = ParserBuilder::new().octal(true).build();
				2366	ParserI::new(parser, pattern)
				2367	}
				2368
				2369	fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
				2370	let p = ParserBuilder::new().nest_limit(nest_limit).build();
				2371	ParserI::new(p, pattern)
				2372	}
				2373
				2374	fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
				2375	let p = ParserBuilder::new().ignore_whitespace(true).build();
				2376	ParserI::new(p, pattern)
				2377	}
				2378
				2379	/// Short alias for creating a new span.
				2380	fn nspan(start: Position, end: Position) -> Span {
				2381	Span::new(start, end)
				2382	}
				2383
				2384	/// Short alias for creating a new position.
				2385	fn npos(offset: usize, line: usize, column: usize) -> Position {
				2386	Position::new(offset, line, column)
				2387	}
				2388
				2389	/// Create a new span from the given offset range. This assumes a single
				2390	/// line and sets the columns based on the offsets. i.e., This only works
				2391	/// out of the box for ASCII, which is fine for most tests.
				2392	fn span(range: Range<usize>) -> Span {
				2393	let start = Position::new(range.start, 1, range.start + 1);
				2394	let end = Position::new(range.end, 1, range.end + 1);
				2395	Span::new(start, end)
				2396	}
				2397
				2398	/// Create a new span for the corresponding byte range in the given string.
				2399	fn span_range(subject: &str, range: Range<usize>) -> Span {
				2400	let start = Position {
				2401	offset: range.start,
				2402	line: 1 + subject[..range.start].matches('\n').count(),
				2403	column: 1 + subject[..range.start]
				2404	.chars()
				2405	.rev()
				2406	.position(\|c\| c == '\n')
				2407	.unwrap_or(subject[..range.start].chars().count()),
				2408	};
				2409	let end = Position {
				2410	offset: range.end,
				2411	line: 1 + subject[..range.end].matches('\n').count(),
				2412	column: 1 + subject[..range.end]
				2413	.chars()
				2414	.rev()
				2415	.position(\|c\| c == '\n')
				2416	.unwrap_or(subject[..range.end].chars().count()),
				2417	};
				2418	Span::new(start, end)
				2419	}
				2420
				2421	/// Create a verbatim literal starting at the given position.
				2422	fn lit(c: char, start: usize) -> Ast {
				2423	lit_with(c, span(start..start + c.len_utf8()))
				2424	}
				2425
				2426	/// Create a punctuation literal starting at the given position.
				2427	fn punct_lit(c: char, span: Span) -> Ast {
				2428	Ast::Literal(ast::Literal {
				2429	span: span,
				2430	kind: ast::LiteralKind::Punctuation,
				2431	c: c,
				2432	})
				2433	}
				2434
				2435	/// Create a verbatim literal with the given span.
				2436	fn lit_with(c: char, span: Span) -> Ast {
				2437	Ast::Literal(ast::Literal {
				2438	span: span,
				2439	kind: ast::LiteralKind::Verbatim,
				2440	c: c,
				2441	})
				2442	}
				2443
				2444	/// Create a concatenation with the given range.
				2445	fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
				2446	concat_with(span(range), asts)
				2447	}
				2448
				2449	/// Create a concatenation with the given span.
				2450	fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
				2451	Ast::Concat(ast::Concat { span: span, asts: asts })
				2452	}
				2453
				2454	/// Create an alternation with the given span.
				2455	fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
				2456	Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
				2457	}
				2458
				2459	/// Create a capturing group with the given span.
				2460	fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
				2461	Ast::Group(ast::Group {
				2462	span: span(range),
				2463	kind: ast::GroupKind::CaptureIndex(index),
				2464	ast: Box::new(ast),
				2465	})
				2466	}
				2467
				2468	/// Create an ast::SetFlags.
				2469	///
				2470	/// The given pattern should be the full pattern string. The range given
				2471	/// should correspond to the byte offsets where the flag set occurs.
				2472	///
				2473	/// If negated is true, then the set is interpreted as beginning with a
				2474	/// negation.
				2475	fn flag_set(
				2476	pat: &str,
				2477	range: Range<usize>,
				2478	flag: ast::Flag,
				2479	negated: bool,
				2480	) -> Ast {
				2481	let mut items = vec![ast::FlagsItem {
				2482	span: span_range(pat, (range.end - 2)..(range.end - 1)),
				2483	kind: ast::FlagsItemKind::Flag(flag),
				2484	}];
				2485	if negated {
				2486	items.insert(
				2487	0,
				2488	ast::FlagsItem {
				2489	span: span_range(pat, (range.start + 2)..(range.end - 2)),
				2490	kind: ast::FlagsItemKind::Negation,
				2491	},
				2492	);
				2493	}
				2494	Ast::Flags(ast::SetFlags {
				2495	span: span_range(pat, range.clone()),
				2496	flags: ast::Flags {
				2497	span: span_range(pat, (range.start + 2)..(range.end - 1)),
				2498	items: items,
				2499	},
				2500	})
				2501	}
				2502
				/// Checks which patterns parse under a given nest limit and, for those
				/// that do not, which span the error is reported at.
				#[test]
				fn parse_nest_limit() {
				    // The error expected when `limit` is exceeded at `range`.
				    let exceeded = |range: Range<usize>, limit: u32| TestError {
				        span: span(range),
				        kind: ast::ErrorKind::NestLimitExceeded(limit),
				    };
				    // A greedy repetition of `inner` covering `range`.
				    let repeat = |range: Range<usize>, op: ast::RepetitionOp, inner: Ast| {
				        Ast::Repetition(ast::Repetition {
				            span: span(range),
				            op: op,
				            greedy: true,
				            ast: Box::new(inner),
				        })
				    };
				    // The `+` operator located at `range`.
				    let one_or_more = |range: Range<usize>| ast::RepetitionOp {
				        span: span(range),
				        kind: ast::RepetitionKind::OneOrMore,
				    };
				    // The `*` operator located at `range`.
				    let zero_or_more = |range: Range<usize>| ast::RepetitionOp {
				        span: span(range),
				        kind: ast::RepetitionKind::ZeroOrMore,
				    };

				    // Even with a nest limit of 0, some regexes are still accepted.
				    assert_eq!(
				        parser_nest_limit("", 0).parse(),
				        Ok(Ast::Empty(span(0..0)))
				    );
				    assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

				    // Repetition operators each need one level of nesting.
				    assert_eq!(
				        parser_nest_limit("a+", 0).parse().unwrap_err(),
				        exceeded(0..2, 0)
				    );
				    assert_eq!(
				        parser_nest_limit("a+", 1).parse(),
				        Ok(repeat(0..2, one_or_more(1..2), lit('a', 0)))
				    );
				    assert_eq!(
				        parser_nest_limit("(a)+", 1).parse().unwrap_err(),
				        exceeded(0..3, 1)
				    );
				    assert_eq!(
				        parser_nest_limit("a+*", 1).parse().unwrap_err(),
				        exceeded(0..2, 1)
				    );
				    assert_eq!(
				        parser_nest_limit("a+*", 2).parse(),
				        Ok(repeat(
				            0..3,
				            zero_or_more(2..3),
				            repeat(0..2, one_or_more(1..2), lit('a', 0)),
				        ))
				    );

				    // A concatenation needs one level of nesting.
				    assert_eq!(
				        parser_nest_limit("ab", 0).parse().unwrap_err(),
				        exceeded(0..2, 0)
				    );
				    assert_eq!(
				        parser_nest_limit("ab", 1).parse(),
				        Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
				    );
				    assert_eq!(
				        parser_nest_limit("abc", 1).parse(),
				        Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
				    );

				    // An alternation needs one level of nesting.
				    assert_eq!(
				        parser_nest_limit("a|b", 0).parse().unwrap_err(),
				        exceeded(0..3, 0)
				    );
				    assert_eq!(
				        parser_nest_limit("a|b", 1).parse(),
				        Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
				    );
				    assert_eq!(
				        parser_nest_limit("a|b|c", 1).parse(),
				        Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
				    );

				    // Character classes form their own mini-recursive syntax.
				    assert_eq!(
				        parser_nest_limit("[a]", 0).parse().unwrap_err(),
				        exceeded(0..3, 0)
				    );
				    assert_eq!(
				        parser_nest_limit("[a]", 1).parse(),
				        Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				            span: span(0..3),
				            negated: false,
				            kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
				                ast::Literal {
				                    span: span(1..2),
				                    kind: ast::LiteralKind::Verbatim,
				                    c: 'a',
				                }
				            )),
				        })))
				    );
				    assert_eq!(
				        parser_nest_limit("[ab]", 1).parse().unwrap_err(),
				        exceeded(1..3, 1)
				    );
				    assert_eq!(
				        parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
				        exceeded(3..7, 2)
				    );
				    assert_eq!(
				        parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
				        exceeded(4..6, 3)
				    );
				    assert_eq!(
				        parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
				        exceeded(1..5, 1)
				    );
				    assert_eq!(
				        parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
				        exceeded(4..6, 2)
				    );
				}
				2660
				2661	#[test]
				2662	fn parse_comments() {
				2663	let pat = "(?x)
				2664	# This is comment 1.
				2665	foo # This is comment 2.
				2666	# This is comment 3.
				2667	bar
				2668	# This is comment 4.";
				2669	let astc = parser(pat).parse_with_comments().unwrap();
				2670	assert_eq!(
				2671	astc.ast,
				2672	concat_with(
				2673	span_range(pat, 0..pat.len()),
				2674	vec![
				2675	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2676	lit_with('f', span_range(pat, 26..27)),
				2677	lit_with('o', span_range(pat, 27..28)),
				2678	lit_with('o', span_range(pat, 28..29)),
				2679	lit_with('b', span_range(pat, 74..75)),
				2680	lit_with('a', span_range(pat, 75..76)),
				2681	lit_with('r', span_range(pat, 76..77)),
				2682	]
				2683	)
				2684	);
				2685	assert_eq!(
				2686	astc.comments,
				2687	vec![
				2688	ast::Comment {
				2689	span: span_range(pat, 5..26),
				2690	comment: s(" This is comment 1."),
				2691	},
				2692	ast::Comment {
				2693	span: span_range(pat, 30..51),
				2694	comment: s(" This is comment 2."),
				2695	},
				2696	ast::Comment {
				2697	span: span_range(pat, 53..74),
				2698	comment: s(" This is comment 3."),
				2699	},
				2700	ast::Comment {
				2701	span: span_range(pat, 78..98),
				2702	comment: s(" This is comment 4."),
				2703	},
				2704	]
				2705	);
				2706	}
				2707
				2708	#[test]
				2709	fn parse_holistic() {
				2710	assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
				2711	assert_eq!(
				2712	parser(r"\\\.\+\*\?\\|\[\]\{\}\^\$\#\&\-\~").parse(),
				2713	Ok(concat(
				2714	0..36,
				2715	vec![
				2716	punct_lit('\\', span(0..2)),
				2717	punct_lit('.', span(2..4)),
				2718	punct_lit('+', span(4..6)),
				2719	punct_lit('*', span(6..8)),
				2720	punct_lit('?', span(8..10)),
				2721	punct_lit('(', span(10..12)),
				2722	punct_lit(')', span(12..14)),
				2723	punct_lit('\|', span(14..16)),
				2724	punct_lit('[', span(16..18)),
				2725	punct_lit(']', span(18..20)),
				2726	punct_lit('{', span(20..22)),
				2727	punct_lit('}', span(22..24)),
				2728	punct_lit('^', span(24..26)),
				2729	punct_lit('$', span(26..28)),
				2730	punct_lit('#', span(28..30)),
				2731	punct_lit('&', span(30..32)),
				2732	punct_lit('-', span(32..34)),
				2733	punct_lit('~', span(34..36)),
				2734	]
				2735	))
				2736	);
				2737	}
				2738
				2739	#[test]
				2740	fn parse_ignore_whitespace() {
				2741	// Test that basic whitespace insensitivity works.
				2742	let pat = "(?x)a b";
				2743	assert_eq!(
				2744	parser(pat).parse(),
				2745	Ok(concat_with(
				2746	nspan(npos(0, 1, 1), npos(7, 1, 8)),
				2747	vec![
				2748	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2749	lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
				2750	lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
				2751	]
				2752	))
				2753	);
				2754
				2755	// Test that we can toggle whitespace insensitivity.
				2756	let pat = "(?x)a b(?-x)a b";
				2757	assert_eq!(
				2758	parser(pat).parse(),
				2759	Ok(concat_with(
				2760	nspan(npos(0, 1, 1), npos(15, 1, 16)),
				2761	vec![
				2762	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2763	lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
				2764	lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
				2765	flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
				2766	lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
				2767	lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
				2768	lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
				2769	]
				2770	))
				2771	);
				2772
				2773	// Test that nesting whitespace insensitive flags works.
				2774	let pat = "a (?x:a )a ";
				2775	assert_eq!(
				2776	parser(pat).parse(),
				2777	Ok(concat_with(
				2778	span_range(pat, 0..11),
				2779	vec![
				2780	lit_with('a', span_range(pat, 0..1)),
				2781	lit_with(' ', span_range(pat, 1..2)),
				2782	Ast::Group(ast::Group {
				2783	span: span_range(pat, 2..9),
				2784	kind: ast::GroupKind::NonCapturing(ast::Flags {
				2785	span: span_range(pat, 4..5),
				2786	items: vec![ast::FlagsItem {
				2787	span: span_range(pat, 4..5),
				2788	kind: ast::FlagsItemKind::Flag(
				2789	ast::Flag::IgnoreWhitespace
				2790	),
				2791	},],
				2792	}),
				2793	ast: Box::new(lit_with('a', span_range(pat, 6..7))),
				2794	}),
				2795	lit_with('a', span_range(pat, 9..10)),
				2796	lit_with(' ', span_range(pat, 10..11)),
				2797	]
				2798	))
				2799	);
				2800
				2801	// Test that whitespace after an opening paren is insignificant.
				2802	let pat = "(?x)( ?P<foo> a )";
				2803	assert_eq!(
				2804	parser(pat).parse(),
				2805	Ok(concat_with(
				2806	span_range(pat, 0..pat.len()),
				2807	vec![
				2808	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2809	Ast::Group(ast::Group {
				2810	span: span_range(pat, 4..pat.len()),
				2811	kind: ast::GroupKind::CaptureName(ast::CaptureName {
				2812	span: span_range(pat, 9..12),
				2813	name: s("foo"),
				2814	index: 1,
				2815	}),
				2816	ast: Box::new(lit_with('a', span_range(pat, 14..15))),
				2817	}),
				2818	]
				2819	))
				2820	);
				2821	let pat = "(?x)( a )";
				2822	assert_eq!(
				2823	parser(pat).parse(),
				2824	Ok(concat_with(
				2825	span_range(pat, 0..pat.len()),
				2826	vec![
				2827	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2828	Ast::Group(ast::Group {
				2829	span: span_range(pat, 4..pat.len()),
				2830	kind: ast::GroupKind::CaptureIndex(1),
				2831	ast: Box::new(lit_with('a', span_range(pat, 7..8))),
				2832	}),
				2833	]
				2834	))
				2835	);
				2836	let pat = "(?x)( ?: a )";
				2837	assert_eq!(
				2838	parser(pat).parse(),
				2839	Ok(concat_with(
				2840	span_range(pat, 0..pat.len()),
				2841	vec![
				2842	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2843	Ast::Group(ast::Group {
				2844	span: span_range(pat, 4..pat.len()),
				2845	kind: ast::GroupKind::NonCapturing(ast::Flags {
				2846	span: span_range(pat, 8..8),
				2847	items: vec![],
				2848	}),
				2849	ast: Box::new(lit_with('a', span_range(pat, 11..12))),
				2850	}),
				2851	]
				2852	))
				2853	);
				2854	let pat = r"(?x)\x { 53 }";
				2855	assert_eq!(
				2856	parser(pat).parse(),
				2857	Ok(concat_with(
				2858	span_range(pat, 0..pat.len()),
				2859	vec![
				2860	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2861	Ast::Literal(ast::Literal {
				2862	span: span(4..13),
				2863	kind: ast::LiteralKind::HexBrace(
				2864	ast::HexLiteralKind::X
				2865	),
				2866	c: 'S',
				2867	}),
				2868	]
				2869	))
				2870	);
				2871
				2872	// Test that whitespace after an escape is OK.
				2873	let pat = r"(?x)\ ";
				2874	assert_eq!(
				2875	parser(pat).parse(),
				2876	Ok(concat_with(
				2877	span_range(pat, 0..pat.len()),
				2878	vec![
				2879	flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
				2880	Ast::Literal(ast::Literal {
				2881	span: span_range(pat, 4..6),
				2882	kind: ast::LiteralKind::Special(
				2883	ast::SpecialLiteralKind::Space
				2884	),
				2885	c: ' ',
				2886	}),
				2887	]
				2888	))
				2889	);
				2890	// ... but only when `x` mode is enabled.
				2891	let pat = r"\ ";
				2892	assert_eq!(
				2893	parser(pat).parse().unwrap_err(),
				2894	TestError {
				2895	span: span_range(pat, 0..2),
				2896	kind: ast::ErrorKind::EscapeUnrecognized,
				2897	}
				2898	);
				2899	}
				2900
				2901	#[test]
				2902	fn parse_newlines() {
				2903	let pat = ".\n.";
				2904	assert_eq!(
				2905	parser(pat).parse(),
				2906	Ok(concat_with(
				2907	span_range(pat, 0..3),
				2908	vec![
				2909	Ast::Dot(span_range(pat, 0..1)),
				2910	lit_with('\n', span_range(pat, 1..2)),
				2911	Ast::Dot(span_range(pat, 2..3)),
				2912	]
				2913	))
				2914	);
				2915
				2916	let pat = "foobar\nbaz\nquux\n";
				2917	assert_eq!(
				2918	parser(pat).parse(),
				2919	Ok(concat_with(
				2920	span_range(pat, 0..pat.len()),
				2921	vec![
				2922	lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
				2923	lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
				2924	lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
				2925	lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
				2926	lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
				2927	lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
				2928	lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
				2929	lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
				2930	lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
				2931	lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
				2932	lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
				2933	lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
				2934	lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
				2935	lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
				2936	lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
				2937	lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
				2938	]
				2939	))
				2940	);
				2941	}
				2942
				2943	#[test]
				2944	fn parse_uncounted_repetition() {
				2945	assert_eq!(
				2946	parser(r"a*").parse(),
				2947	Ok(Ast::Repetition(ast::Repetition {
				2948	span: span(0..2),
				2949	op: ast::RepetitionOp {
				2950	span: span(1..2),
				2951	kind: ast::RepetitionKind::ZeroOrMore,
				2952	},
				2953	greedy: true,
				2954	ast: Box::new(lit('a', 0)),
				2955	}))
				2956	);
				2957	assert_eq!(
				2958	parser(r"a+").parse(),
				2959	Ok(Ast::Repetition(ast::Repetition {
				2960	span: span(0..2),
				2961	op: ast::RepetitionOp {
				2962	span: span(1..2),
				2963	kind: ast::RepetitionKind::OneOrMore,
				2964	},
				2965	greedy: true,
				2966	ast: Box::new(lit('a', 0)),
				2967	}))
				2968	);
				2969
				2970	assert_eq!(
				2971	parser(r"a?").parse(),
				2972	Ok(Ast::Repetition(ast::Repetition {
				2973	span: span(0..2),
				2974	op: ast::RepetitionOp {
				2975	span: span(1..2),
				2976	kind: ast::RepetitionKind::ZeroOrOne,
				2977	},
				2978	greedy: true,
				2979	ast: Box::new(lit('a', 0)),
				2980	}))
				2981	);
				2982	assert_eq!(
				2983	parser(r"a??").parse(),
				2984	Ok(Ast::Repetition(ast::Repetition {
				2985	span: span(0..3),
				2986	op: ast::RepetitionOp {
				2987	span: span(1..3),
				2988	kind: ast::RepetitionKind::ZeroOrOne,
				2989	},
				2990	greedy: false,
				2991	ast: Box::new(lit('a', 0)),
				2992	}))
				2993	);
				2994	assert_eq!(
				2995	parser(r"a?").parse(),
				2996	Ok(Ast::Repetition(ast::Repetition {
				2997	span: span(0..2),
				2998	op: ast::RepetitionOp {
				2999	span: span(1..2),
				3000	kind: ast::RepetitionKind::ZeroOrOne,
				3001	},
				3002	greedy: true,
				3003	ast: Box::new(lit('a', 0)),
				3004	}))
				3005	);
				3006	assert_eq!(
				3007	parser(r"a?b").parse(),
				3008	Ok(concat(
				3009	0..3,
				3010	vec![
				3011	Ast::Repetition(ast::Repetition {
				3012	span: span(0..2),
				3013	op: ast::RepetitionOp {
				3014	span: span(1..2),
				3015	kind: ast::RepetitionKind::ZeroOrOne,
				3016	},
				3017	greedy: true,
				3018	ast: Box::new(lit('a', 0)),
				3019	}),
				3020	lit('b', 2),
				3021	]
				3022	))
				3023	);
				3024	assert_eq!(
				3025	parser(r"a??b").parse(),
				3026	Ok(concat(
				3027	0..4,
				3028	vec![
				3029	Ast::Repetition(ast::Repetition {
				3030	span: span(0..3),
				3031	op: ast::RepetitionOp {
				3032	span: span(1..3),
				3033	kind: ast::RepetitionKind::ZeroOrOne,
				3034	},
				3035	greedy: false,
				3036	ast: Box::new(lit('a', 0)),
				3037	}),
				3038	lit('b', 3),
				3039	]
				3040	))
				3041	);
				3042	assert_eq!(
				3043	parser(r"ab?").parse(),
				3044	Ok(concat(
				3045	0..3,
				3046	vec![
				3047	lit('a', 0),
				3048	Ast::Repetition(ast::Repetition {
				3049	span: span(1..3),
				3050	op: ast::RepetitionOp {
				3051	span: span(2..3),
				3052	kind: ast::RepetitionKind::ZeroOrOne,
				3053	},
				3054	greedy: true,
				3055	ast: Box::new(lit('b', 1)),
				3056	}),
				3057	]
				3058	))
				3059	);
				3060	assert_eq!(
				3061	parser(r"(ab)?").parse(),
				3062	Ok(Ast::Repetition(ast::Repetition {
				3063	span: span(0..5),
				3064	op: ast::RepetitionOp {
				3065	span: span(4..5),
				3066	kind: ast::RepetitionKind::ZeroOrOne,
				3067	},
				3068	greedy: true,
				3069	ast: Box::new(group(
				3070	0..4,
				3071	1,
				3072	concat(1..3, vec![lit('a', 1), lit('b', 2),])
				3073	)),
				3074	}))
				3075	);
				3076	assert_eq!(
				3077	parser(r"\|a?").parse(),
				3078	Ok(alt(
				3079	0..3,
				3080	vec![
				3081	Ast::Empty(span(0..0)),
				3082	Ast::Repetition(ast::Repetition {
				3083	span: span(1..3),
				3084	op: ast::RepetitionOp {
				3085	span: span(2..3),
				3086	kind: ast::RepetitionKind::ZeroOrOne,
				3087	},
				3088	greedy: true,
				3089	ast: Box::new(lit('a', 1)),
				3090	}),
				3091	]
				3092	))
				3093	);
				3094
				3095	assert_eq!(
				3096	parser(r"*").parse().unwrap_err(),
				3097	TestError {
				3098	span: span(0..0),
				3099	kind: ast::ErrorKind::RepetitionMissing,
				3100	}
				3101	);
				3102	assert_eq!(
				3103	parser(r"(?i)*").parse().unwrap_err(),
				3104	TestError {
				3105	span: span(4..4),
				3106	kind: ast::ErrorKind::RepetitionMissing,
				3107	}
				3108	);
				3109	assert_eq!(
				3110	parser(r"(*)").parse().unwrap_err(),
				3111	TestError {
				3112	span: span(1..1),
				3113	kind: ast::ErrorKind::RepetitionMissing,
				3114	}
				3115	);
				3116	assert_eq!(
				3117	parser(r"(?:?)").parse().unwrap_err(),
				3118	TestError {
				3119	span: span(3..3),
				3120	kind: ast::ErrorKind::RepetitionMissing,
				3121	}
				3122	);
				3123	assert_eq!(
				3124	parser(r"+").parse().unwrap_err(),
				3125	TestError {
				3126	span: span(0..0),
				3127	kind: ast::ErrorKind::RepetitionMissing,
				3128	}
				3129	);
				3130	assert_eq!(
				3131	parser(r"?").parse().unwrap_err(),
				3132	TestError {
				3133	span: span(0..0),
				3134	kind: ast::ErrorKind::RepetitionMissing,
				3135	}
				3136	);
				3137	assert_eq!(
				3138	parser(r"(?)").parse().unwrap_err(),
				3139	TestError {
				3140	span: span(1..1),
				3141	kind: ast::ErrorKind::RepetitionMissing,
				3142	}
				3143	);
				3144	assert_eq!(
				3145	parser(r"\|*").parse().unwrap_err(),
				3146	TestError {
				3147	span: span(1..1),
				3148	kind: ast::ErrorKind::RepetitionMissing,
				3149	}
				3150	);
				3151	assert_eq!(
				3152	parser(r"\|+").parse().unwrap_err(),
				3153	TestError {
				3154	span: span(1..1),
				3155	kind: ast::ErrorKind::RepetitionMissing,
				3156	}
				3157	);
				3158	assert_eq!(
				3159	parser(r"\|?").parse().unwrap_err(),
				3160	TestError {
				3161	span: span(1..1),
				3162	kind: ast::ErrorKind::RepetitionMissing,
				3163	}
				3164	);
				3165	}
				3166
				3167	#[test]
				3168	fn parse_counted_repetition() {
				3169	assert_eq!(
				3170	parser(r"a{5}").parse(),
				3171	Ok(Ast::Repetition(ast::Repetition {
				3172	span: span(0..4),
				3173	op: ast::RepetitionOp {
				3174	span: span(1..4),
				3175	kind: ast::RepetitionKind::Range(
				3176	ast::RepetitionRange::Exactly(5)
				3177	),
				3178	},
				3179	greedy: true,
				3180	ast: Box::new(lit('a', 0)),
				3181	}))
				3182	);
				3183	assert_eq!(
				3184	parser(r"a{5,}").parse(),
				3185	Ok(Ast::Repetition(ast::Repetition {
				3186	span: span(0..5),
				3187	op: ast::RepetitionOp {
				3188	span: span(1..5),
				3189	kind: ast::RepetitionKind::Range(
				3190	ast::RepetitionRange::AtLeast(5)
				3191	),
				3192	},
				3193	greedy: true,
				3194	ast: Box::new(lit('a', 0)),
				3195	}))
				3196	);
				3197	assert_eq!(
				3198	parser(r"a{5,9}").parse(),
				3199	Ok(Ast::Repetition(ast::Repetition {
				3200	span: span(0..6),
				3201	op: ast::RepetitionOp {
				3202	span: span(1..6),
				3203	kind: ast::RepetitionKind::Range(
				3204	ast::RepetitionRange::Bounded(5, 9)
				3205	),
				3206	},
				3207	greedy: true,
				3208	ast: Box::new(lit('a', 0)),
				3209	}))
				3210	);
				3211	assert_eq!(
				3212	parser(r"a{5}?").parse(),
				3213	Ok(Ast::Repetition(ast::Repetition {
				3214	span: span(0..5),
				3215	op: ast::RepetitionOp {
				3216	span: span(1..5),
				3217	kind: ast::RepetitionKind::Range(
				3218	ast::RepetitionRange::Exactly(5)
				3219	),
				3220	},
				3221	greedy: false,
				3222	ast: Box::new(lit('a', 0)),
				3223	}))
				3224	);
				3225	assert_eq!(
				3226	parser(r"ab{5}").parse(),
				3227	Ok(concat(
				3228	0..5,
				3229	vec![
				3230	lit('a', 0),
				3231	Ast::Repetition(ast::Repetition {
				3232	span: span(1..5),
				3233	op: ast::RepetitionOp {
				3234	span: span(2..5),
				3235	kind: ast::RepetitionKind::Range(
				3236	ast::RepetitionRange::Exactly(5)
				3237	),
				3238	},
				3239	greedy: true,
				3240	ast: Box::new(lit('b', 1)),
				3241	}),
				3242	]
				3243	))
				3244	);
				3245	assert_eq!(
				3246	parser(r"ab{5}c").parse(),
				3247	Ok(concat(
				3248	0..6,
				3249	vec![
				3250	lit('a', 0),
				3251	Ast::Repetition(ast::Repetition {
				3252	span: span(1..5),
				3253	op: ast::RepetitionOp {
				3254	span: span(2..5),
				3255	kind: ast::RepetitionKind::Range(
				3256	ast::RepetitionRange::Exactly(5)
				3257	),
				3258	},
				3259	greedy: true,
				3260	ast: Box::new(lit('b', 1)),
				3261	}),
				3262	lit('c', 5),
				3263	]
				3264	))
				3265	);
				3266
				3267	assert_eq!(
				3268	parser(r"a{ 5 }").parse(),
				3269	Ok(Ast::Repetition(ast::Repetition {
				3270	span: span(0..6),
				3271	op: ast::RepetitionOp {
				3272	span: span(1..6),
				3273	kind: ast::RepetitionKind::Range(
				3274	ast::RepetitionRange::Exactly(5)
				3275	),
				3276	},
				3277	greedy: true,
				3278	ast: Box::new(lit('a', 0)),
				3279	}))
				3280	);
				3281	assert_eq!(
				3282	parser(r"a{ 5 , 9 }").parse(),
				3283	Ok(Ast::Repetition(ast::Repetition {
				3284	span: span(0..10),
				3285	op: ast::RepetitionOp {
				3286	span: span(1..10),
				3287	kind: ast::RepetitionKind::Range(
				3288	ast::RepetitionRange::Bounded(5, 9)
				3289	),
				3290	},
				3291	greedy: true,
				3292	ast: Box::new(lit('a', 0)),
				3293	}))
				3294	);
				3295	assert_eq!(
				3296	parser_ignore_whitespace(r"a{5,9} ?").parse(),
				3297	Ok(Ast::Repetition(ast::Repetition {
				3298	span: span(0..8),
				3299	op: ast::RepetitionOp {
				3300	span: span(1..8),
				3301	kind: ast::RepetitionKind::Range(
				3302	ast::RepetitionRange::Bounded(5, 9)
				3303	),
				3304	},
				3305	greedy: false,
				3306	ast: Box::new(lit('a', 0)),
				3307	}))
				3308	);
				3309
				3310	assert_eq!(
				3311	parser(r"(?i){0}").parse().unwrap_err(),
				3312	TestError {
				3313	span: span(4..4),
				3314	kind: ast::ErrorKind::RepetitionMissing,
				3315	}
				3316	);
				3317	assert_eq!(
				3318	parser(r"(?m){1,1}").parse().unwrap_err(),
				3319	TestError {
				3320	span: span(4..4),
				3321	kind: ast::ErrorKind::RepetitionMissing,
				3322	}
				3323	);
				3324	assert_eq!(
				3325	parser(r"a{]}").parse().unwrap_err(),
				3326	TestError {
				3327	span: span(2..2),
				3328	kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
				3329	}
				3330	);
				3331	assert_eq!(
				3332	parser(r"a{1,]}").parse().unwrap_err(),
				3333	TestError {
				3334	span: span(4..4),
				3335	kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
				3336	}
				3337	);
				3338	assert_eq!(
				3339	parser(r"a{").parse().unwrap_err(),
				3340	TestError {
				3341	span: span(1..2),
				3342	kind: ast::ErrorKind::RepetitionCountUnclosed,
				3343	}
				3344	);
				3345	assert_eq!(
				3346	parser(r"a{}").parse().unwrap_err(),
				3347	TestError {
				3348	span: span(2..2),
				3349	kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
				3350	}
				3351	);
				3352	assert_eq!(
				3353	parser(r"a{a").parse().unwrap_err(),
				3354	TestError {
				3355	span: span(2..2),
				3356	kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
				3357	}
				3358	);
				3359	assert_eq!(
				3360	parser(r"a{9999999999}").parse().unwrap_err(),
				3361	TestError {
				3362	span: span(2..12),
				3363	kind: ast::ErrorKind::DecimalInvalid,
				3364	}
				3365	);
				3366	assert_eq!(
				3367	parser(r"a{9").parse().unwrap_err(),
				3368	TestError {
				3369	span: span(1..3),
				3370	kind: ast::ErrorKind::RepetitionCountUnclosed,
				3371	}
				3372	);
				3373	assert_eq!(
				3374	parser(r"a{9,a").parse().unwrap_err(),
				3375	TestError {
				3376	span: span(4..4),
				3377	kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
				3378	}
				3379	);
				3380	assert_eq!(
				3381	parser(r"a{9,9999999999}").parse().unwrap_err(),
				3382	TestError {
				3383	span: span(4..14),
				3384	kind: ast::ErrorKind::DecimalInvalid,
				3385	}
				3386	);
				3387	assert_eq!(
				3388	parser(r"a{9,").parse().unwrap_err(),
				3389	TestError {
				3390	span: span(1..4),
				3391	kind: ast::ErrorKind::RepetitionCountUnclosed,
				3392	}
				3393	);
				3394	assert_eq!(
				3395	parser(r"a{9,11").parse().unwrap_err(),
				3396	TestError {
				3397	span: span(1..6),
				3398	kind: ast::ErrorKind::RepetitionCountUnclosed,
				3399	}
				3400	);
				3401	assert_eq!(
				3402	parser(r"a{2,1}").parse().unwrap_err(),
				3403	TestError {
				3404	span: span(1..6),
				3405	kind: ast::ErrorKind::RepetitionCountInvalid,
				3406	}
				3407	);
				3408	assert_eq!(
				3409	parser(r"{5}").parse().unwrap_err(),
				3410	TestError {
				3411	span: span(0..0),
				3412	kind: ast::ErrorKind::RepetitionMissing,
				3413	}
				3414	);
				3415	assert_eq!(
				3416	parser(r"\|{5}").parse().unwrap_err(),
				3417	TestError {
				3418	span: span(1..1),
				3419	kind: ast::ErrorKind::RepetitionMissing,
				3420	}
				3421	);
				3422	}
				3423
				3424	#[test]
				3425	fn parse_alternate() {
				3426	assert_eq!(
				3427	parser(r"a\|b").parse(),
				3428	Ok(Ast::Alternation(ast::Alternation {
				3429	span: span(0..3),
				3430	asts: vec![lit('a', 0), lit('b', 2)],
				3431	}))
				3432	);
				3433	assert_eq!(
				3434	parser(r"(a\|b)").parse(),
				3435	Ok(group(
				3436	0..5,
				3437	1,
				3438	Ast::Alternation(ast::Alternation {
				3439	span: span(1..4),
				3440	asts: vec![lit('a', 1), lit('b', 3)],
				3441	})
				3442	))
				3443	);
				3444
				3445	assert_eq!(
				3446	parser(r"a\|b\|c").parse(),
				3447	Ok(Ast::Alternation(ast::Alternation {
				3448	span: span(0..5),
				3449	asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
				3450	}))
				3451	);
				3452	assert_eq!(
				3453	parser(r"ax\|by\|cz").parse(),
				3454	Ok(Ast::Alternation(ast::Alternation {
				3455	span: span(0..8),
				3456	asts: vec![
				3457	concat(0..2, vec![lit('a', 0), lit('x', 1)]),
				3458	concat(3..5, vec![lit('b', 3), lit('y', 4)]),
				3459	concat(6..8, vec![lit('c', 6), lit('z', 7)]),
				3460	],
				3461	}))
				3462	);
				3463	assert_eq!(
				3464	parser(r"(ax\|by\|cz)").parse(),
				3465	Ok(group(
				3466	0..10,
				3467	1,
				3468	Ast::Alternation(ast::Alternation {
				3469	span: span(1..9),
				3470	asts: vec![
				3471	concat(1..3, vec![lit('a', 1), lit('x', 2)]),
				3472	concat(4..6, vec![lit('b', 4), lit('y', 5)]),
				3473	concat(7..9, vec![lit('c', 7), lit('z', 8)]),
				3474	],
				3475	})
				3476	))
				3477	);
				3478	assert_eq!(
				3479	parser(r"(ax\|(by\|(cz)))").parse(),
				3480	Ok(group(
				3481	0..14,
				3482	1,
				3483	alt(
				3484	1..13,
				3485	vec![
				3486	concat(1..3, vec![lit('a', 1), lit('x', 2)]),
				3487	group(
				3488	4..13,
				3489	2,
				3490	alt(
				3491	5..12,
				3492	vec![
				3493	concat(
				3494	5..7,
				3495	vec![lit('b', 5), lit('y', 6)]
				3496	),
				3497	group(
				3498	8..12,
				3499	3,
				3500	concat(
				3501	9..11,
				3502	vec![lit('c', 9), lit('z', 10),]
				3503	)
				3504	),
				3505	]
				3506	)
				3507	),
				3508	]
				3509	)
				3510	))
				3511	);
				3512
				3513	assert_eq!(
				3514	parser(r"\|").parse(),
				3515	Ok(alt(
				3516	0..1,
				3517	vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
				3518	))
				3519	);
				3520	assert_eq!(
				3521	parser(r"\|\|").parse(),
				3522	Ok(alt(
				3523	0..2,
				3524	vec![
				3525	Ast::Empty(span(0..0)),
				3526	Ast::Empty(span(1..1)),
				3527	Ast::Empty(span(2..2)),
				3528	]
				3529	))
				3530	);
				3531	assert_eq!(
				3532	parser(r"a\|").parse(),
				3533	Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
				3534	);
				3535	assert_eq!(
				3536	parser(r"\|a").parse(),
				3537	Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
				3538	);
				3539
				3540	assert_eq!(
				3541	parser(r"(\|)").parse(),
				3542	Ok(group(
				3543	0..3,
				3544	1,
				3545	alt(
				3546	1..2,
				3547	vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
				3548	)
				3549	))
				3550	);
				3551	assert_eq!(
				3552	parser(r"(a\|)").parse(),
				3553	Ok(group(
				3554	0..4,
				3555	1,
				3556	alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
				3557	))
				3558	);
				3559	assert_eq!(
				3560	parser(r"(\|a)").parse(),
				3561	Ok(group(
				3562	0..4,
				3563	1,
				3564	alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
				3565	))
				3566	);
				3567
				3568	assert_eq!(
				3569	parser(r"a\|b)").parse().unwrap_err(),
				3570	TestError {
				3571	span: span(3..4),
				3572	kind: ast::ErrorKind::GroupUnopened,
				3573	}
				3574	);
				3575	assert_eq!(
				3576	parser(r"(a\|b").parse().unwrap_err(),
				3577	TestError {
				3578	span: span(0..1),
				3579	kind: ast::ErrorKind::GroupUnclosed,
				3580	}
				3581	);
				3582	}
				3583
				3584	#[test]
				3585	fn parse_unsupported_lookaround() {
				3586	assert_eq!(
				3587	parser(r"(?=a)").parse().unwrap_err(),
				3588	TestError {
				3589	span: span(0..3),
				3590	kind: ast::ErrorKind::UnsupportedLookAround,
				3591	}
				3592	);
				3593	assert_eq!(
				3594	parser(r"(?!a)").parse().unwrap_err(),
				3595	TestError {
				3596	span: span(0..3),
				3597	kind: ast::ErrorKind::UnsupportedLookAround,
				3598	}
				3599	);
				3600	assert_eq!(
				3601	parser(r"(?<=a)").parse().unwrap_err(),
				3602	TestError {
				3603	span: span(0..4),
				3604	kind: ast::ErrorKind::UnsupportedLookAround,
				3605	}
				3606	);
				3607	assert_eq!(
				3608	parser(r"(?<!a)").parse().unwrap_err(),
				3609	TestError {
				3610	span: span(0..4),
				3611	kind: ast::ErrorKind::UnsupportedLookAround,
				3612	}
				3613	);
				3614	}
				3615
				3616	#[test]
				3617	fn parse_group() {
				3618	assert_eq!(
				3619	parser("(?i)").parse(),
				3620	Ok(Ast::Flags(ast::SetFlags {
				3621	span: span(0..4),
				3622	flags: ast::Flags {
				3623	span: span(2..3),
				3624	items: vec![ast::FlagsItem {
				3625	span: span(2..3),
				3626	kind: ast::FlagsItemKind::Flag(
				3627	ast::Flag::CaseInsensitive
				3628	),
				3629	}],
				3630	},
				3631	}))
				3632	);
				3633	assert_eq!(
				3634	parser("(?iU)").parse(),
				3635	Ok(Ast::Flags(ast::SetFlags {
				3636	span: span(0..5),
				3637	flags: ast::Flags {
				3638	span: span(2..4),
				3639	items: vec![
				3640	ast::FlagsItem {
				3641	span: span(2..3),
				3642	kind: ast::FlagsItemKind::Flag(
				3643	ast::Flag::CaseInsensitive
				3644	),
				3645	},
				3646	ast::FlagsItem {
				3647	span: span(3..4),
				3648	kind: ast::FlagsItemKind::Flag(
				3649	ast::Flag::SwapGreed
				3650	),
				3651	},
				3652	],
				3653	},
				3654	}))
				3655	);
				3656	assert_eq!(
				3657	parser("(?i-U)").parse(),
				3658	Ok(Ast::Flags(ast::SetFlags {
				3659	span: span(0..6),
				3660	flags: ast::Flags {
				3661	span: span(2..5),
				3662	items: vec![
				3663	ast::FlagsItem {
				3664	span: span(2..3),
				3665	kind: ast::FlagsItemKind::Flag(
				3666	ast::Flag::CaseInsensitive
				3667	),
				3668	},
				3669	ast::FlagsItem {
				3670	span: span(3..4),
				3671	kind: ast::FlagsItemKind::Negation,
				3672	},
				3673	ast::FlagsItem {
				3674	span: span(4..5),
				3675	kind: ast::FlagsItemKind::Flag(
				3676	ast::Flag::SwapGreed
				3677	),
				3678	},
				3679	],
				3680	},
				3681	}))
				3682	);
				3683
				3684	assert_eq!(
				3685	parser("()").parse(),
				3686	Ok(Ast::Group(ast::Group {
				3687	span: span(0..2),
				3688	kind: ast::GroupKind::CaptureIndex(1),
				3689	ast: Box::new(Ast::Empty(span(1..1))),
				3690	}))
				3691	);
				3692	assert_eq!(
				3693	parser("(a)").parse(),
				3694	Ok(Ast::Group(ast::Group {
				3695	span: span(0..3),
				3696	kind: ast::GroupKind::CaptureIndex(1),
				3697	ast: Box::new(lit('a', 1)),
				3698	}))
				3699	);
				3700	assert_eq!(
				3701	parser("(())").parse(),
				3702	Ok(Ast::Group(ast::Group {
				3703	span: span(0..4),
				3704	kind: ast::GroupKind::CaptureIndex(1),
				3705	ast: Box::new(Ast::Group(ast::Group {
				3706	span: span(1..3),
				3707	kind: ast::GroupKind::CaptureIndex(2),
				3708	ast: Box::new(Ast::Empty(span(2..2))),
				3709	})),
				3710	}))
				3711	);
				3712
				3713	assert_eq!(
				3714	parser("(?:a)").parse(),
				3715	Ok(Ast::Group(ast::Group {
				3716	span: span(0..5),
				3717	kind: ast::GroupKind::NonCapturing(ast::Flags {
				3718	span: span(2..2),
				3719	items: vec![],
				3720	}),
				3721	ast: Box::new(lit('a', 3)),
				3722	}))
				3723	);
				3724
				3725	assert_eq!(
				3726	parser("(?i:a)").parse(),
				3727	Ok(Ast::Group(ast::Group {
				3728	span: span(0..6),
				3729	kind: ast::GroupKind::NonCapturing(ast::Flags {
				3730	span: span(2..3),
				3731	items: vec![ast::FlagsItem {
				3732	span: span(2..3),
				3733	kind: ast::FlagsItemKind::Flag(
				3734	ast::Flag::CaseInsensitive
				3735	),
				3736	},],
				3737	}),
				3738	ast: Box::new(lit('a', 4)),
				3739	}))
				3740	);
				3741	assert_eq!(
				3742	parser("(?i-U:a)").parse(),
				3743	Ok(Ast::Group(ast::Group {
				3744	span: span(0..8),
				3745	kind: ast::GroupKind::NonCapturing(ast::Flags {
				3746	span: span(2..5),
				3747	items: vec![
				3748	ast::FlagsItem {
				3749	span: span(2..3),
				3750	kind: ast::FlagsItemKind::Flag(
				3751	ast::Flag::CaseInsensitive
				3752	),
				3753	},
				3754	ast::FlagsItem {
				3755	span: span(3..4),
				3756	kind: ast::FlagsItemKind::Negation,
				3757	},
				3758	ast::FlagsItem {
				3759	span: span(4..5),
				3760	kind: ast::FlagsItemKind::Flag(
				3761	ast::Flag::SwapGreed
				3762	),
				3763	},
				3764	],
				3765	}),
				3766	ast: Box::new(lit('a', 6)),
				3767	}))
				3768	);
				3769
				3770	assert_eq!(
				3771	parser("(").parse().unwrap_err(),
				3772	TestError {
				3773	span: span(0..1),
				3774	kind: ast::ErrorKind::GroupUnclosed,
				3775	}
				3776	);
				3777	assert_eq!(
				3778	parser("(?").parse().unwrap_err(),
				3779	TestError {
				3780	span: span(0..1),
				3781	kind: ast::ErrorKind::GroupUnclosed,
				3782	}
				3783	);
				3784	assert_eq!(
				3785	parser("(?P").parse().unwrap_err(),
				3786	TestError {
				3787	span: span(2..3),
				3788	kind: ast::ErrorKind::FlagUnrecognized,
				3789	}
				3790	);
				3791	assert_eq!(
				3792	parser("(?P<").parse().unwrap_err(),
				3793	TestError {
				3794	span: span(4..4),
				3795	kind: ast::ErrorKind::GroupNameUnexpectedEof,
				3796	}
				3797	);
				3798	assert_eq!(
				3799	parser("(a").parse().unwrap_err(),
				3800	TestError {
				3801	span: span(0..1),
				3802	kind: ast::ErrorKind::GroupUnclosed,
				3803	}
				3804	);
				3805	assert_eq!(
				3806	parser("(()").parse().unwrap_err(),
				3807	TestError {
				3808	span: span(0..1),
				3809	kind: ast::ErrorKind::GroupUnclosed,
				3810	}
				3811	);
				3812	assert_eq!(
				3813	parser(")").parse().unwrap_err(),
				3814	TestError {
				3815	span: span(0..1),
				3816	kind: ast::ErrorKind::GroupUnopened,
				3817	}
				3818	);
				3819	assert_eq!(
				3820	parser("a)").parse().unwrap_err(),
				3821	TestError {
				3822	span: span(1..2),
				3823	kind: ast::ErrorKind::GroupUnopened,
				3824	}
				3825	);
				3826	}
				3827
				3828	#[test]
				3829	fn parse_capture_name() {
				3830	assert_eq!(
				3831	parser("(?P<a>z)").parse(),
				3832	Ok(Ast::Group(ast::Group {
				3833	span: span(0..8),
				3834	kind: ast::GroupKind::CaptureName(ast::CaptureName {
				3835	span: span(4..5),
				3836	name: s("a"),
				3837	index: 1,
				3838	}),
				3839	ast: Box::new(lit('z', 6)),
				3840	}))
				3841	);
				3842	assert_eq!(
				3843	parser("(?P<abc>z)").parse(),
				3844	Ok(Ast::Group(ast::Group {
				3845	span: span(0..10),
				3846	kind: ast::GroupKind::CaptureName(ast::CaptureName {
				3847	span: span(4..7),
				3848	name: s("abc"),
				3849	index: 1,
				3850	}),
				3851	ast: Box::new(lit('z', 8)),
				3852	}))
				3853	);
				3854
				3855	assert_eq!(
Chih-Hung Hsieh	31dfd7f	2020-10-26 13:16:58 -0700	[diff] [blame]	3856	parser("(?P<a_1>z)").parse(),
				3857	Ok(Ast::Group(ast::Group {
				3858	span: span(0..10),
				3859	kind: ast::GroupKind::CaptureName(ast::CaptureName {
				3860	span: span(4..7),
				3861	name: s("a_1"),
				3862	index: 1,
				3863	}),
				3864	ast: Box::new(lit('z', 8)),
				3865	}))
				3866	);
				3867
				3868	assert_eq!(
				3869	parser("(?P<a.1>z)").parse(),
				3870	Ok(Ast::Group(ast::Group {
				3871	span: span(0..10),
				3872	kind: ast::GroupKind::CaptureName(ast::CaptureName {
				3873	span: span(4..7),
				3874	name: s("a.1"),
				3875	index: 1,
				3876	}),
				3877	ast: Box::new(lit('z', 8)),
				3878	}))
				3879	);
				3880
				3881	assert_eq!(
				3882	parser("(?P<a[1]>z)").parse(),
				3883	Ok(Ast::Group(ast::Group {
				3884	span: span(0..11),
				3885	kind: ast::GroupKind::CaptureName(ast::CaptureName {
				3886	span: span(4..8),
				3887	name: s("a[1]"),
				3888	index: 1,
				3889	}),
				3890	ast: Box::new(lit('z', 9)),
				3891	}))
				3892	);
				3893
				3894	assert_eq!(
Chih-Hung Hsieh	048fc04	2020-04-16 10:44:22 -0700	[diff] [blame]	3895	parser("(?P<").parse().unwrap_err(),
				3896	TestError {
				3897	span: span(4..4),
				3898	kind: ast::ErrorKind::GroupNameUnexpectedEof,
				3899	}
				3900	);
				3901	assert_eq!(
				3902	parser("(?P<>z)").parse().unwrap_err(),
				3903	TestError {
				3904	span: span(4..4),
				3905	kind: ast::ErrorKind::GroupNameEmpty,
				3906	}
				3907	);
				3908	assert_eq!(
				3909	parser("(?P<a").parse().unwrap_err(),
				3910	TestError {
				3911	span: span(5..5),
				3912	kind: ast::ErrorKind::GroupNameUnexpectedEof,
				3913	}
				3914	);
				3915	assert_eq!(
				3916	parser("(?P<ab").parse().unwrap_err(),
				3917	TestError {
				3918	span: span(6..6),
				3919	kind: ast::ErrorKind::GroupNameUnexpectedEof,
				3920	}
				3921	);
				3922	assert_eq!(
				3923	parser("(?P<0a").parse().unwrap_err(),
				3924	TestError {
				3925	span: span(4..5),
				3926	kind: ast::ErrorKind::GroupNameInvalid,
				3927	}
				3928	);
				3929	assert_eq!(
				3930	parser("(?P<~").parse().unwrap_err(),
				3931	TestError {
				3932	span: span(4..5),
				3933	kind: ast::ErrorKind::GroupNameInvalid,
				3934	}
				3935	);
				3936	assert_eq!(
				3937	parser("(?P<abc~").parse().unwrap_err(),
				3938	TestError {
				3939	span: span(7..8),
				3940	kind: ast::ErrorKind::GroupNameInvalid,
				3941	}
				3942	);
				3943	assert_eq!(
				3944	parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
				3945	TestError {
				3946	span: span(12..13),
				3947	kind: ast::ErrorKind::GroupNameDuplicate {
				3948	original: span(4..5),
				3949	},
				3950	}
				3951	);
				3952	}
				3953
				3954	#[test]
				3955	fn parse_flags() {
				3956	assert_eq!(
				3957	parser("i:").parse_flags(),
				3958	Ok(ast::Flags {
				3959	span: span(0..1),
				3960	items: vec![ast::FlagsItem {
				3961	span: span(0..1),
				3962	kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
				3963	}],
				3964	})
				3965	);
				3966	assert_eq!(
				3967	parser("i)").parse_flags(),
				3968	Ok(ast::Flags {
				3969	span: span(0..1),
				3970	items: vec![ast::FlagsItem {
				3971	span: span(0..1),
				3972	kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
				3973	}],
				3974	})
				3975	);
				3976
				3977	assert_eq!(
				3978	parser("isU:").parse_flags(),
				3979	Ok(ast::Flags {
				3980	span: span(0..3),
				3981	items: vec![
				3982	ast::FlagsItem {
				3983	span: span(0..1),
				3984	kind: ast::FlagsItemKind::Flag(
				3985	ast::Flag::CaseInsensitive
				3986	),
				3987	},
				3988	ast::FlagsItem {
				3989	span: span(1..2),
				3990	kind: ast::FlagsItemKind::Flag(
				3991	ast::Flag::DotMatchesNewLine
				3992	),
				3993	},
				3994	ast::FlagsItem {
				3995	span: span(2..3),
				3996	kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
				3997	},
				3998	],
				3999	})
				4000	);
				4001
				4002	assert_eq!(
				4003	parser("-isU:").parse_flags(),
				4004	Ok(ast::Flags {
				4005	span: span(0..4),
				4006	items: vec![
				4007	ast::FlagsItem {
				4008	span: span(0..1),
				4009	kind: ast::FlagsItemKind::Negation,
				4010	},
				4011	ast::FlagsItem {
				4012	span: span(1..2),
				4013	kind: ast::FlagsItemKind::Flag(
				4014	ast::Flag::CaseInsensitive
				4015	),
				4016	},
				4017	ast::FlagsItem {
				4018	span: span(2..3),
				4019	kind: ast::FlagsItemKind::Flag(
				4020	ast::Flag::DotMatchesNewLine
				4021	),
				4022	},
				4023	ast::FlagsItem {
				4024	span: span(3..4),
				4025	kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
				4026	},
				4027	],
				4028	})
				4029	);
				4030	assert_eq!(
				4031	parser("i-sU:").parse_flags(),
				4032	Ok(ast::Flags {
				4033	span: span(0..4),
				4034	items: vec![
				4035	ast::FlagsItem {
				4036	span: span(0..1),
				4037	kind: ast::FlagsItemKind::Flag(
				4038	ast::Flag::CaseInsensitive
				4039	),
				4040	},
				4041	ast::FlagsItem {
				4042	span: span(1..2),
				4043	kind: ast::FlagsItemKind::Negation,
				4044	},
				4045	ast::FlagsItem {
				4046	span: span(2..3),
				4047	kind: ast::FlagsItemKind::Flag(
				4048	ast::Flag::DotMatchesNewLine
				4049	),
				4050	},
				4051	ast::FlagsItem {
				4052	span: span(3..4),
				4053	kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
				4054	},
				4055	],
				4056	})
				4057	);
				4058
				4059	assert_eq!(
				4060	parser("isU").parse_flags().unwrap_err(),
				4061	TestError {
				4062	span: span(3..3),
				4063	kind: ast::ErrorKind::FlagUnexpectedEof,
				4064	}
				4065	);
				4066	assert_eq!(
				4067	parser("isUa:").parse_flags().unwrap_err(),
				4068	TestError {
				4069	span: span(3..4),
				4070	kind: ast::ErrorKind::FlagUnrecognized,
				4071	}
				4072	);
				4073	assert_eq!(
				4074	parser("isUi:").parse_flags().unwrap_err(),
				4075	TestError {
				4076	span: span(3..4),
				4077	kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
				4078	}
				4079	);
				4080	assert_eq!(
				4081	parser("i-sU-i:").parse_flags().unwrap_err(),
				4082	TestError {
				4083	span: span(4..5),
				4084	kind: ast::ErrorKind::FlagRepeatedNegation {
				4085	original: span(1..2),
				4086	},
				4087	}
				4088	);
				4089	assert_eq!(
				4090	parser("-)").parse_flags().unwrap_err(),
				4091	TestError {
				4092	span: span(0..1),
				4093	kind: ast::ErrorKind::FlagDanglingNegation,
				4094	}
				4095	);
				4096	assert_eq!(
				4097	parser("i-)").parse_flags().unwrap_err(),
				4098	TestError {
				4099	span: span(1..2),
				4100	kind: ast::ErrorKind::FlagDanglingNegation,
				4101	}
				4102	);
				4103	assert_eq!(
				4104	parser("iU-)").parse_flags().unwrap_err(),
				4105	TestError {
				4106	span: span(2..3),
				4107	kind: ast::ErrorKind::FlagDanglingNegation,
				4108	}
				4109	);
				4110	}
				4111
				4112	#[test]
				4113	fn parse_flag() {
				4114	assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
				4115	assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
				4116	assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
				4117	assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
				4118	assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
				4119	assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
				4120
				4121	assert_eq!(
				4122	parser("a").parse_flag().unwrap_err(),
				4123	TestError {
				4124	span: span(0..1),
				4125	kind: ast::ErrorKind::FlagUnrecognized,
				4126	}
				4127	);
				4128	assert_eq!(
				4129	parser("☃").parse_flag().unwrap_err(),
				4130	TestError {
				4131	span: span_range("☃", 0..3),
				4132	kind: ast::ErrorKind::FlagUnrecognized,
				4133	}
				4134	);
				4135	}
				4136
				4137	#[test]
				4138	fn parse_primitive_non_escape() {
				4139	assert_eq!(
				4140	parser(r".").parse_primitive(),
				4141	Ok(Primitive::Dot(span(0..1)))
				4142	);
				4143	assert_eq!(
				4144	parser(r"^").parse_primitive(),
				4145	Ok(Primitive::Assertion(ast::Assertion {
				4146	span: span(0..1),
				4147	kind: ast::AssertionKind::StartLine,
				4148	}))
				4149	);
				4150	assert_eq!(
				4151	parser(r"$").parse_primitive(),
				4152	Ok(Primitive::Assertion(ast::Assertion {
				4153	span: span(0..1),
				4154	kind: ast::AssertionKind::EndLine,
				4155	}))
				4156	);
				4157
				4158	assert_eq!(
				4159	parser(r"a").parse_primitive(),
				4160	Ok(Primitive::Literal(ast::Literal {
				4161	span: span(0..1),
				4162	kind: ast::LiteralKind::Verbatim,
				4163	c: 'a',
				4164	}))
				4165	);
				4166	assert_eq!(
				4167	parser(r"\|").parse_primitive(),
				4168	Ok(Primitive::Literal(ast::Literal {
				4169	span: span(0..1),
				4170	kind: ast::LiteralKind::Verbatim,
				4171	c: '\|',
				4172	}))
				4173	);
				4174	assert_eq!(
				4175	parser(r"☃").parse_primitive(),
				4176	Ok(Primitive::Literal(ast::Literal {
				4177	span: span_range("☃", 0..3),
				4178	kind: ast::LiteralKind::Verbatim,
				4179	c: '☃',
				4180	}))
				4181	);
				4182	}
				4183
				4184	#[test]
				4185	fn parse_escape() {
				4186	assert_eq!(
				4187	parser(r"\\|").parse_primitive(),
				4188	Ok(Primitive::Literal(ast::Literal {
				4189	span: span(0..2),
				4190	kind: ast::LiteralKind::Punctuation,
				4191	c: '\|',
				4192	}))
				4193	);
				4194	let specials = &[
				4195	(r"\a", '\x07', ast::SpecialLiteralKind::Bell),
				4196	(r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
				4197	(r"\t", '\t', ast::SpecialLiteralKind::Tab),
				4198	(r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
				4199	(r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
				4200	(r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
				4201	];
				4202	for &(pat, c, ref kind) in specials {
				4203	assert_eq!(
				4204	parser(pat).parse_primitive(),
				4205	Ok(Primitive::Literal(ast::Literal {
				4206	span: span(0..2),
				4207	kind: ast::LiteralKind::Special(kind.clone()),
				4208	c: c,
				4209	}))
				4210	);
				4211	}
				4212	assert_eq!(
				4213	parser(r"\A").parse_primitive(),
				4214	Ok(Primitive::Assertion(ast::Assertion {
				4215	span: span(0..2),
				4216	kind: ast::AssertionKind::StartText,
				4217	}))
				4218	);
				4219	assert_eq!(
				4220	parser(r"\z").parse_primitive(),
				4221	Ok(Primitive::Assertion(ast::Assertion {
				4222	span: span(0..2),
				4223	kind: ast::AssertionKind::EndText,
				4224	}))
				4225	);
				4226	assert_eq!(
				4227	parser(r"\b").parse_primitive(),
				4228	Ok(Primitive::Assertion(ast::Assertion {
				4229	span: span(0..2),
				4230	kind: ast::AssertionKind::WordBoundary,
				4231	}))
				4232	);
				4233	assert_eq!(
				4234	parser(r"\B").parse_primitive(),
				4235	Ok(Primitive::Assertion(ast::Assertion {
				4236	span: span(0..2),
				4237	kind: ast::AssertionKind::NotWordBoundary,
				4238	}))
				4239	);
				4240
				4241	assert_eq!(
				4242	parser(r"\").parse_escape().unwrap_err(),
				4243	TestError {
				4244	span: span(0..1),
				4245	kind: ast::ErrorKind::EscapeUnexpectedEof,
				4246	}
				4247	);
				4248	assert_eq!(
				4249	parser(r"\y").parse_escape().unwrap_err(),
				4250	TestError {
				4251	span: span(0..2),
				4252	kind: ast::ErrorKind::EscapeUnrecognized,
				4253	}
				4254	);
				4255	}
				4256
				4257	#[test]
				4258	fn parse_unsupported_backreference() {
				4259	assert_eq!(
				4260	parser(r"\0").parse_escape().unwrap_err(),
				4261	TestError {
				4262	span: span(0..2),
				4263	kind: ast::ErrorKind::UnsupportedBackreference,
				4264	}
				4265	);
				4266	assert_eq!(
				4267	parser(r"\9").parse_escape().unwrap_err(),
				4268	TestError {
				4269	span: span(0..2),
				4270	kind: ast::ErrorKind::UnsupportedBackreference,
				4271	}
				4272	);
				4273	}
				4274
				4275	#[test]
				4276	fn parse_octal() {
				4277	for i in 0..511 {
				4278	let pat = format!(r"\{:o}", i);
				4279	assert_eq!(
				4280	parser_octal(&pat).parse_escape(),
				4281	Ok(Primitive::Literal(ast::Literal {
				4282	span: span(0..pat.len()),
				4283	kind: ast::LiteralKind::Octal,
				4284	c: ::std::char::from_u32(i).unwrap(),
				4285	}))
				4286	);
				4287	}
				4288	assert_eq!(
				4289	parser_octal(r"\778").parse_escape(),
				4290	Ok(Primitive::Literal(ast::Literal {
				4291	span: span(0..3),
				4292	kind: ast::LiteralKind::Octal,
				4293	c: '?',
				4294	}))
				4295	);
				4296	assert_eq!(
				4297	parser_octal(r"\7777").parse_escape(),
				4298	Ok(Primitive::Literal(ast::Literal {
				4299	span: span(0..4),
				4300	kind: ast::LiteralKind::Octal,
				4301	c: '\u{01FF}',
				4302	}))
				4303	);
				4304	assert_eq!(
				4305	parser_octal(r"\778").parse(),
				4306	Ok(Ast::Concat(ast::Concat {
				4307	span: span(0..4),
				4308	asts: vec![
				4309	Ast::Literal(ast::Literal {
				4310	span: span(0..3),
				4311	kind: ast::LiteralKind::Octal,
				4312	c: '?',
				4313	}),
				4314	Ast::Literal(ast::Literal {
				4315	span: span(3..4),
				4316	kind: ast::LiteralKind::Verbatim,
				4317	c: '8',
				4318	}),
				4319	],
				4320	}))
				4321	);
				4322	assert_eq!(
				4323	parser_octal(r"\7777").parse(),
				4324	Ok(Ast::Concat(ast::Concat {
				4325	span: span(0..5),
				4326	asts: vec![
				4327	Ast::Literal(ast::Literal {
				4328	span: span(0..4),
				4329	kind: ast::LiteralKind::Octal,
				4330	c: '\u{01FF}',
				4331	}),
				4332	Ast::Literal(ast::Literal {
				4333	span: span(4..5),
				4334	kind: ast::LiteralKind::Verbatim,
				4335	c: '7',
				4336	}),
				4337	],
				4338	}))
				4339	);
				4340
				4341	assert_eq!(
				4342	parser_octal(r"\8").parse_escape().unwrap_err(),
				4343	TestError {
				4344	span: span(0..2),
				4345	kind: ast::ErrorKind::EscapeUnrecognized,
				4346	}
				4347	);
				4348	}
				4349
				4350	#[test]
				4351	fn parse_hex_two() {
				4352	for i in 0..256 {
				4353	let pat = format!(r"\x{:02x}", i);
				4354	assert_eq!(
				4355	parser(&pat).parse_escape(),
				4356	Ok(Primitive::Literal(ast::Literal {
				4357	span: span(0..pat.len()),
				4358	kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
				4359	c: ::std::char::from_u32(i).unwrap(),
				4360	}))
				4361	);
				4362	}
				4363
				4364	assert_eq!(
				4365	parser(r"\xF").parse_escape().unwrap_err(),
				4366	TestError {
				4367	span: span(3..3),
				4368	kind: ast::ErrorKind::EscapeUnexpectedEof,
				4369	}
				4370	);
				4371	assert_eq!(
				4372	parser(r"\xG").parse_escape().unwrap_err(),
				4373	TestError {
				4374	span: span(2..3),
				4375	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4376	}
				4377	);
				4378	assert_eq!(
				4379	parser(r"\xFG").parse_escape().unwrap_err(),
				4380	TestError {
				4381	span: span(3..4),
				4382	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4383	}
				4384	);
				4385	}
				4386
				4387	#[test]
				4388	fn parse_hex_four() {
				4389	for i in 0..65536 {
				4390	let c = match ::std::char::from_u32(i) {
				4391	None => continue,
				4392	Some(c) => c,
				4393	};
				4394	let pat = format!(r"\u{:04x}", i);
				4395	assert_eq!(
				4396	parser(&pat).parse_escape(),
				4397	Ok(Primitive::Literal(ast::Literal {
				4398	span: span(0..pat.len()),
				4399	kind: ast::LiteralKind::HexFixed(
				4400	ast::HexLiteralKind::UnicodeShort
				4401	),
				4402	c: c,
				4403	}))
				4404	);
				4405	}
				4406
				4407	assert_eq!(
				4408	parser(r"\uF").parse_escape().unwrap_err(),
				4409	TestError {
				4410	span: span(3..3),
				4411	kind: ast::ErrorKind::EscapeUnexpectedEof,
				4412	}
				4413	);
				4414	assert_eq!(
				4415	parser(r"\uG").parse_escape().unwrap_err(),
				4416	TestError {
				4417	span: span(2..3),
				4418	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4419	}
				4420	);
				4421	assert_eq!(
				4422	parser(r"\uFG").parse_escape().unwrap_err(),
				4423	TestError {
				4424	span: span(3..4),
				4425	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4426	}
				4427	);
				4428	assert_eq!(
				4429	parser(r"\uFFG").parse_escape().unwrap_err(),
				4430	TestError {
				4431	span: span(4..5),
				4432	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4433	}
				4434	);
				4435	assert_eq!(
				4436	parser(r"\uFFFG").parse_escape().unwrap_err(),
				4437	TestError {
				4438	span: span(5..6),
				4439	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4440	}
				4441	);
				4442	assert_eq!(
				4443	parser(r"\uD800").parse_escape().unwrap_err(),
				4444	TestError {
				4445	span: span(2..6),
				4446	kind: ast::ErrorKind::EscapeHexInvalid,
				4447	}
				4448	);
				4449	}
				4450
				4451	#[test]
				4452	fn parse_hex_eight() {
				4453	for i in 0..65536 {
				4454	let c = match ::std::char::from_u32(i) {
				4455	None => continue,
				4456	Some(c) => c,
				4457	};
				4458	let pat = format!(r"\U{:08x}", i);
				4459	assert_eq!(
				4460	parser(&pat).parse_escape(),
				4461	Ok(Primitive::Literal(ast::Literal {
				4462	span: span(0..pat.len()),
				4463	kind: ast::LiteralKind::HexFixed(
				4464	ast::HexLiteralKind::UnicodeLong
				4465	),
				4466	c: c,
				4467	}))
				4468	);
				4469	}
				4470
				4471	assert_eq!(
				4472	parser(r"\UF").parse_escape().unwrap_err(),
				4473	TestError {
				4474	span: span(3..3),
				4475	kind: ast::ErrorKind::EscapeUnexpectedEof,
				4476	}
				4477	);
				4478	assert_eq!(
				4479	parser(r"\UG").parse_escape().unwrap_err(),
				4480	TestError {
				4481	span: span(2..3),
				4482	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4483	}
				4484	);
				4485	assert_eq!(
				4486	parser(r"\UFG").parse_escape().unwrap_err(),
				4487	TestError {
				4488	span: span(3..4),
				4489	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4490	}
				4491	);
				4492	assert_eq!(
				4493	parser(r"\UFFG").parse_escape().unwrap_err(),
				4494	TestError {
				4495	span: span(4..5),
				4496	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4497	}
				4498	);
				4499	assert_eq!(
				4500	parser(r"\UFFFG").parse_escape().unwrap_err(),
				4501	TestError {
				4502	span: span(5..6),
				4503	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4504	}
				4505	);
				4506	assert_eq!(
				4507	parser(r"\UFFFFG").parse_escape().unwrap_err(),
				4508	TestError {
				4509	span: span(6..7),
				4510	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4511	}
				4512	);
				4513	assert_eq!(
				4514	parser(r"\UFFFFFG").parse_escape().unwrap_err(),
				4515	TestError {
				4516	span: span(7..8),
				4517	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4518	}
				4519	);
				4520	assert_eq!(
				4521	parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
				4522	TestError {
				4523	span: span(8..9),
				4524	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4525	}
				4526	);
				4527	assert_eq!(
				4528	parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
				4529	TestError {
				4530	span: span(9..10),
				4531	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4532	}
				4533	);
				4534	}
				4535
				4536	#[test]
				4537	fn parse_hex_brace() {
				4538	assert_eq!(
				4539	parser(r"\u{26c4}").parse_escape(),
				4540	Ok(Primitive::Literal(ast::Literal {
				4541	span: span(0..8),
				4542	kind: ast::LiteralKind::HexBrace(
				4543	ast::HexLiteralKind::UnicodeShort
				4544	),
				4545	c: '⛄',
				4546	}))
				4547	);
				4548	assert_eq!(
				4549	parser(r"\U{26c4}").parse_escape(),
				4550	Ok(Primitive::Literal(ast::Literal {
				4551	span: span(0..8),
				4552	kind: ast::LiteralKind::HexBrace(
				4553	ast::HexLiteralKind::UnicodeLong
				4554	),
				4555	c: '⛄',
				4556	}))
				4557	);
				4558	assert_eq!(
				4559	parser(r"\x{26c4}").parse_escape(),
				4560	Ok(Primitive::Literal(ast::Literal {
				4561	span: span(0..8),
				4562	kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
				4563	c: '⛄',
				4564	}))
				4565	);
				4566	assert_eq!(
				4567	parser(r"\x{26C4}").parse_escape(),
				4568	Ok(Primitive::Literal(ast::Literal {
				4569	span: span(0..8),
				4570	kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
				4571	c: '⛄',
				4572	}))
				4573	);
				4574	assert_eq!(
				4575	parser(r"\x{10fFfF}").parse_escape(),
				4576	Ok(Primitive::Literal(ast::Literal {
				4577	span: span(0..10),
				4578	kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
				4579	c: '\u{10FFFF}',
				4580	}))
				4581	);
				4582
				4583	assert_eq!(
				4584	parser(r"\x").parse_escape().unwrap_err(),
				4585	TestError {
				4586	span: span(2..2),
				4587	kind: ast::ErrorKind::EscapeUnexpectedEof,
				4588	}
				4589	);
				4590	assert_eq!(
				4591	parser(r"\x{").parse_escape().unwrap_err(),
				4592	TestError {
				4593	span: span(2..3),
				4594	kind: ast::ErrorKind::EscapeUnexpectedEof,
				4595	}
				4596	);
				4597	assert_eq!(
				4598	parser(r"\x{FF").parse_escape().unwrap_err(),
				4599	TestError {
				4600	span: span(2..5),
				4601	kind: ast::ErrorKind::EscapeUnexpectedEof,
				4602	}
				4603	);
				4604	assert_eq!(
				4605	parser(r"\x{}").parse_escape().unwrap_err(),
				4606	TestError {
				4607	span: span(2..4),
				4608	kind: ast::ErrorKind::EscapeHexEmpty,
				4609	}
				4610	);
				4611	assert_eq!(
				4612	parser(r"\x{FGF}").parse_escape().unwrap_err(),
				4613	TestError {
				4614	span: span(4..5),
				4615	kind: ast::ErrorKind::EscapeHexInvalidDigit,
				4616	}
				4617	);
				4618	assert_eq!(
				4619	parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
				4620	TestError {
				4621	span: span(3..9),
				4622	kind: ast::ErrorKind::EscapeHexInvalid,
				4623	}
				4624	);
				4625	assert_eq!(
				4626	parser(r"\x{D800}").parse_escape().unwrap_err(),
				4627	TestError {
				4628	span: span(3..7),
				4629	kind: ast::ErrorKind::EscapeHexInvalid,
				4630	}
				4631	);
				4632	assert_eq!(
				4633	parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
				4634	TestError {
				4635	span: span(3..12),
				4636	kind: ast::ErrorKind::EscapeHexInvalid,
				4637	}
				4638	);
				4639	}
				4640
				4641	#[test]
				4642	fn parse_decimal() {
				4643	assert_eq!(parser("123").parse_decimal(), Ok(123));
				4644	assert_eq!(parser("0").parse_decimal(), Ok(0));
				4645	assert_eq!(parser("01").parse_decimal(), Ok(1));
				4646
				4647	assert_eq!(
				4648	parser("-1").parse_decimal().unwrap_err(),
				4649	TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
				4650	);
				4651	assert_eq!(
				4652	parser("").parse_decimal().unwrap_err(),
				4653	TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
				4654	);
				4655	assert_eq!(
				4656	parser("9999999999").parse_decimal().unwrap_err(),
				4657	TestError {
				4658	span: span(0..10),
				4659	kind: ast::ErrorKind::DecimalInvalid,
				4660	}
				4661	);
				4662	}
				4663
				4664	#[test]
				4665	fn parse_set_class() {
				4666	fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
				4667	ast::ClassSet::union(ast::ClassSetUnion {
				4668	span: span,
				4669	items: items,
				4670	})
				4671	}
				4672
				4673	fn intersection(
				4674	span: Span,
				4675	lhs: ast::ClassSet,
				4676	rhs: ast::ClassSet,
				4677	) -> ast::ClassSet {
				4678	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
				4679	span: span,
				4680	kind: ast::ClassSetBinaryOpKind::Intersection,
				4681	lhs: Box::new(lhs),
				4682	rhs: Box::new(rhs),
				4683	})
				4684	}
				4685
				4686	fn difference(
				4687	span: Span,
				4688	lhs: ast::ClassSet,
				4689	rhs: ast::ClassSet,
				4690	) -> ast::ClassSet {
				4691	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
				4692	span: span,
				4693	kind: ast::ClassSetBinaryOpKind::Difference,
				4694	lhs: Box::new(lhs),
				4695	rhs: Box::new(rhs),
				4696	})
				4697	}
				4698
				4699	fn symdifference(
				4700	span: Span,
				4701	lhs: ast::ClassSet,
				4702	rhs: ast::ClassSet,
				4703	) -> ast::ClassSet {
				4704	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
				4705	span: span,
				4706	kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
				4707	lhs: Box::new(lhs),
				4708	rhs: Box::new(rhs),
				4709	})
				4710	}
				4711
				4712	fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
				4713	ast::ClassSet::Item(item)
				4714	}
				4715
				4716	fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
				4717	ast::ClassSetItem::Ascii(cls)
				4718	}
				4719
				4720	fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
				4721	ast::ClassSetItem::Unicode(cls)
				4722	}
				4723
				4724	fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
				4725	ast::ClassSetItem::Perl(cls)
				4726	}
				4727
				4728	fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
				4729	ast::ClassSetItem::Bracketed(Box::new(cls))
				4730	}
				4731
				4732	fn lit(span: Span, c: char) -> ast::ClassSetItem {
				4733	ast::ClassSetItem::Literal(ast::Literal {
				4734	span: span,
				4735	kind: ast::LiteralKind::Verbatim,
				4736	c: c,
				4737	})
				4738	}
				4739
				4740	fn empty(span: Span) -> ast::ClassSetItem {
				4741	ast::ClassSetItem::Empty(span)
				4742	}
				4743
				4744	fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
				4745	let pos1 = Position {
				4746	offset: span.start.offset + start.len_utf8(),
				4747	column: span.start.column + 1,
				4748	..span.start
				4749	};
				4750	let pos2 = Position {
				4751	offset: span.end.offset - end.len_utf8(),
				4752	column: span.end.column - 1,
				4753	..span.end
				4754	};
				4755	ast::ClassSetItem::Range(ast::ClassSetRange {
				4756	span: span,
				4757	start: ast::Literal {
				4758	span: Span { end: pos1, ..span },
				4759	kind: ast::LiteralKind::Verbatim,
				4760	c: start,
				4761	},
				4762	end: ast::Literal {
				4763	span: Span { start: pos2, ..span },
				4764	kind: ast::LiteralKind::Verbatim,
				4765	c: end,
				4766	},
				4767	})
				4768	}
				4769
				4770	fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
				4771	ast::ClassAscii {
				4772	span: span,
				4773	kind: ast::ClassAsciiKind::Alnum,
				4774	negated: negated,
				4775	}
				4776	}
				4777
				4778	fn lower(span: Span, negated: bool) -> ast::ClassAscii {
				4779	ast::ClassAscii {
				4780	span: span,
				4781	kind: ast::ClassAsciiKind::Lower,
				4782	negated: negated,
				4783	}
				4784	}
				4785
				4786	assert_eq!(
				4787	parser("[[:alnum:]]").parse(),
				4788	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4789	span: span(0..11),
				4790	negated: false,
				4791	kind: itemset(item_ascii(alnum(span(1..10), false))),
				4792	})))
				4793	);
				4794	assert_eq!(
				4795	parser("[[[:alnum:]]]").parse(),
				4796	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4797	span: span(0..13),
				4798	negated: false,
				4799	kind: itemset(item_bracket(ast::ClassBracketed {
				4800	span: span(1..12),
				4801	negated: false,
				4802	kind: itemset(item_ascii(alnum(span(2..11), false))),
				4803	})),
				4804	})))
				4805	);
				4806	assert_eq!(
				4807	parser("[[:alnum:]&&[:lower:]]").parse(),
				4808	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4809	span: span(0..22),
				4810	negated: false,
				4811	kind: intersection(
				4812	span(1..21),
				4813	itemset(item_ascii(alnum(span(1..10), false))),
				4814	itemset(item_ascii(lower(span(12..21), false))),
				4815	),
				4816	})))
				4817	);
				4818	assert_eq!(
				4819	parser("[[:alnum:]--[:lower:]]").parse(),
				4820	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4821	span: span(0..22),
				4822	negated: false,
				4823	kind: difference(
				4824	span(1..21),
				4825	itemset(item_ascii(alnum(span(1..10), false))),
				4826	itemset(item_ascii(lower(span(12..21), false))),
				4827	),
				4828	})))
				4829	);
				4830	assert_eq!(
				4831	parser("[[:alnum:]~~[:lower:]]").parse(),
				4832	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4833	span: span(0..22),
				4834	negated: false,
				4835	kind: symdifference(
				4836	span(1..21),
				4837	itemset(item_ascii(alnum(span(1..10), false))),
				4838	itemset(item_ascii(lower(span(12..21), false))),
				4839	),
				4840	})))
				4841	);
				4842
				4843	assert_eq!(
				4844	parser("[a]").parse(),
				4845	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4846	span: span(0..3),
				4847	negated: false,
				4848	kind: itemset(lit(span(1..2), 'a')),
				4849	})))
				4850	);
				4851	assert_eq!(
				4852	parser(r"[a\]]").parse(),
				4853	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4854	span: span(0..5),
				4855	negated: false,
				4856	kind: union(
				4857	span(1..4),
				4858	vec![
				4859	lit(span(1..2), 'a'),
				4860	ast::ClassSetItem::Literal(ast::Literal {
				4861	span: span(2..4),
				4862	kind: ast::LiteralKind::Punctuation,
				4863	c: ']',
				4864	}),
				4865	]
				4866	),
				4867	})))
				4868	);
				4869	assert_eq!(
				4870	parser(r"[a\-z]").parse(),
				4871	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4872	span: span(0..6),
				4873	negated: false,
				4874	kind: union(
				4875	span(1..5),
				4876	vec![
				4877	lit(span(1..2), 'a'),
				4878	ast::ClassSetItem::Literal(ast::Literal {
				4879	span: span(2..4),
				4880	kind: ast::LiteralKind::Punctuation,
				4881	c: '-',
				4882	}),
				4883	lit(span(4..5), 'z'),
				4884	]
				4885	),
				4886	})))
				4887	);
				4888	assert_eq!(
				4889	parser("[ab]").parse(),
				4890	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4891	span: span(0..4),
				4892	negated: false,
				4893	kind: union(
				4894	span(1..3),
				4895	vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
				4896	),
				4897	})))
				4898	);
				4899	assert_eq!(
				4900	parser("[a-]").parse(),
				4901	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4902	span: span(0..4),
				4903	negated: false,
				4904	kind: union(
				4905	span(1..3),
				4906	vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
				4907	),
				4908	})))
				4909	);
				4910	assert_eq!(
				4911	parser("[-a]").parse(),
				4912	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4913	span: span(0..4),
				4914	negated: false,
				4915	kind: union(
				4916	span(1..3),
				4917	vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
				4918	),
				4919	})))
				4920	);
				4921	assert_eq!(
				4922	parser(r"[\pL]").parse(),
				4923	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4924	span: span(0..5),
				4925	negated: false,
				4926	kind: itemset(item_unicode(ast::ClassUnicode {
				4927	span: span(1..4),
				4928	negated: false,
				4929	kind: ast::ClassUnicodeKind::OneLetter('L'),
				4930	})),
				4931	})))
				4932	);
				4933	assert_eq!(
				4934	parser(r"[\w]").parse(),
				4935	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4936	span: span(0..4),
				4937	negated: false,
				4938	kind: itemset(item_perl(ast::ClassPerl {
				4939	span: span(1..3),
				4940	kind: ast::ClassPerlKind::Word,
				4941	negated: false,
				4942	})),
				4943	})))
				4944	);
				4945	assert_eq!(
				4946	parser(r"[a\wz]").parse(),
				4947	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4948	span: span(0..6),
				4949	negated: false,
				4950	kind: union(
				4951	span(1..5),
				4952	vec![
				4953	lit(span(1..2), 'a'),
				4954	item_perl(ast::ClassPerl {
				4955	span: span(2..4),
				4956	kind: ast::ClassPerlKind::Word,
				4957	negated: false,
				4958	}),
				4959	lit(span(4..5), 'z'),
				4960	]
				4961	),
				4962	})))
				4963	);
				4964
				4965	assert_eq!(
				4966	parser("[a-z]").parse(),
				4967	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4968	span: span(0..5),
				4969	negated: false,
				4970	kind: itemset(range(span(1..4), 'a', 'z')),
				4971	})))
				4972	);
				4973	assert_eq!(
				4974	parser("[a-cx-z]").parse(),
				4975	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4976	span: span(0..8),
				4977	negated: false,
				4978	kind: union(
				4979	span(1..7),
				4980	vec![
				4981	range(span(1..4), 'a', 'c'),
				4982	range(span(4..7), 'x', 'z'),
				4983	]
				4984	),
				4985	})))
				4986	);
				4987	assert_eq!(
				4988	parser(r"[\w&&a-cx-z]").parse(),
				4989	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				4990	span: span(0..12),
				4991	negated: false,
				4992	kind: intersection(
				4993	span(1..11),
				4994	itemset(item_perl(ast::ClassPerl {
				4995	span: span(1..3),
				4996	kind: ast::ClassPerlKind::Word,
				4997	negated: false,
				4998	})),
				4999	union(
				5000	span(5..11),
				5001	vec![
				5002	range(span(5..8), 'a', 'c'),
				5003	range(span(8..11), 'x', 'z'),
				5004	]
				5005	),
				5006	),
				5007	})))
				5008	);
				5009	assert_eq!(
				5010	parser(r"[a-cx-z&&\w]").parse(),
				5011	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5012	span: span(0..12),
				5013	negated: false,
				5014	kind: intersection(
				5015	span(1..11),
				5016	union(
				5017	span(1..7),
				5018	vec![
				5019	range(span(1..4), 'a', 'c'),
				5020	range(span(4..7), 'x', 'z'),
				5021	]
				5022	),
				5023	itemset(item_perl(ast::ClassPerl {
				5024	span: span(9..11),
				5025	kind: ast::ClassPerlKind::Word,
				5026	negated: false,
				5027	})),
				5028	),
				5029	})))
				5030	);
				5031	assert_eq!(
				5032	parser(r"[a--b--c]").parse(),
				5033	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5034	span: span(0..9),
				5035	negated: false,
				5036	kind: difference(
				5037	span(1..8),
				5038	difference(
				5039	span(1..5),
				5040	itemset(lit(span(1..2), 'a')),
				5041	itemset(lit(span(4..5), 'b')),
				5042	),
				5043	itemset(lit(span(7..8), 'c')),
				5044	),
				5045	})))
				5046	);
				5047	assert_eq!(
				5048	parser(r"[a~~b~~c]").parse(),
				5049	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5050	span: span(0..9),
				5051	negated: false,
				5052	kind: symdifference(
				5053	span(1..8),
				5054	symdifference(
				5055	span(1..5),
				5056	itemset(lit(span(1..2), 'a')),
				5057	itemset(lit(span(4..5), 'b')),
				5058	),
				5059	itemset(lit(span(7..8), 'c')),
				5060	),
				5061	})))
				5062	);
				5063	assert_eq!(
				5064	parser(r"[\^&&^]").parse(),
				5065	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5066	span: span(0..7),
				5067	negated: false,
				5068	kind: intersection(
				5069	span(1..6),
				5070	itemset(ast::ClassSetItem::Literal(ast::Literal {
				5071	span: span(1..3),
				5072	kind: ast::LiteralKind::Punctuation,
				5073	c: '^',
				5074	})),
				5075	itemset(lit(span(5..6), '^')),
				5076	),
				5077	})))
				5078	);
				5079	assert_eq!(
				5080	parser(r"[\&&&&]").parse(),
				5081	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5082	span: span(0..7),
				5083	negated: false,
				5084	kind: intersection(
				5085	span(1..6),
				5086	itemset(ast::ClassSetItem::Literal(ast::Literal {
				5087	span: span(1..3),
				5088	kind: ast::LiteralKind::Punctuation,
				5089	c: '&',
				5090	})),
				5091	itemset(lit(span(5..6), '&')),
				5092	),
				5093	})))
				5094	);
				5095	assert_eq!(
				5096	parser(r"[&&&&]").parse(),
				5097	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5098	span: span(0..6),
				5099	negated: false,
				5100	kind: intersection(
				5101	span(1..5),
				5102	intersection(
				5103	span(1..3),
				5104	itemset(empty(span(1..1))),
				5105	itemset(empty(span(3..3))),
				5106	),
				5107	itemset(empty(span(5..5))),
				5108	),
				5109	})))
				5110	);
				5111
				5112	let pat = "[☃-⛄]";
				5113	assert_eq!(
				5114	parser(pat).parse(),
				5115	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5116	span: span_range(pat, 0..9),
				5117	negated: false,
				5118	kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
				5119	span: span_range(pat, 1..8),
				5120	start: ast::Literal {
				5121	span: span_range(pat, 1..4),
				5122	kind: ast::LiteralKind::Verbatim,
				5123	c: '☃',
				5124	},
				5125	end: ast::Literal {
				5126	span: span_range(pat, 5..8),
				5127	kind: ast::LiteralKind::Verbatim,
				5128	c: '⛄',
				5129	},
				5130	})),
				5131	})))
				5132	);
				5133
				5134	assert_eq!(
				5135	parser(r"[]]").parse(),
				5136	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5137	span: span(0..3),
				5138	negated: false,
				5139	kind: itemset(lit(span(1..2), ']')),
				5140	})))
				5141	);
				5142	assert_eq!(
				5143	parser(r"[]\[]").parse(),
				5144	Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5145	span: span(0..5),
				5146	negated: false,
				5147	kind: union(
				5148	span(1..4),
				5149	vec![
				5150	lit(span(1..2), ']'),
				5151	ast::ClassSetItem::Literal(ast::Literal {
				5152	span: span(2..4),
				5153	kind: ast::LiteralKind::Punctuation,
				5154	c: '[',
				5155	}),
				5156	]
				5157	),
				5158	})))
				5159	);
				5160	assert_eq!(
				5161	parser(r"[\[]]").parse(),
				5162	Ok(concat(
				5163	0..5,
				5164	vec![
				5165	Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
				5166	span: span(0..4),
				5167	negated: false,
				5168	kind: itemset(ast::ClassSetItem::Literal(
				5169	ast::Literal {
				5170	span: span(1..3),
				5171	kind: ast::LiteralKind::Punctuation,
				5172	c: '[',
				5173	}
				5174	)),
				5175	})),
				5176	Ast::Literal(ast::Literal {
				5177	span: span(4..5),
				5178	kind: ast::LiteralKind::Verbatim,
				5179	c: ']',
				5180	}),
				5181	]
				5182	))
				5183	);
				5184
				5185	assert_eq!(
				5186	parser("[").parse().unwrap_err(),
				5187	TestError {
				5188	span: span(0..1),
				5189	kind: ast::ErrorKind::ClassUnclosed,
				5190	}
				5191	);
				5192	assert_eq!(
				5193	parser("[[").parse().unwrap_err(),
				5194	TestError {
				5195	span: span(1..2),
				5196	kind: ast::ErrorKind::ClassUnclosed,
				5197	}
				5198	);
				5199	assert_eq!(
				5200	parser("[[-]").parse().unwrap_err(),
				5201	TestError {
				5202	span: span(0..1),
				5203	kind: ast::ErrorKind::ClassUnclosed,
				5204	}
				5205	);
				5206	assert_eq!(
				5207	parser("[[[:alnum:]").parse().unwrap_err(),
				5208	TestError {
				5209	span: span(1..2),
				5210	kind: ast::ErrorKind::ClassUnclosed,
				5211	}
				5212	);
				5213	assert_eq!(
				5214	parser(r"[\b]").parse().unwrap_err(),
				5215	TestError {
				5216	span: span(1..3),
				5217	kind: ast::ErrorKind::ClassEscapeInvalid,
				5218	}
				5219	);
				5220	assert_eq!(
				5221	parser(r"[\w-a]").parse().unwrap_err(),
				5222	TestError {
				5223	span: span(1..3),
				5224	kind: ast::ErrorKind::ClassRangeLiteral,
				5225	}
				5226	);
				5227	assert_eq!(
				5228	parser(r"[a-\w]").parse().unwrap_err(),
				5229	TestError {
				5230	span: span(3..5),
				5231	kind: ast::ErrorKind::ClassRangeLiteral,
				5232	}
				5233	);
				5234	assert_eq!(
				5235	parser(r"[z-a]").parse().unwrap_err(),
				5236	TestError {
				5237	span: span(1..4),
				5238	kind: ast::ErrorKind::ClassRangeInvalid,
				5239	}
				5240	);
				5241
				5242	assert_eq!(
				5243	parser_ignore_whitespace("[a ").parse().unwrap_err(),
				5244	TestError {
				5245	span: span(0..1),
				5246	kind: ast::ErrorKind::ClassUnclosed,
				5247	}
				5248	);
				5249	assert_eq!(
				5250	parser_ignore_whitespace("[a- ").parse().unwrap_err(),
				5251	TestError {
				5252	span: span(0..1),
				5253	kind: ast::ErrorKind::ClassUnclosed,
				5254	}
				5255	);
				5256	}
				5257
				5258	#[test]
					// Exercises `parse_set_class_open` on its own. Each success case compares
					// the call's result with `Ok((set, union))`: `set` is the expected
					// `ast::ClassBracketed` (always holding an empty union as its `kind`) and
					// `union` is the expected `ast::ClassSetUnion` of literals picked up while
					// opening the class. Each failure case expects `ClassUnclosed`.
					//
					// All spans are offsets into the pattern literal, so the exact spacing of
					// the `parser_ignore_whitespace` inputs is significant.
				5259	fn parse_set_class_open() {
					// Plain class: only `[` is covered by the set's span.
				5260	assert_eq!(parser("[a]").parse_set_class_open(), {
				5261	let set = ast::ClassBracketed {
				5262	span: span(0..1),
				5263	negated: false,
				5264	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5265	span: span(1..1),
				5266	items: vec![],
				5267	}),
				5268	};
				5269	let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
				5270	Ok((set, union))
				5271	});
					// Three spaces follow `[`, which puts `a` at offset 4; the expected
					// spans `0..4` and `4..4` only hold with that spacing.
				5272	assert_eq!(
				5273	parser_ignore_whitespace("[   a]").parse_set_class_open(),
				5274	{
				5275	let set = ast::ClassBracketed {
				5276	span: span(0..4),
				5277	negated: false,
				5278	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5279	span: span(4..4),
				5280	items: vec![],
				5281	}),
				5282	};
				5283	let union =
				5284	ast::ClassSetUnion { span: span(4..4), items: vec![] };
				5285	Ok((set, union))
				5286	}
				5287	);
					// A leading `^` is expected to set `negated` and extend the set's span.
				5288	assert_eq!(parser("[^a]").parse_set_class_open(), {
				5289	let set = ast::ClassBracketed {
				5290	span: span(0..2),
				5291	negated: true,
				5292	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5293	span: span(2..2),
				5294	items: vec![],
				5295	}),
				5296	};
				5297	let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
				5298	Ok((set, union))
				5299	});
					// Same with whitespace around `^`: `a` is at offset 4.
				5300	assert_eq!(
				5301	parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
				5302	{
				5303	let set = ast::ClassBracketed {
				5304	span: span(0..4),
				5305	negated: true,
				5306	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5307	span: span(4..4),
				5308	items: vec![],
				5309	}),
				5310	};
				5311	let union =
				5312	ast::ClassSetUnion { span: span(4..4), items: vec![] };
				5313	Ok((set, union))
				5314	}
				5315	);
					// A leading `-` is expected back as a verbatim literal in the union.
				5316	assert_eq!(parser("[-a]").parse_set_class_open(), {
				5317	let set = ast::ClassBracketed {
				5318	span: span(0..2),
				5319	negated: false,
				5320	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5321	span: span(1..1),
				5322	items: vec![],
				5323	}),
				5324	};
				5325	let union = ast::ClassSetUnion {
				5326	span: span(1..2),
				5327	items: vec![ast::ClassSetItem::Literal(ast::Literal {
				5328	span: span(1..2),
				5329	kind: ast::LiteralKind::Verbatim,
				5330	c: '-',
				5331	})],
				5332	};
				5333	Ok((set, union))
				5334	});
					// With whitespace, the `-` literal sits at 2..3 and `a` at offset 4.
				5335	assert_eq!(
				5336	parser_ignore_whitespace("[ - a]").parse_set_class_open(),
				5337	{
				5338	let set = ast::ClassBracketed {
				5339	span: span(0..4),
				5340	negated: false,
				5341	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5342	span: span(2..2),
				5343	items: vec![],
				5344	}),
				5345	};
				5346	let union = ast::ClassSetUnion {
				5347	span: span(2..3),
				5348	items: vec![ast::ClassSetItem::Literal(ast::Literal {
				5349	span: span(2..3),
				5350	kind: ast::LiteralKind::Verbatim,
				5351	c: '-',
				5352	})],
				5353	};
				5354	Ok((set, union))
				5355	}
				5356	);
					// Negation followed by a leading `-`.
				5357	assert_eq!(parser("[^-a]").parse_set_class_open(), {
				5358	let set = ast::ClassBracketed {
				5359	span: span(0..3),
				5360	negated: true,
				5361	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5362	span: span(2..2),
				5363	items: vec![],
				5364	}),
				5365	};
				5366	let union = ast::ClassSetUnion {
				5367	span: span(2..3),
				5368	items: vec![ast::ClassSetItem::Literal(ast::Literal {
				5369	span: span(2..3),
				5370	kind: ast::LiteralKind::Verbatim,
				5371	c: '-',
				5372	})],
				5373	};
				5374	Ok((set, union))
				5375	});
					// Two leading dashes are both expected as literals.
				5376	assert_eq!(parser("[--a]").parse_set_class_open(), {
				5377	let set = ast::ClassBracketed {
				5378	span: span(0..3),
				5379	negated: false,
				5380	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5381	span: span(1..1),
				5382	items: vec![],
				5383	}),
				5384	};
				5385	let union = ast::ClassSetUnion {
				5386	span: span(1..3),
				5387	items: vec![
				5388	ast::ClassSetItem::Literal(ast::Literal {
				5389	span: span(1..2),
				5390	kind: ast::LiteralKind::Verbatim,
				5391	c: '-',
				5392	}),
				5393	ast::ClassSetItem::Literal(ast::Literal {
				5394	span: span(2..3),
				5395	kind: ast::LiteralKind::Verbatim,
				5396	c: '-',
				5397	}),
				5398	],
				5399	};
				5400	Ok((set, union))
				5401	});
					// A `]` directly after `[` is expected as a literal, not as the close.
				5402	assert_eq!(parser("[]a]").parse_set_class_open(), {
				5403	let set = ast::ClassBracketed {
				5404	span: span(0..2),
				5405	negated: false,
				5406	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5407	span: span(1..1),
				5408	items: vec![],
				5409	}),
				5410	};
				5411	let union = ast::ClassSetUnion {
				5412	span: span(1..2),
				5413	items: vec![ast::ClassSetItem::Literal(ast::Literal {
				5414	span: span(1..2),
				5415	kind: ast::LiteralKind::Verbatim,
				5416	c: ']',
				5417	})],
				5418	};
				5419	Ok((set, union))
				5420	});
					// Same with whitespace: the `]` literal sits at 2..3.
				5421	assert_eq!(
				5422	parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
				5423	{
				5424	let set = ast::ClassBracketed {
				5425	span: span(0..4),
				5426	negated: false,
				5427	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5428	span: span(2..2),
				5429	items: vec![],
				5430	}),
				5431	};
				5432	let union = ast::ClassSetUnion {
				5433	span: span(2..3),
				5434	items: vec![ast::ClassSetItem::Literal(ast::Literal {
				5435	span: span(2..3),
				5436	kind: ast::LiteralKind::Verbatim,
				5437	c: ']',
				5438	})],
				5439	};
				5440	Ok((set, union))
				5441	}
				5442	);
					// Negation followed by a leading `]`.
				5443	assert_eq!(parser("[^]a]").parse_set_class_open(), {
				5444	let set = ast::ClassBracketed {
				5445	span: span(0..3),
				5446	negated: true,
				5447	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5448	span: span(2..2),
				5449	items: vec![],
				5450	}),
				5451	};
				5452	let union = ast::ClassSetUnion {
				5453	span: span(2..3),
				5454	items: vec![ast::ClassSetItem::Literal(ast::Literal {
				5455	span: span(2..3),
				5456	kind: ast::LiteralKind::Verbatim,
				5457	c: ']',
				5458	})],
				5459	};
				5460	Ok((set, union))
				5461	});
					// After a leading `-`, the following `]` is not collected: the expected
					// union holds only the dash and the set's span stops at offset 2.
				5462	assert_eq!(parser("[-]a]").parse_set_class_open(), {
				5463	let set = ast::ClassBracketed {
				5464	span: span(0..2),
				5465	negated: false,
				5466	kind: ast::ClassSet::union(ast::ClassSetUnion {
				5467	span: span(1..1),
				5468	items: vec![],
				5469	}),
				5470	};
				5471	let union = ast::ClassSetUnion {
				5472	span: span(1..2),
				5473	items: vec![ast::ClassSetItem::Literal(ast::Literal {
				5474	span: span(1..2),
				5475	kind: ast::LiteralKind::Verbatim,
				5476	c: '-',
				5477	})],
				5478	};
				5479	Ok((set, union))
				5480	});
				5481
					// Inputs that end right after the opening prefix: each expects
					// `ClassUnclosed` with a span from offset 0 to the end of the input.
				5482	assert_eq!(
				5483	parser("[").parse_set_class_open().unwrap_err(),
				5484	TestError {
				5485	span: span(0..1),
				5486	kind: ast::ErrorKind::ClassUnclosed,
				5487	}
				5488	);
					// `[` plus four spaces is five bytes, matching the expected span `0..5`.
				5489	assert_eq!(
				5490	parser_ignore_whitespace("[    ")
				5491	.parse_set_class_open()
				5492	.unwrap_err(),
				5493	TestError {
				5494	span: span(0..5),
				5495	kind: ast::ErrorKind::ClassUnclosed,
				5496	}
				5497	);
				5498	assert_eq!(
				5499	parser("[^").parse_set_class_open().unwrap_err(),
				5500	TestError {
				5501	span: span(0..2),
				5502	kind: ast::ErrorKind::ClassUnclosed,
				5503	}
				5504	);
				5505	assert_eq!(
				5506	parser("[]").parse_set_class_open().unwrap_err(),
				5507	TestError {
				5508	span: span(0..2),
				5509	kind: ast::ErrorKind::ClassUnclosed,
				5510	}
				5511	);
				5512	assert_eq!(
				5513	parser("[-").parse_set_class_open().unwrap_err(),
				5514	TestError {
				5515	span: span(0..2),
				5516	kind: ast::ErrorKind::ClassUnclosed,
				5517	}
				5518	);
				5519	assert_eq!(
				5520	parser("[--").parse_set_class_open().unwrap_err(),
				5521	TestError {
				5522	span: span(0..3),
				5523	kind: ast::ErrorKind::ClassUnclosed,
				5524	}
				5525	);
				5526	}
				5527
				5528	#[test]
					// Checks `maybe_parse_ascii_class` against well-formed and malformed
					// `[:name:]` inputs. Well-formed inputs expect `Some(ast::ClassAscii)`;
					// malformed ones expect `None` with the parser offset still at 0.
				5529	fn maybe_parse_ascii_class() {
				5530	assert_eq!(
				5531	parser(r"[:alnum:]").maybe_parse_ascii_class(),
				5532	Some(ast::ClassAscii {
				5533	span: span(0..9),
				5534	kind: ast::ClassAsciiKind::Alnum,
				5535	negated: false,
				5536	})
				5537	);
					// Trailing input after the class leaves the expected span at 0..9.
				5538	assert_eq!(
				5539	parser(r"[:alnum:]A").maybe_parse_ascii_class(),
				5540	Some(ast::ClassAscii {
				5541	span: span(0..9),
				5542	kind: ast::ClassAsciiKind::Alnum,
				5543	negated: false,
				5544	})
				5545	);
					// `^` directly after `[:` is expected to set `negated`.
				5546	assert_eq!(
				5547	parser(r"[:^alnum:]").maybe_parse_ascii_class(),
				5548	Some(ast::ClassAscii {
				5549	span: span(0..10),
				5550	kind: ast::ClassAsciiKind::Alnum,
				5551	negated: true,
				5552	})
				5553	);
				5554
					// Input ends right after `[:`.
				5555	let p = parser(r"[:");
				5556	assert_eq!(p.maybe_parse_ascii_class(), None);
				5557	assert_eq!(p.offset(), 0);
				5558
					// Input ends right after the negation marker.
				5559	let p = parser(r"[:^");
				5560	assert_eq!(p.maybe_parse_ascii_class(), None);
				5561	assert_eq!(p.offset(), 0);
				5562
					// `^` placed before the `:` instead of after it.
				5563	let p = parser(r"[^:alnum:]");
				5564	assert_eq!(p.maybe_parse_ascii_class(), None);
				5565	assert_eq!(p.offset(), 0);
				5566
					// Misspelled class name.
				5567	let p = parser(r"[:alnnum:]");
				5568	assert_eq!(p.maybe_parse_ascii_class(), None);
				5569	assert_eq!(p.offset(), 0);
				5570
					// Closing `:` is missing.
				5571	let p = parser(r"[:alnum]");
				5572	assert_eq!(p.maybe_parse_ascii_class(), None);
				5573	assert_eq!(p.offset(), 0);
				5574
					// Closing `]` is missing.
				5575	let p = parser(r"[:alnum:");
				5576	assert_eq!(p.maybe_parse_ascii_class(), None);
				5577	assert_eq!(p.offset(), 0);
				5578	}
				5579
				5580	#[test]
					// Covers Unicode class escapes (`\p` / `\P`). Most cases call
					// `parse_escape` directly and expect `Primitive::Unicode`; the final
					// group runs a full `parse` to check the surrounding AST and errors.
				5581	fn parse_unicode_class() {
					// One-letter form; the upper-case `\P` is expected with `negated: true`.
				5582	assert_eq!(
				5583	parser(r"\pN").parse_escape(),
				5584	Ok(Primitive::Unicode(ast::ClassUnicode {
				5585	span: span(0..3),
				5586	negated: false,
				5587	kind: ast::ClassUnicodeKind::OneLetter('N'),
				5588	}))
				5589	);
				5590	assert_eq!(
				5591	parser(r"\PN").parse_escape(),
				5592	Ok(Primitive::Unicode(ast::ClassUnicode {
				5593	span: span(0..3),
				5594	negated: true,
				5595	kind: ast::ClassUnicodeKind::OneLetter('N'),
				5596	}))
				5597	);
					// Braced form is expected as `Named`, even for a single letter.
				5598	assert_eq!(
				5599	parser(r"\p{N}").parse_escape(),
				5600	Ok(Primitive::Unicode(ast::ClassUnicode {
				5601	span: span(0..5),
				5602	negated: false,
				5603	kind: ast::ClassUnicodeKind::Named(s("N")),
				5604	}))
				5605	);
				5606	assert_eq!(
				5607	parser(r"\P{N}").parse_escape(),
				5608	Ok(Primitive::Unicode(ast::ClassUnicode {
				5609	span: span(0..5),
				5610	negated: true,
				5611	kind: ast::ClassUnicodeKind::Named(s("N")),
				5612	}))
				5613	);
				5614	assert_eq!(
				5615	parser(r"\p{Greek}").parse_escape(),
				5616	Ok(Primitive::Unicode(ast::ClassUnicode {
				5617	span: span(0..9),
				5618	negated: false,
				5619	kind: ast::ClassUnicodeKind::Named(s("Greek")),
				5620	}))
				5621	);
				5622
					// `name<op>value` inside the braces is expected as `NamedValue`, with
					// `:`, `=` and `!=` mapping to `Colon`, `Equal` and `NotEqual`. Note that
					// `!=` is recorded in `op`; `negated` stays false in the expected value.
				5623	assert_eq!(
				5624	parser(r"\p{scx:Katakana}").parse_escape(),
				5625	Ok(Primitive::Unicode(ast::ClassUnicode {
				5626	span: span(0..16),
				5627	negated: false,
				5628	kind: ast::ClassUnicodeKind::NamedValue {
				5629	op: ast::ClassUnicodeOpKind::Colon,
				5630	name: s("scx"),
				5631	value: s("Katakana"),
				5632	},
				5633	}))
				5634	);
				5635	assert_eq!(
				5636	parser(r"\p{scx=Katakana}").parse_escape(),
				5637	Ok(Primitive::Unicode(ast::ClassUnicode {
				5638	span: span(0..16),
				5639	negated: false,
				5640	kind: ast::ClassUnicodeKind::NamedValue {
				5641	op: ast::ClassUnicodeOpKind::Equal,
				5642	name: s("scx"),
				5643	value: s("Katakana"),
				5644	},
				5645	}))
				5646	);
				5647	assert_eq!(
				5648	parser(r"\p{scx!=Katakana}").parse_escape(),
				5649	Ok(Primitive::Unicode(ast::ClassUnicode {
				5650	span: span(0..17),
				5651	negated: false,
				5652	kind: ast::ClassUnicodeKind::NamedValue {
				5653	op: ast::ClassUnicodeOpKind::NotEqual,
				5654	name: s("scx"),
				5655	value: s("Katakana"),
				5656	},
				5657	}))
				5658	);
				5659
					// An operator with nothing on either side is still expected to parse,
					// producing empty `name` and `value` strings.
				5660	assert_eq!(
				5661	parser(r"\p{:}").parse_escape(),
				5662	Ok(Primitive::Unicode(ast::ClassUnicode {
				5663	span: span(0..5),
				5664	negated: false,
				5665	kind: ast::ClassUnicodeKind::NamedValue {
				5666	op: ast::ClassUnicodeOpKind::Colon,
				5667	name: s(""),
				5668	value: s(""),
				5669	},
				5670	}))
				5671	);
				5672	assert_eq!(
				5673	parser(r"\p{=}").parse_escape(),
				5674	Ok(Primitive::Unicode(ast::ClassUnicode {
				5675	span: span(0..5),
				5676	negated: false,
				5677	kind: ast::ClassUnicodeKind::NamedValue {
				5678	op: ast::ClassUnicodeOpKind::Equal,
				5679	name: s(""),
				5680	value: s(""),
				5681	},
				5682	}))
				5683	);
				5684	assert_eq!(
				5685	parser(r"\p{!=}").parse_escape(),
				5686	Ok(Primitive::Unicode(ast::ClassUnicode {
				5687	span: span(0..6),
				5688	negated: false,
				5689	kind: ast::ClassUnicodeKind::NamedValue {
				5690	op: ast::ClassUnicodeOpKind::NotEqual,
				5691	name: s(""),
				5692	value: s(""),
				5693	},
				5694	}))
				5695	);
				5696
					// Input that ends inside the escape: each case expects
					// `EscapeUnexpectedEof` with an empty span at the end of the input.
				5697	assert_eq!(
				5698	parser(r"\p").parse_escape().unwrap_err(),
				5699	TestError {
				5700	span: span(2..2),
				5701	kind: ast::ErrorKind::EscapeUnexpectedEof,
				5702	}
				5703	);
				5704	assert_eq!(
				5705	parser(r"\p{").parse_escape().unwrap_err(),
				5706	TestError {
				5707	span: span(3..3),
				5708	kind: ast::ErrorKind::EscapeUnexpectedEof,
				5709	}
				5710	);
				5711	assert_eq!(
				5712	parser(r"\p{N").parse_escape().unwrap_err(),
				5713	TestError {
				5714	span: span(4..4),
				5715	kind: ast::ErrorKind::EscapeUnexpectedEof,
				5716	}
				5717	);
				5718	assert_eq!(
				5719	parser(r"\p{Greek").parse_escape().unwrap_err(),
				5720	TestError {
				5721	span: span(8..8),
				5722	kind: ast::ErrorKind::EscapeUnexpectedEof,
				5723	}
				5724	);
				5725
					// Through the full `parse`, a class followed by `z` is expected as a
					// two-element `Concat`: the class, then a verbatim literal.
				5726	assert_eq!(
				5727	parser(r"\pNz").parse(),
				5728	Ok(Ast::Concat(ast::Concat {
				5729	span: span(0..4),
				5730	asts: vec![
				5731	Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
				5732	span: span(0..3),
				5733	negated: false,
				5734	kind: ast::ClassUnicodeKind::OneLetter('N'),
				5735	})),
				5736	Ast::Literal(ast::Literal {
				5737	span: span(3..4),
				5738	kind: ast::LiteralKind::Verbatim,
				5739	c: 'z',
				5740	}),
				5741	],
				5742	}))
				5743	);
				5744	assert_eq!(
				5745	parser(r"\p{Greek}z").parse(),
				5746	Ok(Ast::Concat(ast::Concat {
				5747	span: span(0..10),
				5748	asts: vec![
				5749	Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
				5750	span: span(0..9),
				5751	negated: false,
				5752	kind: ast::ClassUnicodeKind::Named(s("Greek")),
				5753	})),
				5754	Ast::Literal(ast::Literal {
				5755	span: span(9..10),
				5756	kind: ast::LiteralKind::Verbatim,
				5757	c: 'z',
				5758	}),
				5759	],
				5760	}))
				5761	);
					// A backslash where the class name should be: expected to fail with
					// `UnicodeClassInvalid` over the backslash itself (offsets 2..3).
				5762	assert_eq!(
				5763	parser(r"\p\{").parse().unwrap_err(),
				5764	TestError {
				5765	span: span(2..3),
				5766	kind: ast::ErrorKind::UnicodeClassInvalid,
				5767	}
				5768	);
				5769	assert_eq!(
				5770	parser(r"\P\{").parse().unwrap_err(),
				5771	TestError {
				5772	span: span(2..3),
				5773	kind: ast::ErrorKind::UnicodeClassInvalid,
				5774	}
				5775	);
				5776	}
				5777
				5778	#[test]
					// Covers the Perl-style classes `\d`, `\s` and `\w`. The lower-case
					// escapes are expected with `negated: false` and the upper-case ones
					// (`\D`, `\S`, `\W`) with `negated: true`; every span is 0..2.
				5779	fn parse_perl_class() {
					// Digit.
				5780	assert_eq!(
				5781	parser(r"\d").parse_escape(),
				5782	Ok(Primitive::Perl(ast::ClassPerl {
				5783	span: span(0..2),
				5784	kind: ast::ClassPerlKind::Digit,
				5785	negated: false,
				5786	}))
				5787	);
				5788	assert_eq!(
				5789	parser(r"\D").parse_escape(),
				5790	Ok(Primitive::Perl(ast::ClassPerl {
				5791	span: span(0..2),
				5792	kind: ast::ClassPerlKind::Digit,
				5793	negated: true,
				5794	}))
				5795	);
					// Space.
				5796	assert_eq!(
				5797	parser(r"\s").parse_escape(),
				5798	Ok(Primitive::Perl(ast::ClassPerl {
				5799	span: span(0..2),
				5800	kind: ast::ClassPerlKind::Space,
				5801	negated: false,
				5802	}))
				5803	);
				5804	assert_eq!(
				5805	parser(r"\S").parse_escape(),
				5806	Ok(Primitive::Perl(ast::ClassPerl {
				5807	span: span(0..2),
				5808	kind: ast::ClassPerlKind::Space,
				5809	negated: true,
				5810	}))
				5811	);
					// Word.
				5812	assert_eq!(
				5813	parser(r"\w").parse_escape(),
				5814	Ok(Primitive::Perl(ast::ClassPerl {
				5815	span: span(0..2),
				5816	kind: ast::ClassPerlKind::Word,
				5817	negated: false,
				5818	}))
				5819	);
				5820	assert_eq!(
				5821	parser(r"\W").parse_escape(),
				5822	Ok(Primitive::Perl(ast::ClassPerl {
				5823	span: span(0..2),
				5824	kind: ast::ClassPerlKind::Word,
				5825	negated: true,
				5826	}))
				5827	);
				5828
					// Through the full `parse`, a lone class is expected as `Ast::Class`,
					// and a class followed by `z` as a two-element `Concat`.
				5829	assert_eq!(
				5830	parser(r"\d").parse(),
				5831	Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
				5832	span: span(0..2),
				5833	kind: ast::ClassPerlKind::Digit,
				5834	negated: false,
				5835	})))
				5836	);
				5837	assert_eq!(
				5838	parser(r"\dz").parse(),
				5839	Ok(Ast::Concat(ast::Concat {
				5840	span: span(0..3),
				5841	asts: vec![
				5842	Ast::Class(ast::Class::Perl(ast::ClassPerl {
				5843	span: span(0..2),
				5844	kind: ast::ClassPerlKind::Digit,
				5845	negated: false,
				5846	})),
				5847	Ast::Literal(ast::Literal {
				5848	span: span(2..3),
				5849	kind: ast::LiteralKind::Verbatim,
				5850	c: 'z',
				5851	}),
				5852	],
				5853	}))
				5854	);
				5855	}
				5856
				5857	// This tests a bug fix where the nest limit checker wasn't decrementing
				5858	// its depth during post-traversal, which causes long regexes to trip
				5859	// the default limit too aggressively.
				5860	#[test]
				5861	fn regression_454_nest_too_big() {
					// A long pattern with groups nested only two levels deep, written
					// across many lines.
					// NOTE(review): the separators appear here as `\|`; this may be an
					// artifact of how this copy was rendered (a bare `|` would make them
					// alternations) -- TODO confirm against the upstream file.
				5862	let pattern = r#"
				5863	2(?:
				5864	[45]\d{3}\|
				5865	7(?:
				5866	1[0-267]\|
				5867	2[0-289]\|
				5868	3[0-29]\|
				5869	4[01]\|
				5870	5[1-3]\|
				5871	6[013]\|
				5872	7[0178]\|
				5873	91
				5874	)\|
				5875	8(?:
				5876	0[125]\|
				5877	[139][1-6]\|
				5878	2[0157-9]\|
				5879	41\|
				5880	6[1-35]\|
				5881	7[1-5]\|
				5882	8[1-8]\|
				5883	90
				5884	)\|
				5885	9(?:
				5886	0[0-2]\|
				5887	1[0-4]\|
				5888	2[568]\|
				5889	3[3-6]\|
				5890	5[5-7]\|
				5891	6[0167]\|
				5892	7[15]\|
				5893	8[0146-9]
				5894	)
				5895	)\d{4}
				5896	"#;
					// Parsing must succeed under an explicit nest limit of 50.
				5897	assert!(parser_nest_limit(pattern, 50).parse().is_ok());
				5898	}
				5899
				5900	// This tests that we treat a trailing `-` in a character class as a
				5901	// literal `-` even when whitespace mode is enabled and there is whitespace
				5902	// after the trailing `-`.
				5903	#[test]
				5904	fn regression_455_trailing_dash_ignore_whitespace() {
					// Closed classes: every pattern enables `x` mode inline via `(?x)` and
					// ends the class with `-` followed by whitespace and `]`. All must parse,
					// including when a newline or a `# wat` comment precedes the dash.
				5905	assert!(parser("(?x)[ / - ]").parse().is_ok());
				5906	assert!(parser("(?x)[ a - ]").parse().is_ok());
				5907	assert!(parser(
				5908	"(?x)[
				5909	a
				5910	- ]
				5911	"
				5912	)
				5913	.parse()
				5914	.is_ok());
				5915	assert!(parser(
				5916	"(?x)[
				5917	a # wat
				5918	- ]
				5919	"
				5920	)
				5921	.parse()
				5922	.is_ok());
				5923
					// Unclosed classes: the same shapes without a closing `]` must all be
					// rejected, whether the input ends at the dash, in trailing whitespace,
					// or in a trailing comment.
				5924	assert!(parser("(?x)[ / -").parse().is_err());
				5925	assert!(parser("(?x)[ / - ").parse().is_err());
				5926	assert!(parser(
				5927	"(?x)[
				5928	/ -
				5929	"
				5930	)
				5931	.parse()
				5932	.is_err());
				5933	assert!(parser(
				5934	"(?x)[
				5935	/ - # wat
				5936	"
				5937	)
				5938	.parse()
				5939	.is_err());
				5940	}
				5941	}