blob: 30fbf66c22bf9b7b5137008439b2d3e91aa75f35 [file] [log] [blame]
David Tolnay80a914f2018-08-30 23:49:53 -07001// Copyright 2018 Syn Developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
David Tolnay18c754c2018-08-21 23:26:58 -04009//! Parsing interface for parsing a token stream into a syntax tree node.
David Tolnay80a914f2018-08-30 23:49:53 -070010//!
11//! Parsing in Syn is built on parser functions that take in a [`Cursor`] and
12//! produce a [`PResult<T>`] where `T` is some syntax tree node. `Cursor` is a
13//! cheaply copyable cursor over a range of tokens in a token stream, and
14//! `PResult` is a result that packages together a parsed syntax tree node `T`
15//! with a stream of remaining unparsed tokens after `T` represented as another
16//! `Cursor`, or an [`Error`] if parsing failed.
17//!
18//! [`Cursor`]: ../buffer/index.html
19//! [`PResult<T>`]: type.PResult.html
20//! [`Error`]: struct.Error.html
21//!
22//! This `Cursor`- and `PResult`-based interface is convenient for parser
23//! combinators and parser implementations, but not necessarily when you just
24//! have some tokens that you want to parse. For that we expose the following
25//! two entry points.
26//!
27//! ## The `syn::parse*` functions
28//!
29//! The [`syn::parse`], [`syn::parse2`], and [`syn::parse_str`] functions serve
30//! as an entry point for parsing syntax tree nodes that can be parsed in an
31//! obvious default way. These functions can return any syntax tree node that
David Tolnay8aacee12018-08-31 09:15:15 -070032//! implements the [`Parse`] trait, which includes most types in Syn.
David Tolnay80a914f2018-08-30 23:49:53 -070033//!
34//! [`syn::parse`]: ../fn.parse.html
35//! [`syn::parse2`]: ../fn.parse2.html
36//! [`syn::parse_str`]: ../fn.parse_str.html
David Tolnay8aacee12018-08-31 09:15:15 -070037//! [`Parse`]: trait.Parse.html
David Tolnay80a914f2018-08-30 23:49:53 -070038//!
39//! ```
40//! use syn::Type;
41//!
David Tolnay8aacee12018-08-31 09:15:15 -070042//! # fn run_parser() -> Result<(), syn::parse::Error> {
David Tolnay80a914f2018-08-30 23:49:53 -070043//! let t: Type = syn::parse_str("std::collections::HashMap<String, Value>")?;
44//! # Ok(())
45//! # }
46//! #
47//! # fn main() {
48//! # run_parser().unwrap();
49//! # }
50//! ```
51//!
52//! The [`parse_quote!`] macro also uses this approach.
53//!
54//! [`parse_quote!`]: ../macro.parse_quote.html
55//!
56//! ## The `Parser` trait
57//!
58//! Some types can be parsed in several ways depending on context. For example
59//! an [`Attribute`] can be either "outer" like `#[...]` or "inner" like
60//! `#![...]` and parsing the wrong one would be a bug. Similarly [`Punctuated`]
61//! may or may not allow trailing punctuation, and parsing it the wrong way
62//! would either reject valid input or accept invalid input.
63//!
64//! [`Attribute`]: ../struct.Attribute.html
65//! [`Punctuated`]: ../punctuated/index.html
66//!
//! The `Parse` trait is not implemented in these cases because there is no good
//! behavior to consider the default.
69//!
70//! ```ignore
71//! // Can't parse `Punctuated` without knowing whether trailing punctuation
72//! // should be allowed in this context.
73//! let path: Punctuated<PathSegment, Token![::]> = syn::parse(tokens)?;
74//! ```
75//!
//! In these cases the types provide a choice of parser functions rather than a
//! single `Parse` implementation, and those parser functions can be invoked
//! through the [`Parser`] trait.
79//!
80//! [`Parser`]: trait.Parser.html
81//!
82//! ```
83//! # #[macro_use]
84//! # extern crate syn;
85//! #
86//! # extern crate proc_macro2;
87//! # use proc_macro2::TokenStream;
88//! #
David Tolnay3e3f7752018-08-31 09:33:59 -070089//! use syn::parse::Parser;
David Tolnay80a914f2018-08-30 23:49:53 -070090//! use syn::punctuated::Punctuated;
91//! use syn::{PathSegment, Expr, Attribute};
92//!
David Tolnay3e3f7752018-08-31 09:33:59 -070093//! # fn run_parsers() -> Result<(), syn::parse::Error> {
David Tolnay80a914f2018-08-30 23:49:53 -070094//! # let tokens = TokenStream::new().into();
95//! // Parse a nonempty sequence of path segments separated by `::` punctuation
96//! // with no trailing punctuation.
97//! let parser = Punctuated::<PathSegment, Token![::]>::parse_separated_nonempty;
98//! let path = parser.parse(tokens)?;
99//!
100//! # let tokens = TokenStream::new().into();
101//! // Parse a possibly empty sequence of expressions terminated by commas with
102//! // an optional trailing punctuation.
103//! let parser = Punctuated::<Expr, Token![,]>::parse_terminated;
104//! let args = parser.parse(tokens)?;
105//!
106//! # let tokens = TokenStream::new().into();
107//! // Parse zero or more outer attributes but not inner attributes.
David Tolnay3e3f7752018-08-31 09:33:59 -0700108//! let parser = Attribute::parse_outer;
109//! let attrs = parser.parse(tokens)?;
David Tolnay80a914f2018-08-30 23:49:53 -0700110//! #
111//! # Ok(())
112//! # }
113//! #
114//! # fn main() {}
115//! ```
116//!
117//! # Implementing a parser function
118//!
119//! Parser functions are usually implemented using the [`nom`]-style parser
120//! combinator macros provided by Syn, but may also be implemented without
//! macros by using the low-level [`Cursor`] API directly.
122//!
123//! [`nom`]: https://github.com/Geal/nom
124//!
125//! The following parser combinator macros are available and a `Synom` parsing
126//! example is provided for each one.
127//!
128//! - [`alt!`](../macro.alt.html)
129//! - [`braces!`](../macro.braces.html)
130//! - [`brackets!`](../macro.brackets.html)
131//! - [`call!`](../macro.call.html)
132//! - [`cond!`](../macro.cond.html)
133//! - [`cond_reduce!`](../macro.cond_reduce.html)
134//! - [`custom_keyword!`](../macro.custom_keyword.html)
135//! - [`do_parse!`](../macro.do_parse.html)
136//! - [`epsilon!`](../macro.epsilon.html)
137//! - [`input_end!`](../macro.input_end.html)
138//! - [`keyword!`](../macro.keyword.html)
139//! - [`many0!`](../macro.many0.html)
140//! - [`map!`](../macro.map.html)
141//! - [`not!`](../macro.not.html)
142//! - [`option!`](../macro.option.html)
143//! - [`parens!`](../macro.parens.html)
144//! - [`punct!`](../macro.punct.html)
145//! - [`reject!`](../macro.reject.html)
146//! - [`switch!`](../macro.switch.html)
147//! - [`syn!`](../macro.syn.html)
148//! - [`tuple!`](../macro.tuple.html)
149//! - [`value!`](../macro.value.html)
150//!
151//! *This module is available if Syn is built with the `"parsing"` feature.*
David Tolnay18c754c2018-08-21 23:26:58 -0400152
153use std::cell::Cell;
154use std::fmt::Display;
155use std::marker::PhantomData;
156use std::mem;
157use std::ops::Deref;
David Tolnayeafc8052018-08-25 16:33:53 -0400158use std::rc::Rc;
David Tolnay80a914f2018-08-30 23:49:53 -0700159use std::str::FromStr;
David Tolnayeafc8052018-08-25 16:33:53 -0400160
David Tolnay80a914f2018-08-30 23:49:53 -0700161#[cfg(all(
162 not(all(target_arch = "wasm32", target_os = "unknown")),
163 feature = "proc-macro"
164))]
165use proc_macro;
166use proc_macro2::{self, Delimiter, Group, Ident, Literal, Punct, Span, TokenStream, TokenTree};
David Tolnay18c754c2018-08-21 23:26:58 -0400167
David Tolnay80a914f2018-08-30 23:49:53 -0700168use buffer::{Cursor, TokenBuffer};
David Tolnayb6254182018-08-25 08:44:54 -0400169use error;
David Tolnay94f06632018-08-31 10:17:17 -0700170use lookahead;
171use private;
David Tolnay577d0332018-08-25 21:45:24 -0400172use punctuated::Punctuated;
David Tolnay4fb71232018-08-25 23:14:50 -0400173use token::Token;
David Tolnay18c754c2018-08-21 23:26:58 -0400174
David Tolnayb6254182018-08-25 08:44:54 -0400175pub use error::{Error, Result};
176pub use lookahead::{Lookahead1, Peek};
David Tolnay18c754c2018-08-21 23:26:58 -0400177
178/// Parsing interface implemented by all types that can be parsed in a default
179/// way from a token stream.
180pub trait Parse: Sized {
181 fn parse(input: ParseStream) -> Result<Self>;
182}
183
184/// Input to a Syn parser function.
185pub type ParseStream<'a> = &'a ParseBuffer<'a>;
186
187/// Cursor position within a buffered token stream.
David Tolnay18c754c2018-08-21 23:26:58 -0400188pub struct ParseBuffer<'a> {
189 scope: Span,
190 cell: Cell<Cursor<'static>>,
191 marker: PhantomData<Cursor<'a>>,
David Tolnayeafc8052018-08-25 16:33:53 -0400192 unexpected: Rc<Cell<Option<Span>>>,
193}
194
195impl<'a> Drop for ParseBuffer<'a> {
196 fn drop(&mut self) {
197 if !self.is_empty() && self.unexpected.get().is_none() {
198 self.unexpected.set(Some(self.cursor().span()));
199 }
200 }
David Tolnay18c754c2018-08-21 23:26:58 -0400201}
202
David Tolnay18c754c2018-08-21 23:26:58 -0400203#[derive(Copy, Clone)]
204pub struct StepCursor<'c, 'a> {
205 scope: Span,
206 cursor: Cursor<'c>,
207 marker: PhantomData<fn(Cursor<'c>) -> Cursor<'a>>,
208}
209
210impl<'c, 'a> Deref for StepCursor<'c, 'a> {
211 type Target = Cursor<'c>;
212
213 fn deref(&self) -> &Self::Target {
214 &self.cursor
215 }
216}
217
218impl<'c, 'a> StepCursor<'c, 'a> {
David Tolnay18c754c2018-08-21 23:26:58 -0400219 pub fn advance(self, other: Cursor<'c>) -> Cursor<'a> {
220 unsafe { mem::transmute::<Cursor<'c>, Cursor<'a>>(other) }
221 }
222
David Tolnay18c754c2018-08-21 23:26:58 -0400223 pub fn error<T: Display>(self, message: T) -> Error {
224 error::new_at(self.scope, self.cursor, message)
225 }
226}
227
David Tolnay66cb0c42018-08-31 09:01:30 -0700228fn skip(input: ParseStream) -> bool {
David Tolnay4ac232d2018-08-31 10:18:03 -0700229 input
230 .step(|cursor| {
231 if let Some((_lifetime, rest)) = cursor.lifetime() {
232 Ok((true, rest))
233 } else if let Some((_token, rest)) = cursor.token_tree() {
234 Ok((true, rest))
235 } else {
236 Ok((false, *cursor))
237 }
238 }).unwrap()
David Tolnay66cb0c42018-08-31 09:01:30 -0700239}
240
David Tolnay94f06632018-08-31 10:17:17 -0700241impl<'a> private<ParseBuffer<'a>> {
242 pub fn new(scope: Span, cursor: Cursor, unexpected: Rc<Cell<Option<Span>>>) -> ParseBuffer {
243 let extend = unsafe { mem::transmute::<Cursor, Cursor<'static>>(cursor) };
David Tolnay18c754c2018-08-21 23:26:58 -0400244 ParseBuffer {
245 scope: scope,
246 cell: Cell::new(extend),
247 marker: PhantomData,
David Tolnayeafc8052018-08-25 16:33:53 -0400248 unexpected: unexpected,
David Tolnay18c754c2018-08-21 23:26:58 -0400249 }
250 }
251
David Tolnay94f06632018-08-31 10:17:17 -0700252 pub fn get_unexpected(buffer: &ParseBuffer) -> Rc<Cell<Option<Span>>> {
253 buffer.unexpected.clone()
254 }
255}
256
257impl<'a> ParseBuffer<'a> {
David Tolnay18c754c2018-08-21 23:26:58 -0400258 pub fn cursor(&self) -> Cursor<'a> {
259 self.cell.get()
260 }
261
262 pub fn is_empty(&self) -> bool {
263 self.cursor().eof()
264 }
265
266 pub fn lookahead1(&self) -> Lookahead1<'a> {
David Tolnay94f06632018-08-31 10:17:17 -0700267 lookahead::new(self.scope, self.cursor())
David Tolnay18c754c2018-08-21 23:26:58 -0400268 }
269
270 pub fn parse<T: Parse>(&self) -> Result<T> {
David Tolnayeafc8052018-08-25 16:33:53 -0400271 self.check_unexpected()?;
David Tolnay18c754c2018-08-21 23:26:58 -0400272 T::parse(self)
273 }
274
David Tolnay3a515a02018-08-25 21:08:27 -0400275 pub fn call<T>(&self, function: fn(ParseStream) -> Result<T>) -> Result<T> {
276 function(self)
277 }
278
David Tolnayb77c8b62018-08-25 16:39:41 -0400279 pub fn peek<T: Peek>(&self, token: T) -> bool {
280 self.lookahead1().peek(token)
281 }
282
David Tolnay4fb71232018-08-25 23:14:50 -0400283 pub fn peek2<T: Peek>(&self, token: T) -> bool {
David Tolnay4fb71232018-08-25 23:14:50 -0400284 let ahead = self.fork();
David Tolnay66cb0c42018-08-31 09:01:30 -0700285 skip(&ahead) && ahead.peek(token)
David Tolnay4fb71232018-08-25 23:14:50 -0400286 }
287
288 pub fn peek3<T: Peek>(&self, token: T) -> bool {
David Tolnay4fb71232018-08-25 23:14:50 -0400289 let ahead = self.fork();
David Tolnay66cb0c42018-08-31 09:01:30 -0700290 skip(&ahead) && skip(&ahead) && ahead.peek(token)
David Tolnay4fb71232018-08-25 23:14:50 -0400291 }
292
David Tolnay577d0332018-08-25 21:45:24 -0400293 pub fn parse_terminated<T, P: Parse>(
294 &self,
295 parser: fn(ParseStream) -> Result<T>,
296 ) -> Result<Punctuated<T, P>> {
David Tolnayd0f80212018-08-30 18:32:14 -0700297 Punctuated::parse_terminated_with(self, parser)
David Tolnay577d0332018-08-25 21:45:24 -0400298 }
299
David Tolnayb77c8b62018-08-25 16:39:41 -0400300 pub fn fork(&self) -> Self {
David Tolnay6456a9d2018-08-26 08:11:18 -0400301 ParseBuffer {
302 scope: self.scope,
303 cell: self.cell.clone(),
304 marker: PhantomData,
305 // Not the parent's unexpected. Nothing cares whether the clone
306 // parses all the way.
307 unexpected: Rc::new(Cell::new(None)),
308 }
David Tolnayb77c8b62018-08-25 16:39:41 -0400309 }
310
David Tolnay4fb71232018-08-25 23:14:50 -0400311 pub fn error<T: Display>(&self, message: T) -> Error {
312 error::new_at(self.scope, self.cursor(), message)
313 }
314
David Tolnayb50c65a2018-08-30 21:14:57 -0700315 pub fn step<F, R>(&self, function: F) -> Result<R>
David Tolnay18c754c2018-08-21 23:26:58 -0400316 where
317 F: for<'c> FnOnce(StepCursor<'c, 'a>) -> Result<(R, Cursor<'c>)>,
318 {
David Tolnayeafc8052018-08-25 16:33:53 -0400319 self.check_unexpected()?;
David Tolnay18c754c2018-08-21 23:26:58 -0400320 match function(StepCursor {
321 scope: self.scope,
322 cursor: self.cell.get(),
323 marker: PhantomData,
324 }) {
325 Ok((ret, cursor)) => {
326 self.cell.set(cursor);
327 Ok(ret)
328 }
329 Err(err) => Err(err),
330 }
331 }
David Tolnayeafc8052018-08-25 16:33:53 -0400332
David Tolnay94f06632018-08-31 10:17:17 -0700333 fn check_unexpected(&self) -> Result<()> {
David Tolnayeafc8052018-08-25 16:33:53 -0400334 match self.unexpected.get() {
335 Some(span) => Err(Error::new(span, "unexpected token")),
336 None => Ok(()),
337 }
338 }
David Tolnay18c754c2018-08-21 23:26:58 -0400339}
340
341impl Parse for Ident {
342 fn parse(input: ParseStream) -> Result<Self> {
David Tolnayb50c65a2018-08-30 21:14:57 -0700343 input.step(|cursor| {
David Tolnay18c754c2018-08-21 23:26:58 -0400344 if let Some((ident, rest)) = cursor.ident() {
David Tolnayc4fdb1a2018-08-24 21:11:07 -0400345 match ident.to_string().as_str() {
346 "_"
347 // Based on https://doc.rust-lang.org/grammar.html#keywords
348 // and https://github.com/rust-lang/rfcs/blob/master/text/2421-unreservations-2018.md
349 | "abstract" | "as" | "become" | "box" | "break" | "const"
350 | "continue" | "crate" | "do" | "else" | "enum" | "extern" | "false" | "final"
351 | "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | "macro" | "match"
352 | "mod" | "move" | "mut" | "override" | "priv" | "proc" | "pub"
353 | "ref" | "return" | "Self" | "self" | "static" | "struct"
354 | "super" | "trait" | "true" | "type" | "typeof" | "unsafe" | "unsized" | "use"
355 | "virtual" | "where" | "while" | "yield" => {}
356 _ => return Ok((ident, rest)),
357 }
David Tolnay18c754c2018-08-21 23:26:58 -0400358 }
David Tolnayc4fdb1a2018-08-24 21:11:07 -0400359 Err(cursor.error("expected identifier"))
David Tolnay18c754c2018-08-21 23:26:58 -0400360 })
361 }
362}
363
David Tolnaya7d69fc2018-08-26 13:30:24 -0400364impl<T: Parse> Parse for Box<T> {
365 fn parse(input: ParseStream) -> Result<Self> {
366 input.parse().map(Box::new)
367 }
368}
369
David Tolnay4fb71232018-08-25 23:14:50 -0400370impl<T: Parse + Token> Parse for Option<T> {
David Tolnay18c754c2018-08-21 23:26:58 -0400371 fn parse(input: ParseStream) -> Result<Self> {
David Tolnay4fb71232018-08-25 23:14:50 -0400372 if T::peek(&input.lookahead1()) {
373 Ok(Some(input.parse()?))
374 } else {
375 Ok(None)
David Tolnay18c754c2018-08-21 23:26:58 -0400376 }
David Tolnay18c754c2018-08-21 23:26:58 -0400377 }
378}
David Tolnay4ac232d2018-08-31 10:18:03 -0700379
David Tolnay80a914f2018-08-30 23:49:53 -0700380impl Parse for TokenStream {
381 fn parse(input: ParseStream) -> Result<Self> {
382 input.step(|cursor| Ok((cursor.token_stream(), Cursor::empty())))
383 }
384}
385
386impl Parse for TokenTree {
387 fn parse(input: ParseStream) -> Result<Self> {
388 input.step(|cursor| match cursor.token_tree() {
389 Some((tt, rest)) => Ok((tt, rest)),
390 None => Err(cursor.error("expected token tree")),
391 })
392 }
393}
394
395impl Parse for Group {
396 fn parse(input: ParseStream) -> Result<Self> {
397 input.step(|cursor| {
398 for delim in &[Delimiter::Parenthesis, Delimiter::Brace, Delimiter::Bracket] {
399 if let Some((inside, span, rest)) = cursor.group(*delim) {
400 let mut group = Group::new(*delim, inside.token_stream());
401 group.set_span(span);
402 return Ok((group, rest));
403 }
404 }
405 Err(cursor.error("expected group token"))
406 })
407 }
408}
409
410impl Parse for Punct {
411 fn parse(input: ParseStream) -> Result<Self> {
412 input.step(|cursor| match cursor.punct() {
413 Some((punct, rest)) => Ok((punct, rest)),
414 None => Err(cursor.error("expected punctuation token")),
415 })
416 }
417}
418
419impl Parse for Literal {
420 fn parse(input: ParseStream) -> Result<Self> {
421 input.step(|cursor| match cursor.literal() {
422 Some((literal, rest)) => Ok((literal, rest)),
423 None => Err(cursor.error("expected literal token")),
424 })
425 }
426}
427
428/// Parser that can parse Rust tokens into a particular syntax tree node.
429///
430/// Refer to the [module documentation] for details about parsing in Syn.
431///
432/// [module documentation]: index.html
433///
434/// *This trait is available if Syn is built with the `"parsing"` feature.*
435pub trait Parser: Sized {
436 type Output;
437
438 /// Parse a proc-macro2 token stream into the chosen syntax tree node.
439 fn parse2(self, tokens: TokenStream) -> Result<Self::Output>;
440
441 /// Parse tokens of source code into the chosen syntax tree node.
442 ///
443 /// *This method is available if Syn is built with both the `"parsing"` and
444 /// `"proc-macro"` features.*
445 #[cfg(all(
446 not(all(target_arch = "wasm32", target_os = "unknown")),
447 feature = "proc-macro"
448 ))]
449 fn parse(self, tokens: proc_macro::TokenStream) -> Result<Self::Output> {
450 self.parse2(proc_macro2::TokenStream::from(tokens))
451 }
452
453 /// Parse a string of Rust code into the chosen syntax tree node.
454 ///
455 /// # Hygiene
456 ///
457 /// Every span in the resulting syntax tree will be set to resolve at the
458 /// macro call site.
459 fn parse_str(self, s: &str) -> Result<Self::Output> {
460 self.parse2(proc_macro2::TokenStream::from_str(s)?)
461 }
462}
463
464impl<F, T> Parser for F
465where
466 F: FnOnce(ParseStream) -> Result<T>,
467{
468 type Output = T;
469
470 fn parse2(self, tokens: TokenStream) -> Result<T> {
471 let buf = TokenBuffer::new2(tokens);
472 let unexpected = Rc::new(Cell::new(None));
David Tolnay94f06632018-08-31 10:17:17 -0700473 let state = private::<ParseBuffer>::new(Span::call_site(), buf.begin(), unexpected);
David Tolnay80a914f2018-08-30 23:49:53 -0700474 let node = self(&state)?;
475 state.check_unexpected()?;
476 if state.is_empty() {
477 Ok(node)
478 } else {
479 Err(state.error("unexpected token"))
480 }
481 }
482}