Merge pull request #649 from CAD97/unfork
Add `<ParseBuffer as Speculative>::advance_to`, the opposite of `fork`
diff --git a/src/buffer.rs b/src/buffer.rs
index 8c32645..a13d683 100644
--- a/src/buffer.rs
+++ b/src/buffer.rs
@@ -348,6 +348,10 @@
}
impl private {
+ pub fn same_scope(a: Cursor, b: Cursor) -> bool {
+ a.scope == b.scope
+ }
+
#[cfg(procmacro2_semver_exempt)]
pub fn open_span_of_group(cursor: Cursor) -> Span {
match *cursor.entry() {
diff --git a/src/discouraged.rs b/src/discouraged.rs
new file mode 100644
index 0000000..48b0fa9
--- /dev/null
+++ b/src/discouraged.rs
@@ -0,0 +1,163 @@
+//! Extensions to the parsing API with niche applicability.
+
+use super::*;
+
+/// Extensions to the `ParseStream` API to support speculative parsing.
+pub trait Speculative {
+ /// Advance this parse stream to the position of a forked parse stream.
+ ///
+ /// This is the opposite operation to [`ParseStream::fork`].
+ /// You can fork a parse stream, perform some speculative parsing, then join
+ /// the original stream to the fork to "commit" the parsing from the fork to
+ /// the main stream.
+ ///
+ /// If you can avoid doing this, you should, as it limits the ability to
+ /// generate useful errors. That said, it is often the only way to parse
+ /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
+ /// is that when the fork fails to parse an `A`, it's impossible to tell
+ /// whether that was because of a syntax error and the user meant to provide
+ /// an `A`, or that the `A`s are finished and its time to start parsing `B`s.
+ /// Use with care.
+ ///
+ /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by parsing
+ /// `B*` and removing the leading members of `A` from the repetition, bypassing
+ /// the need to involve the downsides associated with speculative parsing.
+ ///
+ /// [`ParseStream::fork`]: ../struct.ParseBuffer.html#method.fork
+ ///
+ /// # Example
+ ///
+ /// There has been chatter about the possibility of making the colons in the
+ /// turbofish syntax like `path::to::<T>` no longer required by accepting
+ /// `path::to<T>` in expression position. Specifically, according to [RFC#2544],
+ /// [`PathSegment`] parsing should always try to consume a following `<` token
+ /// as the start of generic arguments, and reset to the `<` if that fails
+ /// (e.g. the token is acting as a less-than operator).
+ ///
+ /// This is the exact kind of parsing behavior which requires the "fork, try,
+ /// commit" behavior that [`ParseStream::fork`] discourages. With `advance_to`,
+ /// we can avoid having to parse the speculatively parsed content a second time.
+ ///
+ /// This change in behavior can be implemented in syn by replacing just the
+ /// `Parse` implementation for `PathSegment`:
+ ///
+ /// ```edition2018
+ /// # use syn::ext::IdentExt;
+ /// use syn::parse::discouraged::Speculative;
+ /// # use syn::parse::{Parse, ParseStream};
+ /// # use syn::{Ident, PathArguments, Result, Token};
+ ///
+ /// pub struct PathSegment {
+ /// pub ident: Ident,
+ /// pub arguments: PathArguments,
+ /// }
+ ///
+ /// # impl<T> From<T> for PathSegment
+ /// # where
+ /// # T: Into<Ident>,
+ /// # {
+ /// # fn from(ident: T) -> Self {
+ /// # PathSegment {
+ /// # ident: ident.into(),
+ /// # arguments: PathArguments::None,
+ /// # }
+ /// # }
+ /// # }
+ ///
+ ///
+ /// impl Parse for PathSegment {
+ /// fn parse(input: ParseStream) -> Result<Self> {
+ /// if input.peek(Token![super])
+ /// || input.peek(Token![self])
+ /// || input.peek(Token![Self])
+ /// || input.peek(Token![crate])
+ /// || input.peek(Token![extern])
+ /// {
+ /// let ident = input.call(Ident::parse_any)?;
+ /// return Ok(PathSegment::from(ident));
+ /// }
+ ///
+ /// let ident = input.parse()?;
+ /// if input.peek(Token![::]) && input.peek3(Token![<]) {
+ /// return Ok(PathSegment {
+ /// ident: ident,
+ /// arguments: PathArguments::AngleBracketed(input.parse()?),
+ /// });
+ /// }
+ /// if input.peek(Token![<]) && !input.peek(Token![<=]) {
+ /// let fork = input.fork();
+ /// if let Ok(arguments) = fork.parse() {
+ /// input.advance_to(&fork);
+ /// return Ok(PathSegment {
+ /// ident: ident,
+ /// arguments: PathArguments::AngleBracketed(arguments),
+ /// });
+ /// }
+ /// }
+ /// Ok(PathSegment::from(ident))
+ /// }
+ /// }
+ ///
+ /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
+ /// ```
+ ///
+ /// # Drawbacks
+ ///
+ /// The main drawback of this style of speculative parsing is in error
+ /// presentation. Even if the lookahead is the "correct" parse, the error
+ /// that is shown is that of the "fallback" parse. To use the same example
+ /// as the turbofish above, take the following unfinished "turbofish":
+ ///
+ /// ```text
+ /// let _ = f<&'a fn(), for<'a> serde::>();
+ /// ```
+ ///
+ /// If this is parsed as generic arguments, we can provide the error message
+ ///
+ /// ```text
+ /// error: expected identifier
+ /// --> src.rs:L:C
+ /// |
+ /// L | let _ = f<&'a fn(), for<'a> serde::>();
+ /// | ^
+ /// ```
+ ///
+ /// but if parsed using the above speculative parsing, it falls back to
+ /// assuming that the `<` is a less-than when it fails to parse the
+ /// generic arguments, and tries to interpret the `&'a` as the start of a
+ /// labelled loop, resulting in the much less helpful error
+ ///
+ /// ```text
+ /// error: expected `:`
+ /// --> src.rs:L:C
+ /// |
+ /// L | let _ = f<&'a fn(), for<'a> serde::>();
+ /// | ^^
+ /// ```
+ ///
+ /// This can be mitigated with various heuristics (two examples: show both
+ /// forks' parse errors, or show the one that consumed more tokens), but
+ /// when you can control the grammar, sticking to something that can be
+ /// parsed LL(3) and without the LL(*) speculative parsing this makes
+ /// possible, displaying reasonable errors becomes much more simple.
+ ///
+ /// [RFC#2544]: https://github.com/rust-lang/rfcs/pull/2544
+ /// [`PathSegment`]: ../../struct.PathSegment.html
+ ///
+ /// # Panics
+ ///
+ /// The forked stream that this joins with must be derived by forking this parse stream.
+ fn advance_to(&self, fork: &Self);
+}
+
+impl<'a> Speculative for ParseBuffer<'a> {
+ fn advance_to(&self, fork: &Self) {
+ if !private::same_scope(self.cursor(), fork.cursor()) {
+ panic!("Fork was not derived from the advancing parse stream");
+ }
+
+ // See comment on `cell` in the struct definition.
+ self.cell
+ .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) })
+ }
+}
diff --git a/src/parse.rs b/src/parse.rs
index c5786e7..487799d 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -188,6 +188,9 @@
//!
//! *This module is available if Syn is built with the `"parsing"` feature.*
+#[path = "discouraged.rs"]
+pub mod discouraged;
+
use std::cell::Cell;
use std::fmt::{self, Debug, Display};
use std::marker::PhantomData;
@@ -745,10 +748,13 @@
/// parse stream. Only use a fork when the amount of work performed against
/// the fork is small and bounded.
///
+ /// For higher level speculative parsing, [`parse::discouraged::Speculative`]
+ /// is provided alongside matching tradeoffs to enable the pattern.
/// For a lower level but occasionally more performant way to perform
/// speculative parsing, consider using [`ParseStream::step`] instead.
///
/// [`ParseStream::step`]: #method.step
+ /// [`parse::discouraged::Speculative`]: ./discouraged/trait.Speculative.html
///
/// # Example
///
diff --git a/tests/test_parse_buffer.rs b/tests/test_parse_buffer.rs
new file mode 100644
index 0000000..f094951
--- /dev/null
+++ b/tests/test_parse_buffer.rs
@@ -0,0 +1,55 @@
+#[macro_use]
+extern crate syn;
+
+use syn::parse::{discouraged::Speculative, Parse, ParseStream, Parser, Result};
+
+#[test]
+#[should_panic(expected = "Fork was not derived from the advancing parse stream")]
+fn smuggled_speculative_cursor_between_sources() {
+ struct BreakRules;
+ impl Parse for BreakRules {
+ fn parse(input1: ParseStream) -> Result<Self> {
+ let nested = |input2: ParseStream| {
+ input1.advance_to(&input2);
+ Ok(Self)
+ };
+ nested.parse_str("")
+ }
+ }
+
+ syn::parse_str::<BreakRules>("").unwrap();
+}
+
+#[test]
+#[should_panic(expected = "Fork was not derived from the advancing parse stream")]
+fn smuggled_speculative_cursor_between_brackets() {
+ struct BreakRules;
+ impl Parse for BreakRules {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let a;
+ let b;
+ parenthesized!(a in input);
+ parenthesized!(b in input);
+ a.advance_to(&b);
+ Ok(Self)
+ }
+ }
+
+ syn::parse_str::<BreakRules>("()()").unwrap();
+}
+
+#[test]
+#[should_panic(expected = "Fork was not derived from the advancing parse stream")]
+fn smuggled_speculative_cursor_into_brackets() {
+ struct BreakRules;
+ impl Parse for BreakRules {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let a;
+ parenthesized!(a in input);
+ input.advance_to(&a);
+ Ok(Self)
+ }
+ }
+
+ syn::parse_str::<BreakRules>("()").unwrap();
+}