Merge pull request #649 from CAD97/unfork Add `<ParseBuffer as Speculative>::advance_to`, the opposite of `fork`

commit: b1582aafa69b250c8c5f60d5ff622823b0c606b7 [log] [tgz]
author: David Tolnay <dtolnay@gmail.com> Sun Jun 23 13:49:07 2019 -0700
committer: GitHub <noreply@github.com> Sun Jun 23 13:49:07 2019 -0700
tree: 92c052d692ba7f4a921fbdd7a3b6099ed5ccecb7
parent: 8f315833391426ff4413619e6e55ca9bfe960a03 [diff]
parent: 6db0f2af03b7477be2f9488ce880c8b477f6f185 [diff]
diff --git a/src/buffer.rs b/src/buffer.rs
index 8c32645..a13d683 100644
--- a/src/buffer.rs
+++ b/src/buffer.rs

@@ -348,6 +348,10 @@
 }
 
 impl private {
+    pub fn same_scope(a: Cursor, b: Cursor) -> bool {
+        a.scope == b.scope
+    }
+
     #[cfg(procmacro2_semver_exempt)]
     pub fn open_span_of_group(cursor: Cursor) -> Span {
         match *cursor.entry() {

diff --git a/src/discouraged.rs b/src/discouraged.rs
new file mode 100644
index 0000000..48b0fa9
--- /dev/null
+++ b/src/discouraged.rs

@@ -0,0 +1,163 @@
+//! Extensions to the parsing API with niche applicability.
+
+use super::*;
+
+/// Extensions to the `ParseStream` API to support speculative parsing.
+pub trait Speculative {
+    /// Advance this parse stream to the position of a forked parse stream.
+    ///
+    /// This is the opposite operation to [`ParseStream::fork`].
+    /// You can fork a parse stream, perform some speculative parsing, then join
+    /// the original stream to the fork to "commit" the parsing from the fork to
+    /// the main stream.
+    ///
+    /// If you can avoid doing this, you should, as it limits the ability to
+    /// generate useful errors. That said, it is often the only way to parse
+    /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
+    /// is that when the fork fails to parse an `A`, it's impossible to tell
+    /// whether that was because of a syntax error and the user meant to provide
+    /// an `A`, or that the `A`s are finished and its time to start parsing `B`s.
+    /// Use with care.
+    ///
+    /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by parsing
+    /// `B*` and removing the leading members of `A` from the repetition, bypassing
+    /// the need to involve the downsides associated with speculative parsing.
+    ///
+    /// [`ParseStream::fork`]: ../struct.ParseBuffer.html#method.fork
+    ///
+    /// # Example
+    ///
+    /// There has been chatter about the possibility of making the colons in the
+    /// turbofish syntax like `path::to::<T>` no longer required by accepting
+    /// `path::to<T>` in expression position. Specifically, according to [RFC#2544],
+    /// [`PathSegment`] parsing should always try to consume a following `<` token
+    /// as the start of generic arguments, and reset to the `<` if that fails
+    /// (e.g. the token is acting as a less-than operator).
+    ///
+    /// This is the exact kind of parsing behavior which requires the "fork, try,
+    /// commit" behavior that [`ParseStream::fork`] discourages. With `advance_to`,
+    /// we can avoid having to parse the speculatively parsed content a second time.
+    ///
+    /// This change in behavior can be implemented in syn by replacing just the
+    /// `Parse` implementation for `PathSegment`:
+    ///
+    /// ```edition2018
+    /// # use syn::ext::IdentExt;
+    /// use syn::parse::discouraged::Speculative;
+    /// # use syn::parse::{Parse, ParseStream};
+    /// # use syn::{Ident, PathArguments, Result, Token};
+    ///
+    /// pub struct PathSegment {
+    ///     pub ident: Ident,
+    ///     pub arguments: PathArguments,
+    /// }
+    ///
+    /// # impl<T> From<T> for PathSegment
+    /// # where
+    /// #     T: Into<Ident>,
+    /// # {
+    /// #     fn from(ident: T) -> Self {
+    /// #         PathSegment {
+    /// #             ident: ident.into(),
+    /// #             arguments: PathArguments::None,
+    /// #         }
+    /// #     }
+    /// # }
+    ///
+    ///
+    /// impl Parse for PathSegment {
+    ///     fn parse(input: ParseStream) -> Result<Self> {
+    ///         if input.peek(Token![super])
+    ///             || input.peek(Token![self])
+    ///             || input.peek(Token![Self])
+    ///             || input.peek(Token![crate])
+    ///             || input.peek(Token![extern])
+    ///         {
+    ///             let ident = input.call(Ident::parse_any)?;
+    ///             return Ok(PathSegment::from(ident));
+    ///         }
+    ///
+    ///         let ident = input.parse()?;
+    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
+    ///             return Ok(PathSegment {
+    ///                 ident: ident,
+    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
+    ///             });
+    ///         }
+    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
+    ///             let fork = input.fork();
+    ///             if let Ok(arguments) = fork.parse() {
+    ///                 input.advance_to(&fork);
+    ///                 return Ok(PathSegment {
+    ///                     ident: ident,
+    ///                     arguments: PathArguments::AngleBracketed(arguments),
+    ///                 });
+    ///             }
+    ///         }
+    ///         Ok(PathSegment::from(ident))
+    ///     }
+    /// }
+    ///
+    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
+    /// ```
+    ///
+    /// # Drawbacks
+    ///
+    /// The main drawback of this style of speculative parsing is in error
+    /// presentation. Even if the lookahead is the "correct" parse, the error
+    /// that is shown is that of the "fallback" parse. To use the same example
+    /// as the turbofish above, take the following unfinished "turbofish":
+    ///
+    /// ```text
+    /// let _ = f<&'a fn(), for<'a> serde::>();
+    /// ```
+    ///
+    /// If this is parsed as generic arguments, we can provide the error message
+    ///
+    /// ```text
+    /// error: expected identifier
+    ///  --> src.rs:L:C
+    ///   |
+    /// L | let _ = f<&'a fn(), for<'a> serde::>();
+    ///   |                                    ^
+    /// ```
+    ///
+    /// but if parsed using the above speculative parsing, it falls back to
+    /// assuming that the `<` is a less-than when it fails to parse the
+    /// generic arguments, and tries to interpret the `&'a` as the start of a
+    /// labelled loop, resulting in the much less helpful error
+    ///
+    /// ```text
+    /// error: expected `:`
+    ///  --> src.rs:L:C
+    ///   |
+    /// L | let _ = f<&'a fn(), for<'a> serde::>();
+    ///   |               ^^
+    /// ```
+    ///
+    /// This can be mitigated with various heuristics (two examples: show both
+    /// forks' parse errors, or show the one that consumed more tokens), but
+    /// when you can control the grammar, sticking to something that can be
+    /// parsed LL(3) and without the LL(*) speculative parsing this makes
+    /// possible, displaying reasonable errors becomes much more simple.
+    ///
+    /// [RFC#2544]: https://github.com/rust-lang/rfcs/pull/2544
+    /// [`PathSegment`]: ../../struct.PathSegment.html
+    ///
+    /// # Panics
+    ///
+    /// The forked stream that this joins with must be derived by forking this parse stream.
+    fn advance_to(&self, fork: &Self);
+}
+
+impl<'a> Speculative for ParseBuffer<'a> {
+    fn advance_to(&self, fork: &Self) {
+        if !private::same_scope(self.cursor(), fork.cursor()) {
+            panic!("Fork was not derived from the advancing parse stream");
+        }
+
+        // See comment on `cell` in the struct definition.
+        self.cell
+            .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) })
+    }
+}

diff --git a/src/parse.rs b/src/parse.rs
index c5786e7..487799d 100644
--- a/src/parse.rs
+++ b/src/parse.rs

@@ -188,6 +188,9 @@
 //!
 //! *This module is available if Syn is built with the `"parsing"` feature.*
 
+#[path = "discouraged.rs"]
+pub mod discouraged;
+
 use std::cell::Cell;
 use std::fmt::{self, Debug, Display};
 use std::marker::PhantomData;
@@ -745,10 +748,13 @@
     /// parse stream. Only use a fork when the amount of work performed against
     /// the fork is small and bounded.
     ///
+    /// For higher level speculative parsing, [`parse::discouraged::Speculative`]
+    /// is provided alongside matching tradeoffs to enable the pattern.
     /// For a lower level but occasionally more performant way to perform
     /// speculative parsing, consider using [`ParseStream::step`] instead.
     ///
     /// [`ParseStream::step`]: #method.step
+    /// [`parse::discouraged::Speculative`]: ./discouraged/trait.Speculative.html
     ///
     /// # Example
     ///

diff --git a/tests/test_parse_buffer.rs b/tests/test_parse_buffer.rs
new file mode 100644
index 0000000..f094951
--- /dev/null
+++ b/tests/test_parse_buffer.rs

@@ -0,0 +1,55 @@
+#[macro_use]
+extern crate syn;
+
+use syn::parse::{discouraged::Speculative, Parse, ParseStream, Parser, Result};
+
+#[test]
+#[should_panic(expected = "Fork was not derived from the advancing parse stream")]
+fn smuggled_speculative_cursor_between_sources() {
+    struct BreakRules;
+    impl Parse for BreakRules {
+        fn parse(input1: ParseStream) -> Result<Self> {
+            let nested = |input2: ParseStream| {
+                input1.advance_to(&input2);
+                Ok(Self)
+            };
+            nested.parse_str("")
+        }
+    }
+
+    syn::parse_str::<BreakRules>("").unwrap();
+}
+
+#[test]
+#[should_panic(expected = "Fork was not derived from the advancing parse stream")]
+fn smuggled_speculative_cursor_between_brackets() {
+    struct BreakRules;
+    impl Parse for BreakRules {
+        fn parse(input: ParseStream) -> Result<Self> {
+            let a;
+            let b;
+            parenthesized!(a in input);
+            parenthesized!(b in input);
+            a.advance_to(&b);
+            Ok(Self)
+        }
+    }
+
+    syn::parse_str::<BreakRules>("()()").unwrap();
+}
+
+#[test]
+#[should_panic(expected = "Fork was not derived from the advancing parse stream")]
+fn smuggled_speculative_cursor_into_brackets() {
+    struct BreakRules;
+    impl Parse for BreakRules {
+        fn parse(input: ParseStream) -> Result<Self> {
+            let a;
+            parenthesized!(a in input);
+            input.advance_to(&a);
+            Ok(Self)
+        }
+    }
+
+    syn::parse_str::<BreakRules>("()").unwrap();
+}
commit	b1582aafa69b250c8c5f60d5ff622823b0c606b7	[log] [tgz]
author	David Tolnay <dtolnay@gmail.com>	Sun Jun 23 13:49:07 2019 -0700
committer	GitHub <noreply@github.com>	Sun Jun 23 13:49:07 2019 -0700
tree	92c052d692ba7f4a921fbdd7a3b6099ed5ccecb7
parent	8f315833391426ff4413619e6e55ca9bfe960a03 [diff]
parent	6db0f2af03b7477be2f9488ce880c8b477f6f185 [diff]