cad97 | 89bb945 | 2019-01-20 18:33:48 -0500 | [diff] [blame] | 1 | //! Extensions to the parsing API with niche applicability. |
| 2 | |
| 3 | use super::*; |
| 4 | |
/// Extensions to the `ParseStream` API to support speculative parsing.
pub trait Speculative {
    /// Advance this parse stream to the position of a forked parse stream.
    ///
    /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
    /// parse stream, perform some speculative parsing, then join the original
    /// stream to the fork to "commit" the parsing from the fork to the main
    /// stream.
    ///
    /// If you can avoid doing this, you should, as it limits the ability to
    /// generate useful errors. That said, it is often the only way to parse
    /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
    /// is that when the fork fails to parse an `A`, it's impossible to tell
    /// whether that was because of a syntax error and the user meant to provide
    /// an `A`, or that the `A`s are finished and it's time to start parsing
    /// `B`s. Use with care.
    ///
    /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
    /// parsing `B*` and removing the leading members of `A` from the
    /// repetition, bypassing the need to involve the downsides associated with
    /// speculative parsing.
    ///
    /// [`ParseStream::fork`]: ../struct.ParseBuffer.html#method.fork
    ///
    /// # Example
    ///
    /// There has been chatter about the possibility of making the colons in the
    /// turbofish syntax like `path::to::<T>` no longer required by accepting
    /// `path::to<T>` in expression position. Specifically, according to [RFC
    /// 2544], [`PathSegment`] parsing should always try to consume a following
    /// `<` token as the start of generic arguments, and reset to the `<` if
    /// that fails (e.g. the token is acting as a less-than operator).
    ///
    /// This is the exact kind of parsing behavior which requires the "fork,
    /// try, commit" behavior that [`ParseStream::fork`] discourages. With
    /// `advance_to`, we can avoid having to parse the speculatively parsed
    /// content a second time.
    ///
    /// This change in behavior can be implemented in syn by replacing just the
    /// `Parse` implementation for `PathSegment`:
    ///
    /// ```edition2018
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///             || input.peek(Token![extern])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident: ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident: ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main drawback of this style of speculative parsing is in error
    /// presentation. Even if the lookahead is the "correct" parse, the error
    /// that is shown is that of the "fallback" parse. To use the same example
    /// as the turbofish above, take the following unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but if parsed using the above speculative parsing, it falls back to
    /// assuming that the `<` is a less-than when it fails to parse the generic
    /// arguments, and tries to interpret the `&'a` as the start of a labelled
    /// loop, resulting in the much less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// This can be mitigated with various heuristics (two examples: show both
    /// forks' parse errors, or show the one that consumed more tokens), but
    /// when you can control the grammar, sticking to something that can be
    /// parsed LL(3), without the LL(*) speculative parsing that this method
    /// makes possible, makes displaying reasonable errors much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: ../../struct.PathSegment.html
    ///
    /// # Performance
    ///
    /// This method performs a cheap fixed amount of work that does not depend
    /// on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream in the argument of `advance_to` must have been
    /// obtained by forking `self`. Attempting to advance to any other stream
    /// will cause a panic.
    fn advance_to(&self, fork: &Self);
}
| 160 | |
| 161 | impl<'a> Speculative for ParseBuffer<'a> { |
| 162 | fn advance_to(&self, fork: &Self) { |
David Tolnay | 6db0f2a | 2019-06-23 13:37:39 -0700 | [diff] [blame] | 163 | if !private::same_scope(self.cursor(), fork.cursor()) { |
| 164 | panic!("Fork was not derived from the advancing parse stream"); |
| 165 | } |
| 166 | |
cad97 | 89bb945 | 2019-01-20 18:33:48 -0500 | [diff] [blame] | 167 | // See comment on `cell` in the struct definition. |
David Tolnay | 5368954 | 2019-06-23 13:02:47 -0700 | [diff] [blame] | 168 | self.cell |
| 169 | .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }) |
cad97 | 89bb945 | 2019-01-20 18:33:48 -0500 | [diff] [blame] | 170 | } |
| 171 | } |