| // Copyright 2018 Syn Developers |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| //! A stably addressed token buffer supporting efficient traversal based on a |
| //! cheaply copyable cursor. |
| //! |
| //! The [`Synom`] trait is implemented for syntax tree types that can be parsed |
| //! from one of these token cursors. |
| //! |
| //! [`Synom`]: ../synom/trait.Synom.html |
| //! |
| //! *This module is available if Syn is built with the `"parsing"` feature.* |
| //! |
| //! # Example |
| //! |
| //! This example shows a basic token parser for parsing a token stream without |
| //! using Syn's parser combinator macros. |
| //! |
| //! ``` |
| //! #![feature(proc_macro)] |
| //! |
| //! extern crate syn; |
| //! extern crate proc_macro; |
| //! |
| //! #[macro_use] |
| //! extern crate quote; |
| //! |
| //! use syn::{token, ExprTuple}; |
| //! use syn::buffer::{Cursor, TokenBuffer}; |
| //! use syn::spanned::Spanned; |
| //! use syn::synom::Synom; |
| //! use proc_macro::{Diagnostic, Span, TokenStream}; |
| //! |
| //! /// A basic token parser for parsing a token stream without using Syn's |
| //! /// parser combinator macros. |
| //! pub struct Parser<'a> { |
| //! cursor: Cursor<'a>, |
| //! } |
| //! |
| //! impl<'a> Parser<'a> { |
| //! pub fn new(cursor: Cursor<'a>) -> Self { |
| //! Parser { cursor } |
| //! } |
| //! |
| //! pub fn current_span(&self) -> Span { |
| //! self.cursor.span().unstable() |
| //! } |
| //! |
| //! pub fn parse<T: Synom>(&mut self) -> Result<T, Diagnostic> { |
| //! let (val, rest) = T::parse(self.cursor) |
| //! .map_err(|e| match T::description() { |
| //! Some(desc) => { |
| //! self.current_span().error(format!("{}: expected {}", e, desc)) |
| //! } |
| //! None => { |
| //! self.current_span().error(e.to_string()) |
| //! } |
| //! })?; |
| //! |
| //! self.cursor = rest; |
| //! Ok(val) |
| //! } |
| //! |
| //! pub fn expect_eof(&mut self) -> Result<(), Diagnostic> { |
| //! if !self.cursor.eof() { |
| //! return Err(self.current_span().error("trailing characters; expected eof")); |
| //! } |
| //! |
| //! Ok(()) |
| //! } |
| //! } |
| //! |
| //! fn eval(input: TokenStream) -> Result<TokenStream, Diagnostic> { |
| //! let buffer = TokenBuffer::new(input); |
| //! let mut parser = Parser::new(buffer.begin()); |
| //! |
| //! // Parse some syntax tree types out of the input tokens. In this case we |
| //! // expect something like: |
| //! // |
| //! // (a, b, c) = (1, 2, 3) |
| //! let a = parser.parse::<ExprTuple>()?; |
| //! parser.parse::<token::Eq>()?; |
| //! let b = parser.parse::<ExprTuple>()?; |
| //! parser.expect_eof()?; |
| //! |
| //! // Perform some validation and report errors. |
| //! let (a_len, b_len) = (a.elems.len(), b.elems.len()); |
| //! if a_len != b_len { |
| //! let diag = b.span().unstable() |
| //! .error(format!("expected {} element(s), got {}", a_len, b_len)) |
| //! .span_note(a.span().unstable(), "because of this"); |
| //! |
| //! return Err(diag); |
| //! } |
| //! |
| //! // Build the output tokens. |
| //! let out = quote! { |
| //! println!("All good! Received two tuples of size {}", #a_len); |
| //! }; |
| //! |
| //! Ok(out.into()) |
| //! } |
| //! # |
| //! # extern crate proc_macro2; |
| //! # |
| //! # // This method exists on proc_macro2::Span but is behind the "nightly" |
| //! # // feature. |
| //! # trait ToUnstableSpan { |
| //! # fn unstable(&self) -> Span; |
| //! # } |
| //! # |
| //! # impl ToUnstableSpan for proc_macro2::Span { |
| //! # fn unstable(&self) -> Span { |
| //! # unimplemented!() |
| //! # } |
| //! # } |
| //! # |
| //! # fn main() {} |
| //! ``` |
| |
| // This module is heavily commented as it contains the only unsafe code in Syn, |
| // and caution should be used when editing it. The public-facing interface is |
| // 100% safe but the implementation is fragile internally. |
| |
| use proc_macro as pm; |
| use proc_macro2::{Delimiter, Literal, Spacing, Span, Term, TokenNode, TokenStream, TokenTree}; |
| |
| use std::ptr; |
| use std::marker::PhantomData; |
| |
| #[cfg(synom_verbose_trace)] |
| use std::fmt::{self, Debug}; |
| |
| /// Internal type which is used instead of `TokenTree` to represent a token tree |
| /// within a `TokenBuffer`. |
| enum Entry { |
| // Mimicking types from proc-macro. |
| Group(Span, Delimiter, TokenBuffer), |
| Term(Span, Term), |
| Op(Span, char, Spacing), |
| Literal(Span, Literal), |
| // End entries contain a raw pointer to the entry from the containing |
| // token tree, or null if this is the outermost level. |
| End(*const Entry), |
| } |
| |
| /// A buffer that can be efficiently traversed multiple times, unlike |
| /// `TokenStream` which requires a deep copy in order to traverse more than |
| /// once. |
| /// |
| /// See the [module documentation] for an example of `TokenBuffer` in action. |
| /// |
| /// [module documentation]: index.html |
| /// |
| /// *This type is available if Syn is built with the `"parsing"` feature.* |
| pub struct TokenBuffer { |
| // NOTE: Do not derive clone on this - there are raw pointers inside which |
| // will be messed up. Moving the `TokenBuffer` itself is safe as the actual |
| // backing slices won't be moved. |
| data: Box<[Entry]>, |
| } |
| |
| impl TokenBuffer { |
| // NOTE: DO NOT MUTATE THE `Vec` RETURNED FROM THIS FUNCTION ONCE IT |
| // RETURNS, THE ADDRESS OF ITS BACKING MEMORY MUST REMAIN STABLE. |
| fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer { |
| // Build up the entries list, recording the locations of any Groups |
| // in the list to be processed later. |
| let mut entries = Vec::new(); |
| let mut seqs = Vec::new(); |
| for tt in stream { |
| match tt.kind { |
| TokenNode::Term(sym) => { |
| entries.push(Entry::Term(tt.span, sym)); |
| } |
| TokenNode::Op(chr, ok) => { |
| entries.push(Entry::Op(tt.span, chr, ok)); |
| } |
| TokenNode::Literal(lit) => { |
| entries.push(Entry::Literal(tt.span, lit)); |
| } |
| TokenNode::Group(delim, seq_stream) => { |
| // Record the index of the interesting entry, and store an |
| // `End(null)` there temporarially. |
| seqs.push((entries.len(), tt.span, delim, seq_stream)); |
| entries.push(Entry::End(ptr::null())); |
| } |
| } |
| } |
| // Add an `End` entry to the end with a reference to the enclosing token |
| // stream which was passed in. |
| entries.push(Entry::End(up)); |
| |
| // NOTE: This is done to ensure that we don't accidentally modify the |
| // length of the backing buffer. The backing buffer must remain at a |
| // constant address after this point, as we are going to store a raw |
| // pointer into it. |
| let mut entries = entries.into_boxed_slice(); |
| for (idx, span, delim, seq_stream) in seqs { |
| // We know that this index refers to one of the temporary |
| // `End(null)` entries, and we know that the last entry is |
| // `End(up)`, so the next index is also valid. |
| let seq_up = &entries[idx + 1] as *const Entry; |
| |
| // The end entry stored at the end of this Entry::Group should |
| // point to the Entry which follows the Group in the list. |
| let inner = Self::inner_new(seq_stream, seq_up); |
| entries[idx] = Entry::Group(span, delim, inner); |
| } |
| |
| TokenBuffer { data: entries } |
| } |
| |
| /// Creates a `TokenBuffer` containing all the tokens from the input |
| /// `TokenStream`. |
| pub fn new(stream: pm::TokenStream) -> TokenBuffer { |
| Self::new2(stream.into()) |
| } |
| |
| /// Creates a `TokenBuffer` containing all the tokens from the input |
| /// `TokenStream`. |
| pub fn new2(stream: TokenStream) -> TokenBuffer { |
| Self::inner_new(stream, ptr::null()) |
| } |
| |
| /// Creates a cursor referencing the first token in the buffer and able to |
| /// traverse until the end of the buffer. |
| pub fn begin(&self) -> Cursor { |
| unsafe { Cursor::create(&self.data[0], &self.data[self.data.len() - 1]) } |
| } |
| } |
| |
| /// A cheaply copyable cursor into a `TokenBuffer`. |
| /// |
| /// This cursor holds a shared reference into the immutable data which is used |
| /// internally to represent a `TokenStream`, and can be efficiently manipulated |
| /// and copied around. |
| /// |
| /// An empty `Cursor` can be created directly, or one may create a `TokenBuffer` |
| /// object and get a cursor to its first token with `begin()`. |
| /// |
| /// Two cursors are equal if they have the same location in the same input |
| /// stream, and have the same scope. |
| /// |
| /// See the [module documentation] for an example of a `Cursor` in action. |
| /// |
| /// [module documentation]: index.html |
| /// |
| /// *This type is available if Syn is built with the `"parsing"` feature.* |
| #[derive(Copy, Clone, Eq, PartialEq)] |
| pub struct Cursor<'a> { |
| /// The current entry which the `Cursor` is pointing at. |
| ptr: *const Entry, |
| /// This is the only `Entry::End(..)` object which this cursor is allowed to |
| /// point at. All other `End` objects are skipped over in `Cursor::create`. |
| scope: *const Entry, |
| /// This uses the &'a reference which guarantees that these pointers are |
| /// still valid. |
| marker: PhantomData<&'a Entry>, |
| } |
| |
| impl<'a> Cursor<'a> { |
| /// Creates a cursor referencing a static empty TokenStream. |
| pub fn empty() -> Self { |
| // It's safe in this situation for us to put an `Entry` object in global |
| // storage, despite it not actually being safe to send across threads |
| // (`Term` is a reference into a thread-local table). This is because |
| // this entry never includes a `Term` object. |
| // |
| // This wrapper struct allows us to break the rules and put a `Sync` |
| // object in global storage. |
| struct UnsafeSyncEntry(Entry); |
| unsafe impl Sync for UnsafeSyncEntry {} |
| static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry)); |
| |
| Cursor { |
| ptr: &EMPTY_ENTRY.0, |
| scope: &EMPTY_ENTRY.0, |
| marker: PhantomData, |
| } |
| } |
| |
| /// This create method intelligently exits non-explicitly-entered |
| /// `None`-delimited scopes when the cursor reaches the end of them, |
| /// allowing for them to be treated transparently. |
| unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self { |
| // NOTE: If we're looking at a `End(..)`, we want to advance the cursor |
| // past it, unless `ptr == scope`, which means that we're at the edge of |
| // our cursor's scope. We should only have `ptr != scope` at the exit |
| // from None-delimited groups entered with `ignore_none`. |
| while let Entry::End(exit) = *ptr { |
| if ptr == scope { |
| break; |
| } |
| ptr = exit; |
| } |
| |
| Cursor { |
| ptr: ptr, |
| scope: scope, |
| marker: PhantomData, |
| } |
| } |
| |
| /// Get the current entry. |
| fn entry(self) -> &'a Entry { |
| unsafe { &*self.ptr } |
| } |
| |
| /// Bump the cursor to point at the next token after the current one. This |
| /// is undefined behavior if the cursor is currently looking at an |
| /// `Entry::End`. |
| unsafe fn bump(self) -> Cursor<'a> { |
| Cursor::create(self.ptr.offset(1), self.scope) |
| } |
| |
| /// If the cursor is looking at a `None`-delimited group, move it to look at |
| /// the first token inside instead. If the group is empty, this will move |
| /// the cursor past the `None`-delimited group. |
| /// |
| /// WARNING: This mutates its argument. |
| fn ignore_none(&mut self) { |
| if let Entry::Group(_, Delimiter::None, ref buf) = *self.entry() { |
| // NOTE: We call `Cursor::create` here to make sure that situations |
| // where we should immediately exit the span after entering it are |
| // handled correctly. |
| unsafe { |
| *self = Cursor::create(&buf.data[0], self.scope); |
| } |
| } |
| } |
| |
| /// Checks whether the cursor is currently pointing at the end of its valid |
| /// scope. |
| #[inline] |
| pub fn eof(self) -> bool { |
| // We're at eof if we're at the end of our scope. |
| self.ptr == self.scope |
| } |
| |
| /// If the cursor is pointing at a `Group` with the given delimiter, returns |
| /// a cursor into that group and one pointing to the next `TokenTree`. |
| pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> { |
| // If we're not trying to enter a none-delimited group, we want to |
| // ignore them. We have to make sure to _not_ ignore them when we want |
| // to enter them, of course. For obvious reasons. |
| if delim != Delimiter::None { |
| self.ignore_none(); |
| } |
| |
| if let Entry::Group(span, group_delim, ref buf) = *self.entry() { |
| if group_delim == delim { |
| return Some((buf.begin(), span, unsafe { self.bump() })); |
| } |
| } |
| |
| None |
| } |
| |
| /// If the cursor is pointing at a `Term`, returns it along with a cursor |
| /// pointing at the next `TokenTree`. |
| pub fn term(mut self) -> Option<(Span, Term, Cursor<'a>)> { |
| self.ignore_none(); |
| match *self.entry() { |
| Entry::Term(span, term) => Some((span, term, unsafe { self.bump() })), |
| _ => None, |
| } |
| } |
| |
| /// If the cursor is pointing at an `Op`, returns it along with a cursor |
| /// pointing at the next `TokenTree`. |
| pub fn op(mut self) -> Option<(Span, char, Spacing, Cursor<'a>)> { |
| self.ignore_none(); |
| match *self.entry() { |
| Entry::Op(span, op, spacing) => Some((span, op, spacing, unsafe { self.bump() })), |
| _ => None, |
| } |
| } |
| |
| /// If the cursor is pointing at a `Literal`, return it along with a cursor |
| /// pointing at the next `TokenTree`. |
| pub fn literal(mut self) -> Option<(Span, Literal, Cursor<'a>)> { |
| self.ignore_none(); |
| match *self.entry() { |
| Entry::Literal(span, ref lit) => Some((span, lit.clone(), unsafe { self.bump() })), |
| _ => None, |
| } |
| } |
| |
| /// Copies all remaining tokens visible from this cursor into a |
| /// `TokenStream`. |
| pub fn token_stream(self) -> TokenStream { |
| let mut tts = Vec::new(); |
| let mut cursor = self; |
| while let Some((tt, rest)) = cursor.token_tree() { |
| tts.push(tt); |
| cursor = rest; |
| } |
| tts.into_iter().collect() |
| } |
| |
| /// If the cursor is pointing at a `TokenTree`, returns it along with a |
| /// cursor pointing at the next `TokenTree`. |
| /// |
| /// Returns `None` if the cursor has reached the end of its stream. |
| /// |
| /// This method does not treat `None`-delimited groups as transparent, and |
| /// will return a `Group(None, ..)` if the cursor is looking at one. |
| pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> { |
| let tree = match *self.entry() { |
| Entry::Group(span, delim, ref buf) => { |
| let stream = buf.begin().token_stream(); |
| TokenTree { |
| span: span, |
| kind: TokenNode::Group(delim, stream), |
| } |
| } |
| Entry::Literal(span, ref lit) => TokenTree { |
| span: span, |
| kind: TokenNode::Literal(lit.clone()), |
| }, |
| Entry::Term(span, sym) => TokenTree { |
| span: span, |
| kind: TokenNode::Term(sym), |
| }, |
| Entry::Op(span, chr, spacing) => TokenTree { |
| span: span, |
| kind: TokenNode::Op(chr, spacing), |
| }, |
| Entry::End(..) => { |
| return None; |
| } |
| }; |
| |
| Some((tree, unsafe { self.bump() })) |
| } |
| |
| /// Returns the `Span` of the current token, or `Span::call_site()` if this |
| /// cursor points to eof. |
| pub fn span(self) -> Span { |
| match *self.entry() { |
| Entry::Group(span, ..) |
| | Entry::Literal(span, ..) |
| | Entry::Term(span, ..) |
| | Entry::Op(span, ..) => span, |
| Entry::End(..) => Span::call_site(), |
| } |
| } |
| } |
| |
| // We do a custom implementation for `Debug` as the default implementation is |
| // pretty useless. |
| #[cfg(synom_verbose_trace)] |
| impl<'a> Debug for Cursor<'a> { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| // Print what the cursor is currently looking at. |
| // This will look like Cursor("some remaining tokens here") |
| f.debug_tuple("Cursor") |
| .field(&self.token_stream().to_string()) |
| .finish() |
| } |
| } |