blob: 05a2760b589e44da4e265ff79f19ab78dd7b1aeb [file] [log] [blame]
David Tolnay55535012018-01-05 16:39:23 -08001// Copyright 2018 Syn Developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
Michael Layzell2a60e252017-05-31 21:36:47 -04009//! This module defines a cheaply-copyable cursor into a TokenStream's data.
10//!
11//! It does this by copying the data into a stably-addressed structured buffer,
12//! and holding raw pointers into that buffer to allow walking through delimited
David Tolnayc10676a2017-12-27 23:42:36 -050013//! groups cheaply.
Michael Layzell2a60e252017-05-31 21:36:47 -040014//!
15//! This module is heavily commented as it contains the only unsafe code in
16//! `syn`, and caution should be made when editing it. It provides a safe
17//! interface, but is fragile internally.
18
19use proc_macro2::*;
20
21use std::ptr;
Michael Layzell2a60e252017-05-31 21:36:47 -040022use std::marker::PhantomData;
23
David Tolnayd1ec6ec2018-01-03 00:23:45 -080024#[cfg(synom_verbose_trace)]
David Tolnayf9e1de12017-12-31 00:47:01 -050025use std::fmt::{self, Debug};
26
Michael Layzell2a60e252017-05-31 21:36:47 -040027/// Internal type which is used instead of `TokenTree` to represent a single
David Tolnaydfc886b2018-01-06 08:03:09 -080028/// `TokenTree` within a `TokenBuffer`.
Michael Layzell2a60e252017-05-31 21:36:47 -040029enum Entry {
30 /// Mimicing types from proc-macro.
David Tolnaydfc886b2018-01-06 08:03:09 -080031 Group(Span, Delimiter, TokenBuffer),
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070032 Term(Span, Term),
33 Op(Span, char, Spacing),
Michael Layzell2a60e252017-05-31 21:36:47 -040034 Literal(Span, Literal),
35 /// End entries contain a raw pointer to the entry from the containing
36 /// TokenTree.
37 End(*const Entry),
38}
39
Michael Layzell2a60e252017-05-31 21:36:47 -040040/// A buffer of data which contains a structured representation of the input
41/// `TokenStream` object.
David Tolnaydfc886b2018-01-06 08:03:09 -080042pub struct TokenBuffer {
Michael Layzell2a60e252017-05-31 21:36:47 -040043 // NOTE: Do not derive clone on this - there are raw pointers inside which
David Tolnaydfc886b2018-01-06 08:03:09 -080044 // will be messed up. Moving the `TokenBuffer` itself is safe as the actual
Michael Layzell2a60e252017-05-31 21:36:47 -040045 // backing slices won't be moved.
46 data: Box<[Entry]>,
47}
48
David Tolnaydfc886b2018-01-06 08:03:09 -080049impl TokenBuffer {
Michael Layzell2a60e252017-05-31 21:36:47 -040050 // NOTE: DO NOT MUTATE THE `Vec` RETURNED FROM THIS FUNCTION ONCE IT
51 // RETURNS, THE ADDRESS OF ITS BACKING MEMORY MUST REMAIN STABLE.
David Tolnaydfc886b2018-01-06 08:03:09 -080052 fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070053 // Build up the entries list, recording the locations of any Groups
Michael Layzell2a60e252017-05-31 21:36:47 -040054 // in the list to be processed later.
55 let mut entries = Vec::new();
56 let mut seqs = Vec::new();
David Tolnay50fa4682017-12-26 23:17:22 -050057 for tt in stream {
Michael Layzell2a60e252017-05-31 21:36:47 -040058 match tt.kind {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070059 TokenNode::Term(sym) => {
60 entries.push(Entry::Term(tt.span, sym));
Michael Layzell2a60e252017-05-31 21:36:47 -040061 }
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070062 TokenNode::Op(chr, ok) => {
Michael Layzell2a60e252017-05-31 21:36:47 -040063 entries.push(Entry::Op(tt.span, chr, ok));
64 }
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070065 TokenNode::Literal(lit) => {
Michael Layzell2a60e252017-05-31 21:36:47 -040066 entries.push(Entry::Literal(tt.span, lit));
67 }
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070068 TokenNode::Group(delim, seq_stream) => {
Michael Layzell2a60e252017-05-31 21:36:47 -040069 // Record the index of the interesting entry, and store an
70 // `End(null)` there temporarially.
71 seqs.push((entries.len(), tt.span, delim, seq_stream));
72 entries.push(Entry::End(ptr::null()));
73 }
74 }
75 }
76 // Add an `End` entry to the end with a reference to the enclosing token
77 // stream which was passed in.
78 entries.push(Entry::End(up));
79
80 // NOTE: This is done to ensure that we don't accidentally modify the
81 // length of the backing buffer. The backing buffer must remain at a
82 // constant address after this point, as we are going to store a raw
83 // pointer into it.
84 let mut entries = entries.into_boxed_slice();
85 for (idx, span, delim, seq_stream) in seqs {
86 // We know that this index refers to one of the temporary
87 // `End(null)` entries, and we know that the last entry is
88 // `End(up)`, so the next index is also valid.
89 let seq_up = &entries[idx + 1] as *const Entry;
90
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070091 // The end entry stored at the end of this Entry::Group should
92 // point to the Entry which follows the Group in the list.
Michael Layzell2a60e252017-05-31 21:36:47 -040093 let inner = Self::inner_new(seq_stream, seq_up);
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070094 entries[idx] = Entry::Group(span, delim, inner);
Michael Layzell2a60e252017-05-31 21:36:47 -040095 }
96
David Tolnaydfc886b2018-01-06 08:03:09 -080097 TokenBuffer { data: entries }
Michael Layzell2a60e252017-05-31 21:36:47 -040098 }
99
David Tolnaydfc886b2018-01-06 08:03:09 -0800100 /// Create a new TokenBuffer, which contains the data from the given
Michael Layzell2a60e252017-05-31 21:36:47 -0400101 /// TokenStream.
David Tolnaydfc886b2018-01-06 08:03:09 -0800102 pub fn new(stream: TokenStream) -> TokenBuffer {
Michael Layzell2a60e252017-05-31 21:36:47 -0400103 Self::inner_new(stream, ptr::null())
104 }
105
106 /// Create a cursor referencing the first token in the input.
107 pub fn begin(&self) -> Cursor {
David Tolnay51382052017-12-27 13:46:21 -0500108 unsafe { Cursor::create(&self.data[0], &self.data[self.data.len() - 1]) }
Michael Layzell2a60e252017-05-31 21:36:47 -0400109 }
110}
111
Michael Layzell2a60e252017-05-31 21:36:47 -0400112/// A cursor into an input `TokenStream`'s data. This cursor holds a reference
113/// into the immutable data which is used internally to represent a
114/// `TokenStream`, and can be efficiently manipulated and copied around.
115///
David Tolnaydfc886b2018-01-06 08:03:09 -0800116/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
Michael Layzell2a60e252017-05-31 21:36:47 -0400117/// object and get a cursor to its first token with `begin()`.
118///
119/// Two cursors are equal if they have the same location in the same input
120/// stream, and have the same scope.
121#[derive(Copy, Clone, Eq, PartialEq)]
122pub struct Cursor<'a> {
123 /// The current entry which the `Cursor` is pointing at.
124 ptr: *const Entry,
125 /// This is the only `Entry::End(..)` object which this cursor is allowed to
126 /// point at. All other `End` objects are skipped over in `Cursor::create`.
127 scope: *const Entry,
128 /// This uses the &'a reference which guarantees that these pointers are
129 /// still valid.
130 marker: PhantomData<&'a Entry>,
131}
132
Michael Layzell2a60e252017-05-31 21:36:47 -0400133impl<'a> Cursor<'a> {
134 /// Create a cursor referencing a static empty TokenStream.
135 pub fn empty() -> Self {
Michael Layzell69cf9082017-06-03 12:15:58 -0400136 // It's safe in this situation for us to put an `Entry` object in global
137 // storage, despite it not actually being safe to send across threads
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700138 // (`Term` is a reference into a thread-local table). This is because
139 // this entry never includes a `Term` object.
Michael Layzell69cf9082017-06-03 12:15:58 -0400140 //
141 // This wrapper struct allows us to break the rules and put a `Sync`
142 // object in global storage.
143 struct UnsafeSyncEntry(Entry);
144 unsafe impl Sync for UnsafeSyncEntry {}
David Tolnay51382052017-12-27 13:46:21 -0500145 static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry));
Michael Layzell69cf9082017-06-03 12:15:58 -0400146
Michael Layzell2a60e252017-05-31 21:36:47 -0400147 Cursor {
Michael Layzell69cf9082017-06-03 12:15:58 -0400148 ptr: &EMPTY_ENTRY.0,
149 scope: &EMPTY_ENTRY.0,
Michael Layzell2a60e252017-05-31 21:36:47 -0400150 marker: PhantomData,
151 }
152 }
153
154 /// This create method intelligently exits non-explicitly-entered
155 /// `None`-delimited scopes when the cursor reaches the end of them,
156 /// allowing for them to be treated transparently.
157 unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
158 // NOTE: If we're looking at a `End(..)`, we want to advance the cursor
159 // past it, unless `ptr == scope`, which means that we're at the edge of
160 // our cursor's scope. We should only have `ptr != scope` at the exit
David Tolnayc10676a2017-12-27 23:42:36 -0500161 // from None-delimited groups entered with `ignore_none`.
Michael Layzell2a60e252017-05-31 21:36:47 -0400162 while let Entry::End(exit) = *ptr {
163 if ptr == scope {
164 break;
165 }
166 ptr = exit;
167 }
168
169 Cursor {
170 ptr: ptr,
171 scope: scope,
172 marker: PhantomData,
173 }
174 }
175
176 /// Get the current entry.
177 fn entry(self) -> &'a Entry {
178 unsafe { &*self.ptr }
179 }
180
181 /// Bump the cursor to point at the next token after the current one. This
182 /// is undefined behavior if the cursor is currently looking at an
183 /// `Entry::End`.
184 unsafe fn bump(self) -> Cursor<'a> {
185 Cursor::create(self.ptr.offset(1), self.scope)
186 }
187
David Tolnayc10676a2017-12-27 23:42:36 -0500188 /// If the cursor is looking at a `None`-delimited group, move it to look at
189 /// the first token inside instead. If the group is empty, this will move
190 /// the cursor past the `None`-delimited group.
Michael Layzell2a60e252017-05-31 21:36:47 -0400191 ///
192 /// WARNING: This mutates its argument.
193 fn ignore_none(&mut self) {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700194 if let Entry::Group(_, Delimiter::None, ref buf) = *self.entry() {
Michael Layzell2a60e252017-05-31 21:36:47 -0400195 // NOTE: We call `Cursor::create` here to make sure that situations
196 // where we should immediately exit the span after entering it are
197 // handled correctly.
198 unsafe {
199 *self = Cursor::create(&buf.data[0], self.scope);
200 }
201 }
202 }
203
204 /// Check if the cursor is currently pointing at the end of its valid scope.
205 #[inline]
206 pub fn eof(self) -> bool {
207 // We're at eof if we're at the end of our scope.
208 self.ptr == self.scope
209 }
210
David Tolnayc10676a2017-12-27 23:42:36 -0500211 /// If the cursor is pointing at a Group with the given `Delimiter`, return
212 /// a cursor into that group, and one pointing to the next `TokenTree`.
David Tolnay65729482017-12-31 16:14:50 -0500213 pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> {
David Tolnayc10676a2017-12-27 23:42:36 -0500214 // If we're not trying to enter a none-delimited group, we want to
Michael Layzell2a60e252017-05-31 21:36:47 -0400215 // ignore them. We have to make sure to _not_ ignore them when we want
216 // to enter them, of course. For obvious reasons.
David Tolnayc10676a2017-12-27 23:42:36 -0500217 if delim != Delimiter::None {
Michael Layzell2a60e252017-05-31 21:36:47 -0400218 self.ignore_none();
219 }
220
David Tolnayc10676a2017-12-27 23:42:36 -0500221 if let Entry::Group(span, group_delim, ref buf) = *self.entry() {
222 if group_delim == delim {
David Tolnay65729482017-12-31 16:14:50 -0500223 return Some((buf.begin(), span, unsafe { self.bump() }));
Michael Layzell2a60e252017-05-31 21:36:47 -0400224 }
Michael Layzell2a60e252017-05-31 21:36:47 -0400225 }
David Tolnayc10676a2017-12-27 23:42:36 -0500226
227 None
Michael Layzell2a60e252017-05-31 21:36:47 -0400228 }
229
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700230 /// If the cursor is pointing at a Term, return it and a cursor pointing at
Michael Layzell2a60e252017-05-31 21:36:47 -0400231 /// the next `TokenTree`.
David Tolnay65729482017-12-31 16:14:50 -0500232 pub fn term(mut self) -> Option<(Span, Term, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400233 self.ignore_none();
234 match *self.entry() {
David Tolnay65729482017-12-31 16:14:50 -0500235 Entry::Term(span, term) => Some((span, term, unsafe { self.bump() })),
David Tolnay51382052017-12-27 13:46:21 -0500236 _ => None,
Michael Layzell2a60e252017-05-31 21:36:47 -0400237 }
238 }
239
240 /// If the cursor is pointing at an Op, return it and a cursor pointing
241 /// at the next `TokenTree`.
David Tolnay65729482017-12-31 16:14:50 -0500242 pub fn op(mut self) -> Option<(Span, char, Spacing, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400243 self.ignore_none();
244 match *self.entry() {
David Tolnay65729482017-12-31 16:14:50 -0500245 Entry::Op(span, op, spacing) => Some((span, op, spacing, unsafe { self.bump() })),
David Tolnay51382052017-12-27 13:46:21 -0500246 _ => None,
Michael Layzell2a60e252017-05-31 21:36:47 -0400247 }
248 }
249
250 /// If the cursor is pointing at a Literal, return it and a cursor pointing
251 /// at the next `TokenTree`.
David Tolnay65729482017-12-31 16:14:50 -0500252 pub fn literal(mut self) -> Option<(Span, Literal, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400253 self.ignore_none();
254 match *self.entry() {
David Tolnay65729482017-12-31 16:14:50 -0500255 Entry::Literal(span, ref lit) => Some((span, lit.clone(), unsafe { self.bump() })),
David Tolnay51382052017-12-27 13:46:21 -0500256 _ => None,
Michael Layzell2a60e252017-05-31 21:36:47 -0400257 }
258 }
259
260 /// Copy all remaining tokens visible from this cursor into a `TokenStream`.
261 pub fn token_stream(self) -> TokenStream {
262 let mut tts = Vec::new();
263 let mut cursor = self;
David Tolnay65729482017-12-31 16:14:50 -0500264 while let Some((tt, rest)) = cursor.token_tree() {
Michael Layzell2a60e252017-05-31 21:36:47 -0400265 tts.push(tt);
David Tolnay65729482017-12-31 16:14:50 -0500266 cursor = rest;
Michael Layzell2a60e252017-05-31 21:36:47 -0400267 }
268 tts.into_iter().collect()
269 }
270
271 /// If the cursor is looking at a `TokenTree`, returns it along with a
David Tolnayc10676a2017-12-27 23:42:36 -0500272 /// cursor pointing to the next token in the group, otherwise returns
Michael Layzell2a60e252017-05-31 21:36:47 -0400273 /// `None`.
274 ///
David Tolnayc10676a2017-12-27 23:42:36 -0500275 /// This method does not treat `None`-delimited groups as invisible, and
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700276 /// will return a `Group(None, ..)` if the cursor is looking at one.
David Tolnay65729482017-12-31 16:14:50 -0500277 pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400278 let tree = match *self.entry() {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700279 Entry::Group(span, delim, ref buf) => {
Michael Layzell2a60e252017-05-31 21:36:47 -0400280 let stream = buf.begin().token_stream();
281 TokenTree {
282 span: span,
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700283 kind: TokenNode::Group(delim, stream),
Michael Layzell2a60e252017-05-31 21:36:47 -0400284 }
285 }
David Tolnay51382052017-12-27 13:46:21 -0500286 Entry::Literal(span, ref lit) => TokenTree {
287 span: span,
288 kind: TokenNode::Literal(lit.clone()),
289 },
290 Entry::Term(span, sym) => TokenTree {
291 span: span,
292 kind: TokenNode::Term(sym),
293 },
David Tolnay73c98de2017-12-31 15:56:56 -0500294 Entry::Op(span, chr, spacing) => TokenTree {
David Tolnay51382052017-12-27 13:46:21 -0500295 span: span,
David Tolnay73c98de2017-12-31 15:56:56 -0500296 kind: TokenNode::Op(chr, spacing),
David Tolnay51382052017-12-27 13:46:21 -0500297 },
Michael Layzell2a60e252017-05-31 21:36:47 -0400298 Entry::End(..) => {
299 return None;
300 }
301 };
302
David Tolnay65729482017-12-31 16:14:50 -0500303 Some((tree, unsafe { self.bump() }))
Michael Layzell2a60e252017-05-31 21:36:47 -0400304 }
David Tolnay225efa22017-12-31 16:51:29 -0500305
306 /// Returns the `Span` of the current token, or `Span::call_site()` if this
307 /// cursor points to eof.
308 pub fn span(self) -> Span {
309 match *self.entry() {
David Tolnay76ebcdd2018-01-05 17:07:26 -0800310 Entry::Group(span, ..)
311 | Entry::Literal(span, ..)
312 | Entry::Term(span, ..)
313 | Entry::Op(span, ..) => span,
David Tolnay225efa22017-12-31 16:51:29 -0500314 Entry::End(..) => Span::call_site(),
315 }
316 }
Michael Layzell2a60e252017-05-31 21:36:47 -0400317}
318
319// We do a custom implementation for `Debug` as the default implementation is
320// pretty useless.
David Tolnayd1ec6ec2018-01-03 00:23:45 -0800321#[cfg(synom_verbose_trace)]
David Tolnayf9e1de12017-12-31 00:47:01 -0500322impl<'a> Debug for Cursor<'a> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400323 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Nika Layzellae81b372017-12-05 14:12:33 -0500324 // Print what the cursor is currently looking at.
325 // This will look like Cursor("some remaining tokens here")
326 f.debug_tuple("Cursor")
327 .field(&self.token_stream().to_string())
Michael Layzell2a60e252017-05-31 21:36:47 -0400328 .finish()
329 }
330}