blob: e8e8c1a8a8ae71e24a04598e2577af0f0736fec7 [file] [log] [blame]
// Copyright 2018 Syn Developers
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
8
//! A stably addressed token buffer supporting efficient traversal based on a
//! cheaply copyable cursor.
//!
//! *This module is available if Syn is built with the `"parsing"` feature.*

// This module is heavily commented as it contains the only unsafe code in Syn,
// and caution should be used when editing it. The public-facing interface is
// 100% safe but the implementation is fragile internally.
17
David Tolnay278f9e32018-08-14 22:41:11 -070018#[cfg(all(
19 not(all(target_arch = "wasm32", target_os = "unknown")),
20 feature = "proc-macro"
21))]
David Tolnay7c3e77d2018-01-06 17:42:53 -080022use proc_macro as pm;
David Tolnay66cb0c42018-08-31 09:01:30 -070023use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
Michael Layzell2a60e252017-05-31 21:36:47 -040024
Michael Layzell2a60e252017-05-31 21:36:47 -040025use std::marker::PhantomData;
David Tolnay94d2b792018-04-29 12:26:10 -070026use std::ptr;
Michael Layzell2a60e252017-05-31 21:36:47 -040027
David Tolnay66cb0c42018-08-31 09:01:30 -070028use Lifetime;
29
David Tolnay7c3e77d2018-01-06 17:42:53 -080030/// Internal type which is used instead of `TokenTree` to represent a token tree
31/// within a `TokenBuffer`.
Michael Layzell2a60e252017-05-31 21:36:47 -040032enum Entry {
David Tolnay7c3e77d2018-01-06 17:42:53 -080033 // Mimicking types from proc-macro.
David Tolnay582a45e2018-09-08 17:56:31 -070034 Group(Group, TokenBuffer),
Alex Crichtona74a1c82018-05-16 10:20:44 -070035 Ident(Ident),
36 Punct(Punct),
Alex Crichton9a4dca22018-03-28 06:32:19 -070037 Literal(Literal),
David Tolnay7c3e77d2018-01-06 17:42:53 -080038 // End entries contain a raw pointer to the entry from the containing
39 // token tree, or null if this is the outermost level.
Michael Layzell2a60e252017-05-31 21:36:47 -040040 End(*const Entry),
41}
42
David Tolnay7c3e77d2018-01-06 17:42:53 -080043/// A buffer that can be efficiently traversed multiple times, unlike
44/// `TokenStream` which requires a deep copy in order to traverse more than
45/// once.
46///
David Tolnay461d98e2018-01-07 11:07:19 -080047/// *This type is available if Syn is built with the `"parsing"` feature.*
David Tolnaydfc886b2018-01-06 08:03:09 -080048pub struct TokenBuffer {
Michael Layzell2a60e252017-05-31 21:36:47 -040049 // NOTE: Do not derive clone on this - there are raw pointers inside which
David Tolnaydfc886b2018-01-06 08:03:09 -080050 // will be messed up. Moving the `TokenBuffer` itself is safe as the actual
Michael Layzell2a60e252017-05-31 21:36:47 -040051 // backing slices won't be moved.
52 data: Box<[Entry]>,
53}
54
David Tolnaydfc886b2018-01-06 08:03:09 -080055impl TokenBuffer {
Michael Layzell2a60e252017-05-31 21:36:47 -040056 // NOTE: DO NOT MUTATE THE `Vec` RETURNED FROM THIS FUNCTION ONCE IT
57 // RETURNS, THE ADDRESS OF ITS BACKING MEMORY MUST REMAIN STABLE.
David Tolnaydfc886b2018-01-06 08:03:09 -080058 fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070059 // Build up the entries list, recording the locations of any Groups
Michael Layzell2a60e252017-05-31 21:36:47 -040060 // in the list to be processed later.
61 let mut entries = Vec::new();
62 let mut seqs = Vec::new();
David Tolnay50fa4682017-12-26 23:17:22 -050063 for tt in stream {
Alex Crichton9a4dca22018-03-28 06:32:19 -070064 match tt {
Alex Crichtona74a1c82018-05-16 10:20:44 -070065 TokenTree::Ident(sym) => {
66 entries.push(Entry::Ident(sym));
Michael Layzell2a60e252017-05-31 21:36:47 -040067 }
Alex Crichtona74a1c82018-05-16 10:20:44 -070068 TokenTree::Punct(op) => {
69 entries.push(Entry::Punct(op));
Michael Layzell2a60e252017-05-31 21:36:47 -040070 }
Alex Crichton9a4dca22018-03-28 06:32:19 -070071 TokenTree::Literal(l) => {
72 entries.push(Entry::Literal(l));
Michael Layzell2a60e252017-05-31 21:36:47 -040073 }
Alex Crichton9a4dca22018-03-28 06:32:19 -070074 TokenTree::Group(g) => {
Michael Layzell2a60e252017-05-31 21:36:47 -040075 // Record the index of the interesting entry, and store an
76 // `End(null)` there temporarially.
David Tolnay582a45e2018-09-08 17:56:31 -070077 seqs.push((entries.len(), g));
Michael Layzell2a60e252017-05-31 21:36:47 -040078 entries.push(Entry::End(ptr::null()));
79 }
80 }
81 }
82 // Add an `End` entry to the end with a reference to the enclosing token
83 // stream which was passed in.
84 entries.push(Entry::End(up));
85
86 // NOTE: This is done to ensure that we don't accidentally modify the
87 // length of the backing buffer. The backing buffer must remain at a
88 // constant address after this point, as we are going to store a raw
89 // pointer into it.
90 let mut entries = entries.into_boxed_slice();
David Tolnay582a45e2018-09-08 17:56:31 -070091 for (idx, group) in seqs {
Michael Layzell2a60e252017-05-31 21:36:47 -040092 // We know that this index refers to one of the temporary
93 // `End(null)` entries, and we know that the last entry is
94 // `End(up)`, so the next index is also valid.
95 let seq_up = &entries[idx + 1] as *const Entry;
96
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070097 // The end entry stored at the end of this Entry::Group should
98 // point to the Entry which follows the Group in the list.
David Tolnay582a45e2018-09-08 17:56:31 -070099 let inner = Self::inner_new(group.stream(), seq_up);
100 entries[idx] = Entry::Group(group, inner);
Michael Layzell2a60e252017-05-31 21:36:47 -0400101 }
102
David Tolnaydfc886b2018-01-06 08:03:09 -0800103 TokenBuffer { data: entries }
Michael Layzell2a60e252017-05-31 21:36:47 -0400104 }
105
David Tolnay7c3e77d2018-01-06 17:42:53 -0800106 /// Creates a `TokenBuffer` containing all the tokens from the input
107 /// `TokenStream`.
hcpl4b72a382018-04-04 14:50:24 +0300108 ///
109 /// *This method is available if Syn is built with both the `"parsing"` and
110 /// `"proc-macro"` features.*
David Tolnay278f9e32018-08-14 22:41:11 -0700111 #[cfg(all(
112 not(all(target_arch = "wasm32", target_os = "unknown")),
113 feature = "proc-macro"
114 ))]
David Tolnay7c3e77d2018-01-06 17:42:53 -0800115 pub fn new(stream: pm::TokenStream) -> TokenBuffer {
116 Self::new2(stream.into())
117 }
118
119 /// Creates a `TokenBuffer` containing all the tokens from the input
120 /// `TokenStream`.
121 pub fn new2(stream: TokenStream) -> TokenBuffer {
Michael Layzell2a60e252017-05-31 21:36:47 -0400122 Self::inner_new(stream, ptr::null())
123 }
124
David Tolnay7c3e77d2018-01-06 17:42:53 -0800125 /// Creates a cursor referencing the first token in the buffer and able to
126 /// traverse until the end of the buffer.
Michael Layzell2a60e252017-05-31 21:36:47 -0400127 pub fn begin(&self) -> Cursor {
David Tolnay51382052017-12-27 13:46:21 -0500128 unsafe { Cursor::create(&self.data[0], &self.data[self.data.len() - 1]) }
Michael Layzell2a60e252017-05-31 21:36:47 -0400129 }
130}
131
David Tolnay7c3e77d2018-01-06 17:42:53 -0800132/// A cheaply copyable cursor into a `TokenBuffer`.
133///
134/// This cursor holds a shared reference into the immutable data which is used
135/// internally to represent a `TokenStream`, and can be efficiently manipulated
136/// and copied around.
Michael Layzell2a60e252017-05-31 21:36:47 -0400137///
David Tolnaydfc886b2018-01-06 08:03:09 -0800138/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
Michael Layzell2a60e252017-05-31 21:36:47 -0400139/// object and get a cursor to its first token with `begin()`.
140///
141/// Two cursors are equal if they have the same location in the same input
142/// stream, and have the same scope.
David Tolnay7c3e77d2018-01-06 17:42:53 -0800143///
David Tolnay461d98e2018-01-07 11:07:19 -0800144/// *This type is available if Syn is built with the `"parsing"` feature.*
Michael Layzell2a60e252017-05-31 21:36:47 -0400145#[derive(Copy, Clone, Eq, PartialEq)]
146pub struct Cursor<'a> {
David Tolnay56924f42018-09-02 08:24:58 -0700147 // The current entry which the `Cursor` is pointing at.
Michael Layzell2a60e252017-05-31 21:36:47 -0400148 ptr: *const Entry,
David Tolnay56924f42018-09-02 08:24:58 -0700149 // This is the only `Entry::End(..)` object which this cursor is allowed to
150 // point at. All other `End` objects are skipped over in `Cursor::create`.
Michael Layzell2a60e252017-05-31 21:36:47 -0400151 scope: *const Entry,
David Tolnay56924f42018-09-02 08:24:58 -0700152 // Cursor is covariant in 'a. This field ensures that our pointers are still
153 // valid.
Michael Layzell2a60e252017-05-31 21:36:47 -0400154 marker: PhantomData<&'a Entry>,
155}
156
Michael Layzell2a60e252017-05-31 21:36:47 -0400157impl<'a> Cursor<'a> {
David Tolnay7c3e77d2018-01-06 17:42:53 -0800158 /// Creates a cursor referencing a static empty TokenStream.
Michael Layzell2a60e252017-05-31 21:36:47 -0400159 pub fn empty() -> Self {
Michael Layzell69cf9082017-06-03 12:15:58 -0400160 // It's safe in this situation for us to put an `Entry` object in global
161 // storage, despite it not actually being safe to send across threads
Alex Crichtona74a1c82018-05-16 10:20:44 -0700162 // (`Ident` is a reference into a thread-local table). This is because
163 // this entry never includes a `Ident` object.
Michael Layzell69cf9082017-06-03 12:15:58 -0400164 //
165 // This wrapper struct allows us to break the rules and put a `Sync`
166 // object in global storage.
167 struct UnsafeSyncEntry(Entry);
168 unsafe impl Sync for UnsafeSyncEntry {}
David Tolnay51382052017-12-27 13:46:21 -0500169 static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry));
Michael Layzell69cf9082017-06-03 12:15:58 -0400170
Michael Layzell2a60e252017-05-31 21:36:47 -0400171 Cursor {
Michael Layzell69cf9082017-06-03 12:15:58 -0400172 ptr: &EMPTY_ENTRY.0,
173 scope: &EMPTY_ENTRY.0,
Michael Layzell2a60e252017-05-31 21:36:47 -0400174 marker: PhantomData,
175 }
176 }
177
178 /// This create method intelligently exits non-explicitly-entered
179 /// `None`-delimited scopes when the cursor reaches the end of them,
180 /// allowing for them to be treated transparently.
181 unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
182 // NOTE: If we're looking at a `End(..)`, we want to advance the cursor
183 // past it, unless `ptr == scope`, which means that we're at the edge of
184 // our cursor's scope. We should only have `ptr != scope` at the exit
David Tolnayc10676a2017-12-27 23:42:36 -0500185 // from None-delimited groups entered with `ignore_none`.
Michael Layzell2a60e252017-05-31 21:36:47 -0400186 while let Entry::End(exit) = *ptr {
187 if ptr == scope {
188 break;
189 }
190 ptr = exit;
191 }
192
193 Cursor {
194 ptr: ptr,
195 scope: scope,
196 marker: PhantomData,
197 }
198 }
199
200 /// Get the current entry.
201 fn entry(self) -> &'a Entry {
202 unsafe { &*self.ptr }
203 }
204
205 /// Bump the cursor to point at the next token after the current one. This
206 /// is undefined behavior if the cursor is currently looking at an
207 /// `Entry::End`.
208 unsafe fn bump(self) -> Cursor<'a> {
209 Cursor::create(self.ptr.offset(1), self.scope)
210 }
211
David Tolnayc10676a2017-12-27 23:42:36 -0500212 /// If the cursor is looking at a `None`-delimited group, move it to look at
213 /// the first token inside instead. If the group is empty, this will move
214 /// the cursor past the `None`-delimited group.
Michael Layzell2a60e252017-05-31 21:36:47 -0400215 ///
216 /// WARNING: This mutates its argument.
217 fn ignore_none(&mut self) {
David Tolnay582a45e2018-09-08 17:56:31 -0700218 if let Entry::Group(ref group, ref buf) = *self.entry() {
219 if group.delimiter() == Delimiter::None {
220 // NOTE: We call `Cursor::create` here to make sure that
221 // situations where we should immediately exit the span after
222 // entering it are handled correctly.
223 unsafe {
224 *self = Cursor::create(&buf.data[0], self.scope);
225 }
Michael Layzell2a60e252017-05-31 21:36:47 -0400226 }
227 }
228 }
229
David Tolnay7c3e77d2018-01-06 17:42:53 -0800230 /// Checks whether the cursor is currently pointing at the end of its valid
231 /// scope.
Michael Layzell2a60e252017-05-31 21:36:47 -0400232 #[inline]
233 pub fn eof(self) -> bool {
234 // We're at eof if we're at the end of our scope.
235 self.ptr == self.scope
236 }
237
David Tolnay7c3e77d2018-01-06 17:42:53 -0800238 /// If the cursor is pointing at a `Group` with the given delimiter, returns
239 /// a cursor into that group and one pointing to the next `TokenTree`.
David Tolnay65729482017-12-31 16:14:50 -0500240 pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> {
David Tolnayc10676a2017-12-27 23:42:36 -0500241 // If we're not trying to enter a none-delimited group, we want to
Michael Layzell2a60e252017-05-31 21:36:47 -0400242 // ignore them. We have to make sure to _not_ ignore them when we want
243 // to enter them, of course. For obvious reasons.
David Tolnayc10676a2017-12-27 23:42:36 -0500244 if delim != Delimiter::None {
Michael Layzell2a60e252017-05-31 21:36:47 -0400245 self.ignore_none();
246 }
247
David Tolnay582a45e2018-09-08 17:56:31 -0700248 if let Entry::Group(ref group, ref buf) = *self.entry() {
249 if group.delimiter() == delim {
250 return Some((buf.begin(), group.span(), unsafe { self.bump() }));
Michael Layzell2a60e252017-05-31 21:36:47 -0400251 }
Michael Layzell2a60e252017-05-31 21:36:47 -0400252 }
David Tolnayc10676a2017-12-27 23:42:36 -0500253
254 None
Michael Layzell2a60e252017-05-31 21:36:47 -0400255 }
256
Alex Crichtona74a1c82018-05-16 10:20:44 -0700257 /// If the cursor is pointing at a `Ident`, returns it along with a cursor
David Tolnay7c3e77d2018-01-06 17:42:53 -0800258 /// pointing at the next `TokenTree`.
David Tolnay55a5f3a2018-05-20 18:00:51 -0700259 pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400260 self.ignore_none();
261 match *self.entry() {
David Tolnaya4319b72018-06-02 00:49:15 -0700262 Entry::Ident(ref ident) => Some((ident.clone(), unsafe { self.bump() })),
David Tolnay51382052017-12-27 13:46:21 -0500263 _ => None,
Michael Layzell2a60e252017-05-31 21:36:47 -0400264 }
265 }
266
Alex Crichtona74a1c82018-05-16 10:20:44 -0700267 /// If the cursor is pointing at an `Punct`, returns it along with a cursor
David Tolnay7c3e77d2018-01-06 17:42:53 -0800268 /// pointing at the next `TokenTree`.
David Tolnay55a5f3a2018-05-20 18:00:51 -0700269 pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400270 self.ignore_none();
271 match *self.entry() {
David Tolnay66cb0c42018-08-31 09:01:30 -0700272 Entry::Punct(ref op) if op.as_char() != '\'' => {
273 Some((op.clone(), unsafe { self.bump() }))
274 }
David Tolnay51382052017-12-27 13:46:21 -0500275 _ => None,
Michael Layzell2a60e252017-05-31 21:36:47 -0400276 }
277 }
278
David Tolnay7c3e77d2018-01-06 17:42:53 -0800279 /// If the cursor is pointing at a `Literal`, return it along with a cursor
280 /// pointing at the next `TokenTree`.
Alex Crichton9a4dca22018-03-28 06:32:19 -0700281 pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400282 self.ignore_none();
283 match *self.entry() {
Alex Crichton9a4dca22018-03-28 06:32:19 -0700284 Entry::Literal(ref lit) => Some((lit.clone(), unsafe { self.bump() })),
David Tolnay51382052017-12-27 13:46:21 -0500285 _ => None,
Michael Layzell2a60e252017-05-31 21:36:47 -0400286 }
287 }
288
David Tolnay66cb0c42018-08-31 09:01:30 -0700289 /// If the cursor is pointing at a `Lifetime`, returns it along with a
290 /// cursor pointing at the next `TokenTree`.
291 pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
292 self.ignore_none();
293 match *self.entry() {
294 Entry::Punct(ref op) if op.as_char() == '\'' && op.spacing() == Spacing::Joint => {
295 let next = unsafe { self.bump() };
296 match next.ident() {
297 Some((ident, rest)) => {
298 let lifetime = Lifetime {
299 apostrophe: op.span(),
300 ident: ident,
301 };
302 Some((lifetime, rest))
303 }
304 None => None,
305 }
306 }
307 _ => None,
308 }
309 }
310
David Tolnay7c3e77d2018-01-06 17:42:53 -0800311 /// Copies all remaining tokens visible from this cursor into a
312 /// `TokenStream`.
Michael Layzell2a60e252017-05-31 21:36:47 -0400313 pub fn token_stream(self) -> TokenStream {
314 let mut tts = Vec::new();
315 let mut cursor = self;
David Tolnay65729482017-12-31 16:14:50 -0500316 while let Some((tt, rest)) = cursor.token_tree() {
Michael Layzell2a60e252017-05-31 21:36:47 -0400317 tts.push(tt);
David Tolnay65729482017-12-31 16:14:50 -0500318 cursor = rest;
Michael Layzell2a60e252017-05-31 21:36:47 -0400319 }
320 tts.into_iter().collect()
321 }
322
David Tolnay7c3e77d2018-01-06 17:42:53 -0800323 /// If the cursor is pointing at a `TokenTree`, returns it along with a
324 /// cursor pointing at the next `TokenTree`.
Michael Layzell2a60e252017-05-31 21:36:47 -0400325 ///
David Tolnay7c3e77d2018-01-06 17:42:53 -0800326 /// Returns `None` if the cursor has reached the end of its stream.
327 ///
328 /// This method does not treat `None`-delimited groups as transparent, and
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700329 /// will return a `Group(None, ..)` if the cursor is looking at one.
David Tolnay65729482017-12-31 16:14:50 -0500330 pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400331 let tree = match *self.entry() {
David Tolnay582a45e2018-09-08 17:56:31 -0700332 Entry::Group(ref group, _) => group.clone().into(),
Alex Crichton9a4dca22018-03-28 06:32:19 -0700333 Entry::Literal(ref lit) => lit.clone().into(),
David Tolnaya4319b72018-06-02 00:49:15 -0700334 Entry::Ident(ref ident) => ident.clone().into(),
Alex Crichtona74a1c82018-05-16 10:20:44 -0700335 Entry::Punct(ref op) => op.clone().into(),
Michael Layzell2a60e252017-05-31 21:36:47 -0400336 Entry::End(..) => {
337 return None;
338 }
339 };
340
David Tolnay65729482017-12-31 16:14:50 -0500341 Some((tree, unsafe { self.bump() }))
Michael Layzell2a60e252017-05-31 21:36:47 -0400342 }
David Tolnay225efa22017-12-31 16:51:29 -0500343
344 /// Returns the `Span` of the current token, or `Span::call_site()` if this
345 /// cursor points to eof.
346 pub fn span(self) -> Span {
347 match *self.entry() {
David Tolnay582a45e2018-09-08 17:56:31 -0700348 Entry::Group(ref group, _) => group.span(),
Alex Crichton9a4dca22018-03-28 06:32:19 -0700349 Entry::Literal(ref l) => l.span(),
Alex Crichtona74a1c82018-05-16 10:20:44 -0700350 Entry::Ident(ref t) => t.span(),
351 Entry::Punct(ref o) => o.span(),
David Tolnay225efa22017-12-31 16:51:29 -0500352 Entry::End(..) => Span::call_site(),
353 }
354 }
Michael Layzell2a60e252017-05-31 21:36:47 -0400355}