blob: 0ed3bb4d9076404101023418ddf770f3ac58ccbf [file] [log] [blame]
Michael Layzell2a60e252017-05-31 21:36:47 -04001//! This module defines a cheaply-copyable cursor into a TokenStream's data.
2//!
3//! It does this by copying the data into a stably-addressed structured buffer,
4//! and holding raw pointers into that buffer to allow walking through delimited
5//! sequences cheaply.
6//!
7//! This module is heavily commented as it contains the only unsafe code in
8//! `syn`, and caution should be made when editing it. It provides a safe
9//! interface, but is fragile internally.
10
11use proc_macro2::*;
12
13use std::ptr;
14use std::fmt;
15use std::marker::PhantomData;
16
17/// Internal type which is used instead of `TokenTree` to represent a single
18/// `TokenTree` within a `SynomBuffer`.
19#[derive(Debug)]
20enum Entry {
21 /// Mimicing types from proc-macro.
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070022 Group(Span, Delimiter, SynomBuffer),
23 Term(Span, Term),
24 Op(Span, char, Spacing),
Michael Layzell2a60e252017-05-31 21:36:47 -040025 Literal(Span, Literal),
26 /// End entries contain a raw pointer to the entry from the containing
27 /// TokenTree.
28 End(*const Entry),
29}
30
Michael Layzell2a60e252017-05-31 21:36:47 -040031/// A buffer of data which contains a structured representation of the input
32/// `TokenStream` object.
33#[derive(Debug)]
34pub struct SynomBuffer {
35 // NOTE: Do not derive clone on this - there are raw pointers inside which
36 // will be messed up. Moving the `SynomBuffer` itself is safe as the actual
37 // backing slices won't be moved.
38 data: Box<[Entry]>,
39}
40
41impl SynomBuffer {
42 // NOTE: DO NOT MUTATE THE `Vec` RETURNED FROM THIS FUNCTION ONCE IT
43 // RETURNS, THE ADDRESS OF ITS BACKING MEMORY MUST REMAIN STABLE.
44 fn inner_new(stream: TokenStream, up: *const Entry) -> SynomBuffer {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070045 // Build up the entries list, recording the locations of any Groups
Michael Layzell2a60e252017-05-31 21:36:47 -040046 // in the list to be processed later.
47 let mut entries = Vec::new();
48 let mut seqs = Vec::new();
David Tolnay50fa4682017-12-26 23:17:22 -050049 for tt in stream {
Michael Layzell2a60e252017-05-31 21:36:47 -040050 match tt.kind {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070051 TokenNode::Term(sym) => {
52 entries.push(Entry::Term(tt.span, sym));
Michael Layzell2a60e252017-05-31 21:36:47 -040053 }
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070054 TokenNode::Op(chr, ok) => {
Michael Layzell2a60e252017-05-31 21:36:47 -040055 entries.push(Entry::Op(tt.span, chr, ok));
56 }
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070057 TokenNode::Literal(lit) => {
Michael Layzell2a60e252017-05-31 21:36:47 -040058 entries.push(Entry::Literal(tt.span, lit));
59 }
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070060 TokenNode::Group(delim, seq_stream) => {
Michael Layzell2a60e252017-05-31 21:36:47 -040061 // Record the index of the interesting entry, and store an
62 // `End(null)` there temporarially.
63 seqs.push((entries.len(), tt.span, delim, seq_stream));
64 entries.push(Entry::End(ptr::null()));
65 }
66 }
67 }
68 // Add an `End` entry to the end with a reference to the enclosing token
69 // stream which was passed in.
70 entries.push(Entry::End(up));
71
72 // NOTE: This is done to ensure that we don't accidentally modify the
73 // length of the backing buffer. The backing buffer must remain at a
74 // constant address after this point, as we are going to store a raw
75 // pointer into it.
76 let mut entries = entries.into_boxed_slice();
77 for (idx, span, delim, seq_stream) in seqs {
78 // We know that this index refers to one of the temporary
79 // `End(null)` entries, and we know that the last entry is
80 // `End(up)`, so the next index is also valid.
81 let seq_up = &entries[idx + 1] as *const Entry;
82
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070083 // The end entry stored at the end of this Entry::Group should
84 // point to the Entry which follows the Group in the list.
Michael Layzell2a60e252017-05-31 21:36:47 -040085 let inner = Self::inner_new(seq_stream, seq_up);
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -070086 entries[idx] = Entry::Group(span, delim, inner);
Michael Layzell2a60e252017-05-31 21:36:47 -040087 }
88
89 SynomBuffer {
90 data: entries
91 }
92 }
93
94 /// Create a new SynomBuffer, which contains the data from the given
95 /// TokenStream.
96 pub fn new(stream: TokenStream) -> SynomBuffer {
97 Self::inner_new(stream, ptr::null())
98 }
99
100 /// Create a cursor referencing the first token in the input.
101 pub fn begin(&self) -> Cursor {
102 unsafe {
103 Cursor::create(&self.data[0], &self.data[self.data.len() - 1])
104 }
105 }
106}
107
108#[derive(Copy, Clone, Debug)]
109pub struct SeqInfo<'a> {
110 pub span: Span,
111 pub inside: Cursor<'a>,
112 pub outside: Cursor<'a>,
113}
114
115/// A cursor into an input `TokenStream`'s data. This cursor holds a reference
116/// into the immutable data which is used internally to represent a
117/// `TokenStream`, and can be efficiently manipulated and copied around.
118///
119/// An empty `Cursor` can be created directly, or one may create a `SynomBuffer`
120/// object and get a cursor to its first token with `begin()`.
121///
122/// Two cursors are equal if they have the same location in the same input
123/// stream, and have the same scope.
124#[derive(Copy, Clone, Eq, PartialEq)]
125pub struct Cursor<'a> {
126 /// The current entry which the `Cursor` is pointing at.
127 ptr: *const Entry,
128 /// This is the only `Entry::End(..)` object which this cursor is allowed to
129 /// point at. All other `End` objects are skipped over in `Cursor::create`.
130 scope: *const Entry,
131 /// This uses the &'a reference which guarantees that these pointers are
132 /// still valid.
133 marker: PhantomData<&'a Entry>,
134}
135
Michael Layzell2a60e252017-05-31 21:36:47 -0400136impl<'a> Cursor<'a> {
137 /// Create a cursor referencing a static empty TokenStream.
138 pub fn empty() -> Self {
Michael Layzell69cf9082017-06-03 12:15:58 -0400139 // It's safe in this situation for us to put an `Entry` object in global
140 // storage, despite it not actually being safe to send across threads
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700141 // (`Term` is a reference into a thread-local table). This is because
142 // this entry never includes a `Term` object.
Michael Layzell69cf9082017-06-03 12:15:58 -0400143 //
144 // This wrapper struct allows us to break the rules and put a `Sync`
145 // object in global storage.
146 struct UnsafeSyncEntry(Entry);
147 unsafe impl Sync for UnsafeSyncEntry {}
148 static EMPTY_ENTRY: UnsafeSyncEntry =
149 UnsafeSyncEntry(Entry::End(0 as *const Entry));
150
Michael Layzell2a60e252017-05-31 21:36:47 -0400151 Cursor {
Michael Layzell69cf9082017-06-03 12:15:58 -0400152 ptr: &EMPTY_ENTRY.0,
153 scope: &EMPTY_ENTRY.0,
Michael Layzell2a60e252017-05-31 21:36:47 -0400154 marker: PhantomData,
155 }
156 }
157
158 /// This create method intelligently exits non-explicitly-entered
159 /// `None`-delimited scopes when the cursor reaches the end of them,
160 /// allowing for them to be treated transparently.
161 unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
162 // NOTE: If we're looking at a `End(..)`, we want to advance the cursor
163 // past it, unless `ptr == scope`, which means that we're at the edge of
164 // our cursor's scope. We should only have `ptr != scope` at the exit
165 // from None-delimited sequences entered with `ignore_none`.
166 while let Entry::End(exit) = *ptr {
167 if ptr == scope {
168 break;
169 }
170 ptr = exit;
171 }
172
173 Cursor {
174 ptr: ptr,
175 scope: scope,
176 marker: PhantomData,
177 }
178 }
179
180 /// Get the current entry.
181 fn entry(self) -> &'a Entry {
182 unsafe { &*self.ptr }
183 }
184
185 /// Bump the cursor to point at the next token after the current one. This
186 /// is undefined behavior if the cursor is currently looking at an
187 /// `Entry::End`.
188 unsafe fn bump(self) -> Cursor<'a> {
189 Cursor::create(self.ptr.offset(1), self.scope)
190 }
191
192 /// If the cursor is looking at a `None`-delimited sequence, move it to look
193 /// at the first token inside instead. If the sequence is empty, this will
194 /// move the cursor past the `None`-delimited sequence.
195 ///
196 /// WARNING: This mutates its argument.
197 fn ignore_none(&mut self) {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700198 if let Entry::Group(_, Delimiter::None, ref buf) = *self.entry() {
Michael Layzell2a60e252017-05-31 21:36:47 -0400199 // NOTE: We call `Cursor::create` here to make sure that situations
200 // where we should immediately exit the span after entering it are
201 // handled correctly.
202 unsafe {
203 *self = Cursor::create(&buf.data[0], self.scope);
204 }
205 }
206 }
207
208 /// Check if the cursor is currently pointing at the end of its valid scope.
209 #[inline]
210 pub fn eof(self) -> bool {
211 // We're at eof if we're at the end of our scope.
212 self.ptr == self.scope
213 }
214
215 /// If the cursor is pointing at a Seq with the given `Delimiter`, return a
216 /// cursor into that sequence, and one pointing to the next `TokenTree`.
217 pub fn seq(mut self, seq_delim: Delimiter) -> Option<SeqInfo<'a>> {
218 // If we're not trying to enter a none-delimited sequence, we want to
219 // ignore them. We have to make sure to _not_ ignore them when we want
220 // to enter them, of course. For obvious reasons.
221 if seq_delim != Delimiter::None {
222 self.ignore_none();
223 }
224
225 match *self.entry() {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700226 Entry::Group(span, delim, ref buf) => {
Michael Layzell2a60e252017-05-31 21:36:47 -0400227 if delim != seq_delim {
228 return None;
229 }
230
231 Some(SeqInfo {
232 span: span,
233 inside: buf.begin(),
234 outside: unsafe { self.bump() },
235 })
236 }
237 _ => None
238 }
239 }
240
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700241 /// If the cursor is pointing at a Term, return it and a cursor pointing at
Michael Layzell2a60e252017-05-31 21:36:47 -0400242 /// the next `TokenTree`.
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700243 pub fn word(mut self) -> Option<(Cursor<'a>, Span, Term)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400244 self.ignore_none();
245 match *self.entry() {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700246 Entry::Term(span, sym) => {
Michael Layzell2a60e252017-05-31 21:36:47 -0400247 Some((
248 unsafe { self.bump() },
249 span,
250 sym,
251 ))
252 }
253 _ => None
254 }
255 }
256
257 /// If the cursor is pointing at an Op, return it and a cursor pointing
258 /// at the next `TokenTree`.
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700259 pub fn op(mut self) -> Option<(Cursor<'a>, Span, char, Spacing)> {
Michael Layzell2a60e252017-05-31 21:36:47 -0400260 self.ignore_none();
261 match *self.entry() {
262 Entry::Op(span, chr, kind) => {
263 Some((
264 unsafe { self.bump() },
265 span,
266 chr,
267 kind,
268 ))
269 }
270 _ => None
271 }
272 }
273
274 /// If the cursor is pointing at a Literal, return it and a cursor pointing
275 /// at the next `TokenTree`.
276 pub fn literal(mut self) -> Option<(Cursor<'a>, Span, Literal)> {
277 self.ignore_none();
278 match *self.entry() {
279 Entry::Literal(span, ref lit) => {
280 Some((
281 unsafe { self.bump() },
282 span,
283 lit.clone(),
284 ))
285 }
286 _ => None
287 }
288 }
289
290 /// Copy all remaining tokens visible from this cursor into a `TokenStream`.
291 pub fn token_stream(self) -> TokenStream {
292 let mut tts = Vec::new();
293 let mut cursor = self;
294 while let Some((next, tt)) = cursor.token_tree() {
295 tts.push(tt);
296 cursor = next;
297 }
298 tts.into_iter().collect()
299 }
300
301 /// If the cursor is looking at a `TokenTree`, returns it along with a
302 /// cursor pointing to the next token in the sequence, otherwise returns
303 /// `None`.
304 ///
305 /// This method does not treat `None`-delimited sequences as invisible, and
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700306 /// will return a `Group(None, ..)` if the cursor is looking at one.
Michael Layzell2a60e252017-05-31 21:36:47 -0400307 pub fn token_tree(self) -> Option<(Cursor<'a>, TokenTree)> {
308 let tree = match *self.entry() {
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700309 Entry::Group(span, delim, ref buf) => {
Michael Layzell2a60e252017-05-31 21:36:47 -0400310 let stream = buf.begin().token_stream();
311 TokenTree {
312 span: span,
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700313 kind: TokenNode::Group(delim, stream),
Michael Layzell2a60e252017-05-31 21:36:47 -0400314 }
315 }
316 Entry::Literal(span, ref lit) => {
317 TokenTree {
318 span: span,
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700319 kind: TokenNode::Literal(lit.clone()),
Michael Layzell2a60e252017-05-31 21:36:47 -0400320 }
321 }
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700322 Entry::Term(span, sym) => {
Michael Layzell2a60e252017-05-31 21:36:47 -0400323 TokenTree {
324 span: span,
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700325 kind: TokenNode::Term(sym),
Michael Layzell2a60e252017-05-31 21:36:47 -0400326 }
327 }
328 Entry::Op(span, chr, kind) => {
329 TokenTree {
330 span: span,
Alex Crichtonf9e8f1a2017-07-05 18:20:44 -0700331 kind: TokenNode::Op(chr, kind),
Michael Layzell2a60e252017-05-31 21:36:47 -0400332 }
333 }
334 Entry::End(..) => {
335 return None;
336 }
337 };
338
339 Some((
340 unsafe { self.bump() },
341 tree
342 ))
343 }
344}
345
346// We do a custom implementation for `Debug` as the default implementation is
347// pretty useless.
348impl<'a> fmt::Debug for Cursor<'a> {
349 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Nika Layzellae81b372017-12-05 14:12:33 -0500350 // Print what the cursor is currently looking at.
351 // This will look like Cursor("some remaining tokens here")
352 f.debug_tuple("Cursor")
353 .field(&self.token_stream().to_string())
Michael Layzell2a60e252017-05-31 21:36:47 -0400354 .finish()
355 }
356}