Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 1 | //! This module defines a cheaply-copyable cursor into a TokenStream's data. |
| 2 | //! |
| 3 | //! It does this by copying the data into a stably-addressed structured buffer, |
| 4 | //! and holding raw pointers into that buffer to allow walking through delimited |
| 5 | //! sequences cheaply. |
| 6 | //! |
| 7 | //! This module is heavily commented as it contains the only unsafe code in |
| 8 | //! `syn`, and caution should be made when editing it. It provides a safe |
| 9 | //! interface, but is fragile internally. |
| 10 | |
| 11 | use proc_macro2::*; |
| 12 | |
| 13 | use std::ptr; |
| 14 | use std::fmt; |
| 15 | use std::marker::PhantomData; |
| 16 | |
| 17 | /// Internal type which is used instead of `TokenTree` to represent a single |
| 18 | /// `TokenTree` within a `SynomBuffer`. |
| 19 | #[derive(Debug)] |
| 20 | enum Entry { |
| 21 | /// Mimicing types from proc-macro. |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 22 | Group(Span, Delimiter, SynomBuffer), |
| 23 | Term(Span, Term), |
| 24 | Op(Span, char, Spacing), |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 25 | Literal(Span, Literal), |
| 26 | /// End entries contain a raw pointer to the entry from the containing |
| 27 | /// TokenTree. |
| 28 | End(*const Entry), |
| 29 | } |
| 30 | |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 31 | /// A buffer of data which contains a structured representation of the input |
| 32 | /// `TokenStream` object. |
| 33 | #[derive(Debug)] |
| 34 | pub struct SynomBuffer { |
| 35 | // NOTE: Do not derive clone on this - there are raw pointers inside which |
| 36 | // will be messed up. Moving the `SynomBuffer` itself is safe as the actual |
| 37 | // backing slices won't be moved. |
| 38 | data: Box<[Entry]>, |
| 39 | } |
| 40 | |
| 41 | impl SynomBuffer { |
| 42 | // NOTE: DO NOT MUTATE THE `Vec` RETURNED FROM THIS FUNCTION ONCE IT |
| 43 | // RETURNS, THE ADDRESS OF ITS BACKING MEMORY MUST REMAIN STABLE. |
| 44 | fn inner_new(stream: TokenStream, up: *const Entry) -> SynomBuffer { |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 45 | // Build up the entries list, recording the locations of any Groups |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 46 | // in the list to be processed later. |
| 47 | let mut entries = Vec::new(); |
| 48 | let mut seqs = Vec::new(); |
| 49 | for tt in stream.into_iter() { |
| 50 | match tt.kind { |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 51 | TokenNode::Term(sym) => { |
| 52 | entries.push(Entry::Term(tt.span, sym)); |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 53 | } |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 54 | TokenNode::Op(chr, ok) => { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 55 | entries.push(Entry::Op(tt.span, chr, ok)); |
| 56 | } |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 57 | TokenNode::Literal(lit) => { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 58 | entries.push(Entry::Literal(tt.span, lit)); |
| 59 | } |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 60 | TokenNode::Group(delim, seq_stream) => { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 61 | // Record the index of the interesting entry, and store an |
| 62 | // `End(null)` there temporarially. |
| 63 | seqs.push((entries.len(), tt.span, delim, seq_stream)); |
| 64 | entries.push(Entry::End(ptr::null())); |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | // Add an `End` entry to the end with a reference to the enclosing token |
| 69 | // stream which was passed in. |
| 70 | entries.push(Entry::End(up)); |
| 71 | |
| 72 | // NOTE: This is done to ensure that we don't accidentally modify the |
| 73 | // length of the backing buffer. The backing buffer must remain at a |
| 74 | // constant address after this point, as we are going to store a raw |
| 75 | // pointer into it. |
| 76 | let mut entries = entries.into_boxed_slice(); |
| 77 | for (idx, span, delim, seq_stream) in seqs { |
| 78 | // We know that this index refers to one of the temporary |
| 79 | // `End(null)` entries, and we know that the last entry is |
| 80 | // `End(up)`, so the next index is also valid. |
| 81 | let seq_up = &entries[idx + 1] as *const Entry; |
| 82 | |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 83 | // The end entry stored at the end of this Entry::Group should |
| 84 | // point to the Entry which follows the Group in the list. |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 85 | let inner = Self::inner_new(seq_stream, seq_up); |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 86 | entries[idx] = Entry::Group(span, delim, inner); |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 87 | } |
| 88 | |
| 89 | SynomBuffer { |
| 90 | data: entries |
| 91 | } |
| 92 | } |
| 93 | |
| 94 | /// Create a new SynomBuffer, which contains the data from the given |
| 95 | /// TokenStream. |
| 96 | pub fn new(stream: TokenStream) -> SynomBuffer { |
| 97 | Self::inner_new(stream, ptr::null()) |
| 98 | } |
| 99 | |
| 100 | /// Create a cursor referencing the first token in the input. |
| 101 | pub fn begin(&self) -> Cursor { |
| 102 | unsafe { |
| 103 | Cursor::create(&self.data[0], &self.data[self.data.len() - 1]) |
| 104 | } |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | #[derive(Copy, Clone, Debug)] |
| 109 | pub struct SeqInfo<'a> { |
| 110 | pub span: Span, |
| 111 | pub inside: Cursor<'a>, |
| 112 | pub outside: Cursor<'a>, |
| 113 | } |
| 114 | |
| 115 | /// A cursor into an input `TokenStream`'s data. This cursor holds a reference |
| 116 | /// into the immutable data which is used internally to represent a |
| 117 | /// `TokenStream`, and can be efficiently manipulated and copied around. |
| 118 | /// |
| 119 | /// An empty `Cursor` can be created directly, or one may create a `SynomBuffer` |
| 120 | /// object and get a cursor to its first token with `begin()`. |
| 121 | /// |
| 122 | /// Two cursors are equal if they have the same location in the same input |
| 123 | /// stream, and have the same scope. |
| 124 | #[derive(Copy, Clone, Eq, PartialEq)] |
| 125 | pub struct Cursor<'a> { |
| 126 | /// The current entry which the `Cursor` is pointing at. |
| 127 | ptr: *const Entry, |
| 128 | /// This is the only `Entry::End(..)` object which this cursor is allowed to |
| 129 | /// point at. All other `End` objects are skipped over in `Cursor::create`. |
| 130 | scope: *const Entry, |
| 131 | /// This uses the &'a reference which guarantees that these pointers are |
| 132 | /// still valid. |
| 133 | marker: PhantomData<&'a Entry>, |
| 134 | } |
| 135 | |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 136 | impl<'a> Cursor<'a> { |
| 137 | /// Create a cursor referencing a static empty TokenStream. |
| 138 | pub fn empty() -> Self { |
Michael Layzell | 69cf908 | 2017-06-03 12:15:58 -0400 | [diff] [blame] | 139 | // It's safe in this situation for us to put an `Entry` object in global |
| 140 | // storage, despite it not actually being safe to send across threads |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 141 | // (`Term` is a reference into a thread-local table). This is because |
| 142 | // this entry never includes a `Term` object. |
Michael Layzell | 69cf908 | 2017-06-03 12:15:58 -0400 | [diff] [blame] | 143 | // |
| 144 | // This wrapper struct allows us to break the rules and put a `Sync` |
| 145 | // object in global storage. |
| 146 | struct UnsafeSyncEntry(Entry); |
| 147 | unsafe impl Sync for UnsafeSyncEntry {} |
| 148 | static EMPTY_ENTRY: UnsafeSyncEntry = |
| 149 | UnsafeSyncEntry(Entry::End(0 as *const Entry)); |
| 150 | |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 151 | Cursor { |
Michael Layzell | 69cf908 | 2017-06-03 12:15:58 -0400 | [diff] [blame] | 152 | ptr: &EMPTY_ENTRY.0, |
| 153 | scope: &EMPTY_ENTRY.0, |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 154 | marker: PhantomData, |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | /// This create method intelligently exits non-explicitly-entered |
| 159 | /// `None`-delimited scopes when the cursor reaches the end of them, |
| 160 | /// allowing for them to be treated transparently. |
| 161 | unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self { |
| 162 | // NOTE: If we're looking at a `End(..)`, we want to advance the cursor |
| 163 | // past it, unless `ptr == scope`, which means that we're at the edge of |
| 164 | // our cursor's scope. We should only have `ptr != scope` at the exit |
| 165 | // from None-delimited sequences entered with `ignore_none`. |
| 166 | while let Entry::End(exit) = *ptr { |
| 167 | if ptr == scope { |
| 168 | break; |
| 169 | } |
| 170 | ptr = exit; |
| 171 | } |
| 172 | |
| 173 | Cursor { |
| 174 | ptr: ptr, |
| 175 | scope: scope, |
| 176 | marker: PhantomData, |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | /// Get the current entry. |
| 181 | fn entry(self) -> &'a Entry { |
| 182 | unsafe { &*self.ptr } |
| 183 | } |
| 184 | |
| 185 | /// Bump the cursor to point at the next token after the current one. This |
| 186 | /// is undefined behavior if the cursor is currently looking at an |
| 187 | /// `Entry::End`. |
| 188 | unsafe fn bump(self) -> Cursor<'a> { |
| 189 | Cursor::create(self.ptr.offset(1), self.scope) |
| 190 | } |
| 191 | |
| 192 | /// If the cursor is looking at a `None`-delimited sequence, move it to look |
| 193 | /// at the first token inside instead. If the sequence is empty, this will |
| 194 | /// move the cursor past the `None`-delimited sequence. |
| 195 | /// |
| 196 | /// WARNING: This mutates its argument. |
| 197 | fn ignore_none(&mut self) { |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 198 | if let Entry::Group(_, Delimiter::None, ref buf) = *self.entry() { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 199 | // NOTE: We call `Cursor::create` here to make sure that situations |
| 200 | // where we should immediately exit the span after entering it are |
| 201 | // handled correctly. |
| 202 | unsafe { |
| 203 | *self = Cursor::create(&buf.data[0], self.scope); |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | /// Check if the cursor is currently pointing at the end of its valid scope. |
| 209 | #[inline] |
| 210 | pub fn eof(self) -> bool { |
| 211 | // We're at eof if we're at the end of our scope. |
| 212 | self.ptr == self.scope |
| 213 | } |
| 214 | |
| 215 | /// If the cursor is pointing at a Seq with the given `Delimiter`, return a |
| 216 | /// cursor into that sequence, and one pointing to the next `TokenTree`. |
| 217 | pub fn seq(mut self, seq_delim: Delimiter) -> Option<SeqInfo<'a>> { |
| 218 | // If we're not trying to enter a none-delimited sequence, we want to |
| 219 | // ignore them. We have to make sure to _not_ ignore them when we want |
| 220 | // to enter them, of course. For obvious reasons. |
| 221 | if seq_delim != Delimiter::None { |
| 222 | self.ignore_none(); |
| 223 | } |
| 224 | |
| 225 | match *self.entry() { |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 226 | Entry::Group(span, delim, ref buf) => { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 227 | if delim != seq_delim { |
| 228 | return None; |
| 229 | } |
| 230 | |
| 231 | Some(SeqInfo { |
| 232 | span: span, |
| 233 | inside: buf.begin(), |
| 234 | outside: unsafe { self.bump() }, |
| 235 | }) |
| 236 | } |
| 237 | _ => None |
| 238 | } |
| 239 | } |
| 240 | |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 241 | /// If the cursor is pointing at a Term, return it and a cursor pointing at |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 242 | /// the next `TokenTree`. |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 243 | pub fn word(mut self) -> Option<(Cursor<'a>, Span, Term)> { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 244 | self.ignore_none(); |
| 245 | match *self.entry() { |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 246 | Entry::Term(span, sym) => { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 247 | Some(( |
| 248 | unsafe { self.bump() }, |
| 249 | span, |
| 250 | sym, |
| 251 | )) |
| 252 | } |
| 253 | _ => None |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | /// If the cursor is pointing at an Op, return it and a cursor pointing |
| 258 | /// at the next `TokenTree`. |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 259 | pub fn op(mut self) -> Option<(Cursor<'a>, Span, char, Spacing)> { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 260 | self.ignore_none(); |
| 261 | match *self.entry() { |
| 262 | Entry::Op(span, chr, kind) => { |
| 263 | Some(( |
| 264 | unsafe { self.bump() }, |
| 265 | span, |
| 266 | chr, |
| 267 | kind, |
| 268 | )) |
| 269 | } |
| 270 | _ => None |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | /// If the cursor is pointing at a Literal, return it and a cursor pointing |
| 275 | /// at the next `TokenTree`. |
| 276 | pub fn literal(mut self) -> Option<(Cursor<'a>, Span, Literal)> { |
| 277 | self.ignore_none(); |
| 278 | match *self.entry() { |
| 279 | Entry::Literal(span, ref lit) => { |
| 280 | Some(( |
| 281 | unsafe { self.bump() }, |
| 282 | span, |
| 283 | lit.clone(), |
| 284 | )) |
| 285 | } |
| 286 | _ => None |
| 287 | } |
| 288 | } |
| 289 | |
| 290 | /// Copy all remaining tokens visible from this cursor into a `TokenStream`. |
| 291 | pub fn token_stream(self) -> TokenStream { |
| 292 | let mut tts = Vec::new(); |
| 293 | let mut cursor = self; |
| 294 | while let Some((next, tt)) = cursor.token_tree() { |
| 295 | tts.push(tt); |
| 296 | cursor = next; |
| 297 | } |
| 298 | tts.into_iter().collect() |
| 299 | } |
| 300 | |
| 301 | /// If the cursor is looking at a `TokenTree`, returns it along with a |
| 302 | /// cursor pointing to the next token in the sequence, otherwise returns |
| 303 | /// `None`. |
| 304 | /// |
| 305 | /// This method does not treat `None`-delimited sequences as invisible, and |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 306 | /// will return a `Group(None, ..)` if the cursor is looking at one. |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 307 | pub fn token_tree(self) -> Option<(Cursor<'a>, TokenTree)> { |
| 308 | let tree = match *self.entry() { |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 309 | Entry::Group(span, delim, ref buf) => { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 310 | let stream = buf.begin().token_stream(); |
| 311 | TokenTree { |
| 312 | span: span, |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 313 | kind: TokenNode::Group(delim, stream), |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 314 | } |
| 315 | } |
| 316 | Entry::Literal(span, ref lit) => { |
| 317 | TokenTree { |
| 318 | span: span, |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 319 | kind: TokenNode::Literal(lit.clone()), |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 320 | } |
| 321 | } |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 322 | Entry::Term(span, sym) => { |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 323 | TokenTree { |
| 324 | span: span, |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 325 | kind: TokenNode::Term(sym), |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 326 | } |
| 327 | } |
| 328 | Entry::Op(span, chr, kind) => { |
| 329 | TokenTree { |
| 330 | span: span, |
Alex Crichton | f9e8f1a | 2017-07-05 18:20:44 -0700 | [diff] [blame] | 331 | kind: TokenNode::Op(chr, kind), |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 332 | } |
| 333 | } |
| 334 | Entry::End(..) => { |
| 335 | return None; |
| 336 | } |
| 337 | }; |
| 338 | |
| 339 | Some(( |
| 340 | unsafe { self.bump() }, |
| 341 | tree |
| 342 | )) |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | // We do a custom implementation for `Debug` as the default implementation is |
| 347 | // pretty useless. |
| 348 | impl<'a> fmt::Debug for Cursor<'a> { |
| 349 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
Nika Layzell | ae81b37 | 2017-12-05 14:12:33 -0500 | [diff] [blame] | 350 | // Print what the cursor is currently looking at. |
| 351 | // This will look like Cursor("some remaining tokens here") |
| 352 | f.debug_tuple("Cursor") |
| 353 | .field(&self.token_stream().to_string()) |
Michael Layzell | 2a60e25 | 2017-05-31 21:36:47 -0400 | [diff] [blame] | 354 | .finish() |
| 355 | } |
| 356 | } |