Initial implementation of stable meaningful spans
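
The stable (non-nightly) fallback previously used a zero-sized Span, so
every token reported the same meaningless position. This patch adds a
thread-local Codemap: each string passed to TokenStream::from_str is
registered as a FileInfo with its own range of global byte offsets, and
Span becomes a { lo, hi } pair into that offset space. The parser now
threads a Cursor (remaining text plus its offset) instead of a bare
&str so it can record where each token tree begins and ends, and
Span::source_file(), start(), end(), and join() resolve positions
against the codemap.

Rough usage sketch (illustrative only; assumes the public Span wrapper
in lib.rs forwards these new methods):

    let ts: TokenStream = "fn answer() -> u32 { 42 }".parse().unwrap();
    for tree in ts {
        // start()/end() report 1-based lines and 0-based columns.
        println!("{:?}..{:?} in {}",
                 tree.span.start(),
                 tree.span.end(),
                 tree.span.source_file().as_str());
    }
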
diff --git a/src/stable.rs b/src/stable.rs
index 8277d4c..8b951e5 100644
--- a/src/stable.rs
+++ b/src/stable.rs
@@ -1,6 +1,7 @@
use std::ascii;
use std::borrow::Borrow;
use std::cell::RefCell;
+use std::cmp;
use std::collections::HashMap;
use std::fmt;
use std::iter;
@@ -10,9 +11,10 @@
use std::str::FromStr;
use std::vec;
+use memchr;
use proc_macro;
use unicode_xid::UnicodeXID;
-use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
+use strnom::{Cursor, PResult, skip_whitespace, block_comment, whitespace, word_break};
use {TokenTree, TokenNode, Delimiter, Spacing};
@@ -38,7 +40,18 @@
type Err = LexError;
fn from_str(src: &str) -> Result<TokenStream, LexError> {
- match token_stream(src) {
+ // Register the input string as a new "file" in the codemap so its tokens get real spans
+ let cursor = CODEMAP.with(|cm| {
+ let mut cm = cm.borrow_mut();
+ let name = format!("<parsed string {}>", cm.files.len());
+ let span = cm.add_file(&name, src);
+ Cursor {
+ rest: src,
+ off: span.lo,
+ }
+ });
+
+ match token_stream(cursor) {
Ok((input, output)) => {
if skip_whitespace(input).len() != 0 {
Err(LexError)
@@ -137,16 +150,188 @@
}
}
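+/// The source file of a given `Span`.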
+#[derive(Clone, PartialEq, Eq)]
+pub struct SourceFile {
+ name: String,
+}
+
+impl SourceFile {
+ /// Get the path to this source file as a string.
+ pub fn as_str(&self) -> &str {
+ &self.name
+ }
+
+ pub fn is_real(&self) -> bool {
+ // XXX(nika): Support real files in the future?
+ false
+ }
+}
+
+impl AsRef<str> for SourceFile {
+ fn as_ref(&self) -> &str {
+ self.as_str()
+ }
+}
+
+impl PartialEq<str> for SourceFile {
+ fn eq(&self, other: &str) -> bool {
+ self.as_ref() == other
+ }
+}
+
+impl fmt::Debug for SourceFile {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("SourceFile")
+ .field("path", &self.as_str())
+ .field("is_real", &self.is_real())
+ .finish()
+ }
+}
+
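+/// A line-column pair giving the start or end of a `Span`.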
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct LineColumn {
+ pub line: usize,
+ pub column: usize,
+}
+
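+// Thread-local codemap recording every source string parsed on this thread, so
+// that spans can be resolved back to file names and line/column positions.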
+thread_local! {
+ static CODEMAP: RefCell<Codemap> = RefCell::new(Codemap {
+ // NOTE: We start with a single dummy file which all call_site() and
+ // def_site() spans reference.
+ files: vec![FileInfo {
+ name: "<unspecified>".to_owned(),
+ span: Span { lo: 0, hi: 0 },
+ lines: vec![0],
+ }],
+ });
+}
+
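+/// Metadata for one registered source string: its name, the range of global
+/// byte offsets assigned to it, and the byte offset at which each line starts.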
+struct FileInfo {
+ name: String,
+ span: Span,
+ lines: Vec<usize>,
+}
+
+impl FileInfo {
+ fn offset_line_column(&self, offset: usize) -> LineColumn {
+ assert!(self.span_within(Span { lo: offset as u32, hi: offset as u32 }));
+ let offset = offset - self.span.lo as usize;
+ match self.lines.binary_search(&offset) {
+ Ok(found) => LineColumn {
+ line: found + 1,
+ column: 0
+ },
+ Err(idx) => LineColumn {
+ line: idx,
+ column: offset - self.lines[idx - 1]
+ },
+ }
+ }
+
+ fn span_within(&self, span: Span) -> bool {
+ span.lo >= self.span.lo && span.hi <= self.span.hi
+ }
+}
+
+/// Computes the byte offset at which each line of the given source starts.
+fn lines_offsets(s: &[u8]) -> Vec<usize> {
+ let mut lines = vec![0];
+ let mut prev = 0;
+ while let Some(len) = memchr::memchr(b'\n', &s[prev..]) {
+ prev += len + 1;
+ lines.push(prev);
+ }
+ lines
+}
+
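+/// The per-thread collection of all source strings registered via `add_file`.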
+struct Codemap {
+ files: Vec<FileInfo>,
+}
+
+impl Codemap {
+ fn next_start_pos(&self) -> u32 {
+ // Add 1 so there's always space between files.
+ //
+ // We'll always have at least 1 file, as we initialize our files list
+ // with a dummy file.
+ self.files.last().unwrap().span.hi + 1
+ }
+
+ fn add_file(&mut self, name: &str, src: &str) -> Span {
+ let lines = lines_offsets(src.as_bytes());
+ let lo = self.next_start_pos();
+ // XXX(nika): Should we bother doing a checked cast or checked add here?
+ let span = Span { lo: lo, hi: lo + (src.len() as u32) };
+
+ self.files.push(FileInfo {
+ name: name.to_owned(),
+ span: span,
+ lines: lines,
+ });
+
+ span
+ }
+
+ fn fileinfo(&self, span: Span) -> &FileInfo {
+ for file in &self.files {
+ if file.span_within(span) {
+ return file;
+ }
+ }
+ panic!("Invalid span with no related FileInfo!");
+ }
+}
+
#[derive(Clone, Copy, Debug)]
-pub struct Span;
+pub struct Span { lo: u32, hi: u32 }
impl Span {
pub fn call_site() -> Span {
- Span
+ Span { lo: 0, hi: 0 }
}
pub fn def_site() -> Span {
- Span
+ Span { lo: 0, hi: 0 }
+ }
+
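+ /// The original source file this span points into.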
+ pub fn source_file(&self) -> SourceFile {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ let fi = cm.fileinfo(*self);
+ SourceFile {
+ name: fi.name.clone(),
+ }
+ })
+ }
+
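+ /// The start of this span: 1-based line, 0-based column.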
+ pub fn start(&self) -> LineColumn {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ let fi = cm.fileinfo(*self);
+ fi.offset_line_column(self.lo as usize)
+ })
+ }
+
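+ /// The end of this span: 1-based line, 0-based column.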
+ pub fn end(&self) -> LineColumn {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ let fi = cm.fileinfo(*self);
+ fi.offset_line_column(self.hi as usize)
+ })
+ }
+
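+ /// Create a span covering both `self` and `other`, or `None` if they were
+ /// parsed from different strings.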
+ pub fn join(&self, other: Span) -> Option<Span> {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ // If `other` is not within the same FileInfo as us, return None.
+ if !cm.fileinfo(*self).span_within(other) {
+ return None;
+ }
+ Some(Span {
+ lo: cmp::min(self.lo, other.lo),
+ hi: cmp::max(self.hi, other.hi),
+ })
+ })
}
}
@@ -349,13 +534,19 @@
|trees| ::TokenStream(TokenStream { inner: trees })
));
-named!(token_tree -> TokenTree,
- map!(token_kind, |s: TokenNode| {
- TokenTree {
- span: ::Span(Span),
- kind: s,
- }
- }));
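+/// Parse one token tree, recording the byte offsets before and after the token
+/// so the resulting `TokenTree` carries a meaningful `Span`.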
+fn token_tree(input: Cursor) -> PResult<TokenTree> {
+ let input = skip_whitespace(input);
+ let lo = input.off;
+ let (input, kind) = token_kind(input)?;
+ let hi = input.off;
+ Ok((input, TokenTree {
+ span: ::Span(Span {
+ lo: lo,
+ hi: hi,
+ }),
+ kind: kind,
+ }))
+}
named!(token_kind -> TokenNode, alt!(
map!(delimited, |(d, s)| TokenNode::Group(d, s))
@@ -387,7 +578,7 @@
) => { |ts| (Delimiter::Brace, ts) }
));
-fn symbol(mut input: &str) -> PResult<TokenNode> {
+fn symbol(mut input: Cursor) -> PResult<TokenNode> {
input = skip_whitespace(input);
let mut chars = input.char_indices();
@@ -410,14 +601,14 @@
}
}
- if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
+ if lifetime && &input.rest[..end] != "'static" && KEYWORDS.contains(&&input.rest[1..end]) {
Err(LexError)
} else {
- let (a, b) = input.split_at(end);
+ let a = &input.rest[..end];
if a == "_" {
- Ok((b, TokenNode::Op('_', Spacing::Alone)))
+ Ok((input.advance(end), TokenNode::Op('_', Spacing::Alone)))
} else {
- Ok((b, TokenNode::Term(::Term::intern(a))))
+ Ok((input.advance(end), TokenNode::Term(::Term::intern(a))))
}
}
}
@@ -433,7 +624,7 @@
"yield",
];
-fn literal(input: &str) -> PResult<::Literal> {
+fn literal(input: Cursor) -> PResult<::Literal> {
let input_no_ws = skip_whitespace(input);
match literal_nocapture(input_no_ws) {
@@ -441,7 +632,7 @@
let start = input.len() - input_no_ws.len();
let len = input_no_ws.len() - a.len();
let end = start + len;
- Ok((a, ::Literal(Literal(input[start..end].to_string()))))
+ Ok((a, ::Literal(Literal(input.rest[start..end].to_string()))))
}
Err(LexError) => Err(LexError),
}
@@ -480,12 +671,12 @@
tag!("\"")
));
-fn cooked_string(input: &str) -> PResult<()> {
+fn cooked_string(input: Cursor) -> PResult<()> {
let mut chars = input.char_indices().peekable();
while let Some((byte_offset, ch)) = chars.next() {
match ch {
'"' => {
- return Ok((&input[byte_offset..], ()));
+ return Ok((input.advance(byte_offset), ()));
}
'\r' => {
if let Some((_, '\n')) = chars.next() {
@@ -544,12 +735,12 @@
) => { |_| () }
));
-fn cooked_byte_string(mut input: &str) -> PResult<()> {
+fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
let mut bytes = input.bytes().enumerate();
'outer: while let Some((offset, b)) = bytes.next() {
match b {
b'"' => {
- return Ok((&input[offset..], ()));
+ return Ok((input.advance(offset), ()));
}
b'\r' => {
if let Some((_, b'\n')) = bytes.next() {
@@ -574,10 +765,10 @@
Some((_, b'"')) => {}
Some((newline, b'\n')) |
Some((newline, b'\r')) => {
- let rest = &input[newline + 1..];
+ let rest = input.advance(newline + 1);
for (offset, ch) in rest.char_indices() {
if !ch.is_whitespace() {
- input = &rest[offset..];
+ input = rest.advance(offset);
bytes = input.bytes().enumerate();
continue 'outer;
}
@@ -594,7 +785,7 @@
Err(LexError)
}
-fn raw_string(input: &str) -> PResult<()> {
+fn raw_string(input: Cursor) -> PResult<()> {
let mut chars = input.char_indices();
let mut n = 0;
while let Some((byte_offset, ch)) = chars.next() {
@@ -609,8 +800,8 @@
}
for (byte_offset, ch) in chars {
match ch {
- '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
- let rest = &input[byte_offset + 1 + n..];
+ '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => {
+ let rest = input.advance(byte_offset + 1 + n);
return Ok((rest, ()))
}
'\r' => {}
@@ -628,7 +819,7 @@
(())
));
-fn cooked_byte(input: &str) -> PResult<()> {
+fn cooked_byte(input: Cursor) -> PResult<()> {
let mut bytes = input.bytes().enumerate();
let ok = match bytes.next().map(|(_, b)| b) {
Some(b'\\') => {
@@ -648,8 +839,8 @@
};
if ok {
match bytes.next() {
- Some((offset, _)) => Ok((&input[offset..], ())),
- None => Ok(("", ())),
+ Some((offset, _)) => Ok((input.advance(offset), ())),
+ None => Ok((input.advance(input.len()), ())),
}
} else {
Err(LexError)
@@ -663,7 +854,7 @@
(())
));
-fn cooked_char(input: &str) -> PResult<()> {
+fn cooked_char(input: Cursor) -> PResult<()> {
let mut chars = input.char_indices();
let ok = match chars.next().map(|(_, ch)| ch) {
Some('\\') => {
@@ -683,7 +874,10 @@
ch => ch.is_some(),
};
if ok {
- Ok((chars.as_str(), ()))
+ match chars.next() {
+ Some((idx, _)) => Ok((input.advance(idx), ())),
+ None => Ok((input.advance(input.len()), ())),
+ }
} else {
Err(LexError)
}
@@ -746,17 +940,17 @@
true
}
-fn float(input: &str) -> PResult<()> {
+fn float(input: Cursor) -> PResult<()> {
let (rest, ()) = float_digits(input)?;
for suffix in &["f32", "f64"] {
if rest.starts_with(suffix) {
- return word_break(&rest[suffix.len()..]);
+ return word_break(rest.advance(suffix.len()));
}
}
word_break(rest)
}
-fn float_digits(input: &str) -> PResult<()> {
+fn float_digits(input: Cursor) -> PResult<()> {
let mut chars = input.chars().peekable();
match chars.next() {
Some(ch) if ch >= '0' && ch <= '9' => {}
@@ -795,7 +989,7 @@
}
}
- let rest = &input[len..];
+ let rest = input.advance(len);
if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
return Err(LexError);
}
@@ -828,10 +1022,10 @@
}
}
- Ok((&input[len..], ()))
+ Ok((input.advance(len), ()))
}
-fn int(input: &str) -> PResult<()> {
+fn int(input: Cursor) -> PResult<()> {
let (rest, ()) = digits(input)?;
for suffix in &[
"isize",
@@ -848,21 +1042,21 @@
"u128",
] {
if rest.starts_with(suffix) {
- return word_break(&rest[suffix.len()..]);
+ return word_break(rest.advance(suffix.len()));
}
}
word_break(rest)
}
-fn digits(mut input: &str) -> PResult<()> {
+fn digits(mut input: Cursor) -> PResult<()> {
let base = if input.starts_with("0x") {
- input = &input[2..];
+ input = input.advance(2);
16
} else if input.starts_with("0o") {
- input = &input[2..];
+ input = input.advance(2);
8
} else if input.starts_with("0b") {
- input = &input[2..];
+ input = input.advance(2);
2
} else {
10
@@ -893,7 +1087,7 @@
if empty {
Err(LexError)
} else {
- Ok((&input[len..], ()))
+ Ok((input.advance(len), ()))
}
}
@@ -903,7 +1097,7 @@
keyword!("false") => { |_| () }
));
-fn op(input: &str) -> PResult<(char, Spacing)> {
+fn op(input: Cursor) -> PResult<(char, Spacing)> {
let input = skip_whitespace(input);
match op_char(input) {
Ok((rest, ch)) => {
@@ -917,7 +1111,7 @@
}
}
-fn op_char(input: &str) -> PResult<char> {
+fn op_char(input: Cursor) -> PResult<char> {
let mut chars = input.chars();
let first = match chars.next() {
Some(ch) => ch,
@@ -927,7 +1121,7 @@
};
let recognized = "~!@#$%^&*-=+|;:,<.>/?";
if recognized.contains(first) {
- Ok((chars.as_str(), first))
+ Ok((input.advance(first.len_utf8()), first))
} else {
Err(LexError)
}