Initial implementation of stable meaningful spans
diff --git a/src/lib.rs b/src/lib.rs
index 979ef34..b70cb4b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -24,6 +24,9 @@
extern crate proc_macro;
#[cfg(not(feature = "unstable"))]
+extern crate memchr;
+
+#[cfg(not(feature = "unstable"))]
extern crate unicode_xid;
use std::fmt;
@@ -103,6 +106,43 @@
}
}
+#[derive(Clone, PartialEq, Eq)]
+pub struct SourceFile(imp::SourceFile);
+
+impl SourceFile {
+ /// Get the path to this source file as a string.
+ pub fn as_str(&self) -> &str {
+ self.0.as_str()
+ }
+
+ pub fn is_real(&self) -> bool {
+ self.0.is_real()
+ }
+}
+
+impl AsRef<str> for SourceFile {
+ fn as_ref(&self) -> &str {
+ self.0.as_ref()
+ }
+}
+
+impl PartialEq<str> for SourceFile {
+ fn eq(&self, other: &str) -> bool {
+ self.0.eq(other)
+ }
+}
+
+impl fmt::Debug for SourceFile {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+// NOTE: We can't easily wrap LineColumn right now, as the version in proc-macro
+// doesn't actually expose the internal `line` and `column` fields, making it
+// mostly useless.
+pub use imp::LineColumn;
+
#[derive(Copy, Clone)]
pub struct Span(imp::Span);
@@ -121,6 +161,24 @@
pub fn def_site() -> Span {
Span(imp::Span::def_site())
}
+
+ pub fn source_file(&self) -> SourceFile {
+ SourceFile(self.0.source_file())
+ }
+
+ pub fn start(&self) -> LineColumn {
+ // XXX(nika): We can't easily wrap LineColumn right now
+ self.0.start()
+ }
+
+ pub fn end(&self) -> LineColumn {
+ // XXX(nika): We can't easily wrap LineColumn right now
+ self.0.end()
+ }
+
+ pub fn join(&self, other: Span) -> Option<Span> {
+ self.0.join(other.0).map(Span)
+ }
}
#[derive(Clone, Debug)]
diff --git a/src/stable.rs b/src/stable.rs
index 8277d4c..8b951e5 100644
--- a/src/stable.rs
+++ b/src/stable.rs
@@ -1,6 +1,7 @@
use std::ascii;
use std::borrow::Borrow;
use std::cell::RefCell;
+use std::cmp;
use std::collections::HashMap;
use std::fmt;
use std::iter;
@@ -10,9 +11,10 @@
use std::str::FromStr;
use std::vec;
+use memchr;
use proc_macro;
use unicode_xid::UnicodeXID;
-use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
+use strnom::{Cursor, PResult, skip_whitespace, block_comment, whitespace, word_break};
use {TokenTree, TokenNode, Delimiter, Spacing};
@@ -38,7 +40,18 @@
type Err = LexError;
fn from_str(src: &str) -> Result<TokenStream, LexError> {
- match token_stream(src) {
+ // Register `src` as a dummy file in the codemap; the cursor starts at that file's base offset
+ let cursor = CODEMAP.with(|cm| {
+ let mut cm = cm.borrow_mut();
+ let name = format!("<parsed string {}>", cm.files.len());
+ let span = cm.add_file(&name, src);
+ Cursor {
+ rest: src,
+ off: span.lo,
+ }
+ });
+
+ match token_stream(cursor) {
Ok((input, output)) => {
if skip_whitespace(input).len() != 0 {
Err(LexError)
@@ -137,16 +150,188 @@
}
}
+#[derive(Clone, PartialEq, Eq)]
+pub struct SourceFile {
+ name: String,
+}
+
+impl SourceFile {
+ /// Get the path to this source file as a string.
+ pub fn as_str(&self) -> &str {
+ &self.name
+ }
+
+ pub fn is_real(&self) -> bool {
+ // XXX(nika): Support real files in the future?
+ false
+ }
+}
+
+impl AsRef<str> for SourceFile {
+ fn as_ref(&self) -> &str {
+ self.as_str()
+ }
+}
+
+impl PartialEq<str> for SourceFile {
+ fn eq(&self, other: &str) -> bool {
+ self.as_ref() == other
+ }
+}
+
+impl fmt::Debug for SourceFile {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("SourceFile")
+ .field("path", &self.as_str())
+ .field("is_real", &self.is_real())
+ .finish()
+ }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct LineColumn {
+ pub line: usize,
+ pub column: usize,
+}
+
+thread_local! {
+ static CODEMAP: RefCell<Codemap> = RefCell::new(Codemap {
+ // NOTE: We start with a single dummy file which all call_site() and
+ // def_site() spans reference.
+ files: vec![FileInfo {
+ name: "<unspecified>".to_owned(),
+ span: Span { lo: 0, hi: 0 },
+ lines: vec![0],
+ }],
+ });
+}
+
+struct FileInfo {
+ name: String,
+ span: Span,
+ lines: Vec<usize>,
+}
+
+impl FileInfo {
+ fn offset_line_column(&self, offset: usize) -> LineColumn {
+ assert!(self.span_within(Span { lo: offset as u32, hi: offset as u32 }));
+ let offset = offset - self.span.lo as usize;
+ match self.lines.binary_search(&offset) {
+ Ok(found) => LineColumn {
+ line: found + 1,
+ column: 0
+ },
+ Err(idx) => LineColumn {
+ line: idx,
+ column: offset - self.lines[idx - 1]
+ },
+ }
+ }
+
+ fn span_within(&self, span: Span) -> bool {
+ span.lo >= self.span.lo && span.hi <= self.span.hi
+ }
+}
+
+/// Computes the byte offset at which each line of the given source string starts.
+fn lines_offsets(s: &[u8]) -> Vec<usize> {
+ let mut lines = vec![0];
+ let mut prev = 0;
+ while let Some(len) = memchr::memchr(b'\n', &s[prev..]) {
+ prev += len + 1;
+ lines.push(prev);
+ }
+ lines
+}
+
+struct Codemap {
+ files: Vec<FileInfo>,
+}
+
+impl Codemap {
+ fn next_start_pos(&self) -> u32 {
+ // Add 1 so there's always space between files.
+ //
+ // We'll always have at least 1 file, as we initialize our files list
+ // with a dummy file.
+ self.files.last().unwrap().span.hi + 1
+ }
+
+ fn add_file(&mut self, name: &str, src: &str) -> Span {
+ let lines = lines_offsets(src.as_bytes());
+ let lo = self.next_start_pos();
+ // XXX(nika): Should we bother doing a checked cast or checked add here?
+ let span = Span { lo: lo, hi: lo + (src.len() as u32) };
+
+ self.files.push(FileInfo {
+ name: name.to_owned(),
+ span: span,
+ lines: lines,
+ });
+
+ span
+ }
+
+ fn fileinfo(&self, span: Span) -> &FileInfo {
+ for file in &self.files {
+ if file.span_within(span) {
+ return file;
+ }
+ }
+ panic!("Invalid span with no related FileInfo!");
+ }
+}
+
#[derive(Clone, Copy, Debug)]
-pub struct Span;
+pub struct Span { lo: u32, hi: u32 }
impl Span {
pub fn call_site() -> Span {
- Span
+ Span { lo: 0, hi: 0 }
}
pub fn def_site() -> Span {
- Span
+ Span { lo: 0, hi: 0 }
+ }
+
+ pub fn source_file(&self) -> SourceFile {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ let fi = cm.fileinfo(*self);
+ SourceFile {
+ name: fi.name.clone(),
+ }
+ })
+ }
+
+ pub fn start(&self) -> LineColumn {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ let fi = cm.fileinfo(*self);
+ fi.offset_line_column(self.lo as usize)
+ })
+ }
+
+ pub fn end(&self) -> LineColumn {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ let fi = cm.fileinfo(*self);
+ fi.offset_line_column(self.hi as usize)
+ })
+ }
+
+ pub fn join(&self, other: Span) -> Option<Span> {
+ CODEMAP.with(|cm| {
+ let cm = cm.borrow();
+ // If `other` is not within the same FileInfo as us, return None.
+ if !cm.fileinfo(*self).span_within(other) {
+ return None;
+ }
+ Some(Span {
+ lo: cmp::min(self.lo, other.lo),
+ hi: cmp::max(self.hi, other.hi),
+ })
+ })
}
}
@@ -349,13 +534,19 @@
|trees| ::TokenStream(TokenStream { inner: trees })
));
-named!(token_tree -> TokenTree,
- map!(token_kind, |s: TokenNode| {
- TokenTree {
- span: ::Span(Span),
- kind: s,
- }
- }));
+fn token_tree(input: Cursor) -> PResult<TokenTree> {
+ let input = skip_whitespace(input);
+ let lo = input.off;
+ let (input, kind) = token_kind(input)?;
+ let hi = input.off;
+ Ok((input, TokenTree {
+ span: ::Span(Span {
+ lo: lo,
+ hi: hi,
+ }),
+ kind: kind,
+ }))
+}
named!(token_kind -> TokenNode, alt!(
map!(delimited, |(d, s)| TokenNode::Group(d, s))
@@ -387,7 +578,7 @@
) => { |ts| (Delimiter::Brace, ts) }
));
-fn symbol(mut input: &str) -> PResult<TokenNode> {
+fn symbol(mut input: Cursor) -> PResult<TokenNode> {
input = skip_whitespace(input);
let mut chars = input.char_indices();
@@ -410,14 +601,14 @@
}
}
- if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
+ if lifetime && &input.rest[..end] != "'static" && KEYWORDS.contains(&&input.rest[1..end]) {
Err(LexError)
} else {
- let (a, b) = input.split_at(end);
+ let a = &input.rest[..end];
if a == "_" {
- Ok((b, TokenNode::Op('_', Spacing::Alone)))
+ Ok((input.advance(end), TokenNode::Op('_', Spacing::Alone)))
} else {
- Ok((b, TokenNode::Term(::Term::intern(a))))
+ Ok((input.advance(end), TokenNode::Term(::Term::intern(a))))
}
}
}
@@ -433,7 +624,7 @@
"yield",
];
-fn literal(input: &str) -> PResult<::Literal> {
+fn literal(input: Cursor) -> PResult<::Literal> {
let input_no_ws = skip_whitespace(input);
match literal_nocapture(input_no_ws) {
@@ -441,7 +632,7 @@
let start = input.len() - input_no_ws.len();
let len = input_no_ws.len() - a.len();
let end = start + len;
- Ok((a, ::Literal(Literal(input[start..end].to_string()))))
+ Ok((a, ::Literal(Literal(input.rest[start..end].to_string()))))
}
Err(LexError) => Err(LexError),
}
@@ -480,12 +671,12 @@
tag!("\"")
));
-fn cooked_string(input: &str) -> PResult<()> {
+fn cooked_string(input: Cursor) -> PResult<()> {
let mut chars = input.char_indices().peekable();
while let Some((byte_offset, ch)) = chars.next() {
match ch {
'"' => {
- return Ok((&input[byte_offset..], ()));
+ return Ok((input.advance(byte_offset), ()));
}
'\r' => {
if let Some((_, '\n')) = chars.next() {
@@ -544,12 +735,12 @@
) => { |_| () }
));
-fn cooked_byte_string(mut input: &str) -> PResult<()> {
+fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
let mut bytes = input.bytes().enumerate();
'outer: while let Some((offset, b)) = bytes.next() {
match b {
b'"' => {
- return Ok((&input[offset..], ()));
+ return Ok((input.advance(offset), ()));
}
b'\r' => {
if let Some((_, b'\n')) = bytes.next() {
@@ -574,10 +765,10 @@
Some((_, b'"')) => {}
Some((newline, b'\n')) |
Some((newline, b'\r')) => {
- let rest = &input[newline + 1..];
+ let rest = input.advance(newline + 1);
for (offset, ch) in rest.char_indices() {
if !ch.is_whitespace() {
- input = &rest[offset..];
+ input = rest.advance(offset);
bytes = input.bytes().enumerate();
continue 'outer;
}
@@ -594,7 +785,7 @@
Err(LexError)
}
-fn raw_string(input: &str) -> PResult<()> {
+fn raw_string(input: Cursor) -> PResult<()> {
let mut chars = input.char_indices();
let mut n = 0;
while let Some((byte_offset, ch)) = chars.next() {
@@ -609,8 +800,8 @@
}
for (byte_offset, ch) in chars {
match ch {
- '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
- let rest = &input[byte_offset + 1 + n..];
+ '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => {
+ let rest = input.advance(byte_offset + 1 + n);
return Ok((rest, ()))
}
'\r' => {}
@@ -628,7 +819,7 @@
(())
));
-fn cooked_byte(input: &str) -> PResult<()> {
+fn cooked_byte(input: Cursor) -> PResult<()> {
let mut bytes = input.bytes().enumerate();
let ok = match bytes.next().map(|(_, b)| b) {
Some(b'\\') => {
@@ -648,8 +839,8 @@
};
if ok {
match bytes.next() {
- Some((offset, _)) => Ok((&input[offset..], ())),
- None => Ok(("", ())),
+ Some((offset, _)) => Ok((input.advance(offset), ())),
+ None => Ok((input.advance(input.len()), ())),
}
} else {
Err(LexError)
@@ -663,7 +854,7 @@
(())
));
-fn cooked_char(input: &str) -> PResult<()> {
+fn cooked_char(input: Cursor) -> PResult<()> {
let mut chars = input.char_indices();
let ok = match chars.next().map(|(_, ch)| ch) {
Some('\\') => {
@@ -683,7 +874,10 @@
ch => ch.is_some(),
};
if ok {
- Ok((chars.as_str(), ()))
+ match chars.next() {
+ Some((idx, _)) => Ok((input.advance(idx), ())),
+ None => Ok((input.advance(input.len()), ())),
+ }
} else {
Err(LexError)
}
@@ -746,17 +940,17 @@
true
}
-fn float(input: &str) -> PResult<()> {
+fn float(input: Cursor) -> PResult<()> {
let (rest, ()) = float_digits(input)?;
for suffix in &["f32", "f64"] {
if rest.starts_with(suffix) {
- return word_break(&rest[suffix.len()..]);
+ return word_break(rest.advance(suffix.len()));
}
}
word_break(rest)
}
-fn float_digits(input: &str) -> PResult<()> {
+fn float_digits(input: Cursor) -> PResult<()> {
let mut chars = input.chars().peekable();
match chars.next() {
Some(ch) if ch >= '0' && ch <= '9' => {}
@@ -795,7 +989,7 @@
}
}
- let rest = &input[len..];
+ let rest = input.advance(len);
if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
return Err(LexError);
}
@@ -828,10 +1022,10 @@
}
}
- Ok((&input[len..], ()))
+ Ok((input.advance(len), ()))
}
-fn int(input: &str) -> PResult<()> {
+fn int(input: Cursor) -> PResult<()> {
let (rest, ()) = digits(input)?;
for suffix in &[
"isize",
@@ -848,21 +1042,21 @@
"u128",
] {
if rest.starts_with(suffix) {
- return word_break(&rest[suffix.len()..]);
+ return word_break(rest.advance(suffix.len()));
}
}
word_break(rest)
}
-fn digits(mut input: &str) -> PResult<()> {
+fn digits(mut input: Cursor) -> PResult<()> {
let base = if input.starts_with("0x") {
- input = &input[2..];
+ input = input.advance(2);
16
} else if input.starts_with("0o") {
- input = &input[2..];
+ input = input.advance(2);
8
} else if input.starts_with("0b") {
- input = &input[2..];
+ input = input.advance(2);
2
} else {
10
@@ -893,7 +1087,7 @@
if empty {
Err(LexError)
} else {
- Ok((&input[len..], ()))
+ Ok((input.advance(len), ()))
}
}
@@ -903,7 +1097,7 @@
keyword!("false") => { |_| () }
));
-fn op(input: &str) -> PResult<(char, Spacing)> {
+fn op(input: Cursor) -> PResult<(char, Spacing)> {
let input = skip_whitespace(input);
match op_char(input) {
Ok((rest, ch)) => {
@@ -917,7 +1111,7 @@
}
}
-fn op_char(input: &str) -> PResult<char> {
+fn op_char(input: Cursor) -> PResult<char> {
let mut chars = input.chars();
let first = match chars.next() {
Some(ch) => ch,
@@ -927,7 +1121,7 @@
};
let recognized = "~!@#$%^&*-=+|;:,<.>/?";
if recognized.contains(first) {
- Ok((chars.as_str(), first))
+ Ok((input.advance(first.len_utf8()), first))
} else {
Err(LexError)
}
diff --git a/src/strnom.rs b/src/strnom.rs
index 22b39e8..558be8e 100644
--- a/src/strnom.rs
+++ b/src/strnom.rs
@@ -1,12 +1,61 @@
//! Adapted from [`nom`](https://github.com/Geal/nom).
+use std::str::{Chars, CharIndices, Bytes};
+
use unicode_xid::UnicodeXID;
use imp::LexError;
-pub type PResult<'a, O> = Result<(&'a str, O), LexError>;
+#[derive(Copy, Clone, Eq, PartialEq)]
+pub struct Cursor<'a> {
+ pub rest: &'a str,
+ pub off: u32,
+}
-pub fn whitespace(input: &str) -> PResult<()> {
+impl<'a> Cursor<'a> {
+ pub fn advance(&self, amt: usize) -> Cursor<'a> {
+ Cursor {
+ rest: &self.rest[amt..],
+ off: self.off + (amt as u32),
+ }
+ }
+
+ pub fn find(&self, p: char) -> Option<usize> {
+ self.rest.find(p)
+ }
+
+ pub fn starts_with(&self, s: &str) -> bool {
+ self.rest.starts_with(s)
+ }
+
+ pub fn is_empty(&self) -> bool {
+ self.rest.is_empty()
+ }
+
+ pub fn len(&self) -> usize {
+ self.rest.len()
+ }
+
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.rest.as_bytes()
+ }
+
+ pub fn bytes(&self) -> Bytes<'a> {
+ self.rest.bytes()
+ }
+
+ pub fn chars(&self) -> Chars<'a> {
+ self.rest.chars()
+ }
+
+ pub fn char_indices(&self) -> CharIndices<'a> {
+ self.rest.char_indices()
+ }
+}
+
+pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
+
+pub fn whitespace(input: Cursor) -> PResult<()> {
if input.is_empty() {
return Err(LexError);
}
@@ -14,7 +63,7 @@
let bytes = input.as_bytes();
let mut i = 0;
while i < bytes.len() {
- let s = &input[i..];
+ let s = input.advance(i);
if bytes[i] == b'/' {
if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
!s.starts_with("//!") {
@@ -50,10 +99,10 @@
Err(LexError)
};
}
- Ok(("", ()))
+ Ok((input.advance(input.len()), ()))
}
-pub fn block_comment(input: &str) -> PResult<&str> {
+pub fn block_comment(input: Cursor) -> PResult<&str> {
if !input.starts_with("/*") {
return Err(LexError);
}
@@ -69,7 +118,7 @@
} else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
depth -= 1;
if depth == 0 {
- return Ok((&input[i + 2..], &input[..i + 2]));
+ return Ok((input.advance(i + 2), &input.rest[..i + 2]));
}
i += 1; // eat '/'
}
@@ -78,7 +127,7 @@
Err(LexError)
}
-pub fn skip_whitespace(input: &str) -> &str {
+pub fn skip_whitespace(input: Cursor) -> Cursor {
match whitespace(input) {
Ok((rest, _)) => rest,
Err(LexError) => input,
@@ -90,7 +139,7 @@
ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}'
}
-pub fn word_break(input: &str) -> PResult<()> {
+pub fn word_break(input: Cursor) -> PResult<()> {
match input.chars().next() {
Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
Some(_) | None => Ok((input, ())),
@@ -99,7 +148,7 @@
macro_rules! named {
($name:ident -> $o:ty, $submac:ident!( $($args:tt)* )) => {
- fn $name(i: &str) -> $crate::strnom::PResult<$o> {
+ fn $name<'a>(i: Cursor<'a>) -> $crate::strnom::PResult<'a, $o> {
$submac!(i, $($args)*)
}
};
@@ -228,7 +277,7 @@
}
}
if parsed {
- Ok((&$i[offset..], &$i[..offset]))
+ Ok(($i.advance(offset), &$i.rest[..offset]))
} else {
Err(LexError)
}
@@ -294,7 +343,7 @@
macro_rules! tag {
($i:expr, $tag:expr) => {
if $i.starts_with($tag) {
- Ok((&$i[$tag.len()..], &$i[..$tag.len()]))
+ Ok(($i.advance($tag.len()), &$i.rest[..$tag.len()]))
} else {
Err(LexError)
}
@@ -308,10 +357,10 @@
}
/// Do not use directly. Use `punct!`.
-pub fn punct<'a>(input: &'a str, token: &'static str) -> PResult<'a, &'a str> {
+pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
let input = skip_whitespace(input);
if input.starts_with(token) {
- Ok((&input[token.len()..], token))
+ Ok((input.advance(token.len()), token))
} else {
Err(LexError)
}
@@ -324,7 +373,7 @@
}
/// Do not use directly. Use `keyword!`.
-pub fn keyword<'a>(input: &'a str, token: &'static str) -> PResult<'a, &'a str> {
+pub fn keyword<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
match punct(input, token) {
Ok((rest, _)) => {
match word_break(rest) {
diff --git a/src/unstable.rs b/src/unstable.rs
index 6705fb5..7d4e85c 100644
--- a/src/unstable.rs
+++ b/src/unstable.rs
@@ -159,6 +159,41 @@
}
}
+#[derive(Clone, PartialEq, Eq)]
+pub struct SourceFile(proc_macro::SourceFile);
+
+impl SourceFile {
+ /// Get the path to this source file as a string.
+ pub fn as_str(&self) -> &str {
+ self.0.as_str()
+ }
+
+ pub fn is_real(&self) -> bool {
+ self.0.is_real()
+ }
+}
+
+impl AsRef<str> for SourceFile {
+ fn as_ref(&self) -> &str {
+ self.0.as_ref()
+ }
+}
+
+impl PartialEq<str> for SourceFile {
+ fn eq(&self, other: &str) -> bool {
+ self.0.eq(other)
+ }
+}
+
+impl fmt::Debug for SourceFile {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+// XXX(nika): We can't easily wrap LineColumn right now, so re-export proc_macro's type as-is
+pub use proc_macro::LineColumn;
+
#[derive(Copy, Clone)]
pub struct Span(proc_macro::Span);
@@ -170,6 +205,24 @@
pub fn def_site() -> Span {
Span(proc_macro::Span::def_site())
}
+
+ pub fn source_file(&self) -> SourceFile {
+ SourceFile(self.0.source_file())
+ }
+
+ pub fn start(&self) -> LineColumn {
+ // XXX(nika): We can't easily wrap LineColumn right now
+ self.0.start()
+ }
+
+ pub fn end(&self) -> LineColumn {
+ // XXX(nika): We can't easily wrap LineColumn right now
+ self.0.end()
+ }
+
+ pub fn join(&self, other: Span) -> Option<Span> {
+ self.0.join(other.0).map(Span)
+ }
}
impl fmt::Debug for Span {