Parse based on proc-macro2
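
synom's input type changes from `&str` to slices of proc-macro2 `TokenTree`s.
The `space` module and the `tag!` macro are removed, since the lexer already
handles whitespace and comments; `delim!` matches delimited token groups and
`input_end!` detects end of input. Entry into a parser now goes through
`InputBuf`, roughly like this (a sketch against the proc-macro2 branch named
below; the exact API may differ):

    let tokens: synom::TokenStream = "fn".parse().expect("lex error");
    let input = synom::InputBuf::new(tokens);
    // helper::keyword is the doc(hidden) function behind the keyword! macro.
    match synom::helper::keyword(&input, "fn") {
        synom::IResult::Done(_rest, kw) => assert_eq!(kw, "fn"),
        synom::IResult::Error => panic!("no match"),
    }
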
diff --git a/synom/Cargo.toml b/synom/Cargo.toml
index fde5ed4..5ef7c0f 100644
--- a/synom/Cargo.toml
+++ b/synom/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "synom"
-version = "0.11.3"
+version = "0.12.0"
authors = ["David Tolnay <dtolnay@gmail.com>"]
license = "MIT/Apache-2.0"
description = "Stripped-down Nom parser used by Syn"
@@ -11,10 +11,10 @@
include = ["Cargo.toml", "src/**/*.rs", "README.md", "LICENSE-APACHE", "LICENSE-MIT"]
[dependencies]
-unicode-xid = "0.0.4"
+proc-macro2 = { git = "https://github.com/mystor/proc-macro2", branch = "byte_literal" }
[dev-dependencies.syn]
-version = "0.11"
+version = "0.12"
path = ".."
features = ["parsing", "full"]
default-features = false
diff --git a/synom/src/helper.rs b/synom/src/helper.rs
index a488359..800874a 100644
--- a/synom/src/helper.rs
+++ b/synom/src/helper.rs
@@ -1,5 +1,4 @@
-use IResult;
-use space::{skip_whitespace, word_break};
+use {IResult, TokenTree, TokenKind, OpKind, Delimiter, InputBuf};
/// Parse a piece of punctuation like "+" or "+=".
///
@@ -33,13 +32,33 @@
// Not public API.
#[doc(hidden)]
-pub fn punct<'a>(input: &'a str, token: &'static str) -> IResult<&'a str, &'a str> {
- let input = skip_whitespace(input);
- if input.starts_with(token) {
- IResult::Done(&input[token.len()..], token)
- } else {
- IResult::Error
+pub fn punct<'a>(input: &'a [TokenTree], token: &'static str) -> IResult<&'a [TokenTree], &'a str> {
+ // Extract the characters from `token` so we know how many tokens to
+ // expect, check that we are not running past EOF, then confirm that the
+ // expected punctuation tokens are actually present.
+ let expected = token.chars().collect::<Vec<_>>();
+ if input.len() < expected.len() {
+ return IResult::Error;
}
+ for i in 0..expected.len() {
+ if let TokenKind::Op(c, ok) = input[i].kind {
+ if c != expected[i] {
+ return IResult::Error;
+ }
+
+ // Every Op except the last must be marked OpKind::Joint; the final
+ // one may be either Joint or Alone. We match rather than compare
+ // because OpKind doesn't implement Eq/PartialEq right now.
+ match ok {
+ OpKind::Alone if i != expected.len() - 1 => return IResult::Error,
+ _ => {}
+ }
+ } else {
+ return IResult::Error;
+ }
+ }
+
+ IResult::Done(&input[expected.len()..], token)
}
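
For reference, multi-character punctuation arrives from the lexer as a run of
`Op` tokens in which every character except the last is `OpKind::Joint`. A
hypothetical test of the behavior implemented above (not part of this diff):

    use synom::{IResult, InputBuf, TokenStream};
    use synom::helper::punct;

    // "+=" lexes as Op('+', Joint) then Op('=', ...), so punct matches it;
    // "+ =" lexes both ops as Alone and punct would return IResult::Error.
    let ts: TokenStream = "+=".parse().expect("lex error");
    let buf = InputBuf::new(ts);
    match punct(&buf, "+=") {
        IResult::Done(rest, tok) => {
            assert_eq!(tok, "+=");
            assert!(rest.is_empty());
        }
        IResult::Error => panic!("expected a match"),
    }
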
/// Parse a keyword like "fn" or "struct".
@@ -83,15 +102,11 @@
// Not public API.
#[doc(hidden)]
-pub fn keyword<'a>(input: &'a str, token: &'static str) -> IResult<&'a str, &'a str> {
- match punct(input, token) {
- IResult::Done(rest, _) => {
- match word_break(rest) {
- IResult::Done(_, _) => IResult::Done(rest, token),
- IResult::Error => IResult::Error,
- }
- }
- IResult::Error => IResult::Error,
+pub fn keyword<'a>(input: &'a [TokenTree], token: &'static str) -> IResult<&'a [TokenTree], &'static str> {
+ match input.first() {
+ Some(&TokenTree { kind: TokenKind::Word(ref symbol), .. }) if &**symbol == token =>
+ IResult::Done(&input[1..], token),
+ _ => IResult::Error,
}
}
@@ -497,11 +512,11 @@
// Not public API.
#[doc(hidden)]
-pub fn separated_list<'a, T>(mut input: &'a str,
+pub fn separated_list<'a, T>(mut input: &'a [TokenTree],
sep: &'static str,
- f: fn(&'a str) -> IResult<&'a str, T>,
+ f: fn(&'a [TokenTree]) -> IResult<&'a [TokenTree], T>,
terminated: bool)
- -> IResult<&'a str, Vec<T>> {
+ -> IResult<&'a [TokenTree], Vec<T>> {
let mut res = Vec::new();
// get the first element
@@ -541,3 +556,45 @@
}
}
}
+
+#[macro_export]
+macro_rules! delim {
+ ($i:expr, $delim:ident, $fmac:ident!( $($fargs:tt)* )) => {
+ match $crate::helper::delim_impl($i, $crate::Delimiter::$delim) {
+ Some((i, ib)) => {
+ match $fmac!(&*ib, $($fargs)*) {
+ $crate::IResult::Done(rest, val) => {
+ if rest.is_empty() {
+ $crate::IResult::Done(i, val)
+ } else {
+ $crate::IResult::Error
+ }
+ }
+ _ => $crate::IResult::Error,
+ }
+ }
+ _ => $crate::IResult::Error,
+ }
+ };
+ ($i:expr, $delim:ident, $f:expr) => {
+ delim!($i, $delim, call!($f))
+ };
+}
+
+// Not public API.
+#[doc(hidden)]
+pub fn delim_impl(input: &[TokenTree],
+ expected_delim: Delimiter)
+ -> Option<(&[TokenTree], InputBuf)> {
+ // NOTE: The `as u32` comparison is a workaround for `Delimiter` not
+ // implementing `PartialEq` or `Eq`, despite being a simple C-style enum.
+ match input.first() {
+ Some(&TokenTree {
+ kind: TokenKind::Sequence(delim, ref stream),
+ ..
+ }) if delim as u32 == expected_delim as u32 => {
+ Some((&input[1..], InputBuf::new(stream.clone())))
+ }
+ _ => None
+ }
+}
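
The `delim!` macro is the token-based replacement for scanning bracket
characters out of a string: `delim_impl` peels one `Sequence` token off the
input, checks its delimiter, and returns the group's contents as a fresh
`InputBuf`, which the inner parser must then consume completely. A
hypothetical use (not part of this diff):

    #[macro_use]
    extern crate synom;

    // Sketch: match a parenthesized `fn` keyword, i.e. the tokens `(fn)`.
    // If the inner parser leaves any of the group's tokens unconsumed,
    // delim! reports IResult::Error.
    named!(paren_fn -> &'static str, delim!(Parenthesis, keyword!("fn")));
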
diff --git a/synom/src/lib.rs b/synom/src/lib.rs
index 97c7833..ae9aebd 100644
--- a/synom/src/lib.rs
+++ b/synom/src/lib.rs
@@ -21,14 +21,51 @@
//! For our use case, this strategy is a huge improvement in usability,
//! correctness, and compile time over nom's `ws!` strategy.
-extern crate unicode_xid;
-
-#[doc(hidden)]
-pub mod space;
+extern crate proc_macro2;
#[doc(hidden)]
pub mod helper;
+// Re-export TokenStream et al. from proc_macro2 so that parsers built on
+// synom don't have to import these types manually.
+pub use proc_macro2::{TokenStream, TokenTree, TokenKind, Delimiter, OpKind, LexError};
+
+use std::ops::Deref;
+
+/// A `TokenStream` is not directly usable as a `synom` parser input. This
+/// type extracts the `TokenTree`s from a `TokenStream` into a buffer which
+/// can be borrowed as the `synom` input type, `&[TokenTree]`.
+pub struct InputBuf {
+ data: Vec<TokenTree>,
+}
+
+impl InputBuf {
+ /// Transform the input `TokenStream` into a buffer which can be used as a
+ /// `synom` parser input. Use the `Deref` implementation on this type to
+ /// obtain the underlying `&[TokenTree]` slice.
+ pub fn new(ts: TokenStream) -> Self {
+ fn flatten_stream(tt: TokenTree) -> Vec<TokenTree> {
+ match tt.kind {
+ TokenKind::Sequence(Delimiter::None, ts) => {
+ ts.into_iter().flat_map(flatten_stream).collect()
+ }
+ _ => vec![tt]
+ }
+ }
+
+ InputBuf {
+ data: ts.into_iter().flat_map(flatten_stream).collect()
+ }
+ }
+}
+
+impl Deref for InputBuf {
+ type Target = [TokenTree];
+ fn deref(&self) -> &[TokenTree] {
+ &self.data
+ }
+}
+
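
The `flatten_stream` helper above splices `Delimiter::None` groups in flat, so
parsers see through invisible delimiters around interpolated tokens. A
hypothetical usage (API per the branch above):

    use synom::{InputBuf, TokenStream, TokenTree};

    let tokens: TokenStream = "a + b".parse().expect("lex error");
    let input = InputBuf::new(tokens);
    // Deref yields the slice type that synom parsers consume.
    let slice: &[TokenTree] = &input;
    assert_eq!(slice.len(), 3); // `a`, `+`, `b`
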
/// The result of a parser.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum IResult<I, O> {
@@ -39,7 +76,7 @@
Error,
}
-impl<'a, O> IResult<&'a str, O> {
+impl<'a, O> IResult<&'a [TokenTree], O> {
/// Unwraps the result, asserting that the parse is complete. Panics with a
/// message based on the given string if the parse failed or is incomplete.
///
@@ -66,12 +103,11 @@
/// ```
pub fn expect(self, name: &str) -> O {
match self {
- IResult::Done(mut rest, o) => {
- rest = space::skip_whitespace(rest);
+ IResult::Done(rest, o) => {
if rest.is_empty() {
o
} else {
- panic!("unparsed tokens after {}: {:?}", name, rest)
+ panic!("unparsed tokens after {}: {:?}", name, /* rest */ ())
}
}
IResult::Error => panic!("failed to parse {}", name),
@@ -97,13 +133,13 @@
#[macro_export]
macro_rules! named {
($name:ident -> $o:ty, $submac:ident!( $($args:tt)* )) => {
- fn $name(i: &str) -> $crate::IResult<&str, $o> {
+ fn $name(i: &[$crate::TokenTree]) -> $crate::IResult<&[$crate::TokenTree], $o> {
$submac!(i, $($args)*)
}
};
(pub $name:ident -> $o:ty, $submac:ident!( $($args:tt)* )) => {
- pub fn $name(i: &str) -> $crate::IResult<&str, $o> {
+ pub fn $name(i: &[$crate::TokenTree]) -> $crate::IResult<&[$crate::TokenTree], $o> {
$submac!(i, $($args)*)
}
};
@@ -564,9 +600,9 @@
//
// Not public API.
#[doc(hidden)]
-pub fn many0<'a, T>(mut input: &'a str,
- f: fn(&'a str) -> IResult<&'a str, T>)
- -> IResult<&'a str, Vec<T>> {
+pub fn many0<'a, T>(mut input: &'a [TokenTree],
+ f: fn(&'a [TokenTree]) -> IResult<&'a [TokenTree], T>)
+ -> IResult<&'a [TokenTree], Vec<T>> {
let mut res = Vec::new();
loop {
@@ -691,54 +727,6 @@
}};
}
-/// Parse the given string from exactly the current position in the input. You
-/// almost always want `punct!` or `keyword!` instead of this.
-///
-/// The `tag!` parser is equivalent to `punct!` but does not ignore leading
-/// whitespace. Both `punct!` and `keyword!` skip over leading whitespace. See
-/// an explanation of synom's whitespace handling strategy in the top-level
-/// crate documentation.
-///
-/// - **Syntax:** `tag!("...")`
-/// - **Output:** `"..."`
-///
-/// ```rust
-/// extern crate syn;
-/// #[macro_use] extern crate synom;
-///
-/// use syn::StrLit;
-/// use syn::parse::string;
-/// use synom::IResult;
-///
-/// // Parse a proposed syntax for an owned string literal: "abc"s
-/// named!(owned_string -> String,
-/// map!(
-/// terminated!(string, tag!("s")),
-/// |lit: StrLit| lit.value
-/// )
-/// );
-///
-/// fn main() {
-/// let input = r#" "abc"s "#;
-/// let parsed = owned_string(input).expect("owned string literal");
-/// println!("{:?}", parsed);
-///
-/// let input = r#" "abc" s "#;
-/// let err = owned_string(input);
-/// assert_eq!(err, IResult::Error);
-/// }
-/// ```
-#[macro_export]
-macro_rules! tag {
- ($i:expr, $tag:expr) => {
- if $i.starts_with($tag) {
- $crate::IResult::Done(&$i[$tag.len()..], &$i[..$tag.len()])
- } else {
- $crate::IResult::Error
- }
- };
-}
-
/// Pattern-match the result of a parser to select which other parser to run.
///
/// - **Syntax:** `switch!(TARGET, PAT1 => THEN1 | PAT2 => THEN2 | ...)`
@@ -1223,3 +1211,20 @@
}
};
}
+
+#[macro_export]
+macro_rules! input_end {
+ ($i:expr,) => {
+ $crate::input_end($i)
+ };
+}
+
+// Not public API.
+#[doc(hidden)]
+pub fn input_end(input: &[TokenTree]) -> IResult<&'static [TokenTree], &'static str> {
+ if input.is_empty() {
+ IResult::Done(&[], "")
+ } else {
+ IResult::Error
+ }
+}
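
`input_end!` gives grammars an explicit end-of-input anchor, which matters now
that `expect` no longer skips trailing whitespace itself. A hypothetical use
(not part of this diff):

    #[macro_use]
    extern crate synom;

    // Sketch: accept `fn` only when no tokens follow it.
    named!(just_fn -> &'static str,
           terminated!(keyword!("fn"), input_end!()));
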
diff --git a/synom/src/space.rs b/synom/src/space.rs
deleted file mode 100644
index 5237522..0000000
--- a/synom/src/space.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-use IResult;
-use unicode_xid::UnicodeXID;
-
-pub fn whitespace(input: &str) -> IResult<&str, ()> {
- if input.is_empty() {
- return IResult::Error;
- }
-
- let bytes = input.as_bytes();
- let mut i = 0;
- while i < bytes.len() {
- let s = &input[i..];
- if bytes[i] == b'/' {
- if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
- !s.starts_with("//!") {
- if let Some(len) = s.find('\n') {
- i += len + 1;
- continue;
- }
- break;
- } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
- !s.starts_with("/*!") {
- match block_comment(s) {
- IResult::Done(_, com) => {
- i += com.len();
- continue;
- }
- IResult::Error => {
- return IResult::Error;
- }
- }
- }
- }
- match bytes[i] {
- b' ' | 0x09...0x0d => {
- i += 1;
- continue;
- }
- b if b <= 0x7f => {}
- _ => {
- let ch = s.chars().next().unwrap();
- if is_whitespace(ch) {
- i += ch.len_utf8();
- continue;
- }
- }
- }
- return if i > 0 {
- IResult::Done(s, ())
- } else {
- IResult::Error
- };
- }
- IResult::Done("", ())
-}
-
-pub fn block_comment(input: &str) -> IResult<&str, &str> {
- if !input.starts_with("/*") {
- return IResult::Error;
- }
-
- let mut depth = 0;
- let bytes = input.as_bytes();
- let mut i = 0;
- let upper = bytes.len() - 1;
- while i < upper {
- if bytes[i] == b'/' && bytes[i + 1] == b'*' {
- depth += 1;
- i += 1; // eat '*'
- } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
- depth -= 1;
- if depth == 0 {
- return IResult::Done(&input[i + 2..], &input[..i + 2]);
- }
- i += 1; // eat '/'
- }
- i += 1;
- }
- IResult::Error
-}
-
-pub fn word_break(input: &str) -> IResult<&str, ()> {
- match input.chars().next() {
- Some(ch) if UnicodeXID::is_xid_continue(ch) => IResult::Error,
- Some(_) | None => IResult::Done(input, ()),
- }
-}
-
-pub fn skip_whitespace(input: &str) -> &str {
- match whitespace(input) {
- IResult::Done(rest, _) => rest,
- IResult::Error => input,
- }
-}
-
-fn is_whitespace(ch: char) -> bool {
- // Rust treats left-to-right mark and right-to-left mark as whitespace
- ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}'
-}