David Tolnay | b103266 | 2017-05-31 15:52:28 -0700 | [diff] [blame^] | 1 | //! Adapted from [`nom`](https://github.com/Geal/nom) by removing the |
| 2 | //! `IResult::Incomplete` variant. |
| 3 | |
| 4 | use unicode_xid::UnicodeXID; |
| 5 | |
/// Outcome of running a parser over an input of type `I`, producing a value
/// of type `O` on success.
///
/// There is no `Incomplete` variant: a parser either succeeds or fails.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IResult<I, O> {
    /// Parsing succeeded. The first field contains the rest of the unparsed
    /// data and the second field contains the parse result.
    Done(I, O),
    /// Parsing failed.
    Error,
}
| 13 | |
| 14 | pub fn whitespace(input: &str) -> IResult<&str, ()> { |
| 15 | if input.is_empty() { |
| 16 | return IResult::Error; |
| 17 | } |
| 18 | |
| 19 | let bytes = input.as_bytes(); |
| 20 | let mut i = 0; |
| 21 | while i < bytes.len() { |
| 22 | let s = &input[i..]; |
| 23 | if bytes[i] == b'/' { |
| 24 | if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) && |
| 25 | !s.starts_with("//!") { |
| 26 | if let Some(len) = s.find('\n') { |
| 27 | i += len + 1; |
| 28 | continue; |
| 29 | } |
| 30 | break; |
| 31 | } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) && |
| 32 | !s.starts_with("/*!") { |
| 33 | match block_comment(s) { |
| 34 | IResult::Done(_, com) => { |
| 35 | i += com.len(); |
| 36 | continue; |
| 37 | } |
| 38 | IResult::Error => { |
| 39 | return IResult::Error; |
| 40 | } |
| 41 | } |
| 42 | } |
| 43 | } |
| 44 | match bytes[i] { |
| 45 | b' ' | 0x09...0x0d => { |
| 46 | i += 1; |
| 47 | continue; |
| 48 | } |
| 49 | b if b <= 0x7f => {} |
| 50 | _ => { |
| 51 | let ch = s.chars().next().unwrap(); |
| 52 | if is_whitespace(ch) { |
| 53 | i += ch.len_utf8(); |
| 54 | continue; |
| 55 | } |
| 56 | } |
| 57 | } |
| 58 | return if i > 0 { |
| 59 | IResult::Done(s, ()) |
| 60 | } else { |
| 61 | IResult::Error |
| 62 | }; |
| 63 | } |
| 64 | IResult::Done("", ()) |
| 65 | } |
| 66 | |
| 67 | pub fn block_comment(input: &str) -> IResult<&str, &str> { |
| 68 | if !input.starts_with("/*") { |
| 69 | return IResult::Error; |
| 70 | } |
| 71 | |
| 72 | let mut depth = 0; |
| 73 | let bytes = input.as_bytes(); |
| 74 | let mut i = 0; |
| 75 | let upper = bytes.len() - 1; |
| 76 | while i < upper { |
| 77 | if bytes[i] == b'/' && bytes[i + 1] == b'*' { |
| 78 | depth += 1; |
| 79 | i += 1; // eat '*' |
| 80 | } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { |
| 81 | depth -= 1; |
| 82 | if depth == 0 { |
| 83 | return IResult::Done(&input[i + 2..], &input[..i + 2]); |
| 84 | } |
| 85 | i += 1; // eat '/' |
| 86 | } |
| 87 | i += 1; |
| 88 | } |
| 89 | IResult::Error |
| 90 | } |
| 91 | |
| 92 | pub fn skip_whitespace(input: &str) -> &str { |
| 93 | match whitespace(input) { |
| 94 | IResult::Done(rest, _) => rest, |
| 95 | IResult::Error => input, |
| 96 | } |
| 97 | } |
| 98 | |
/// Reports whether `ch` counts as whitespace for this parser.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}
| 103 | |
| 104 | fn word_break(input: &str) -> IResult<&str, ()> { |
| 105 | match input.chars().next() { |
| 106 | Some(ch) if UnicodeXID::is_xid_continue(ch) => IResult::Error, |
| 107 | Some(_) | None => IResult::Done(input, ()), |
| 108 | } |
| 109 | } |
| 110 | |
/// Defines a parser function `$name` taking `&str` and returning
/// `IResult<&str, $output>`, whose body is the given parser macro invoked on
/// the function's input.
macro_rules! named {
    ($name:ident -> $output:ty, $parser:ident!( $($pargs:tt)* )) => {
        fn $name(input: &str) -> $crate::strnom::IResult<&str, $output> {
            $parser!(input, $($pargs)*)
        }
    };
}
| 118 | |
/// Tries each `|`-separated alternative in order on the same input and yields
/// the first success; fails only if every alternative fails.
///
/// An alternative is either a parser macro invocation or a bare function name
/// (shorthand for `call!(name)`), optionally followed by `=> { f }` to pass
/// the parsed value through `f`.
macro_rules! alt {
    // `name | ...`
    ($input:expr, $func:ident | $($others:tt)*) => {
        alt!($input, call!($func) | $($others)*)
    };

    // `parser!(...) | ...`
    ($input:expr, $parser:ident!( $($pargs:tt)* ) | $($others:tt)*) => {
        match $parser!($input, $($pargs)*) {
            matched @ $crate::strnom::IResult::Done(..) => matched,
            $crate::strnom::IResult::Error => alt!($input, $($others)*),
        }
    };

    // `parser!(...) => { f } | ...`
    ($input:expr, $parser:ident!( $($pargs:tt)* ) => { $convert:expr } | $($others:tt)+) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Done(remaining, value) => {
                $crate::strnom::IResult::Done(remaining, $convert(value))
            }
            $crate::strnom::IResult::Error => alt!($input, $($others)*),
        }
    };

    // `name => { f } | ...`
    ($input:expr, $func:ident => { $convert:expr } | $($others:tt)*) => {
        alt!($input, call!($func) => { $convert } | $($others)*)
    };

    // `name => { f }` (last alternative)
    ($input:expr, $func:ident => { $convert:expr }) => {
        alt!($input, call!($func) => { $convert })
    };

    // `parser!(...) => { f }` (last alternative)
    ($input:expr, $parser:ident!( $($pargs:tt)* ) => { $convert:expr }) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Done(remaining, value) => {
                $crate::strnom::IResult::Done(remaining, $convert(value))
            }
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
        }
    };

    // `name` (last alternative)
    ($input:expr, $func:ident) => {
        alt!($input, call!($func))
    };

    // `parser!(...)` (last alternative)
    ($input:expr, $parser:ident!( $($pargs:tt)* )) => {
        $parser!($input, $($pargs)*)
    };
}
| 161 | |
/// Runs a `>>`-separated sequence of parsers, each starting where the
/// previous one stopped, and finishes with a parenthesised result expression.
///
/// A step written `name: parser` binds the parsed value to `name` so the
/// final expression can use it; an unnamed step discards its value. A bare
/// function name is shorthand for `call!(name)`. The first failing step makes
/// the whole sequence fail.
macro_rules! do_parse {
    // Final step: build the result from the remaining input and the tuple.
    ($input:expr, ( $($result:expr),* )) => {
        $crate::strnom::IResult::Done($input, ( $($result),* ))
    };

    // `name >> ...`
    ($input:expr, $func:ident >> $($steps:tt)*) => {
        do_parse!($input, call!($func) >> $($steps)*)
    };

    // `parser!(...) >> ...`
    ($input:expr, $parser:ident!( $($pargs:tt)* ) >> $($steps:tt)*) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
            $crate::strnom::IResult::Done(remaining, _) => {
                do_parse!(remaining, $($steps)*)
            }
        }
    };

    // `binding: name >> ...`
    ($input:expr, $binding:ident : $func:ident >> $($steps:tt)*) => {
        do_parse!($input, $binding: call!($func) >> $($steps)*)
    };

    // `binding: parser!(...) >> ...`
    ($input:expr, $binding:ident : $parser:ident!( $($pargs:tt)* ) >> $($steps:tt)*) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
            $crate::strnom::IResult::Done(remaining, value) => {
                let $binding = value;
                do_parse!(remaining, $($steps)*)
            }
        }
    };
}
| 193 | |
/// Runs a parser macro without consuming input: on success the parsed value
/// is kept but the remaining input is the original input.
macro_rules! peek {
    ($input:expr, $parser:ident!( $($pargs:tt)* )) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
            $crate::strnom::IResult::Done(_, value) => {
                $crate::strnom::IResult::Done($input, value)
            }
        }
    };
}
| 202 | |
/// Calls `$func` with the input as its first argument, followed by any extra
/// arguments. This lets a plain function be used where a parser macro is
/// expected.
macro_rules! call {
    ($input:expr, $func:expr $(, $extra:expr)*) => {
        $func($input $(, $extra)*)
    };
}
| 208 | |
/// Makes a parser function optional: success yields `Some(value)`, failure
/// yields `None` with the input left unconsumed. Never fails.
macro_rules! option {
    ($input:expr, $func:expr) => {
        match $func($input) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Done($input, None),
            $crate::strnom::IResult::Done(remaining, value) => {
                $crate::strnom::IResult::Done(remaining, Some(value))
            }
        }
    };
}
| 217 | |
/// Consumes input up to, but not including, the first occurrence of
/// `$substr`.
///
/// Yields `Done(rest, taken)` where `taken` is the text before the match and
/// `rest` begins at the match. Yields `Error` when `$substr` does not occur
/// in the input, or when the input is empty.
macro_rules! take_until {
    ($i:expr, $substr:expr) => {{
        let haystack: &str = $i;
        let needle: &str = $substr;
        match haystack.find(needle) {
            // Empty input never matches, not even against an empty needle.
            Some(offset) if !haystack.is_empty() => {
                $crate::strnom::IResult::Done(&haystack[offset..], &haystack[..offset])
            }
            _ => $crate::strnom::IResult::Error,
        }
    }};
}
| 250 | |
/// Runs a comma-separated list of parsers in sequence. This is the entry
/// point that starts `tuple_parser!` with an empty list of parsed values.
macro_rules! tuple {
    ($input:expr, $($parsers:tt)*) => {
        tuple_parser!($input, (), $($parsers)*)
    };
}
| 256 | |
/// Do not use directly. Use `tuple!`.
///
/// Recursive worker: the second argument is the parenthesised list of values
/// parsed so far, followed by the parsers still to run. A bare function name
/// is shorthand for `call!(name)`. The first failing parser fails the whole
/// sequence.
macro_rules! tuple_parser {
    // `name, ...`
    ($input:expr, ($($done:tt),*), $func:ident, $($pending:tt)*) => {
        tuple_parser!($input, ($($done),*), call!($func), $($pending)*)
    };

    // First parser of several: start the list of parsed values.
    ($input:expr, (), $parser:ident!( $($pargs:tt)* ), $($pending:tt)*) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
            $crate::strnom::IResult::Done(remaining, value) => {
                tuple_parser!(remaining, (value), $($pending)*)
            }
        }
    };

    // Later parser with more to follow: append to the list.
    ($input:expr, ($($done:tt)*), $parser:ident!( $($pargs:tt)* ), $($pending:tt)*) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
            $crate::strnom::IResult::Done(remaining, value) => {
                tuple_parser!(remaining, ($($done)* , value), $($pending)*)
            }
        }
    };

    // `name` as the last parser.
    ($input:expr, ($($done:tt),*), $func:ident) => {
        tuple_parser!($input, ($($done),*), call!($func))
    };

    // A single parser on its own: its result is returned as is.
    ($input:expr, (), $parser:ident!( $($pargs:tt)* )) => {
        $parser!($input, $($pargs)*)
    };

    // Last parser: emit the finished tuple.
    ($input:expr, ($($done:expr),*), $parser:ident!( $($pargs:tt)* )) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
            $crate::strnom::IResult::Done(remaining, value) => {
                $crate::strnom::IResult::Done(remaining, ($($done),*, value))
            }
        }
    };

    // No parsers left: emit whatever was collected.
    ($input:expr, ($($done:expr),*)) => {
        $crate::strnom::IResult::Done($input, ($($done),*))
    };
}
| 298 | |
/// Negative lookahead: succeeds with `()` and the untouched input when the
/// given parser fails, and fails when the given parser succeeds.
macro_rules! not {
    ($input:expr, $parser:ident!( $($pargs:tt)* )) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Done($input, ()),
            $crate::strnom::IResult::Done(_, _) => $crate::strnom::IResult::Error,
        }
    };
}
| 307 | |
/// Matches the literal `$tag` at the very start of the input (no whitespace
/// skipping). Yields `Done(rest, matched)` where `matched` is the matched
/// prefix of the input, or `Error` if the input does not start with `$tag`.
macro_rules! tag {
    ($i:expr, $tag:expr) => {
        if !$i.starts_with($tag) {
            $crate::strnom::IResult::Error
        } else {
            // `starts_with` succeeded, so the split point is a char boundary.
            let (matched, rest) = $i.split_at($tag.len());
            $crate::strnom::IResult::Done(rest, matched)
        }
    };
}
| 317 | |
/// Parser-macro front end for the `punct` function: forwards the input and
/// the expected token to `$crate::strnom::punct`.
macro_rules! punct {
    ($input:expr, $token:expr) => {
        $crate::strnom::punct($input, $token)
    };
}
| 323 | |
| 324 | /// Do not use directly. Use `punct!`. |
| 325 | pub fn punct<'a>(input: &'a str, token: &'static str) -> IResult<&'a str, &'a str> { |
| 326 | let input = skip_whitespace(input); |
| 327 | if input.starts_with(token) { |
| 328 | IResult::Done(&input[token.len()..], token) |
| 329 | } else { |
| 330 | IResult::Error |
| 331 | } |
| 332 | } |
| 333 | |
/// Parser-macro front end for the `keyword` function: forwards the input and
/// the expected keyword to `$crate::strnom::keyword`.
macro_rules! keyword {
    ($input:expr, $word:expr) => {
        $crate::strnom::keyword($input, $word)
    };
}
| 339 | |
| 340 | /// Do not use directly. Use `keyword!`. |
| 341 | pub fn keyword<'a>(input: &'a str, token: &'static str) -> IResult<&'a str, &'a str> { |
| 342 | match punct(input, token) { |
| 343 | IResult::Done(rest, _) => { |
| 344 | match word_break(rest) { |
| 345 | IResult::Done(_, _) => IResult::Done(rest, token), |
| 346 | IResult::Error => IResult::Error, |
| 347 | } |
| 348 | } |
| 349 | IResult::Error => IResult::Error, |
| 350 | } |
| 351 | } |
| 352 | |
/// The empty parser: always succeeds with `()` and consumes nothing.
/// Note the required trailing comma after the input expression.
macro_rules! epsilon {
    ($input:expr,) => {
        $crate::strnom::IResult::Done($input, ())
    };
}
| 358 | |
/// Runs two parsers in sequence and keeps only the second one's value.
///
/// The first parser must be a parser macro invocation; the second may be a
/// parser macro invocation or an expression, which is wrapped in `call!`.
macro_rules! preceded {
    ($input:expr, $first:ident!( $($fargs:tt)* ), $second:ident!( $($sargs:tt)* )) => {
        match tuple!($input, $first!($($fargs)*), $second!($($sargs)*)) {
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
            $crate::strnom::IResult::Done(rest, (_, value)) => {
                $crate::strnom::IResult::Done(rest, value)
            }
        }
    };

    ($input:expr, $first:ident!( $($fargs:tt)* ), $func:expr) => {
        preceded!($input, $first!($($fargs)*), call!($func))
    };
}
| 371 | |
/// Runs three parsers in sequence (opening delimiter, content, closing
/// delimiter) and keeps only the middle value.
///
/// The first parser must be a parser macro invocation; the remaining two are
/// handed to `tuple_parser!` unchanged.
macro_rules! delimited {
    ($input:expr, $open:ident!( $($oargs:tt)* ), $($tail:tt)+) => {
        match tuple_parser!($input, (), $open!($($oargs)*), $($tail)*) {
            $crate::strnom::IResult::Done(rest, (_, inner, _)) => {
                $crate::strnom::IResult::Done(rest, inner)
            }
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
        }
    };
}
| 380 | |
/// Runs a parser and, on success, passes its value through `$transform`.
/// Failure is propagated unchanged.
///
/// The parser may be a parser macro invocation or an expression, which is
/// wrapped in `call!`.
macro_rules! map {
    ($input:expr, $parser:ident!( $($pargs:tt)* ), $transform:expr) => {
        match $parser!($input, $($pargs)*) {
            $crate::strnom::IResult::Done(rest, value) => {
                $crate::strnom::IResult::Done(rest, call!(value, $transform))
            }
            $crate::strnom::IResult::Error => $crate::strnom::IResult::Error,
        }
    };

    ($input:expr, $func:expr, $transform:expr) => {
        map!($input, call!($func), $transform)
    };
}
| 395 | |
/// Applies the parser function `$f` repeatedly, collecting the parsed values
/// into a `Vec`, until the input is exhausted or `$f` fails.
///
/// Yields `Done(rest, values)`; zero matches is a success with an empty
/// `Vec`. Yields `Error` only if `$f` succeeds without consuming any input.
macro_rules! many0 {
    ($i:expr, $f:expr) => {{
        let mut remaining = $i;
        let mut items = ::std::vec::Vec::new();
        let mut stalled = false;

        while !remaining.is_empty() {
            match $f(remaining) {
                $crate::strnom::IResult::Error => break,
                $crate::strnom::IResult::Done(rest, item) => {
                    // loop trip must always consume (otherwise infinite loops)
                    if rest.len() == remaining.len() {
                        stalled = true;
                        break;
                    }

                    items.push(item);
                    remaining = rest;
                }
            }
        }

        if stalled {
            $crate::strnom::IResult::Error
        } else {
            $crate::strnom::IResult::Done(remaining, items)
        }
    }};
}