blob: 5f23803cdcd85a8f4674723a7ebb2b3b66e88bc0 [file] [log] [blame]
//! Adapted from [`nom`](https://github.com/Geal/nom).
David Tolnayb1032662017-05-31 15:52:28 -07002
Nika Layzellf8d5f212017-12-11 14:07:02 -05003use std::str::{Chars, CharIndices, Bytes};
4
David Tolnayb1032662017-05-31 15:52:28 -07005use unicode_xid::UnicodeXID;
6
David Tolnay1218e122017-06-01 11:13:45 -07007use imp::LexError;
David Tolnayb1032662017-05-31 15:52:28 -07008
/// A cheap, copyable view of the input that has not been consumed yet.
///
/// Parsers take a `Cursor` by value and hand back a new one positioned after
/// whatever they consumed; the original is never mutated.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// The unconsumed remainder of the input.
    pub rest: &'a str,
    /// Running total of the amounts passed to `advance`, added to whatever
    /// value the cursor was constructed with. Only compiled in under
    /// `--cfg procmacro2_unstable`.
    #[cfg(procmacro2_unstable)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor positioned `amt` bytes further into the input.
    ///
    /// # Panics
    ///
    /// Panics if `amt` is greater than `self.len()` or does not fall on a
    /// UTF-8 character boundary, because the remainder is obtained by slicing.
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        Cursor {
            rest: &self.rest[amt..],
            // NOTE: `as u32` truncates, so `amt` values above `u32::MAX` are
            // not tracked faithfully.
            #[cfg(procmacro2_unstable)]
            off: self.off + (amt as u32),
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining input.
    pub fn find(&self, p: char) -> Option<usize> {
        self.rest.find(p)
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether all input has been consumed.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length of the remaining input in bytes.
    pub fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw UTF-8 bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    pub fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining input.
    pub fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte index, char)` pairs of the remaining input.
    pub fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }
}
57
58pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
59
60pub fn whitespace(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -070061 if input.is_empty() {
David Tolnay1218e122017-06-01 11:13:45 -070062 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -070063 }
64
65 let bytes = input.as_bytes();
66 let mut i = 0;
67 while i < bytes.len() {
Nika Layzellf8d5f212017-12-11 14:07:02 -050068 let s = input.advance(i);
David Tolnayb1032662017-05-31 15:52:28 -070069 if bytes[i] == b'/' {
70 if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
71 !s.starts_with("//!") {
72 if let Some(len) = s.find('\n') {
73 i += len + 1;
74 continue;
75 }
76 break;
77 } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
78 !s.starts_with("/*!") {
David Tolnay1218e122017-06-01 11:13:45 -070079 let (_, com) = block_comment(s)?;
80 i += com.len();
81 continue;
David Tolnayb1032662017-05-31 15:52:28 -070082 }
83 }
84 match bytes[i] {
85 b' ' | 0x09...0x0d => {
86 i += 1;
87 continue;
88 }
89 b if b <= 0x7f => {}
90 _ => {
91 let ch = s.chars().next().unwrap();
92 if is_whitespace(ch) {
93 i += ch.len_utf8();
94 continue;
95 }
96 }
97 }
98 return if i > 0 {
David Tolnay1218e122017-06-01 11:13:45 -070099 Ok((s, ()))
David Tolnayb1032662017-05-31 15:52:28 -0700100 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700101 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700102 };
103 }
Nika Layzellf8d5f212017-12-11 14:07:02 -0500104 Ok((input.advance(input.len()), ()))
David Tolnayb1032662017-05-31 15:52:28 -0700105}
106
Nika Layzellf8d5f212017-12-11 14:07:02 -0500107pub fn block_comment(input: Cursor) -> PResult<&str> {
David Tolnayb1032662017-05-31 15:52:28 -0700108 if !input.starts_with("/*") {
David Tolnay1218e122017-06-01 11:13:45 -0700109 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -0700110 }
111
112 let mut depth = 0;
113 let bytes = input.as_bytes();
114 let mut i = 0;
115 let upper = bytes.len() - 1;
116 while i < upper {
117 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
118 depth += 1;
119 i += 1; // eat '*'
120 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
121 depth -= 1;
122 if depth == 0 {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500123 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
David Tolnayb1032662017-05-31 15:52:28 -0700124 }
125 i += 1; // eat '/'
126 }
127 i += 1;
128 }
David Tolnay1218e122017-06-01 11:13:45 -0700129 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700130}
131
Nika Layzellf8d5f212017-12-11 14:07:02 -0500132pub fn skip_whitespace(input: Cursor) -> Cursor {
David Tolnayb1032662017-05-31 15:52:28 -0700133 match whitespace(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700134 Ok((rest, _)) => rest,
135 Err(LexError) => input,
David Tolnayb1032662017-05-31 15:52:28 -0700136 }
137}
138
/// Whether `ch` counts as whitespace for lexing purposes: anything
/// `char::is_whitespace` accepts, plus U+200E and U+200F.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}'
}
143
Nika Layzellf8d5f212017-12-11 14:07:02 -0500144pub fn word_break(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -0700145 match input.chars().next() {
David Tolnay1218e122017-06-01 11:13:45 -0700146 Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
147 Some(_) | None => Ok((input, ())),
David Tolnayb1032662017-05-31 15:52:28 -0700148 }
149}
150
/// Declares a private parser function `$name` that takes a `Cursor` and
/// returns `PResult<$o>`; its body is the given submacro applied to the
/// function's input.
macro_rules! named {
    ($name:ident -> $o:ty, $submac:ident!( $($args:tt)* )) => {
        fn $name<'a>(i: Cursor<'a>) -> $crate::strnom::PResult<'a, $o> {
            $submac!(i, $($args)*)
        }
    };
}
158
/// Tries each `|`-separated alternative against the same input, in order,
/// and yields the first success.
///
/// An alternative is either a parser function name or a `submacro!(args)`
/// invocation, optionally followed by `=> { f }` to pass the output through
/// `f`. If every alternative fails the result is the last alternative's error.
macro_rules! alt {
    // Bare function name: rewrite as `call!(name)`.
    ($i:expr, $e:ident | $($rest:tt)*) => {
        alt!($i, call!($e) | $($rest)*)
    };

    // Submacro alternative followed by more alternatives.
    ($i:expr, $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => {
        match $subrule!($i, $($args)*) {
            res @ Ok(_) => res,
            _ => alt!($i, $($rest)*)
        }
    };

    // Mapped submacro alternative followed by more alternatives.
    ($i:expr, $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)+) => {
        match $subrule!($i, $($args)*) {
            Ok((i, o)) => Ok((i, $gen(o))),
            Err(LexError) => alt!($i, $($rest)*)
        }
    };

    // Mapped function name followed by more alternatives.
    ($i:expr, $e:ident => { $gen:expr } | $($rest:tt)*) => {
        alt!($i, call!($e) => { $gen } | $($rest)*)
    };

    // Final alternative: mapped function name.
    ($i:expr, $e:ident => { $gen:expr }) => {
        alt!($i, call!($e) => { $gen })
    };

    // Final alternative: mapped submacro.
    ($i:expr, $subrule:ident!( $($args:tt)* ) => { $gen:expr }) => {
        match $subrule!($i, $($args)*) {
            Ok((i, o)) => Ok((i, $gen(o))),
            Err(LexError) => Err(LexError),
        }
    };

    // Final alternative: bare function name.
    ($i:expr, $e:ident) => {
        alt!($i, call!($e))
    };

    // Final alternative: submacro.
    ($i:expr, $subrule:ident!( $($args:tt)*)) => {
        $subrule!($i, $($args)*)
    };
}
201
/// Runs a sequence of parsers separated by `>>`, threading the cursor from
/// one to the next and stopping at the first failure.
///
/// Each step is a function name or `submacro!(args)`; prefixing a step with
/// `name:` binds its output to `name`. The sequence ends with a parenthesised
/// expression list that becomes the output.
macro_rules! do_parse {
    // End of the sequence: produce the final value.
    ($i:expr, ( $($rest:expr),* )) => {
        Ok(($i, ( $($rest),* )))
    };

    // Unbound function name: rewrite as `call!(name)`.
    ($i:expr, $e:ident >> $($rest:tt)*) => {
        do_parse!($i, call!($e) >> $($rest)*)
    };

    // Unbound submacro step: output is discarded.
    ($i:expr, $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, _)) => do_parse!(i, $($rest)*),
        }
    };

    // Bound function name: rewrite as `field: call!(name)`.
    ($i:expr, $field:ident : $e:ident >> $($rest:tt)*) => {
        do_parse!($i, $field: call!($e) >> $($rest)*)
    };

    // Bound submacro step: output is stored in `$field` for later steps.
    ($i:expr, $field:ident : $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => {
                let $field = o;
                do_parse!(i, $($rest)*)
            },
        }
    };
}
232
/// Runs the submacro parser and returns its output without consuming input:
/// on success the cursor handed back is the original `$i`.
macro_rules! peek {
    ($i:expr, $submac:ident!( $($args:tt)* )) => {
        match $submac!($i, $($args)*) {
            Ok((_, o)) => Ok(($i, o)),
            Err(LexError) => Err(LexError),
        }
    };
}
241
/// Calls `$fun` with `$i` as its first argument, followed by any extra
/// arguments. Lets plain functions be used wherever a `submacro!(args)` is
/// expected.
macro_rules! call {
    ($i:expr, $fun:expr $(, $args:expr)*) => {
        $fun($i $(, $args)*)
    };
}
247
/// Makes the parser `$f` optional: yields `Some(output)` when it succeeds and
/// `None`, with the input left untouched, when it fails. Never fails itself.
macro_rules! option {
    ($i:expr, $f:expr) => {
        match $f($i) {
            Ok((i, o)) => Ok((i, Some(o))),
            Err(LexError) => Ok(($i, None)),
        }
    };
}
256
/// Consumes input up to, but not including, the first occurrence of
/// `$substr`, yielding the consumed text.
///
/// Fails with `LexError` when the input is empty or `$substr` does not occur
/// in it. The cursor handed back still starts with `$substr`.
macro_rules! take_until {
    ($i:expr, $substr:expr) => {{
        if $i.is_empty() {
            // Nothing to scan: this fails even for an empty needle.
            Err(LexError)
        } else {
            // `str::find` returns the byte offset of the first match, which is
            // always on a character boundary. Slicing with `[..]` accepts any
            // needle that derefs to `str`.
            match $i.rest.find(&$substr[..]) {
                Some(offset) => Ok(($i.advance(offset), &$i.rest[..offset])),
                None => Err(LexError),
            }
        }
    }};
}
289
/// Runs the comma-separated parsers in sequence and collects their outputs
/// into a tuple. Entry point for `tuple_parser!`, which it starts with an
/// empty accumulator.
macro_rules! tuple {
    ($i:expr, $($rest:tt)*) => {
        tuple_parser!($i, (), $($rest)*)
    };
}
295
/// Do not use directly. Use `tuple!`.
///
/// Recursive worker: the second argument accumulates the outputs parsed so
/// far, the remaining arguments are the parsers still to run. A single
/// parser with an empty accumulator yields its output directly rather than
/// a one-element tuple.
macro_rules! tuple_parser {
    // Next parser is a bare function name, more follow.
    ($i:expr, ($($parsed:tt),*), $e:ident, $($rest:tt)*) => {
        tuple_parser!($i, ($($parsed),*), call!($e), $($rest)*)
    };

    // First parser (empty accumulator), more follow.
    ($i:expr, (), $submac:ident!( $($args:tt)* ), $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => tuple_parser!(i, (o), $($rest)*),
        }
    };

    // Middle parser: append its output to the accumulator.
    ($i:expr, ($($parsed:tt)*), $submac:ident!( $($args:tt)* ), $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => tuple_parser!(i, ($($parsed)* , o), $($rest)*),
        }
    };

    // Last parser is a bare function name.
    ($i:expr, ($($parsed:tt),*), $e:ident) => {
        tuple_parser!($i, ($($parsed),*), call!($e))
    };

    // Only one parser in total: no tuple wrapping.
    ($i:expr, (), $submac:ident!( $($args:tt)* )) => {
        $submac!($i, $($args)*)
    };

    // Last parser: build the final tuple.
    ($i:expr, ($($parsed:expr),*), $submac:ident!( $($args:tt)* )) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => Ok((i, ($($parsed),*, o)))
        }
    };

    // No parsers left: return what has been accumulated.
    ($i:expr, ($($parsed:expr),*)) => {
        Ok(($i, ($($parsed),*)))
    };
}
335
/// Negative lookahead: succeeds with `()` and the input untouched when the
/// submacro parser fails, and fails with `LexError` when it succeeds.
macro_rules! not {
    ($i:expr, $submac:ident!( $($args:tt)* )) => {
        match $submac!($i, $($args)*) {
            Ok((_, _)) => Err(LexError),
            Err(LexError) => Ok(($i, ())),
        }
    };
}
344
/// Matches the literal string `$tag` at the very start of the input (no
/// whitespace skipping) and yields the matched slice of the input.
macro_rules! tag {
    ($i:expr, $tag:expr) => {
        if $i.starts_with($tag) {
            Ok(($i.advance($tag.len()), &$i.rest[..$tag.len()]))
        } else {
            Err(LexError)
        }
    };
}
354
/// Combinator form of the `punct` function in this module, usable wherever
/// a `submacro!(args)` parser is expected.
macro_rules! punct {
    ($i:expr, $punct:expr) => {
        $crate::strnom::punct($i, $punct)
    };
}
360
361/// Do not use directly. Use `punct!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500362pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700363 let input = skip_whitespace(input);
364 if input.starts_with(token) {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500365 Ok((input.advance(token.len()), token))
David Tolnayb1032662017-05-31 15:52:28 -0700366 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700367 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700368 }
369}
370
/// Combinator form of the `keyword` function in this module, usable
/// wherever a `submacro!(args)` parser is expected.
macro_rules! keyword {
    ($i:expr, $keyword:expr) => {
        $crate::strnom::keyword($i, $keyword)
    };
}
376
377/// Do not use directly. Use `keyword!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500378pub fn keyword<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700379 match punct(input, token) {
David Tolnay1218e122017-06-01 11:13:45 -0700380 Ok((rest, _)) => {
David Tolnayb1032662017-05-31 15:52:28 -0700381 match word_break(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700382 Ok((_, _)) => Ok((rest, token)),
383 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700384 }
385 }
David Tolnay1218e122017-06-01 11:13:45 -0700386 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700387 }
388}
389
/// Runs two parsers in sequence and keeps only the second one's output.
///
/// The first parser must be a `submacro!(args)`; the second may be a
/// `submacro!(args)` or any expression callable with the cursor.
macro_rules! preceded {
    ($i:expr, $submac:ident!( $($args:tt)* ), $submac2:ident!( $($args2:tt)* )) => {
        match tuple!($i, $submac!($($args)*), $submac2!($($args2)*)) {
            Ok((remaining, (_, o))) => Ok((remaining, o)),
            Err(LexError) => Err(LexError),
        }
    };

    // Second parser given as a plain expression: wrap it in `call!`.
    ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => {
        preceded!($i, $submac!($($args)*), call!($g))
    };
}
402
/// Runs three parsers in sequence (opener, body, closer) and keeps only the
/// middle one's output.
macro_rules! delimited {
    ($i:expr, $submac:ident!( $($args:tt)* ), $($rest:tt)+) => {
        match tuple_parser!($i, (), $submac!($($args)*), $($rest)*) {
            Err(LexError) => Err(LexError),
            Ok((i1, (_, o, _))) => Ok((i1, o))
        }
    };
}
411
/// Runs a parser and passes its output through `$g`.
///
/// The parser may be a `submacro!(args)` or any expression callable with
/// the cursor. A failing parser fails the whole combinator.
macro_rules! map {
    ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            // `call!(o, $g)` expands to `$g(o)`.
            Ok((i, o)) => Ok((i, call!(o, $g)))
        }
    };

    // Parser given as a plain expression: wrap it in `call!`.
    ($i:expr, $f:expr, $g:expr) => {
        map!($i, call!($f), $g)
    };
}
424
/// Applies the parser `$f` zero or more times and collects the outputs in a
/// `Vec`.
///
/// Stops successfully when the input is empty or `$f` fails. Fails with
/// `LexError` if `$f` succeeds while leaving the remaining input the same
/// length, since repeating such a parser would never terminate.
macro_rules! many0 {
    ($i:expr, $f:expr) => {{
        let ret;
        let mut res = ::std::vec::Vec::new();
        let mut input = $i;

        loop {
            if input.is_empty() {
                ret = Ok((input, res));
                break;
            }

            match $f(input) {
                Err(LexError) => {
                    ret = Ok((input, res));
                    break;
                }
                Ok((i, o)) => {
                    // loop trip must always consume (otherwise infinite loops)
                    if i.len() == input.len() {
                        ret = Err(LexError);
                        break;
                    }

                    res.push(o);
                    input = i;
                }
            }
        }

        ret
    }};
}
457}