blob: 35acff72317bfa98a7b5236d71fe5bedbb4bf724 [file] [log] [blame]
//! Adapted from [`nom`](https://github.com/Geal/nom).
David Tolnayb1032662017-05-31 15:52:28 -07002
Nika Layzellf8d5f212017-12-11 14:07:02 -05003use std::str::{Chars, CharIndices, Bytes};
4
David Tolnayb1032662017-05-31 15:52:28 -07005use unicode_xid::UnicodeXID;
6
David Tolnay1218e122017-06-01 11:13:45 -07007use imp::LexError;
David Tolnayb1032662017-05-31 15:52:28 -07008
/// A cheap, copyable view of the input that has not been consumed yet.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Cursor<'a> {
    /// The remaining, not yet parsed, portion of the input.
    pub rest: &'a str,
    /// Running offset that `advance` bumps by the number of bytes consumed.
    /// Only present when built with `--cfg procmacro2_semver_exempt`.
    #[cfg(procmacro2_semver_exempt)]
    pub off: u32,
}
David Tolnay1218e122017-06-01 11:13:45 -070015
Nika Layzellf8d5f212017-12-11 14:07:02 -050016impl<'a> Cursor<'a> {
David Tolnay1ebe3972018-01-02 20:14:20 -080017 #[cfg(not(procmacro2_semver_exempt))]
Nika Layzellf8d5f212017-12-11 14:07:02 -050018 pub fn advance(&self, amt: usize) -> Cursor<'a> {
19 Cursor {
20 rest: &self.rest[amt..],
David Tolnay79105e52017-12-31 11:03:04 -050021 }
22 }
David Tolnay1ebe3972018-01-02 20:14:20 -080023 #[cfg(procmacro2_semver_exempt)]
David Tolnay79105e52017-12-31 11:03:04 -050024 pub fn advance(&self, amt: usize) -> Cursor<'a> {
25 Cursor {
26 rest: &self.rest[amt..],
Nika Layzellf8d5f212017-12-11 14:07:02 -050027 off: self.off + (amt as u32),
28 }
29 }
30
31 pub fn find(&self, p: char) -> Option<usize> {
32 self.rest.find(p)
33 }
34
35 pub fn starts_with(&self, s: &str) -> bool {
36 self.rest.starts_with(s)
37 }
38
39 pub fn is_empty(&self) -> bool {
40 self.rest.is_empty()
41 }
42
43 pub fn len(&self) -> usize {
44 self.rest.len()
45 }
46
47 pub fn as_bytes(&self) -> &'a [u8] {
48 self.rest.as_bytes()
49 }
50
51 pub fn bytes(&self) -> Bytes<'a> {
52 self.rest.bytes()
53 }
54
55 pub fn chars(&self) -> Chars<'a> {
56 self.rest.chars()
57 }
58
59 pub fn char_indices(&self) -> CharIndices<'a> {
60 self.rest.char_indices()
61 }
62}
63
64pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
65
66pub fn whitespace(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -070067 if input.is_empty() {
David Tolnay1218e122017-06-01 11:13:45 -070068 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -070069 }
70
71 let bytes = input.as_bytes();
72 let mut i = 0;
73 while i < bytes.len() {
Nika Layzellf8d5f212017-12-11 14:07:02 -050074 let s = input.advance(i);
David Tolnayb1032662017-05-31 15:52:28 -070075 if bytes[i] == b'/' {
76 if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
77 !s.starts_with("//!") {
78 if let Some(len) = s.find('\n') {
79 i += len + 1;
80 continue;
81 }
82 break;
83 } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
84 !s.starts_with("/*!") {
David Tolnay1218e122017-06-01 11:13:45 -070085 let (_, com) = block_comment(s)?;
86 i += com.len();
87 continue;
David Tolnayb1032662017-05-31 15:52:28 -070088 }
89 }
90 match bytes[i] {
91 b' ' | 0x09...0x0d => {
92 i += 1;
93 continue;
94 }
95 b if b <= 0x7f => {}
96 _ => {
97 let ch = s.chars().next().unwrap();
98 if is_whitespace(ch) {
99 i += ch.len_utf8();
100 continue;
101 }
102 }
103 }
104 return if i > 0 {
David Tolnay1218e122017-06-01 11:13:45 -0700105 Ok((s, ()))
David Tolnayb1032662017-05-31 15:52:28 -0700106 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700107 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700108 };
109 }
Nika Layzellf8d5f212017-12-11 14:07:02 -0500110 Ok((input.advance(input.len()), ()))
David Tolnayb1032662017-05-31 15:52:28 -0700111}
112
Nika Layzellf8d5f212017-12-11 14:07:02 -0500113pub fn block_comment(input: Cursor) -> PResult<&str> {
David Tolnayb1032662017-05-31 15:52:28 -0700114 if !input.starts_with("/*") {
David Tolnay1218e122017-06-01 11:13:45 -0700115 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -0700116 }
117
118 let mut depth = 0;
119 let bytes = input.as_bytes();
120 let mut i = 0;
121 let upper = bytes.len() - 1;
122 while i < upper {
123 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
124 depth += 1;
125 i += 1; // eat '*'
126 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
127 depth -= 1;
128 if depth == 0 {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500129 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
David Tolnayb1032662017-05-31 15:52:28 -0700130 }
131 i += 1; // eat '/'
132 }
133 i += 1;
134 }
David Tolnay1218e122017-06-01 11:13:45 -0700135 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700136}
137
Nika Layzellf8d5f212017-12-11 14:07:02 -0500138pub fn skip_whitespace(input: Cursor) -> Cursor {
David Tolnayb1032662017-05-31 15:52:28 -0700139 match whitespace(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700140 Ok((rest, _)) => rest,
141 Err(LexError) => input,
David Tolnayb1032662017-05-31 15:52:28 -0700142 }
143}
144
/// Whether `ch` counts as whitespace for the purposes of this lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}
149
Nika Layzellf8d5f212017-12-11 14:07:02 -0500150pub fn word_break(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -0700151 match input.chars().next() {
David Tolnay1218e122017-06-01 11:13:45 -0700152 Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
153 Some(_) | None => Ok((input, ())),
David Tolnayb1032662017-05-31 15:52:28 -0700154 }
155}
156
/// Declares a parser function with the given name and output type whose
/// body is the supplied parser macro invocation applied to the input cursor.
macro_rules! named {
    ($fn_name:ident -> $output:ty, $parser:ident!( $($parser_args:tt)* )) => {
        fn $fn_name<'a>(input: Cursor<'a>) -> $crate::strnom::PResult<'a, $output> {
            $parser!(input, $($parser_args)*)
        }
    };
}
164
/// Tries each `|`-separated alternative in order on the same input and
/// yields the first success. An alternative may be followed by
/// `=> { f }` to pass its output through `f`.
macro_rules! alt {
    // Bare function name followed by more alternatives.
    ($input:expr, $parser:ident | $($tail:tt)*) => {
        alt!($input, call!($parser) | $($tail)*)
    };

    // Parser macro followed by more alternatives.
    ($input:expr, $mac:ident!( $($mac_args:tt)*) | $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            matched @ Ok(_) => matched,
            _ => alt!($input, $($tail)*)
        }
    };

    // Parser macro with an output conversion, followed by more alternatives.
    ($input:expr, $mac:ident!( $($mac_args:tt)* ) => { $convert:expr } | $($tail:tt)+) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, value)) => Ok((remaining, $convert(value))),
            Err(LexError) => alt!($input, $($tail)*)
        }
    };

    // Bare function name with an output conversion, followed by more alternatives.
    ($input:expr, $parser:ident => { $convert:expr } | $($tail:tt)*) => {
        alt!($input, call!($parser) => { $convert } | $($tail)*)
    };

    // Final alternative: bare function name with an output conversion.
    ($input:expr, $parser:ident => { $convert:expr }) => {
        alt!($input, call!($parser) => { $convert })
    };

    // Final alternative: parser macro with an output conversion.
    ($input:expr, $mac:ident!( $($mac_args:tt)* ) => { $convert:expr }) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, value)) => Ok((remaining, $convert(value))),
            Err(LexError) => Err(LexError),
        }
    };

    // Final alternative: bare function name.
    ($input:expr, $parser:ident) => {
        alt!($input, call!($parser))
    };

    // Final alternative: parser macro.
    ($input:expr, $mac:ident!( $($mac_args:tt)*)) => {
        $mac!($input, $($mac_args)*)
    };
}
207
/// Runs a `>>`-separated sequence of parsers, optionally binding outputs
/// with `name: parser`, and finishes with a parenthesised result
/// expression. The first failing step aborts the whole sequence.
macro_rules! do_parse {
    // End of the sequence: produce the final value.
    ($input:expr, ( $($result:expr),* )) => {
        Ok(($input, ( $($result),* )))
    };

    // Unbound step given as a bare function name.
    ($input:expr, $parser:ident >> $($tail:tt)*) => {
        do_parse!($input, call!($parser) >> $($tail)*)
    };

    // Unbound step given as a parser macro; its output is discarded.
    ($input:expr, $mac:ident!( $($mac_args:tt)* ) >> $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, _)) => do_parse!(remaining, $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Bound step given as a bare function name.
    ($input:expr, $binding:ident : $parser:ident >> $($tail:tt)*) => {
        do_parse!($input, $binding: call!($parser) >> $($tail)*)
    };

    // Bound step given as a parser macro; its output is named for later steps.
    ($input:expr, $binding:ident : $mac:ident!( $($mac_args:tt)* ) >> $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, value)) => {
                let $binding = value;
                do_parse!(remaining, $($tail)*)
            },
            Err(LexError) => Err(LexError),
        }
    };
}
238
/// Runs a parser without consuming input: on success the original cursor
/// is returned alongside the parser's output.
macro_rules! peek {
    ($input:expr, $mac:ident!( $($mac_args:tt)* )) => {
        match $mac!($input, $($mac_args)*) {
            Err(LexError) => Err(LexError),
            Ok((_, value)) => Ok(($input, value)),
        }
    };
}
247
/// Invokes a parser function with the input as its first argument,
/// followed by any extra arguments.
macro_rules! call {
    ($input:expr, $parser:expr $(, $extra:expr)*) => {
        $parser($input $(, $extra)*)
    };
}
253
/// Makes a parser optional: failure becomes `None` with the input left
/// untouched, success becomes `Some(output)`.
macro_rules! option {
    ($input:expr, $parser:expr) => {
        match $parser($input) {
            Err(LexError) => Ok(($input, None)),
            Ok((remaining, value)) => Ok((remaining, Some(value))),
        }
    };
}
262
/// Consumes input up to, but not including, the first occurrence of the
/// given substring.
///
/// On success yields the cursor positioned at the start of the match and
/// the text that precedes it. Fails with `LexError` when the substring does
/// not occur, and always fails on empty input.
macro_rules! take_until {
    ($i:expr, $substr:expr) => {{
        // Evaluate both arguments exactly once.
        let input = $i;
        let needle = $substr;
        // `str::find` reports the byte offset of the first match, which is
        // always a character boundary.
        match input.rest.find(&needle[..]) {
            Some(offset) if !input.is_empty() => {
                Ok((input.advance(offset), &input.rest[..offset]))
            }
            _ => Err(LexError),
        }
    }};
}
295
/// Runs a comma-separated list of parsers in sequence and collects their
/// outputs into a tuple.
macro_rules! tuple {
    ($input:expr, $($parsers:tt)*) => {
        tuple_parser!($input, (), $($parsers)*)
    };
}
301
/// Do not use directly. Use `tuple!`.
///
/// The second argument accumulates the outputs gathered so far.
macro_rules! tuple_parser {
    // Next parser is a bare function name, with more to follow.
    ($input:expr, ($($done:tt),*), $parser:ident, $($tail:tt)*) => {
        tuple_parser!($input, ($($done),*), call!($parser), $($tail)*)
    };

    // First parser macro, with more to follow.
    ($input:expr, (), $mac:ident!( $($mac_args:tt)* ), $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, value)) => tuple_parser!(remaining, (value), $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Subsequent parser macro, with more to follow.
    ($input:expr, ($($done:tt)*), $mac:ident!( $($mac_args:tt)* ), $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, value)) => tuple_parser!(remaining, ($($done)* , value), $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Last parser is a bare function name.
    ($input:expr, ($($done:tt),*), $parser:ident) => {
        tuple_parser!($input, ($($done),*), call!($parser))
    };

    // A single parser macro: its result is passed through unchanged.
    ($input:expr, (), $mac:ident!( $($mac_args:tt)* )) => {
        $mac!($input, $($mac_args)*)
    };

    // Last parser macro: append its output and build the tuple.
    ($input:expr, ($($done:expr),*), $mac:ident!( $($mac_args:tt)* )) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, value)) => Ok((remaining, ($($done),*, value))),
            Err(LexError) => Err(LexError)
        }
    };

    // Nothing left to run: build the tuple from what was gathered.
    ($input:expr, ($($done:expr),*)) => {
        Ok(($input, ($($done),*)))
    };
}
341
/// Negative lookahead: succeeds, consuming nothing, exactly when the given
/// parser fails.
macro_rules! not {
    ($input:expr, $mac:ident!( $($mac_args:tt)* )) => {
        match $mac!($input, $($mac_args)*) {
            Err(LexError) => Ok(($input, ())),
            Ok((_, _)) => Err(LexError),
        }
    };
}
350
/// Matches an exact string at the current position (no whitespace is
/// skipped) and yields the matched slice of the input.
macro_rules! tag {
    ($input:expr, $literal:expr) => {
        if !$input.starts_with($literal) {
            Err(LexError)
        } else {
            Ok(($input.advance($literal.len()), &$input.rest[..$literal.len()]))
        }
    };
}
360
/// Matches a punctuation token after skipping leading whitespace; forwards
/// to the `punct` function.
macro_rules! punct {
    ($input:expr, $token:expr) => {
        $crate::strnom::punct($input, $token)
    };
}
366
367/// Do not use directly. Use `punct!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500368pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700369 let input = skip_whitespace(input);
370 if input.starts_with(token) {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500371 Ok((input.advance(token.len()), token))
David Tolnayb1032662017-05-31 15:52:28 -0700372 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700373 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700374 }
375}
376
/// Matches a keyword after skipping leading whitespace; forwards to the
/// `keyword` function.
macro_rules! keyword {
    ($input:expr, $word:expr) => {
        $crate::strnom::keyword($input, $word)
    };
}
382
383/// Do not use directly. Use `keyword!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500384pub fn keyword<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700385 match punct(input, token) {
David Tolnay1218e122017-06-01 11:13:45 -0700386 Ok((rest, _)) => {
David Tolnayb1032662017-05-31 15:52:28 -0700387 match word_break(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700388 Ok((_, _)) => Ok((rest, token)),
389 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700390 }
391 }
David Tolnay1218e122017-06-01 11:13:45 -0700392 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700393 }
394}
395
/// Runs two parsers in sequence and keeps only the output of the second.
macro_rules! preceded {
    // Both parsers given as macros.
    ($input:expr, $first:ident!( $($first_args:tt)* ), $second:ident!( $($second_args:tt)* )) => {
        match tuple!($input, $first!($($first_args)*), $second!($($second_args)*)) {
            Err(LexError) => Err(LexError),
            Ok((rest, (_, value))) => Ok((rest, value)),
        }
    };

    // Second parser given as a function expression.
    ($input:expr, $first:ident!( $($first_args:tt)* ), $second:expr) => {
        preceded!($input, $first!($($first_args)*), call!($second))
    };
}
408
/// Runs three parsers in sequence and keeps only the output of the middle
/// one.
macro_rules! delimited {
    ($input:expr, $open:ident!( $($open_args:tt)* ), $($tail:tt)+) => {
        match tuple_parser!($input, (), $open!($($open_args)*), $($tail)*) {
            Ok((remaining, (_, inner, _))) => Ok((remaining, inner)),
            Err(LexError) => Err(LexError),
        }
    };
}
417
/// Runs a parser and passes its output through a function.
macro_rules! map {
    // Parser given as a macro.
    ($input:expr, $mac:ident!( $($mac_args:tt)* ), $transform:expr) => {
        match $mac!($input, $($mac_args)*) {
            Ok((remaining, value)) => Ok((remaining, $transform(value))),
            Err(LexError) => Err(LexError),
        }
    };

    // Parser given as a function expression.
    ($input:expr, $parser:expr, $transform:expr) => {
        map!($input, call!($parser), $transform)
    };
}
430
/// Applies a parser zero or more times and collects the outputs in a `Vec`.
///
/// Repetition stops at end of input or at the first failure of the parser.
/// A successful application that consumes nothing is reported as an error.
macro_rules! many0 {
    ($input:expr, $parser:expr) => {{
        let outcome;
        let mut items = ::std::vec::Vec::new();
        let mut cursor = $input;

        loop {
            if cursor.is_empty() {
                outcome = Ok((cursor, items));
                break;
            }

            match $parser(cursor) {
                Ok((next, item)) => {
                    // Every iteration must consume input, otherwise the loop
                    // would never terminate.
                    if next.len() == cursor.len() {
                        outcome = Err(LexError);
                        break;
                    }

                    items.push(item);
                    cursor = next;
                }
                Err(LexError) => {
                    outcome = Ok((cursor, items));
                    break;
                }
            }
        }

        outcome
    }};
}