blob: 1fddcd024c99f9f8fce18ba6842d5214e036bccf [file] [log] [blame]
//! Adapted from [`nom`](https://github.com/Geal/nom).
David Tolnayb1032662017-05-31 15:52:28 -07002
David Tolnayb28f38a2018-03-31 22:02:29 +02003use std::str::{Bytes, CharIndices, Chars};
Nika Layzellf8d5f212017-12-11 14:07:02 -05004
David Tolnayb1032662017-05-31 15:52:28 -07005use unicode_xid::UnicodeXID;
6
David Tolnay1218e122017-06-01 11:13:45 -07007use imp::LexError;
David Tolnayb1032662017-05-31 15:52:28 -07008
/// A position in the text being lexed.
///
/// `rest` is the not-yet-consumed tail of the input. When the crate is built
/// with `--cfg procmacro2_semver_exempt`, `off` additionally carries a `u32`
/// offset that `advance` increases by the number of bytes consumed.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    pub rest: &'a str,
    #[cfg(procmacro2_semver_exempt)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor positioned `amt` bytes further into the input.
    ///
    /// # Panics
    ///
    /// Panics if `amt` is past the end of `rest` or does not fall on a UTF-8
    /// character boundary, because `rest` is sliced at that byte index.
    #[cfg(not(procmacro2_semver_exempt))]
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        Cursor {
            rest: &self.rest[amt..],
        }
    }

    /// Returns a cursor positioned `amt` bytes further into the input, with
    /// `off` increased by `amt`.
    ///
    /// # Panics
    ///
    /// Panics if `amt` is past the end of `rest` or does not fall on a UTF-8
    /// character boundary, because `rest` is sliced at that byte index.
    #[cfg(procmacro2_semver_exempt)]
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        Cursor {
            rest: &self.rest[amt..],
            // NOTE(review): `amt as u32` truncates for inputs over 4 GiB.
            off: self.off + (amt as u32),
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining input.
    pub fn find(&self, p: char) -> Option<usize> {
        self.rest.find(p)
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether no input remains.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Number of bytes of input remaining.
    pub fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    pub fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining input.
    pub fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte index, char)` pairs of the remaining input.
    pub fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }
}
63
64pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
65
66pub fn whitespace(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -070067 if input.is_empty() {
David Tolnay1218e122017-06-01 11:13:45 -070068 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -070069 }
70
71 let bytes = input.as_bytes();
72 let mut i = 0;
73 while i < bytes.len() {
Nika Layzellf8d5f212017-12-11 14:07:02 -050074 let s = input.advance(i);
David Tolnayb1032662017-05-31 15:52:28 -070075 if bytes[i] == b'/' {
David Tolnayb28f38a2018-03-31 22:02:29 +020076 if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////"))
77 && !s.starts_with("//!")
78 {
David Tolnayb1032662017-05-31 15:52:28 -070079 if let Some(len) = s.find('\n') {
80 i += len + 1;
81 continue;
82 }
83 break;
Alex Crichtonf7df57c2018-01-21 21:05:11 -080084 } else if s.starts_with("/**/") {
85 i += 4;
David Tolnayb28f38a2018-03-31 22:02:29 +020086 continue;
87 } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***"))
88 && !s.starts_with("/*!")
89 {
David Tolnay1218e122017-06-01 11:13:45 -070090 let (_, com) = block_comment(s)?;
91 i += com.len();
92 continue;
David Tolnayb1032662017-05-31 15:52:28 -070093 }
94 }
95 match bytes[i] {
96 b' ' | 0x09...0x0d => {
97 i += 1;
98 continue;
99 }
100 b if b <= 0x7f => {}
101 _ => {
102 let ch = s.chars().next().unwrap();
103 if is_whitespace(ch) {
104 i += ch.len_utf8();
105 continue;
106 }
107 }
108 }
David Tolnayb28f38a2018-03-31 22:02:29 +0200109 return if i > 0 { Ok((s, ())) } else { Err(LexError) };
David Tolnayb1032662017-05-31 15:52:28 -0700110 }
Nika Layzellf8d5f212017-12-11 14:07:02 -0500111 Ok((input.advance(input.len()), ()))
David Tolnayb1032662017-05-31 15:52:28 -0700112}
113
Nika Layzellf8d5f212017-12-11 14:07:02 -0500114pub fn block_comment(input: Cursor) -> PResult<&str> {
David Tolnayb1032662017-05-31 15:52:28 -0700115 if !input.starts_with("/*") {
David Tolnay1218e122017-06-01 11:13:45 -0700116 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -0700117 }
118
119 let mut depth = 0;
120 let bytes = input.as_bytes();
121 let mut i = 0;
122 let upper = bytes.len() - 1;
123 while i < upper {
124 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
125 depth += 1;
126 i += 1; // eat '*'
127 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
128 depth -= 1;
129 if depth == 0 {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500130 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
David Tolnayb1032662017-05-31 15:52:28 -0700131 }
132 i += 1; // eat '/'
133 }
134 i += 1;
135 }
David Tolnay1218e122017-06-01 11:13:45 -0700136 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700137}
138
Nika Layzellf8d5f212017-12-11 14:07:02 -0500139pub fn skip_whitespace(input: Cursor) -> Cursor {
David Tolnayb1032662017-05-31 15:52:28 -0700140 match whitespace(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700141 Ok((rest, _)) => rest,
142 Err(LexError) => input,
David Tolnayb1032662017-05-31 15:52:28 -0700143 }
144}
145
/// Whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus U+200E and U+200F.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}'
}
150
Nika Layzellf8d5f212017-12-11 14:07:02 -0500151pub fn word_break(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -0700152 match input.chars().next() {
David Tolnay1218e122017-06-01 11:13:45 -0700153 Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
154 Some(_) | None => Ok((input, ())),
David Tolnayb1032662017-05-31 15:52:28 -0700155 }
156}
157
/// Defines a parser function `$name` that takes a `Cursor` and returns
/// `PResult<$o>`, whose body is the given combinator macro invocation applied
/// to the input cursor.
macro_rules! named {
    ($name:ident -> $o:ty, $submac:ident!( $($args:tt)* )) => {
        fn $name<'a>(i: Cursor<'a>) -> $crate::strnom::PResult<'a, $o> {
            $submac!(i, $($args)*)
        }
    };
}
165
/// Tries each `|`-separated alternative in order on the same input and yields
/// the first success.
///
/// An alternative is either a bare function name or a combinator macro
/// invocation, optionally followed by `=> { f }` to pass the parsed output
/// through `f`. The error of the last alternative is the error of the whole
/// expression.
macro_rules! alt {
    // Bare function name followed by more alternatives.
    ($i:expr, $e:ident | $($rest:tt)*) => {
        alt!($i, call!($e) | $($rest)*)
    };

    // Combinator followed by more alternatives.
    ($i:expr, $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => {
        match $subrule!($i, $($args)*) {
            res @ Ok(_) => res,
            _ => alt!($i, $($rest)*)
        }
    };

    // Combinator with an output mapping, followed by more alternatives.
    ($i:expr, $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)+) => {
        match $subrule!($i, $($args)*) {
            Ok((i, o)) => Ok((i, $gen(o))),
            Err(LexError) => alt!($i, $($rest)*)
        }
    };

    // Bare function name with an output mapping, followed by more alternatives.
    ($i:expr, $e:ident => { $gen:expr } | $($rest:tt)*) => {
        alt!($i, call!($e) => { $gen } | $($rest)*)
    };

    // Final alternative: bare function name with an output mapping.
    ($i:expr, $e:ident => { $gen:expr }) => {
        alt!($i, call!($e) => { $gen })
    };

    // Final alternative: combinator with an output mapping.
    ($i:expr, $subrule:ident!( $($args:tt)* ) => { $gen:expr }) => {
        match $subrule!($i, $($args)*) {
            Ok((i, o)) => Ok((i, $gen(o))),
            Err(LexError) => Err(LexError),
        }
    };

    // Final alternative: bare function name.
    ($i:expr, $e:ident) => {
        alt!($i, call!($e))
    };

    // Final alternative: combinator.
    ($i:expr, $subrule:ident!( $($args:tt)*)) => {
        $subrule!($i, $($args)*)
    };
}
208
/// Runs a `>>`-separated sequence of parsers, each starting where the
/// previous one stopped, and finishes with a parenthesized result expression.
///
/// A step written `name: parser` binds that parser's output to `name` for use
/// in later steps and in the final expression; an unlabeled step discards its
/// output. The first failing step aborts the sequence with `LexError`.
macro_rules! do_parse {
    // End of the sequence: produce the result expression(s).
    ($i:expr, ( $($rest:expr),* )) => {
        Ok(($i, ( $($rest),* )))
    };

    // Unlabeled step given as a bare function name.
    ($i:expr, $e:ident >> $($rest:tt)*) => {
        do_parse!($i, call!($e) >> $($rest)*)
    };

    // Unlabeled step given as a combinator: output is dropped.
    ($i:expr, $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, _)) => do_parse!(i, $($rest)*),
        }
    };

    // Labeled step given as a bare function name.
    ($i:expr, $field:ident : $e:ident >> $($rest:tt)*) => {
        do_parse!($i, $field: call!($e) >> $($rest)*)
    };

    // Labeled step given as a combinator: output is bound to `$field`.
    ($i:expr, $field:ident : $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => {
                let $field = o;
                do_parse!(i, $($rest)*)
            },
        }
    };
}
239
/// Runs the given combinator but, on success, yields the original input
/// alongside the parsed output, so nothing is consumed.
///
/// `$i` is evaluated twice in the expansion.
macro_rules! peek {
    ($i:expr, $submac:ident!( $($args:tt)* )) => {
        match $submac!($i, $($args)*) {
            Ok((_, o)) => Ok(($i, o)),
            Err(LexError) => Err(LexError),
        }
    };
}
248
/// Calls `$fun` with the input as its first argument, followed by any extra
/// arguments. Lets a plain function be used where a combinator macro
/// invocation is expected.
macro_rules! call {
    ($i:expr, $fun:expr $(, $args:expr)*) => {
        $fun($i $(, $args)*)
    };
}
254
/// Makes the parser `$f` optional: a success is wrapped in `Some`, while a
/// failure becomes `Ok` with `None` and the input left untouched.
///
/// `$i` is evaluated twice in the expansion.
macro_rules! option {
    ($i:expr, $f:expr) => {
        match $f($i) {
            Ok((i, o)) => Ok((i, Some(o))),
            Err(LexError) => Ok(($i, None)),
        }
    };
}
263
/// Consumes input up to, but not including, the next `'\n'`, or up to the end
/// of the input when there is no newline, and yields the consumed text.
///
/// Always succeeds; on empty input it yields `""` without advancing.
macro_rules! take_until_newline_or_eof {
    ($i:expr,) => {{
        if $i.is_empty() {
            Ok(($i, ""))
        } else {
            match $i.find('\n') {
                Some(i) => Ok(($i.advance(i), &$i.rest[..i])),
                // No newline: the taken text is everything that remains, not
                // an empty string.
                None => Ok(($i.advance($i.len()), $i.rest)),
            }
        }
    }};
}
276
/// Runs a comma-separated list of parsers in sequence and collects their
/// outputs into a tuple. Thin entry point that seeds `tuple_parser!` with an
/// empty accumulator.
macro_rules! tuple {
    ($i:expr, $($rest:tt)*) => {
        tuple_parser!($i, (), $($rest)*)
    };
}
282
/// Do not use directly. Use `tuple!`.
///
/// Arguments are the input, a parenthesized accumulator of outputs parsed so
/// far, and the remaining comma-separated parsers. Each step runs the next
/// parser on the input left by the previous one and appends its output to the
/// accumulator; the first failure aborts with `LexError`.
macro_rules! tuple_parser {
    // Next parser is a bare function name and more parsers follow.
    ($i:expr, ($($parsed:tt),*), $e:ident, $($rest:tt)*) => {
        tuple_parser!($i, ($($parsed),*), call!($e), $($rest)*)
    };

    // First parser of several: start the accumulator.
    ($i:expr, (), $submac:ident!( $($args:tt)* ), $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => tuple_parser!(i, (o), $($rest)*),
        }
    };

    // Middle parser: append to a non-empty accumulator.
    ($i:expr, ($($parsed:tt)*), $submac:ident!( $($args:tt)* ), $($rest:tt)*) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => tuple_parser!(i, ($($parsed)* , o), $($rest)*),
        }
    };

    // Last parser is a bare function name.
    ($i:expr, ($($parsed:tt),*), $e:ident) => {
        tuple_parser!($i, ($($parsed),*), call!($e))
    };

    // A single parser in total: its result is passed through unchanged.
    ($i:expr, (), $submac:ident!( $($args:tt)* )) => {
        $submac!($i, $($args)*)
    };

    // Last parser: build the final tuple.
    ($i:expr, ($($parsed:expr),*), $submac:ident!( $($args:tt)* )) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((i, o)) => Ok((i, ($($parsed),*, o)))
        }
    };

    // No parsers left: yield the accumulator as is.
    ($i:expr, ($($parsed:expr),*)) => {
        Ok(($i, ($($parsed),*)))
    };
}
322
/// Negative lookahead: succeeds with `()` and the untouched input when the
/// given combinator fails, and fails with `LexError` when it succeeds.
///
/// `$i` is evaluated twice in the expansion.
macro_rules! not {
    ($i:expr, $submac:ident!( $($args:tt)* )) => {
        match $submac!($i, $($args)*) {
            Ok((_, _)) => Err(LexError),
            Err(LexError) => Ok(($i, ())),
        }
    };
}
331
/// Matches the literal `$tag` at the very start of the input, with no
/// whitespace skipping. On success yields the matched slice of the input and
/// a cursor advanced past it; otherwise fails with `LexError`.
macro_rules! tag {
    ($i:expr, $tag:expr) => {
        if $i.starts_with($tag) {
            Ok(($i.advance($tag.len()), &$i.rest[..$tag.len()]))
        } else {
            Err(LexError)
        }
    };
}
341
/// Matches the punctuation literal `$punct` after skipping leading
/// whitespace. Forwards to the `punct` function in `strnom`.
macro_rules! punct {
    ($i:expr, $punct:expr) => {
        $crate::strnom::punct($i, $punct)
    };
}
347
348/// Do not use directly. Use `punct!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500349pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700350 let input = skip_whitespace(input);
351 if input.starts_with(token) {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500352 Ok((input.advance(token.len()), token))
David Tolnayb1032662017-05-31 15:52:28 -0700353 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700354 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700355 }
356}
357
/// Runs two parsers in sequence and keeps only the output of the second.
///
/// The first parser must be a combinator macro invocation; the second may be
/// a combinator invocation or any expression callable through `call!`.
macro_rules! preceded {
    ($i:expr, $submac:ident!( $($args:tt)* ), $submac2:ident!( $($args2:tt)* )) => {
        match tuple!($i, $submac!($($args)*), $submac2!($($args2)*)) {
            Ok((remaining, (_, o))) => Ok((remaining, o)),
            Err(LexError) => Err(LexError),
        }
    };

    ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => {
        preceded!($i, $submac!($($args)*), call!($g))
    };
}
370
/// Runs three parsers in sequence (opener, body, closer) and keeps only the
/// output of the middle one.
macro_rules! delimited {
    ($i:expr, $submac:ident!( $($args:tt)* ), $($rest:tt)+) => {
        match tuple_parser!($i, (), $submac!($($args)*), $($rest)*) {
            Err(LexError) => Err(LexError),
            Ok((i1, (_, o, _))) => Ok((i1, o))
        }
    };
}
379
/// Runs a parser and passes its output through `$g`, leaving the remaining
/// input untouched. Failures are propagated as `LexError`.
///
/// The parser may be a combinator macro invocation or any expression callable
/// through `call!`.
macro_rules! map {
    ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => {
        match $submac!($i, $($args)*) {
            Err(LexError) => Err(LexError),
            // `call!(o, $g)` expands to `$g(o)`.
            Ok((i, o)) => Ok((i, call!(o, $g)))
        }
    };

    ($i:expr, $f:expr, $g:expr) => {
        map!($i, call!($f), $g)
    };
}
392
/// Applies the parser `$f` repeatedly, collecting its outputs into a `Vec`,
/// until the input is empty or `$f` fails.
///
/// Yields the collected outputs together with the input left over. Zero
/// matches is a success with an empty `Vec`. If `$f` succeeds without
/// consuming anything, the whole expression fails with `LexError`.
macro_rules! many0 {
    ($i:expr, $f:expr) => {{
        let ret;
        let mut res = ::std::vec::Vec::new();
        let mut input = $i;

        loop {
            if input.is_empty() {
                ret = Ok((input, res));
                break;
            }

            match $f(input) {
                Err(LexError) => {
                    // First failure ends the repetition; keep what we have.
                    ret = Ok((input, res));
                    break;
                }
                Ok((i, o)) => {
                    // loop trip must always consume (otherwise infinite loops)
                    if i.len() == input.len() {
                        ret = Err(LexError);
                        break;
                    }

                    res.push(o);
                    input = i;
                }
            }
        }

        ret
    }};
}
425}