blob: 33964f45bda39252c2b9f778dc5ed514d9cfe5ef [file] [log] [blame]
David Tolnay1218e122017-06-01 11:13:45 -07001//! Adapted from [`nom`](https://github.com/Geal/nom).
David Tolnayb1032662017-05-31 15:52:28 -07002
Nika Layzellf8d5f212017-12-11 14:07:02 -05003use std::str::{Chars, CharIndices, Bytes};
4
David Tolnayb1032662017-05-31 15:52:28 -07005use unicode_xid::UnicodeXID;
6
David Tolnay1218e122017-06-01 11:13:45 -07007use imp::LexError;
David Tolnayb1032662017-05-31 15:52:28 -07008
/// A position in the text being lexed.
///
/// Holds the portion of the input that has not been consumed yet. When the
/// crate is built with `procmacro2_semver_exempt`, it also carries a running
/// offset that `advance` increases by the number of bytes it skips.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// The input that still has to be lexed.
    pub rest: &'a str,
    /// Offset of `rest`, incremented by `advance` with every byte skipped.
    #[cfg(procmacro2_semver_exempt)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor positioned `amt` bytes further into the input.
    ///
    /// Panics, like any string slice, if `amt` is out of range or does not
    /// fall on a character boundary.
    #[cfg(not(procmacro2_semver_exempt))]
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let remaining = &self.rest[amt..];
        Cursor { rest: remaining }
    }

    /// Returns a cursor positioned `amt` bytes further into the input, with
    /// `off` increased by the same amount.
    ///
    /// Panics, like any string slice, if `amt` is out of range or does not
    /// fall on a character boundary.
    #[cfg(procmacro2_semver_exempt)]
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let remaining = &self.rest[amt..];
        let skipped = amt as u32;
        Cursor {
            rest: remaining,
            off: self.off + skipped,
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining input.
    pub fn find(&self, p: char) -> Option<usize> {
        let remaining: &str = self.rest;
        remaining.find(p)
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        let remaining: &str = self.rest;
        remaining.starts_with(s)
    }

    /// Whether all input has been consumed.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Length of the remaining input in bytes.
    pub fn len(&self) -> usize {
        let remaining: &str = self.rest;
        remaining.len()
    }

    /// The remaining input as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining: &'a str = self.rest;
        remaining.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    pub fn bytes(&self) -> Bytes<'a> {
        let remaining: &'a str = self.rest;
        remaining.bytes()
    }

    /// Iterator over the characters of the remaining input.
    pub fn chars(&self) -> Chars<'a> {
        let remaining: &'a str = self.rest;
        remaining.chars()
    }

    /// Iterator over the characters of the remaining input paired with their
    /// byte offsets.
    pub fn char_indices(&self) -> CharIndices<'a> {
        let remaining: &'a str = self.rest;
        remaining.char_indices()
    }
}
63
64pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
65
66pub fn whitespace(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -070067 if input.is_empty() {
David Tolnay1218e122017-06-01 11:13:45 -070068 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -070069 }
70
71 let bytes = input.as_bytes();
72 let mut i = 0;
73 while i < bytes.len() {
Nika Layzellf8d5f212017-12-11 14:07:02 -050074 let s = input.advance(i);
David Tolnayb1032662017-05-31 15:52:28 -070075 if bytes[i] == b'/' {
76 if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
77 !s.starts_with("//!") {
78 if let Some(len) = s.find('\n') {
79 i += len + 1;
80 continue;
81 }
82 break;
Alex Crichtonf7df57c2018-01-21 21:05:11 -080083 } else if s.starts_with("/**/") {
84 i += 4;
85 continue
David Tolnayb1032662017-05-31 15:52:28 -070086 } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
87 !s.starts_with("/*!") {
David Tolnay1218e122017-06-01 11:13:45 -070088 let (_, com) = block_comment(s)?;
89 i += com.len();
90 continue;
David Tolnayb1032662017-05-31 15:52:28 -070091 }
92 }
93 match bytes[i] {
94 b' ' | 0x09...0x0d => {
95 i += 1;
96 continue;
97 }
98 b if b <= 0x7f => {}
99 _ => {
100 let ch = s.chars().next().unwrap();
101 if is_whitespace(ch) {
102 i += ch.len_utf8();
103 continue;
104 }
105 }
106 }
107 return if i > 0 {
David Tolnay1218e122017-06-01 11:13:45 -0700108 Ok((s, ()))
David Tolnayb1032662017-05-31 15:52:28 -0700109 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700110 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700111 };
112 }
Nika Layzellf8d5f212017-12-11 14:07:02 -0500113 Ok((input.advance(input.len()), ()))
David Tolnayb1032662017-05-31 15:52:28 -0700114}
115
Nika Layzellf8d5f212017-12-11 14:07:02 -0500116pub fn block_comment(input: Cursor) -> PResult<&str> {
David Tolnayb1032662017-05-31 15:52:28 -0700117 if !input.starts_with("/*") {
David Tolnay1218e122017-06-01 11:13:45 -0700118 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -0700119 }
120
121 let mut depth = 0;
122 let bytes = input.as_bytes();
123 let mut i = 0;
124 let upper = bytes.len() - 1;
125 while i < upper {
126 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
127 depth += 1;
128 i += 1; // eat '*'
129 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
130 depth -= 1;
131 if depth == 0 {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500132 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
David Tolnayb1032662017-05-31 15:52:28 -0700133 }
134 i += 1; // eat '/'
135 }
136 i += 1;
137 }
David Tolnay1218e122017-06-01 11:13:45 -0700138 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700139}
140
Nika Layzellf8d5f212017-12-11 14:07:02 -0500141pub fn skip_whitespace(input: Cursor) -> Cursor {
David Tolnayb1032662017-05-31 15:52:28 -0700142 match whitespace(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700143 Ok((rest, _)) => rest,
144 Err(LexError) => input,
David Tolnayb1032662017-05-31 15:52:28 -0700145 }
146}
147
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}
152
Nika Layzellf8d5f212017-12-11 14:07:02 -0500153pub fn word_break(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -0700154 match input.chars().next() {
David Tolnay1218e122017-06-01 11:13:45 -0700155 Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
156 Some(_) | None => Ok((input, ())),
David Tolnayb1032662017-05-31 15:52:28 -0700157 }
158}
159
/// Defines a parser function `$name` that takes a `Cursor` and returns
/// `PResult<$out>`, whose body is the given parser macro applied to the input.
macro_rules! named {
    ($name:ident -> $out:ty, $parser:ident!( $($args:tt)* )) => {
        fn $name<'a>(input: Cursor<'a>) -> $crate::strnom::PResult<'a, $out> {
            $parser!(input, $($args)*)
        }
    };
}
167
/// Tries `|`-separated alternatives from left to right on the same input and
/// yields the first success. An alternative may be a bare function name or a
/// parser macro, optionally followed by `=> { f }` to transform its output.
macro_rules! alt {
    // Bare function followed by further alternatives.
    ($input:expr, $func:ident | $($tail:tt)*) => {
        alt!($input, call!($func) | $($tail)*)
    };

    // Parser macro followed by further alternatives.
    ($input:expr, $parser:ident!( $($args:tt)*) | $($tail:tt)*) => {
        match $parser!($input, $($args)*) {
            matched @ Ok(_) => matched,
            _ => alt!($input, $($tail)*)
        }
    };

    // Parser macro with an output transformation, followed by further alternatives.
    ($input:expr, $parser:ident!( $($args:tt)* ) => { $convert:expr } | $($tail:tt)+) => {
        match $parser!($input, $($args)*) {
            Ok((remaining, value)) => Ok((remaining, $convert(value))),
            Err(LexError) => alt!($input, $($tail)*)
        }
    };

    // Bare function with an output transformation, followed by further alternatives.
    ($input:expr, $func:ident => { $convert:expr } | $($tail:tt)*) => {
        alt!($input, call!($func) => { $convert } | $($tail)*)
    };

    // Final alternative: bare function with an output transformation.
    ($input:expr, $func:ident => { $convert:expr }) => {
        alt!($input, call!($func) => { $convert })
    };

    // Final alternative: parser macro with an output transformation.
    ($input:expr, $parser:ident!( $($args:tt)* ) => { $convert:expr }) => {
        match $parser!($input, $($args)*) {
            Ok((remaining, value)) => Ok((remaining, $convert(value))),
            Err(LexError) => Err(LexError),
        }
    };

    // Final alternative: bare function.
    ($input:expr, $func:ident) => {
        alt!($input, call!($func))
    };

    // Final alternative: parser macro.
    ($input:expr, $parser:ident!( $($args:tt)*)) => {
        $parser!($input, $($args)*)
    };
}
210
/// Runs a sequence of parsers joined by `>>`, each starting where the previous
/// one stopped, and finishes with a parenthesized result expression.
///
/// A step written `name: parser` binds that parser's output to `name` for the
/// remaining steps and the final expression; an unnamed step discards its
/// output. The first failing step fails the whole sequence.
macro_rules! do_parse {
    // End of the sequence: produce the result expression(s).
    ($input:expr, ( $($result:expr),* )) => {
        Ok(($input, ( $($result),* )))
    };

    // Unnamed step given as a bare function.
    ($input:expr, $func:ident >> $($tail:tt)*) => {
        do_parse!($input, call!($func) >> $($tail)*)
    };

    // Unnamed step given as a parser macro.
    ($input:expr, $parser:ident!( $($args:tt)* ) >> $($tail:tt)*) => {
        match $parser!($input, $($args)*) {
            Ok((next, _)) => do_parse!(next, $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Named step given as a bare function.
    ($input:expr, $field:ident : $func:ident >> $($tail:tt)*) => {
        do_parse!($input, $field: call!($func) >> $($tail)*)
    };

    // Named step given as a parser macro.
    ($input:expr, $field:ident : $parser:ident!( $($args:tt)* ) >> $($tail:tt)*) => {
        match $parser!($input, $($args)*) {
            Ok((next, $field)) => {
                do_parse!(next, $($tail)*)
            },
            Err(LexError) => Err(LexError),
        }
    };
}
241
/// Runs a parser macro but, on success, returns the original input alongside
/// the parsed value, so nothing is consumed.
macro_rules! peek {
    ($input:expr, $parser:ident!( $($args:tt)* )) => {
        match $parser!($input, $($args)*) {
            Err(LexError) => Err(LexError),
            Ok((_, value)) => Ok(($input, value)),
        }
    };
}
250
/// Calls `$callee` with the input as its first argument, followed by any
/// extra arguments.
macro_rules! call {
    ($input:expr, $callee:expr $(, $extra:expr)*) => {
        $callee($input $(, $extra)*)
    };
}
256
/// Makes a parser optional: yields `Some(value)` when `$parser` succeeds and
/// `None`, with the input untouched, when it fails.
macro_rules! option {
    ($input:expr, $parser:expr) => {
        match $parser($input) {
            Err(LexError) => Ok(($input, None)),
            Ok((remaining, value)) => Ok((remaining, Some(value))),
        }
    };
}
265
/// Consumes input up to, but not including, the first occurrence of
/// `$substr`.
///
/// On success yields the cursor positioned at the start of the match together
/// with the text that preceded it. Fails with `LexError` when `$substr` does
/// not occur in the remaining input.
///
/// `$substr` is evaluated once and must be sliceable to `&str` (for example a
/// string literal, `&str` or `String`). An empty `$substr` matches at offset
/// zero.
macro_rules! take_until {
    ($i:expr, $substr:expr) => {{
        let pattern: &str = &$substr[..];
        // `str::find` returns the byte offset of the first match, which is
        // always a character boundary, so it is safe to slice and advance by.
        match $i.rest.find(pattern) {
            Some(offset) => Ok(($i.advance(offset), &$i.rest[..offset])),
            None => Err(LexError),
        }
    }};
}
298
/// Runs a comma-separated list of parsers in sequence; the work is delegated
/// to `tuple_parser!`, starting with an empty list of collected outputs.
macro_rules! tuple {
    ($input:expr, $($parsers:tt)*) => {
        tuple_parser!($input, (), $($parsers)*)
    };
}
304
/// Do not use directly. Use `tuple!`.
///
/// The second argument is the parenthesized list of outputs collected so far;
/// each step runs the next parser on the input left by the previous one and
/// appends its output to that list. The first failing parser fails the whole
/// sequence.
macro_rules! tuple_parser {
    // Next parser is a bare function: wrap it in `call!`.
    ($input:expr, ($($done:tt),*), $func:ident, $($tail:tt)*) => {
        tuple_parser!($input, ($($done),*), call!($func), $($tail)*)
    };

    // First parser of several: start the list of outputs.
    ($input:expr, (), $parser:ident!( $($args:tt)* ), $($tail:tt)*) => {
        match $parser!($input, $($args)*) {
            Ok((next, value)) => tuple_parser!(next, (value), $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Middle parser: append its output to the list.
    ($input:expr, ($($done:tt)*), $parser:ident!( $($args:tt)* ), $($tail:tt)*) => {
        match $parser!($input, $($args)*) {
            Ok((next, value)) => tuple_parser!(next, ($($done)* , value), $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Last parser is a bare function: wrap it in `call!`.
    ($input:expr, ($($done:tt),*), $func:ident) => {
        tuple_parser!($input, ($($done),*), call!($func))
    };

    // A single parser with nothing collected: its result is used as is.
    ($input:expr, (), $parser:ident!( $($args:tt)* )) => {
        $parser!($input, $($args)*)
    };

    // Last parser: build the final tuple.
    ($input:expr, ($($done:expr),*), $parser:ident!( $($args:tt)* )) => {
        match $parser!($input, $($args)*) {
            Ok((next, value)) => Ok((next, ($($done),*, value))),
            Err(LexError) => Err(LexError),
        }
    };

    // No parsers left: return what has been collected.
    ($input:expr, ($($done:expr),*)) => {
        Ok(($input, ($($done),*)))
    };
}
344
/// Inverts a parser macro: succeeds with `()` and the untouched input when the
/// parser fails, and fails when the parser succeeds.
macro_rules! not {
    ($input:expr, $parser:ident!( $($args:tt)* )) => {
        match $parser!($input, $($args)*) {
            Err(LexError) => Ok(($input, ())),
            Ok((_, _)) => Err(LexError),
        }
    };
}
353
/// Matches the literal text `$text` at the very start of the input (no
/// whitespace is skipped), yielding the cursor past it and the matched slice.
macro_rules! tag {
    ($input:expr, $text:expr) => {
        if !$input.starts_with($text) {
            Err(LexError)
        } else {
            Ok(($input.advance($text.len()), &$input.rest[..$text.len()]))
        }
    };
}
363
/// Macro form of the `punct` function, for use inside parser combinators.
macro_rules! punct {
    ($input:expr, $token:expr) => {
        $crate::strnom::punct($input, $token)
    };
}
369
370/// Do not use directly. Use `punct!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500371pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700372 let input = skip_whitespace(input);
373 if input.starts_with(token) {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500374 Ok((input.advance(token.len()), token))
David Tolnayb1032662017-05-31 15:52:28 -0700375 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700376 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700377 }
378}
379
/// Runs two parsers in sequence and keeps only the output of the second.
macro_rules! preceded {
    // Both parsers given as macros.
    ($input:expr, $prefix:ident!( $($prefix_args:tt)* ), $body:ident!( $($body_args:tt)* )) => {
        match tuple!($input, $prefix!($($prefix_args)*), $body!($($body_args)*)) {
            Err(LexError) => Err(LexError),
            Ok((remaining, (_, value))) => Ok((remaining, value)),
        }
    };

    // Second parser given as an expression: wrap it in `call!`.
    ($input:expr, $prefix:ident!( $($prefix_args:tt)* ), $body:expr) => {
        preceded!($input, $prefix!($($prefix_args)*), call!($body))
    };
}
392
/// Runs three parsers in sequence and keeps only the output of the middle one.
macro_rules! delimited {
    ($input:expr, $open:ident!( $($open_args:tt)* ), $($tail:tt)+) => {
        match tuple_parser!($input, (), $open!($($open_args)*), $($tail)*) {
            Ok((remaining, (_, value, _))) => Ok((remaining, value)),
            Err(LexError) => Err(LexError),
        }
    };
}
401
/// Runs a parser and, on success, passes its output through `$convert`.
macro_rules! map {
    // Parser given as a macro.
    ($input:expr, $parser:ident!( $($args:tt)* ), $convert:expr) => {
        match $parser!($input, $($args)*) {
            Ok((remaining, value)) => Ok((remaining, $convert(value))),
            Err(LexError) => Err(LexError),
        }
    };

    // Parser given as an expression: wrap it in `call!`.
    ($input:expr, $parser:expr, $convert:expr) => {
        map!($input, call!($parser), $convert)
    };
}
414
/// Applies `$parser` repeatedly, collecting its outputs into a `Vec`.
///
/// Stops successfully when the input is exhausted or the parser fails; the
/// result may be empty. Fails if the parser succeeds without consuming any
/// input.
macro_rules! many0 {
    ($input:expr, $parser:expr) => {{
        let outcome;
        let mut items = ::std::vec::Vec::new();
        let mut cursor = $input;

        loop {
            if cursor.is_empty() {
                outcome = Ok((cursor, items));
                break;
            }

            match $parser(cursor) {
                Ok((next, item)) => {
                    // loop trip must always consume (otherwise infinite loops)
                    if next.len() == cursor.len() {
                        outcome = Err(LexError);
                        break;
                    }

                    items.push(item);
                    cursor = next;
                }
                Err(LexError) => {
                    outcome = Ok((cursor, items));
                    break;
                }
            }
        }

        outcome
    }};
}
447}