//! Adapted from [`nom`](https://github.com/Geal/nom).
David Tolnayb1032662017-05-31 15:52:28 -07002
Nika Layzellf8d5f212017-12-11 14:07:02 -05003use std::str::{Chars, CharIndices, Bytes};
4
David Tolnayb1032662017-05-31 15:52:28 -07005use unicode_xid::UnicodeXID;
6
David Tolnay1218e122017-06-01 11:13:45 -07007use imp::LexError;
David Tolnayb1032662017-05-31 15:52:28 -07008
/// A position inside the text being lexed.
///
/// `rest` holds the input that has not been consumed yet, and `off` is a
/// running total of the bytes stepped over by `advance`. What `off` is
/// measured from is decided by whoever builds the first cursor.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// Unconsumed remainder of the input.
    pub rest: &'a str,
    /// Offset of `rest`; grows by `amt` on every `advance(amt)`.
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor moved `amt` bytes further into the input.
    ///
    /// Panics (via string slicing) when `amt` is past the end of `rest` or
    /// does not fall on a character boundary.
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let remaining = &self.rest[amt..];
        let consumed = amt as u32;
        Cursor {
            off: self.off + consumed,
            rest: remaining,
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining input.
    pub fn find(&self, p: char) -> Option<usize> {
        let haystack = self.rest;
        haystack.find(p)
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        let haystack = self.rest;
        haystack.starts_with(s)
    }

    /// Whether no input remains.
    pub fn is_empty(&self) -> bool {
        self.rest.len() == 0
    }

    /// Number of bytes of input that remain.
    pub fn len(&self) -> usize {
        self.as_bytes().len()
    }

    /// The remaining input as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        let text: &'a str = self.rest;
        text.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    pub fn bytes(&self) -> Bytes<'a> {
        let text: &'a str = self.rest;
        text.bytes()
    }

    /// Iterator over the characters of the remaining input.
    pub fn chars(&self) -> Chars<'a> {
        let text: &'a str = self.rest;
        text.chars()
    }

    /// Iterator over `(byte index, char)` pairs of the remaining input.
    pub fn char_indices(&self) -> CharIndices<'a> {
        let text: &'a str = self.rest;
        text.char_indices()
    }
}
55
56pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
57
58pub fn whitespace(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -070059 if input.is_empty() {
David Tolnay1218e122017-06-01 11:13:45 -070060 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -070061 }
62
63 let bytes = input.as_bytes();
64 let mut i = 0;
65 while i < bytes.len() {
Nika Layzellf8d5f212017-12-11 14:07:02 -050066 let s = input.advance(i);
David Tolnayb1032662017-05-31 15:52:28 -070067 if bytes[i] == b'/' {
68 if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
69 !s.starts_with("//!") {
70 if let Some(len) = s.find('\n') {
71 i += len + 1;
72 continue;
73 }
74 break;
75 } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
76 !s.starts_with("/*!") {
David Tolnay1218e122017-06-01 11:13:45 -070077 let (_, com) = block_comment(s)?;
78 i += com.len();
79 continue;
David Tolnayb1032662017-05-31 15:52:28 -070080 }
81 }
82 match bytes[i] {
83 b' ' | 0x09...0x0d => {
84 i += 1;
85 continue;
86 }
87 b if b <= 0x7f => {}
88 _ => {
89 let ch = s.chars().next().unwrap();
90 if is_whitespace(ch) {
91 i += ch.len_utf8();
92 continue;
93 }
94 }
95 }
96 return if i > 0 {
David Tolnay1218e122017-06-01 11:13:45 -070097 Ok((s, ()))
David Tolnayb1032662017-05-31 15:52:28 -070098 } else {
David Tolnay1218e122017-06-01 11:13:45 -070099 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700100 };
101 }
Nika Layzellf8d5f212017-12-11 14:07:02 -0500102 Ok((input.advance(input.len()), ()))
David Tolnayb1032662017-05-31 15:52:28 -0700103}
104
Nika Layzellf8d5f212017-12-11 14:07:02 -0500105pub fn block_comment(input: Cursor) -> PResult<&str> {
David Tolnayb1032662017-05-31 15:52:28 -0700106 if !input.starts_with("/*") {
David Tolnay1218e122017-06-01 11:13:45 -0700107 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -0700108 }
109
110 let mut depth = 0;
111 let bytes = input.as_bytes();
112 let mut i = 0;
113 let upper = bytes.len() - 1;
114 while i < upper {
115 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
116 depth += 1;
117 i += 1; // eat '*'
118 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
119 depth -= 1;
120 if depth == 0 {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500121 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
David Tolnayb1032662017-05-31 15:52:28 -0700122 }
123 i += 1; // eat '/'
124 }
125 i += 1;
126 }
David Tolnay1218e122017-06-01 11:13:45 -0700127 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700128}
129
Nika Layzellf8d5f212017-12-11 14:07:02 -0500130pub fn skip_whitespace(input: Cursor) -> Cursor {
David Tolnayb1032662017-05-31 15:52:28 -0700131 match whitespace(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700132 Ok((rest, _)) => rest,
133 Err(LexError) => input,
David Tolnayb1032662017-05-31 15:52:28 -0700134 }
135}
136
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
141
Nika Layzellf8d5f212017-12-11 14:07:02 -0500142pub fn word_break(input: Cursor) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -0700143 match input.chars().next() {
David Tolnay1218e122017-06-01 11:13:45 -0700144 Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
145 Some(_) | None => Ok((input, ())),
David Tolnayb1032662017-05-31 15:52:28 -0700146 }
147}
148
/// Declares a parser function `$fn_name` taking a `Cursor<'a>` and returning
/// `PResult<'a, $out>`, whose body is the given parser macro invoked on the
/// function's input.
macro_rules! named {
    ($fn_name:ident -> $out:ty, $parser:ident!( $($parser_args:tt)* )) => {
        fn $fn_name<'a>(input: Cursor<'a>) -> $crate::strnom::PResult<'a, $out> {
            $parser!(input, $($parser_args)*)
        }
    };
}
156
/// Tries each `|`-separated alternative in order on the same input and yields
/// the first success.
///
/// An alternative is either a parser function name or a parser macro
/// invocation, optionally followed by `=> { mapper }` to transform its output.
/// The result of the last alternative is returned as is, failure included.
macro_rules! alt {
    // Function name followed by more alternatives.
    ($input:expr, $parser:ident | $($tail:tt)*) => {
        alt!($input, call!($parser) | $($tail)*)
    };

    // Macro invocation followed by more alternatives.
    ($input:expr, $mac:ident!( $($mac_args:tt)*) | $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            matched @ Ok(_) => matched,
            Err(_) => alt!($input, $($tail)*),
        }
    };

    // Mapped macro invocation followed by more alternatives.
    ($input:expr, $mac:ident!( $($mac_args:tt)* ) => { $mapper:expr } | $($tail:tt)+) => {
        match $mac!($input, $($mac_args)*) {
            Err(LexError) => alt!($input, $($tail)*),
            Ok((rest, out)) => Ok((rest, $mapper(out))),
        }
    };

    // Mapped function name followed by more alternatives.
    ($input:expr, $parser:ident => { $mapper:expr } | $($tail:tt)*) => {
        alt!($input, call!($parser) => { $mapper } | $($tail)*)
    };

    // Mapped function name, last alternative.
    ($input:expr, $parser:ident => { $mapper:expr }) => {
        alt!($input, call!($parser) => { $mapper })
    };

    // Mapped macro invocation, last alternative.
    ($input:expr, $mac:ident!( $($mac_args:tt)* ) => { $mapper:expr }) => {
        match $mac!($input, $($mac_args)*) {
            Err(LexError) => Err(LexError),
            Ok((rest, out)) => Ok((rest, $mapper(out))),
        }
    };

    // Function name, last alternative.
    ($input:expr, $parser:ident) => {
        alt!($input, call!($parser))
    };

    // Macro invocation, last alternative.
    ($input:expr, $mac:ident!( $($mac_args:tt)*)) => {
        $mac!($input, $($mac_args)*)
    };
}
199
/// Runs a `>>`-separated sequence of parsers, threading the cursor from one
/// to the next, and finishes with a parenthesised list of result expressions.
///
/// A step written `name: parser` binds that parser's output to `name` for use
/// in the final expressions; an unnamed step discards its output. The first
/// failing step aborts the sequence with `LexError`.
macro_rules! do_parse {
    // End of the sequence: produce the result expressions.
    ($input:expr, ( $($result:expr),* )) => {
        Ok(($input, ( $($result),* )))
    };

    // Unnamed step given as a function name.
    ($input:expr, $parser:ident >> $($tail:tt)*) => {
        do_parse!($input, call!($parser) >> $($tail)*)
    };

    // Unnamed step given as a macro invocation.
    ($input:expr, $mac:ident!( $($mac_args:tt)* ) >> $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((rest, _)) => do_parse!(rest, $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Named step given as a function name.
    ($input:expr, $binding:ident : $parser:ident >> $($tail:tt)*) => {
        do_parse!($input, $binding: call!($parser) >> $($tail)*)
    };

    // Named step given as a macro invocation.
    ($input:expr, $binding:ident : $mac:ident!( $($mac_args:tt)* ) >> $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((rest, out)) => {
                let $binding = out;
                do_parse!(rest, $($tail)*)
            },
            Err(LexError) => Err(LexError),
        }
    };
}
230
/// Runs a parser macro without consuming input: on success its output is
/// returned together with the original `$input`; failure is passed through.
macro_rules! peek {
    ($input:expr, $mac:ident!( $($mac_args:tt)* )) => {
        match $mac!($input, $($mac_args)*) {
            Err(LexError) => Err(LexError),
            Ok((_, out)) => Ok(($input, out)),
        }
    };
}
239
/// Calls `$parser` with `$input` as its first argument, followed by any
/// extra arguments.
macro_rules! call {
    ($input:expr, $parser:expr $(, $extra:expr)*) => {
        $parser($input $(, $extra)*)
    };
}
245
/// Makes a parser optional: success yields `Some(output)`, while failure
/// yields `None` with the original `$input` left unconsumed.
macro_rules! option {
    ($input:expr, $parser:expr) => {
        match $parser($input) {
            Err(LexError) => Ok(($input, None)),
            Ok((rest, out)) => Ok((rest, Some(out))),
        }
    };
}
254
/// Consumes input up to, but not including, the first occurrence of
/// `$substr`.
///
/// On success yields the cursor positioned at the start of the match and the
/// text that preceded it.
///
/// # Errors
///
/// Evaluates to `Err(LexError)` when `$substr` does not occur in the
/// remaining input, and also when the input is empty (even for an empty
/// `$substr`).
macro_rules! take_until {
    ($i:expr, $substr:expr) => {{
        // Bind once so the input expression is not evaluated repeatedly.
        let input = $i;
        // `str::find` returns the byte index of the first match, which is
        // always on a character boundary, so it can be used to slice `rest`.
        match input.rest.find(&$substr[..]) {
            Some(offset) if !input.is_empty() => {
                Ok((input.advance(offset), &input.rest[..offset]))
            }
            _ => Err(LexError),
        }
    }};
}
287
/// Runs a comma-separated list of parsers in sequence and collects their
/// outputs into a tuple. A single parser yields its output directly rather
/// than a one-element tuple. The first failure aborts with `LexError`.
macro_rules! tuple {
    ($input:expr, $($parsers:tt)*) => {
        tuple_parser!($input, (), $($parsers)*)
    };
}

/// Do not use directly. Use `tuple!`.
///
/// The second argument accumulates the outputs gathered so far.
macro_rules! tuple_parser {
    // Next parser is a function name, more follow.
    ($input:expr, ($($done:tt),*), $parser:ident, $($tail:tt)*) => {
        tuple_parser!($input, ($($done),*), call!($parser), $($tail)*)
    };

    // First parser (nothing accumulated yet), more follow.
    ($input:expr, (), $mac:ident!( $($mac_args:tt)* ), $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((rest, out)) => tuple_parser!(rest, (out), $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Later parser, more follow.
    ($input:expr, ($($done:tt)*), $mac:ident!( $($mac_args:tt)* ), $($tail:tt)*) => {
        match $mac!($input, $($mac_args)*) {
            Ok((rest, out)) => tuple_parser!(rest, ($($done)* , out), $($tail)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Last parser is a function name.
    ($input:expr, ($($done:tt),*), $parser:ident) => {
        tuple_parser!($input, ($($done),*), call!($parser))
    };

    // Only one parser in total: its result is returned untouched.
    ($input:expr, (), $mac:ident!( $($mac_args:tt)* )) => {
        $mac!($input, $($mac_args)*)
    };

    // Last parser: close the tuple.
    ($input:expr, ($($done:expr),*), $mac:ident!( $($mac_args:tt)* )) => {
        match $mac!($input, $($mac_args)*) {
            Ok((rest, out)) => Ok((rest, ($($done),*, out))),
            Err(LexError) => Err(LexError),
        }
    };

    // Nothing left to parse.
    ($input:expr, ($($done:expr),*)) => {
        Ok(($input, ($($done),*)))
    };
}
333
/// Negative lookahead: succeeds with `()` and the untouched `$input` when the
/// given parser macro fails, and fails with `LexError` when it succeeds.
macro_rules! not {
    ($input:expr, $mac:ident!( $($mac_args:tt)* )) => {
        match $mac!($input, $($mac_args)*) {
            Err(LexError) => Ok(($input, ())),
            Ok((_, _)) => Err(LexError),
        }
    };
}
342
/// Matches the literal string `$tag` at the start of `$input`.
///
/// On success yields the cursor advanced past the tag and the matched slice
/// of the input; otherwise evaluates to `Err(LexError)`.
macro_rules! tag {
    ($input:expr, $tag:expr) => {
        if !$input.starts_with($tag) {
            Err(LexError)
        } else {
            Ok(($input.advance($tag.len()), &$input.rest[..$tag.len()]))
        }
    };
}
352
353macro_rules! punct {
354 ($i:expr, $punct:expr) => {
355 $crate::strnom::punct($i, $punct)
356 };
357}
358
359/// Do not use directly. Use `punct!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500360pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700361 let input = skip_whitespace(input);
362 if input.starts_with(token) {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500363 Ok((input.advance(token.len()), token))
David Tolnayb1032662017-05-31 15:52:28 -0700364 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700365 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700366 }
367}
368
369macro_rules! keyword {
370 ($i:expr, $keyword:expr) => {
371 $crate::strnom::keyword($i, $keyword)
372 };
373}
374
375/// Do not use directly. Use `keyword!`.
Nika Layzellf8d5f212017-12-11 14:07:02 -0500376pub fn keyword<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700377 match punct(input, token) {
David Tolnay1218e122017-06-01 11:13:45 -0700378 Ok((rest, _)) => {
David Tolnayb1032662017-05-31 15:52:28 -0700379 match word_break(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700380 Ok((_, _)) => Ok((rest, token)),
381 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700382 }
383 }
David Tolnay1218e122017-06-01 11:13:45 -0700384 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700385 }
386}
387
/// Runs two parsers in sequence and keeps only the second one's output.
///
/// The first parser must be a macro invocation; the second may be a macro
/// invocation or any expression callable through `call!`.
macro_rules! preceded {
    ($input:expr, $lead:ident!( $($lead_args:tt)* ), $body:ident!( $($body_args:tt)* )) => {
        match tuple!($input, $lead!($($lead_args)*), $body!($($body_args)*)) {
            Err(LexError) => Err(LexError),
            Ok((rest, (_, out))) => Ok((rest, out)),
        }
    };

    ($input:expr, $lead:ident!( $($lead_args:tt)* ), $body:expr) => {
        preceded!($input, $lead!($($lead_args)*), call!($body))
    };
}
400
/// Runs three parsers in sequence (opening delimiter, body, closing
/// delimiter) and keeps only the output of the middle one.
///
/// The opening parser must be a macro invocation; the remaining two are
/// handed to `tuple_parser!` as written.
macro_rules! delimited {
    ($input:expr, $open:ident!( $($open_args:tt)* ), $($tail:tt)+) => {
        match tuple_parser!($input, (), $open!($($open_args)*), $($tail)*) {
            Ok((rest, (_, inner, _))) => Ok((rest, inner)),
            Err(LexError) => Err(LexError),
        }
    };
}
409
/// Runs a parser and passes its output through `$transform`.
///
/// The parser may be a macro invocation or any expression callable through
/// `call!`. Failure is passed through unchanged.
macro_rules! map {
    ($input:expr, $mac:ident!( $($mac_args:tt)* ), $transform:expr) => {
        match $mac!($input, $($mac_args)*) {
            Ok((rest, out)) => Ok((rest, call!(out, $transform))),
            Err(LexError) => Err(LexError),
        }
    };

    ($input:expr, $parser:expr, $transform:expr) => {
        map!($input, call!($parser), $transform)
    };
}
422
/// Applies `$parser` repeatedly, collecting its outputs into a `Vec`, until
/// the input is exhausted or the parser fails.
///
/// Zero matches is a success (empty vector). The whole expression evaluates
/// to `Err(LexError)` only when the parser succeeds without consuming any
/// input, since repeating it would never terminate.
macro_rules! many0 {
    ($input:expr, $parser:expr) => {{
        let mut collected = ::std::vec::Vec::new();
        let mut cursor = $input;

        loop {
            if cursor.is_empty() {
                break Ok((cursor, collected));
            }

            match $parser(cursor) {
                Ok((next, item)) => {
                    // loop trip must always consume (otherwise infinite loops)
                    if next.len() == cursor.len() {
                        break Err(LexError);
                    }

                    collected.push(item);
                    cursor = next;
                }
                Err(LexError) => break Ok((cursor, collected)),
            }
        }
    }};
}
455}