blob: 875c4bf41d7746bd91c54df7ac1f41e41d5c610f [file] [log] [blame]
//! Adapted from [`nom`](https://github.com/Geal/nom).
David Tolnayb1032662017-05-31 15:52:28 -07002
3use unicode_xid::UnicodeXID;
4
David Tolnay1218e122017-06-01 11:13:45 -07005use imp::LexError;
David Tolnayb1032662017-05-31 15:52:28 -07006
David Tolnay1218e122017-06-01 11:13:45 -07007pub type PResult<'a, O> = Result<(&'a str, O), LexError>;
8
9pub fn whitespace(input: &str) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -070010 if input.is_empty() {
David Tolnay1218e122017-06-01 11:13:45 -070011 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -070012 }
13
14 let bytes = input.as_bytes();
15 let mut i = 0;
16 while i < bytes.len() {
17 let s = &input[i..];
18 if bytes[i] == b'/' {
19 if s.starts_with("//") && (!s.starts_with("///") || s.starts_with("////")) &&
20 !s.starts_with("//!") {
21 if let Some(len) = s.find('\n') {
22 i += len + 1;
23 continue;
24 }
25 break;
26 } else if s.starts_with("/*") && (!s.starts_with("/**") || s.starts_with("/***")) &&
27 !s.starts_with("/*!") {
David Tolnay1218e122017-06-01 11:13:45 -070028 let (_, com) = block_comment(s)?;
29 i += com.len();
30 continue;
David Tolnayb1032662017-05-31 15:52:28 -070031 }
32 }
33 match bytes[i] {
34 b' ' | 0x09...0x0d => {
35 i += 1;
36 continue;
37 }
38 b if b <= 0x7f => {}
39 _ => {
40 let ch = s.chars().next().unwrap();
41 if is_whitespace(ch) {
42 i += ch.len_utf8();
43 continue;
44 }
45 }
46 }
47 return if i > 0 {
David Tolnay1218e122017-06-01 11:13:45 -070048 Ok((s, ()))
David Tolnayb1032662017-05-31 15:52:28 -070049 } else {
David Tolnay1218e122017-06-01 11:13:45 -070050 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -070051 };
52 }
David Tolnay1218e122017-06-01 11:13:45 -070053 Ok(("", ()))
David Tolnayb1032662017-05-31 15:52:28 -070054}
55
David Tolnay1218e122017-06-01 11:13:45 -070056pub fn block_comment(input: &str) -> PResult<&str> {
David Tolnayb1032662017-05-31 15:52:28 -070057 if !input.starts_with("/*") {
David Tolnay1218e122017-06-01 11:13:45 -070058 return Err(LexError);
David Tolnayb1032662017-05-31 15:52:28 -070059 }
60
61 let mut depth = 0;
62 let bytes = input.as_bytes();
63 let mut i = 0;
64 let upper = bytes.len() - 1;
65 while i < upper {
66 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
67 depth += 1;
68 i += 1; // eat '*'
69 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
70 depth -= 1;
71 if depth == 0 {
David Tolnay1218e122017-06-01 11:13:45 -070072 return Ok((&input[i + 2..], &input[..i + 2]));
David Tolnayb1032662017-05-31 15:52:28 -070073 }
74 i += 1; // eat '/'
75 }
76 i += 1;
77 }
David Tolnay1218e122017-06-01 11:13:45 -070078 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -070079}
80
81pub fn skip_whitespace(input: &str) -> &str {
82 match whitespace(input) {
David Tolnay1218e122017-06-01 11:13:45 -070083 Ok((rest, _)) => rest,
84 Err(LexError) => input,
David Tolnayb1032662017-05-31 15:52:28 -070085 }
86}
87
/// Reports whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}
92
David Tolnay1218e122017-06-01 11:13:45 -070093fn word_break(input: &str) -> PResult<()> {
David Tolnayb1032662017-05-31 15:52:28 -070094 match input.chars().next() {
David Tolnay1218e122017-06-01 11:13:45 -070095 Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
96 Some(_) | None => Ok((input, ())),
David Tolnayb1032662017-05-31 15:52:28 -070097 }
98}
99
/// Declares a private parser function `$fn_name` with the signature
/// `fn(&str) -> PResult<$output>` whose body is the given parser macro
/// applied to the function's input.
macro_rules! named {
    ($fn_name:ident -> $output:ty, $parser:ident!( $($params:tt)* )) => {
        fn $fn_name(input: &str) -> $crate::strnom::PResult<$output> {
            $parser!(input, $($params)*)
        }
    };
}
107
/// Tries each `|`-separated alternative in order and yields the first
/// success; fails with `LexError` when every alternative fails.
///
/// An alternative is either a parser function name or a parser macro
/// invocation, optionally followed by `=> { f }` to map the parsed value
/// through `f`.
macro_rules! alt {
    // Function name, more alternatives follow: normalise to `call!`.
    ($input:expr, $func:ident | $($others:tt)*) => {
        alt!($input, call!($func) | $($others)*)
    };

    // Parser macro, more alternatives follow.
    ($input:expr, $parser:ident!( $($params:tt)*) | $($others:tt)*) => {
        match $parser!($input, $($params)*) {
            matched @ Ok(_) => matched,
            Err(_) => alt!($input, $($others)*)
        }
    };

    // Parser macro with a mapping function, more alternatives follow.
    ($input:expr, $parser:ident!( $($params:tt)* ) => { $convert:expr } | $($others:tt)+) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => alt!($input, $($others)*),
            Ok((rest, value)) => Ok((rest, $convert(value)))
        }
    };

    // Function name with a mapping function, more alternatives follow.
    ($input:expr, $func:ident => { $convert:expr } | $($others:tt)*) => {
        alt!($input, call!($func) => { $convert } | $($others)*)
    };

    // Last alternative: function name with a mapping function.
    ($input:expr, $func:ident => { $convert:expr }) => {
        alt!($input, call!($func) => { $convert })
    };

    // Last alternative: parser macro with a mapping function.
    ($input:expr, $parser:ident!( $($params:tt)* ) => { $convert:expr }) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => Err(LexError),
            Ok((rest, value)) => Ok((rest, $convert(value))),
        }
    };

    // Last alternative: function name.
    ($input:expr, $func:ident) => {
        alt!($input, call!($func))
    };

    // Last alternative: parser macro.
    ($input:expr, $parser:ident!( $($params:tt)*)) => {
        $parser!($input, $($params)*)
    };
}
150
/// Runs a `>>`-separated sequence of parsers, threading the remaining input
/// from one step to the next, and finishes with a parenthesised result
/// expression.
///
/// A step written `name: parser` binds the parsed value to `name` for use
/// in later steps and in the result; a bare `parser` discards its value.
/// The first failing step aborts the sequence with `LexError`.
macro_rules! do_parse {
    // Final step: build the result from the bound names.
    ($input:expr, ( $($result:expr),* )) => {
        Ok(($input, ( $($result),* )))
    };

    // Unbound function step: normalise to `call!`.
    ($input:expr, $func:ident >> $($steps:tt)*) => {
        do_parse!($input, call!($func) >> $($steps)*)
    };

    // Unbound parser-macro step: run it and drop the value.
    ($input:expr, $parser:ident!( $($params:tt)* ) >> $($steps:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((rest, _)) => do_parse!(rest, $($steps)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Bound function step: normalise to `call!`.
    ($input:expr, $binding:ident : $func:ident >> $($steps:tt)*) => {
        do_parse!($input, $binding: call!($func) >> $($steps)*)
    };

    // Bound parser-macro step: run it and keep the value under `$binding`.
    ($input:expr, $binding:ident : $parser:ident!( $($params:tt)* ) >> $($steps:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((rest, $binding)) => do_parse!(rest, $($steps)*),
            Err(LexError) => Err(LexError),
        }
    };
}
181
/// Runs a parser macro without consuming input: on success the parsed value
/// is returned together with the original, unadvanced input.
macro_rules! peek {
    ($input:expr, $parser:ident!( $($params:tt)* )) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => Err(LexError),
            Ok((_, value)) => Ok(($input, value)),
        }
    };
}
190
/// Invokes `$func` with the parser input as its first argument, followed by
/// any extra arguments.
macro_rules! call {
    ($input:expr, $func:expr $(, $extra:expr)*) => {
        $func($input $(, $extra)*)
    };
}
196
/// Makes a parser optional: a success yields `Some(value)`, and a failure
/// becomes a success that yields `None` and leaves the input untouched.
macro_rules! option {
    ($input:expr, $parser:expr) => {
        match $parser($input) {
            Err(LexError) => Ok(($input, None)),
            Ok((rest, value)) => Ok((rest, Some(value))),
        }
    };
}
205
/// Splits the input at the first occurrence of `$substr`.
///
/// On success yields `(rest, taken)`, where `taken` is everything before
/// the match and `rest` begins with the match itself. Fails with `LexError`
/// when `$substr` does not occur in the input or when the input is empty.
///
/// Both arguments are evaluated exactly once.
macro_rules! take_until {
    ($i:expr, $substr:expr) => {{
        let input: &str = $i;
        let needle: &str = &$substr;
        match input.find(needle) {
            // An empty input never matches, not even an empty needle; this
            // keeps the result the previous window-scanning code produced.
            Some(offset) if !input.is_empty() => {
                let (taken, rest) = input.split_at(offset);
                Ok((rest, taken))
            }
            _ => Err(LexError),
        }
    }};
}
238
/// Runs a comma-separated list of parsers in sequence and collects their
/// values; this is the public entry point that seeds `tuple_parser!` with
/// an empty accumulator.
macro_rules! tuple {
    ($input:expr, $($parsers:tt)*) => {
        tuple_parser!($input, (), $($parsers)*)
    };
}
244
/// Do not use directly. Use `tuple!`.
///
/// Internal worker: the parenthesised second argument accumulates the
/// values parsed so far while the remaining parsers are applied one by one.
/// The first failing parser aborts with `LexError`.
macro_rules! tuple_parser {
    // Function name with more parsers after it: normalise to `call!`.
    ($input:expr, ($($done:tt),*), $func:ident, $($pending:tt)*) => {
        tuple_parser!($input, ($($done),*), call!($func), $($pending)*)
    };

    // First parser macro (empty accumulator), more parsers follow.
    ($input:expr, (), $parser:ident!( $($params:tt)* ), $($pending:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((rest, value)) => tuple_parser!(rest, (value), $($pending)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Later parser macro, more parsers follow: append to the accumulator.
    ($input:expr, ($($done:tt)*), $parser:ident!( $($params:tt)* ), $($pending:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((rest, value)) => tuple_parser!(rest, ($($done)* , value), $($pending)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Final parser given as a function name: normalise to `call!`.
    ($input:expr, ($($done:tt),*), $func:ident) => {
        tuple_parser!($input, ($($done),*), call!($func))
    };

    // A single parser overall: its result is returned as-is.
    ($input:expr, (), $parser:ident!( $($params:tt)* )) => {
        $parser!($input, $($params)*)
    };

    // Final parser macro: close the tuple.
    ($input:expr, ($($done:expr),*), $parser:ident!( $($params:tt)* )) => {
        match $parser!($input, $($params)*) {
            Ok((rest, value)) => Ok((rest, ($($done),*, value))),
            Err(LexError) => Err(LexError)
        }
    };

    // No parsers left: yield the accumulated values.
    ($input:expr, ($($done:expr),*)) => {
        Ok(($input, ($($done),*)))
    };
}
284
/// Negative lookahead: succeeds with `()` and the untouched input exactly
/// when the given parser macro fails, and fails when it succeeds.
macro_rules! not {
    ($input:expr, $parser:ident!( $($params:tt)* )) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => Ok(($input, ())),
            Ok(_) => Err(LexError),
        }
    };
}
293
/// Matches the literal `$literal` at the very start of the input (no
/// whitespace skipping), yielding the input after it and the matched prefix.
macro_rules! tag {
    ($input:expr, $literal:expr) => {
        if !$input.starts_with($literal) {
            Err(LexError)
        } else {
            Ok((&$input[$literal.len()..], &$input[..$literal.len()]))
        }
    };
}
303
/// Macro form of `strnom::punct`, usable as a step inside the other parser
/// macros.
macro_rules! punct {
    ($input:expr, $token:expr) => {
        $crate::strnom::punct($input, $token)
    };
}
309
310/// Do not use directly. Use `punct!`.
David Tolnay1218e122017-06-01 11:13:45 -0700311pub fn punct<'a>(input: &'a str, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700312 let input = skip_whitespace(input);
313 if input.starts_with(token) {
David Tolnay1218e122017-06-01 11:13:45 -0700314 Ok((&input[token.len()..], token))
David Tolnayb1032662017-05-31 15:52:28 -0700315 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700316 Err(LexError)
David Tolnayb1032662017-05-31 15:52:28 -0700317 }
318}
319
/// Macro form of `strnom::keyword`, usable as a step inside the other
/// parser macros.
macro_rules! keyword {
    ($input:expr, $word:expr) => {
        $crate::strnom::keyword($input, $word)
    };
}
325
326/// Do not use directly. Use `keyword!`.
David Tolnay1218e122017-06-01 11:13:45 -0700327pub fn keyword<'a>(input: &'a str, token: &'static str) -> PResult<'a, &'a str> {
David Tolnayb1032662017-05-31 15:52:28 -0700328 match punct(input, token) {
David Tolnay1218e122017-06-01 11:13:45 -0700329 Ok((rest, _)) => {
David Tolnayb1032662017-05-31 15:52:28 -0700330 match word_break(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700331 Ok((_, _)) => Ok((rest, token)),
332 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700333 }
334 }
David Tolnay1218e122017-06-01 11:13:45 -0700335 Err(LexError) => Err(LexError),
David Tolnayb1032662017-05-31 15:52:28 -0700336 }
337}
338
/// The empty parser: always succeeds with `()` and consumes nothing.
/// The trailing comma after the input is part of the syntax.
macro_rules! epsilon {
    ($input:expr,) => {
        Ok(($input, ()))
    };
}
344
/// Runs two parsers in sequence via `tuple!` and keeps only the value of
/// the second one. The first must be a parser macro; the second may be a
/// parser macro or any expression callable through `call!`.
macro_rules! preceded {
    ($input:expr, $prefix:ident!( $($prefix_args:tt)* ), $body:ident!( $($body_args:tt)* )) => {
        match tuple!($input, $prefix!($($prefix_args)*), $body!($($body_args)*)) {
            Err(LexError) => Err(LexError),
            Ok((rest, (_, value))) => Ok((rest, value)),
        }
    };

    ($input:expr, $prefix:ident!( $($prefix_args:tt)* ), $body:expr) => {
        preceded!($input, $prefix!($($prefix_args)*), call!($body))
    };
}
357
/// Runs three parsers in sequence through `tuple_parser!` and keeps only
/// the value of the middle one. The first parser must be a parser macro;
/// the rest are forwarded verbatim.
macro_rules! delimited {
    ($input:expr, $open:ident!( $($open_args:tt)* ), $($remaining:tt)+) => {
        match tuple_parser!($input, (), $open!($($open_args)*), $($remaining)*) {
            Ok((rest, (_, value, _))) => Ok((rest, value)),
            Err(LexError) => Err(LexError)
        }
    };
}
366
/// Runs a parser and passes its value through `$convert`. The parser may
/// be a parser macro or any expression callable through `call!`.
macro_rules! map {
    ($input:expr, $parser:ident!( $($params:tt)* ), $convert:expr) => {
        match $parser!($input, $($params)*) {
            Ok((rest, value)) => Ok((rest, $convert(value))),
            Err(LexError) => Err(LexError)
        }
    };

    ($input:expr, $func:expr, $convert:expr) => {
        map!($input, call!($func), $convert)
    };
}
379
/// Applies `$parser` repeatedly, collecting the values into a `Vec`, until
/// the input is exhausted or the parser fails; both cases are a success
/// that yields whatever was collected.
///
/// Fails with `LexError` only if the parser succeeds without consuming any
/// input, since repeating it could never terminate.
macro_rules! many0 {
    ($input:expr, $parser:expr) => {{
        let mut items = ::std::vec::Vec::new();
        let mut remaining = $input;
        let mut stalled = false;

        while !remaining.is_empty() {
            match $parser(remaining) {
                Ok((next, item)) => {
                    // Every successful pass must consume something,
                    // otherwise the loop would spin forever.
                    if next.len() == remaining.len() {
                        stalled = true;
                        break;
                    }

                    items.push(item);
                    remaining = next;
                }
                Err(LexError) => break,
            }
        }

        if stalled {
            Err(LexError)
        } else {
            Ok((remaining, items))
        }
    }};
}