blob: a6daf84505ddbfb2d4fd0cddc39563e08733f91e [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::iter;
David Tolnay041bcd42017-06-03 09:18:04 -07007use std::marker::PhantomData;
Alex Crichton44bffbc2017-05-19 17:51:59 -07008use std::ops;
9use std::rc::Rc;
10use std::str::FromStr;
11use std::vec;
12
13use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070014use unicode_xid::UnicodeXID;
David Tolnay744a6b82017-06-01 11:34:29 -070015use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070016
17use {TokenTree, TokenKind, Delimiter, OpKind};
18
David Tolnay977f8282017-05-31 17:41:33 -070019#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070020pub struct TokenStream {
21 inner: Vec<TokenTree>,
22}
23
24#[derive(Debug)]
25pub struct LexError;
26
27impl TokenStream {
28 pub fn empty() -> TokenStream {
29 TokenStream { inner: Vec::new() }
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.inner.len() == 0
34 }
35}
36
37impl FromStr for TokenStream {
38 type Err = LexError;
39
40 fn from_str(src: &str) -> Result<TokenStream, LexError> {
41 match token_stream(src) {
David Tolnay1218e122017-06-01 11:13:45 -070042 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070043 if skip_whitespace(input).len() != 0 {
44 Err(LexError)
45 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070046 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070047 }
48 }
David Tolnay1218e122017-06-01 11:13:45 -070049 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070050 }
51 }
52}
53
54impl fmt::Display for TokenStream {
55 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56 let mut joint = false;
57 for (i, tt) in self.inner.iter().enumerate() {
58 if i != 0 && !joint {
59 write!(f, " ")?;
60 }
61 joint = false;
62 match tt.kind {
63 TokenKind::Sequence(delim, ref stream) => {
64 let (start, end) = match delim {
65 Delimiter::Parenthesis => ("(", ")"),
66 Delimiter::Brace => ("{", "}"),
67 Delimiter::Bracket => ("[", "]"),
68 Delimiter::None => ("", ""),
69 };
Alex Crichton852d53d2017-05-19 19:25:08 -070070 if stream.0.inner.len() == 0 {
71 write!(f, "{} {}", start, end)?
72 } else {
73 write!(f, "{} {} {}", start, stream, end)?
74 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070075 }
Alex Crichton9c2fb0a2017-05-26 08:49:31 -070076 TokenKind::Word(ref sym) => write!(f, "{}", sym.as_str())?,
Alex Crichton44bffbc2017-05-19 17:51:59 -070077 TokenKind::Op(ch, ref op) => {
78 write!(f, "{}", ch)?;
79 match *op {
80 OpKind::Alone => {}
81 OpKind::Joint => joint = true,
82 }
83 }
84 TokenKind::Literal(ref literal) => {
85 write!(f, "{}", literal)?;
86 // handle comments
87 if (literal.0).0.starts_with("/") {
88 write!(f, "\n")?;
89 }
90 }
91 }
92 }
93
94 Ok(())
95 }
96}
97
98impl From<proc_macro::TokenStream> for TokenStream {
99 fn from(inner: proc_macro::TokenStream) -> TokenStream {
100 inner.to_string().parse().expect("compiler token stream parse failed")
101 }
102}
103
104impl From<TokenStream> for proc_macro::TokenStream {
105 fn from(inner: TokenStream) -> proc_macro::TokenStream {
106 inner.to_string().parse().expect("failed to parse to compiler tokens")
107 }
108}
109
110
111impl From<TokenTree> for TokenStream {
112 fn from(tree: TokenTree) -> TokenStream {
113 TokenStream { inner: vec![tree] }
114 }
115}
116
117impl iter::FromIterator<TokenStream> for TokenStream {
118 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
119 let mut v = Vec::new();
120
121 for stream in streams.into_iter() {
122 v.extend(stream.inner);
123 }
124
125 TokenStream { inner: v }
126 }
127}
128
129pub type TokenIter = vec::IntoIter<TokenTree>;
130
131impl IntoIterator for TokenStream {
132 type Item = TokenTree;
133 type IntoIter = TokenIter;
134
135 fn into_iter(self) -> TokenIter {
136 self.inner.into_iter()
137 }
138}
139
/// Placeholder source location.
///
/// This is a unit struct: it records no position information.
#[derive(Clone, Copy, Default, Debug)]
pub struct Span;

impl Span {
    /// Returns the (only) span value.
    pub fn call_site() -> Span {
        Span {}
    }
}
148
/// Handle to a string stored in the thread-local interner.
#[derive(Clone, Copy)]
pub struct Symbol {
    /// Index of the string inside the interner.
    intern: usize,
    /// Raw-pointer marker that keeps `Symbol` from being `Send` or `Sync`,
    /// since the index only has meaning on the thread that created it.
    not_send_sync: PhantomData<*const ()>,
}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700154
155thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
156
157impl<'a> From<&'a str> for Symbol {
158 fn from(string: &'a str) -> Symbol {
David Tolnay041bcd42017-06-03 09:18:04 -0700159 Symbol {
160 intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
161 not_send_sync: PhantomData,
162 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700163 }
164}
165
166impl ops::Deref for Symbol {
167 type Target = str;
168
169 fn deref(&self) -> &str {
170 SYMBOLS.with(|interner| {
171 let interner = interner.borrow();
David Tolnay041bcd42017-06-03 09:18:04 -0700172 let s = interner.get(self.intern);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700173 unsafe {
174 &*(s as *const str)
175 }
176 })
177 }
178}
179
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700180impl fmt::Debug for Symbol {
181 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
182 f.debug_tuple("Symbol").field(&&**self).finish()
183 }
184}
185
/// Append-only string table mapping each distinct string to a stable index.
struct Interner {
    /// Lookup from string contents to the index it was assigned.
    string_to_index: HashMap<MyRc, usize>,
    /// Strings in the order they were first interned; the index is the key.
    index_to_string: Vec<Rc<String>>,
}

/// Newtype around `Rc<String>` so the map can be queried with a plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an empty table.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index for `s`, adding it to the table on first sight.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(&existing) = self.string_to_index.get(s) {
            return existing;
        }
        // The new entry goes at the end, so its index is the current length.
        let index = self.index_to_string.len();
        let shared = Rc::new(s.to_string());
        self.index_to_string.push(shared.clone());
        self.string_to_index.insert(MyRc(shared), index);
        index
    }

    /// Returns the string stored at `idx`; panics if `idx` was never issued.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
222
/// A literal token, stored as the exact source text it prints as.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte-character literal such as `b'a'` or `b'\n'`.
    pub fn byte_char(byte: u8) -> Literal {
        let body: String = match byte {
            0 => "\\0".to_string(),
            // A double quote needs no escape inside single quotes.
            b'"' => "\"".to_string(),
            other => ascii::escape_default(other).map(char::from).collect(),
        };
        Literal(format!("b'{}'", body))
    }

    /// Builds a byte-string literal, escaping everything outside printable
    /// ASCII as well as `"` and `\`.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut escaped = String::from("b\"");
        for &byte in bytes {
            match byte {
                b'\0' => escaped.push_str(r"\0"),
                b'\t' => escaped.push_str(r"\t"),
                b'\n' => escaped.push_str(r"\n"),
                b'\r' => escaped.push_str(r"\r"),
                b'"' => escaped.push_str("\\\""),
                b'\\' => escaped.push_str("\\\\"),
                printable if printable >= b'\x20' && printable <= b'\x7E' => {
                    escaped.push(printable as char)
                }
                other => escaped.push_str(&format!("\\x{:02X}", other)),
            }
        }
        escaped.push('"');
        Literal(escaped)
    }

    /// Stores `s` verbatim as a doc-comment token.
    pub fn doccomment(s: &str) -> Literal {
        Literal(String::from(s))
    }

    /// Stores `s` verbatim as a float literal; the text is not validated.
    pub fn float(s: &str) -> Literal {
        Literal(String::from(s))
    }

    /// Stores `s` verbatim as an integer literal; the text is not validated.
    pub fn integer(s: &str) -> Literal {
        Literal(String::from(s))
    }

    /// Builds a raw string literal `r#"…"#` with `pounds` hash marks per side.
    /// `s` is inserted unchanged.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        Literal::raw_with_prefix("r", s, pounds)
    }

    /// Builds a raw byte-string literal `br#"…"#` with `pounds` hash marks per
    /// side. `s` is inserted unchanged.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        Literal::raw_with_prefix("br", s, pounds)
    }

    /// Shared layout for raw literals: prefix, hashes, quoted body, hashes.
    fn raw_with_prefix(prefix: &str, s: &str, pounds: usize) -> Literal {
        let hashes: String = iter::repeat('#').take(pounds).collect();
        Literal(format!("{}{}\"{}\"{}", prefix, hashes, s, hashes))
    }
}
290
Alex Crichton44bffbc2017-05-19 17:51:59 -0700291impl fmt::Display for Literal {
292 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293 self.0.fmt(f)
294 }
295}
296
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700297macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700298 ($($t:ty,)*) => {$(
299 impl From<$t> for Literal {
300 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700301 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700302 }
303 }
304 )*}
305}
306
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700307ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700308 u8, u16, u32, u64, usize,
309 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700310}
311
312macro_rules! floats {
313 ($($t:ty,)*) => {$(
314 impl From<$t> for Literal {
315 fn from(t: $t) -> Literal {
316 assert!(!t.is_nan());
317 assert!(!t.is_infinite());
318 Literal(format!(concat!("{}", stringify!($t)), t))
319 }
320 }
321 )*}
322}
323
324floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700325 f32, f64,
326}
327
Alex Crichton44bffbc2017-05-19 17:51:59 -0700328impl<'a> From<&'a str> for Literal {
329 fn from(t: &'a str) -> Literal {
330 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
331 s.push('"');
332 s.insert(0, '"');
333 Literal(s)
334 }
335}
336
337impl From<char> for Literal {
338 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700339 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700340 }
341}
342
David Tolnay8e976c62017-06-01 12:12:29 -0700343named!(token_stream -> ::TokenStream, map!(
344 many0!(token_tree),
345 |trees| ::TokenStream(TokenStream { inner: trees })
346));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700347
348named!(token_tree -> TokenTree,
349 map!(token_kind, |s: TokenKind| {
350 TokenTree {
351 span: ::Span(Span),
352 kind: s,
353 }
354 }));
355
356named!(token_kind -> TokenKind, alt!(
David Tolnay8e976c62017-06-01 12:12:29 -0700357 map!(delimited, |(d, s)| TokenKind::Sequence(d, s))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700358 |
David Tolnay8e976c62017-06-01 12:12:29 -0700359 map!(literal, TokenKind::Literal) // must be before symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700360 |
David Tolnay8e976c62017-06-01 12:12:29 -0700361 map!(symbol, TokenKind::Word)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700362 |
David Tolnay643177b2017-06-01 12:02:44 -0700363 map!(op, |(op, kind)| TokenKind::Op(op, kind))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700364));
365
David Tolnay8e976c62017-06-01 12:12:29 -0700366named!(delimited -> (Delimiter, ::TokenStream), alt!(
Alex Crichton44bffbc2017-05-19 17:51:59 -0700367 delimited!(
368 punct!("("),
369 token_stream,
370 punct!(")")
371 ) => { |ts| (Delimiter::Parenthesis, ts) }
372 |
373 delimited!(
374 punct!("["),
375 token_stream,
376 punct!("]")
377 ) => { |ts| (Delimiter::Bracket, ts) }
378 |
379 delimited!(
380 punct!("{"),
381 token_stream,
382 punct!("}")
383 ) => { |ts| (Delimiter::Brace, ts) }
384));
385
David Tolnaya202d502017-06-01 12:26:55 -0700386fn symbol(mut input: &str) -> PResult<::Symbol> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700387 input = skip_whitespace(input);
388
389 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700390
391 let lifetime = input.starts_with("'");
392 if lifetime {
393 chars.next();
394 }
395
Alex Crichton44bffbc2017-05-19 17:51:59 -0700396 match chars.next() {
397 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700398 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700399 }
400
David Tolnay214c94c2017-06-01 12:42:56 -0700401 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700402 for (i, ch) in chars {
403 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700404 end = i;
405 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700406 }
407 }
408
David Tolnay214c94c2017-06-01 12:42:56 -0700409 if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
410 Err(LexError)
411 } else {
412 Ok((&input[end..], input[..end].into()))
413 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700414}
415
// Words that may not be used as lifetime names (checked in `symbol`).
// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
static KEYWORDS: &'static [&'static str] = &[
    "abstract", "alignof", "as",
    "become", "box", "break",
    "const", "continue", "crate",
    "do",
    "else", "enum", "extern",
    "false", "final", "fn", "for",
    "if", "impl", "in",
    "let", "loop",
    "macro", "match", "mod", "move", "mut",
    "offsetof", "override",
    "priv", "proc", "pub", "pure",
    "ref", "return",
    "self", "Self", "sizeof", "static", "struct", "super",
    "trait", "true", "type", "typeof",
    "unsafe", "unsized", "use",
    "virtual",
    "where", "while",
    "yield",
];
426
David Tolnay8e976c62017-06-01 12:12:29 -0700427fn literal(input: &str) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700428 let input_no_ws = skip_whitespace(input);
429
430 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700431 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700432 let start = input.len() - input_no_ws.len();
433 let len = input_no_ws.len() - a.len();
434 let end = start + len;
David Tolnay8e976c62017-06-01 12:12:29 -0700435 Ok((a, ::Literal(Literal(input[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700436 }
David Tolnay1218e122017-06-01 11:13:45 -0700437 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700438 }
439}
440
441named!(literal_nocapture -> (), alt!(
442 string
443 |
444 byte_string
445 |
446 byte
447 |
448 character
449 |
450 float
451 |
452 int
453 |
454 boolean
455 |
456 doc_comment
457));
458
459named!(string -> (), alt!(
460 quoted_string
461 |
462 preceded!(
463 punct!("r"),
464 raw_string
465 ) => { |_| () }
466));
467
468named!(quoted_string -> (), delimited!(
469 punct!("\""),
470 cooked_string,
471 tag!("\"")
472));
473
David Tolnay1218e122017-06-01 11:13:45 -0700474fn cooked_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700475 let mut chars = input.char_indices().peekable();
476 while let Some((byte_offset, ch)) = chars.next() {
477 match ch {
478 '"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700479 return Ok((&input[byte_offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700480 }
481 '\r' => {
482 if let Some((_, '\n')) = chars.next() {
483 // ...
484 } else {
485 break;
486 }
487 }
488 '\\' => {
489 match chars.next() {
490 Some((_, 'x')) => {
491 if !backslash_x_char(&mut chars) {
492 break
493 }
494 }
495 Some((_, 'n')) |
496 Some((_, 'r')) |
497 Some((_, 't')) |
498 Some((_, '\\')) |
499 Some((_, '\'')) |
500 Some((_, '"')) |
501 Some((_, '0')) => {}
502 Some((_, 'u')) => {
503 if !backslash_u(&mut chars) {
504 break
505 }
506 }
507 Some((_, '\n')) | Some((_, '\r')) => {
508 while let Some(&(_, ch)) = chars.peek() {
509 if ch.is_whitespace() {
510 chars.next();
511 } else {
512 break;
513 }
514 }
515 }
516 _ => break,
517 }
518 }
519 _ch => {}
520 }
521 }
David Tolnay1218e122017-06-01 11:13:45 -0700522 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700523}
524
525named!(byte_string -> (), alt!(
526 delimited!(
527 punct!("b\""),
528 cooked_byte_string,
529 tag!("\"")
530 ) => { |_| () }
531 |
532 preceded!(
533 punct!("br"),
534 raw_string
535 ) => { |_| () }
536));
537
David Tolnay1218e122017-06-01 11:13:45 -0700538fn cooked_byte_string(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700539 let mut bytes = input.bytes().enumerate();
540 'outer: while let Some((offset, b)) = bytes.next() {
541 match b {
542 b'"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700543 return Ok((&input[offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700544 }
545 b'\r' => {
546 if let Some((_, b'\n')) = bytes.next() {
547 // ...
548 } else {
549 break;
550 }
551 }
552 b'\\' => {
553 match bytes.next() {
554 Some((_, b'x')) => {
555 if !backslash_x_byte(&mut bytes) {
556 break
557 }
558 }
559 Some((_, b'n')) |
560 Some((_, b'r')) |
561 Some((_, b't')) |
562 Some((_, b'\\')) |
563 Some((_, b'0')) |
564 Some((_, b'\'')) |
565 Some((_, b'"')) => {}
566 Some((newline, b'\n')) |
567 Some((newline, b'\r')) => {
568 let rest = &input[newline + 1..];
569 for (offset, ch) in rest.char_indices() {
570 if !ch.is_whitespace() {
571 input = &rest[offset..];
572 bytes = input.bytes().enumerate();
573 continue 'outer;
574 }
575 }
576 break;
577 }
578 _ => break,
579 }
580 }
581 b if b < 0x80 => {}
582 _ => break,
583 }
584 }
David Tolnay1218e122017-06-01 11:13:45 -0700585 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700586}
587
David Tolnay1218e122017-06-01 11:13:45 -0700588fn raw_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700589 let mut chars = input.char_indices();
590 let mut n = 0;
591 while let Some((byte_offset, ch)) = chars.next() {
592 match ch {
593 '"' => {
594 n = byte_offset;
595 break;
596 }
597 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700598 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700599 }
600 }
601 for (byte_offset, ch) in chars {
602 match ch {
603 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
604 let rest = &input[byte_offset + 1 + n..];
David Tolnay1218e122017-06-01 11:13:45 -0700605 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700606 }
607 '\r' => {}
608 _ => {}
609 }
610 }
David Tolnay1218e122017-06-01 11:13:45 -0700611 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700612}
613
614named!(byte -> (), do_parse!(
615 punct!("b") >>
616 tag!("'") >>
617 cooked_byte >>
618 tag!("'") >>
619 (())
620));
621
David Tolnay1218e122017-06-01 11:13:45 -0700622fn cooked_byte(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700623 let mut bytes = input.bytes().enumerate();
624 let ok = match bytes.next().map(|(_, b)| b) {
625 Some(b'\\') => {
626 match bytes.next().map(|(_, b)| b) {
627 Some(b'x') => backslash_x_byte(&mut bytes),
628 Some(b'n') |
629 Some(b'r') |
630 Some(b't') |
631 Some(b'\\') |
632 Some(b'0') |
633 Some(b'\'') |
634 Some(b'"') => true,
635 _ => false,
636 }
637 }
638 b => b.is_some(),
639 };
640 if ok {
641 match bytes.next() {
David Tolnay1218e122017-06-01 11:13:45 -0700642 Some((offset, _)) => Ok((&input[offset..], ())),
643 None => Ok(("", ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700644 }
645 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700646 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700647 }
648}
649
650named!(character -> (), do_parse!(
651 punct!("'") >>
652 cooked_char >>
653 tag!("'") >>
654 (())
655));
656
David Tolnay1218e122017-06-01 11:13:45 -0700657fn cooked_char(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700658 let mut chars = input.char_indices();
659 let ok = match chars.next().map(|(_, ch)| ch) {
660 Some('\\') => {
661 match chars.next().map(|(_, ch)| ch) {
662 Some('x') => backslash_x_char(&mut chars),
663 Some('u') => backslash_u(&mut chars),
664 Some('n') |
665 Some('r') |
666 Some('t') |
667 Some('\\') |
668 Some('0') |
669 Some('\'') |
670 Some('"') => true,
671 _ => false,
672 }
673 }
674 ch => ch.is_some(),
675 };
676 if ok {
David Tolnay1218e122017-06-01 11:13:45 -0700677 Ok((chars.as_str(), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700678 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700679 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700680 }
681}
682
683macro_rules! next_ch {
684 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
685 match $chars.next() {
686 Some((_, ch)) => match ch {
687 $pat $(| $rest)* => ch,
688 _ => return false,
689 },
690 None => return false
691 }
692 };
693}
694
695fn backslash_x_char<I>(chars: &mut I) -> bool
696 where I: Iterator<Item = (usize, char)>
697{
698 next_ch!(chars @ '0'...'7');
699 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
700 true
701}
702
703fn backslash_x_byte<I>(chars: &mut I) -> bool
704 where I: Iterator<Item = (usize, u8)>
705{
706 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
707 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
708 true
709}
710
711fn backslash_u<I>(chars: &mut I) -> bool
712 where I: Iterator<Item = (usize, char)>
713{
714 next_ch!(chars @ '{');
715 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
716 let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
717 if b == '}' {
718 return true
719 }
720 let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
721 if c == '}' {
722 return true
723 }
724 let d = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
725 if d == '}' {
726 return true
727 }
728 let e = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
729 if e == '}' {
730 return true
731 }
732 let f = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
733 if f == '}' {
734 return true
735 }
736 next_ch!(chars @ '}');
737 true
738}
739
David Tolnay744a6b82017-06-01 11:34:29 -0700740fn float(input: &str) -> PResult<()> {
741 let (rest, ()) = float_digits(input)?;
742 for suffix in &["f32", "f64"] {
743 if rest.starts_with(suffix) {
744 return word_break(&rest[suffix.len()..]);
745 }
746 }
747 word_break(rest)
748}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700749
David Tolnay744a6b82017-06-01 11:34:29 -0700750fn float_digits(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700751 let mut chars = input.chars().peekable();
752 match chars.next() {
753 Some(ch) if ch >= '0' && ch <= '9' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700754 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700755 }
756
757 let mut len = 1;
758 let mut has_dot = false;
759 let mut has_exp = false;
760 while let Some(&ch) = chars.peek() {
761 match ch {
762 '0'...'9' | '_' => {
763 chars.next();
764 len += 1;
765 }
766 '.' => {
767 if has_dot {
768 break;
769 }
770 chars.next();
771 if chars.peek()
772 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
773 .unwrap_or(false) {
David Tolnay1218e122017-06-01 11:13:45 -0700774 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700775 }
776 len += 1;
777 has_dot = true;
778 }
779 'e' | 'E' => {
780 chars.next();
781 len += 1;
782 has_exp = true;
783 break;
784 }
785 _ => break,
786 }
787 }
788
789 let rest = &input[len..];
790 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
David Tolnay1218e122017-06-01 11:13:45 -0700791 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700792 }
793
794 if has_exp {
795 let mut has_exp_value = false;
796 while let Some(&ch) = chars.peek() {
797 match ch {
798 '+' | '-' => {
799 if has_exp_value {
800 break;
801 }
802 chars.next();
803 len += 1;
804 }
805 '0'...'9' => {
806 chars.next();
807 len += 1;
808 has_exp_value = true;
809 }
810 '_' => {
811 chars.next();
812 len += 1;
813 }
814 _ => break,
815 }
816 }
817 if !has_exp_value {
David Tolnay1218e122017-06-01 11:13:45 -0700818 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700819 }
820 }
821
David Tolnay1218e122017-06-01 11:13:45 -0700822 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700823}
824
David Tolnay744a6b82017-06-01 11:34:29 -0700825fn int(input: &str) -> PResult<()> {
826 let (rest, ()) = digits(input)?;
827 for suffix in &[
828 "isize",
829 "i8",
830 "i16",
831 "i32",
832 "i64",
833 "i128",
834 "usize",
835 "u8",
836 "u16",
837 "u32",
838 "u64",
839 "u128",
840 ] {
841 if rest.starts_with(suffix) {
842 return word_break(&rest[suffix.len()..]);
843 }
844 }
845 word_break(rest)
846}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700847
David Tolnay1218e122017-06-01 11:13:45 -0700848fn digits(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700849 let base = if input.starts_with("0x") {
850 input = &input[2..];
851 16
852 } else if input.starts_with("0o") {
853 input = &input[2..];
854 8
855 } else if input.starts_with("0b") {
856 input = &input[2..];
857 2
858 } else {
859 10
860 };
861
Alex Crichton44bffbc2017-05-19 17:51:59 -0700862 let mut len = 0;
863 let mut empty = true;
864 for b in input.bytes() {
865 let digit = match b {
866 b'0'...b'9' => (b - b'0') as u64,
867 b'a'...b'f' => 10 + (b - b'a') as u64,
868 b'A'...b'F' => 10 + (b - b'A') as u64,
869 b'_' => {
870 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -0700871 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700872 }
873 len += 1;
874 continue;
875 }
876 _ => break,
877 };
878 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -0700879 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700880 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700881 len += 1;
882 empty = false;
883 }
884 if empty {
David Tolnay1218e122017-06-01 11:13:45 -0700885 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700886 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700887 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700888 }
889}
890
891named!(boolean -> (), alt!(
892 keyword!("true") => { |_| () }
893 |
894 keyword!("false") => { |_| () }
895));
896
David Tolnay1218e122017-06-01 11:13:45 -0700897fn op(input: &str) -> PResult<(char, OpKind)> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700898 let input = skip_whitespace(input);
899 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700900 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -0700901 let kind = match op_char(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700902 Ok(_) => OpKind::Joint,
903 Err(LexError) => OpKind::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -0700904 };
David Tolnay1218e122017-06-01 11:13:45 -0700905 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -0700906 }
David Tolnay1218e122017-06-01 11:13:45 -0700907 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700908 }
909}
910
David Tolnay1218e122017-06-01 11:13:45 -0700911fn op_char(input: &str) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700912 let mut chars = input.chars();
913 let first = match chars.next() {
914 Some(ch) => ch,
915 None => {
David Tolnay1218e122017-06-01 11:13:45 -0700916 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -0700917 }
918 };
919 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
920 if recognized.contains(first) {
David Tolnay1218e122017-06-01 11:13:45 -0700921 Ok((chars.as_str(), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700922 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700923 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700924 }
925}
926
Alex Crichton44bffbc2017-05-19 17:51:59 -0700927named!(doc_comment -> (), alt!(
928 do_parse!(
929 punct!("//!") >>
930 take_until!("\n") >>
931 (())
932 )
933 |
934 do_parse!(
935 option!(whitespace) >>
936 peek!(tag!("/*!")) >>
937 block_comment >>
938 (())
939 )
940 |
941 do_parse!(
942 punct!("///") >>
943 not!(tag!("/")) >>
944 take_until!("\n") >>
945 (())
946 )
947 |
948 do_parse!(
949 option!(whitespace) >>
950 peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
951 block_comment >>
952 (())
953 )
954));