blob: ba5bb643bb38697beaa09d8ae368235a8fe39723 [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::iter;
7use std::ops;
8use std::rc::Rc;
9use std::str::FromStr;
10use std::vec;
11
12use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070013use unicode_xid::UnicodeXID;
David Tolnay744a6b82017-06-01 11:34:29 -070014use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070015
16use {TokenTree, TokenKind, Delimiter, OpKind};
17
David Tolnay977f8282017-05-31 17:41:33 -070018#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070019pub struct TokenStream {
20 inner: Vec<TokenTree>,
21}
22
/// Error returned when a string cannot be lexed into a `TokenStream`.
///
/// It carries no detail about where or why lexing failed.
#[derive(Debug)]
pub struct LexError;
25
26impl TokenStream {
27 pub fn empty() -> TokenStream {
28 TokenStream { inner: Vec::new() }
29 }
30
31 pub fn is_empty(&self) -> bool {
32 self.inner.len() == 0
33 }
34}
35
36impl FromStr for TokenStream {
37 type Err = LexError;
38
39 fn from_str(src: &str) -> Result<TokenStream, LexError> {
40 match token_stream(src) {
David Tolnay1218e122017-06-01 11:13:45 -070041 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070042 if skip_whitespace(input).len() != 0 {
43 Err(LexError)
44 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070045 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070046 }
47 }
David Tolnay1218e122017-06-01 11:13:45 -070048 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070049 }
50 }
51}
52
53impl fmt::Display for TokenStream {
54 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
55 let mut joint = false;
56 for (i, tt) in self.inner.iter().enumerate() {
57 if i != 0 && !joint {
58 write!(f, " ")?;
59 }
60 joint = false;
61 match tt.kind {
62 TokenKind::Sequence(delim, ref stream) => {
63 let (start, end) = match delim {
64 Delimiter::Parenthesis => ("(", ")"),
65 Delimiter::Brace => ("{", "}"),
66 Delimiter::Bracket => ("[", "]"),
67 Delimiter::None => ("", ""),
68 };
Alex Crichton852d53d2017-05-19 19:25:08 -070069 if stream.0.inner.len() == 0 {
70 write!(f, "{} {}", start, end)?
71 } else {
72 write!(f, "{} {} {}", start, stream, end)?
73 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070074 }
Alex Crichton9c2fb0a2017-05-26 08:49:31 -070075 TokenKind::Word(ref sym) => write!(f, "{}", sym.as_str())?,
Alex Crichton44bffbc2017-05-19 17:51:59 -070076 TokenKind::Op(ch, ref op) => {
77 write!(f, "{}", ch)?;
78 match *op {
79 OpKind::Alone => {}
80 OpKind::Joint => joint = true,
81 }
82 }
83 TokenKind::Literal(ref literal) => {
84 write!(f, "{}", literal)?;
85 // handle comments
86 if (literal.0).0.starts_with("/") {
87 write!(f, "\n")?;
88 }
89 }
90 }
91 }
92
93 Ok(())
94 }
95}
96
97impl From<proc_macro::TokenStream> for TokenStream {
98 fn from(inner: proc_macro::TokenStream) -> TokenStream {
99 inner.to_string().parse().expect("compiler token stream parse failed")
100 }
101}
102
103impl From<TokenStream> for proc_macro::TokenStream {
104 fn from(inner: TokenStream) -> proc_macro::TokenStream {
105 inner.to_string().parse().expect("failed to parse to compiler tokens")
106 }
107}
108
109
110impl From<TokenTree> for TokenStream {
111 fn from(tree: TokenTree) -> TokenStream {
112 TokenStream { inner: vec![tree] }
113 }
114}
115
116impl iter::FromIterator<TokenStream> for TokenStream {
117 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
118 let mut v = Vec::new();
119
120 for stream in streams.into_iter() {
121 v.extend(stream.inner);
122 }
123
124 TokenStream { inner: v }
125 }
126}
127
128pub type TokenIter = vec::IntoIter<TokenTree>;
129
130impl IntoIterator for TokenStream {
131 type Item = TokenTree;
132 type IntoIter = TokenIter;
133
134 fn into_iter(self) -> TokenIter {
135 self.inner.into_iter()
136 }
137}
138
/// Placeholder span: this implementation tracks no source location.
#[derive(Clone, Copy, Default, Debug)]
pub struct Span;

impl Span {
    /// Returns the (only) span value.
    pub fn call_site() -> Span {
        Span::default()
    }
}
147
David Tolnay977f8282017-05-31 17:41:33 -0700148#[derive(Copy, Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -0700149pub struct Symbol(usize);
150
151thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
152
153impl<'a> From<&'a str> for Symbol {
154 fn from(string: &'a str) -> Symbol {
155 Symbol(SYMBOLS.with(|s| s.borrow_mut().intern(string)))
156 }
157}
158
159impl ops::Deref for Symbol {
160 type Target = str;
161
162 fn deref(&self) -> &str {
163 SYMBOLS.with(|interner| {
164 let interner = interner.borrow();
165 let s = interner.get(self.0);
166 unsafe {
167 &*(s as *const str)
168 }
169 })
170 }
171}
172
/// Append-only string table mapping each distinct string to a stable index.
struct Interner {
    /// Reverse lookup: string contents -> index in `index_to_string`.
    string_to_index: HashMap<MyRc, usize>,
    /// Forward lookup: index -> string.
    index_to_string: Vec<Rc<String>>,
}

/// Map key that shares its `String` with `index_to_string` and can be looked
/// up by plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an empty table.
    fn new() -> Interner {
        Interner {
            index_to_string: Vec::new(),
            string_to_index: HashMap::new(),
        }
    }

    /// Returns the index for `s`, adding it to the table on first sight.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(existing) = self.string_to_index.get(s).cloned() {
            return existing;
        }
        // New entries always go at the end, so the index is the current length.
        let index = self.index_to_string.len();
        let shared = Rc::new(String::from(s));
        self.index_to_string.push(Rc::clone(&shared));
        self.string_to_index.insert(MyRc(shared), index);
        index
    }

    /// Returns the string stored at `idx`; panics if `idx` is out of range.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
209
/// A literal token, stored as the exact source text it prints as.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte literal such as `b'a'` or `b'\n'`.
    pub fn byte_char(byte: u8) -> Literal {
        let text = match byte {
            0 => String::from("b'\\0'"),
            // A double quote needs no escaping inside single quotes.
            b'"' => String::from("b'\"'"),
            other => {
                let body: String = ascii::escape_default(other)
                    .map(|c| c as char)
                    .collect();
                format!("b'{}'", body)
            }
        };
        Literal(text)
    }

    /// Builds a byte-string literal `b"..."`, escaping every byte that is
    /// not printable ASCII.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut out = String::from("b\"");
        for &byte in bytes {
            match byte {
                b'\0' => out.push_str("\\0"),
                b'\t' => out.push_str("\\t"),
                b'\n' => out.push_str("\\n"),
                b'\r' => out.push_str("\\r"),
                b'"' => out.push_str("\\\""),
                b'\\' => out.push_str("\\\\"),
                printable if printable >= 0x20 && printable <= 0x7E => {
                    out.push(printable as char)
                }
                other => out.push_str(&format!("\\x{:02X}", other)),
            }
        }
        out.push('"');
        Literal(out)
    }

    /// Wraps already-formatted doc comment text without validating it.
    pub fn doccomment(s: &str) -> Literal {
        Literal(String::from(s))
    }

    /// Wraps already-formatted float literal text without validating it.
    pub fn float(s: &str) -> Literal {
        Literal(String::from(s))
    }

    /// Wraps already-formatted integer literal text without validating it.
    pub fn integer(s: &str) -> Literal {
        Literal(String::from(s))
    }

    /// Builds a raw string literal `r#"..."#` fenced by `pounds` hashes.
    /// The contents `s` are inserted verbatim.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        Literal::fenced("r", s, pounds)
    }

    /// Builds a raw byte-string literal `br#"..."#` fenced by `pounds` hashes.
    /// The contents `s` are inserted verbatim.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        Literal::fenced("br", s, pounds)
    }

    /// Shared body of the raw literal constructors: prefix, hashes, quoted
    /// contents, hashes.
    fn fenced(prefix: &str, s: &str, pounds: usize) -> Literal {
        let fence: String = (0..pounds).map(|_| '#').collect();
        Literal(format!("{}{}\"{}\"{}", prefix, fence, s, fence))
    }
}

impl fmt::Display for Literal {
    /// Writes the stored literal text, honouring the formatter's flags.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
283
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700284macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700285 ($($t:ty,)*) => {$(
286 impl From<$t> for Literal {
287 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700288 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700289 }
290 }
291 )*}
292}
293
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700294ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700295 u8, u16, u32, u64, usize,
296 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700297}
298
299macro_rules! floats {
300 ($($t:ty,)*) => {$(
301 impl From<$t> for Literal {
302 fn from(t: $t) -> Literal {
303 assert!(!t.is_nan());
304 assert!(!t.is_infinite());
305 Literal(format!(concat!("{}", stringify!($t)), t))
306 }
307 }
308 )*}
309}
310
311floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700312 f32, f64,
313}
314
Alex Crichton44bffbc2017-05-19 17:51:59 -0700315impl<'a> From<&'a str> for Literal {
316 fn from(t: &'a str) -> Literal {
317 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
318 s.push('"');
319 s.insert(0, '"');
320 Literal(s)
321 }
322}
323
324impl From<char> for Literal {
325 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700326 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700327 }
328}
329
David Tolnay8e976c62017-06-01 12:12:29 -0700330named!(token_stream -> ::TokenStream, map!(
331 many0!(token_tree),
332 |trees| ::TokenStream(TokenStream { inner: trees })
333));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700334
335named!(token_tree -> TokenTree,
336 map!(token_kind, |s: TokenKind| {
337 TokenTree {
338 span: ::Span(Span),
339 kind: s,
340 }
341 }));
342
343named!(token_kind -> TokenKind, alt!(
David Tolnay8e976c62017-06-01 12:12:29 -0700344 map!(delimited, |(d, s)| TokenKind::Sequence(d, s))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700345 |
David Tolnay8e976c62017-06-01 12:12:29 -0700346 map!(literal, TokenKind::Literal) // must be before symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700347 |
David Tolnay8e976c62017-06-01 12:12:29 -0700348 map!(symbol, TokenKind::Word)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700349 |
David Tolnay643177b2017-06-01 12:02:44 -0700350 map!(op, |(op, kind)| TokenKind::Op(op, kind))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700351));
352
David Tolnay8e976c62017-06-01 12:12:29 -0700353named!(delimited -> (Delimiter, ::TokenStream), alt!(
Alex Crichton44bffbc2017-05-19 17:51:59 -0700354 delimited!(
355 punct!("("),
356 token_stream,
357 punct!(")")
358 ) => { |ts| (Delimiter::Parenthesis, ts) }
359 |
360 delimited!(
361 punct!("["),
362 token_stream,
363 punct!("]")
364 ) => { |ts| (Delimiter::Bracket, ts) }
365 |
366 delimited!(
367 punct!("{"),
368 token_stream,
369 punct!("}")
370 ) => { |ts| (Delimiter::Brace, ts) }
371));
372
David Tolnaya202d502017-06-01 12:26:55 -0700373fn symbol(mut input: &str) -> PResult<::Symbol> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700374 input = skip_whitespace(input);
375
376 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700377
378 let lifetime = input.starts_with("'");
379 if lifetime {
380 chars.next();
381 }
382
Alex Crichton44bffbc2017-05-19 17:51:59 -0700383 match chars.next() {
384 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700385 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700386 }
387
David Tolnay214c94c2017-06-01 12:42:56 -0700388 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700389 for (i, ch) in chars {
390 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700391 end = i;
392 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700393 }
394 }
395
David Tolnay214c94c2017-06-01 12:42:56 -0700396 if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
397 Err(LexError)
398 } else {
399 Ok((&input[end..], input[..end].into()))
400 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700401}
402
// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
//
// Names that `symbol` refuses to accept as lifetime names.
static KEYWORDS: &'static [&'static str] = &[
    "abstract", "alignof", "as", "become", "box", "break",
    "const", "continue", "crate", "do", "else", "enum",
    "extern", "false", "final", "fn", "for", "if",
    "impl", "in", "let", "loop", "macro", "match",
    "mod", "move", "mut", "offsetof", "override", "priv",
    "proc", "pub", "pure", "ref", "return", "self",
    "Self", "sizeof", "static", "struct", "super", "trait",
    "true", "type", "typeof", "unsafe", "unsized", "use",
    "virtual", "where", "while", "yield",
];
413
David Tolnay8e976c62017-06-01 12:12:29 -0700414fn literal(input: &str) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700415 let input_no_ws = skip_whitespace(input);
416
417 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700418 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700419 let start = input.len() - input_no_ws.len();
420 let len = input_no_ws.len() - a.len();
421 let end = start + len;
David Tolnay8e976c62017-06-01 12:12:29 -0700422 Ok((a, ::Literal(Literal(input[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700423 }
David Tolnay1218e122017-06-01 11:13:45 -0700424 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700425 }
426}
427
428named!(literal_nocapture -> (), alt!(
429 string
430 |
431 byte_string
432 |
433 byte
434 |
435 character
436 |
437 float
438 |
439 int
440 |
441 boolean
442 |
443 doc_comment
444));
445
446named!(string -> (), alt!(
447 quoted_string
448 |
449 preceded!(
450 punct!("r"),
451 raw_string
452 ) => { |_| () }
453));
454
455named!(quoted_string -> (), delimited!(
456 punct!("\""),
457 cooked_string,
458 tag!("\"")
459));
460
David Tolnay1218e122017-06-01 11:13:45 -0700461fn cooked_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700462 let mut chars = input.char_indices().peekable();
463 while let Some((byte_offset, ch)) = chars.next() {
464 match ch {
465 '"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700466 return Ok((&input[byte_offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700467 }
468 '\r' => {
469 if let Some((_, '\n')) = chars.next() {
470 // ...
471 } else {
472 break;
473 }
474 }
475 '\\' => {
476 match chars.next() {
477 Some((_, 'x')) => {
478 if !backslash_x_char(&mut chars) {
479 break
480 }
481 }
482 Some((_, 'n')) |
483 Some((_, 'r')) |
484 Some((_, 't')) |
485 Some((_, '\\')) |
486 Some((_, '\'')) |
487 Some((_, '"')) |
488 Some((_, '0')) => {}
489 Some((_, 'u')) => {
490 if !backslash_u(&mut chars) {
491 break
492 }
493 }
494 Some((_, '\n')) | Some((_, '\r')) => {
495 while let Some(&(_, ch)) = chars.peek() {
496 if ch.is_whitespace() {
497 chars.next();
498 } else {
499 break;
500 }
501 }
502 }
503 _ => break,
504 }
505 }
506 _ch => {}
507 }
508 }
David Tolnay1218e122017-06-01 11:13:45 -0700509 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700510}
511
512named!(byte_string -> (), alt!(
513 delimited!(
514 punct!("b\""),
515 cooked_byte_string,
516 tag!("\"")
517 ) => { |_| () }
518 |
519 preceded!(
520 punct!("br"),
521 raw_string
522 ) => { |_| () }
523));
524
David Tolnay1218e122017-06-01 11:13:45 -0700525fn cooked_byte_string(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700526 let mut bytes = input.bytes().enumerate();
527 'outer: while let Some((offset, b)) = bytes.next() {
528 match b {
529 b'"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700530 return Ok((&input[offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700531 }
532 b'\r' => {
533 if let Some((_, b'\n')) = bytes.next() {
534 // ...
535 } else {
536 break;
537 }
538 }
539 b'\\' => {
540 match bytes.next() {
541 Some((_, b'x')) => {
542 if !backslash_x_byte(&mut bytes) {
543 break
544 }
545 }
546 Some((_, b'n')) |
547 Some((_, b'r')) |
548 Some((_, b't')) |
549 Some((_, b'\\')) |
550 Some((_, b'0')) |
551 Some((_, b'\'')) |
552 Some((_, b'"')) => {}
553 Some((newline, b'\n')) |
554 Some((newline, b'\r')) => {
555 let rest = &input[newline + 1..];
556 for (offset, ch) in rest.char_indices() {
557 if !ch.is_whitespace() {
558 input = &rest[offset..];
559 bytes = input.bytes().enumerate();
560 continue 'outer;
561 }
562 }
563 break;
564 }
565 _ => break,
566 }
567 }
568 b if b < 0x80 => {}
569 _ => break,
570 }
571 }
David Tolnay1218e122017-06-01 11:13:45 -0700572 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700573}
574
David Tolnay1218e122017-06-01 11:13:45 -0700575fn raw_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700576 let mut chars = input.char_indices();
577 let mut n = 0;
578 while let Some((byte_offset, ch)) = chars.next() {
579 match ch {
580 '"' => {
581 n = byte_offset;
582 break;
583 }
584 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700585 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700586 }
587 }
588 for (byte_offset, ch) in chars {
589 match ch {
590 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
591 let rest = &input[byte_offset + 1 + n..];
David Tolnay1218e122017-06-01 11:13:45 -0700592 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700593 }
594 '\r' => {}
595 _ => {}
596 }
597 }
David Tolnay1218e122017-06-01 11:13:45 -0700598 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700599}
600
601named!(byte -> (), do_parse!(
602 punct!("b") >>
603 tag!("'") >>
604 cooked_byte >>
605 tag!("'") >>
606 (())
607));
608
David Tolnay1218e122017-06-01 11:13:45 -0700609fn cooked_byte(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700610 let mut bytes = input.bytes().enumerate();
611 let ok = match bytes.next().map(|(_, b)| b) {
612 Some(b'\\') => {
613 match bytes.next().map(|(_, b)| b) {
614 Some(b'x') => backslash_x_byte(&mut bytes),
615 Some(b'n') |
616 Some(b'r') |
617 Some(b't') |
618 Some(b'\\') |
619 Some(b'0') |
620 Some(b'\'') |
621 Some(b'"') => true,
622 _ => false,
623 }
624 }
625 b => b.is_some(),
626 };
627 if ok {
628 match bytes.next() {
David Tolnay1218e122017-06-01 11:13:45 -0700629 Some((offset, _)) => Ok((&input[offset..], ())),
630 None => Ok(("", ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700631 }
632 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700633 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700634 }
635}
636
637named!(character -> (), do_parse!(
638 punct!("'") >>
639 cooked_char >>
640 tag!("'") >>
641 (())
642));
643
David Tolnay1218e122017-06-01 11:13:45 -0700644fn cooked_char(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700645 let mut chars = input.char_indices();
646 let ok = match chars.next().map(|(_, ch)| ch) {
647 Some('\\') => {
648 match chars.next().map(|(_, ch)| ch) {
649 Some('x') => backslash_x_char(&mut chars),
650 Some('u') => backslash_u(&mut chars),
651 Some('n') |
652 Some('r') |
653 Some('t') |
654 Some('\\') |
655 Some('0') |
656 Some('\'') |
657 Some('"') => true,
658 _ => false,
659 }
660 }
661 ch => ch.is_some(),
662 };
663 if ok {
David Tolnay1218e122017-06-01 11:13:45 -0700664 Ok((chars.as_str(), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700665 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700666 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700667 }
668}
669
670macro_rules! next_ch {
671 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
672 match $chars.next() {
673 Some((_, ch)) => match ch {
674 $pat $(| $rest)* => ch,
675 _ => return false,
676 },
677 None => return false
678 }
679 };
680}
681
/// Checks the two digits of a `\xNN` escape in a char or string literal.
///
/// The first digit must be `0`-`7` and the second any hex digit. Items are
/// consumed up to and including the first one that fails.
fn backslash_x_char<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    let high_ok = match chars.next() {
        Some((_, ch)) => ch.is_digit(8),
        None => false,
    };
    if !high_ok {
        return false;
    }
    match chars.next() {
        Some((_, ch)) => ch.is_digit(16),
        None => false,
    }
}
689
/// Checks the two hex digits of a `\xNN` escape in a byte or byte-string
/// literal.
///
/// Items are consumed up to and including the first one that fails.
fn backslash_x_byte<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, u8)>
{
    for _ in 0..2 {
        match chars.next() {
            Some((_, byte)) if (byte as char).is_digit(16) => {}
            _ => return false,
        }
    }
    true
}
697
/// Checks the `{...}` part of a `\u{...}` escape.
///
/// Accepts `{`, one hex digit, then any mix of up to five further hex digits
/// and `_` separators, then `}`. Underscores are allowed after the first
/// digit, as in `\u{1_F600}`, and do not count towards the six-digit limit.
/// Items are consumed up to and including the closing `}` or the first
/// offending item.
fn backslash_u<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    match chars.next() {
        Some((_, '{')) => {}
        _ => return false,
    }
    // At least one digit is required, and it may not be an underscore.
    match chars.next() {
        Some((_, ch)) if ch.is_digit(16) => {}
        _ => return false,
    }
    let mut digits = 1;
    loop {
        match chars.next() {
            Some((_, '}')) => return true,
            Some((_, '_')) => {}
            Some((_, ch)) if ch.is_digit(16) && digits < 6 => digits += 1,
            _ => return false,
        }
    }
}
726
David Tolnay744a6b82017-06-01 11:34:29 -0700727fn float(input: &str) -> PResult<()> {
728 let (rest, ()) = float_digits(input)?;
729 for suffix in &["f32", "f64"] {
730 if rest.starts_with(suffix) {
731 return word_break(&rest[suffix.len()..]);
732 }
733 }
734 word_break(rest)
735}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700736
David Tolnay744a6b82017-06-01 11:34:29 -0700737fn float_digits(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700738 let mut chars = input.chars().peekable();
739 match chars.next() {
740 Some(ch) if ch >= '0' && ch <= '9' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700741 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700742 }
743
744 let mut len = 1;
745 let mut has_dot = false;
746 let mut has_exp = false;
747 while let Some(&ch) = chars.peek() {
748 match ch {
749 '0'...'9' | '_' => {
750 chars.next();
751 len += 1;
752 }
753 '.' => {
754 if has_dot {
755 break;
756 }
757 chars.next();
758 if chars.peek()
759 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
760 .unwrap_or(false) {
David Tolnay1218e122017-06-01 11:13:45 -0700761 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700762 }
763 len += 1;
764 has_dot = true;
765 }
766 'e' | 'E' => {
767 chars.next();
768 len += 1;
769 has_exp = true;
770 break;
771 }
772 _ => break,
773 }
774 }
775
776 let rest = &input[len..];
777 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
David Tolnay1218e122017-06-01 11:13:45 -0700778 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700779 }
780
781 if has_exp {
782 let mut has_exp_value = false;
783 while let Some(&ch) = chars.peek() {
784 match ch {
785 '+' | '-' => {
786 if has_exp_value {
787 break;
788 }
789 chars.next();
790 len += 1;
791 }
792 '0'...'9' => {
793 chars.next();
794 len += 1;
795 has_exp_value = true;
796 }
797 '_' => {
798 chars.next();
799 len += 1;
800 }
801 _ => break,
802 }
803 }
804 if !has_exp_value {
David Tolnay1218e122017-06-01 11:13:45 -0700805 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700806 }
807 }
808
David Tolnay1218e122017-06-01 11:13:45 -0700809 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700810}
811
David Tolnay744a6b82017-06-01 11:34:29 -0700812fn int(input: &str) -> PResult<()> {
813 let (rest, ()) = digits(input)?;
814 for suffix in &[
815 "isize",
816 "i8",
817 "i16",
818 "i32",
819 "i64",
820 "i128",
821 "usize",
822 "u8",
823 "u16",
824 "u32",
825 "u64",
826 "u128",
827 ] {
828 if rest.starts_with(suffix) {
829 return word_break(&rest[suffix.len()..]);
830 }
831 }
832 word_break(rest)
833}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700834
David Tolnay1218e122017-06-01 11:13:45 -0700835fn digits(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700836 let base = if input.starts_with("0x") {
837 input = &input[2..];
838 16
839 } else if input.starts_with("0o") {
840 input = &input[2..];
841 8
842 } else if input.starts_with("0b") {
843 input = &input[2..];
844 2
845 } else {
846 10
847 };
848
Alex Crichton44bffbc2017-05-19 17:51:59 -0700849 let mut len = 0;
850 let mut empty = true;
851 for b in input.bytes() {
852 let digit = match b {
853 b'0'...b'9' => (b - b'0') as u64,
854 b'a'...b'f' => 10 + (b - b'a') as u64,
855 b'A'...b'F' => 10 + (b - b'A') as u64,
856 b'_' => {
857 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -0700858 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700859 }
860 len += 1;
861 continue;
862 }
863 _ => break,
864 };
865 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -0700866 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700867 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700868 len += 1;
869 empty = false;
870 }
871 if empty {
David Tolnay1218e122017-06-01 11:13:45 -0700872 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700873 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700874 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700875 }
876}
877
878named!(boolean -> (), alt!(
879 keyword!("true") => { |_| () }
880 |
881 keyword!("false") => { |_| () }
882));
883
David Tolnay1218e122017-06-01 11:13:45 -0700884fn op(input: &str) -> PResult<(char, OpKind)> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700885 let input = skip_whitespace(input);
886 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700887 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -0700888 let kind = match op_char(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700889 Ok(_) => OpKind::Joint,
890 Err(LexError) => OpKind::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -0700891 };
David Tolnay1218e122017-06-01 11:13:45 -0700892 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -0700893 }
David Tolnay1218e122017-06-01 11:13:45 -0700894 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700895 }
896}
897
David Tolnay1218e122017-06-01 11:13:45 -0700898fn op_char(input: &str) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700899 let mut chars = input.chars();
900 let first = match chars.next() {
901 Some(ch) => ch,
902 None => {
David Tolnay1218e122017-06-01 11:13:45 -0700903 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -0700904 }
905 };
906 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
907 if recognized.contains(first) {
David Tolnay1218e122017-06-01 11:13:45 -0700908 Ok((chars.as_str(), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700909 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700910 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700911 }
912}
913
Alex Crichton44bffbc2017-05-19 17:51:59 -0700914named!(doc_comment -> (), alt!(
915 do_parse!(
916 punct!("//!") >>
917 take_until!("\n") >>
918 (())
919 )
920 |
921 do_parse!(
922 option!(whitespace) >>
923 peek!(tag!("/*!")) >>
924 block_comment >>
925 (())
926 )
927 |
928 do_parse!(
929 punct!("///") >>
930 not!(tag!("/")) >>
931 take_until!("\n") >>
932 (())
933 )
934 |
935 do_parse!(
936 option!(whitespace) >>
937 peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
938 block_comment >>
939 (())
940 )
941));
942
// Unit tests for the interner, the literal constructors and the lexer.
943#[cfg(test)]
944mod tests {
945 use super::*;
946
 // Interned symbols dereference back to the text they were created from.
947 #[test]
948 fn symbols() {
949 assert_eq!(&*Symbol::from("foo"), "foo");
950 assert_eq!(&*Symbol::from("bar"), "bar");
951 }
952
 // String literals are wrapped in quotes and embedded quotes are escaped.
953 #[test]
954 fn literals() {
955 assert_eq!(Literal::from("foo").to_string(), "\"foo\"");
956 assert_eq!(Literal::from("\"").to_string(), "\"\\\"\"");
957 }
958
 // Printing a parsed stream and parsing that output again must print the
 // same text a second time.
959 #[test]
960 fn roundtrip() {
961 fn roundtrip(p: &str) {
962 println!("parse: {}", p);
963 let s = p.parse::<TokenStream>().unwrap().to_string();
964 println!("first: {}", s);
965 let s2 = s.to_string().parse::<TokenStream>().unwrap().to_string();
966 assert_eq!(s, s2);
967 }
968 roundtrip("a");
969 roundtrip("<<");
970 roundtrip("<<=");
971 roundtrip("
972 /// a
973 wut
974 ");
975 roundtrip("
976 1
977 1.0
978 1f32
979 2f64
980 1usize
981 4isize
982 4e10
983 1_000
984 1_0i32
985 8u8
986 9
987 0
Michael Layzell9ee75c92017-05-31 19:44:26 -0400988 0xffffffffffffffffffffffffffffffff
Alex Crichton44bffbc2017-05-19 17:51:59 -0700989 ");
David Tolnaya202d502017-06-01 12:26:55 -0700990 roundtrip("'a");
991 roundtrip("'static");
Alex Crichton44bffbc2017-05-19 17:51:59 -0700992 }
David Tolnay744a6b82017-06-01 11:34:29 -0700993
 // Inputs the lexer must reject: bad numeric suffixes, a detached
 // apostrophe, and a keyword used as a lifetime name.
994 #[test]
995 fn fail() {
996 fn fail(p: &str) {
997 if p.parse::<TokenStream>().is_ok() {
998 panic!("should have failed to parse: {}", p);
999 }
1000 }
1001 fail("1x");
1002 fail("1u80");
1003 fail("1f320");
David Tolnaya202d502017-06-01 12:26:55 -07001004 fail("' static");
David Tolnay214c94c2017-06-01 12:42:56 -07001005 fail("'mut");
David Tolnay744a6b82017-06-01 11:34:29 -07001006 }
Alex Crichton44bffbc2017-05-19 17:51:59 -07001007}