blob: 68e4e33206519d51957538d4aab54e92f7ab9318 [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::iter;
7use std::ops;
8use std::rc::Rc;
9use std::str::FromStr;
10use std::vec;
11
12use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070013use unicode_xid::UnicodeXID;
David Tolnay744a6b82017-06-01 11:34:29 -070014use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070015
16use {TokenTree, TokenKind, Delimiter, OpKind};
17
David Tolnay977f8282017-05-31 17:41:33 -070018#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070019pub struct TokenStream {
20 inner: Vec<TokenTree>,
21}
22
23#[derive(Debug)]
24pub struct LexError;
25
26impl TokenStream {
27 pub fn empty() -> TokenStream {
28 TokenStream { inner: Vec::new() }
29 }
30
31 pub fn is_empty(&self) -> bool {
32 self.inner.len() == 0
33 }
34}
35
36impl FromStr for TokenStream {
37 type Err = LexError;
38
39 fn from_str(src: &str) -> Result<TokenStream, LexError> {
40 match token_stream(src) {
David Tolnay1218e122017-06-01 11:13:45 -070041 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070042 if skip_whitespace(input).len() != 0 {
43 Err(LexError)
44 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070045 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070046 }
47 }
David Tolnay1218e122017-06-01 11:13:45 -070048 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070049 }
50 }
51}
52
53impl fmt::Display for TokenStream {
54 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
55 let mut joint = false;
56 for (i, tt) in self.inner.iter().enumerate() {
57 if i != 0 && !joint {
58 write!(f, " ")?;
59 }
60 joint = false;
61 match tt.kind {
62 TokenKind::Sequence(delim, ref stream) => {
63 let (start, end) = match delim {
64 Delimiter::Parenthesis => ("(", ")"),
65 Delimiter::Brace => ("{", "}"),
66 Delimiter::Bracket => ("[", "]"),
67 Delimiter::None => ("", ""),
68 };
Alex Crichton852d53d2017-05-19 19:25:08 -070069 if stream.0.inner.len() == 0 {
70 write!(f, "{} {}", start, end)?
71 } else {
72 write!(f, "{} {} {}", start, stream, end)?
73 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070074 }
Alex Crichton9c2fb0a2017-05-26 08:49:31 -070075 TokenKind::Word(ref sym) => write!(f, "{}", sym.as_str())?,
Alex Crichton44bffbc2017-05-19 17:51:59 -070076 TokenKind::Op(ch, ref op) => {
77 write!(f, "{}", ch)?;
78 match *op {
79 OpKind::Alone => {}
80 OpKind::Joint => joint = true,
81 }
82 }
83 TokenKind::Literal(ref literal) => {
84 write!(f, "{}", literal)?;
85 // handle comments
86 if (literal.0).0.starts_with("/") {
87 write!(f, "\n")?;
88 }
89 }
90 }
91 }
92
93 Ok(())
94 }
95}
96
97impl From<proc_macro::TokenStream> for TokenStream {
98 fn from(inner: proc_macro::TokenStream) -> TokenStream {
99 inner.to_string().parse().expect("compiler token stream parse failed")
100 }
101}
102
103impl From<TokenStream> for proc_macro::TokenStream {
104 fn from(inner: TokenStream) -> proc_macro::TokenStream {
105 inner.to_string().parse().expect("failed to parse to compiler tokens")
106 }
107}
108
109
110impl From<TokenTree> for TokenStream {
111 fn from(tree: TokenTree) -> TokenStream {
112 TokenStream { inner: vec![tree] }
113 }
114}
115
116impl iter::FromIterator<TokenStream> for TokenStream {
117 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
118 let mut v = Vec::new();
119
120 for stream in streams.into_iter() {
121 v.extend(stream.inner);
122 }
123
124 TokenStream { inner: v }
125 }
126}
127
128pub type TokenIter = vec::IntoIter<TokenTree>;
129
130impl IntoIterator for TokenStream {
131 type Item = TokenTree;
132 type IntoIter = TokenIter;
133
134 fn into_iter(self) -> TokenIter {
135 self.inner.into_iter()
136 }
137}
138
/// Source-location marker; this implementation carries no data.
#[derive(Clone, Copy, Default, Debug)]
pub struct Span;

impl Span {
    /// Returns the (only) span value.
    pub fn call_site() -> Span {
        Span {}
    }
}
147
/// Handle to an interned string: an index into the current thread's
/// `Interner`.
#[derive(Copy, Clone, Debug)]
pub struct Symbol(usize);

thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));

impl<'a> From<&'a str> for Symbol {
    /// Interns `string` in the thread-local table and returns its handle.
    fn from(string: &'a str) -> Symbol {
        let index = SYMBOLS.with(|cell| cell.borrow_mut().intern(string));
        Symbol(index)
    }
}

impl ops::Deref for Symbol {
    type Target = str;

    /// Looks the symbol's text up in the thread-local table.
    fn deref(&self) -> &str {
        SYMBOLS.with(|cell| {
            let table = cell.borrow();
            let text: *const str = table.get(self.0);
            // SAFETY: the text lives in an `Rc<String>` that the interner
            // stores and never removes, so the pointee is not freed or moved
            // while the table exists. NOTE(review): this relies on the
            // reference not outliving the thread-local table — confirm.
            unsafe { &*text }
        })
    }
}

/// Two-way table between strings and the indices handed out for them.
struct Interner {
    string_to_index: HashMap<MyRc, usize>,
    index_to_string: Vec<Rc<String>>,
}

/// Map key wrapper so the table can be queried with a plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an empty table.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index of `s`, adding it to the table on first sight.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(&known) = self.string_to_index.get(s) {
            return known;
        }
        // The new entry goes at the end, so its index is the current length.
        let index = self.index_to_string.len();
        let shared = Rc::new(s.to_string());
        self.index_to_string.push(shared.clone());
        self.string_to_index.insert(MyRc(shared), index);
        index
    }

    /// Returns the text stored at `idx`; panics if `idx` is out of range.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
209
/// A literal token, stored as the exact source text it prints as.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte literal such as `b'a'` or `b'\n'`.
    ///
    /// NUL is written as `\0` and a double quote is left unescaped; every
    /// other byte uses `std::ascii::escape_default`.
    pub fn byte_char(byte: u8) -> Literal {
        let mut escaped = String::from("b'");
        match byte {
            0 => escaped.push_str(r"\0"),
            b'"' => escaped.push('"'),
            n => escaped.extend(ascii::escape_default(n).map(|c| c as char)),
        }
        escaped.push('\'');
        Literal(escaped)
    }

    /// Builds a byte-string literal `b"..."`, escaping quotes, backslashes,
    /// common control bytes, and writing anything outside printable ASCII
    /// as `\xNN`.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut escaped = String::with_capacity(bytes.len() + 3);
        escaped.push_str("b\"");
        for &b in bytes {
            match b {
                b'\0' => escaped.push_str(r"\0"),
                b'\t' => escaped.push_str(r"\t"),
                b'\n' => escaped.push_str(r"\n"),
                b'\r' => escaped.push_str(r"\r"),
                b'"' => escaped.push_str("\\\""),
                b'\\' => escaped.push_str("\\\\"),
                // Printable ASCII is copied through unchanged.
                b if b >= b'\x20' && b <= b'\x7E' => escaped.push(b as char),
                b => escaped.push_str(&format!("\\x{:02X}", b)),
            }
        }
        escaped.push('"');
        Literal(escaped)
    }

    /// Wraps already-formatted doc comment text without modification.
    pub fn doccomment(s: &str) -> Literal {
        Literal(s.to_string())
    }

    /// Wraps already-formatted float literal text without modification.
    pub fn float(s: &str) -> Literal {
        Literal(s.to_string())
    }

    /// Wraps already-formatted integer literal text without modification.
    pub fn integer(s: &str) -> Literal {
        Literal(s.to_string())
    }

    /// Builds a raw string literal `r#"..."#` with `pounds` hash marks on
    /// each side. `s` is inserted verbatim.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        Literal::raw("r", s, pounds)
    }

    /// Builds a raw byte-string literal `br#"..."#` with `pounds` hash marks
    /// on each side. `s` is inserted verbatim.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        Literal::raw("br", s, pounds)
    }

    /// Shared builder for raw literals: `prefix`, hashes, quoted body, hashes.
    fn raw(prefix: &str, s: &str, pounds: usize) -> Literal {
        let mut ret = String::with_capacity(prefix.len() + s.len() + 2 * pounds + 2);
        ret.push_str(prefix);
        ret.extend((0..pounds).map(|_| '#'));
        ret.push('"');
        ret.push_str(s);
        ret.push('"');
        ret.extend((0..pounds).map(|_| '#'));
        Literal(ret)
    }
}

impl fmt::Display for Literal {
    /// Prints the stored literal text, honouring the formatter's flags.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
283
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700284macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700285 ($($t:ty,)*) => {$(
286 impl From<$t> for Literal {
287 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700288 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700289 }
290 }
291 )*}
292}
293
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700294ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700295 u8, u16, u32, u64, usize,
296 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700297}
298
299macro_rules! floats {
300 ($($t:ty,)*) => {$(
301 impl From<$t> for Literal {
302 fn from(t: $t) -> Literal {
303 assert!(!t.is_nan());
304 assert!(!t.is_infinite());
305 Literal(format!(concat!("{}", stringify!($t)), t))
306 }
307 }
308 )*}
309}
310
311floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700312 f32, f64,
313}
314
Alex Crichton44bffbc2017-05-19 17:51:59 -0700315impl<'a> From<&'a str> for Literal {
316 fn from(t: &'a str) -> Literal {
317 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
318 s.push('"');
319 s.insert(0, '"');
320 Literal(s)
321 }
322}
323
324impl From<char> for Literal {
325 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700326 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700327 }
328}
329
// Parses a run of token trees (`many0!` is presumably "zero or more" —
// confirm against strnom) and wraps them in the public `::TokenStream`.
named!(token_stream -> ::TokenStream, map!(
    many0!(token_tree),
    |trees| ::TokenStream(TokenStream { inner: trees })
));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700334
// Parses one token kind and pairs it with a span to form a `TokenTree`.
named!(token_tree -> TokenTree,
    map!(token_kind, |s: TokenKind| {
        TokenTree {
            span: ::Span(Span),
            kind: s,
        }
    }));
342
// Parses a single token: a delimited sequence, a literal, a word, or an
// operator, tried in that order.
named!(token_kind -> TokenKind, alt!(
    map!(delimited, |(d, s)| TokenKind::Sequence(d, s))
    |
    map!(literal, TokenKind::Literal) // must be before symbol
    |
    map!(symbol, TokenKind::Word)
    |
    map!(op, |(op, kind)| TokenKind::Op(op, kind))
));
352
// Parses a token stream enclosed in `(...)`, `[...]` or `{...}` and returns
// it together with the matching `Delimiter`.
named!(delimited -> (Delimiter, ::TokenStream), alt!(
    delimited!(
        punct!("("),
        token_stream,
        punct!(")")
    ) => { |ts| (Delimiter::Parenthesis, ts) }
    |
    delimited!(
        punct!("["),
        token_stream,
        punct!("]")
    ) => { |ts| (Delimiter::Bracket, ts) }
    |
    delimited!(
        punct!("{"),
        token_stream,
        punct!("}")
    ) => { |ts| (Delimiter::Brace, ts) }
));
372
// Parses a word-like token: a lifetime (tried first) or a plain word.
named!(symbol -> ::Symbol, alt!(
    lifetime
    |
    map!(word, ::Symbol::from)
));
378
// Parses `'` followed by a word and produces a symbol whose text includes
// the leading apostrophe.
// NOTE(review): `word` appears to accept `static` too, so the second
// alternative may never be reached — confirm against `alt!` semantics.
named!(lifetime -> ::Symbol, preceded!(
    punct!("'"),
    alt!(
        // TODO: can we get rid of this allocation?
        map!(word, |id| ::Symbol::from(format!("'{}", id)))
        |
        map!(keyword!("static"), |_| ::Symbol::from("'static"))
    )
));
388
David Tolnay1218e122017-06-01 11:13:45 -0700389fn word(mut input: &str) -> PResult<&str> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700390 input = skip_whitespace(input);
391
392 let mut chars = input.char_indices();
393 match chars.next() {
394 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700395 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700396 }
397
398 for (i, ch) in chars {
399 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay1218e122017-06-01 11:13:45 -0700400 return Ok((&input[i..], &input[..i]))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700401 }
402 }
403
David Tolnay1218e122017-06-01 11:13:45 -0700404 Ok(("", input))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700405}
406
David Tolnay8e976c62017-06-01 12:12:29 -0700407fn literal(input: &str) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700408 let input_no_ws = skip_whitespace(input);
409
410 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700411 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700412 let start = input.len() - input_no_ws.len();
413 let len = input_no_ws.len() - a.len();
414 let end = start + len;
David Tolnay8e976c62017-06-01 12:12:29 -0700415 Ok((a, ::Literal(Literal(input[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700416 }
David Tolnay1218e122017-06-01 11:13:45 -0700417 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700418 }
419}
420
// Recognizes any literal form without producing a value; the caller slices
// the matched text out of the input itself. Alternatives are tried in the
// order listed (float before int).
named!(literal_nocapture -> (), alt!(
    string
    |
    byte_string
    |
    byte
    |
    character
    |
    float
    |
    int
    |
    boolean
    |
    doc_comment
));
438
// Recognizes a string literal: either a quoted string or `r` followed by a
// raw string body.
named!(string -> (), alt!(
    quoted_string
    |
    preceded!(
        punct!("r"),
        raw_string
    ) => { |_| () }
));
447
// Recognizes `"..."`: an opening quote, the cooked string body, and the
// closing quote (which `cooked_string` leaves unconsumed).
named!(quoted_string -> (), delimited!(
    punct!("\""),
    cooked_string,
    tag!("\"")
));
453
/// Scans the body of a quoted string literal, validating escapes.
///
/// On success the returned rest begins at the closing `"` (it is not
/// consumed). Fails if the input ends before a closing quote, on a bare
/// `\r` not followed by `\n`, or on an unknown or malformed escape.
fn cooked_string(input: &str) -> PResult<()> {
    let mut chars = input.char_indices().peekable();
    while let Some((byte_offset, ch)) = chars.next() {
        match ch {
            '"' => {
                return Ok((&input[byte_offset..], ()));
            }
            '\r' => {
                // A carriage return is only accepted as part of `\r\n`.
                if let Some((_, '\n')) = chars.next() {
                    // ...
                } else {
                    break;
                }
            }
            '\\' => {
                match chars.next() {
                    Some((_, 'x')) => {
                        if !backslash_x_char(&mut chars) {
                            break
                        }
                    }
                    Some((_, 'n')) |
                    Some((_, 'r')) |
                    Some((_, 't')) |
                    Some((_, '\\')) |
                    Some((_, '\'')) |
                    Some((_, '"')) |
                    Some((_, '0')) => {}
                    Some((_, 'u')) => {
                        if !backslash_u(&mut chars) {
                            break
                        }
                    }
                    // Backslash at end of line: skip the whitespace that
                    // follows, up to the next non-whitespace char.
                    Some((_, '\n')) | Some((_, '\r')) => {
                        while let Some(&(_, ch)) = chars.peek() {
                            if ch.is_whitespace() {
                                chars.next();
                            } else {
                                break;
                            }
                        }
                    }
                    _ => break,
                }
            }
            _ch => {}
        }
    }
    Err(LexError)
}
504
// Recognizes a byte-string literal: `b"..."` with a cooked body, or `br`
// followed by a raw string body.
named!(byte_string -> (), alt!(
    delimited!(
        punct!("b\""),
        cooked_byte_string,
        tag!("\"")
    ) => { |_| () }
    |
    preceded!(
        punct!("br"),
        raw_string
    ) => { |_| () }
));
517
/// Scans the body of a `b"..."` literal, validating escapes.
///
/// On success the returned rest begins at the closing `"` (it is not
/// consumed). Fails on end of input, a bare `\r` not followed by `\n`, an
/// unknown or malformed escape, or any byte >= 0x80.
fn cooked_byte_string(mut input: &str) -> PResult<()> {
    let mut bytes = input.bytes().enumerate();
    'outer: while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                return Ok((&input[offset..], ()));
            }
            b'\r' => {
                // A carriage return is only accepted as part of `\r\n`.
                if let Some((_, b'\n')) = bytes.next() {
                    // ...
                } else {
                    break;
                }
            }
            b'\\' => {
                match bytes.next() {
                    Some((_, b'x')) => {
                        if !backslash_x_byte(&mut bytes) {
                            break
                        }
                    }
                    Some((_, b'n')) |
                    Some((_, b'r')) |
                    Some((_, b't')) |
                    Some((_, b'\\')) |
                    Some((_, b'0')) |
                    Some((_, b'\'')) |
                    Some((_, b'"')) => {}
                    // Backslash at end of line: skip following whitespace.
                    // `input` is rebound to the first non-whitespace char and
                    // the byte iterator restarted, so later offsets are
                    // relative to the new `input`.
                    Some((newline, b'\n')) |
                    Some((newline, b'\r')) => {
                        let rest = &input[newline + 1..];
                        for (offset, ch) in rest.char_indices() {
                            if !ch.is_whitespace() {
                                input = &rest[offset..];
                                bytes = input.bytes().enumerate();
                                continue 'outer;
                            }
                        }
                        // Only whitespace remained: no closing quote.
                        break;
                    }
                    _ => break,
                }
            }
            b if b < 0x80 => {}
            _ => break,
        }
    }
    Err(LexError)
}
567
David Tolnay1218e122017-06-01 11:13:45 -0700568fn raw_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700569 let mut chars = input.char_indices();
570 let mut n = 0;
571 while let Some((byte_offset, ch)) = chars.next() {
572 match ch {
573 '"' => {
574 n = byte_offset;
575 break;
576 }
577 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700578 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700579 }
580 }
581 for (byte_offset, ch) in chars {
582 match ch {
583 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
584 let rest = &input[byte_offset + 1 + n..];
David Tolnay1218e122017-06-01 11:13:45 -0700585 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700586 }
587 '\r' => {}
588 _ => {}
589 }
590 }
David Tolnay1218e122017-06-01 11:13:45 -0700591 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700592}
593
// Recognizes a byte literal: `b`, an opening `'`, one cooked byte, and the
// closing `'`.
named!(byte -> (), do_parse!(
    punct!("b") >>
    tag!("'") >>
    cooked_byte >>
    tag!("'") >>
    (())
));
601
David Tolnay1218e122017-06-01 11:13:45 -0700602fn cooked_byte(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700603 let mut bytes = input.bytes().enumerate();
604 let ok = match bytes.next().map(|(_, b)| b) {
605 Some(b'\\') => {
606 match bytes.next().map(|(_, b)| b) {
607 Some(b'x') => backslash_x_byte(&mut bytes),
608 Some(b'n') |
609 Some(b'r') |
610 Some(b't') |
611 Some(b'\\') |
612 Some(b'0') |
613 Some(b'\'') |
614 Some(b'"') => true,
615 _ => false,
616 }
617 }
618 b => b.is_some(),
619 };
620 if ok {
621 match bytes.next() {
David Tolnay1218e122017-06-01 11:13:45 -0700622 Some((offset, _)) => Ok((&input[offset..], ())),
623 None => Ok(("", ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700624 }
625 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700626 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700627 }
628}
629
// Recognizes a char literal: an opening `'`, one cooked char, and the
// closing `'`.
named!(character -> (), do_parse!(
    punct!("'") >>
    cooked_char >>
    tag!("'") >>
    (())
));
636
David Tolnay1218e122017-06-01 11:13:45 -0700637fn cooked_char(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700638 let mut chars = input.char_indices();
639 let ok = match chars.next().map(|(_, ch)| ch) {
640 Some('\\') => {
641 match chars.next().map(|(_, ch)| ch) {
642 Some('x') => backslash_x_char(&mut chars),
643 Some('u') => backslash_u(&mut chars),
644 Some('n') |
645 Some('r') |
646 Some('t') |
647 Some('\\') |
648 Some('0') |
649 Some('\'') |
650 Some('"') => true,
651 _ => false,
652 }
653 }
654 ch => ch.is_some(),
655 };
656 if ok {
David Tolnay1218e122017-06-01 11:13:45 -0700657 Ok((chars.as_str(), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700658 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700659 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700660 }
661}
662
663macro_rules! next_ch {
664 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
665 match $chars.next() {
666 Some((_, ch)) => match ch {
667 $pat $(| $rest)* => ch,
668 _ => return false,
669 },
670 None => return false
671 }
672 };
673}
674
/// Checks the two digits of a `\xNN` escape in a char or string literal:
/// the first must be `0`-`7`, the second any hex digit.
///
/// Consumes items from `chars` up to and including the first one that fails.
fn backslash_x_char<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    match chars.next() {
        Some((_, high)) if high >= '0' && high <= '7' => {}
        _ => return false,
    }
    match chars.next() {
        Some((_, low)) => low.is_digit(16),
        None => false,
    }
}
682
/// Checks the two digits of a `\xNN` escape in a byte or byte-string
/// literal: both must be hex digits (no upper limit on the value).
///
/// Returns `false` as soon as an item does not match or the input ends.
fn backslash_x_byte<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, u8)>
{
    next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
    next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
    true
}
690
/// Checks the `{H...}` part of a `\u{...}` escape: an opening brace, one to
/// six hex digits, and a closing brace.
///
/// Consumes items from `chars` up to and including the closing brace or the
/// first item that fails.
fn backslash_u<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    match chars.next() {
        Some((_, '{')) => {}
        _ => return false,
    }
    // At least one hex digit is required.
    match chars.next() {
        Some((_, ch)) if ch.is_digit(16) => {}
        _ => return false,
    }
    // Up to five more digits; the brace may close the escape at any point.
    for _ in 0..5 {
        match chars.next() {
            Some((_, '}')) => return true,
            Some((_, ch)) if ch.is_digit(16) => {}
            _ => return false,
        }
    }
    // After six digits only the closing brace is acceptable.
    match chars.next() {
        Some((_, '}')) => true,
        _ => false,
    }
}
719
David Tolnay744a6b82017-06-01 11:34:29 -0700720fn float(input: &str) -> PResult<()> {
721 let (rest, ()) = float_digits(input)?;
722 for suffix in &["f32", "f64"] {
723 if rest.starts_with(suffix) {
724 return word_break(&rest[suffix.len()..]);
725 }
726 }
727 word_break(rest)
728}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700729
David Tolnay744a6b82017-06-01 11:34:29 -0700730fn float_digits(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700731 let mut chars = input.chars().peekable();
732 match chars.next() {
733 Some(ch) if ch >= '0' && ch <= '9' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700734 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700735 }
736
737 let mut len = 1;
738 let mut has_dot = false;
739 let mut has_exp = false;
740 while let Some(&ch) = chars.peek() {
741 match ch {
742 '0'...'9' | '_' => {
743 chars.next();
744 len += 1;
745 }
746 '.' => {
747 if has_dot {
748 break;
749 }
750 chars.next();
751 if chars.peek()
752 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
753 .unwrap_or(false) {
David Tolnay1218e122017-06-01 11:13:45 -0700754 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700755 }
756 len += 1;
757 has_dot = true;
758 }
759 'e' | 'E' => {
760 chars.next();
761 len += 1;
762 has_exp = true;
763 break;
764 }
765 _ => break,
766 }
767 }
768
769 let rest = &input[len..];
770 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
David Tolnay1218e122017-06-01 11:13:45 -0700771 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700772 }
773
774 if has_exp {
775 let mut has_exp_value = false;
776 while let Some(&ch) = chars.peek() {
777 match ch {
778 '+' | '-' => {
779 if has_exp_value {
780 break;
781 }
782 chars.next();
783 len += 1;
784 }
785 '0'...'9' => {
786 chars.next();
787 len += 1;
788 has_exp_value = true;
789 }
790 '_' => {
791 chars.next();
792 len += 1;
793 }
794 _ => break,
795 }
796 }
797 if !has_exp_value {
David Tolnay1218e122017-06-01 11:13:45 -0700798 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700799 }
800 }
801
David Tolnay1218e122017-06-01 11:13:45 -0700802 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700803}
804
David Tolnay744a6b82017-06-01 11:34:29 -0700805fn int(input: &str) -> PResult<()> {
806 let (rest, ()) = digits(input)?;
807 for suffix in &[
808 "isize",
809 "i8",
810 "i16",
811 "i32",
812 "i64",
813 "i128",
814 "usize",
815 "u8",
816 "u16",
817 "u32",
818 "u64",
819 "u128",
820 ] {
821 if rest.starts_with(suffix) {
822 return word_break(&rest[suffix.len()..]);
823 }
824 }
825 word_break(rest)
826}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700827
David Tolnay1218e122017-06-01 11:13:45 -0700828fn digits(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700829 let base = if input.starts_with("0x") {
830 input = &input[2..];
831 16
832 } else if input.starts_with("0o") {
833 input = &input[2..];
834 8
835 } else if input.starts_with("0b") {
836 input = &input[2..];
837 2
838 } else {
839 10
840 };
841
Alex Crichton44bffbc2017-05-19 17:51:59 -0700842 let mut len = 0;
843 let mut empty = true;
844 for b in input.bytes() {
845 let digit = match b {
846 b'0'...b'9' => (b - b'0') as u64,
847 b'a'...b'f' => 10 + (b - b'a') as u64,
848 b'A'...b'F' => 10 + (b - b'A') as u64,
849 b'_' => {
850 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -0700851 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700852 }
853 len += 1;
854 continue;
855 }
856 _ => break,
857 };
858 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -0700859 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700860 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700861 len += 1;
862 empty = false;
863 }
864 if empty {
David Tolnay1218e122017-06-01 11:13:45 -0700865 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700866 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700867 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700868 }
869}
870
// Recognizes the keywords `true` and `false` as literals.
named!(boolean -> (), alt!(
    keyword!("true") => { |_| () }
    |
    keyword!("false") => { |_| () }
));
876
David Tolnay1218e122017-06-01 11:13:45 -0700877fn op(input: &str) -> PResult<(char, OpKind)> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700878 let input = skip_whitespace(input);
879 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700880 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -0700881 let kind = match op_char(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700882 Ok(_) => OpKind::Joint,
883 Err(LexError) => OpKind::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -0700884 };
David Tolnay1218e122017-06-01 11:13:45 -0700885 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -0700886 }
David Tolnay1218e122017-06-01 11:13:45 -0700887 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700888 }
889}
890
David Tolnay1218e122017-06-01 11:13:45 -0700891fn op_char(input: &str) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700892 let mut chars = input.chars();
893 let first = match chars.next() {
894 Some(ch) => ch,
895 None => {
David Tolnay1218e122017-06-01 11:13:45 -0700896 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -0700897 }
898 };
899 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
900 if recognized.contains(first) {
David Tolnay1218e122017-06-01 11:13:45 -0700901 Ok((chars.as_str(), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700902 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700903 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700904 }
905}
906
// Recognizes the four doc comment forms: `//!` and `///` line comments
// (the latter not followed by a further `/`), and `/*!` and `/**` block
// comments (the latter not followed by a further `*`). Line comments run up
// to the next `\n`.
named!(doc_comment -> (), alt!(
    do_parse!(
        punct!("//!") >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tag!("/*!")) >>
        block_comment >>
        (())
    )
    |
    do_parse!(
        punct!("///") >>
        not!(tag!("/")) >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
        block_comment >>
        (())
    )
));
935
#[cfg(test)]
mod tests {
    use super::*;

    // Interned symbols deref back to the text they were created from.
    #[test]
    fn symbols() {
        assert_eq!(&*Symbol::from("foo"), "foo");
        assert_eq!(&*Symbol::from("bar"), "bar");
    }

    // String literals are quoted, and embedded quotes are escaped.
    #[test]
    fn literals() {
        assert_eq!(Literal::from("foo").to_string(), "\"foo\"");
        assert_eq!(Literal::from("\"").to_string(), "\"\\\"\"");
    }

    // Printing a parsed stream and parsing that output again must print the
    // same text a second time.
    #[test]
    fn roundtrip() {
        fn roundtrip(p: &str) {
            println!("parse: {}", p);
            let s = p.parse::<TokenStream>().unwrap().to_string();
            println!("first: {}", s);
            let s2 = s.to_string().parse::<TokenStream>().unwrap().to_string();
            assert_eq!(s, s2);
        }
        roundtrip("a");
        roundtrip("<<");
        roundtrip("<<=");
        roundtrip("
            /// a
            wut
        ");
        roundtrip("
            1
            1.0
            1f32
            2f64
            1usize
            4isize
            4e10
            1_000
            1_0i32
            8u8
            9
            0
            0xffffffffffffffffffffffffffffffff
        ");
    }

    // Numeric literals with trailing junk after the digits or suffix must be
    // rejected.
    #[test]
    fn fail() {
        fn fail(p: &str) {
            if p.parse::<TokenStream>().is_ok() {
                panic!("should have failed to parse: {}", p);
            }
        }
        fail("1x");
        fail("1u80");
        fail("1f320");
    }
}