blob: 911d648823a635c93c80b27b23ec9f332540c825 [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::iter;
David Tolnay041bcd42017-06-03 09:18:04 -07007use std::marker::PhantomData;
Alex Crichton44bffbc2017-05-19 17:51:59 -07008use std::ops;
9use std::rc::Rc;
10use std::str::FromStr;
11use std::vec;
12
13use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070014use unicode_xid::UnicodeXID;
David Tolnay744a6b82017-06-01 11:34:29 -070015use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070016
Alex Crichton1a7f7622017-07-05 17:47:15 -070017use {TokenTree, TokenNode, Delimiter, Spacing};
Alex Crichton44bffbc2017-05-19 17:51:59 -070018
David Tolnay977f8282017-05-31 17:41:33 -070019#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070020pub struct TokenStream {
21 inner: Vec<TokenTree>,
22}
23
24#[derive(Debug)]
25pub struct LexError;
26
27impl TokenStream {
28 pub fn empty() -> TokenStream {
29 TokenStream { inner: Vec::new() }
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.inner.len() == 0
34 }
35}
36
37impl FromStr for TokenStream {
38 type Err = LexError;
39
40 fn from_str(src: &str) -> Result<TokenStream, LexError> {
41 match token_stream(src) {
David Tolnay1218e122017-06-01 11:13:45 -070042 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070043 if skip_whitespace(input).len() != 0 {
44 Err(LexError)
45 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070046 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070047 }
48 }
David Tolnay1218e122017-06-01 11:13:45 -070049 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070050 }
51 }
52}
53
54impl fmt::Display for TokenStream {
55 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56 let mut joint = false;
57 for (i, tt) in self.inner.iter().enumerate() {
58 if i != 0 && !joint {
59 write!(f, " ")?;
60 }
61 joint = false;
62 match tt.kind {
Alex Crichton1a7f7622017-07-05 17:47:15 -070063 TokenNode::Group(delim, ref stream) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070064 let (start, end) = match delim {
65 Delimiter::Parenthesis => ("(", ")"),
66 Delimiter::Brace => ("{", "}"),
67 Delimiter::Bracket => ("[", "]"),
68 Delimiter::None => ("", ""),
69 };
Alex Crichton852d53d2017-05-19 19:25:08 -070070 if stream.0.inner.len() == 0 {
71 write!(f, "{} {}", start, end)?
72 } else {
73 write!(f, "{} {} {}", start, stream, end)?
74 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070075 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070076 TokenNode::Term(ref sym) => write!(f, "{}", sym.as_str())?,
77 TokenNode::Op(ch, ref op) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070078 write!(f, "{}", ch)?;
79 match *op {
Alex Crichton1a7f7622017-07-05 17:47:15 -070080 Spacing::Alone => {}
81 Spacing::Joint => joint = true,
Alex Crichton44bffbc2017-05-19 17:51:59 -070082 }
83 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070084 TokenNode::Literal(ref literal) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070085 write!(f, "{}", literal)?;
86 // handle comments
87 if (literal.0).0.starts_with("/") {
88 write!(f, "\n")?;
89 }
90 }
91 }
92 }
93
94 Ok(())
95 }
96}
97
98impl From<proc_macro::TokenStream> for TokenStream {
99 fn from(inner: proc_macro::TokenStream) -> TokenStream {
100 inner.to_string().parse().expect("compiler token stream parse failed")
101 }
102}
103
104impl From<TokenStream> for proc_macro::TokenStream {
105 fn from(inner: TokenStream) -> proc_macro::TokenStream {
106 inner.to_string().parse().expect("failed to parse to compiler tokens")
107 }
108}
109
110
111impl From<TokenTree> for TokenStream {
112 fn from(tree: TokenTree) -> TokenStream {
113 TokenStream { inner: vec![tree] }
114 }
115}
116
117impl iter::FromIterator<TokenStream> for TokenStream {
118 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
119 let mut v = Vec::new();
120
121 for stream in streams.into_iter() {
122 v.extend(stream.inner);
123 }
124
125 TokenStream { inner: v }
126 }
127}
128
Alex Crichton1a7f7622017-07-05 17:47:15 -0700129pub type TokenTreeIter = vec::IntoIter<TokenTree>;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700130
131impl IntoIterator for TokenStream {
132 type Item = TokenTree;
Alex Crichton1a7f7622017-07-05 17:47:15 -0700133 type IntoIter = TokenTreeIter;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700134
Alex Crichton1a7f7622017-07-05 17:47:15 -0700135 fn into_iter(self) -> TokenTreeIter {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700136 self.inner.into_iter()
137 }
138}
139
/// Placeholder span: this implementation carries no location data.
#[derive(Clone, Copy, Default, Debug)]
pub struct Span;

impl Span {
    /// Returns the (only) span value.
    pub fn call_site() -> Span {
        Span::default()
    }
}
148
/// A cheap, copyable handle to a string stored in the thread-local `SYMBOLS`
/// interner.
#[derive(Copy, Clone)]
pub struct Term {
    // Index of the string inside the interner of the thread that created it.
    intern: usize,
    // Raw-pointer marker: makes `Term` neither `Send` nor `Sync`, as the field
    // name says. Presumably because `intern` is only meaningful for the
    // creating thread's interner.
    not_send_sync: PhantomData<*const ()>,
}

// Per-thread string table backing every `Term` created on that thread.
thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
156
Alex Crichton1a7f7622017-07-05 17:47:15 -0700157impl<'a> From<&'a str> for Term {
158 fn from(string: &'a str) -> Term {
159 Term {
David Tolnay041bcd42017-06-03 09:18:04 -0700160 intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
161 not_send_sync: PhantomData,
162 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700163 }
164}
165
Alex Crichton1a7f7622017-07-05 17:47:15 -0700166impl ops::Deref for Term {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700167 type Target = str;
168
169 fn deref(&self) -> &str {
170 SYMBOLS.with(|interner| {
171 let interner = interner.borrow();
David Tolnay041bcd42017-06-03 09:18:04 -0700172 let s = interner.get(self.intern);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700173 unsafe {
174 &*(s as *const str)
175 }
176 })
177 }
178}
179
Alex Crichton1a7f7622017-07-05 17:47:15 -0700180impl fmt::Debug for Term {
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700181 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Alex Crichton1a7f7622017-07-05 17:47:15 -0700182 f.debug_tuple("Term").field(&&**self).finish()
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700183 }
184}
185
/// Bidirectional string table: maps each distinct string to a dense index and
/// back.
struct Interner {
    string_to_index: HashMap<MyRc, usize>,
    index_to_string: Vec<Rc<String>>,
}

/// Newtype over `Rc<String>` so the map can be queried with a plain `&str`
/// through `Borrow<str>`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an empty table.
    fn new() -> Interner {
        let string_to_index = HashMap::new();
        let index_to_string = Vec::new();
        Interner {
            string_to_index: string_to_index,
            index_to_string: index_to_string,
        }
    }

    /// Returns the index for `s`, inserting it on first sight. Indices are
    /// assigned sequentially starting from zero.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(existing) = self.string_to_index.get(s).cloned() {
            return existing;
        }
        let index = self.index_to_string.len();
        let shared = Rc::new(s.to_string());
        self.string_to_index.insert(MyRc(shared.clone()), index);
        self.index_to_string.push(shared);
        index
    }

    /// Returns the string stored at `idx`; panics if `idx` was never handed
    /// out by `intern`.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
222
/// A literal token, stored as the exact source text that represents it.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte literal such as `b'a'` or `b'\n'`.
    pub fn byte_char(byte: u8) -> Literal {
        match byte {
            0 => Literal("b'\\0'".to_string()),
            b'"' => Literal("b'\"'".to_string()),
            n => {
                let mut escaped = "b'".to_string();
                escaped.extend(ascii::escape_default(n).map(|c| c as char));
                escaped.push('\'');
                Literal(escaped)
            }
        }
    }

    /// Builds a byte-string literal (`b"..."`), escaping quotes, backslashes,
    /// the common control characters, and everything outside printable ASCII
    /// (as `\xNN`).
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut escaped = "b\"".to_string();
        for b in bytes {
            match *b {
                b'\0' => escaped.push_str(r"\0"),
                b'\t' => escaped.push_str(r"\t"),
                b'\n' => escaped.push_str(r"\n"),
                b'\r' => escaped.push_str(r"\r"),
                b'"' => escaped.push_str("\\\""),
                b'\\' => escaped.push_str("\\\\"),
                b'\x20'..=b'\x7E' => escaped.push(*b as char),
                _ => escaped.push_str(&format!("\\x{:02X}", b)),
            }
        }
        escaped.push('"');
        Literal(escaped)
    }

    /// Wraps already-formatted doc-comment text (e.g. `/// text`) verbatim.
    pub fn doccomment(s: &str) -> Literal {
        Literal(s.to_string())
    }

    /// Builds an unsuffixed floating-point literal.
    ///
    /// `f64`'s `Display` prints whole numbers without a fractional part
    /// (`1.0` becomes `"1"`), which would lex as an integer. A trailing `.0`
    /// is appended in that case so the text is always a float literal.
    /// Non-finite values are passed through unchanged.
    pub fn float(s: f64) -> Literal {
        let mut repr = s.to_string();
        if s.is_finite() && !repr.contains('.') {
            repr.push_str(".0");
        }
        Literal(repr)
    }

    /// Builds an unsuffixed integer literal.
    pub fn integer(s: i64) -> Literal {
        Literal(s.to_string())
    }

    /// Builds a raw string literal `r#"..."#` with `pounds` hash marks on each
    /// side. `s` is inserted verbatim; the caller picks enough hashes.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        let hashes = "#".repeat(pounds);
        Literal(format!("r{}\"{}\"{}", hashes, s, hashes))
    }

    /// Builds a raw byte-string literal `br#"..."#` with `pounds` hash marks
    /// on each side. `s` is inserted verbatim.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        let hashes = "#".repeat(pounds);
        Literal(format!("br{}\"{}\"{}", hashes, s, hashes))
    }
}
290
Alex Crichton44bffbc2017-05-19 17:51:59 -0700291impl fmt::Display for Literal {
292 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293 self.0.fmt(f)
294 }
295}
296
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700297macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700298 ($($t:ty,)*) => {$(
299 impl From<$t> for Literal {
300 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700301 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700302 }
303 }
304 )*}
305}
306
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700307ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700308 u8, u16, u32, u64, usize,
309 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700310}
311
312macro_rules! floats {
313 ($($t:ty,)*) => {$(
314 impl From<$t> for Literal {
315 fn from(t: $t) -> Literal {
316 assert!(!t.is_nan());
317 assert!(!t.is_infinite());
318 Literal(format!(concat!("{}", stringify!($t)), t))
319 }
320 }
321 )*}
322}
323
324floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700325 f32, f64,
326}
327
Alex Crichton44bffbc2017-05-19 17:51:59 -0700328impl<'a> From<&'a str> for Literal {
329 fn from(t: &'a str) -> Literal {
330 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
331 s.push('"');
332 s.insert(0, '"');
333 Literal(s)
334 }
335}
336
337impl From<char> for Literal {
338 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700339 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700340 }
341}
342
// Parses a run of token trees and wraps them in the crate-level
// `::TokenStream` newtype around this module's `TokenStream`.
// NOTE(review): `many0!` presumably accepts zero or more repetitions (so
// empty input succeeds) — confirm against the macro definition.
named!(token_stream -> ::TokenStream, map!(
    many0!(token_tree),
    |trees| ::TokenStream(TokenStream { inner: trees })
));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700347
// Parses one token and pairs it with the placeholder span (this lexer does
// not track source locations).
named!(token_tree -> TokenTree,
    map!(token_kind, |s: TokenNode| {
        TokenTree {
            span: ::Span(Span),
            kind: s,
        }
    }));
355
// Parses the payload of a single token. Alternatives are listed in the order
// group, literal, symbol, operator; the order matters because literals such
// as `true` or `b'x'` would otherwise be consumed by `symbol`.
// NOTE(review): assumes `alt!` tries alternatives in listed order — confirm.
named!(token_kind -> TokenNode, alt!(
    map!(delimited, |(d, s)| TokenNode::Group(d, s))
    |
    map!(literal, TokenNode::Literal) // must be before symbol
    |
    symbol
    |
    map!(op, |(op, kind)| TokenNode::Op(op, kind))
));
365
// Parses a group enclosed in `(...)`, `[...]` or `{...}` and returns the
// delimiter kind together with the token stream found between the brackets.
// `Delimiter::None` is never produced by this parser.
named!(delimited -> (Delimiter, ::TokenStream), alt!(
    delimited!(
        punct!("("),
        token_stream,
        punct!(")")
    ) => { |ts| (Delimiter::Parenthesis, ts) }
    |
    delimited!(
        punct!("["),
        token_stream,
        punct!("]")
    ) => { |ts| (Delimiter::Bracket, ts) }
    |
    delimited!(
        punct!("{"),
        token_stream,
        punct!("}")
    ) => { |ts| (Delimiter::Brace, ts) }
));
385
Alex Crichton52725f72017-08-28 12:20:58 -0700386fn symbol(mut input: &str) -> PResult<TokenNode> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700387 input = skip_whitespace(input);
388
389 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700390
391 let lifetime = input.starts_with("'");
392 if lifetime {
393 chars.next();
394 }
395
Alex Crichton44bffbc2017-05-19 17:51:59 -0700396 match chars.next() {
397 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700398 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700399 }
400
David Tolnay214c94c2017-06-01 12:42:56 -0700401 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700402 for (i, ch) in chars {
403 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700404 end = i;
405 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700406 }
407 }
408
David Tolnay214c94c2017-06-01 12:42:56 -0700409 if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
410 Err(LexError)
411 } else {
Alex Crichton52725f72017-08-28 12:20:58 -0700412 let (a, b) = input.split_at(end);
413 if a == "_" {
414 Ok((b, TokenNode::Op('_', Spacing::Alone)))
415 } else {
416 Ok((b, TokenNode::Term(::Term::intern(a))))
417 }
David Tolnay214c94c2017-06-01 12:42:56 -0700418 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700419}
420
// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
//
// Words that `symbol` refuses to accept as a lifetime name (apart from
// `'static`, which it special-cases).
static KEYWORDS: &'static [&'static str] = &[
    "abstract", "alignof", "as", "become", "box", "break", "const", "continue",
    "crate", "do", "else", "enum", "extern", "false", "final", "fn", "for",
    "if", "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut",
    "offsetof", "override", "priv", "proc", "pub", "pure", "ref", "return",
    "self", "Self", "sizeof", "static", "struct", "super", "trait", "true",
    "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while",
    "yield",
];
431
David Tolnay8e976c62017-06-01 12:12:29 -0700432fn literal(input: &str) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700433 let input_no_ws = skip_whitespace(input);
434
435 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700436 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700437 let start = input.len() - input_no_ws.len();
438 let len = input_no_ws.len() - a.len();
439 let end = start + len;
David Tolnay8e976c62017-06-01 12:12:29 -0700440 Ok((a, ::Literal(Literal(input[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700441 }
David Tolnay1218e122017-06-01 11:13:45 -0700442 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700443 }
444}
445
// Recognises any literal without capturing it: string, byte string, byte,
// character, float, integer, boolean or doc comment. `float` is listed before
// `int`.
// NOTE(review): assumes `alt!` tries alternatives in listed order — confirm.
named!(literal_nocapture -> (), alt!(
    string
    |
    byte_string
    |
    byte
    |
    character
    |
    float
    |
    int
    |
    boolean
    |
    doc_comment
));
463
// Recognises a string literal: either a quoted string with escapes or a raw
// string introduced by `r`.
named!(string -> (), alt!(
    quoted_string
    |
    preceded!(
        punct!("r"),
        raw_string
    ) => { |_| () }
));
472
// Recognises `"..."`: opening quote, escaped body, closing quote.
// `cooked_string` stops *at* the closing quote, which `tag!` then consumes.
named!(quoted_string -> (), delimited!(
    punct!("\""),
    cooked_string,
    tag!("\"")
));
478
/// Scans the body of a quoted string literal (the part after the opening
/// quote).
///
/// On success the returned remainder starts at the closing `"`, which is left
/// for the caller to consume. Returns `LexError` if the input ends before a
/// closing quote, if a `\r` is not followed by `\n`, or if a backslash escape
/// is malformed.
fn cooked_string(input: &str) -> PResult<()> {
    let mut chars = input.char_indices().peekable();
    while let Some((byte_offset, ch)) = chars.next() {
        match ch {
            '"' => {
                // Unescaped quote: end of the string body.
                return Ok((&input[byte_offset..], ()));
            }
            '\r' => {
                // A carriage return is only accepted as part of `\r\n`.
                if let Some((_, '\n')) = chars.next() {
                    // ...
                } else {
                    break;
                }
            }
            '\\' => {
                match chars.next() {
                    Some((_, 'x')) => {
                        if !backslash_x_char(&mut chars) {
                            break
                        }
                    }
                    Some((_, 'n')) |
                    Some((_, 'r')) |
                    Some((_, 't')) |
                    Some((_, '\\')) |
                    Some((_, '\'')) |
                    Some((_, '"')) |
                    Some((_, '0')) => {}
                    Some((_, 'u')) => {
                        if !backslash_u(&mut chars) {
                            break
                        }
                    }
                    Some((_, '\n')) | Some((_, '\r')) => {
                        // Line continuation: a backslash before a line break
                        // skips all following whitespace.
                        while let Some(&(_, ch)) = chars.peek() {
                            if ch.is_whitespace() {
                                chars.next();
                            } else {
                                break;
                            }
                        }
                    }
                    _ => break,
                }
            }
            _ch => {}
        }
    }
    // Either the loop hit an invalid sequence or the input ran out before a
    // closing quote.
    Err(LexError)
}
529
// Recognises a byte-string literal: either `b"..."` with escapes or a raw
// byte string introduced by `br`. The raw form reuses `raw_string`.
named!(byte_string -> (), alt!(
    delimited!(
        punct!("b\""),
        cooked_byte_string,
        tag!("\"")
    ) => { |_| () }
    |
    preceded!(
        punct!("br"),
        raw_string
    ) => { |_| () }
));
542
/// Scans the body of a `b"..."` literal (the part after the opening quote).
///
/// On success the returned remainder starts at the closing `"`. Returns
/// `LexError` if the input ends before a closing quote, if a `\r` is not
/// followed by `\n`, if a backslash escape is malformed, or if a non-ASCII
/// byte is found.
fn cooked_byte_string(mut input: &str) -> PResult<()> {
    let mut bytes = input.bytes().enumerate();
    'outer: while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                // Unescaped quote: end of the string body.
                return Ok((&input[offset..], ()));
            }
            b'\r' => {
                // A carriage return is only accepted as part of `\r\n`.
                if let Some((_, b'\n')) = bytes.next() {
                    // ...
                } else {
                    break;
                }
            }
            b'\\' => {
                match bytes.next() {
                    Some((_, b'x')) => {
                        if !backslash_x_byte(&mut bytes) {
                            break
                        }
                    }
                    Some((_, b'n')) |
                    Some((_, b'r')) |
                    Some((_, b't')) |
                    Some((_, b'\\')) |
                    Some((_, b'0')) |
                    Some((_, b'\'')) |
                    Some((_, b'"')) => {}
                    Some((newline, b'\n')) |
                    Some((newline, b'\r')) => {
                        // Line continuation: skip whitespace after the line
                        // break. Whitespace is checked per `char`, so the
                        // scan restarts on the remaining text with a fresh
                        // byte iterator (offsets are relative to the new
                        // `input` from here on).
                        let rest = &input[newline + 1..];
                        for (offset, ch) in rest.char_indices() {
                            if !ch.is_whitespace() {
                                input = &rest[offset..];
                                bytes = input.bytes().enumerate();
                                continue 'outer;
                            }
                        }
                        // Only whitespace left: the string is unterminated.
                        break;
                    }
                    _ => break,
                }
            }
            b if b < 0x80 => {}
            // Non-ASCII bytes are rejected.
            _ => break,
        }
    }
    Err(LexError)
}
592
David Tolnay1218e122017-06-01 11:13:45 -0700593fn raw_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700594 let mut chars = input.char_indices();
595 let mut n = 0;
596 while let Some((byte_offset, ch)) = chars.next() {
597 match ch {
598 '"' => {
599 n = byte_offset;
600 break;
601 }
602 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700603 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700604 }
605 }
606 for (byte_offset, ch) in chars {
607 match ch {
608 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
609 let rest = &input[byte_offset + 1 + n..];
David Tolnay1218e122017-06-01 11:13:45 -0700610 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700611 }
612 '\r' => {}
613 _ => {}
614 }
615 }
David Tolnay1218e122017-06-01 11:13:45 -0700616 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700617}
618
// Recognises a byte literal: `b`, an opening `'`, one (possibly escaped)
// byte, and a closing `'`.
named!(byte -> (), do_parse!(
    punct!("b") >>
    tag!("'") >>
    cooked_byte >>
    tag!("'") >>
    (())
));
626
David Tolnay1218e122017-06-01 11:13:45 -0700627fn cooked_byte(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700628 let mut bytes = input.bytes().enumerate();
629 let ok = match bytes.next().map(|(_, b)| b) {
630 Some(b'\\') => {
631 match bytes.next().map(|(_, b)| b) {
632 Some(b'x') => backslash_x_byte(&mut bytes),
633 Some(b'n') |
634 Some(b'r') |
635 Some(b't') |
636 Some(b'\\') |
637 Some(b'0') |
638 Some(b'\'') |
639 Some(b'"') => true,
640 _ => false,
641 }
642 }
643 b => b.is_some(),
644 };
645 if ok {
646 match bytes.next() {
David Tolnay1218e122017-06-01 11:13:45 -0700647 Some((offset, _)) => Ok((&input[offset..], ())),
648 None => Ok(("", ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700649 }
650 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700651 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700652 }
653}
654
// Recognises a character literal: opening `'`, one (possibly escaped)
// character, and a closing `'`.
named!(character -> (), do_parse!(
    punct!("'") >>
    cooked_char >>
    tag!("'") >>
    (())
));
661
David Tolnay1218e122017-06-01 11:13:45 -0700662fn cooked_char(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700663 let mut chars = input.char_indices();
664 let ok = match chars.next().map(|(_, ch)| ch) {
665 Some('\\') => {
666 match chars.next().map(|(_, ch)| ch) {
667 Some('x') => backslash_x_char(&mut chars),
668 Some('u') => backslash_u(&mut chars),
669 Some('n') |
670 Some('r') |
671 Some('t') |
672 Some('\\') |
673 Some('0') |
674 Some('\'') |
675 Some('"') => true,
676 _ => false,
677 }
678 }
679 ch => ch.is_some(),
680 };
681 if ok {
David Tolnay1218e122017-06-01 11:13:45 -0700682 Ok((chars.as_str(), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700683 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700684 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700685 }
686}
687
688macro_rules! next_ch {
689 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
690 match $chars.next() {
691 Some((_, ch)) => match ch {
692 $pat $(| $rest)* => ch,
693 _ => return false,
694 },
695 None => return false
696 }
697 };
698}
699
/// Validates the two digits of a `\xNN` escape in a char/str context: the
/// first must be an octal digit (keeping the value in the ASCII range), the
/// second any hex digit. Stops consuming at the first mismatch.
fn backslash_x_char<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    matches!(chars.next(), Some((_, '0'..='7')))
        && matches!(chars.next(), Some((_, ch)) if ch.is_ascii_hexdigit())
}
707
/// Validates the two hex digits of a `\xNN` escape in a byte context. Stops
/// consuming at the first mismatch.
fn backslash_x_byte<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, u8)>
{
    (0..2).all(|_| matches!(chars.next(), Some((_, b)) if b.is_ascii_hexdigit()))
}
715
/// Validates the `{…}` part of a `\u{…}` escape: an opening brace, one to six
/// hex digits, and a closing brace. Stops consuming at the first mismatch.
fn backslash_u<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    if !matches!(chars.next(), Some((_, '{'))) {
        return false;
    }
    // At least one digit is mandatory.
    if !matches!(chars.next(), Some((_, ch)) if ch.is_ascii_hexdigit()) {
        return false;
    }
    // Digits two through six are optional; a brace may close the escape early.
    for _ in 0..5 {
        match chars.next() {
            Some((_, '}')) => return true,
            Some((_, ch)) if ch.is_ascii_hexdigit() => {}
            _ => return false,
        }
    }
    // After six digits only the closing brace is acceptable.
    matches!(chars.next(), Some((_, '}')))
}
744
David Tolnay744a6b82017-06-01 11:34:29 -0700745fn float(input: &str) -> PResult<()> {
746 let (rest, ()) = float_digits(input)?;
747 for suffix in &["f32", "f64"] {
748 if rest.starts_with(suffix) {
749 return word_break(&rest[suffix.len()..]);
750 }
751 }
752 word_break(rest)
753}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700754
/// Scans the numeric part of a floating-point literal (everything except a
/// type suffix) and returns the input that follows it.
///
/// The text must start with a decimal digit and must be recognisably a float:
/// it needs a `.`, an exponent, or must be directly followed by `f32`/`f64`.
/// Otherwise `LexError` is returned (so plain integers fall through to `int`).
fn float_digits(input: &str) -> PResult<()> {
    let mut chars = input.chars().peekable();
    match chars.next() {
        Some(ch) if ch >= '0' && ch <= '9' => {}
        _ => return Err(LexError),
    }

    // `len` counts consumed characters; every accepted character is ASCII, so
    // it doubles as a byte offset into `input`.
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'...'9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                // A second dot ends the literal.
                if has_dot {
                    break;
                }
                chars.next();
                // `1..2` is a range and `1.foo` is a field/method access, not
                // a float.
                if chars.peek()
                       .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
                       .unwrap_or(false) {
                    return Err(LexError);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    let rest = &input[len..];
    if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
        return Err(LexError);
    }

    if has_exp {
        // The exponent needs at least one digit; signs are only accepted
        // before the first digit, underscores anywhere.
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    if has_exp_value {
                        break;
                    }
                    chars.next();
                    len += 1;
                }
                '0'...'9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        if !has_exp_value {
            return Err(LexError);
        }
    }

    Ok((&input[len..], ()))
}
829
David Tolnay744a6b82017-06-01 11:34:29 -0700830fn int(input: &str) -> PResult<()> {
831 let (rest, ()) = digits(input)?;
832 for suffix in &[
833 "isize",
834 "i8",
835 "i16",
836 "i32",
837 "i64",
838 "i128",
839 "usize",
840 "u8",
841 "u16",
842 "u32",
843 "u64",
844 "u128",
845 ] {
846 if rest.starts_with(suffix) {
847 return word_break(&rest[suffix.len()..]);
848 }
849 }
850 word_break(rest)
851}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700852
David Tolnay1218e122017-06-01 11:13:45 -0700853fn digits(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700854 let base = if input.starts_with("0x") {
855 input = &input[2..];
856 16
857 } else if input.starts_with("0o") {
858 input = &input[2..];
859 8
860 } else if input.starts_with("0b") {
861 input = &input[2..];
862 2
863 } else {
864 10
865 };
866
Alex Crichton44bffbc2017-05-19 17:51:59 -0700867 let mut len = 0;
868 let mut empty = true;
869 for b in input.bytes() {
870 let digit = match b {
871 b'0'...b'9' => (b - b'0') as u64,
872 b'a'...b'f' => 10 + (b - b'a') as u64,
873 b'A'...b'F' => 10 + (b - b'A') as u64,
874 b'_' => {
875 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -0700876 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700877 }
878 len += 1;
879 continue;
880 }
881 _ => break,
882 };
883 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -0700884 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700885 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700886 len += 1;
887 empty = false;
888 }
889 if empty {
David Tolnay1218e122017-06-01 11:13:45 -0700890 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700891 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700892 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700893 }
894}
895
// Recognises the boolean literals `true` and `false`.
named!(boolean -> (), alt!(
    keyword!("true") => { |_| () }
    |
    keyword!("false") => { |_| () }
));
901
Alex Crichton1a7f7622017-07-05 17:47:15 -0700902fn op(input: &str) -> PResult<(char, Spacing)> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700903 let input = skip_whitespace(input);
904 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700905 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -0700906 let kind = match op_char(rest) {
Alex Crichton1a7f7622017-07-05 17:47:15 -0700907 Ok(_) => Spacing::Joint,
908 Err(LexError) => Spacing::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -0700909 };
David Tolnay1218e122017-06-01 11:13:45 -0700910 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -0700911 }
David Tolnay1218e122017-06-01 11:13:45 -0700912 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700913 }
914}
915
David Tolnay1218e122017-06-01 11:13:45 -0700916fn op_char(input: &str) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700917 let mut chars = input.chars();
918 let first = match chars.next() {
919 Some(ch) => ch,
920 None => {
David Tolnay1218e122017-06-01 11:13:45 -0700921 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -0700922 }
923 };
924 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
925 if recognized.contains(first) {
David Tolnay1218e122017-06-01 11:13:45 -0700926 Ok((chars.as_str(), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700927 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700928 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700929 }
930}
931
// Recognises a doc comment, which this lexer treats as a literal. Four forms
// are accepted, in this order:
//   `//! …`   inner line comment, up to the next newline
//   `/*! … */` inner block comment
//   `/// …`   outer line comment (but not `////…`), up to the next newline
//   `/** … */` outer block comment (but not `/***…`)
// The block forms only peek at the opener and leave consuming the whole
// comment to `block_comment`.
named!(doc_comment -> (), alt!(
    do_parse!(
        punct!("//!") >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tag!("/*!")) >>
        block_comment >>
        (())
    )
    |
    do_parse!(
        punct!("///") >>
        not!(tag!("/")) >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
        block_comment >>
        (())
    )
));