blob: 8277d4c37836b168785e1fa863899a1a11497e7c [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::iter;
David Tolnay041bcd42017-06-03 09:18:04 -07007use std::marker::PhantomData;
Alex Crichton44bffbc2017-05-19 17:51:59 -07008use std::ops;
9use std::rc::Rc;
10use std::str::FromStr;
11use std::vec;
12
13use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070014use unicode_xid::UnicodeXID;
David Tolnay744a6b82017-06-01 11:34:29 -070015use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070016
Alex Crichton1a7f7622017-07-05 17:47:15 -070017use {TokenTree, TokenNode, Delimiter, Spacing};
Alex Crichton44bffbc2017-05-19 17:51:59 -070018
/// A flat, owned sequence of token trees produced by this module's lexer.
#[derive(Clone, Debug)]
pub struct TokenStream {
    // Token trees in source order.
    inner: Vec<TokenTree>,
}
23
/// Unit error returned whenever lexing fails; it carries no position or reason.
#[derive(Debug)]
pub struct LexError;
26
27impl TokenStream {
28 pub fn empty() -> TokenStream {
29 TokenStream { inner: Vec::new() }
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.inner.len() == 0
34 }
35}
36
37impl FromStr for TokenStream {
38 type Err = LexError;
39
40 fn from_str(src: &str) -> Result<TokenStream, LexError> {
41 match token_stream(src) {
David Tolnay1218e122017-06-01 11:13:45 -070042 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070043 if skip_whitespace(input).len() != 0 {
44 Err(LexError)
45 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070046 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070047 }
48 }
David Tolnay1218e122017-06-01 11:13:45 -070049 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070050 }
51 }
52}
53
54impl fmt::Display for TokenStream {
55 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56 let mut joint = false;
57 for (i, tt) in self.inner.iter().enumerate() {
58 if i != 0 && !joint {
59 write!(f, " ")?;
60 }
61 joint = false;
62 match tt.kind {
Alex Crichton1a7f7622017-07-05 17:47:15 -070063 TokenNode::Group(delim, ref stream) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070064 let (start, end) = match delim {
65 Delimiter::Parenthesis => ("(", ")"),
66 Delimiter::Brace => ("{", "}"),
67 Delimiter::Bracket => ("[", "]"),
68 Delimiter::None => ("", ""),
69 };
Alex Crichton852d53d2017-05-19 19:25:08 -070070 if stream.0.inner.len() == 0 {
71 write!(f, "{} {}", start, end)?
72 } else {
73 write!(f, "{} {} {}", start, stream, end)?
74 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070075 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070076 TokenNode::Term(ref sym) => write!(f, "{}", sym.as_str())?,
77 TokenNode::Op(ch, ref op) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070078 write!(f, "{}", ch)?;
79 match *op {
Alex Crichton1a7f7622017-07-05 17:47:15 -070080 Spacing::Alone => {}
81 Spacing::Joint => joint = true,
Alex Crichton44bffbc2017-05-19 17:51:59 -070082 }
83 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070084 TokenNode::Literal(ref literal) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070085 write!(f, "{}", literal)?;
86 // handle comments
87 if (literal.0).0.starts_with("/") {
88 write!(f, "\n")?;
89 }
90 }
91 }
92 }
93
94 Ok(())
95 }
96}
97
98impl From<proc_macro::TokenStream> for TokenStream {
99 fn from(inner: proc_macro::TokenStream) -> TokenStream {
100 inner.to_string().parse().expect("compiler token stream parse failed")
101 }
102}
103
104impl From<TokenStream> for proc_macro::TokenStream {
105 fn from(inner: TokenStream) -> proc_macro::TokenStream {
106 inner.to_string().parse().expect("failed to parse to compiler tokens")
107 }
108}
109
110
111impl From<TokenTree> for TokenStream {
112 fn from(tree: TokenTree) -> TokenStream {
113 TokenStream { inner: vec![tree] }
114 }
115}
116
117impl iter::FromIterator<TokenStream> for TokenStream {
118 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
119 let mut v = Vec::new();
120
121 for stream in streams.into_iter() {
122 v.extend(stream.inner);
123 }
124
125 TokenStream { inner: v }
126 }
127}
128
Alex Crichton1a7f7622017-07-05 17:47:15 -0700129pub type TokenTreeIter = vec::IntoIter<TokenTree>;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700130
131impl IntoIterator for TokenStream {
132 type Item = TokenTree;
Alex Crichton1a7f7622017-07-05 17:47:15 -0700133 type IntoIter = TokenTreeIter;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700134
Alex Crichton1a7f7622017-07-05 17:47:15 -0700135 fn into_iter(self) -> TokenTreeIter {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700136 self.inner.into_iter()
137 }
138}
139
/// Placeholder span: a unit value that records no source location.
#[derive(Clone, Copy, Debug)]
pub struct Span;

impl Span {
    /// Returns the (only) span value; named for parity with call-site spans.
    pub fn call_site() -> Span {
        Span {}
    }

    /// Returns the (only) span value; named for parity with def-site spans.
    pub fn def_site() -> Span {
        Span {}
    }
}
152
/// An interned string, stored as an index into the thread-local `SYMBOLS`
/// interner.
#[derive(Copy, Clone)]
pub struct Term {
    // Index returned by `Interner::intern` on the current thread.
    intern: usize,
    // The raw-pointer marker keeps `Term` from being `Send`/`Sync`: the index
    // is only meaningful for the thread-local interner that produced it.
    not_send_sync: PhantomData<*const ()>,
}

// Per-thread interner backing every `Term` created on this thread.
thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));

impl<'a> From<&'a str> for Term {
    /// Interns `string` in this thread's interner and returns its handle.
    fn from(string: &'a str) -> Term {
        Term {
            intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
            not_send_sync: PhantomData,
        }
    }
}

impl ops::Deref for Term {
    type Target = str;

    /// Looks the interned text up in this thread's interner.
    fn deref(&self) -> &str {
        SYMBOLS.with(|interner| {
            let interner = interner.borrow();
            let s = interner.get(self.intern);
            // SAFETY: the pointer cast detaches the reference from the
            // `RefCell` borrow and the `with` closure. This relies on the
            // interner only ever appending strings (see `Interner::intern`)
            // and on each string living in its own `Rc<String>` allocation,
            // so growing the index vector does not move the text.
            // NOTE(review): the reference would dangle if it were used after
            // the thread-local is destroyed — confirm that cannot happen.
            unsafe {
                &*(s as *const str)
            }
        })
    }
}

impl fmt::Debug for Term {
    /// Formats as `Term("text")`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_tuple("Term").field(&&**self).finish()
    }
}
189
/// Append-only string interner mapping text to dense indices and back.
struct Interner {
    // Text -> index; keys share their allocation with `index_to_string`.
    string_to_index: HashMap<MyRc, usize>,
    // Index -> text, in insertion order.
    index_to_string: Vec<Rc<String>>,
}

/// `Rc<String>` wrapper that can be looked up in a `HashMap` by `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an interner with no strings.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index of `s`, inserting it at the next free index when it
    /// has not been seen before.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(&known) = self.string_to_index.get(s) {
            return known;
        }
        // The new entry lands at the current end of the index vector.
        let fresh = self.index_to_string.len();
        let shared = Rc::new(s.to_string());
        self.index_to_string.push(Rc::clone(&shared));
        self.string_to_index.insert(MyRc(shared), fresh);
        fresh
    }

    /// Returns the text stored at `idx`; panics if `idx` was never issued.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
226
/// A literal token, stored as the exact source text it prints as.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte literal such as `b'a'`, `b'\n'` or `b'\0'`.
    pub fn byte_char(byte: u8) -> Literal {
        match byte {
            0 => Literal("b'\\0'".to_string()),
            // A double quote needs no escaping inside a byte literal.
            b'\"' => Literal("b'\"'".to_string()),
            n => {
                let mut escaped = "b'".to_string();
                escaped.extend(ascii::escape_default(n).map(|c| c as char));
                escaped.push('\'');
                Literal(escaped)
            }
        }
    }

    /// Builds a byte-string literal (`b"..."`), escaping quotes, backslashes,
    /// common control characters, and every byte outside printable ASCII.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut escaped = "b\"".to_string();
        for b in bytes {
            match *b {
                b'\0' => escaped.push_str(r"\0"),
                b'\t' => escaped.push_str(r"\t"),
                b'\n' => escaped.push_str(r"\n"),
                b'\r' => escaped.push_str(r"\r"),
                b'"' => escaped.push_str("\\\""),
                b'\\' => escaped.push_str("\\\\"),
                b'\x20'..=b'\x7E' => escaped.push(*b as char),
                _ => escaped.push_str(&format!("\\x{:02X}", b)),
            }
        }
        escaped.push('"');
        Literal(escaped)
    }

    /// Stores `s` verbatim; the caller supplies the full comment text
    /// including its `///`, `//!`, `/**` or `/*!` marker.
    pub fn doccomment(s: &str) -> Literal {
        Literal(s.to_string())
    }

    /// Builds an unsuffixed float literal.
    ///
    /// Integral values get a trailing `.0` (`1.0`, not `1`) so the text is a
    /// float literal rather than an integer literal.
    ///
    /// # Panics
    ///
    /// Panics if `s` is NaN or infinite, matching the `From<f32>`/`From<f64>`
    /// conversions: neither value has a literal spelling.
    pub fn float(s: f64) -> Literal {
        assert!(!s.is_nan());
        assert!(!s.is_infinite());
        let mut repr = s.to_string();
        if !repr.contains('.') {
            repr.push_str(".0");
        }
        Literal(repr)
    }

    /// Builds an unsuffixed integer literal.
    pub fn integer(s: i64) -> Literal {
        Literal(s.to_string())
    }

    /// Builds a raw string literal `r#"..."#` with `pounds` hashes per side.
    /// `s` is inserted verbatim; it is not checked against the delimiter.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        Literal::raw_with_prefix("r", s, pounds)
    }

    /// Builds a raw byte-string literal `br#"..."#` with `pounds` hashes per
    /// side. `s` is inserted verbatim; it is not checked against the delimiter.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        Literal::raw_with_prefix("br", s, pounds)
    }

    // Shared builder for both raw-string flavours: prefix, hashes, quoted body, hashes.
    fn raw_with_prefix(prefix: &str, s: &str, pounds: usize) -> Literal {
        let mut ret = String::with_capacity(prefix.len() + s.len() + 2 * pounds + 2);
        ret.push_str(prefix);
        ret.extend((0..pounds).map(|_| '#'));
        ret.push('"');
        ret.push_str(s);
        ret.push('"');
        ret.extend((0..pounds).map(|_| '#'));
        Literal(ret)
    }
}
294
Alex Crichton44bffbc2017-05-19 17:51:59 -0700295impl fmt::Display for Literal {
296 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
297 self.0.fmt(f)
298 }
299}
300
// Generates `From<$t> for Literal` for each listed integer type. The literal
// text is the value followed by the type name as a suffix (e.g. `5u8`).
macro_rules! ints {
    ($($t:ty,)*) => {$(
        impl From<$t> for Literal {
            fn from(t: $t) -> Literal {
                Literal(format!(concat!("{}", stringify!($t)), t))
            }
        }
    )*}
}

ints! {
    u8, u16, u32, u64, usize,
    i8, i16, i32, i64, isize,
}
315
// Generates `From<$t> for Literal` for each listed float type. The literal
// text is the value followed by the type name as a suffix (e.g. `1.5f32`).
// The conversion panics on NaN and infinite values.
macro_rules! floats {
    ($($t:ty,)*) => {$(
        impl From<$t> for Literal {
            fn from(t: $t) -> Literal {
                assert!(!t.is_nan());
                assert!(!t.is_infinite());
                Literal(format!(concat!("{}", stringify!($t)), t))
            }
        }
    )*}
}

floats! {
    f32, f64,
}
331
Alex Crichton44bffbc2017-05-19 17:51:59 -0700332impl<'a> From<&'a str> for Literal {
333 fn from(t: &'a str) -> Literal {
334 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
335 s.push('"');
336 s.insert(0, '"');
337 Literal(s)
338 }
339}
340
341impl From<char> for Literal {
342 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700343 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700344 }
345}
346
// Parses zero or more token trees and wraps them in the crate-root
// `::TokenStream` newtype around this module's `TokenStream`.
named!(token_stream -> ::TokenStream, map!(
    many0!(token_tree),
    |trees| ::TokenStream(TokenStream { inner: trees })
));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700351
// Parses one token and pairs it with the placeholder span (this module's
// unit `Span` wrapped in the crate-root `::Span`).
named!(token_tree -> TokenTree,
    map!(token_kind, |s: TokenNode| {
        TokenTree {
            span: ::Span(Span),
            kind: s,
        }
    }));
359
// Parses a single token node, trying in order: delimited group, literal,
// symbol (identifier, lifetime or `_`), operator character.
// The order matters: literals such as `true` or `b'x'` would otherwise be
// taken by `symbol`.
named!(token_kind -> TokenNode, alt!(
    map!(delimited, |(d, s)| TokenNode::Group(d, s))
    |
    map!(literal, TokenNode::Literal) // must be before symbol
    |
    symbol
    |
    map!(op, |(op, kind)| TokenNode::Op(op, kind))
));
369
// Parses a `(...)`, `[...]` or `{...}` group and returns the delimiter kind
// together with the token stream found between the delimiters.
named!(delimited -> (Delimiter, ::TokenStream), alt!(
    delimited!(
        punct!("("),
        token_stream,
        punct!(")")
    ) => { |ts| (Delimiter::Parenthesis, ts) }
    |
    delimited!(
        punct!("["),
        token_stream,
        punct!("]")
    ) => { |ts| (Delimiter::Bracket, ts) }
    |
    delimited!(
        punct!("{"),
        token_stream,
        punct!("}")
    ) => { |ts| (Delimiter::Brace, ts) }
));
389
Alex Crichton52725f72017-08-28 12:20:58 -0700390fn symbol(mut input: &str) -> PResult<TokenNode> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700391 input = skip_whitespace(input);
392
393 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700394
395 let lifetime = input.starts_with("'");
396 if lifetime {
397 chars.next();
398 }
399
Alex Crichton44bffbc2017-05-19 17:51:59 -0700400 match chars.next() {
401 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700402 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700403 }
404
David Tolnay214c94c2017-06-01 12:42:56 -0700405 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700406 for (i, ch) in chars {
407 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700408 end = i;
409 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700410 }
411 }
412
David Tolnay214c94c2017-06-01 12:42:56 -0700413 if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
414 Err(LexError)
415 } else {
Alex Crichton52725f72017-08-28 12:20:58 -0700416 let (a, b) = input.split_at(end);
417 if a == "_" {
418 Ok((b, TokenNode::Op('_', Spacing::Alone)))
419 } else {
420 Ok((b, TokenNode::Term(::Term::intern(a))))
421 }
David Tolnay214c94c2017-06-01 12:42:56 -0700422 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700423}
424
// Keyword names, used by `symbol` to reject lifetimes named after a keyword.
// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
static KEYWORDS: &'static [&'static str] = &[
    "abstract", "alignof", "as", "become", "box", "break", "const", "continue",
    "crate", "do", "else", "enum", "extern", "false", "final", "fn", "for",
    "if", "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut",
    "offsetof", "override", "priv", "proc", "pub", "pure", "ref", "return",
    "self", "Self", "sizeof", "static", "struct", "super", "trait", "true",
    "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while",
    "yield",
];
435
David Tolnay8e976c62017-06-01 12:12:29 -0700436fn literal(input: &str) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700437 let input_no_ws = skip_whitespace(input);
438
439 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700440 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700441 let start = input.len() - input_no_ws.len();
442 let len = input_no_ws.len() - a.len();
443 let end = start + len;
David Tolnay8e976c62017-06-01 12:12:29 -0700444 Ok((a, ::Literal(Literal(input[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700445 }
David Tolnay1218e122017-06-01 11:13:45 -0700446 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700447 }
448}
449
// Recognises any literal without producing a value; `literal` recovers the
// matched text from how much input was consumed. Alternatives are tried in
// the order listed, so `float` is attempted before `int`.
named!(literal_nocapture -> (), alt!(
    string
    |
    byte_string
    |
    byte
    |
    character
    |
    float
    |
    int
    |
    boolean
    |
    doc_comment
));
467
// Recognises a string literal: either an ordinary quoted string or `r`
// followed by a raw string body (`"..."`, `#"..."#`, ...).
named!(string -> (), alt!(
    quoted_string
    |
    preceded!(
        punct!("r"),
        raw_string
    ) => { |_| () }
));
476
// Recognises `"` + escaped string body + `"`. `cooked_string` stops at the
// closing quote without consuming it, and the final `tag!` consumes it.
named!(quoted_string -> (), delimited!(
    punct!("\""),
    cooked_string,
    tag!("\"")
));
482
/// Scans the body of an ordinary string literal, starting just after the
/// opening quote.
///
/// On success the returned remainder starts AT the closing `"`, which is left
/// for the caller to consume. Returns `LexError` on an invalid escape, a bare
/// carriage return, or when the input ends before a closing quote.
fn cooked_string(input: &str) -> PResult<()> {
    let mut chars = input.char_indices().peekable();
    while let Some((byte_offset, ch)) = chars.next() {
        match ch {
            '"' => {
                return Ok((&input[byte_offset..], ()));
            }
            '\r' => {
                // A carriage return is only accepted as part of `\r\n`.
                if let Some((_, '\n')) = chars.next() {
                    // ...
                } else {
                    break;
                }
            }
            '\\' => {
                match chars.next() {
                    Some((_, 'x')) => {
                        if !backslash_x_char(&mut chars) {
                            break
                        }
                    }
                    Some((_, 'n')) |
                    Some((_, 'r')) |
                    Some((_, 't')) |
                    Some((_, '\\')) |
                    Some((_, '\'')) |
                    Some((_, '"')) |
                    Some((_, '0')) => {}
                    Some((_, 'u')) => {
                        if !backslash_u(&mut chars) {
                            break
                        }
                    }
                    Some((_, '\n')) | Some((_, '\r')) => {
                        // Line continuation: a backslash before a line break
                        // skips all whitespace that follows.
                        while let Some(&(_, ch)) = chars.peek() {
                            if ch.is_whitespace() {
                                chars.next();
                            } else {
                                break;
                            }
                        }
                    }
                    _ => break,
                }
            }
            _ch => {}
        }
    }
    // Reached by every `break` above and by running out of input.
    Err(LexError)
}
533
// Recognises a byte-string literal: either `b"` + escaped body + `"`, or
// `br` followed by a raw string body.
named!(byte_string -> (), alt!(
    delimited!(
        punct!("b\""),
        cooked_byte_string,
        tag!("\"")
    ) => { |_| () }
    |
    preceded!(
        punct!("br"),
        raw_string
    ) => { |_| () }
));
546
/// Scans the body of a `b"..."` literal, starting just after the opening
/// quote.
///
/// On success the returned remainder starts AT the closing `"`. Returns
/// `LexError` on an invalid escape, a bare carriage return, any non-ASCII
/// byte, or when the input ends before a closing quote.
fn cooked_byte_string(mut input: &str) -> PResult<()> {
    let mut bytes = input.bytes().enumerate();
    'outer: while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                // `offset` is relative to the current `input`, which a line
                // continuation below may have re-based.
                return Ok((&input[offset..], ()));
            }
            b'\r' => {
                // A carriage return is only accepted as part of `\r\n`.
                if let Some((_, b'\n')) = bytes.next() {
                    // ...
                } else {
                    break;
                }
            }
            b'\\' => {
                match bytes.next() {
                    Some((_, b'x')) => {
                        if !backslash_x_byte(&mut bytes) {
                            break
                        }
                    }
                    Some((_, b'n')) |
                    Some((_, b'r')) |
                    Some((_, b't')) |
                    Some((_, b'\\')) |
                    Some((_, b'0')) |
                    Some((_, b'\'')) |
                    Some((_, b'"')) => {}
                    Some((newline, b'\n')) |
                    Some((newline, b'\r')) => {
                        // Line continuation: skip whitespace by character,
                        // then restart the byte scan from the first
                        // non-whitespace character with `input` re-based.
                        let rest = &input[newline + 1..];
                        for (offset, ch) in rest.char_indices() {
                            if !ch.is_whitespace() {
                                input = &rest[offset..];
                                bytes = input.bytes().enumerate();
                                continue 'outer;
                            }
                        }
                        // Only whitespace remained: the literal is unterminated.
                        break;
                    }
                    _ => break,
                }
            }
            b if b < 0x80 => {}
            // Non-ASCII bytes are not allowed in a byte string.
            _ => break,
        }
    }
    Err(LexError)
}
596
David Tolnay1218e122017-06-01 11:13:45 -0700597fn raw_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700598 let mut chars = input.char_indices();
599 let mut n = 0;
600 while let Some((byte_offset, ch)) = chars.next() {
601 match ch {
602 '"' => {
603 n = byte_offset;
604 break;
605 }
606 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700607 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700608 }
609 }
610 for (byte_offset, ch) in chars {
611 match ch {
612 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
613 let rest = &input[byte_offset + 1 + n..];
David Tolnay1218e122017-06-01 11:13:45 -0700614 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700615 }
616 '\r' => {}
617 _ => {}
618 }
619 }
David Tolnay1218e122017-06-01 11:13:45 -0700620 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700621}
622
// Recognises a byte literal: `b`, `'`, one (possibly escaped) byte, `'`.
named!(byte -> (), do_parse!(
    punct!("b") >>
    tag!("'") >>
    cooked_byte >>
    tag!("'") >>
    (())
));
630
David Tolnay1218e122017-06-01 11:13:45 -0700631fn cooked_byte(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700632 let mut bytes = input.bytes().enumerate();
633 let ok = match bytes.next().map(|(_, b)| b) {
634 Some(b'\\') => {
635 match bytes.next().map(|(_, b)| b) {
636 Some(b'x') => backslash_x_byte(&mut bytes),
637 Some(b'n') |
638 Some(b'r') |
639 Some(b't') |
640 Some(b'\\') |
641 Some(b'0') |
642 Some(b'\'') |
643 Some(b'"') => true,
644 _ => false,
645 }
646 }
647 b => b.is_some(),
648 };
649 if ok {
650 match bytes.next() {
David Tolnay1218e122017-06-01 11:13:45 -0700651 Some((offset, _)) => Ok((&input[offset..], ())),
652 None => Ok(("", ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700653 }
654 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700655 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700656 }
657}
658
// Recognises a character literal: `'`, one (possibly escaped) char, `'`.
named!(character -> (), do_parse!(
    punct!("'") >>
    cooked_char >>
    tag!("'") >>
    (())
));
665
David Tolnay1218e122017-06-01 11:13:45 -0700666fn cooked_char(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700667 let mut chars = input.char_indices();
668 let ok = match chars.next().map(|(_, ch)| ch) {
669 Some('\\') => {
670 match chars.next().map(|(_, ch)| ch) {
671 Some('x') => backslash_x_char(&mut chars),
672 Some('u') => backslash_u(&mut chars),
673 Some('n') |
674 Some('r') |
675 Some('t') |
676 Some('\\') |
677 Some('0') |
678 Some('\'') |
679 Some('"') => true,
680 _ => false,
681 }
682 }
683 ch => ch.is_some(),
684 };
685 if ok {
David Tolnay1218e122017-06-01 11:13:45 -0700686 Ok((chars.as_str(), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700687 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700688 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700689 }
690}
691
// Pulls the next `(index, item)` pair from `$chars` and evaluates to the item
// if it matches one of the given patterns. Otherwise — including when the
// iterator is exhausted — it makes the ENCLOSING function `return false`.
// The item is consumed even when it does not match.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat $(| $rest)* => ch,
                _ => return false,
            },
            None => return false
        }
    };
}
703
/// Consumes the two digits of a `\xNN` escape in a char or string literal.
///
/// The first digit must be `0`-`7` and the second any hex digit, which keeps
/// the value within ASCII. Returns `false` on a mismatch or early end.
fn backslash_x_char<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    match chars.next() {
        Some((_, '0'..='7')) => {}
        _ => return false,
    }
    match chars.next() {
        Some((_, '0'..='9')) | Some((_, 'a'..='f')) | Some((_, 'A'..='F')) => true,
        _ => false,
    }
}
711
/// Consumes the two hex digits of a `\xNN` escape in a byte or byte-string
/// literal. Returns `false` on a mismatch or early end.
fn backslash_x_byte<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, u8)>
{
    for _ in 0..2 {
        match chars.next() {
            Some((_, b'0'..=b'9')) | Some((_, b'a'..=b'f')) | Some((_, b'A'..=b'F')) => {}
            _ => return false,
        }
    }
    true
}
719
/// Consumes the `{H...}` part of a `\u{...}` escape: an opening brace, one to
/// six hex digits, and a closing brace.
///
/// Returns `false` on any other character or early end. The numeric value is
/// not range-checked.
fn backslash_u<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    fn is_hex(ch: char) -> bool {
        match ch {
            '0'..='9' | 'a'..='f' | 'A'..='F' => true,
            _ => false,
        }
    }

    match chars.next() {
        Some((_, '{')) => {}
        _ => return false,
    }
    // At least one digit is mandatory.
    match chars.next() {
        Some((_, ch)) if is_hex(ch) => {}
        _ => return false,
    }
    // Up to five further digits, any of which may instead be the closing brace.
    for _ in 0..5 {
        match chars.next() {
            Some((_, '}')) => return true,
            Some((_, ch)) if is_hex(ch) => {}
            _ => return false,
        }
    }
    // After six digits only the closing brace is acceptable.
    match chars.next() {
        Some((_, '}')) => true,
        _ => false,
    }
}
748
David Tolnay744a6b82017-06-01 11:34:29 -0700749fn float(input: &str) -> PResult<()> {
750 let (rest, ()) = float_digits(input)?;
751 for suffix in &["f32", "f64"] {
752 if rest.starts_with(suffix) {
753 return word_break(&rest[suffix.len()..]);
754 }
755 }
756 word_break(rest)
757}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700758
/// Scans the numeric part of a float literal (no suffix).
///
/// Accepts a leading decimal digit, then digits/underscores with at most one
/// `.` and an optional exponent (`e`/`E`, sign, digits). Text without `.` or
/// an exponent is only accepted when an `f32`/`f64` suffix follows
/// immediately. Returns the remainder after the numeric part.
fn float_digits(input: &str) -> PResult<()> {
    let mut chars = input.chars().peekable();
    match chars.next() {
        Some(ch) if ch >= '0' && ch <= '9' => {}
        _ => return Err(LexError),
    }

    // `len` counts consumed characters. Every character consumed here is
    // ASCII, so it is also a valid byte offset into `input`.
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'...'9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                if has_dot {
                    break;
                }
                chars.next();
                // `1..2` is a range and `1.foo` is a method/field access,
                // so neither makes this a float.
                if chars.peek()
                       .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
                       .unwrap_or(false) {
                    return Err(LexError);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    let rest = &input[len..];
    if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
        return Err(LexError);
    }

    if has_exp {
        // The exponent needs at least one digit. A sign is only consumed
        // before the first exponent digit.
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    if has_exp_value {
                        break;
                    }
                    chars.next();
                    len += 1;
                }
                '0'...'9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        if !has_exp_value {
            return Err(LexError);
        }
    }

    Ok((&input[len..], ()))
}
833
David Tolnay744a6b82017-06-01 11:34:29 -0700834fn int(input: &str) -> PResult<()> {
835 let (rest, ()) = digits(input)?;
836 for suffix in &[
837 "isize",
838 "i8",
839 "i16",
840 "i32",
841 "i64",
842 "i128",
843 "usize",
844 "u8",
845 "u16",
846 "u32",
847 "u64",
848 "u128",
849 ] {
850 if rest.starts_with(suffix) {
851 return word_break(&rest[suffix.len()..]);
852 }
853 }
854 word_break(rest)
855}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700856
David Tolnay1218e122017-06-01 11:13:45 -0700857fn digits(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700858 let base = if input.starts_with("0x") {
859 input = &input[2..];
860 16
861 } else if input.starts_with("0o") {
862 input = &input[2..];
863 8
864 } else if input.starts_with("0b") {
865 input = &input[2..];
866 2
867 } else {
868 10
869 };
870
Alex Crichton44bffbc2017-05-19 17:51:59 -0700871 let mut len = 0;
872 let mut empty = true;
873 for b in input.bytes() {
874 let digit = match b {
875 b'0'...b'9' => (b - b'0') as u64,
876 b'a'...b'f' => 10 + (b - b'a') as u64,
877 b'A'...b'F' => 10 + (b - b'A') as u64,
878 b'_' => {
879 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -0700880 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700881 }
882 len += 1;
883 continue;
884 }
885 _ => break,
886 };
887 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -0700888 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700889 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700890 len += 1;
891 empty = false;
892 }
893 if empty {
David Tolnay1218e122017-06-01 11:13:45 -0700894 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700895 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700896 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700897 }
898}
899
// Recognises the keywords `true` and `false`, which this lexer treats as
// literals.
named!(boolean -> (), alt!(
    keyword!("true") => { |_| () }
    |
    keyword!("false") => { |_| () }
));
905
Alex Crichton1a7f7622017-07-05 17:47:15 -0700906fn op(input: &str) -> PResult<(char, Spacing)> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700907 let input = skip_whitespace(input);
908 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700909 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -0700910 let kind = match op_char(rest) {
Alex Crichton1a7f7622017-07-05 17:47:15 -0700911 Ok(_) => Spacing::Joint,
912 Err(LexError) => Spacing::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -0700913 };
David Tolnay1218e122017-06-01 11:13:45 -0700914 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -0700915 }
David Tolnay1218e122017-06-01 11:13:45 -0700916 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700917 }
918}
919
David Tolnay1218e122017-06-01 11:13:45 -0700920fn op_char(input: &str) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700921 let mut chars = input.chars();
922 let first = match chars.next() {
923 Some(ch) => ch,
924 None => {
David Tolnay1218e122017-06-01 11:13:45 -0700925 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -0700926 }
927 };
928 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
929 if recognized.contains(first) {
David Tolnay1218e122017-06-01 11:13:45 -0700930 Ok((chars.as_str(), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700931 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700932 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700933 }
934}
935
// Recognises the four doc-comment forms, which this lexer treats as literals:
//   `//!` line, `/*!` block, `///` line (but not `////`), and
//   `/**` block (but not `/***`).
// NOTE(review): the line forms end at `take_until!("\n")`, so presumably a
// line doc comment on the last line of input with no trailing newline does
// not match — confirm against the `take_until!` definition.
named!(doc_comment -> (), alt!(
    do_parse!(
        punct!("//!") >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tag!("/*!")) >>
        block_comment >>
        (())
    )
    |
    do_parse!(
        punct!("///") >>
        not!(tag!("/")) >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
        block_comment >>
        (())
    )
));
963));