blob: 116635ccbfc205b30c4d56e2556545ed3a2d2b5e [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::iter;
David Tolnay041bcd42017-06-03 09:18:04 -07007use std::marker::PhantomData;
Alex Crichton44bffbc2017-05-19 17:51:59 -07008use std::ops;
9use std::rc::Rc;
10use std::str::FromStr;
11use std::vec;
12
13use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070014use unicode_xid::UnicodeXID;
David Tolnay744a6b82017-06-01 11:34:29 -070015use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070016
Alex Crichton1a7f7622017-07-05 17:47:15 -070017use {TokenTree, TokenNode, Delimiter, Spacing};
Alex Crichton44bffbc2017-05-19 17:51:59 -070018
David Tolnay977f8282017-05-31 17:41:33 -070019#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070020pub struct TokenStream {
21 inner: Vec<TokenTree>,
22}
23
24#[derive(Debug)]
25pub struct LexError;
26
27impl TokenStream {
28 pub fn empty() -> TokenStream {
29 TokenStream { inner: Vec::new() }
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.inner.len() == 0
34 }
35}
36
37impl FromStr for TokenStream {
38 type Err = LexError;
39
40 fn from_str(src: &str) -> Result<TokenStream, LexError> {
41 match token_stream(src) {
David Tolnay1218e122017-06-01 11:13:45 -070042 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070043 if skip_whitespace(input).len() != 0 {
44 Err(LexError)
45 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070046 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070047 }
48 }
David Tolnay1218e122017-06-01 11:13:45 -070049 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070050 }
51 }
52}
53
54impl fmt::Display for TokenStream {
55 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56 let mut joint = false;
57 for (i, tt) in self.inner.iter().enumerate() {
58 if i != 0 && !joint {
59 write!(f, " ")?;
60 }
61 joint = false;
62 match tt.kind {
Alex Crichton1a7f7622017-07-05 17:47:15 -070063 TokenNode::Group(delim, ref stream) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070064 let (start, end) = match delim {
65 Delimiter::Parenthesis => ("(", ")"),
66 Delimiter::Brace => ("{", "}"),
67 Delimiter::Bracket => ("[", "]"),
68 Delimiter::None => ("", ""),
69 };
Alex Crichton852d53d2017-05-19 19:25:08 -070070 if stream.0.inner.len() == 0 {
71 write!(f, "{} {}", start, end)?
72 } else {
73 write!(f, "{} {} {}", start, stream, end)?
74 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070075 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070076 TokenNode::Term(ref sym) => write!(f, "{}", sym.as_str())?,
77 TokenNode::Op(ch, ref op) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070078 write!(f, "{}", ch)?;
79 match *op {
Alex Crichton1a7f7622017-07-05 17:47:15 -070080 Spacing::Alone => {}
81 Spacing::Joint => joint = true,
Alex Crichton44bffbc2017-05-19 17:51:59 -070082 }
83 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070084 TokenNode::Literal(ref literal) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070085 write!(f, "{}", literal)?;
86 // handle comments
87 if (literal.0).0.starts_with("/") {
88 write!(f, "\n")?;
89 }
90 }
91 }
92 }
93
94 Ok(())
95 }
96}
97
98impl From<proc_macro::TokenStream> for TokenStream {
99 fn from(inner: proc_macro::TokenStream) -> TokenStream {
100 inner.to_string().parse().expect("compiler token stream parse failed")
101 }
102}
103
104impl From<TokenStream> for proc_macro::TokenStream {
105 fn from(inner: TokenStream) -> proc_macro::TokenStream {
106 inner.to_string().parse().expect("failed to parse to compiler tokens")
107 }
108}
109
110
111impl From<TokenTree> for TokenStream {
112 fn from(tree: TokenTree) -> TokenStream {
113 TokenStream { inner: vec![tree] }
114 }
115}
116
117impl iter::FromIterator<TokenStream> for TokenStream {
118 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
119 let mut v = Vec::new();
120
121 for stream in streams.into_iter() {
122 v.extend(stream.inner);
123 }
124
125 TokenStream { inner: v }
126 }
127}
128
Alex Crichton1a7f7622017-07-05 17:47:15 -0700129pub type TokenTreeIter = vec::IntoIter<TokenTree>;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700130
131impl IntoIterator for TokenStream {
132 type Item = TokenTree;
Alex Crichton1a7f7622017-07-05 17:47:15 -0700133 type IntoIter = TokenTreeIter;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700134
Alex Crichton1a7f7622017-07-05 17:47:15 -0700135 fn into_iter(self) -> TokenTreeIter {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700136 self.inner.into_iter()
137 }
138}
139
/// Placeholder span: a unit value that carries no location data.
#[derive(Clone, Copy, Debug)]
pub struct Span;

impl Span {
    /// Returns the placeholder span.
    pub fn call_site() -> Span {
        Span {}
    }

    /// Returns the placeholder span; identical to `call_site` here.
    pub fn def_site() -> Span {
        Span {}
    }
}
152
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700153#[derive(Copy, Clone)]
Alex Crichton1a7f7622017-07-05 17:47:15 -0700154pub struct Term {
David Tolnay041bcd42017-06-03 09:18:04 -0700155 intern: usize,
156 not_send_sync: PhantomData<*const ()>,
157}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700158
159thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
160
Alex Crichton1a7f7622017-07-05 17:47:15 -0700161impl<'a> From<&'a str> for Term {
162 fn from(string: &'a str) -> Term {
163 Term {
David Tolnay041bcd42017-06-03 09:18:04 -0700164 intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
165 not_send_sync: PhantomData,
166 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700167 }
168}
169
Alex Crichton1a7f7622017-07-05 17:47:15 -0700170impl ops::Deref for Term {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700171 type Target = str;
172
173 fn deref(&self) -> &str {
174 SYMBOLS.with(|interner| {
175 let interner = interner.borrow();
David Tolnay041bcd42017-06-03 09:18:04 -0700176 let s = interner.get(self.intern);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700177 unsafe {
178 &*(s as *const str)
179 }
180 })
181 }
182}
183
Alex Crichton1a7f7622017-07-05 17:47:15 -0700184impl fmt::Debug for Term {
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700185 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Alex Crichton1a7f7622017-07-05 17:47:15 -0700186 f.debug_tuple("Term").field(&&**self).finish()
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700187 }
188}
189
/// String interner: maps each distinct string to a small index and back.
struct Interner {
    string_to_index: HashMap<MyRc, usize>,
    index_to_string: Vec<Rc<String>>,
}

/// Map key that shares its string with `index_to_string` and can be looked
/// up by plain `&str` through `Borrow<str>`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an interner with no strings.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index for `s`, registering it first if it is new.
    ///
    /// Indices are handed out sequentially starting at 0.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(&known) = self.string_to_index.get(s) {
            return known;
        }
        let index = self.index_to_string.len();
        let shared = Rc::new(s.to_owned());
        self.index_to_string.push(shared.clone());
        self.string_to_index.insert(MyRc(shared), index);
        index
    }

    /// Returns the string stored at `idx`; panics if `idx` was never issued.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
226
David Tolnay977f8282017-05-31 17:41:33 -0700227#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -0700228pub struct Literal(String);
229
Alex Crichton852d53d2017-05-19 19:25:08 -0700230impl Literal {
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700231 pub fn byte_char(byte: u8) -> Literal {
Alex Crichton76a5cc82017-05-23 07:01:44 -0700232 match byte {
233 0 => Literal(format!("b'\\0'")),
234 b'\"' => Literal(format!("b'\"'")),
235 n => {
236 let mut escaped = "b'".to_string();
237 escaped.extend(ascii::escape_default(n).map(|c| c as char));
238 escaped.push('\'');
239 Literal(escaped)
240 }
241 }
242 }
243
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700244 pub fn byte_string(bytes: &[u8]) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700245 let mut escaped = "b\"".to_string();
246 for b in bytes {
247 match *b {
248 b'\0' => escaped.push_str(r"\0"),
249 b'\t' => escaped.push_str(r"\t"),
250 b'\n' => escaped.push_str(r"\n"),
251 b'\r' => escaped.push_str(r"\r"),
252 b'"' => escaped.push_str("\\\""),
253 b'\\' => escaped.push_str("\\\\"),
254 b'\x20' ... b'\x7E' => escaped.push(*b as char),
255 _ => escaped.push_str(&format!("\\x{:02X}", b)),
256 }
257 }
258 escaped.push('"');
259 Literal(escaped)
260 }
Alex Crichton76a5cc82017-05-23 07:01:44 -0700261
262 pub fn doccomment(s: &str) -> Literal {
263 Literal(s.to_string())
264 }
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700265
Alex Crichton1a7f7622017-07-05 17:47:15 -0700266 pub fn float(s: f64) -> Literal {
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700267 Literal(s.to_string())
268 }
269
Alex Crichton1a7f7622017-07-05 17:47:15 -0700270 pub fn integer(s: i64) -> Literal {
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700271 Literal(s.to_string())
272 }
Alex Crichton31316622017-05-26 12:54:47 -0700273
274 pub fn raw_string(s: &str, pounds: usize) -> Literal {
275 let mut ret = format!("r");
276 ret.extend((0..pounds).map(|_| "#"));
277 ret.push('"');
278 ret.push_str(s);
279 ret.push('"');
280 ret.extend((0..pounds).map(|_| "#"));
281 Literal(ret)
282 }
283
284 pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
Alex Crichton7ed6d282017-05-26 13:42:50 -0700285 let mut ret = format!("br");
Alex Crichton31316622017-05-26 12:54:47 -0700286 ret.extend((0..pounds).map(|_| "#"));
287 ret.push('"');
288 ret.push_str(s);
289 ret.push('"');
290 ret.extend((0..pounds).map(|_| "#"));
291 Literal(ret)
292 }
Alex Crichton852d53d2017-05-19 19:25:08 -0700293}
294
Alex Crichton44bffbc2017-05-19 17:51:59 -0700295impl fmt::Display for Literal {
296 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
297 self.0.fmt(f)
298 }
299}
300
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700301macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700302 ($($t:ty,)*) => {$(
303 impl From<$t> for Literal {
304 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700305 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700306 }
307 }
308 )*}
309}
310
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700311ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700312 u8, u16, u32, u64, usize,
313 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700314}
315
316macro_rules! floats {
317 ($($t:ty,)*) => {$(
318 impl From<$t> for Literal {
319 fn from(t: $t) -> Literal {
320 assert!(!t.is_nan());
321 assert!(!t.is_infinite());
322 Literal(format!(concat!("{}", stringify!($t)), t))
323 }
324 }
325 )*}
326}
327
328floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700329 f32, f64,
330}
331
Alex Crichton44bffbc2017-05-19 17:51:59 -0700332impl<'a> From<&'a str> for Literal {
333 fn from(t: &'a str) -> Literal {
334 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
335 s.push('"');
336 s.insert(0, '"');
337 Literal(s)
338 }
339}
340
341impl From<char> for Literal {
342 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700343 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700344 }
345}
346
David Tolnay8e976c62017-06-01 12:12:29 -0700347named!(token_stream -> ::TokenStream, map!(
348 many0!(token_tree),
349 |trees| ::TokenStream(TokenStream { inner: trees })
350));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700351
352named!(token_tree -> TokenTree,
Alex Crichton1a7f7622017-07-05 17:47:15 -0700353 map!(token_kind, |s: TokenNode| {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700354 TokenTree {
355 span: ::Span(Span),
356 kind: s,
357 }
358 }));
359
Alex Crichton1a7f7622017-07-05 17:47:15 -0700360named!(token_kind -> TokenNode, alt!(
361 map!(delimited, |(d, s)| TokenNode::Group(d, s))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700362 |
Alex Crichton1a7f7622017-07-05 17:47:15 -0700363 map!(literal, TokenNode::Literal) // must be before symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700364 |
Alex Crichton52725f72017-08-28 12:20:58 -0700365 symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700366 |
Alex Crichton1a7f7622017-07-05 17:47:15 -0700367 map!(op, |(op, kind)| TokenNode::Op(op, kind))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700368));
369
David Tolnay8e976c62017-06-01 12:12:29 -0700370named!(delimited -> (Delimiter, ::TokenStream), alt!(
Alex Crichton44bffbc2017-05-19 17:51:59 -0700371 delimited!(
372 punct!("("),
373 token_stream,
374 punct!(")")
375 ) => { |ts| (Delimiter::Parenthesis, ts) }
376 |
377 delimited!(
378 punct!("["),
379 token_stream,
380 punct!("]")
381 ) => { |ts| (Delimiter::Bracket, ts) }
382 |
383 delimited!(
384 punct!("{"),
385 token_stream,
386 punct!("}")
387 ) => { |ts| (Delimiter::Brace, ts) }
388));
389
Alex Crichton52725f72017-08-28 12:20:58 -0700390fn symbol(mut input: &str) -> PResult<TokenNode> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700391 input = skip_whitespace(input);
392
393 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700394
395 let lifetime = input.starts_with("'");
396 if lifetime {
397 chars.next();
398 }
399
Alex Crichton44bffbc2017-05-19 17:51:59 -0700400 match chars.next() {
401 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700402 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700403 }
404
David Tolnay214c94c2017-06-01 12:42:56 -0700405 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700406 for (i, ch) in chars {
407 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700408 end = i;
409 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700410 }
411 }
412
David Tolnay214c94c2017-06-01 12:42:56 -0700413 if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
414 Err(LexError)
415 } else {
Alex Crichton52725f72017-08-28 12:20:58 -0700416 let (a, b) = input.split_at(end);
417 if a == "_" {
418 Ok((b, TokenNode::Op('_', Spacing::Alone)))
419 } else {
420 Ok((b, TokenNode::Term(::Term::intern(a))))
421 }
David Tolnay214c94c2017-06-01 12:42:56 -0700422 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700423}
424
David Tolnay214c94c2017-06-01 12:42:56 -0700425// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
426static KEYWORDS: &'static [&'static str] = &[
427 "abstract", "alignof", "as", "become", "box", "break", "const", "continue",
428 "crate", "do", "else", "enum", "extern", "false", "final", "fn", "for",
429 "if", "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut",
430 "offsetof", "override", "priv", "proc", "pub", "pure", "ref", "return",
431 "self", "Self", "sizeof", "static", "struct", "super", "trait", "true",
432 "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while",
433 "yield",
434];
435
David Tolnay8e976c62017-06-01 12:12:29 -0700436fn literal(input: &str) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700437 let input_no_ws = skip_whitespace(input);
438
439 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700440 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700441 let start = input.len() - input_no_ws.len();
442 let len = input_no_ws.len() - a.len();
443 let end = start + len;
David Tolnay8e976c62017-06-01 12:12:29 -0700444 Ok((a, ::Literal(Literal(input[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700445 }
David Tolnay1218e122017-06-01 11:13:45 -0700446 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700447 }
448}
449
450named!(literal_nocapture -> (), alt!(
451 string
452 |
453 byte_string
454 |
455 byte
456 |
457 character
458 |
459 float
460 |
461 int
462 |
463 boolean
464 |
465 doc_comment
466));
467
468named!(string -> (), alt!(
469 quoted_string
470 |
471 preceded!(
472 punct!("r"),
473 raw_string
474 ) => { |_| () }
475));
476
477named!(quoted_string -> (), delimited!(
478 punct!("\""),
479 cooked_string,
480 tag!("\"")
481));
482
David Tolnay1218e122017-06-01 11:13:45 -0700483fn cooked_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700484 let mut chars = input.char_indices().peekable();
485 while let Some((byte_offset, ch)) = chars.next() {
486 match ch {
487 '"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700488 return Ok((&input[byte_offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700489 }
490 '\r' => {
491 if let Some((_, '\n')) = chars.next() {
492 // ...
493 } else {
494 break;
495 }
496 }
497 '\\' => {
498 match chars.next() {
499 Some((_, 'x')) => {
500 if !backslash_x_char(&mut chars) {
501 break
502 }
503 }
504 Some((_, 'n')) |
505 Some((_, 'r')) |
506 Some((_, 't')) |
507 Some((_, '\\')) |
508 Some((_, '\'')) |
509 Some((_, '"')) |
510 Some((_, '0')) => {}
511 Some((_, 'u')) => {
512 if !backslash_u(&mut chars) {
513 break
514 }
515 }
516 Some((_, '\n')) | Some((_, '\r')) => {
517 while let Some(&(_, ch)) = chars.peek() {
518 if ch.is_whitespace() {
519 chars.next();
520 } else {
521 break;
522 }
523 }
524 }
525 _ => break,
526 }
527 }
528 _ch => {}
529 }
530 }
David Tolnay1218e122017-06-01 11:13:45 -0700531 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700532}
533
534named!(byte_string -> (), alt!(
535 delimited!(
536 punct!("b\""),
537 cooked_byte_string,
538 tag!("\"")
539 ) => { |_| () }
540 |
541 preceded!(
542 punct!("br"),
543 raw_string
544 ) => { |_| () }
545));
546
David Tolnay1218e122017-06-01 11:13:45 -0700547fn cooked_byte_string(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700548 let mut bytes = input.bytes().enumerate();
549 'outer: while let Some((offset, b)) = bytes.next() {
550 match b {
551 b'"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700552 return Ok((&input[offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700553 }
554 b'\r' => {
555 if let Some((_, b'\n')) = bytes.next() {
556 // ...
557 } else {
558 break;
559 }
560 }
561 b'\\' => {
562 match bytes.next() {
563 Some((_, b'x')) => {
564 if !backslash_x_byte(&mut bytes) {
565 break
566 }
567 }
568 Some((_, b'n')) |
569 Some((_, b'r')) |
570 Some((_, b't')) |
571 Some((_, b'\\')) |
572 Some((_, b'0')) |
573 Some((_, b'\'')) |
574 Some((_, b'"')) => {}
575 Some((newline, b'\n')) |
576 Some((newline, b'\r')) => {
577 let rest = &input[newline + 1..];
578 for (offset, ch) in rest.char_indices() {
579 if !ch.is_whitespace() {
580 input = &rest[offset..];
581 bytes = input.bytes().enumerate();
582 continue 'outer;
583 }
584 }
585 break;
586 }
587 _ => break,
588 }
589 }
590 b if b < 0x80 => {}
591 _ => break,
592 }
593 }
David Tolnay1218e122017-06-01 11:13:45 -0700594 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700595}
596
David Tolnay1218e122017-06-01 11:13:45 -0700597fn raw_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700598 let mut chars = input.char_indices();
599 let mut n = 0;
600 while let Some((byte_offset, ch)) = chars.next() {
601 match ch {
602 '"' => {
603 n = byte_offset;
604 break;
605 }
606 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700607 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700608 }
609 }
610 for (byte_offset, ch) in chars {
611 match ch {
612 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
613 let rest = &input[byte_offset + 1 + n..];
David Tolnay1218e122017-06-01 11:13:45 -0700614 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700615 }
616 '\r' => {}
617 _ => {}
618 }
619 }
David Tolnay1218e122017-06-01 11:13:45 -0700620 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700621}
622
623named!(byte -> (), do_parse!(
624 punct!("b") >>
625 tag!("'") >>
626 cooked_byte >>
627 tag!("'") >>
628 (())
629));
630
David Tolnay1218e122017-06-01 11:13:45 -0700631fn cooked_byte(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700632 let mut bytes = input.bytes().enumerate();
633 let ok = match bytes.next().map(|(_, b)| b) {
634 Some(b'\\') => {
635 match bytes.next().map(|(_, b)| b) {
636 Some(b'x') => backslash_x_byte(&mut bytes),
637 Some(b'n') |
638 Some(b'r') |
639 Some(b't') |
640 Some(b'\\') |
641 Some(b'0') |
642 Some(b'\'') |
643 Some(b'"') => true,
644 _ => false,
645 }
646 }
647 b => b.is_some(),
648 };
649 if ok {
650 match bytes.next() {
David Tolnay1218e122017-06-01 11:13:45 -0700651 Some((offset, _)) => Ok((&input[offset..], ())),
652 None => Ok(("", ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700653 }
654 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700655 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700656 }
657}
658
659named!(character -> (), do_parse!(
660 punct!("'") >>
661 cooked_char >>
662 tag!("'") >>
663 (())
664));
665
David Tolnay1218e122017-06-01 11:13:45 -0700666fn cooked_char(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700667 let mut chars = input.char_indices();
668 let ok = match chars.next().map(|(_, ch)| ch) {
669 Some('\\') => {
670 match chars.next().map(|(_, ch)| ch) {
671 Some('x') => backslash_x_char(&mut chars),
672 Some('u') => backslash_u(&mut chars),
673 Some('n') |
674 Some('r') |
675 Some('t') |
676 Some('\\') |
677 Some('0') |
678 Some('\'') |
679 Some('"') => true,
680 _ => false,
681 }
682 }
683 ch => ch.is_some(),
684 };
685 if ok {
David Tolnay1218e122017-06-01 11:13:45 -0700686 Ok((chars.as_str(), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700687 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700688 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700689 }
690}
691
692macro_rules! next_ch {
693 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
694 match $chars.next() {
695 Some((_, ch)) => match ch {
696 $pat $(| $rest)* => ch,
697 _ => return false,
698 },
699 None => return false
700 }
701 };
702}
703
704fn backslash_x_char<I>(chars: &mut I) -> bool
705 where I: Iterator<Item = (usize, char)>
706{
707 next_ch!(chars @ '0'...'7');
708 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
709 true
710}
711
712fn backslash_x_byte<I>(chars: &mut I) -> bool
713 where I: Iterator<Item = (usize, u8)>
714{
715 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
716 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
717 true
718}
719
720fn backslash_u<I>(chars: &mut I) -> bool
721 where I: Iterator<Item = (usize, char)>
722{
723 next_ch!(chars @ '{');
724 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
David Tolnay8d109342017-12-25 18:24:45 -0500725 loop {
726 let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '_' | '}');
727 if c == '}' {
728 return true;
729 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700730 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700731}
732
David Tolnay744a6b82017-06-01 11:34:29 -0700733fn float(input: &str) -> PResult<()> {
734 let (rest, ()) = float_digits(input)?;
735 for suffix in &["f32", "f64"] {
736 if rest.starts_with(suffix) {
737 return word_break(&rest[suffix.len()..]);
738 }
739 }
740 word_break(rest)
741}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700742
David Tolnay744a6b82017-06-01 11:34:29 -0700743fn float_digits(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700744 let mut chars = input.chars().peekable();
745 match chars.next() {
746 Some(ch) if ch >= '0' && ch <= '9' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700747 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700748 }
749
750 let mut len = 1;
751 let mut has_dot = false;
752 let mut has_exp = false;
753 while let Some(&ch) = chars.peek() {
754 match ch {
755 '0'...'9' | '_' => {
756 chars.next();
757 len += 1;
758 }
759 '.' => {
760 if has_dot {
761 break;
762 }
763 chars.next();
764 if chars.peek()
765 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
766 .unwrap_or(false) {
David Tolnay1218e122017-06-01 11:13:45 -0700767 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700768 }
769 len += 1;
770 has_dot = true;
771 }
772 'e' | 'E' => {
773 chars.next();
774 len += 1;
775 has_exp = true;
776 break;
777 }
778 _ => break,
779 }
780 }
781
782 let rest = &input[len..];
783 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
David Tolnay1218e122017-06-01 11:13:45 -0700784 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700785 }
786
787 if has_exp {
788 let mut has_exp_value = false;
789 while let Some(&ch) = chars.peek() {
790 match ch {
791 '+' | '-' => {
792 if has_exp_value {
793 break;
794 }
795 chars.next();
796 len += 1;
797 }
798 '0'...'9' => {
799 chars.next();
800 len += 1;
801 has_exp_value = true;
802 }
803 '_' => {
804 chars.next();
805 len += 1;
806 }
807 _ => break,
808 }
809 }
810 if !has_exp_value {
David Tolnay1218e122017-06-01 11:13:45 -0700811 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700812 }
813 }
814
David Tolnay1218e122017-06-01 11:13:45 -0700815 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700816}
817
David Tolnay744a6b82017-06-01 11:34:29 -0700818fn int(input: &str) -> PResult<()> {
819 let (rest, ()) = digits(input)?;
820 for suffix in &[
821 "isize",
822 "i8",
823 "i16",
824 "i32",
825 "i64",
826 "i128",
827 "usize",
828 "u8",
829 "u16",
830 "u32",
831 "u64",
832 "u128",
833 ] {
834 if rest.starts_with(suffix) {
835 return word_break(&rest[suffix.len()..]);
836 }
837 }
838 word_break(rest)
839}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700840
David Tolnay1218e122017-06-01 11:13:45 -0700841fn digits(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700842 let base = if input.starts_with("0x") {
843 input = &input[2..];
844 16
845 } else if input.starts_with("0o") {
846 input = &input[2..];
847 8
848 } else if input.starts_with("0b") {
849 input = &input[2..];
850 2
851 } else {
852 10
853 };
854
Alex Crichton44bffbc2017-05-19 17:51:59 -0700855 let mut len = 0;
856 let mut empty = true;
857 for b in input.bytes() {
858 let digit = match b {
859 b'0'...b'9' => (b - b'0') as u64,
860 b'a'...b'f' => 10 + (b - b'a') as u64,
861 b'A'...b'F' => 10 + (b - b'A') as u64,
862 b'_' => {
863 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -0700864 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700865 }
866 len += 1;
867 continue;
868 }
869 _ => break,
870 };
871 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -0700872 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700873 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700874 len += 1;
875 empty = false;
876 }
877 if empty {
David Tolnay1218e122017-06-01 11:13:45 -0700878 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700879 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700880 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700881 }
882}
883
884named!(boolean -> (), alt!(
885 keyword!("true") => { |_| () }
886 |
887 keyword!("false") => { |_| () }
888));
889
Alex Crichton1a7f7622017-07-05 17:47:15 -0700890fn op(input: &str) -> PResult<(char, Spacing)> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700891 let input = skip_whitespace(input);
892 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700893 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -0700894 let kind = match op_char(rest) {
Alex Crichton1a7f7622017-07-05 17:47:15 -0700895 Ok(_) => Spacing::Joint,
896 Err(LexError) => Spacing::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -0700897 };
David Tolnay1218e122017-06-01 11:13:45 -0700898 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -0700899 }
David Tolnay1218e122017-06-01 11:13:45 -0700900 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700901 }
902}
903
David Tolnay1218e122017-06-01 11:13:45 -0700904fn op_char(input: &str) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700905 let mut chars = input.chars();
906 let first = match chars.next() {
907 Some(ch) => ch,
908 None => {
David Tolnay1218e122017-06-01 11:13:45 -0700909 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -0700910 }
911 };
912 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
913 if recognized.contains(first) {
David Tolnay1218e122017-06-01 11:13:45 -0700914 Ok((chars.as_str(), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700915 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700916 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700917 }
918}
919
Alex Crichton44bffbc2017-05-19 17:51:59 -0700920named!(doc_comment -> (), alt!(
921 do_parse!(
922 punct!("//!") >>
923 take_until!("\n") >>
924 (())
925 )
926 |
927 do_parse!(
928 option!(whitespace) >>
929 peek!(tag!("/*!")) >>
930 block_comment >>
931 (())
932 )
933 |
934 do_parse!(
935 punct!("///") >>
936 not!(tag!("/")) >>
937 take_until!("\n") >>
938 (())
939 )
940 |
941 do_parse!(
942 option!(whitespace) >>
943 peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
944 block_comment >>
945 (())
946 )
947));