blob: 26b2686b09b401055808a31699a652fe43439bbe [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::fmt;
6use std::iter;
David Tolnay041bcd42017-06-03 09:18:04 -07007use std::marker::PhantomData;
Alex Crichton44bffbc2017-05-19 17:51:59 -07008use std::ops;
9use std::rc::Rc;
10use std::str::FromStr;
11use std::vec;
12
13use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070014use unicode_xid::UnicodeXID;
David Tolnay744a6b82017-06-01 11:34:29 -070015use strnom::{PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070016
17use {TokenTree, TokenKind, Delimiter, OpKind};
18
/// A stream of tokens, stored as a flat vector of token trees.
David Tolnay977f8282017-05-31 17:41:33 -070019#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070020pub struct TokenStream {
21 inner: Vec<TokenTree>,
22}
23
/// Error returned when input text cannot be lexed; it carries no further detail.
24#[derive(Debug)]
25pub struct LexError;
26
27impl TokenStream {
28 pub fn empty() -> TokenStream {
29 TokenStream { inner: Vec::new() }
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.inner.len() == 0
34 }
35}
36
37impl FromStr for TokenStream {
38 type Err = LexError;
39
40 fn from_str(src: &str) -> Result<TokenStream, LexError> {
41 match token_stream(src) {
David Tolnay1218e122017-06-01 11:13:45 -070042 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070043 if skip_whitespace(input).len() != 0 {
44 Err(LexError)
45 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070046 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070047 }
48 }
David Tolnay1218e122017-06-01 11:13:45 -070049 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070050 }
51 }
52}
53
54impl fmt::Display for TokenStream {
55 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56 let mut joint = false;
57 for (i, tt) in self.inner.iter().enumerate() {
58 if i != 0 && !joint {
59 write!(f, " ")?;
60 }
61 joint = false;
62 match tt.kind {
63 TokenKind::Sequence(delim, ref stream) => {
64 let (start, end) = match delim {
65 Delimiter::Parenthesis => ("(", ")"),
66 Delimiter::Brace => ("{", "}"),
67 Delimiter::Bracket => ("[", "]"),
68 Delimiter::None => ("", ""),
69 };
Alex Crichton852d53d2017-05-19 19:25:08 -070070 if stream.0.inner.len() == 0 {
71 write!(f, "{} {}", start, end)?
72 } else {
73 write!(f, "{} {} {}", start, stream, end)?
74 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070075 }
Alex Crichton9c2fb0a2017-05-26 08:49:31 -070076 TokenKind::Word(ref sym) => write!(f, "{}", sym.as_str())?,
Alex Crichton44bffbc2017-05-19 17:51:59 -070077 TokenKind::Op(ch, ref op) => {
78 write!(f, "{}", ch)?;
79 match *op {
80 OpKind::Alone => {}
81 OpKind::Joint => joint = true,
82 }
83 }
84 TokenKind::Literal(ref literal) => {
85 write!(f, "{}", literal)?;
86 // handle comments
87 if (literal.0).0.starts_with("/") {
88 write!(f, "\n")?;
89 }
90 }
91 }
92 }
93
94 Ok(())
95 }
96}
97
98impl From<proc_macro::TokenStream> for TokenStream {
99 fn from(inner: proc_macro::TokenStream) -> TokenStream {
100 inner.to_string().parse().expect("compiler token stream parse failed")
101 }
102}
103
104impl From<TokenStream> for proc_macro::TokenStream {
105 fn from(inner: TokenStream) -> proc_macro::TokenStream {
106 inner.to_string().parse().expect("failed to parse to compiler tokens")
107 }
108}
109
110
111impl From<TokenTree> for TokenStream {
112 fn from(tree: TokenTree) -> TokenStream {
113 TokenStream { inner: vec![tree] }
114 }
115}
116
117impl iter::FromIterator<TokenStream> for TokenStream {
118 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
119 let mut v = Vec::new();
120
121 for stream in streams.into_iter() {
122 v.extend(stream.inner);
123 }
124
125 TokenStream { inner: v }
126 }
127}
128
/// Owning iterator over a `TokenStream`'s token trees (see the `IntoIterator` impl).
129pub type TokenIter = vec::IntoIter<TokenTree>;
130
131impl IntoIterator for TokenStream {
132 type Item = TokenTree;
133 type IntoIter = TokenIter;
134
135 fn into_iter(self) -> TokenIter {
136 self.inner.into_iter()
137 }
138}
139
/// Source-location marker; this implementation is a unit struct and stores no data.
#[derive(Clone, Copy, Default, Debug)]
pub struct Span;

impl Span {
    /// Returns the (only possible) `Span` value.
    pub fn call_site() -> Span {
        Span::default()
    }
}
148
/// Handle to an interned string.
///
/// `intern` is the index handed out by the thread-local `SYMBOLS` interner.
/// The `PhantomData<*const ()>` field stores nothing; a raw pointer is neither
/// `Send` nor `Sync`, so the handle cannot leave the thread whose interner
/// produced it.
David Tolnay977f8282017-05-31 17:41:33 -0700149#[derive(Copy, Clone, Debug)]
David Tolnay041bcd42017-06-03 09:18:04 -0700150pub struct Symbol {
151 intern: usize,
152 not_send_sync: PhantomData<*const ()>,
153}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700154
// Per-thread interner used by `Symbol::from` and `Symbol::deref`.
155thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
156
157impl<'a> From<&'a str> for Symbol {
158 fn from(string: &'a str) -> Symbol {
David Tolnay041bcd42017-06-03 09:18:04 -0700159 Symbol {
160 intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
161 not_send_sync: PhantomData,
162 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700163 }
164}
165
// Resolves the symbol back to its text by looking up `self.intern` in the
// thread-local interner.
166impl ops::Deref for Symbol {
167 type Target = str;
168
169 fn deref(&self) -> &str {
170 SYMBOLS.with(|interner| {
171 let interner = interner.borrow();
David Tolnay041bcd42017-06-03 09:18:04 -0700172 let s = interner.get(self.intern);
// SAFETY / NOTE(review): the pointer round-trip detaches the returned `&str`
// from the `RefCell` borrow and from the closure's scope, so the compiler no
// longer checks its lifetime. This relies on the interned `Rc<String>` never
// being dropped or mutated while the reference is alive. The visible
// `Interner` only ever pushes new strings; confirm nothing can dereference a
// `Symbol` after the thread-local has been torn down.
Alex Crichton44bffbc2017-05-19 17:51:59 -0700173 unsafe {
174 &*(s as *const str)
175 }
176 })
177 }
178}
179
/// Append-only string interner: each distinct string gets the index at which
/// it was first inserted.
struct Interner {
    /// Reverse lookup from string contents to index.
    string_to_index: HashMap<MyRc, usize>,
    /// Strings in insertion order; the position is the interned index.
    index_to_string: Vec<Rc<String>>,
}

/// Key wrapper around `Rc<String>` so the map can be queried with a plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an interner with no strings.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index for `s`, inserting it first if it has not been seen.
    fn intern(&mut self, s: &str) -> usize {
        match self.string_to_index.get(s) {
            Some(&existing) => return existing,
            None => {}
        }
        // The new entry lands at the current end of the vector.
        let index = self.index_to_string.len();
        let shared = Rc::new(String::from(s));
        self.index_to_string.push(Rc::clone(&shared));
        self.string_to_index.insert(MyRc(shared), index);
        index
    }

    /// Returns the string stored at `idx`; panics if `idx` was never handed out.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
216
David Tolnay977f8282017-05-31 17:41:33 -0700217#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -0700218pub struct Literal(String);
219
Alex Crichton852d53d2017-05-19 19:25:08 -0700220impl Literal {
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700221 pub fn byte_char(byte: u8) -> Literal {
Alex Crichton76a5cc82017-05-23 07:01:44 -0700222 match byte {
223 0 => Literal(format!("b'\\0'")),
224 b'\"' => Literal(format!("b'\"'")),
225 n => {
226 let mut escaped = "b'".to_string();
227 escaped.extend(ascii::escape_default(n).map(|c| c as char));
228 escaped.push('\'');
229 Literal(escaped)
230 }
231 }
232 }
233
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700234 pub fn byte_string(bytes: &[u8]) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700235 let mut escaped = "b\"".to_string();
236 for b in bytes {
237 match *b {
238 b'\0' => escaped.push_str(r"\0"),
239 b'\t' => escaped.push_str(r"\t"),
240 b'\n' => escaped.push_str(r"\n"),
241 b'\r' => escaped.push_str(r"\r"),
242 b'"' => escaped.push_str("\\\""),
243 b'\\' => escaped.push_str("\\\\"),
244 b'\x20' ... b'\x7E' => escaped.push(*b as char),
245 _ => escaped.push_str(&format!("\\x{:02X}", b)),
246 }
247 }
248 escaped.push('"');
249 Literal(escaped)
250 }
Alex Crichton76a5cc82017-05-23 07:01:44 -0700251
252 pub fn doccomment(s: &str) -> Literal {
253 Literal(s.to_string())
254 }
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700255
256 pub fn float(s: &str) -> Literal {
257 Literal(s.to_string())
258 }
259
260 pub fn integer(s: &str) -> Literal {
261 Literal(s.to_string())
262 }
Alex Crichton31316622017-05-26 12:54:47 -0700263
264 pub fn raw_string(s: &str, pounds: usize) -> Literal {
265 let mut ret = format!("r");
266 ret.extend((0..pounds).map(|_| "#"));
267 ret.push('"');
268 ret.push_str(s);
269 ret.push('"');
270 ret.extend((0..pounds).map(|_| "#"));
271 Literal(ret)
272 }
273
274 pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
Alex Crichton7ed6d282017-05-26 13:42:50 -0700275 let mut ret = format!("br");
Alex Crichton31316622017-05-26 12:54:47 -0700276 ret.extend((0..pounds).map(|_| "#"));
277 ret.push('"');
278 ret.push_str(s);
279 ret.push('"');
280 ret.extend((0..pounds).map(|_| "#"));
281 Literal(ret)
282 }
Alex Crichton852d53d2017-05-19 19:25:08 -0700283}
284
Alex Crichton44bffbc2017-05-19 17:51:59 -0700285impl fmt::Display for Literal {
286 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
287 self.0.fmt(f)
288 }
289}
290
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700291macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700292 ($($t:ty,)*) => {$(
293 impl From<$t> for Literal {
294 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700295 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700296 }
297 }
298 )*}
299}
300
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700301ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700302 u8, u16, u32, u64, usize,
303 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700304}
305
306macro_rules! floats {
307 ($($t:ty,)*) => {$(
308 impl From<$t> for Literal {
309 fn from(t: $t) -> Literal {
310 assert!(!t.is_nan());
311 assert!(!t.is_infinite());
312 Literal(format!(concat!("{}", stringify!($t)), t))
313 }
314 }
315 )*}
316}
317
318floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700319 f32, f64,
320}
321
Alex Crichton44bffbc2017-05-19 17:51:59 -0700322impl<'a> From<&'a str> for Literal {
323 fn from(t: &'a str) -> Literal {
324 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
325 s.push('"');
326 s.insert(0, '"');
327 Literal(s)
328 }
329}
330
331impl From<char> for Literal {
332 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700333 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700334 }
335}
336
// Parser: applies `token_tree` repeatedly (`many0!`) and wraps the collected
// trees in the public `::TokenStream` newtype around this module's `TokenStream`.
David Tolnay8e976c62017-06-01 12:12:29 -0700337named!(token_stream -> ::TokenStream, map!(
338 many0!(token_tree),
339 |trees| ::TokenStream(TokenStream { inner: trees })
340));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700341
// Parser: lexes one `token_kind` and pairs it with a `Span` to form a
// `TokenTree`. `Span` is a unit struct here, so no position is recorded.
342named!(token_tree -> TokenTree,
343 map!(token_kind, |s: TokenKind| {
344 TokenTree {
345 span: ::Span(Span),
346 kind: s,
347 }
348 }));
349
// Parser: tries each token category in turn: delimited group, literal,
// word (identifier or lifetime), then single-character operator.
// NOTE(review): assumes `alt!` takes the first alternative that succeeds,
// which is what the ordering comment below implies; confirm in the macro
// definition.
350named!(token_kind -> TokenKind, alt!(
David Tolnay8e976c62017-06-01 12:12:29 -0700351 map!(delimited, |(d, s)| TokenKind::Sequence(d, s))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700352 |
David Tolnay8e976c62017-06-01 12:12:29 -0700353 map!(literal, TokenKind::Literal) // must be before symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700354 |
David Tolnay8e976c62017-06-01 12:12:29 -0700355 map!(symbol, TokenKind::Word)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700356 |
David Tolnay643177b2017-06-01 12:02:44 -0700357 map!(op, |(op, kind)| TokenKind::Op(op, kind))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700358));
359
// Parser: a nested token stream enclosed in `( )`, `[ ]` or `{ }`, returned
// together with the matching `Delimiter` variant. `Delimiter::None` is never
// produced here.
David Tolnay8e976c62017-06-01 12:12:29 -0700360named!(delimited -> (Delimiter, ::TokenStream), alt!(
Alex Crichton44bffbc2017-05-19 17:51:59 -0700361 delimited!(
362 punct!("("),
363 token_stream,
364 punct!(")")
365 ) => { |ts| (Delimiter::Parenthesis, ts) }
366 |
367 delimited!(
368 punct!("["),
369 token_stream,
370 punct!("]")
371 ) => { |ts| (Delimiter::Bracket, ts) }
372 |
373 delimited!(
374 punct!("{"),
375 token_stream,
376 punct!("}")
377 ) => { |ts| (Delimiter::Brace, ts) }
378));
379
David Tolnaya202d502017-06-01 12:26:55 -0700380fn symbol(mut input: &str) -> PResult<::Symbol> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700381 input = skip_whitespace(input);
382
383 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700384
385 let lifetime = input.starts_with("'");
386 if lifetime {
387 chars.next();
388 }
389
Alex Crichton44bffbc2017-05-19 17:51:59 -0700390 match chars.next() {
391 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700392 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700393 }
394
David Tolnay214c94c2017-06-01 12:42:56 -0700395 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700396 for (i, ch) in chars {
397 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700398 end = i;
399 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700400 }
401 }
402
David Tolnay214c94c2017-06-01 12:42:56 -0700403 if lifetime && &input[..end] != "'static" && KEYWORDS.contains(&&input[1..end]) {
404 Err(LexError)
405 } else {
406 Ok((&input[end..], input[..end].into()))
407 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700408}
409
// Words that `symbol` refuses to accept as a lifetime name (`'static` is
// special-cased there and allowed).
David Tolnay214c94c2017-06-01 12:42:56 -0700410// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
411static KEYWORDS: &'static [&'static str] = &[
412 "abstract", "alignof", "as", "become", "box", "break", "const", "continue",
413 "crate", "do", "else", "enum", "extern", "false", "final", "fn", "for",
414 "if", "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut",
415 "offsetof", "override", "priv", "proc", "pub", "pure", "ref", "return",
416 "self", "Self", "sizeof", "static", "struct", "super", "trait", "true",
417 "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while",
418 "yield",
419];
420
David Tolnay8e976c62017-06-01 12:12:29 -0700421fn literal(input: &str) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700422 let input_no_ws = skip_whitespace(input);
423
424 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700425 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700426 let start = input.len() - input_no_ws.len();
427 let len = input_no_ws.len() - a.len();
428 let end = start + len;
David Tolnay8e976c62017-06-01 12:12:29 -0700429 Ok((a, ::Literal(Literal(input[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700430 }
David Tolnay1218e122017-06-01 11:13:45 -0700431 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700432 }
433}
434
// Parser: recognises any literal form without producing a value. Alternatives
// are listed in the order string, byte string, byte, character, float,
// integer, boolean, doc comment. `float` is listed ahead of `int`.
435named!(literal_nocapture -> (), alt!(
436 string
437 |
438 byte_string
439 |
440 byte
441 |
442 character
443 |
444 float
445 |
446 int
447 |
448 boolean
449 |
450 doc_comment
451));
452
// Parser: a string literal, either an ordinary quoted string or `r` followed
// by a raw string body.
453named!(string -> (), alt!(
454 quoted_string
455 |
456 preceded!(
457 punct!("r"),
458 raw_string
459 ) => { |_| () }
460));
461
// Parser: opening `"`, escaped body checked by `cooked_string`, closing `"`.
// `cooked_string` stops at the closing quote without consuming it, so the
// final `tag!` consumes it.
// NOTE(review): presumably `punct!` skips leading whitespace while `tag!`
// matches exactly at the current position; confirm in the macro definitions.
462named!(quoted_string -> (), delimited!(
463 punct!("\""),
464 cooked_string,
465 tag!("\"")
466));
467
David Tolnay1218e122017-06-01 11:13:45 -0700468fn cooked_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700469 let mut chars = input.char_indices().peekable();
470 while let Some((byte_offset, ch)) = chars.next() {
471 match ch {
472 '"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700473 return Ok((&input[byte_offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700474 }
475 '\r' => {
476 if let Some((_, '\n')) = chars.next() {
477 // ...
478 } else {
479 break;
480 }
481 }
482 '\\' => {
483 match chars.next() {
484 Some((_, 'x')) => {
485 if !backslash_x_char(&mut chars) {
486 break
487 }
488 }
489 Some((_, 'n')) |
490 Some((_, 'r')) |
491 Some((_, 't')) |
492 Some((_, '\\')) |
493 Some((_, '\'')) |
494 Some((_, '"')) |
495 Some((_, '0')) => {}
496 Some((_, 'u')) => {
497 if !backslash_u(&mut chars) {
498 break
499 }
500 }
501 Some((_, '\n')) | Some((_, '\r')) => {
502 while let Some(&(_, ch)) = chars.peek() {
503 if ch.is_whitespace() {
504 chars.next();
505 } else {
506 break;
507 }
508 }
509 }
510 _ => break,
511 }
512 }
513 _ch => {}
514 }
515 }
David Tolnay1218e122017-06-01 11:13:45 -0700516 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700517}
518
// Parser: a byte-string literal, either `b"…"` with the body checked by
// `cooked_byte_string`, or `br` followed by a raw string body.
519named!(byte_string -> (), alt!(
520 delimited!(
521 punct!("b\""),
522 cooked_byte_string,
523 tag!("\"")
524 ) => { |_| () }
525 |
526 preceded!(
527 punct!("br"),
528 raw_string
529 ) => { |_| () }
530));
531
David Tolnay1218e122017-06-01 11:13:45 -0700532fn cooked_byte_string(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700533 let mut bytes = input.bytes().enumerate();
534 'outer: while let Some((offset, b)) = bytes.next() {
535 match b {
536 b'"' => {
David Tolnay1218e122017-06-01 11:13:45 -0700537 return Ok((&input[offset..], ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700538 }
539 b'\r' => {
540 if let Some((_, b'\n')) = bytes.next() {
541 // ...
542 } else {
543 break;
544 }
545 }
546 b'\\' => {
547 match bytes.next() {
548 Some((_, b'x')) => {
549 if !backslash_x_byte(&mut bytes) {
550 break
551 }
552 }
553 Some((_, b'n')) |
554 Some((_, b'r')) |
555 Some((_, b't')) |
556 Some((_, b'\\')) |
557 Some((_, b'0')) |
558 Some((_, b'\'')) |
559 Some((_, b'"')) => {}
560 Some((newline, b'\n')) |
561 Some((newline, b'\r')) => {
562 let rest = &input[newline + 1..];
563 for (offset, ch) in rest.char_indices() {
564 if !ch.is_whitespace() {
565 input = &rest[offset..];
566 bytes = input.bytes().enumerate();
567 continue 'outer;
568 }
569 }
570 break;
571 }
572 _ => break,
573 }
574 }
575 b if b < 0x80 => {}
576 _ => break,
577 }
578 }
David Tolnay1218e122017-06-01 11:13:45 -0700579 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700580}
581
David Tolnay1218e122017-06-01 11:13:45 -0700582fn raw_string(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700583 let mut chars = input.char_indices();
584 let mut n = 0;
585 while let Some((byte_offset, ch)) = chars.next() {
586 match ch {
587 '"' => {
588 n = byte_offset;
589 break;
590 }
591 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700592 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700593 }
594 }
595 for (byte_offset, ch) in chars {
596 match ch {
597 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
598 let rest = &input[byte_offset + 1 + n..];
David Tolnay1218e122017-06-01 11:13:45 -0700599 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700600 }
601 '\r' => {}
602 _ => {}
603 }
604 }
David Tolnay1218e122017-06-01 11:13:45 -0700605 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700606}
607
// Parser: a byte literal: `b`, an opening `'`, one byte or escape checked by
// `cooked_byte`, and a closing `'`.
608named!(byte -> (), do_parse!(
609 punct!("b") >>
610 tag!("'") >>
611 cooked_byte >>
612 tag!("'") >>
613 (())
614));
615
David Tolnay1218e122017-06-01 11:13:45 -0700616fn cooked_byte(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700617 let mut bytes = input.bytes().enumerate();
618 let ok = match bytes.next().map(|(_, b)| b) {
619 Some(b'\\') => {
620 match bytes.next().map(|(_, b)| b) {
621 Some(b'x') => backslash_x_byte(&mut bytes),
622 Some(b'n') |
623 Some(b'r') |
624 Some(b't') |
625 Some(b'\\') |
626 Some(b'0') |
627 Some(b'\'') |
628 Some(b'"') => true,
629 _ => false,
630 }
631 }
632 b => b.is_some(),
633 };
634 if ok {
635 match bytes.next() {
David Tolnay1218e122017-06-01 11:13:45 -0700636 Some((offset, _)) => Ok((&input[offset..], ())),
637 None => Ok(("", ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700638 }
639 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700640 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700641 }
642}
643
// Parser: a character literal: opening `'`, one character or escape checked
// by `cooked_char`, and a closing `'`.
644named!(character -> (), do_parse!(
645 punct!("'") >>
646 cooked_char >>
647 tag!("'") >>
648 (())
649));
650
David Tolnay1218e122017-06-01 11:13:45 -0700651fn cooked_char(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700652 let mut chars = input.char_indices();
653 let ok = match chars.next().map(|(_, ch)| ch) {
654 Some('\\') => {
655 match chars.next().map(|(_, ch)| ch) {
656 Some('x') => backslash_x_char(&mut chars),
657 Some('u') => backslash_u(&mut chars),
658 Some('n') |
659 Some('r') |
660 Some('t') |
661 Some('\\') |
662 Some('0') |
663 Some('\'') |
664 Some('"') => true,
665 _ => false,
666 }
667 }
668 ch => ch.is_some(),
669 };
670 if ok {
David Tolnay1218e122017-06-01 11:13:45 -0700671 Ok((chars.as_str(), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700672 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700673 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700674 }
675}
676
// Pulls the next `(index, value)` item from `$chars` and evaluates to the
// value if it matches one of the given patterns. If the iterator is exhausted
// or the value does not match, it executes `return false` in the ENCLOSING
// function, so it is only usable inside functions that return `bool`.
677macro_rules! next_ch {
678 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
679 match $chars.next() {
680 Some((_, ch)) => match ch {
681 $pat $(| $rest)* => ch,
682 _ => return false,
683 },
684 None => return false
685 }
686 };
687}
688
689fn backslash_x_char<I>(chars: &mut I) -> bool
690 where I: Iterator<Item = (usize, char)>
691{
692 next_ch!(chars @ '0'...'7');
693 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
694 true
695}
696
697fn backslash_x_byte<I>(chars: &mut I) -> bool
698 where I: Iterator<Item = (usize, u8)>
699{
700 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
701 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
702 true
703}
704
705fn backslash_u<I>(chars: &mut I) -> bool
706 where I: Iterator<Item = (usize, char)>
707{
708 next_ch!(chars @ '{');
709 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
710 let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
711 if b == '}' {
712 return true
713 }
714 let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
715 if c == '}' {
716 return true
717 }
718 let d = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
719 if d == '}' {
720 return true
721 }
722 let e = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
723 if e == '}' {
724 return true
725 }
726 let f = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
727 if f == '}' {
728 return true
729 }
730 next_ch!(chars @ '}');
731 true
732}
733
David Tolnay744a6b82017-06-01 11:34:29 -0700734fn float(input: &str) -> PResult<()> {
735 let (rest, ()) = float_digits(input)?;
736 for suffix in &["f32", "f64"] {
737 if rest.starts_with(suffix) {
738 return word_break(&rest[suffix.len()..]);
739 }
740 }
741 word_break(rest)
742}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700743
David Tolnay744a6b82017-06-01 11:34:29 -0700744fn float_digits(input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700745 let mut chars = input.chars().peekable();
746 match chars.next() {
747 Some(ch) if ch >= '0' && ch <= '9' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700748 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700749 }
750
751 let mut len = 1;
752 let mut has_dot = false;
753 let mut has_exp = false;
754 while let Some(&ch) = chars.peek() {
755 match ch {
756 '0'...'9' | '_' => {
757 chars.next();
758 len += 1;
759 }
760 '.' => {
761 if has_dot {
762 break;
763 }
764 chars.next();
765 if chars.peek()
766 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
767 .unwrap_or(false) {
David Tolnay1218e122017-06-01 11:13:45 -0700768 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700769 }
770 len += 1;
771 has_dot = true;
772 }
773 'e' | 'E' => {
774 chars.next();
775 len += 1;
776 has_exp = true;
777 break;
778 }
779 _ => break,
780 }
781 }
782
783 let rest = &input[len..];
784 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
David Tolnay1218e122017-06-01 11:13:45 -0700785 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700786 }
787
788 if has_exp {
789 let mut has_exp_value = false;
790 while let Some(&ch) = chars.peek() {
791 match ch {
792 '+' | '-' => {
793 if has_exp_value {
794 break;
795 }
796 chars.next();
797 len += 1;
798 }
799 '0'...'9' => {
800 chars.next();
801 len += 1;
802 has_exp_value = true;
803 }
804 '_' => {
805 chars.next();
806 len += 1;
807 }
808 _ => break,
809 }
810 }
811 if !has_exp_value {
David Tolnay1218e122017-06-01 11:13:45 -0700812 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700813 }
814 }
815
David Tolnay1218e122017-06-01 11:13:45 -0700816 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700817}
818
David Tolnay744a6b82017-06-01 11:34:29 -0700819fn int(input: &str) -> PResult<()> {
820 let (rest, ()) = digits(input)?;
821 for suffix in &[
822 "isize",
823 "i8",
824 "i16",
825 "i32",
826 "i64",
827 "i128",
828 "usize",
829 "u8",
830 "u16",
831 "u32",
832 "u64",
833 "u128",
834 ] {
835 if rest.starts_with(suffix) {
836 return word_break(&rest[suffix.len()..]);
837 }
838 }
839 word_break(rest)
840}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700841
David Tolnay1218e122017-06-01 11:13:45 -0700842fn digits(mut input: &str) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700843 let base = if input.starts_with("0x") {
844 input = &input[2..];
845 16
846 } else if input.starts_with("0o") {
847 input = &input[2..];
848 8
849 } else if input.starts_with("0b") {
850 input = &input[2..];
851 2
852 } else {
853 10
854 };
855
Alex Crichton44bffbc2017-05-19 17:51:59 -0700856 let mut len = 0;
857 let mut empty = true;
858 for b in input.bytes() {
859 let digit = match b {
860 b'0'...b'9' => (b - b'0') as u64,
861 b'a'...b'f' => 10 + (b - b'a') as u64,
862 b'A'...b'F' => 10 + (b - b'A') as u64,
863 b'_' => {
864 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -0700865 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700866 }
867 len += 1;
868 continue;
869 }
870 _ => break,
871 };
872 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -0700873 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700874 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700875 len += 1;
876 empty = false;
877 }
878 if empty {
David Tolnay1218e122017-06-01 11:13:45 -0700879 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700880 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700881 Ok((&input[len..], ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700882 }
883}
884
// Parser: the boolean literals `true` and `false`, matched with `keyword!`.
885named!(boolean -> (), alt!(
886 keyword!("true") => { |_| () }
887 |
888 keyword!("false") => { |_| () }
889));
890
David Tolnay1218e122017-06-01 11:13:45 -0700891fn op(input: &str) -> PResult<(char, OpKind)> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700892 let input = skip_whitespace(input);
893 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -0700894 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -0700895 let kind = match op_char(rest) {
David Tolnay1218e122017-06-01 11:13:45 -0700896 Ok(_) => OpKind::Joint,
897 Err(LexError) => OpKind::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -0700898 };
David Tolnay1218e122017-06-01 11:13:45 -0700899 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -0700900 }
David Tolnay1218e122017-06-01 11:13:45 -0700901 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700902 }
903}
904
David Tolnay1218e122017-06-01 11:13:45 -0700905fn op_char(input: &str) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -0700906 let mut chars = input.chars();
907 let first = match chars.next() {
908 Some(ch) => ch,
909 None => {
David Tolnay1218e122017-06-01 11:13:45 -0700910 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -0700911 }
912 };
913 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
914 if recognized.contains(first) {
David Tolnay1218e122017-06-01 11:13:45 -0700915 Ok((chars.as_str(), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700916 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700917 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700918 }
919}
920
// Parser: recognises the four doc-comment forms, which this lexer treats as
// literals:
//   1. `//!` line comment, up to the next newline
//   2. `/*!` block comment
//   3. `///` line comment, but not `////`
//   4. `/**` block comment, but not `/***`
// The block forms only peek at the opening marker and then hand the whole
// comment to `block_comment`.
// NOTE(review): the line forms use `take_until!("\n")`; whether a doc comment
// on the last line of input with no trailing newline is accepted depends on
// that macro. Confirm.
Alex Crichton44bffbc2017-05-19 17:51:59 -0700921named!(doc_comment -> (), alt!(
922 do_parse!(
923 punct!("//!") >>
924 take_until!("\n") >>
925 (())
926 )
927 |
928 do_parse!(
929 option!(whitespace) >>
930 peek!(tag!("/*!")) >>
931 block_comment >>
932 (())
933 )
934 |
935 do_parse!(
936 punct!("///") >>
937 not!(tag!("/")) >>
938 take_until!("\n") >>
939 (())
940 )
941 |
942 do_parse!(
943 option!(whitespace) >>
944 peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
945 block_comment >>
946 (())
947 )
948));