blob: 8b951e520e07bdd956493535b164df039dd60661 [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
Nika Layzellf8d5f212017-12-11 14:07:02 -05004use std::cmp;
Alex Crichton44bffbc2017-05-19 17:51:59 -07005use std::collections::HashMap;
6use std::fmt;
7use std::iter;
David Tolnay041bcd42017-06-03 09:18:04 -07008use std::marker::PhantomData;
Alex Crichton44bffbc2017-05-19 17:51:59 -07009use std::ops;
10use std::rc::Rc;
11use std::str::FromStr;
12use std::vec;
13
Nika Layzellf8d5f212017-12-11 14:07:02 -050014use memchr;
Alex Crichton44bffbc2017-05-19 17:51:59 -070015use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070016use unicode_xid::UnicodeXID;
Nika Layzellf8d5f212017-12-11 14:07:02 -050017use strnom::{Cursor, PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070018
Alex Crichton1a7f7622017-07-05 17:47:15 -070019use {TokenTree, TokenNode, Delimiter, Spacing};
Alex Crichton44bffbc2017-05-19 17:51:59 -070020
David Tolnay977f8282017-05-31 17:41:33 -070021#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070022pub struct TokenStream {
23 inner: Vec<TokenTree>,
24}
25
/// Error returned when a string cannot be lexed into a `TokenStream`.
#[derive(Debug)]
pub struct LexError;
28
29impl TokenStream {
30 pub fn empty() -> TokenStream {
31 TokenStream { inner: Vec::new() }
32 }
33
34 pub fn is_empty(&self) -> bool {
35 self.inner.len() == 0
36 }
37}
38
39impl FromStr for TokenStream {
40 type Err = LexError;
41
42 fn from_str(src: &str) -> Result<TokenStream, LexError> {
Nika Layzellf8d5f212017-12-11 14:07:02 -050043 // Create a dummy file & add it to the codemap
44 let cursor = CODEMAP.with(|cm| {
45 let mut cm = cm.borrow_mut();
46 let name = format!("<parsed string {}>", cm.files.len());
47 let span = cm.add_file(&name, src);
48 Cursor {
49 rest: src,
50 off: span.lo,
51 }
52 });
53
54 match token_stream(cursor) {
David Tolnay1218e122017-06-01 11:13:45 -070055 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070056 if skip_whitespace(input).len() != 0 {
57 Err(LexError)
58 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070059 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070060 }
61 }
David Tolnay1218e122017-06-01 11:13:45 -070062 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070063 }
64 }
65}
66
67impl fmt::Display for TokenStream {
68 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
69 let mut joint = false;
70 for (i, tt) in self.inner.iter().enumerate() {
71 if i != 0 && !joint {
72 write!(f, " ")?;
73 }
74 joint = false;
75 match tt.kind {
Alex Crichton1a7f7622017-07-05 17:47:15 -070076 TokenNode::Group(delim, ref stream) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070077 let (start, end) = match delim {
78 Delimiter::Parenthesis => ("(", ")"),
79 Delimiter::Brace => ("{", "}"),
80 Delimiter::Bracket => ("[", "]"),
81 Delimiter::None => ("", ""),
82 };
Alex Crichton852d53d2017-05-19 19:25:08 -070083 if stream.0.inner.len() == 0 {
84 write!(f, "{} {}", start, end)?
85 } else {
86 write!(f, "{} {} {}", start, stream, end)?
87 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070088 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070089 TokenNode::Term(ref sym) => write!(f, "{}", sym.as_str())?,
90 TokenNode::Op(ch, ref op) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070091 write!(f, "{}", ch)?;
92 match *op {
Alex Crichton1a7f7622017-07-05 17:47:15 -070093 Spacing::Alone => {}
94 Spacing::Joint => joint = true,
Alex Crichton44bffbc2017-05-19 17:51:59 -070095 }
96 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070097 TokenNode::Literal(ref literal) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070098 write!(f, "{}", literal)?;
99 // handle comments
100 if (literal.0).0.starts_with("/") {
101 write!(f, "\n")?;
102 }
103 }
104 }
105 }
106
107 Ok(())
108 }
109}
110
111impl From<proc_macro::TokenStream> for TokenStream {
112 fn from(inner: proc_macro::TokenStream) -> TokenStream {
113 inner.to_string().parse().expect("compiler token stream parse failed")
114 }
115}
116
117impl From<TokenStream> for proc_macro::TokenStream {
118 fn from(inner: TokenStream) -> proc_macro::TokenStream {
119 inner.to_string().parse().expect("failed to parse to compiler tokens")
120 }
121}
122
123
124impl From<TokenTree> for TokenStream {
125 fn from(tree: TokenTree) -> TokenStream {
126 TokenStream { inner: vec![tree] }
127 }
128}
129
130impl iter::FromIterator<TokenStream> for TokenStream {
131 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
132 let mut v = Vec::new();
133
134 for stream in streams.into_iter() {
135 v.extend(stream.inner);
136 }
137
138 TokenStream { inner: v }
139 }
140}
141
Alex Crichton1a7f7622017-07-05 17:47:15 -0700142pub type TokenTreeIter = vec::IntoIter<TokenTree>;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700143
144impl IntoIterator for TokenStream {
145 type Item = TokenTree;
Alex Crichton1a7f7622017-07-05 17:47:15 -0700146 type IntoIter = TokenTreeIter;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700147
Alex Crichton1a7f7622017-07-05 17:47:15 -0700148 fn into_iter(self) -> TokenTreeIter {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700149 self.inner.into_iter()
150 }
151}
152
/// The name of a file registered in the codemap.
#[derive(Clone, PartialEq, Eq)]
pub struct SourceFile {
    name: String,
}

impl SourceFile {
    /// Get the path to this source file as a string.
    pub fn as_str(&self) -> &str {
        &self.name
    }

    /// Always returns `false`; this implementation does not track real files.
    pub fn is_real(&self) -> bool {
        // XXX(nika): Support real files in the future?
        false
    }
}

impl AsRef<str> for SourceFile {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}

impl PartialEq<str> for SourceFile {
    /// Compares the file name against `other`.
    fn eq(&self, other: &str) -> bool {
        self.as_ref() == other
    }
}

impl fmt::Debug for SourceFile {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("SourceFile")
            .field("path", &self.as_str())
            .field("is_real", &self.is_real())
            .finish()
    }
}
190
/// A line/column pair locating a position inside a source file.
///
/// As produced by `FileInfo::offset_line_column`, `line` starts at 1 and
/// `column` starts at 0 and is measured in bytes from the start of the line.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineColumn {
    pub line: usize,
    pub column: usize,
}
196
197thread_local! {
198 static CODEMAP: RefCell<Codemap> = RefCell::new(Codemap {
199 // NOTE: We start with a single dummy file which all call_site() and
200 // def_site() spans reference.
201 files: vec![FileInfo {
202 name: "<unspecified>".to_owned(),
203 span: Span { lo: 0, hi: 0 },
204 lines: vec![0],
205 }],
206 });
207}
208
209struct FileInfo {
210 name: String,
211 span: Span,
212 lines: Vec<usize>,
213}
214
215impl FileInfo {
216 fn offset_line_column(&self, offset: usize) -> LineColumn {
217 assert!(self.span_within(Span { lo: offset as u32, hi: offset as u32 }));
218 let offset = offset - self.span.lo as usize;
219 match self.lines.binary_search(&offset) {
220 Ok(found) => LineColumn {
221 line: found + 1,
222 column: 0
223 },
224 Err(idx) => LineColumn {
225 line: idx,
226 column: offset - self.lines[idx - 1]
227 },
228 }
229 }
230
231 fn span_within(&self, span: Span) -> bool {
232 span.lo >= self.span.lo && span.hi <= self.span.hi
233 }
234}
235
236/// Computes the offsets of each line in the given source string.
237fn lines_offsets(s: &[u8]) -> Vec<usize> {
238 let mut lines = vec![0];
239 let mut prev = 0;
240 while let Some(len) = memchr::memchr(b'\n', &s[prev..]) {
241 prev += len + 1;
242 lines.push(prev);
243 }
244 lines
245}
246
247struct Codemap {
248 files: Vec<FileInfo>,
249}
250
251impl Codemap {
252 fn next_start_pos(&self) -> u32 {
253 // Add 1 so there's always space between files.
254 //
255 // We'll always have at least 1 file, as we initialize our files list
256 // with a dummy file.
257 self.files.last().unwrap().span.hi + 1
258 }
259
260 fn add_file(&mut self, name: &str, src: &str) -> Span {
261 let lines = lines_offsets(src.as_bytes());
262 let lo = self.next_start_pos();
263 // XXX(nika): Shouild we bother doing a checked cast or checked add here?
264 let span = Span { lo: lo, hi: lo + (src.len() as u32) };
265
266 self.files.push(FileInfo {
267 name: name.to_owned(),
268 span: span,
269 lines: lines,
270 });
271
272 span
273 }
274
275 fn fileinfo(&self, span: Span) -> &FileInfo {
276 for file in &self.files {
277 if file.span_within(span) {
278 return file;
279 }
280 }
281 panic!("Invalid span with no related FileInfo!");
282 }
283}
284
Alex Crichtone6085b72017-11-21 07:24:25 -0800285#[derive(Clone, Copy, Debug)]
Nika Layzellf8d5f212017-12-11 14:07:02 -0500286pub struct Span { lo: u32, hi: u32 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700287
288impl Span {
289 pub fn call_site() -> Span {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500290 Span { lo: 0, hi: 0 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700291 }
Alex Crichtone6085b72017-11-21 07:24:25 -0800292
293 pub fn def_site() -> Span {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500294 Span { lo: 0, hi: 0 }
295 }
296
297 pub fn source_file(&self) -> SourceFile {
298 CODEMAP.with(|cm| {
299 let cm = cm.borrow();
300 let fi = cm.fileinfo(*self);
301 SourceFile {
302 name: fi.name.clone(),
303 }
304 })
305 }
306
307 pub fn start(&self) -> LineColumn {
308 CODEMAP.with(|cm| {
309 let cm = cm.borrow();
310 let fi = cm.fileinfo(*self);
311 fi.offset_line_column(self.lo as usize)
312 })
313 }
314
315 pub fn end(&self) -> LineColumn {
316 CODEMAP.with(|cm| {
317 let cm = cm.borrow();
318 let fi = cm.fileinfo(*self);
319 fi.offset_line_column(self.hi as usize)
320 })
321 }
322
323 pub fn join(&self, other: Span) -> Option<Span> {
324 CODEMAP.with(|cm| {
325 let cm = cm.borrow();
326 // If `other` is not within the same FileInfo as us, return None.
327 if !cm.fileinfo(*self).span_within(other) {
328 return None;
329 }
330 Some(Span {
331 lo: cmp::min(self.lo, other.lo),
332 hi: cmp::max(self.hi, other.hi),
333 })
334 })
Alex Crichtone6085b72017-11-21 07:24:25 -0800335 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700336}
337
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700338#[derive(Copy, Clone)]
Alex Crichton1a7f7622017-07-05 17:47:15 -0700339pub struct Term {
David Tolnay041bcd42017-06-03 09:18:04 -0700340 intern: usize,
341 not_send_sync: PhantomData<*const ()>,
342}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700343
344thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
345
Alex Crichton1a7f7622017-07-05 17:47:15 -0700346impl<'a> From<&'a str> for Term {
347 fn from(string: &'a str) -> Term {
348 Term {
David Tolnay041bcd42017-06-03 09:18:04 -0700349 intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
350 not_send_sync: PhantomData,
351 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700352 }
353}
354
Alex Crichton1a7f7622017-07-05 17:47:15 -0700355impl ops::Deref for Term {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700356 type Target = str;
357
358 fn deref(&self) -> &str {
359 SYMBOLS.with(|interner| {
360 let interner = interner.borrow();
David Tolnay041bcd42017-06-03 09:18:04 -0700361 let s = interner.get(self.intern);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700362 unsafe {
363 &*(s as *const str)
364 }
365 })
366 }
367}
368
Alex Crichton1a7f7622017-07-05 17:47:15 -0700369impl fmt::Debug for Term {
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700370 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Alex Crichton1a7f7622017-07-05 17:47:15 -0700371 f.debug_tuple("Term").field(&&**self).finish()
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700372 }
373}
374
/// Append-only string interner mapping strings to dense indices and back.
struct Interner {
    string_to_index: HashMap<MyRc, usize>,
    index_to_string: Vec<Rc<String>>,
}

/// Newtype over `Rc<String>` so the map can be queried with a plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        &self.0
    }
}

impl Interner {
    /// Creates an interner with no strings.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index of `s`, adding it first if it is not yet present.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(&idx) = self.string_to_index.get(s) {
            return idx;
        }
        let idx = self.index_to_string.len();
        let s = Rc::new(s.to_string());
        self.index_to_string.push(s.clone());
        self.string_to_index.insert(MyRc(s), idx);
        idx
    }

    /// Returns the string stored at `idx`; panics if `idx` was never handed out.
    fn get(&self, idx: usize) -> &str {
        &self.index_to_string[idx]
    }
}
411
/// A literal token, stored as the exact source text it prints as.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte literal such as `b'a'` or `b'\n'`.
    pub fn byte_char(byte: u8) -> Literal {
        match byte {
            0 => Literal("b'\\0'".to_string()),
            b'\"' => Literal("b'\"'".to_string()),
            n => {
                let mut escaped = "b'".to_string();
                escaped.extend(ascii::escape_default(n).map(|c| c as char));
                escaped.push('\'');
                Literal(escaped)
            }
        }
    }

    /// Builds a byte-string literal (`b"..."`), escaping quotes, backslashes,
    /// common control characters and every non-printable byte.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut escaped = "b\"".to_string();
        for &b in bytes {
            match b {
                b'\0' => escaped.push_str(r"\0"),
                b'\t' => escaped.push_str(r"\t"),
                b'\n' => escaped.push_str(r"\n"),
                b'\r' => escaped.push_str(r"\r"),
                b'"' => escaped.push_str("\\\""),
                b'\\' => escaped.push_str("\\\\"),
                // Remaining printable ASCII is emitted verbatim.
                b if b >= b'\x20' && b <= b'\x7E' => escaped.push(b as char),
                _ => escaped.push_str(&format!("\\x{:02X}", b)),
            }
        }
        escaped.push('"');
        Literal(escaped)
    }

    /// Wraps already-formatted doc comment text without modification.
    pub fn doccomment(s: &str) -> Literal {
        Literal(s.to_string())
    }

    /// Builds an unsuffixed literal from the `Display` form of `s`.
    pub fn float(s: f64) -> Literal {
        Literal(s.to_string())
    }

    /// Builds an unsuffixed literal from the `Display` form of `s`.
    pub fn integer(s: i64) -> Literal {
        Literal(s.to_string())
    }

    /// Builds a raw string literal `r#"..."#` with `pounds` hash marks on each
    /// side. `s` is inserted verbatim.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        Literal::raw("r", s, pounds)
    }

    /// Builds a raw byte-string literal `br#"..."#` with `pounds` hash marks
    /// on each side. `s` is inserted verbatim.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        Literal::raw("br", s, pounds)
    }

    /// Shared builder for the two raw-string forms.
    fn raw(prefix: &str, s: &str, pounds: usize) -> Literal {
        let mut ret = prefix.to_string();
        ret.extend((0..pounds).map(|_| "#"));
        ret.push('"');
        ret.push_str(s);
        ret.push('"');
        ret.extend((0..pounds).map(|_| "#"));
        Literal(ret)
    }
}

impl fmt::Display for Literal {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}
485
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700486macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700487 ($($t:ty,)*) => {$(
488 impl From<$t> for Literal {
489 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700490 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700491 }
492 }
493 )*}
494}
495
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700496ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700497 u8, u16, u32, u64, usize,
498 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700499}
500
501macro_rules! floats {
502 ($($t:ty,)*) => {$(
503 impl From<$t> for Literal {
504 fn from(t: $t) -> Literal {
505 assert!(!t.is_nan());
506 assert!(!t.is_infinite());
507 Literal(format!(concat!("{}", stringify!($t)), t))
508 }
509 }
510 )*}
511}
512
513floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700514 f32, f64,
515}
516
Alex Crichton44bffbc2017-05-19 17:51:59 -0700517impl<'a> From<&'a str> for Literal {
518 fn from(t: &'a str) -> Literal {
519 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
520 s.push('"');
521 s.insert(0, '"');
522 Literal(s)
523 }
524}
525
526impl From<char> for Literal {
527 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700528 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700529 }
530}
531
David Tolnay8e976c62017-06-01 12:12:29 -0700532named!(token_stream -> ::TokenStream, map!(
533 many0!(token_tree),
534 |trees| ::TokenStream(TokenStream { inner: trees })
535));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700536
Nika Layzellf8d5f212017-12-11 14:07:02 -0500537fn token_tree(input: Cursor) -> PResult<TokenTree> {
538 let input = skip_whitespace(input);
539 let lo = input.off;
540 let (input, kind) = token_kind(input)?;
541 let hi = input.off;
542 Ok((input, TokenTree {
543 span: ::Span(Span {
544 lo: lo,
545 hi: hi,
546 }),
547 kind: kind,
548 }))
549}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700550
Alex Crichton1a7f7622017-07-05 17:47:15 -0700551named!(token_kind -> TokenNode, alt!(
552 map!(delimited, |(d, s)| TokenNode::Group(d, s))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700553 |
Alex Crichton1a7f7622017-07-05 17:47:15 -0700554 map!(literal, TokenNode::Literal) // must be before symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700555 |
Alex Crichton52725f72017-08-28 12:20:58 -0700556 symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700557 |
Alex Crichton1a7f7622017-07-05 17:47:15 -0700558 map!(op, |(op, kind)| TokenNode::Op(op, kind))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700559));
560
David Tolnay8e976c62017-06-01 12:12:29 -0700561named!(delimited -> (Delimiter, ::TokenStream), alt!(
Alex Crichton44bffbc2017-05-19 17:51:59 -0700562 delimited!(
563 punct!("("),
564 token_stream,
565 punct!(")")
566 ) => { |ts| (Delimiter::Parenthesis, ts) }
567 |
568 delimited!(
569 punct!("["),
570 token_stream,
571 punct!("]")
572 ) => { |ts| (Delimiter::Bracket, ts) }
573 |
574 delimited!(
575 punct!("{"),
576 token_stream,
577 punct!("}")
578 ) => { |ts| (Delimiter::Brace, ts) }
579));
580
Nika Layzellf8d5f212017-12-11 14:07:02 -0500581fn symbol(mut input: Cursor) -> PResult<TokenNode> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700582 input = skip_whitespace(input);
583
584 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700585
586 let lifetime = input.starts_with("'");
587 if lifetime {
588 chars.next();
589 }
590
Alex Crichton44bffbc2017-05-19 17:51:59 -0700591 match chars.next() {
592 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700593 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700594 }
595
David Tolnay214c94c2017-06-01 12:42:56 -0700596 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700597 for (i, ch) in chars {
598 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700599 end = i;
600 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700601 }
602 }
603
Nika Layzellf8d5f212017-12-11 14:07:02 -0500604 if lifetime && &input.rest[..end] != "'static" && KEYWORDS.contains(&&input.rest[1..end]) {
David Tolnay214c94c2017-06-01 12:42:56 -0700605 Err(LexError)
606 } else {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500607 let a = &input.rest[..end];
Alex Crichton52725f72017-08-28 12:20:58 -0700608 if a == "_" {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500609 Ok((input.advance(end), TokenNode::Op('_', Spacing::Alone)))
Alex Crichton52725f72017-08-28 12:20:58 -0700610 } else {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500611 Ok((input.advance(end), TokenNode::Term(::Term::intern(a))))
Alex Crichton52725f72017-08-28 12:20:58 -0700612 }
David Tolnay214c94c2017-06-01 12:42:56 -0700613 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700614}
615
// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
//
// Used by `symbol` to reject lifetimes that are named after a keyword.
static KEYWORDS: &'static [&'static str] = &[
    "abstract", "alignof", "as", "become", "box", "break", "const", "continue",
    "crate", "do", "else", "enum", "extern", "false", "final", "fn", "for",
    "if", "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut",
    "offsetof", "override", "priv", "proc", "pub", "pure", "ref", "return",
    "self", "Self", "sizeof", "static", "struct", "super", "trait", "true",
    "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while",
    "yield",
];
626
Nika Layzellf8d5f212017-12-11 14:07:02 -0500627fn literal(input: Cursor) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700628 let input_no_ws = skip_whitespace(input);
629
630 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700631 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700632 let start = input.len() - input_no_ws.len();
633 let len = input_no_ws.len() - a.len();
634 let end = start + len;
Nika Layzellf8d5f212017-12-11 14:07:02 -0500635 Ok((a, ::Literal(Literal(input.rest[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700636 }
David Tolnay1218e122017-06-01 11:13:45 -0700637 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700638 }
639}
640
641named!(literal_nocapture -> (), alt!(
642 string
643 |
644 byte_string
645 |
646 byte
647 |
648 character
649 |
650 float
651 |
652 int
653 |
654 boolean
655 |
656 doc_comment
657));
658
659named!(string -> (), alt!(
660 quoted_string
661 |
662 preceded!(
663 punct!("r"),
664 raw_string
665 ) => { |_| () }
666));
667
668named!(quoted_string -> (), delimited!(
669 punct!("\""),
670 cooked_string,
671 tag!("\"")
672));
673
Nika Layzellf8d5f212017-12-11 14:07:02 -0500674fn cooked_string(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700675 let mut chars = input.char_indices().peekable();
676 while let Some((byte_offset, ch)) = chars.next() {
677 match ch {
678 '"' => {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500679 return Ok((input.advance(byte_offset), ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700680 }
681 '\r' => {
682 if let Some((_, '\n')) = chars.next() {
683 // ...
684 } else {
685 break;
686 }
687 }
688 '\\' => {
689 match chars.next() {
690 Some((_, 'x')) => {
691 if !backslash_x_char(&mut chars) {
692 break
693 }
694 }
695 Some((_, 'n')) |
696 Some((_, 'r')) |
697 Some((_, 't')) |
698 Some((_, '\\')) |
699 Some((_, '\'')) |
700 Some((_, '"')) |
701 Some((_, '0')) => {}
702 Some((_, 'u')) => {
703 if !backslash_u(&mut chars) {
704 break
705 }
706 }
707 Some((_, '\n')) | Some((_, '\r')) => {
708 while let Some(&(_, ch)) = chars.peek() {
709 if ch.is_whitespace() {
710 chars.next();
711 } else {
712 break;
713 }
714 }
715 }
716 _ => break,
717 }
718 }
719 _ch => {}
720 }
721 }
David Tolnay1218e122017-06-01 11:13:45 -0700722 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700723}
724
725named!(byte_string -> (), alt!(
726 delimited!(
727 punct!("b\""),
728 cooked_byte_string,
729 tag!("\"")
730 ) => { |_| () }
731 |
732 preceded!(
733 punct!("br"),
734 raw_string
735 ) => { |_| () }
736));
737
Nika Layzellf8d5f212017-12-11 14:07:02 -0500738fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700739 let mut bytes = input.bytes().enumerate();
740 'outer: while let Some((offset, b)) = bytes.next() {
741 match b {
742 b'"' => {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500743 return Ok((input.advance(offset), ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700744 }
745 b'\r' => {
746 if let Some((_, b'\n')) = bytes.next() {
747 // ...
748 } else {
749 break;
750 }
751 }
752 b'\\' => {
753 match bytes.next() {
754 Some((_, b'x')) => {
755 if !backslash_x_byte(&mut bytes) {
756 break
757 }
758 }
759 Some((_, b'n')) |
760 Some((_, b'r')) |
761 Some((_, b't')) |
762 Some((_, b'\\')) |
763 Some((_, b'0')) |
764 Some((_, b'\'')) |
765 Some((_, b'"')) => {}
766 Some((newline, b'\n')) |
767 Some((newline, b'\r')) => {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500768 let rest = input.advance(newline + 1);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700769 for (offset, ch) in rest.char_indices() {
770 if !ch.is_whitespace() {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500771 input = rest.advance(offset);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700772 bytes = input.bytes().enumerate();
773 continue 'outer;
774 }
775 }
776 break;
777 }
778 _ => break,
779 }
780 }
781 b if b < 0x80 => {}
782 _ => break,
783 }
784 }
David Tolnay1218e122017-06-01 11:13:45 -0700785 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700786}
787
Nika Layzellf8d5f212017-12-11 14:07:02 -0500788fn raw_string(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700789 let mut chars = input.char_indices();
790 let mut n = 0;
791 while let Some((byte_offset, ch)) = chars.next() {
792 match ch {
793 '"' => {
794 n = byte_offset;
795 break;
796 }
797 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700798 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700799 }
800 }
801 for (byte_offset, ch) in chars {
802 match ch {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500803 '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => {
804 let rest = input.advance(byte_offset + 1 + n);
David Tolnay1218e122017-06-01 11:13:45 -0700805 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700806 }
807 '\r' => {}
808 _ => {}
809 }
810 }
David Tolnay1218e122017-06-01 11:13:45 -0700811 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700812}
813
814named!(byte -> (), do_parse!(
815 punct!("b") >>
816 tag!("'") >>
817 cooked_byte >>
818 tag!("'") >>
819 (())
820));
821
Nika Layzellf8d5f212017-12-11 14:07:02 -0500822fn cooked_byte(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700823 let mut bytes = input.bytes().enumerate();
824 let ok = match bytes.next().map(|(_, b)| b) {
825 Some(b'\\') => {
826 match bytes.next().map(|(_, b)| b) {
827 Some(b'x') => backslash_x_byte(&mut bytes),
828 Some(b'n') |
829 Some(b'r') |
830 Some(b't') |
831 Some(b'\\') |
832 Some(b'0') |
833 Some(b'\'') |
834 Some(b'"') => true,
835 _ => false,
836 }
837 }
838 b => b.is_some(),
839 };
840 if ok {
841 match bytes.next() {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500842 Some((offset, _)) => Ok((input.advance(offset), ())),
843 None => Ok((input.advance(input.len()), ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700844 }
845 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700846 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700847 }
848}
849
850named!(character -> (), do_parse!(
851 punct!("'") >>
852 cooked_char >>
853 tag!("'") >>
854 (())
855));
856
Nika Layzellf8d5f212017-12-11 14:07:02 -0500857fn cooked_char(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700858 let mut chars = input.char_indices();
859 let ok = match chars.next().map(|(_, ch)| ch) {
860 Some('\\') => {
861 match chars.next().map(|(_, ch)| ch) {
862 Some('x') => backslash_x_char(&mut chars),
863 Some('u') => backslash_u(&mut chars),
864 Some('n') |
865 Some('r') |
866 Some('t') |
867 Some('\\') |
868 Some('0') |
869 Some('\'') |
870 Some('"') => true,
871 _ => false,
872 }
873 }
874 ch => ch.is_some(),
875 };
876 if ok {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500877 match chars.next() {
878 Some((idx, _)) => Ok((input.advance(idx), ())),
879 None => Ok((input.advance(input.len()), ())),
880 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700881 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700882 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700883 }
884}
885
886macro_rules! next_ch {
887 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
888 match $chars.next() {
889 Some((_, ch)) => match ch {
890 $pat $(| $rest)* => ch,
891 _ => return false,
892 },
893 None => return false
894 }
895 };
896}
897
898fn backslash_x_char<I>(chars: &mut I) -> bool
899 where I: Iterator<Item = (usize, char)>
900{
901 next_ch!(chars @ '0'...'7');
902 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
903 true
904}
905
906fn backslash_x_byte<I>(chars: &mut I) -> bool
907 where I: Iterator<Item = (usize, u8)>
908{
909 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
910 next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
911 true
912}
913
914fn backslash_u<I>(chars: &mut I) -> bool
915 where I: Iterator<Item = (usize, char)>
916{
917 next_ch!(chars @ '{');
918 next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
919 let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
920 if b == '}' {
921 return true
922 }
923 let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
924 if c == '}' {
925 return true
926 }
927 let d = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
928 if d == '}' {
929 return true
930 }
931 let e = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
932 if e == '}' {
933 return true
934 }
935 let f = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
936 if f == '}' {
937 return true
938 }
939 next_ch!(chars @ '}');
940 true
941}
942
Nika Layzellf8d5f212017-12-11 14:07:02 -0500943fn float(input: Cursor) -> PResult<()> {
David Tolnay744a6b82017-06-01 11:34:29 -0700944 let (rest, ()) = float_digits(input)?;
945 for suffix in &["f32", "f64"] {
946 if rest.starts_with(suffix) {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500947 return word_break(rest.advance(suffix.len()));
David Tolnay744a6b82017-06-01 11:34:29 -0700948 }
949 }
950 word_break(rest)
951}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700952
Nika Layzellf8d5f212017-12-11 14:07:02 -0500953fn float_digits(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700954 let mut chars = input.chars().peekable();
955 match chars.next() {
956 Some(ch) if ch >= '0' && ch <= '9' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700957 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700958 }
959
960 let mut len = 1;
961 let mut has_dot = false;
962 let mut has_exp = false;
963 while let Some(&ch) = chars.peek() {
964 match ch {
965 '0'...'9' | '_' => {
966 chars.next();
967 len += 1;
968 }
969 '.' => {
970 if has_dot {
971 break;
972 }
973 chars.next();
974 if chars.peek()
975 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
976 .unwrap_or(false) {
David Tolnay1218e122017-06-01 11:13:45 -0700977 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700978 }
979 len += 1;
980 has_dot = true;
981 }
982 'e' | 'E' => {
983 chars.next();
984 len += 1;
985 has_exp = true;
986 break;
987 }
988 _ => break,
989 }
990 }
991
Nika Layzellf8d5f212017-12-11 14:07:02 -0500992 let rest = input.advance(len);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700993 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
David Tolnay1218e122017-06-01 11:13:45 -0700994 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700995 }
996
997 if has_exp {
998 let mut has_exp_value = false;
999 while let Some(&ch) = chars.peek() {
1000 match ch {
1001 '+' | '-' => {
1002 if has_exp_value {
1003 break;
1004 }
1005 chars.next();
1006 len += 1;
1007 }
1008 '0'...'9' => {
1009 chars.next();
1010 len += 1;
1011 has_exp_value = true;
1012 }
1013 '_' => {
1014 chars.next();
1015 len += 1;
1016 }
1017 _ => break,
1018 }
1019 }
1020 if !has_exp_value {
David Tolnay1218e122017-06-01 11:13:45 -07001021 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001022 }
1023 }
1024
Nika Layzellf8d5f212017-12-11 14:07:02 -05001025 Ok((input.advance(len), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -07001026}
1027
Nika Layzellf8d5f212017-12-11 14:07:02 -05001028fn int(input: Cursor) -> PResult<()> {
David Tolnay744a6b82017-06-01 11:34:29 -07001029 let (rest, ()) = digits(input)?;
1030 for suffix in &[
1031 "isize",
1032 "i8",
1033 "i16",
1034 "i32",
1035 "i64",
1036 "i128",
1037 "usize",
1038 "u8",
1039 "u16",
1040 "u32",
1041 "u64",
1042 "u128",
1043 ] {
1044 if rest.starts_with(suffix) {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001045 return word_break(rest.advance(suffix.len()));
David Tolnay744a6b82017-06-01 11:34:29 -07001046 }
1047 }
1048 word_break(rest)
1049}
Alex Crichton44bffbc2017-05-19 17:51:59 -07001050
Nika Layzellf8d5f212017-12-11 14:07:02 -05001051fn digits(mut input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -07001052 let base = if input.starts_with("0x") {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001053 input = input.advance(2);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001054 16
1055 } else if input.starts_with("0o") {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001056 input = input.advance(2);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001057 8
1058 } else if input.starts_with("0b") {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001059 input = input.advance(2);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001060 2
1061 } else {
1062 10
1063 };
1064
Alex Crichton44bffbc2017-05-19 17:51:59 -07001065 let mut len = 0;
1066 let mut empty = true;
1067 for b in input.bytes() {
1068 let digit = match b {
1069 b'0'...b'9' => (b - b'0') as u64,
1070 b'a'...b'f' => 10 + (b - b'a') as u64,
1071 b'A'...b'F' => 10 + (b - b'A') as u64,
1072 b'_' => {
1073 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -07001074 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001075 }
1076 len += 1;
1077 continue;
1078 }
1079 _ => break,
1080 };
1081 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -07001082 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001083 }
Alex Crichton44bffbc2017-05-19 17:51:59 -07001084 len += 1;
1085 empty = false;
1086 }
1087 if empty {
David Tolnay1218e122017-06-01 11:13:45 -07001088 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -07001089 } else {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001090 Ok((input.advance(len), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -07001091 }
1092}
1093
1094named!(boolean -> (), alt!(
1095 keyword!("true") => { |_| () }
1096 |
1097 keyword!("false") => { |_| () }
1098));
1099
Nika Layzellf8d5f212017-12-11 14:07:02 -05001100fn op(input: Cursor) -> PResult<(char, Spacing)> {
David Tolnayea75c5f2017-05-31 23:40:33 -07001101 let input = skip_whitespace(input);
1102 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -07001103 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -07001104 let kind = match op_char(rest) {
Alex Crichton1a7f7622017-07-05 17:47:15 -07001105 Ok(_) => Spacing::Joint,
1106 Err(LexError) => Spacing::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -07001107 };
David Tolnay1218e122017-06-01 11:13:45 -07001108 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -07001109 }
David Tolnay1218e122017-06-01 11:13:45 -07001110 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -07001111 }
1112}
1113
Nika Layzellf8d5f212017-12-11 14:07:02 -05001114fn op_char(input: Cursor) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -07001115 let mut chars = input.chars();
1116 let first = match chars.next() {
1117 Some(ch) => ch,
1118 None => {
David Tolnay1218e122017-06-01 11:13:45 -07001119 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -07001120 }
1121 };
1122 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
1123 if recognized.contains(first) {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001124 Ok((input.advance(first.len_utf8()), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -07001125 } else {
David Tolnay1218e122017-06-01 11:13:45 -07001126 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -07001127 }
1128}
1129
Alex Crichton44bffbc2017-05-19 17:51:59 -07001130named!(doc_comment -> (), alt!(
1131 do_parse!(
1132 punct!("//!") >>
1133 take_until!("\n") >>
1134 (())
1135 )
1136 |
1137 do_parse!(
1138 option!(whitespace) >>
1139 peek!(tag!("/*!")) >>
1140 block_comment >>
1141 (())
1142 )
1143 |
1144 do_parse!(
1145 punct!("///") >>
1146 not!(tag!("/")) >>
1147 take_until!("\n") >>
1148 (())
1149 )
1150 |
1151 do_parse!(
1152 option!(whitespace) >>
1153 peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
1154 block_comment >>
1155 (())
1156 )
1157));