blob: 4bcaf6c219d76bd1dca843d1f6272a9aca362532 [file] [log] [blame]
Alex Crichton76a5cc82017-05-23 07:01:44 -07001use std::ascii;
Alex Crichton44bffbc2017-05-19 17:51:59 -07002use std::borrow::Borrow;
3use std::cell::RefCell;
Nika Layzellf8d5f212017-12-11 14:07:02 -05004use std::cmp;
Alex Crichton44bffbc2017-05-19 17:51:59 -07005use std::collections::HashMap;
6use std::fmt;
7use std::iter;
David Tolnay041bcd42017-06-03 09:18:04 -07008use std::marker::PhantomData;
Alex Crichton44bffbc2017-05-19 17:51:59 -07009use std::ops;
10use std::rc::Rc;
11use std::str::FromStr;
12use std::vec;
13
Nika Layzellf8d5f212017-12-11 14:07:02 -050014use memchr;
Alex Crichton44bffbc2017-05-19 17:51:59 -070015use proc_macro;
David Tolnayb1032662017-05-31 15:52:28 -070016use unicode_xid::UnicodeXID;
Nika Layzellf8d5f212017-12-11 14:07:02 -050017use strnom::{Cursor, PResult, skip_whitespace, block_comment, whitespace, word_break};
Alex Crichton44bffbc2017-05-19 17:51:59 -070018
Alex Crichton1a7f7622017-07-05 17:47:15 -070019use {TokenTree, TokenNode, Delimiter, Spacing};
Alex Crichton44bffbc2017-05-19 17:51:59 -070020
David Tolnay977f8282017-05-31 17:41:33 -070021#[derive(Clone, Debug)]
Alex Crichton44bffbc2017-05-19 17:51:59 -070022pub struct TokenStream {
23 inner: Vec<TokenTree>,
24}
25
/// Error returned when a string cannot be lexed into a `TokenStream`.
///
/// It carries no payload; it only signals that lexing failed.
#[derive(Debug)]
pub struct LexError;
28
29impl TokenStream {
30 pub fn empty() -> TokenStream {
31 TokenStream { inner: Vec::new() }
32 }
33
34 pub fn is_empty(&self) -> bool {
35 self.inner.len() == 0
36 }
37}
38
39impl FromStr for TokenStream {
40 type Err = LexError;
41
42 fn from_str(src: &str) -> Result<TokenStream, LexError> {
Nika Layzellf8d5f212017-12-11 14:07:02 -050043 // Create a dummy file & add it to the codemap
44 let cursor = CODEMAP.with(|cm| {
45 let mut cm = cm.borrow_mut();
46 let name = format!("<parsed string {}>", cm.files.len());
47 let span = cm.add_file(&name, src);
48 Cursor {
49 rest: src,
50 off: span.lo,
51 }
52 });
53
54 match token_stream(cursor) {
David Tolnay1218e122017-06-01 11:13:45 -070055 Ok((input, output)) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070056 if skip_whitespace(input).len() != 0 {
57 Err(LexError)
58 } else {
David Tolnay8e976c62017-06-01 12:12:29 -070059 Ok(output.0)
Alex Crichton44bffbc2017-05-19 17:51:59 -070060 }
61 }
David Tolnay1218e122017-06-01 11:13:45 -070062 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -070063 }
64 }
65}
66
67impl fmt::Display for TokenStream {
68 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
69 let mut joint = false;
70 for (i, tt) in self.inner.iter().enumerate() {
71 if i != 0 && !joint {
72 write!(f, " ")?;
73 }
74 joint = false;
75 match tt.kind {
Alex Crichton1a7f7622017-07-05 17:47:15 -070076 TokenNode::Group(delim, ref stream) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070077 let (start, end) = match delim {
78 Delimiter::Parenthesis => ("(", ")"),
79 Delimiter::Brace => ("{", "}"),
80 Delimiter::Bracket => ("[", "]"),
81 Delimiter::None => ("", ""),
82 };
Alex Crichton852d53d2017-05-19 19:25:08 -070083 if stream.0.inner.len() == 0 {
84 write!(f, "{} {}", start, end)?
85 } else {
86 write!(f, "{} {} {}", start, stream, end)?
87 }
Alex Crichton44bffbc2017-05-19 17:51:59 -070088 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070089 TokenNode::Term(ref sym) => write!(f, "{}", sym.as_str())?,
90 TokenNode::Op(ch, ref op) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070091 write!(f, "{}", ch)?;
92 match *op {
Alex Crichton1a7f7622017-07-05 17:47:15 -070093 Spacing::Alone => {}
94 Spacing::Joint => joint = true,
Alex Crichton44bffbc2017-05-19 17:51:59 -070095 }
96 }
Alex Crichton1a7f7622017-07-05 17:47:15 -070097 TokenNode::Literal(ref literal) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -070098 write!(f, "{}", literal)?;
99 // handle comments
100 if (literal.0).0.starts_with("/") {
101 write!(f, "\n")?;
102 }
103 }
104 }
105 }
106
107 Ok(())
108 }
109}
110
111impl From<proc_macro::TokenStream> for TokenStream {
112 fn from(inner: proc_macro::TokenStream) -> TokenStream {
113 inner.to_string().parse().expect("compiler token stream parse failed")
114 }
115}
116
117impl From<TokenStream> for proc_macro::TokenStream {
118 fn from(inner: TokenStream) -> proc_macro::TokenStream {
119 inner.to_string().parse().expect("failed to parse to compiler tokens")
120 }
121}
122
123
124impl From<TokenTree> for TokenStream {
125 fn from(tree: TokenTree) -> TokenStream {
126 TokenStream { inner: vec![tree] }
127 }
128}
129
130impl iter::FromIterator<TokenStream> for TokenStream {
131 fn from_iter<I: IntoIterator<Item=TokenStream>>(streams: I) -> Self {
132 let mut v = Vec::new();
133
134 for stream in streams.into_iter() {
135 v.extend(stream.inner);
136 }
137
138 TokenStream { inner: v }
139 }
140}
141
Alex Crichton1a7f7622017-07-05 17:47:15 -0700142pub type TokenTreeIter = vec::IntoIter<TokenTree>;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700143
144impl IntoIterator for TokenStream {
145 type Item = TokenTree;
Alex Crichton1a7f7622017-07-05 17:47:15 -0700146 type IntoIter = TokenTreeIter;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700147
Alex Crichton1a7f7622017-07-05 17:47:15 -0700148 fn into_iter(self) -> TokenTreeIter {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700149 self.inner.into_iter()
150 }
151}
152
/// Name of a source file, kept as a plain string.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct FileName(String);

impl fmt::Display for FileName {
    /// Prints the name exactly as stored, honouring formatter width/padding.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
161
Nika Layzellf8d5f212017-12-11 14:07:02 -0500162#[derive(Clone, PartialEq, Eq)]
163pub struct SourceFile {
Nika Layzellb35a9a32017-12-30 14:34:35 -0500164 name: FileName,
Nika Layzellf8d5f212017-12-11 14:07:02 -0500165}
166
167impl SourceFile {
168 /// Get the path to this source file as a string.
Nika Layzellb35a9a32017-12-30 14:34:35 -0500169 pub fn path(&self) -> &FileName {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500170 &self.name
171 }
172
173 pub fn is_real(&self) -> bool {
174 // XXX(nika): Support real files in the future?
175 false
176 }
177}
178
Nika Layzellb35a9a32017-12-30 14:34:35 -0500179impl AsRef<FileName> for SourceFile {
180 fn as_ref(&self) -> &FileName {
181 self.path()
Nika Layzellf8d5f212017-12-11 14:07:02 -0500182 }
183}
184
185impl fmt::Debug for SourceFile {
186 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
187 f.debug_struct("SourceFile")
Nika Layzellb35a9a32017-12-30 14:34:35 -0500188 .field("path", &self.path())
Nika Layzellf8d5f212017-12-11 14:07:02 -0500189 .field("is_real", &self.is_real())
190 .finish()
191 }
192}
193
/// A position inside a source file.
///
/// As produced by `FileInfo::offset_line_column`, `line` starts at 1 and
/// `column` is the offset from the start of that line, starting at 0.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineColumn {
    /// Line number (the first line is 1).
    pub line: usize,
    /// Offset within the line (the first column is 0).
    pub column: usize,
}
199
200thread_local! {
201 static CODEMAP: RefCell<Codemap> = RefCell::new(Codemap {
202 // NOTE: We start with a single dummy file which all call_site() and
203 // def_site() spans reference.
204 files: vec![FileInfo {
205 name: "<unspecified>".to_owned(),
206 span: Span { lo: 0, hi: 0 },
207 lines: vec![0],
208 }],
209 });
210}
211
212struct FileInfo {
213 name: String,
214 span: Span,
215 lines: Vec<usize>,
216}
217
218impl FileInfo {
219 fn offset_line_column(&self, offset: usize) -> LineColumn {
220 assert!(self.span_within(Span { lo: offset as u32, hi: offset as u32 }));
221 let offset = offset - self.span.lo as usize;
222 match self.lines.binary_search(&offset) {
223 Ok(found) => LineColumn {
224 line: found + 1,
225 column: 0
226 },
227 Err(idx) => LineColumn {
228 line: idx,
229 column: offset - self.lines[idx - 1]
230 },
231 }
232 }
233
234 fn span_within(&self, span: Span) -> bool {
235 span.lo >= self.span.lo && span.hi <= self.span.hi
236 }
237}
238
239/// Computes the offsets of each line in the given source string.
240fn lines_offsets(s: &[u8]) -> Vec<usize> {
241 let mut lines = vec![0];
242 let mut prev = 0;
243 while let Some(len) = memchr::memchr(b'\n', &s[prev..]) {
244 prev += len + 1;
245 lines.push(prev);
246 }
247 lines
248}
249
250struct Codemap {
251 files: Vec<FileInfo>,
252}
253
254impl Codemap {
255 fn next_start_pos(&self) -> u32 {
256 // Add 1 so there's always space between files.
257 //
258 // We'll always have at least 1 file, as we initialize our files list
259 // with a dummy file.
260 self.files.last().unwrap().span.hi + 1
261 }
262
263 fn add_file(&mut self, name: &str, src: &str) -> Span {
264 let lines = lines_offsets(src.as_bytes());
265 let lo = self.next_start_pos();
266 // XXX(nika): Shouild we bother doing a checked cast or checked add here?
267 let span = Span { lo: lo, hi: lo + (src.len() as u32) };
268
269 self.files.push(FileInfo {
270 name: name.to_owned(),
271 span: span,
272 lines: lines,
273 });
274
275 span
276 }
277
278 fn fileinfo(&self, span: Span) -> &FileInfo {
279 for file in &self.files {
280 if file.span_within(span) {
281 return file;
282 }
283 }
284 panic!("Invalid span with no related FileInfo!");
285 }
286}
287
Alex Crichtone6085b72017-11-21 07:24:25 -0800288#[derive(Clone, Copy, Debug)]
Nika Layzellf8d5f212017-12-11 14:07:02 -0500289pub struct Span { lo: u32, hi: u32 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700290
291impl Span {
292 pub fn call_site() -> Span {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500293 Span { lo: 0, hi: 0 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700294 }
Alex Crichtone6085b72017-11-21 07:24:25 -0800295
296 pub fn def_site() -> Span {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500297 Span { lo: 0, hi: 0 }
298 }
299
300 pub fn source_file(&self) -> SourceFile {
301 CODEMAP.with(|cm| {
302 let cm = cm.borrow();
303 let fi = cm.fileinfo(*self);
304 SourceFile {
Nika Layzellb35a9a32017-12-30 14:34:35 -0500305 name: FileName(fi.name.clone()),
Nika Layzellf8d5f212017-12-11 14:07:02 -0500306 }
307 })
308 }
309
310 pub fn start(&self) -> LineColumn {
311 CODEMAP.with(|cm| {
312 let cm = cm.borrow();
313 let fi = cm.fileinfo(*self);
314 fi.offset_line_column(self.lo as usize)
315 })
316 }
317
318 pub fn end(&self) -> LineColumn {
319 CODEMAP.with(|cm| {
320 let cm = cm.borrow();
321 let fi = cm.fileinfo(*self);
322 fi.offset_line_column(self.hi as usize)
323 })
324 }
325
326 pub fn join(&self, other: Span) -> Option<Span> {
327 CODEMAP.with(|cm| {
328 let cm = cm.borrow();
329 // If `other` is not within the same FileInfo as us, return None.
330 if !cm.fileinfo(*self).span_within(other) {
331 return None;
332 }
333 Some(Span {
334 lo: cmp::min(self.lo, other.lo),
335 hi: cmp::max(self.hi, other.hi),
336 })
337 })
Alex Crichtone6085b72017-11-21 07:24:25 -0800338 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700339}
340
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700341#[derive(Copy, Clone)]
Alex Crichton1a7f7622017-07-05 17:47:15 -0700342pub struct Term {
David Tolnay041bcd42017-06-03 09:18:04 -0700343 intern: usize,
344 not_send_sync: PhantomData<*const ()>,
345}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700346
347thread_local!(static SYMBOLS: RefCell<Interner> = RefCell::new(Interner::new()));
348
Alex Crichton1a7f7622017-07-05 17:47:15 -0700349impl<'a> From<&'a str> for Term {
350 fn from(string: &'a str) -> Term {
351 Term {
David Tolnay041bcd42017-06-03 09:18:04 -0700352 intern: SYMBOLS.with(|s| s.borrow_mut().intern(string)),
353 not_send_sync: PhantomData,
354 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700355 }
356}
357
Alex Crichton1a7f7622017-07-05 17:47:15 -0700358impl ops::Deref for Term {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700359 type Target = str;
360
361 fn deref(&self) -> &str {
362 SYMBOLS.with(|interner| {
363 let interner = interner.borrow();
David Tolnay041bcd42017-06-03 09:18:04 -0700364 let s = interner.get(self.intern);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700365 unsafe {
366 &*(s as *const str)
367 }
368 })
369 }
370}
371
Alex Crichton1a7f7622017-07-05 17:47:15 -0700372impl fmt::Debug for Term {
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700373 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Alex Crichton1a7f7622017-07-05 17:47:15 -0700374 f.debug_tuple("Term").field(&&**self).finish()
David Tolnay8ad3e3e2017-06-03 16:45:00 -0700375 }
376}
377
/// Append-only string interner: every distinct string gets a stable index.
struct Interner {
    /// Lookup from string contents to index.
    string_to_index: HashMap<MyRc, usize>,
    /// Lookup from index to string; shares each allocation with the map key.
    index_to_string: Vec<Rc<String>>,
}

/// Map key wrapper that lets the map be queried with a plain `&str`.
#[derive(Hash, Eq, PartialEq)]
struct MyRc(Rc<String>);

impl Borrow<str> for MyRc {
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl Interner {
    /// Creates an empty interner.
    fn new() -> Interner {
        Interner {
            string_to_index: HashMap::new(),
            index_to_string: Vec::new(),
        }
    }

    /// Returns the index for `s`, adding it to the table on first sight.
    fn intern(&mut self, s: &str) -> usize {
        if let Some(existing) = self.string_to_index.get(s).cloned() {
            return existing;
        }
        // New strings are appended, so the next index is the current length.
        let index = self.index_to_string.len();
        let shared = Rc::new(s.to_owned());
        self.index_to_string.push(shared.clone());
        self.string_to_index.insert(MyRc(shared), index);
        index
    }

    /// Returns the string stored at `idx`; panics if `idx` was never issued.
    fn get(&self, idx: usize) -> &str {
        self.index_to_string[idx].as_str()
    }
}
414
/// A literal token, stored as the source text that represents it.
#[derive(Clone, Debug)]
pub struct Literal(String);

impl Literal {
    /// Builds a byte literal such as `b'a'` or `b'\n'`.
    ///
    /// `0` is written as `\0` and `"` is written unescaped; every other byte
    /// uses `std::ascii::escape_default`.
    pub fn byte_char(byte: u8) -> Literal {
        let body: String = match byte {
            0 => "\\0".to_string(),
            b'"' => "\"".to_string(),
            other => ascii::escape_default(other).map(|c| c as char).collect(),
        };
        Literal(format!("b'{}'", body))
    }

    /// Builds a byte-string literal (`b"..."`) for `bytes`.
    ///
    /// Printable ASCII is copied through, common control characters use their
    /// short escapes, and everything else is written as `\xNN`.
    pub fn byte_string(bytes: &[u8]) -> Literal {
        let mut escaped = String::with_capacity(bytes.len() + 3);
        escaped.push_str("b\"");
        for &b in bytes {
            match b {
                b'\0' => escaped.push_str(r"\0"),
                b'\t' => escaped.push_str(r"\t"),
                b'\n' => escaped.push_str(r"\n"),
                b'\r' => escaped.push_str(r"\r"),
                b'"' => escaped.push_str("\\\""),
                b'\\' => escaped.push_str("\\\\"),
                b if b >= 0x20 && b <= 0x7E => escaped.push(b as char),
                b => escaped.push_str(&format!("\\x{:02X}", b)),
            }
        }
        escaped.push('"');
        Literal(escaped)
    }

    /// Wraps comment text verbatim; the caller supplies the comment markers.
    pub fn doccomment(s: &str) -> Literal {
        Literal(s.to_string())
    }

    /// Builds an unsuffixed floating-point literal.
    ///
    /// `f64`'s `Display` output drops the fractional part of whole numbers
    /// (`1.0` prints as `1`), which would read back as an integer literal, so
    /// `.0` is appended when a finite value prints without a decimal point.
    /// Non-finite values are passed through unchanged, as before; they do not
    /// form valid float literals.
    pub fn float(s: f64) -> Literal {
        let mut repr = s.to_string();
        if s.is_finite() && !repr.contains('.') {
            repr.push_str(".0");
        }
        Literal(repr)
    }

    /// Builds an unsuffixed integer literal.
    pub fn integer(s: i64) -> Literal {
        Literal(s.to_string())
    }

    /// Builds a raw string literal `r#"..."#` with `pounds` hash marks.
    ///
    /// `s` is inserted verbatim; the caller must pick enough `pounds` for it.
    pub fn raw_string(s: &str, pounds: usize) -> Literal {
        Literal::raw("r", s, pounds)
    }

    /// Builds a raw byte-string literal `br#"..."#` with `pounds` hash marks.
    ///
    /// `s` is inserted verbatim; the caller must pick enough `pounds` for it.
    pub fn raw_byte_string(s: &str, pounds: usize) -> Literal {
        Literal::raw("br", s, pounds)
    }

    /// Shared body of the raw literal constructors: prefix, hashes, quoted
    /// text, hashes.
    fn raw(prefix: &str, s: &str, pounds: usize) -> Literal {
        let hashes: String = iter::repeat('#').take(pounds).collect();
        Literal(format!("{}{}\"{}\"{}", prefix, hashes, s, hashes))
    }
}
482
Alex Crichton44bffbc2017-05-19 17:51:59 -0700483impl fmt::Display for Literal {
484 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
485 self.0.fmt(f)
486 }
487}
488
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700489macro_rules! ints {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700490 ($($t:ty,)*) => {$(
491 impl From<$t> for Literal {
492 fn from(t: $t) -> Literal {
Alex Crichton852d53d2017-05-19 19:25:08 -0700493 Literal(format!(concat!("{}", stringify!($t)), t))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700494 }
495 }
496 )*}
497}
498
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700499ints! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700500 u8, u16, u32, u64, usize,
501 i8, i16, i32, i64, isize,
Alex Crichton9c2fb0a2017-05-26 08:49:31 -0700502}
503
504macro_rules! floats {
505 ($($t:ty,)*) => {$(
506 impl From<$t> for Literal {
507 fn from(t: $t) -> Literal {
508 assert!(!t.is_nan());
509 assert!(!t.is_infinite());
510 Literal(format!(concat!("{}", stringify!($t)), t))
511 }
512 }
513 )*}
514}
515
516floats! {
Alex Crichton852d53d2017-05-19 19:25:08 -0700517 f32, f64,
518}
519
Alex Crichton44bffbc2017-05-19 17:51:59 -0700520impl<'a> From<&'a str> for Literal {
521 fn from(t: &'a str) -> Literal {
522 let mut s = t.chars().flat_map(|c| c.escape_default()).collect::<String>();
523 s.push('"');
524 s.insert(0, '"');
525 Literal(s)
526 }
527}
528
529impl From<char> for Literal {
530 fn from(t: char) -> Literal {
Alex Crichton2d0cf0b2017-05-26 14:00:16 -0700531 Literal(format!("'{}'", t.escape_default().collect::<String>()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700532 }
533}
534
David Tolnay8e976c62017-06-01 12:12:29 -0700535named!(token_stream -> ::TokenStream, map!(
536 many0!(token_tree),
537 |trees| ::TokenStream(TokenStream { inner: trees })
538));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700539
Nika Layzellf8d5f212017-12-11 14:07:02 -0500540fn token_tree(input: Cursor) -> PResult<TokenTree> {
541 let input = skip_whitespace(input);
542 let lo = input.off;
543 let (input, kind) = token_kind(input)?;
544 let hi = input.off;
545 Ok((input, TokenTree {
546 span: ::Span(Span {
547 lo: lo,
548 hi: hi,
549 }),
550 kind: kind,
551 }))
552}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700553
Alex Crichton1a7f7622017-07-05 17:47:15 -0700554named!(token_kind -> TokenNode, alt!(
555 map!(delimited, |(d, s)| TokenNode::Group(d, s))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700556 |
Alex Crichton1a7f7622017-07-05 17:47:15 -0700557 map!(literal, TokenNode::Literal) // must be before symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700558 |
Alex Crichton52725f72017-08-28 12:20:58 -0700559 symbol
Alex Crichton44bffbc2017-05-19 17:51:59 -0700560 |
Alex Crichton1a7f7622017-07-05 17:47:15 -0700561 map!(op, |(op, kind)| TokenNode::Op(op, kind))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700562));
563
David Tolnay8e976c62017-06-01 12:12:29 -0700564named!(delimited -> (Delimiter, ::TokenStream), alt!(
Alex Crichton44bffbc2017-05-19 17:51:59 -0700565 delimited!(
566 punct!("("),
567 token_stream,
568 punct!(")")
569 ) => { |ts| (Delimiter::Parenthesis, ts) }
570 |
571 delimited!(
572 punct!("["),
573 token_stream,
574 punct!("]")
575 ) => { |ts| (Delimiter::Bracket, ts) }
576 |
577 delimited!(
578 punct!("{"),
579 token_stream,
580 punct!("}")
581 ) => { |ts| (Delimiter::Brace, ts) }
582));
583
Nika Layzellf8d5f212017-12-11 14:07:02 -0500584fn symbol(mut input: Cursor) -> PResult<TokenNode> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700585 input = skip_whitespace(input);
586
587 let mut chars = input.char_indices();
David Tolnaya202d502017-06-01 12:26:55 -0700588
589 let lifetime = input.starts_with("'");
590 if lifetime {
591 chars.next();
592 }
593
Alex Crichton44bffbc2017-05-19 17:51:59 -0700594 match chars.next() {
595 Some((_, ch)) if UnicodeXID::is_xid_start(ch) || ch == '_' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700596 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700597 }
598
David Tolnay214c94c2017-06-01 12:42:56 -0700599 let mut end = input.len();
Alex Crichton44bffbc2017-05-19 17:51:59 -0700600 for (i, ch) in chars {
601 if !UnicodeXID::is_xid_continue(ch) {
David Tolnay214c94c2017-06-01 12:42:56 -0700602 end = i;
603 break;
Alex Crichton44bffbc2017-05-19 17:51:59 -0700604 }
605 }
606
Nika Layzellf8d5f212017-12-11 14:07:02 -0500607 if lifetime && &input.rest[..end] != "'static" && KEYWORDS.contains(&&input.rest[1..end]) {
David Tolnay214c94c2017-06-01 12:42:56 -0700608 Err(LexError)
609 } else {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500610 let a = &input.rest[..end];
Alex Crichton52725f72017-08-28 12:20:58 -0700611 if a == "_" {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500612 Ok((input.advance(end), TokenNode::Op('_', Spacing::Alone)))
Alex Crichton52725f72017-08-28 12:20:58 -0700613 } else {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500614 Ok((input.advance(end), TokenNode::Term(::Term::intern(a))))
Alex Crichton52725f72017-08-28 12:20:58 -0700615 }
David Tolnay214c94c2017-06-01 12:42:56 -0700616 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700617}
618
// From https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/symbol.rs
//
// Reserved words; `symbol` uses this list to reject lifetimes named after a
// keyword.
static KEYWORDS: &'static [&'static str] = &[
    "abstract", "alignof", "as",
    "become", "box", "break",
    "const", "continue", "crate",
    "do",
    "else", "enum", "extern",
    "false", "final", "fn", "for",
    "if", "impl", "in",
    "let", "loop",
    "macro", "match", "mod", "move", "mut",
    "offsetof", "override",
    "priv", "proc", "pub", "pure",
    "ref", "return",
    "self", "Self", "sizeof", "static", "struct", "super",
    "trait", "true", "type", "typeof",
    "unsafe", "unsized", "use",
    "virtual",
    "where", "while",
    "yield",
];
629
Nika Layzellf8d5f212017-12-11 14:07:02 -0500630fn literal(input: Cursor) -> PResult<::Literal> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700631 let input_no_ws = skip_whitespace(input);
632
633 match literal_nocapture(input_no_ws) {
David Tolnay1218e122017-06-01 11:13:45 -0700634 Ok((a, ())) => {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700635 let start = input.len() - input_no_ws.len();
636 let len = input_no_ws.len() - a.len();
637 let end = start + len;
Nika Layzellf8d5f212017-12-11 14:07:02 -0500638 Ok((a, ::Literal(Literal(input.rest[start..end].to_string()))))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700639 }
David Tolnay1218e122017-06-01 11:13:45 -0700640 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700641 }
642}
643
644named!(literal_nocapture -> (), alt!(
645 string
646 |
647 byte_string
648 |
649 byte
650 |
651 character
652 |
653 float
654 |
655 int
656 |
657 boolean
658 |
659 doc_comment
660));
661
662named!(string -> (), alt!(
663 quoted_string
664 |
665 preceded!(
666 punct!("r"),
667 raw_string
668 ) => { |_| () }
669));
670
671named!(quoted_string -> (), delimited!(
672 punct!("\""),
673 cooked_string,
674 tag!("\"")
675));
676
Nika Layzellf8d5f212017-12-11 14:07:02 -0500677fn cooked_string(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700678 let mut chars = input.char_indices().peekable();
679 while let Some((byte_offset, ch)) = chars.next() {
680 match ch {
681 '"' => {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500682 return Ok((input.advance(byte_offset), ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700683 }
684 '\r' => {
685 if let Some((_, '\n')) = chars.next() {
686 // ...
687 } else {
688 break;
689 }
690 }
691 '\\' => {
692 match chars.next() {
693 Some((_, 'x')) => {
694 if !backslash_x_char(&mut chars) {
695 break
696 }
697 }
698 Some((_, 'n')) |
699 Some((_, 'r')) |
700 Some((_, 't')) |
701 Some((_, '\\')) |
702 Some((_, '\'')) |
703 Some((_, '"')) |
704 Some((_, '0')) => {}
705 Some((_, 'u')) => {
706 if !backslash_u(&mut chars) {
707 break
708 }
709 }
710 Some((_, '\n')) | Some((_, '\r')) => {
711 while let Some(&(_, ch)) = chars.peek() {
712 if ch.is_whitespace() {
713 chars.next();
714 } else {
715 break;
716 }
717 }
718 }
719 _ => break,
720 }
721 }
722 _ch => {}
723 }
724 }
David Tolnay1218e122017-06-01 11:13:45 -0700725 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700726}
727
728named!(byte_string -> (), alt!(
729 delimited!(
730 punct!("b\""),
731 cooked_byte_string,
732 tag!("\"")
733 ) => { |_| () }
734 |
735 preceded!(
736 punct!("br"),
737 raw_string
738 ) => { |_| () }
739));
740
Nika Layzellf8d5f212017-12-11 14:07:02 -0500741fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700742 let mut bytes = input.bytes().enumerate();
743 'outer: while let Some((offset, b)) = bytes.next() {
744 match b {
745 b'"' => {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500746 return Ok((input.advance(offset), ()));
Alex Crichton44bffbc2017-05-19 17:51:59 -0700747 }
748 b'\r' => {
749 if let Some((_, b'\n')) = bytes.next() {
750 // ...
751 } else {
752 break;
753 }
754 }
755 b'\\' => {
756 match bytes.next() {
757 Some((_, b'x')) => {
758 if !backslash_x_byte(&mut bytes) {
759 break
760 }
761 }
762 Some((_, b'n')) |
763 Some((_, b'r')) |
764 Some((_, b't')) |
765 Some((_, b'\\')) |
766 Some((_, b'0')) |
767 Some((_, b'\'')) |
768 Some((_, b'"')) => {}
769 Some((newline, b'\n')) |
770 Some((newline, b'\r')) => {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500771 let rest = input.advance(newline + 1);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700772 for (offset, ch) in rest.char_indices() {
773 if !ch.is_whitespace() {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500774 input = rest.advance(offset);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700775 bytes = input.bytes().enumerate();
776 continue 'outer;
777 }
778 }
779 break;
780 }
781 _ => break,
782 }
783 }
784 b if b < 0x80 => {}
785 _ => break,
786 }
787 }
David Tolnay1218e122017-06-01 11:13:45 -0700788 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700789}
790
Nika Layzellf8d5f212017-12-11 14:07:02 -0500791fn raw_string(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700792 let mut chars = input.char_indices();
793 let mut n = 0;
794 while let Some((byte_offset, ch)) = chars.next() {
795 match ch {
796 '"' => {
797 n = byte_offset;
798 break;
799 }
800 '#' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700801 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700802 }
803 }
804 for (byte_offset, ch) in chars {
805 match ch {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500806 '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => {
807 let rest = input.advance(byte_offset + 1 + n);
David Tolnay1218e122017-06-01 11:13:45 -0700808 return Ok((rest, ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -0700809 }
810 '\r' => {}
811 _ => {}
812 }
813 }
David Tolnay1218e122017-06-01 11:13:45 -0700814 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700815}
816
817named!(byte -> (), do_parse!(
818 punct!("b") >>
819 tag!("'") >>
820 cooked_byte >>
821 tag!("'") >>
822 (())
823));
824
Nika Layzellf8d5f212017-12-11 14:07:02 -0500825fn cooked_byte(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700826 let mut bytes = input.bytes().enumerate();
827 let ok = match bytes.next().map(|(_, b)| b) {
828 Some(b'\\') => {
829 match bytes.next().map(|(_, b)| b) {
830 Some(b'x') => backslash_x_byte(&mut bytes),
831 Some(b'n') |
832 Some(b'r') |
833 Some(b't') |
834 Some(b'\\') |
835 Some(b'0') |
836 Some(b'\'') |
837 Some(b'"') => true,
838 _ => false,
839 }
840 }
841 b => b.is_some(),
842 };
843 if ok {
844 match bytes.next() {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500845 Some((offset, _)) => Ok((input.advance(offset), ())),
846 None => Ok((input.advance(input.len()), ())),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700847 }
848 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700849 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700850 }
851}
852
853named!(character -> (), do_parse!(
854 punct!("'") >>
855 cooked_char >>
856 tag!("'") >>
857 (())
858));
859
Nika Layzellf8d5f212017-12-11 14:07:02 -0500860fn cooked_char(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700861 let mut chars = input.char_indices();
862 let ok = match chars.next().map(|(_, ch)| ch) {
863 Some('\\') => {
864 match chars.next().map(|(_, ch)| ch) {
865 Some('x') => backslash_x_char(&mut chars),
866 Some('u') => backslash_u(&mut chars),
867 Some('n') |
868 Some('r') |
869 Some('t') |
870 Some('\\') |
871 Some('0') |
872 Some('\'') |
873 Some('"') => true,
874 _ => false,
875 }
876 }
877 ch => ch.is_some(),
878 };
879 if ok {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500880 match chars.next() {
881 Some((idx, _)) => Ok((input.advance(idx), ())),
882 None => Ok((input.advance(input.len()), ())),
883 }
Alex Crichton44bffbc2017-05-19 17:51:59 -0700884 } else {
David Tolnay1218e122017-06-01 11:13:45 -0700885 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -0700886 }
887}
888
889macro_rules! next_ch {
890 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
891 match $chars.next() {
892 Some((_, ch)) => match ch {
893 $pat $(| $rest)* => ch,
894 _ => return false,
895 },
896 None => return false
897 }
898 };
899}
900
/// Validates the two digits of a `\x` escape inside a char or string literal.
///
/// The first digit must be `0`–`7` (so the value stays within ASCII) and the
/// second any hexadecimal digit. Stops consuming at the first mismatch.
fn backslash_x_char<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    let high_ok = match chars.next() {
        Some((_, ch)) => ch >= '0' && ch <= '7',
        None => false,
    };
    if !high_ok {
        return false;
    }
    match chars.next() {
        Some((_, ch)) => ch.is_digit(16),
        None => false,
    }
}
908
/// Validates the two hexadecimal digits of a `\x` escape inside a byte or
/// byte-string literal. Stops consuming at the first mismatch.
fn backslash_x_byte<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, u8)>
{
    for _ in 0..2 {
        match chars.next() {
            // Widening to `char` keeps the test to the ASCII hex digits.
            Some((_, b)) if (b as char).is_digit(16) => {}
            _ => return false,
        }
    }
    true
}
916
/// Validates the `{...}` part of a `\u{...}` escape.
///
/// Accepts an opening brace, between one and six hexadecimal digits, and a
/// closing brace. Stops consuming at the first mismatch.
fn backslash_u<I>(chars: &mut I) -> bool
    where I: Iterator<Item = (usize, char)>
{
    match chars.next() {
        Some((_, '{')) => {}
        _ => return false,
    }
    // At least one digit is required.
    match chars.next() {
        Some((_, ch)) if ch.is_digit(16) => {}
        _ => return false,
    }
    // Up to five more digits; the closing brace may come at any point.
    for _ in 0..5 {
        match chars.next() {
            Some((_, '}')) => return true,
            Some((_, ch)) if ch.is_digit(16) => {}
            _ => return false,
        }
    }
    // After six digits only the closing brace is acceptable.
    match chars.next() {
        Some((_, '}')) => true,
        _ => false,
    }
}
945
Nika Layzellf8d5f212017-12-11 14:07:02 -0500946fn float(input: Cursor) -> PResult<()> {
David Tolnay744a6b82017-06-01 11:34:29 -0700947 let (rest, ()) = float_digits(input)?;
948 for suffix in &["f32", "f64"] {
949 if rest.starts_with(suffix) {
Nika Layzellf8d5f212017-12-11 14:07:02 -0500950 return word_break(rest.advance(suffix.len()));
David Tolnay744a6b82017-06-01 11:34:29 -0700951 }
952 }
953 word_break(rest)
954}
Alex Crichton44bffbc2017-05-19 17:51:59 -0700955
Nika Layzellf8d5f212017-12-11 14:07:02 -0500956fn float_digits(input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -0700957 let mut chars = input.chars().peekable();
958 match chars.next() {
959 Some(ch) if ch >= '0' && ch <= '9' => {}
David Tolnay1218e122017-06-01 11:13:45 -0700960 _ => return Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -0700961 }
962
963 let mut len = 1;
964 let mut has_dot = false;
965 let mut has_exp = false;
966 while let Some(&ch) = chars.peek() {
967 match ch {
968 '0'...'9' | '_' => {
969 chars.next();
970 len += 1;
971 }
972 '.' => {
973 if has_dot {
974 break;
975 }
976 chars.next();
977 if chars.peek()
978 .map(|&ch| ch == '.' || UnicodeXID::is_xid_start(ch))
979 .unwrap_or(false) {
David Tolnay1218e122017-06-01 11:13:45 -0700980 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700981 }
982 len += 1;
983 has_dot = true;
984 }
985 'e' | 'E' => {
986 chars.next();
987 len += 1;
988 has_exp = true;
989 break;
990 }
991 _ => break,
992 }
993 }
994
Nika Layzellf8d5f212017-12-11 14:07:02 -0500995 let rest = input.advance(len);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700996 if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) {
David Tolnay1218e122017-06-01 11:13:45 -0700997 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -0700998 }
999
1000 if has_exp {
1001 let mut has_exp_value = false;
1002 while let Some(&ch) = chars.peek() {
1003 match ch {
1004 '+' | '-' => {
1005 if has_exp_value {
1006 break;
1007 }
1008 chars.next();
1009 len += 1;
1010 }
1011 '0'...'9' => {
1012 chars.next();
1013 len += 1;
1014 has_exp_value = true;
1015 }
1016 '_' => {
1017 chars.next();
1018 len += 1;
1019 }
1020 _ => break,
1021 }
1022 }
1023 if !has_exp_value {
David Tolnay1218e122017-06-01 11:13:45 -07001024 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001025 }
1026 }
1027
Nika Layzellf8d5f212017-12-11 14:07:02 -05001028 Ok((input.advance(len), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -07001029}
1030
Nika Layzellf8d5f212017-12-11 14:07:02 -05001031fn int(input: Cursor) -> PResult<()> {
David Tolnay744a6b82017-06-01 11:34:29 -07001032 let (rest, ()) = digits(input)?;
1033 for suffix in &[
1034 "isize",
1035 "i8",
1036 "i16",
1037 "i32",
1038 "i64",
1039 "i128",
1040 "usize",
1041 "u8",
1042 "u16",
1043 "u32",
1044 "u64",
1045 "u128",
1046 ] {
1047 if rest.starts_with(suffix) {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001048 return word_break(rest.advance(suffix.len()));
David Tolnay744a6b82017-06-01 11:34:29 -07001049 }
1050 }
1051 word_break(rest)
1052}
Alex Crichton44bffbc2017-05-19 17:51:59 -07001053
Nika Layzellf8d5f212017-12-11 14:07:02 -05001054fn digits(mut input: Cursor) -> PResult<()> {
Alex Crichton44bffbc2017-05-19 17:51:59 -07001055 let base = if input.starts_with("0x") {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001056 input = input.advance(2);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001057 16
1058 } else if input.starts_with("0o") {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001059 input = input.advance(2);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001060 8
1061 } else if input.starts_with("0b") {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001062 input = input.advance(2);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001063 2
1064 } else {
1065 10
1066 };
1067
Alex Crichton44bffbc2017-05-19 17:51:59 -07001068 let mut len = 0;
1069 let mut empty = true;
1070 for b in input.bytes() {
1071 let digit = match b {
1072 b'0'...b'9' => (b - b'0') as u64,
1073 b'a'...b'f' => 10 + (b - b'a') as u64,
1074 b'A'...b'F' => 10 + (b - b'A') as u64,
1075 b'_' => {
1076 if empty && base == 10 {
David Tolnay1218e122017-06-01 11:13:45 -07001077 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001078 }
1079 len += 1;
1080 continue;
1081 }
1082 _ => break,
1083 };
1084 if digit >= base {
David Tolnay1218e122017-06-01 11:13:45 -07001085 return Err(LexError);
Alex Crichton44bffbc2017-05-19 17:51:59 -07001086 }
Alex Crichton44bffbc2017-05-19 17:51:59 -07001087 len += 1;
1088 empty = false;
1089 }
1090 if empty {
David Tolnay1218e122017-06-01 11:13:45 -07001091 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -07001092 } else {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001093 Ok((input.advance(len), ()))
Alex Crichton44bffbc2017-05-19 17:51:59 -07001094 }
1095}
1096
// Parser for a boolean literal: accepts the keyword `true` or the keyword
// `false`. The closure attached to each alternative discards the matched
// value, so the parser produces `()`.
named!(boolean -> (), alt!(
    keyword!("true") => { |_| () }
    |
    keyword!("false") => { |_| () }
));
1102
Nika Layzellf8d5f212017-12-11 14:07:02 -05001103fn op(input: Cursor) -> PResult<(char, Spacing)> {
David Tolnayea75c5f2017-05-31 23:40:33 -07001104 let input = skip_whitespace(input);
1105 match op_char(input) {
David Tolnay1218e122017-06-01 11:13:45 -07001106 Ok((rest, ch)) => {
David Tolnayea75c5f2017-05-31 23:40:33 -07001107 let kind = match op_char(rest) {
Alex Crichton1a7f7622017-07-05 17:47:15 -07001108 Ok(_) => Spacing::Joint,
1109 Err(LexError) => Spacing::Alone,
David Tolnayea75c5f2017-05-31 23:40:33 -07001110 };
David Tolnay1218e122017-06-01 11:13:45 -07001111 Ok((rest, (ch, kind)))
David Tolnayea75c5f2017-05-31 23:40:33 -07001112 }
David Tolnay1218e122017-06-01 11:13:45 -07001113 Err(LexError) => Err(LexError),
Alex Crichton44bffbc2017-05-19 17:51:59 -07001114 }
1115}
1116
Nika Layzellf8d5f212017-12-11 14:07:02 -05001117fn op_char(input: Cursor) -> PResult<char> {
David Tolnayea75c5f2017-05-31 23:40:33 -07001118 let mut chars = input.chars();
1119 let first = match chars.next() {
1120 Some(ch) => ch,
1121 None => {
David Tolnay1218e122017-06-01 11:13:45 -07001122 return Err(LexError);
David Tolnayea75c5f2017-05-31 23:40:33 -07001123 }
1124 };
1125 let recognized = "~!@#$%^&*-=+|;:,<.>/?";
1126 if recognized.contains(first) {
Nika Layzellf8d5f212017-12-11 14:07:02 -05001127 Ok((input.advance(first.len_utf8()), first))
Alex Crichton44bffbc2017-05-19 17:51:59 -07001128 } else {
David Tolnay1218e122017-06-01 11:13:45 -07001129 Err(LexError)
Alex Crichton44bffbc2017-05-19 17:51:59 -07001130 }
1131}
1132
// Parser for a doc comment; produces `()`. Four forms are tried in order:
//   1. `//!` followed by everything up to the next newline;
//   2. a block comment that starts with `/*!`;
//   3. `///` not followed by a further `/`, then everything up to the next
//      newline (so `////...` is not a doc comment);
//   4. a block comment that starts with `/**` not followed by a further `*`
//      (so `/***...` is not a doc comment).
// For the block forms the opening marker is only peeked at; `block_comment`
// then consumes the comment starting from that same position.
named!(doc_comment -> (), alt!(
    do_parse!(
        punct!("//!") >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tag!("/*!")) >>
        block_comment >>
        (())
    )
    |
    do_parse!(
        punct!("///") >>
        not!(tag!("/")) >>
        take_until!("\n") >>
        (())
    )
    |
    do_parse!(
        option!(whitespace) >>
        peek!(tuple!(tag!("/**"), not!(tag!("*")))) >>
        block_comment >>
        (())
    )
));
1160));