// Copyright 2018 Syn Developers
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

David Tolnay360efd22018-01-04 23:35:26 -08009use proc_macro2::{Literal, Span, Term, TokenNode, TokenTree};
10use std::str;
11
12#[cfg(feature = "extra-traits")]
Alex Crichtonccbb45d2017-05-23 10:58:24 -070013use std::hash::{Hash, Hasher};
14
// The syntax-tree representation of a literal. Every variant except `Bool`
// keeps the original `Literal` token next to its span; `Bool` stores the
// parsed value instead.
ast_enum_of_structs! {
    pub enum Lit {
        /// A string literal (`"foo"`)
        pub Str(LitStr #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A byte string (`b"foo"`)
        pub ByteStr(LitByteStr #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A byte char (`b'f'`)
        pub Byte(LitByte #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A character literal (`'a'`)
        pub Char(LitChar #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// An integer literal (`1`)
        ///
        /// Holds up to 64 bits of data. Use `LitVerbatim` for any larger
        /// integer literal.
        pub Int(LitInt #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A float literal (`1f64` or `1E10f64`)
        ///
        /// Must be finite. May not be infinite or NaN.
        pub Float(LitFloat #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A boolean literal (`true` or `false`)
        pub Bool(LitBool #manual_extra_traits {
            pub value: bool,
            pub span: Span,
        }),

        /// A numeric literal kept as its raw token because it is neither a
        /// float nor an integer that fits in 64 bits.
        pub Verbatim(LitVerbatim #manual_extra_traits {
            pub token: Literal,
            pub span: Span,
        }),
    }
}
70
David Tolnay360efd22018-01-04 23:35:26 -080071impl LitStr {
72 pub fn new(value: &str, span: Span) -> Self {
73 LitStr {
74 token: Literal::string(value),
75 span: span,
76 }
77 }
78
79 pub fn value(&self) -> String {
80 value::parse_lit_str(&self.token.to_string())
81 }
Alex Crichtonccbb45d2017-05-23 10:58:24 -070082}
83
David Tolnay360efd22018-01-04 23:35:26 -080084impl LitByteStr {
85 pub fn new(value: &[u8], span: Span) -> Self {
86 LitByteStr {
87 token: Literal::byte_string(value),
88 span: span,
89 }
90 }
91
92 pub fn value(&self) -> Vec<u8> {
93 value::parse_lit_byte_str(&self.token.to_string())
94 }
95}
96
97impl LitByte {
98 pub fn new(value: u8, span: Span) -> Self {
99 LitByte {
100 token: Literal::byte_char(value),
101 span: span,
102 }
103 }
104
105 pub fn value(&self) -> u8 {
106 value::parse_lit_byte(&self.token.to_string())
107 }
108}
109
110impl LitChar {
111 pub fn new(value: char, span: Span) -> Self {
112 LitChar {
113 token: Literal::character(value),
114 span: span,
115 }
116 }
117
118 pub fn value(&self) -> char {
119 value::parse_lit_char(&self.token.to_string())
120 }
121}
122
123impl LitInt {
124 pub fn new(value: u64, suffix: IntSuffix, span: Span) -> Self {
125 LitInt {
126 token: match suffix {
127 IntSuffix::Isize => Literal::isize(value as isize),
128 IntSuffix::I8 => Literal::i8(value as i8),
129 IntSuffix::I16 => Literal::i16(value as i16),
130 IntSuffix::I32 => Literal::i32(value as i32),
131 IntSuffix::I64 => Literal::i64(value as i64),
132 IntSuffix::I128 => value::to_literal(&format!("{}i128", value)),
133 IntSuffix::Usize => Literal::usize(value as usize),
134 IntSuffix::U8 => Literal::u8(value as u8),
135 IntSuffix::U16 => Literal::u16(value as u16),
136 IntSuffix::U32 => Literal::u32(value as u32),
137 IntSuffix::U64 => Literal::u64(value),
138 IntSuffix::U128 => value::to_literal(&format!("{}u128", value)),
139 IntSuffix::None => Literal::integer(value as i64),
140 },
141 span: span,
142 }
143 }
144
145 pub fn value(&self) -> u64 {
146 value::parse_lit_int(&self.token.to_string()).unwrap()
147 }
148
149 pub fn suffix(&self) -> IntSuffix {
150 let value = self.token.to_string();
151 for (s, suffix) in vec![
152 ("i8", IntSuffix::I8),
153 ("i16", IntSuffix::I16),
154 ("i32", IntSuffix::I32),
155 ("i64", IntSuffix::I64),
156 ("i128", IntSuffix::I128),
157 ("isize", IntSuffix::Isize),
158 ("u8", IntSuffix::U8),
159 ("u16", IntSuffix::U16),
160 ("u32", IntSuffix::U32),
161 ("u64", IntSuffix::U64),
162 ("u128", IntSuffix::U128),
163 ("usize", IntSuffix::Usize),
164 ] {
165 if value.ends_with(s) {
166 return suffix;
167 }
168 }
169 IntSuffix::None
170 }
171}
172
173impl LitFloat {
174 pub fn new(value: f64, suffix: FloatSuffix, span: Span) -> Self {
175 LitFloat {
176 token: match suffix {
177 FloatSuffix::F32 => Literal::f32(value as f32),
178 FloatSuffix::F64 => Literal::f64(value),
179 FloatSuffix::None => Literal::float(value),
180 },
181 span: span,
182 }
183 }
184
185 pub fn value(&self) -> f64 {
186 value::parse_lit_float(&self.token.to_string())
187 }
188
189 pub fn suffix(&self) -> FloatSuffix {
190 let value = self.token.to_string();
David Tolnay61037c62018-01-05 16:21:03 -0800191 for (s, suffix) in vec![("f32", FloatSuffix::F32), ("f64", FloatSuffix::F64)] {
David Tolnay360efd22018-01-04 23:35:26 -0800192 if value.ends_with(s) {
193 return suffix;
194 }
195 }
196 FloatSuffix::None
197 }
198}
199
// Implements `Eq`, `PartialEq` and `Hash` for a literal type by comparing and
// hashing the string form of one field. The span plays no part in either
// operation. All impls are only compiled with the "extra-traits" feature.
macro_rules! lit_extra_traits {
    ($ty:ident, $field:ident) => {
        #[cfg(feature = "extra-traits")]
        impl Eq for $ty {}

        #[cfg(feature = "extra-traits")]
        impl PartialEq for $ty {
            fn eq(&self, other: &Self) -> bool {
                let lhs = self.$field.to_string();
                let rhs = other.$field.to_string();
                lhs == rhs
            }
        }

        #[cfg(feature = "extra-traits")]
        impl Hash for $ty {
            fn hash<H: Hasher>(&self, state: &mut H) {
                let repr = self.$field.to_string();
                repr.hash(state);
            }
        }
    };
}

lit_extra_traits!(LitStr, token);
lit_extra_traits!(LitByteStr, token);
lit_extra_traits!(LitByte, token);
lit_extra_traits!(LitChar, token);
lit_extra_traits!(LitInt, token);
lit_extra_traits!(LitFloat, token);
lit_extra_traits!(LitBool, value);
lit_extra_traits!(LitVerbatim, token);
232
// Distinguishes the two ways a string literal can be written: cooked or raw.
ast_enum! {
    pub enum StrStyle #no_visit {
        /// A regular string, like `"foo"`
        Cooked,
        /// A raw string, like `r##"foo"##`
        ///
        /// The unsigned integer is the number of `#` symbols used.
        Raw(usize),
    }
}
243
// The type suffix of an integer literal, as reported by `LitInt::suffix` and
// accepted by `LitInt::new`.
ast_enum! {
    pub enum IntSuffix #no_visit {
        /// The `i8` suffix, as in `1i8`
        I8,
        /// The `i16` suffix
        I16,
        /// The `i32` suffix
        I32,
        /// The `i64` suffix
        I64,
        /// The `i128` suffix
        I128,
        /// The `isize` suffix
        Isize,
        /// The `u8` suffix, as in `1u8`
        U8,
        /// The `u16` suffix
        U16,
        /// The `u32` suffix
        U32,
        /// The `u64` suffix
        U64,
        /// The `u128` suffix
        U128,
        /// The `usize` suffix
        Usize,
        /// No suffix at all, as in `1`
        None,
    }
}
261
// The type suffix of a float literal, as reported by `LitFloat::suffix` and
// accepted by `LitFloat::new`.
ast_enum! {
    pub enum FloatSuffix #no_visit {
        /// The `f32` suffix, as in `1.0f32`
        F32,
        /// The `f64` suffix, as in `1.0f64`
        F64,
        /// No suffix at all, as in `1.0`
        None,
    }
}
269
270#[cfg(feature = "parsing")]
David Tolnayf4bbbd92016-09-23 14:41:55 -0700271pub mod parsing {
272 use super::*;
David Tolnayc5ab8c62017-12-26 16:43:39 -0500273 use synom::Synom;
274 use cursor::Cursor;
David Tolnay203557a2017-12-27 23:59:33 -0500275 use parse_error;
276 use synom::PResult;
David Tolnayf4bbbd92016-09-23 14:41:55 -0700277
Alex Crichton954046c2017-05-30 21:49:42 -0700278 impl Synom for Lit {
Michael Layzell92639a52017-06-01 00:07:44 -0400279 fn parse(input: Cursor) -> PResult<Self> {
Michael Layzell589a8f42017-06-02 19:47:01 -0400280 match input.literal() {
David Tolnay360efd22018-01-04 23:35:26 -0800281 Some((span, lit, rest)) => Ok((Lit::new(lit, span), rest)),
David Tolnay73c98de2017-12-31 15:56:56 -0500282 _ => match input.term() {
David Tolnay360efd22018-01-04 23:35:26 -0800283 Some((span, term, rest)) => Ok((
284 Lit::Bool(LitBool {
285 value: if term.as_str() == "true" {
286 true
287 } else if term.as_str() == "false" {
288 false
289 } else {
290 return parse_error();
David Tolnay51382052017-12-27 13:46:21 -0500291 },
David Tolnay360efd22018-01-04 23:35:26 -0800292 span: span,
293 }),
294 rest,
295 )),
Michael Layzell589a8f42017-06-02 19:47:01 -0400296 _ => parse_error(),
David Tolnay51382052017-12-27 13:46:21 -0500297 },
Michael Layzell589a8f42017-06-02 19:47:01 -0400298 }
David Tolnayfa0edf22016-09-23 22:58:24 -0700299 }
Sergio Benitez5680d6a2017-12-29 11:20:29 -0800300
301 fn description() -> Option<&'static str> {
302 Some("literal")
303 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700304 }
David Tolnay360efd22018-01-04 23:35:26 -0800305
    // Parsers for the individual literal types. Each one parses a general
    // `Lit` and keeps it only when it is the matching variant; every other
    // variant goes to `reject!()`. The quoted text is the description given
    // to `impl_synom!` for that type.
    impl_synom!(LitStr "string literal" switch!(
        syn!(Lit),
        Lit::Str(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitByteStr "byte string literal" switch!(
        syn!(Lit),
        Lit::ByteStr(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitByte "byte literal" switch!(
        syn!(Lit),
        Lit::Byte(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitChar "character literal" switch!(
        syn!(Lit),
        Lit::Char(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitInt "integer literal" switch!(
        syn!(Lit),
        Lit::Int(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitFloat "floating point literal" switch!(
        syn!(Lit),
        Lit::Float(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitBool "boolean literal" switch!(
        syn!(Lit),
        Lit::Bool(lit) => value!(lit)
        |
        _ => reject!()
    ));
354
355 impl Lit {
356 pub fn new(token: Literal, span: Span) -> Self {
357 let value = token.to_string();
358
359 match value::byte(&value, 0) {
David Tolnay61037c62018-01-05 16:21:03 -0800360 b'"' | b'r' => {
361 return Lit::Str(LitStr {
David Tolnay360efd22018-01-04 23:35:26 -0800362 token: token,
363 span: span,
David Tolnay61037c62018-01-05 16:21:03 -0800364 })
David Tolnay360efd22018-01-04 23:35:26 -0800365 }
David Tolnay61037c62018-01-05 16:21:03 -0800366 b'b' => match value::byte(&value, 1) {
367 b'"' | b'r' => {
368 return Lit::ByteStr(LitByteStr {
369 token: token,
370 span: span,
371 })
372 }
373 b'\'' => {
374 return Lit::Byte(LitByte {
375 token: token,
376 span: span,
377 })
378 }
379 _ => {}
380 },
381 b'\'' => {
382 return Lit::Char(LitChar {
383 token: token,
384 span: span,
385 })
386 }
David Tolnay360efd22018-01-04 23:35:26 -0800387 b'0'...b'9' => if number_is_int(&value) {
388 return Lit::Int(LitInt {
389 token: token,
390 span: span,
391 });
392 } else if number_is_float(&value) {
393 return Lit::Float(LitFloat {
394 token: token,
395 span: span,
396 });
397 } else {
398 // number overflow
399 return Lit::Verbatim(LitVerbatim {
400 token: token,
401 span: span,
402 });
David Tolnay61037c62018-01-05 16:21:03 -0800403 },
David Tolnay360efd22018-01-04 23:35:26 -0800404 _ => if value == "true" || value == "false" {
405 return Lit::Bool(LitBool {
406 value: value == "true",
407 span: span,
408 });
David Tolnay61037c62018-01-05 16:21:03 -0800409 },
David Tolnay360efd22018-01-04 23:35:26 -0800410 }
411
412 panic!("Unrecognized literal: {}", value);
413 }
414 }
415
416 fn number_is_int(value: &str) -> bool {
417 if number_is_float(value) {
418 false
419 } else {
420 value::parse_lit_int(value).is_some()
421 }
422 }
423
424 fn number_is_float(value: &str) -> bool {
425 if value.contains('.') {
426 true
427 } else if value.starts_with("0x") || value.ends_with("size") {
428 false
429 } else {
430 value.contains('e') || value.contains('E')
431 }
432 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700433}
434
#[cfg(feature = "printing")]
mod printing {
    use super::*;
    use quote::{ToTokens, Tokens};

    /// Builds the token tree for a literal: a clone of `token` carrying
    /// `span`.
    fn literal_tree(span: Span, token: &Literal) -> TokenTree {
        TokenTree {
            span: span,
            kind: TokenNode::Literal(token.clone()),
        }
    }

    impl ToTokens for LitStr {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(self.span, &self.token));
        }
    }

    impl ToTokens for LitByteStr {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(self.span, &self.token));
        }
    }

    impl ToTokens for LitByte {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(self.span, &self.token));
        }
    }

    impl ToTokens for LitChar {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(self.span, &self.token));
        }
    }

    impl ToTokens for LitInt {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(self.span, &self.token));
        }
    }

    impl ToTokens for LitFloat {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(self.span, &self.token));
        }
    }

    // Booleans are emitted as the term `true` / `false`, not as a literal
    // token.
    impl ToTokens for LitBool {
        fn to_tokens(&self, tokens: &mut Tokens) {
            let word = if self.value { "true" } else { "false" };
            tokens.append(TokenTree {
                span: self.span,
                kind: TokenNode::Term(Term::intern(word)),
            });
        }
    }

    impl ToTokens for LitVerbatim {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(self.span, &self.token));
        }
    }
}
512
513mod value {
514 use super::*;
515 use std::char;
516 use std::ops::{Index, RangeFrom};
517 use proc_macro2::TokenStream;
518
519 /// Get the byte at offset idx, or a default of `b'\0'` if we're looking
520 /// past the end of the input buffer.
521 pub fn byte<S: AsRef<[u8]> + ?Sized>(s: &S, idx: usize) -> u8 {
522 let s = s.as_ref();
523 if idx < s.len() {
524 s[idx]
525 } else {
526 0
527 }
528 }
529
530 fn next_chr(s: &str) -> char {
531 s.chars().next().unwrap_or('\0')
532 }
533
534 pub fn parse_lit_str(s: &str) -> String {
535 match byte(s, 0) {
536 b'"' => parse_lit_str_cooked(s),
537 b'r' => parse_lit_str_raw(s),
538 _ => unreachable!(),
539 }
540 }
541
David Tolnay76ebcdd2018-01-05 17:07:26 -0800542 // Clippy false positive
543 // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
544 #[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
David Tolnay360efd22018-01-04 23:35:26 -0800545 fn parse_lit_str_cooked(mut s: &str) -> String {
546 assert_eq!(byte(s, 0), b'"');
547 s = &s[1..];
548
549 let mut out = String::new();
550 'outer: loop {
551 let ch = match byte(s, 0) {
552 b'"' => break,
553 b'\\' => {
554 let b = byte(s, 1);
555 s = &s[2..];
556 match b {
557 b'x' => {
558 let (byte, rest) = backslash_x(s);
559 s = rest;
560 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800561 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800562 }
563 b'u' => {
David Tolnay76ebcdd2018-01-05 17:07:26 -0800564 let (chr, rest) = backslash_u(s);
David Tolnay360efd22018-01-04 23:35:26 -0800565 s = rest;
566 chr
567 }
568 b'n' => '\n',
569 b'r' => '\r',
570 b't' => '\t',
571 b'\\' => '\\',
572 b'0' => '\0',
573 b'\'' => '\'',
574 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800575 b'\r' | b'\n' => loop {
576 let ch = next_chr(s);
577 if ch.is_whitespace() {
578 s = &s[ch.len_utf8()..];
579 } else {
580 continue 'outer;
David Tolnay360efd22018-01-04 23:35:26 -0800581 }
David Tolnay61037c62018-01-05 16:21:03 -0800582 },
583 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800584 }
585 }
586 b'\r' => {
587 assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
588 s = &s[2..];
589 '\n'
590 }
591 _ => {
592 let ch = next_chr(s);
593 s = &s[ch.len_utf8()..];
594 ch
595 }
596 };
597 out.push(ch);
598 }
599
600 assert_eq!(s, "\"");
601 out
602 }
603
604 fn parse_lit_str_raw(mut s: &str) -> String {
605 assert_eq!(byte(s, 0), b'r');
606 s = &s[1..];
607
608 let mut pounds = 0;
609 while byte(s, pounds) == b'#' {
610 pounds += 1;
611 }
612 assert_eq!(byte(s, pounds), b'"');
613 assert_eq!(byte(s, s.len() - pounds - 1), b'"');
614 for end in s[s.len() - pounds..].bytes() {
615 assert_eq!(end, b'#');
616 }
617
618 s[pounds + 1..s.len() - pounds - 1].to_owned()
619 }
620
621 pub fn parse_lit_byte_str(s: &str) -> Vec<u8> {
622 assert_eq!(byte(s, 0), b'b');
623 match byte(s, 1) {
624 b'"' => parse_lit_byte_str_cooked(s),
625 b'r' => parse_lit_byte_str_raw(s),
626 _ => unreachable!(),
627 }
628 }
629
David Tolnay76ebcdd2018-01-05 17:07:26 -0800630 // Clippy false positive
631 // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
632 #[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
David Tolnay360efd22018-01-04 23:35:26 -0800633 fn parse_lit_byte_str_cooked(mut s: &str) -> Vec<u8> {
634 assert_eq!(byte(s, 0), b'b');
635 assert_eq!(byte(s, 1), b'"');
636 s = &s[2..];
637
638 // We're going to want to have slices which don't respect codepoint boundaries.
639 let mut s = s.as_bytes();
640
641 let mut out = Vec::new();
642 'outer: loop {
643 let byte = match byte(s, 0) {
644 b'"' => break,
645 b'\\' => {
646 let b = byte(s, 1);
647 s = &s[2..];
648 match b {
649 b'x' => {
650 let (b, rest) = backslash_x(s);
651 s = rest;
652 b
653 }
654 b'n' => b'\n',
655 b'r' => b'\r',
656 b't' => b'\t',
657 b'\\' => b'\\',
658 b'0' => b'\0',
659 b'\'' => b'\'',
660 b'"' => b'"',
David Tolnay61037c62018-01-05 16:21:03 -0800661 b'\r' | b'\n' => loop {
662 let byte = byte(s, 0);
David Tolnay76ebcdd2018-01-05 17:07:26 -0800663 let ch = char::from_u32(u32::from(byte)).unwrap();
David Tolnay61037c62018-01-05 16:21:03 -0800664 if ch.is_whitespace() {
665 s = &s[1..];
666 } else {
667 continue 'outer;
David Tolnay360efd22018-01-04 23:35:26 -0800668 }
David Tolnay61037c62018-01-05 16:21:03 -0800669 },
670 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800671 }
672 }
673 b'\r' => {
674 assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
675 s = &s[2..];
676 b'\n'
677 }
678 b => {
679 s = &s[1..];
680 b
681 }
682 };
683 out.push(byte);
684 }
685
686 assert_eq!(s, b"\"");
687 out
688 }
689
690 fn parse_lit_byte_str_raw(s: &str) -> Vec<u8> {
691 assert_eq!(byte(s, 0), b'b');
692 parse_lit_str_raw(&s[1..]).into_bytes()
693 }
694
695 pub fn parse_lit_byte(s: &str) -> u8 {
696 assert_eq!(byte(s, 0), b'b');
697 assert_eq!(byte(s, 1), b'\'');
698
699 // We're going to want to have slices which don't respect codepoint boundaries.
700 let mut s = s[2..].as_bytes();
701
702 let b = match byte(s, 0) {
703 b'\\' => {
704 let b = byte(s, 1);
705 s = &s[2..];
706 match b {
707 b'x' => {
708 let (b, rest) = backslash_x(s);
709 s = rest;
710 b
711 }
712 b'n' => b'\n',
713 b'r' => b'\r',
714 b't' => b'\t',
715 b'\\' => b'\\',
716 b'0' => b'\0',
717 b'\'' => b'\'',
718 b'"' => b'"',
David Tolnay61037c62018-01-05 16:21:03 -0800719 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800720 }
721 }
722 b => {
723 s = &s[1..];
724 b
725 }
726 };
727
728 assert_eq!(byte(s, 0), b'\'');
729 b
730 }
731
732 pub fn parse_lit_char(mut s: &str) -> char {
733 assert_eq!(byte(s, 0), b'\'');
734 s = &s[1..];
735
736 let ch = match byte(s, 0) {
737 b'\\' => {
738 let b = byte(s, 1);
739 s = &s[2..];
740 match b {
741 b'x' => {
742 let (byte, rest) = backslash_x(s);
743 s = rest;
744 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800745 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800746 }
747 b'u' => {
748 let (chr, rest) = backslash_u(s);
749 s = rest;
750 chr
751 }
752 b'n' => '\n',
753 b'r' => '\r',
754 b't' => '\t',
755 b'\\' => '\\',
756 b'0' => '\0',
757 b'\'' => '\'',
758 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800759 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800760 }
761 }
762 _ => {
763 let ch = next_chr(s);
764 s = &s[ch.len_utf8()..];
765 ch
766 }
767 };
768 assert_eq!(s, "\'", "Expected end of char literal");
769 ch
770 }
771
772 fn backslash_x<S>(s: &S) -> (u8, &S)
David Tolnay61037c62018-01-05 16:21:03 -0800773 where
774 S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized,
David Tolnay360efd22018-01-04 23:35:26 -0800775 {
776 let mut ch = 0;
777 let b0 = byte(s, 0);
778 let b1 = byte(s, 1);
779 ch += 0x10 * match b0 {
780 b'0'...b'9' => b0 - b'0',
781 b'a'...b'f' => 10 + (b0 - b'a'),
782 b'A'...b'F' => 10 + (b0 - b'A'),
783 _ => panic!("unexpected non-hex character after \\x"),
784 };
David Tolnay76ebcdd2018-01-05 17:07:26 -0800785 ch += match b1 {
David Tolnay360efd22018-01-04 23:35:26 -0800786 b'0'...b'9' => b1 - b'0',
787 b'a'...b'f' => 10 + (b1 - b'a'),
788 b'A'...b'F' => 10 + (b1 - b'A'),
789 _ => panic!("unexpected non-hex character after \\x"),
790 };
791 (ch, &s[2..])
792 }
793
794 fn backslash_u(mut s: &str) -> (char, &str) {
795 if byte(s, 0) != b'{' {
796 panic!("expected {{ after \\u");
797 }
798 s = &s[1..];
799
800 let mut ch = 0;
801 for _ in 0..6 {
802 let b = byte(s, 0);
803 match b {
804 b'0'...b'9' => {
805 ch *= 0x10;
David Tolnay76ebcdd2018-01-05 17:07:26 -0800806 ch += u32::from(b - b'0');
David Tolnay360efd22018-01-04 23:35:26 -0800807 s = &s[1..];
808 }
809 b'a'...b'f' => {
810 ch *= 0x10;
David Tolnay76ebcdd2018-01-05 17:07:26 -0800811 ch += u32::from(10 + b - b'a');
David Tolnay360efd22018-01-04 23:35:26 -0800812 s = &s[1..];
813 }
814 b'A'...b'F' => {
815 ch *= 0x10;
David Tolnay76ebcdd2018-01-05 17:07:26 -0800816 ch += u32::from(10 + b - b'A');
David Tolnay360efd22018-01-04 23:35:26 -0800817 s = &s[1..];
818 }
819 b'}' => break,
820 _ => panic!("unexpected non-hex character after \\u"),
821 }
822 }
823 assert!(byte(s, 0) == b'}');
824 s = &s[1..];
825
826 if let Some(ch) = char::from_u32(ch) {
827 (ch, s)
828 } else {
829 panic!("character code {:x} is not a valid unicode character", ch);
830 }
831 }
832
833 pub fn parse_lit_int(mut s: &str) -> Option<u64> {
834 let base = match (byte(s, 0), byte(s, 1)) {
835 (b'0', b'x') => {
836 s = &s[2..];
837 16
838 }
839 (b'0', b'o') => {
840 s = &s[2..];
841 8
842 }
843 (b'0', b'b') => {
844 s = &s[2..];
845 2
846 }
847 (b'0'...b'9', _) => 10,
848 _ => unreachable!(),
849 };
850
851 let mut value = 0u64;
852 loop {
853 let b = byte(s, 0);
854 let digit = match b {
David Tolnay76ebcdd2018-01-05 17:07:26 -0800855 b'0'...b'9' => u64::from(b - b'0'),
856 b'a'...b'f' if base > 10 => 10 + u64::from(b - b'a'),
857 b'A'...b'F' if base > 10 => 10 + u64::from(b - b'A'),
David Tolnay360efd22018-01-04 23:35:26 -0800858 b'_' => {
859 s = &s[1..];
860 continue;
861 }
862 // NOTE: Looking at a floating point literal, we don't want to
863 // consider these integers.
864 b'.' if base == 10 => return None,
865 b'e' | b'E' if base == 10 => return None,
866 _ => break,
867 };
868
869 if digit >= base {
870 panic!("Unexpected digit {:x} out of base range", digit);
871 }
872
873 value = match value.checked_mul(base) {
874 Some(value) => value,
875 None => return None,
876 };
877 value = match value.checked_add(digit) {
878 Some(value) => value,
879 None => return None,
880 };
881 s = &s[1..];
882 }
883
884 Some(value)
885 }
886
887 pub fn parse_lit_float(input: &str) -> f64 {
888 // Rust's floating point literals are very similar to the ones parsed by
889 // the standard library, except that rust's literals can contain
890 // ignorable underscores. Let's remove those underscores.
891 let mut bytes = input.to_owned().into_bytes();
892 let mut write = 0;
893 for read in 0..bytes.len() {
894 if bytes[read] == b'_' {
895 continue; // Don't increase write
David Tolnay76ebcdd2018-01-05 17:07:26 -0800896 }
897 if write != read {
David Tolnay360efd22018-01-04 23:35:26 -0800898 let x = bytes[read];
899 bytes[write] = x;
900 }
901 write += 1;
902 }
903 bytes.truncate(write);
904 let input = String::from_utf8(bytes).unwrap();
David Tolnay76ebcdd2018-01-05 17:07:26 -0800905 let end = input.find('f').unwrap_or_else(|| input.len());
David Tolnay360efd22018-01-04 23:35:26 -0800906 input[..end].parse().unwrap()
907 }
908
909 pub fn to_literal(s: &str) -> Literal {
910 let stream = s.parse::<TokenStream>().unwrap();
911 match stream.into_iter().next().unwrap().kind {
912 TokenNode::Literal(l) => l,
913 _ => unreachable!(),
David Tolnayf17fd2f2016-10-07 23:38:08 -0700914 }
915 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700916}