blob: 56a354d22ff152b48d6cc736c64df3d757546135 [file] [log] [blame]
// Copyright 2018 Syn Developers
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
8
David Tolnay7d1d1282018-01-06 16:10:51 -08009use proc_macro2::{Literal, Span, TokenNode};
David Tolnay360efd22018-01-04 23:35:26 -080010use std::str;
11
David Tolnay7d1d1282018-01-06 16:10:51 -080012#[cfg(feature = "printing")]
13use proc_macro2::{Term, TokenTree};
14
David Tolnay360efd22018-01-04 23:35:26 -080015#[cfg(feature = "extra-traits")]
Alex Crichtonccbb45d2017-05-23 10:58:24 -070016use std::hash::{Hash, Hasher};
17
David Tolnay360efd22018-01-04 23:35:26 -080018ast_enum_of_structs! {
David Tolnayabf5c2e2018-01-06 23:30:04 -080019 /// A Rust literal such as a string or integer or boolean.
David Tolnay614a0142018-01-07 10:25:43 -080020 ///
21 /// # Syntax tree enum
22 ///
23 /// This type is a [syntax tree enum].
24 ///
25 /// [syntax tree enum]: enum.Expr.html#syntax-tree-enums
David Tolnay360efd22018-01-04 23:35:26 -080026 pub enum Lit {
David Tolnayabf5c2e2018-01-06 23:30:04 -080027 /// A UTF-8 string literal: `"foo"`.
David Tolnay360efd22018-01-04 23:35:26 -080028 pub Str(LitStr #manual_extra_traits {
29 token: Literal,
30 pub span: Span,
31 }),
Alex Crichtonccbb45d2017-05-23 10:58:24 -070032
David Tolnayabf5c2e2018-01-06 23:30:04 -080033 /// A byte string literal: `b"foo"`.
David Tolnay360efd22018-01-04 23:35:26 -080034 pub ByteStr(LitByteStr #manual_extra_traits {
35 token: Literal,
36 pub span: Span,
37 }),
38
David Tolnayabf5c2e2018-01-06 23:30:04 -080039 /// A byte literal: `b'f'`.
David Tolnay360efd22018-01-04 23:35:26 -080040 pub Byte(LitByte #manual_extra_traits {
41 token: Literal,
42 pub span: Span,
43 }),
44
David Tolnayabf5c2e2018-01-06 23:30:04 -080045 /// A character literal: `'a'`.
David Tolnay360efd22018-01-04 23:35:26 -080046 pub Char(LitChar #manual_extra_traits {
47 token: Literal,
48 pub span: Span,
49 }),
50
David Tolnayabf5c2e2018-01-06 23:30:04 -080051 /// An integer literal: `1` or `1u16`.
David Tolnay360efd22018-01-04 23:35:26 -080052 ///
53 /// Holds up to 64 bits of data. Use `LitVerbatim` for any larger
54 /// integer literal.
55 pub Int(LitInt #manual_extra_traits {
56 token: Literal,
57 pub span: Span,
58 }),
59
David Tolnayabf5c2e2018-01-06 23:30:04 -080060 /// A floating point literal: `1f64` or `1.0e10f64`.
David Tolnay360efd22018-01-04 23:35:26 -080061 ///
62 /// Must be finite. May not be infinte or NaN.
63 pub Float(LitFloat #manual_extra_traits {
64 token: Literal,
65 pub span: Span,
66 }),
67
David Tolnayabf5c2e2018-01-06 23:30:04 -080068 /// A boolean literal: `true` or `false`.
David Tolnay360efd22018-01-04 23:35:26 -080069 pub Bool(LitBool #manual_extra_traits {
70 pub value: bool,
71 pub span: Span,
72 }),
73
David Tolnayabf5c2e2018-01-06 23:30:04 -080074 /// A raw token literal not interpreted by Syn, possibly because it
75 /// represents an integer larger than 64 bits.
David Tolnay360efd22018-01-04 23:35:26 -080076 pub Verbatim(LitVerbatim #manual_extra_traits {
77 pub token: Literal,
78 pub span: Span,
79 }),
80 }
Alex Crichtonccbb45d2017-05-23 10:58:24 -070081}
82
David Tolnay360efd22018-01-04 23:35:26 -080083impl LitStr {
84 pub fn new(value: &str, span: Span) -> Self {
85 LitStr {
86 token: Literal::string(value),
87 span: span,
88 }
89 }
90
91 pub fn value(&self) -> String {
92 value::parse_lit_str(&self.token.to_string())
93 }
Alex Crichtonccbb45d2017-05-23 10:58:24 -070094}
95
David Tolnay360efd22018-01-04 23:35:26 -080096impl LitByteStr {
97 pub fn new(value: &[u8], span: Span) -> Self {
98 LitByteStr {
99 token: Literal::byte_string(value),
100 span: span,
101 }
102 }
103
104 pub fn value(&self) -> Vec<u8> {
105 value::parse_lit_byte_str(&self.token.to_string())
106 }
107}
108
109impl LitByte {
110 pub fn new(value: u8, span: Span) -> Self {
111 LitByte {
112 token: Literal::byte_char(value),
113 span: span,
114 }
115 }
116
117 pub fn value(&self) -> u8 {
118 value::parse_lit_byte(&self.token.to_string())
119 }
120}
121
122impl LitChar {
123 pub fn new(value: char, span: Span) -> Self {
124 LitChar {
125 token: Literal::character(value),
126 span: span,
127 }
128 }
129
130 pub fn value(&self) -> char {
131 value::parse_lit_char(&self.token.to_string())
132 }
133}
134
135impl LitInt {
136 pub fn new(value: u64, suffix: IntSuffix, span: Span) -> Self {
137 LitInt {
138 token: match suffix {
139 IntSuffix::Isize => Literal::isize(value as isize),
140 IntSuffix::I8 => Literal::i8(value as i8),
141 IntSuffix::I16 => Literal::i16(value as i16),
142 IntSuffix::I32 => Literal::i32(value as i32),
143 IntSuffix::I64 => Literal::i64(value as i64),
144 IntSuffix::I128 => value::to_literal(&format!("{}i128", value)),
145 IntSuffix::Usize => Literal::usize(value as usize),
146 IntSuffix::U8 => Literal::u8(value as u8),
147 IntSuffix::U16 => Literal::u16(value as u16),
148 IntSuffix::U32 => Literal::u32(value as u32),
149 IntSuffix::U64 => Literal::u64(value),
150 IntSuffix::U128 => value::to_literal(&format!("{}u128", value)),
151 IntSuffix::None => Literal::integer(value as i64),
152 },
153 span: span,
154 }
155 }
156
157 pub fn value(&self) -> u64 {
158 value::parse_lit_int(&self.token.to_string()).unwrap()
159 }
160
161 pub fn suffix(&self) -> IntSuffix {
162 let value = self.token.to_string();
163 for (s, suffix) in vec![
164 ("i8", IntSuffix::I8),
165 ("i16", IntSuffix::I16),
166 ("i32", IntSuffix::I32),
167 ("i64", IntSuffix::I64),
168 ("i128", IntSuffix::I128),
169 ("isize", IntSuffix::Isize),
170 ("u8", IntSuffix::U8),
171 ("u16", IntSuffix::U16),
172 ("u32", IntSuffix::U32),
173 ("u64", IntSuffix::U64),
174 ("u128", IntSuffix::U128),
175 ("usize", IntSuffix::Usize),
176 ] {
177 if value.ends_with(s) {
178 return suffix;
179 }
180 }
181 IntSuffix::None
182 }
183}
184
185impl LitFloat {
186 pub fn new(value: f64, suffix: FloatSuffix, span: Span) -> Self {
187 LitFloat {
188 token: match suffix {
189 FloatSuffix::F32 => Literal::f32(value as f32),
190 FloatSuffix::F64 => Literal::f64(value),
191 FloatSuffix::None => Literal::float(value),
192 },
193 span: span,
194 }
195 }
196
197 pub fn value(&self) -> f64 {
198 value::parse_lit_float(&self.token.to_string())
199 }
200
201 pub fn suffix(&self) -> FloatSuffix {
202 let value = self.token.to_string();
David Tolnay61037c62018-01-05 16:21:03 -0800203 for (s, suffix) in vec![("f32", FloatSuffix::F32), ("f64", FloatSuffix::F64)] {
David Tolnay360efd22018-01-04 23:35:26 -0800204 if value.ends_with(s) {
205 return suffix;
206 }
207 }
208 FloatSuffix::None
209 }
210}
211
// Implements `PartialEq`, `Eq` and `Hash` for the literal type `$ty` when the
// "extra-traits" feature is enabled. Both comparison and hashing go through
// the `to_string()` form of the field `$field`; no other field takes part.
macro_rules! lit_extra_traits {
    ($ty:ident, $field:ident) => {
        #[cfg(feature = "extra-traits")]
        impl PartialEq for $ty {
            fn eq(&self, other: &Self) -> bool {
                let lhs = self.$field.to_string();
                let rhs = other.$field.to_string();
                lhs == rhs
            }
        }

        #[cfg(feature = "extra-traits")]
        impl Eq for $ty {}

        #[cfg(feature = "extra-traits")]
        impl Hash for $ty {
            fn hash<H: Hasher>(&self, state: &mut H) {
                let repr = self.$field.to_string();
                repr.hash(state);
            }
        }
    }
}

lit_extra_traits!(LitStr, token);
lit_extra_traits!(LitByteStr, token);
lit_extra_traits!(LitByte, token);
lit_extra_traits!(LitChar, token);
lit_extra_traits!(LitInt, token);
lit_extra_traits!(LitFloat, token);
lit_extra_traits!(LitBool, value);
lit_extra_traits!(LitVerbatim, token);
244
245ast_enum! {
David Tolnay05658502018-01-07 09:56:37 -0800246 /// The style of a string literal, either plain quoted or a raw string like
David Tolnayabf5c2e2018-01-06 23:30:04 -0800247 /// `r##"data"##`.
David Tolnay360efd22018-01-04 23:35:26 -0800248 pub enum StrStyle #no_visit {
David Tolnayabf5c2e2018-01-06 23:30:04 -0800249 /// An ordinary string like `"data"`.
David Tolnay360efd22018-01-04 23:35:26 -0800250 Cooked,
David Tolnayabf5c2e2018-01-06 23:30:04 -0800251 /// A raw string like `r##"data"##`.
David Tolnay360efd22018-01-04 23:35:26 -0800252 ///
253 /// The unsigned integer is the number of `#` symbols used.
254 Raw(usize),
Alex Crichton62a0a592017-05-22 13:58:53 -0700255 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700256}
257
David Tolnay360efd22018-01-04 23:35:26 -0800258ast_enum! {
David Tolnayabf5c2e2018-01-06 23:30:04 -0800259 /// The suffix on an integer literal if any, like the `u8` in `127u8`.
David Tolnay360efd22018-01-04 23:35:26 -0800260 pub enum IntSuffix #no_visit {
261 I8,
262 I16,
263 I32,
264 I64,
265 I128,
266 Isize,
267 U8,
268 U16,
269 U32,
270 U64,
271 U128,
272 Usize,
273 None,
Pascal Hertleif36342c52016-10-19 10:31:42 +0200274 }
275}
276
David Tolnay360efd22018-01-04 23:35:26 -0800277ast_enum! {
David Tolnayabf5c2e2018-01-06 23:30:04 -0800278 /// The suffix on a floating point literal if any, like the `f32` in
279 /// `1.0f32`.
David Tolnay360efd22018-01-04 23:35:26 -0800280 pub enum FloatSuffix #no_visit {
281 F32,
282 F64,
283 None,
Alex Crichton2e0229c2017-05-23 09:34:50 -0700284 }
David Tolnay5fe14fc2017-01-27 16:22:08 -0800285}
286
#[cfg(feature = "parsing")]
pub mod parsing {
    use super::*;
    use synom::Synom;
    use buffer::Cursor;
    use parse_error;
    use synom::PResult;

    impl Synom for Lit {
        /// Parses one literal from `input`.
        ///
        /// A literal token is classified by `Lit::new`. Otherwise the terms
        /// `true` and `false` are accepted as boolean literals. Anything
        /// else is a parse error.
        fn parse(input: Cursor) -> PResult<Self> {
            if let Some((span, lit, rest)) = input.literal() {
                return Ok((Lit::new(lit, span), rest));
            }

            let (span, term, rest) = match input.term() {
                Some(found) => found,
                None => return parse_error(),
            };
            let value = match term.as_str() {
                "true" => true,
                "false" => false,
                _ => return parse_error(),
            };
            let lit = Lit::Bool(LitBool {
                value: value,
                span: span,
            });
            Ok((lit, rest))
        }

        fn description() -> Option<&'static str> {
            Some("literal")
        }
    }

    // Each specific literal type parses a `Lit` and accepts it only when it
    // is the matching variant.

    impl_synom!(LitStr "string literal" switch!(
        syn!(Lit),
        Lit::Str(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitByteStr "byte string literal" switch!(
        syn!(Lit),
        Lit::ByteStr(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitByte "byte literal" switch!(
        syn!(Lit),
        Lit::Byte(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitChar "character literal" switch!(
        syn!(Lit),
        Lit::Char(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitInt "integer literal" switch!(
        syn!(Lit),
        Lit::Int(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitFloat "floating point literal" switch!(
        syn!(Lit),
        Lit::Float(lit) => value!(lit)
        |
        _ => reject!()
    ));

    impl_synom!(LitBool "boolean literal" switch!(
        syn!(Lit),
        Lit::Bool(lit) => value!(lit)
        |
        _ => reject!()
    ));
}
372
#[cfg(feature = "printing")]
mod printing {
    use super::*;
    use quote::{ToTokens, Tokens};

    /// Appends a clone of `token` to `tokens` as a literal token tree
    /// carrying `span`.
    fn append_literal(tokens: &mut Tokens, token: &Literal, span: Span) {
        tokens.append(TokenTree {
            span: span,
            kind: TokenNode::Literal(token.clone()),
        });
    }

    impl ToTokens for LitStr {
        fn to_tokens(&self, tokens: &mut Tokens) {
            append_literal(tokens, &self.token, self.span);
        }
    }

    impl ToTokens for LitByteStr {
        fn to_tokens(&self, tokens: &mut Tokens) {
            append_literal(tokens, &self.token, self.span);
        }
    }

    impl ToTokens for LitByte {
        fn to_tokens(&self, tokens: &mut Tokens) {
            append_literal(tokens, &self.token, self.span);
        }
    }

    impl ToTokens for LitChar {
        fn to_tokens(&self, tokens: &mut Tokens) {
            append_literal(tokens, &self.token, self.span);
        }
    }

    impl ToTokens for LitInt {
        fn to_tokens(&self, tokens: &mut Tokens) {
            append_literal(tokens, &self.token, self.span);
        }
    }

    impl ToTokens for LitFloat {
        fn to_tokens(&self, tokens: &mut Tokens) {
            append_literal(tokens, &self.token, self.span);
        }
    }

    impl ToTokens for LitBool {
        // Booleans are emitted as the term `true` or `false`, not as a
        // literal token.
        fn to_tokens(&self, tokens: &mut Tokens) {
            let word = if self.value { "true" } else { "false" };
            tokens.append(TokenTree {
                span: self.span,
                kind: TokenNode::Term(Term::intern(word)),
            });
        }
    }

    impl ToTokens for LitVerbatim {
        fn to_tokens(&self, tokens: &mut Tokens) {
            append_literal(tokens, &self.token, self.span);
        }
    }
}
450
451mod value {
452 use super::*;
453 use std::char;
454 use std::ops::{Index, RangeFrom};
455 use proc_macro2::TokenStream;
456
David Tolnay7d1d1282018-01-06 16:10:51 -0800457 impl Lit {
458 pub fn new(token: Literal, span: Span) -> Self {
459 let value = token.to_string();
460
461 match value::byte(&value, 0) {
462 b'"' | b'r' => {
463 return Lit::Str(LitStr {
464 token: token,
465 span: span,
466 })
467 }
468 b'b' => match value::byte(&value, 1) {
469 b'"' | b'r' => {
470 return Lit::ByteStr(LitByteStr {
471 token: token,
472 span: span,
473 })
474 }
475 b'\'' => {
476 return Lit::Byte(LitByte {
477 token: token,
478 span: span,
479 })
480 }
481 _ => {}
482 },
483 b'\'' => {
484 return Lit::Char(LitChar {
485 token: token,
486 span: span,
487 })
488 }
489 b'0'...b'9' => if number_is_int(&value) {
490 return Lit::Int(LitInt {
491 token: token,
492 span: span,
493 });
494 } else if number_is_float(&value) {
495 return Lit::Float(LitFloat {
496 token: token,
497 span: span,
498 });
499 } else {
500 // number overflow
501 return Lit::Verbatim(LitVerbatim {
502 token: token,
503 span: span,
504 });
505 },
506 _ => if value == "true" || value == "false" {
507 return Lit::Bool(LitBool {
508 value: value == "true",
509 span: span,
510 });
511 },
512 }
513
514 panic!("Unrecognized literal: {}", value);
515 }
516 }
517
518 fn number_is_int(value: &str) -> bool {
519 if number_is_float(value) {
520 false
521 } else {
522 value::parse_lit_int(value).is_some()
523 }
524 }
525
/// Returns true if the numeric literal text `value` is written like a float:
/// it contains a `.`, or it contains an exponent marker (`e`/`E`) that cannot
/// be explained by a `0x` prefix or a `...size` suffix.
fn number_is_float(value: &str) -> bool {
    if value.contains('.') {
        return true;
    }
    // In hex literals and `isize`/`usize` suffixes the letter `e` is not an
    // exponent marker.
    let e_is_not_exponent = value.starts_with("0x") || value.ends_with("size");
    !e_is_not_exponent && value.contains(|c: char| c == 'e' || c == 'E')
}
535
/// Returns the byte at offset `idx` of `s`, or `b'\0'` when `idx` is past
/// the end of the input.
pub fn byte<S: AsRef<[u8]> + ?Sized>(s: &S, idx: usize) -> u8 {
    s.as_ref().get(idx).cloned().unwrap_or(0)
}
546
/// Returns the first character of `s`, or `'\0'` when `s` is empty.
fn next_chr(s: &str) -> char {
    match s.chars().next() {
        Some(first) => first,
        None => '\0',
    }
}
550
551 pub fn parse_lit_str(s: &str) -> String {
552 match byte(s, 0) {
553 b'"' => parse_lit_str_cooked(s),
554 b'r' => parse_lit_str_raw(s),
555 _ => unreachable!(),
556 }
557 }
558
David Tolnay76ebcdd2018-01-05 17:07:26 -0800559 // Clippy false positive
560 // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
561 #[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
David Tolnay360efd22018-01-04 23:35:26 -0800562 fn parse_lit_str_cooked(mut s: &str) -> String {
563 assert_eq!(byte(s, 0), b'"');
564 s = &s[1..];
565
566 let mut out = String::new();
567 'outer: loop {
568 let ch = match byte(s, 0) {
569 b'"' => break,
570 b'\\' => {
571 let b = byte(s, 1);
572 s = &s[2..];
573 match b {
574 b'x' => {
575 let (byte, rest) = backslash_x(s);
576 s = rest;
577 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800578 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800579 }
580 b'u' => {
David Tolnay76ebcdd2018-01-05 17:07:26 -0800581 let (chr, rest) = backslash_u(s);
David Tolnay360efd22018-01-04 23:35:26 -0800582 s = rest;
583 chr
584 }
585 b'n' => '\n',
586 b'r' => '\r',
587 b't' => '\t',
588 b'\\' => '\\',
589 b'0' => '\0',
590 b'\'' => '\'',
591 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800592 b'\r' | b'\n' => loop {
593 let ch = next_chr(s);
594 if ch.is_whitespace() {
595 s = &s[ch.len_utf8()..];
596 } else {
597 continue 'outer;
David Tolnay360efd22018-01-04 23:35:26 -0800598 }
David Tolnay61037c62018-01-05 16:21:03 -0800599 },
600 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800601 }
602 }
603 b'\r' => {
604 assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
605 s = &s[2..];
606 '\n'
607 }
608 _ => {
609 let ch = next_chr(s);
610 s = &s[ch.len_utf8()..];
611 ch
612 }
613 };
614 out.push(ch);
615 }
616
617 assert_eq!(s, "\"");
618 out
619 }
620
/// Decodes the text of a raw string literal (`r"..."`, `r#"..."#`, ...) and
/// returns the characters between the quotes unchanged.
///
/// # Panics
///
/// Panics if the text does not start with `r`, or if the opening and closing
/// delimiters are missing or do not use the same number of `#` symbols.
fn parse_lit_str_raw(s: &str) -> String {
    assert_eq!(s.as_bytes().first().cloned().unwrap_or(0), b'r');
    let body = &s[1..];
    let bytes = body.as_bytes();
    // Positions past the end read as NUL, so the asserts below fail cleanly.
    let byte_at = |idx: usize| bytes.get(idx).cloned().unwrap_or(0);

    let pounds = bytes.iter().take_while(|&&b| b == b'#').count();
    assert_eq!(byte_at(pounds), b'"');

    let closing_quote = body.len() - pounds - 1;
    assert_eq!(byte_at(closing_quote), b'"');
    for end in body[closing_quote + 1..].bytes() {
        assert_eq!(end, b'#');
    }

    body[pounds + 1..closing_quote].to_owned()
}
637
638 pub fn parse_lit_byte_str(s: &str) -> Vec<u8> {
639 assert_eq!(byte(s, 0), b'b');
640 match byte(s, 1) {
641 b'"' => parse_lit_byte_str_cooked(s),
642 b'r' => parse_lit_byte_str_raw(s),
643 _ => unreachable!(),
644 }
645 }
646
David Tolnay76ebcdd2018-01-05 17:07:26 -0800647 // Clippy false positive
648 // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
649 #[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
David Tolnay360efd22018-01-04 23:35:26 -0800650 fn parse_lit_byte_str_cooked(mut s: &str) -> Vec<u8> {
651 assert_eq!(byte(s, 0), b'b');
652 assert_eq!(byte(s, 1), b'"');
653 s = &s[2..];
654
655 // We're going to want to have slices which don't respect codepoint boundaries.
656 let mut s = s.as_bytes();
657
658 let mut out = Vec::new();
659 'outer: loop {
660 let byte = match byte(s, 0) {
661 b'"' => break,
662 b'\\' => {
663 let b = byte(s, 1);
664 s = &s[2..];
665 match b {
666 b'x' => {
667 let (b, rest) = backslash_x(s);
668 s = rest;
669 b
670 }
671 b'n' => b'\n',
672 b'r' => b'\r',
673 b't' => b'\t',
674 b'\\' => b'\\',
675 b'0' => b'\0',
676 b'\'' => b'\'',
677 b'"' => b'"',
David Tolnay61037c62018-01-05 16:21:03 -0800678 b'\r' | b'\n' => loop {
679 let byte = byte(s, 0);
David Tolnay76ebcdd2018-01-05 17:07:26 -0800680 let ch = char::from_u32(u32::from(byte)).unwrap();
David Tolnay61037c62018-01-05 16:21:03 -0800681 if ch.is_whitespace() {
682 s = &s[1..];
683 } else {
684 continue 'outer;
David Tolnay360efd22018-01-04 23:35:26 -0800685 }
David Tolnay61037c62018-01-05 16:21:03 -0800686 },
687 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800688 }
689 }
690 b'\r' => {
691 assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
692 s = &s[2..];
693 b'\n'
694 }
695 b => {
696 s = &s[1..];
697 b
698 }
699 };
700 out.push(byte);
701 }
702
703 assert_eq!(s, b"\"");
704 out
705 }
706
707 fn parse_lit_byte_str_raw(s: &str) -> Vec<u8> {
708 assert_eq!(byte(s, 0), b'b');
709 parse_lit_str_raw(&s[1..]).into_bytes()
710 }
711
712 pub fn parse_lit_byte(s: &str) -> u8 {
713 assert_eq!(byte(s, 0), b'b');
714 assert_eq!(byte(s, 1), b'\'');
715
716 // We're going to want to have slices which don't respect codepoint boundaries.
717 let mut s = s[2..].as_bytes();
718
719 let b = match byte(s, 0) {
720 b'\\' => {
721 let b = byte(s, 1);
722 s = &s[2..];
723 match b {
724 b'x' => {
725 let (b, rest) = backslash_x(s);
726 s = rest;
727 b
728 }
729 b'n' => b'\n',
730 b'r' => b'\r',
731 b't' => b'\t',
732 b'\\' => b'\\',
733 b'0' => b'\0',
734 b'\'' => b'\'',
735 b'"' => b'"',
David Tolnay61037c62018-01-05 16:21:03 -0800736 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800737 }
738 }
739 b => {
740 s = &s[1..];
741 b
742 }
743 };
744
745 assert_eq!(byte(s, 0), b'\'');
746 b
747 }
748
749 pub fn parse_lit_char(mut s: &str) -> char {
750 assert_eq!(byte(s, 0), b'\'');
751 s = &s[1..];
752
753 let ch = match byte(s, 0) {
754 b'\\' => {
755 let b = byte(s, 1);
756 s = &s[2..];
757 match b {
758 b'x' => {
759 let (byte, rest) = backslash_x(s);
760 s = rest;
761 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800762 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800763 }
764 b'u' => {
765 let (chr, rest) = backslash_u(s);
766 s = rest;
767 chr
768 }
769 b'n' => '\n',
770 b'r' => '\r',
771 b't' => '\t',
772 b'\\' => '\\',
773 b'0' => '\0',
774 b'\'' => '\'',
775 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800776 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800777 }
778 }
779 _ => {
780 let ch = next_chr(s);
781 s = &s[ch.len_utf8()..];
782 ch
783 }
784 };
785 assert_eq!(s, "\'", "Expected end of char literal");
786 ch
787 }
788
/// Decodes the two hex digits that follow a `\x` escape.
///
/// `s` is the input immediately after the `\x`. Returns the decoded byte and
/// the remainder of the input after the two digits.
///
/// # Panics
///
/// Panics if either of the first two bytes is not an ASCII hex digit, which
/// includes the case where fewer than two bytes remain.
fn backslash_x<S>(s: &S) -> (u8, &S)
where
    S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized,
{
    // A missing byte reads as NUL, which is rejected as a non-hex character.
    fn hex_value(b: u8) -> u8 {
        match (b as char).to_digit(16) {
            Some(digit) => digit as u8,
            None => panic!("unexpected non-hex character after \\x"),
        }
    }

    let (first, second) = {
        let bytes = s.as_ref();
        (
            bytes.get(0).cloned().unwrap_or(0),
            bytes.get(1).cloned().unwrap_or(0),
        )
    };
    let high = hex_value(first);
    let low = hex_value(second);
    (0x10 * high + low, &s[2..])
}
810
/// Decodes the `{...}` part of a `\u{...}` escape.
///
/// `s` is the input immediately after the `\u`. Up to six hex digits are
/// accepted between the braces, and `_` separators are allowed after the
/// first digit. Returns the decoded character and the remainder of the input
/// after the closing brace.
///
/// # Panics
///
/// Panics if the opening or closing brace is missing, if a character that is
/// neither a hex digit nor an allowed `_` appears, if there are more than six
/// digits, or if the value is not a valid Unicode scalar value.
fn backslash_u(mut s: &str) -> (char, &str) {
    if s.as_bytes().first() != Some(&b'{') {
        panic!("expected {{ after \\u");
    }
    s = &s[1..];

    let mut code = 0u32;
    let mut digits = 0;
    loop {
        // Running off the end reads as NUL and is rejected below.
        let b = s.as_bytes().first().cloned().unwrap_or(0);
        match (b as char).to_digit(16) {
            Some(digit) if digits < 6 => {
                code = code * 0x10 + digit;
                digits += 1;
            }
            // A seventh digit: stop here so the closing-brace check fails.
            Some(_) => break,
            // Separators are permitted once at least one digit was seen.
            None if b == b'_' && digits > 0 => {}
            None if b == b'}' => break,
            None => panic!("unexpected non-hex character after \\u"),
        }
        s = &s[1..];
    }
    assert!(s.as_bytes().first() == Some(&b'}'));
    s = &s[1..];

    match char::from_u32(code) {
        Some(ch) => (ch, s),
        None => panic!("character code {:x} is not a valid unicode character", code),
    }
}
849
/// Parses the digits of an integer literal's text into a `u64`.
///
/// Recognizes the `0x`, `0o` and `0b` prefixes, skips `_` separators, and
/// stops at the first character that is not a digit (for example a type
/// suffix). Returns `None` if the value overflows `u64`, or if a decimal
/// literal contains `.`, `e` or `E` and therefore looks like a float.
///
/// # Panics
///
/// Panics if a digit is not valid for the literal's base (such as `9` in an
/// octal literal). Text that does not start with an ASCII digit hits
/// `unreachable!()`.
pub fn parse_lit_int(mut s: &str) -> Option<u64> {
    let base: u64 = if s.starts_with("0x") {
        s = &s[2..];
        16
    } else if s.starts_with("0o") {
        s = &s[2..];
        8
    } else if s.starts_with("0b") {
        s = &s[2..];
        2
    } else if s.bytes().next().map_or(false, |b| b >= b'0' && b <= b'9') {
        10
    } else {
        unreachable!()
    };

    // Letters count as digits only in hexadecimal literals.
    let radix = if base > 10 { 16 } else { 10 };

    let mut value = 0u64;
    for b in s.bytes() {
        if b == b'_' {
            continue;
        }
        let digit = match (b as char).to_digit(radix) {
            Some(d) => u64::from(d),
            // NOTE: Looking at a floating point literal, we don't want to
            // consider these integers.
            None if base == 10 && (b == b'.' || b == b'e' || b == b'E') => return None,
            None => break,
        };

        if digit >= base {
            panic!("Unexpected digit {:x} out of base range", digit);
        }

        let shifted = value.checked_mul(base);
        value = match shifted.and_then(|v| v.checked_add(digit)) {
            Some(next) => next,
            None => return None,
        };
    }

    Some(value)
}
903
/// Parses the text of a floating point literal into an `f64`.
///
/// Rust's float literals are very close to what the standard library
/// parses, except that they may contain ignorable underscores and a type
/// suffix. The underscores are removed and everything from the first `f`
/// onward is cut off before parsing.
///
/// # Panics
///
/// Panics if the remaining text is not accepted by the standard library's
/// `f64` parser.
pub fn parse_lit_float(input: &str) -> f64 {
    let cleaned: String = input.chars().filter(|&c| c != '_').collect();
    let number = match cleaned.find('f') {
        Some(suffix_start) => &cleaned[..suffix_start],
        None => &cleaned[..],
    };
    number.parse().unwrap()
}
925
926 pub fn to_literal(s: &str) -> Literal {
927 let stream = s.parse::<TokenStream>().unwrap();
928 match stream.into_iter().next().unwrap().kind {
929 TokenNode::Literal(l) => l,
930 _ => unreachable!(),
David Tolnayf17fd2f2016-10-07 23:38:08 -0700931 }
932 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700933}