blob: 584ccb993149a509eff12d04d1034b75b5d26b84 [file] [log] [blame]
David Tolnay55535012018-01-05 16:39:23 -08001// Copyright 2018 Syn Developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
David Tolnay7d1d1282018-01-06 16:10:51 -08009use proc_macro2::{Literal, Span, TokenNode};
David Tolnay360efd22018-01-04 23:35:26 -080010use std::str;
11
David Tolnay7d1d1282018-01-06 16:10:51 -080012#[cfg(feature = "printing")]
13use proc_macro2::{Term, TokenTree};
14
David Tolnay360efd22018-01-04 23:35:26 -080015#[cfg(feature = "extra-traits")]
Alex Crichtonccbb45d2017-05-23 10:58:24 -070016use std::hash::{Hash, Hasher};
17
ast_enum_of_structs! {
    /// A Rust literal such as a string or integer or boolean.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    ///
    /// # Syntax tree enum
    ///
    /// This type is a [syntax tree enum].
    ///
    /// [syntax tree enum]: enum.Expr.html#syntax-tree-enums
    pub enum Lit {
        /// A UTF-8 string literal: `"foo"`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Str(LitStr #manual_extra_traits {
            // Private: the decoded contents are read through `LitStr::value`.
            token: Literal,
            pub span: Span,
        }),

        /// A byte string literal: `b"foo"`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub ByteStr(LitByteStr #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A byte literal: `b'f'`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Byte(LitByte #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A character literal: `'a'`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Char(LitChar #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// An integer literal: `1` or `1u16`.
        ///
        /// Holds up to 64 bits of data. Use `LitVerbatim` for any larger
        /// integer literal.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Int(LitInt #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A floating point literal: `1f64` or `1.0e10f64`.
        ///
        /// Must be finite. May not be infinite or NaN.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Float(LitFloat #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A boolean literal: `true` or `false`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Bool(LitBool #manual_extra_traits {
            // Unlike the other variants, a boolean stores its value directly
            // instead of a `Literal` token.
            pub value: bool,
            pub span: Span,
        }),

        /// A raw token literal not interpreted by Syn, possibly because it
        /// represents an integer larger than 64 bits.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Verbatim(LitVerbatim #manual_extra_traits {
            pub token: Literal,
            pub span: Span,
        }),
    }
}
109
David Tolnay360efd22018-01-04 23:35:26 -0800110impl LitStr {
111 pub fn new(value: &str, span: Span) -> Self {
112 LitStr {
113 token: Literal::string(value),
114 span: span,
115 }
116 }
117
118 pub fn value(&self) -> String {
119 value::parse_lit_str(&self.token.to_string())
120 }
Alex Crichtonccbb45d2017-05-23 10:58:24 -0700121}
122
David Tolnay360efd22018-01-04 23:35:26 -0800123impl LitByteStr {
124 pub fn new(value: &[u8], span: Span) -> Self {
125 LitByteStr {
126 token: Literal::byte_string(value),
127 span: span,
128 }
129 }
130
131 pub fn value(&self) -> Vec<u8> {
132 value::parse_lit_byte_str(&self.token.to_string())
133 }
134}
135
136impl LitByte {
137 pub fn new(value: u8, span: Span) -> Self {
138 LitByte {
139 token: Literal::byte_char(value),
140 span: span,
141 }
142 }
143
144 pub fn value(&self) -> u8 {
145 value::parse_lit_byte(&self.token.to_string())
146 }
147}
148
149impl LitChar {
150 pub fn new(value: char, span: Span) -> Self {
151 LitChar {
152 token: Literal::character(value),
153 span: span,
154 }
155 }
156
157 pub fn value(&self) -> char {
158 value::parse_lit_char(&self.token.to_string())
159 }
160}
161
162impl LitInt {
163 pub fn new(value: u64, suffix: IntSuffix, span: Span) -> Self {
164 LitInt {
165 token: match suffix {
166 IntSuffix::Isize => Literal::isize(value as isize),
167 IntSuffix::I8 => Literal::i8(value as i8),
168 IntSuffix::I16 => Literal::i16(value as i16),
169 IntSuffix::I32 => Literal::i32(value as i32),
170 IntSuffix::I64 => Literal::i64(value as i64),
171 IntSuffix::I128 => value::to_literal(&format!("{}i128", value)),
172 IntSuffix::Usize => Literal::usize(value as usize),
173 IntSuffix::U8 => Literal::u8(value as u8),
174 IntSuffix::U16 => Literal::u16(value as u16),
175 IntSuffix::U32 => Literal::u32(value as u32),
176 IntSuffix::U64 => Literal::u64(value),
177 IntSuffix::U128 => value::to_literal(&format!("{}u128", value)),
178 IntSuffix::None => Literal::integer(value as i64),
179 },
180 span: span,
181 }
182 }
183
184 pub fn value(&self) -> u64 {
185 value::parse_lit_int(&self.token.to_string()).unwrap()
186 }
187
188 pub fn suffix(&self) -> IntSuffix {
189 let value = self.token.to_string();
190 for (s, suffix) in vec![
191 ("i8", IntSuffix::I8),
192 ("i16", IntSuffix::I16),
193 ("i32", IntSuffix::I32),
194 ("i64", IntSuffix::I64),
195 ("i128", IntSuffix::I128),
196 ("isize", IntSuffix::Isize),
197 ("u8", IntSuffix::U8),
198 ("u16", IntSuffix::U16),
199 ("u32", IntSuffix::U32),
200 ("u64", IntSuffix::U64),
201 ("u128", IntSuffix::U128),
202 ("usize", IntSuffix::Usize),
203 ] {
204 if value.ends_with(s) {
205 return suffix;
206 }
207 }
208 IntSuffix::None
209 }
210}
211
212impl LitFloat {
213 pub fn new(value: f64, suffix: FloatSuffix, span: Span) -> Self {
214 LitFloat {
215 token: match suffix {
216 FloatSuffix::F32 => Literal::f32(value as f32),
217 FloatSuffix::F64 => Literal::f64(value),
218 FloatSuffix::None => Literal::float(value),
219 },
220 span: span,
221 }
222 }
223
224 pub fn value(&self) -> f64 {
225 value::parse_lit_float(&self.token.to_string())
226 }
227
228 pub fn suffix(&self) -> FloatSuffix {
229 let value = self.token.to_string();
David Tolnay61037c62018-01-05 16:21:03 -0800230 for (s, suffix) in vec![("f32", FloatSuffix::F32), ("f64", FloatSuffix::F64)] {
David Tolnay360efd22018-01-04 23:35:26 -0800231 if value.ends_with(s) {
232 return suffix;
233 }
234 }
235 FloatSuffix::None
236 }
237}
238
// Implements `Eq`, `PartialEq` and `Hash` for a literal type by comparing and
// hashing the string form of one field. The span does not take part, so two
// literals with the same text at different locations compare equal.
macro_rules! lit_extra_traits {
    ($ty:ident, $field:ident) => {
        #[cfg(feature = "extra-traits")]
        impl Eq for $ty {}

        #[cfg(feature = "extra-traits")]
        impl PartialEq for $ty {
            fn eq(&self, other: &Self) -> bool {
                let lhs = self.$field.to_string();
                let rhs = other.$field.to_string();
                lhs == rhs
            }
        }

        #[cfg(feature = "extra-traits")]
        impl Hash for $ty {
            fn hash<H: Hasher>(&self, state: &mut H) {
                let repr = self.$field.to_string();
                repr.hash(state);
            }
        }
    }
}

lit_extra_traits!(LitStr, token);
lit_extra_traits!(LitByteStr, token);
lit_extra_traits!(LitByte, token);
lit_extra_traits!(LitChar, token);
lit_extra_traits!(LitInt, token);
lit_extra_traits!(LitFloat, token);
// `LitBool` has no token; its `value` field is compared instead.
lit_extra_traits!(LitBool, value);
lit_extra_traits!(LitVerbatim, token);
271
ast_enum! {
    /// The style of a string literal, either plain quoted or a raw string like
    /// `r##"data"##`.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    pub enum StrStyle #no_visit {
        /// An ordinary string like `"data"`.
        Cooked,
        /// A raw string like `r##"data"##`.
        ///
        /// The unsigned integer is the number of `#` symbols used.
        Raw(usize),
    }
}
287
ast_enum! {
    /// The suffix on an integer literal if any, like the `u8` in `127u8`.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    pub enum IntSuffix #no_visit {
        /// The `i8` suffix.
        I8,
        /// The `i16` suffix.
        I16,
        /// The `i32` suffix.
        I32,
        /// The `i64` suffix.
        I64,
        /// The `i128` suffix.
        I128,
        /// The `isize` suffix.
        Isize,
        /// The `u8` suffix.
        U8,
        /// The `u16` suffix.
        U16,
        /// The `u32` suffix.
        U32,
        /// The `u64` suffix.
        U64,
        /// The `u128` suffix.
        U128,
        /// The `usize` suffix.
        Usize,
        /// No suffix was written on the literal.
        None,
    }
}
309
ast_enum! {
    /// The suffix on a floating point literal if any, like the `f32` in
    /// `1.0f32`.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    pub enum FloatSuffix #no_visit {
        /// The `f32` suffix.
        F32,
        /// The `f64` suffix.
        F64,
        /// No suffix was written on the literal.
        None,
    }
}
322
323#[cfg(feature = "parsing")]
David Tolnayf4bbbd92016-09-23 14:41:55 -0700324pub mod parsing {
325 use super::*;
David Tolnayc5ab8c62017-12-26 16:43:39 -0500326 use synom::Synom;
David Tolnaydfc886b2018-01-06 08:03:09 -0800327 use buffer::Cursor;
David Tolnay203557a2017-12-27 23:59:33 -0500328 use parse_error;
329 use synom::PResult;
David Tolnayf4bbbd92016-09-23 14:41:55 -0700330
Alex Crichton954046c2017-05-30 21:49:42 -0700331 impl Synom for Lit {
Michael Layzell92639a52017-06-01 00:07:44 -0400332 fn parse(input: Cursor) -> PResult<Self> {
Michael Layzell589a8f42017-06-02 19:47:01 -0400333 match input.literal() {
David Tolnay7037c9b2018-01-23 09:34:09 -0800334 Some((span, lit, rest)) => {
335 if lit.to_string().starts_with('/') {
336 // Doc comment literal which is not a Syn literal
337 parse_error()
338 } else {
339 Ok((Lit::new(lit, span), rest))
340 }
341 }
David Tolnay73c98de2017-12-31 15:56:56 -0500342 _ => match input.term() {
David Tolnay360efd22018-01-04 23:35:26 -0800343 Some((span, term, rest)) => Ok((
344 Lit::Bool(LitBool {
345 value: if term.as_str() == "true" {
346 true
347 } else if term.as_str() == "false" {
348 false
349 } else {
350 return parse_error();
David Tolnay51382052017-12-27 13:46:21 -0500351 },
David Tolnay360efd22018-01-04 23:35:26 -0800352 span: span,
353 }),
354 rest,
355 )),
Michael Layzell589a8f42017-06-02 19:47:01 -0400356 _ => parse_error(),
David Tolnay51382052017-12-27 13:46:21 -0500357 },
Michael Layzell589a8f42017-06-02 19:47:01 -0400358 }
David Tolnayfa0edf22016-09-23 22:58:24 -0700359 }
Sergio Benitez5680d6a2017-12-29 11:20:29 -0800360
361 fn description() -> Option<&'static str> {
362 Some("literal")
363 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700364 }
David Tolnay360efd22018-01-04 23:35:26 -0800365
// Each specific literal type is parsed by parsing a general `Lit` and then
// accepting only the matching variant; any other variant is rejected.
impl_synom!(LitStr "string literal" switch!(
    syn!(Lit),
    Lit::Str(lit) => value!(lit)
    |
    _ => reject!()
));

impl_synom!(LitByteStr "byte string literal" switch!(
    syn!(Lit),
    Lit::ByteStr(lit) => value!(lit)
    |
    _ => reject!()
));

impl_synom!(LitByte "byte literal" switch!(
    syn!(Lit),
    Lit::Byte(lit) => value!(lit)
    |
    _ => reject!()
));

impl_synom!(LitChar "character literal" switch!(
    syn!(Lit),
    Lit::Char(lit) => value!(lit)
    |
    _ => reject!()
));

impl_synom!(LitInt "integer literal" switch!(
    syn!(Lit),
    Lit::Int(lit) => value!(lit)
    |
    _ => reject!()
));

impl_synom!(LitFloat "floating point literal" switch!(
    syn!(Lit),
    Lit::Float(lit) => value!(lit)
    |
    _ => reject!()
));

impl_synom!(LitBool "boolean literal" switch!(
    syn!(Lit),
    Lit::Bool(lit) => value!(lit)
    |
    _ => reject!()
));
David Tolnayf4bbbd92016-09-23 14:41:55 -0700414}
415
#[cfg(feature = "printing")]
mod printing {
    use super::*;
    use quote::{ToTokens, Tokens};

    // Every literal type that stores a `Literal` token prints the same way:
    // a single token tree holding a clone of that token, at the literal's
    // span.
    macro_rules! literal_to_tokens {
        ($($ty:ident)*) => {
            $(
                impl ToTokens for $ty {
                    fn to_tokens(&self, tokens: &mut Tokens) {
                        let kind = TokenNode::Literal(self.token.clone());
                        tokens.append(TokenTree {
                            span: self.span,
                            kind: kind,
                        });
                    }
                }
            )*
        };
    }

    literal_to_tokens!(LitStr LitByteStr LitByte LitChar LitInt LitFloat LitVerbatim);

    // Booleans carry no token, so they are printed as the term `true` or
    // `false`.
    impl ToTokens for LitBool {
        fn to_tokens(&self, tokens: &mut Tokens) {
            let word = if self.value { "true" } else { "false" };
            tokens.append(TokenTree {
                span: self.span,
                kind: TokenNode::Term(Term::intern(word)),
            });
        }
    }
}
493
494mod value {
495 use super::*;
496 use std::char;
497 use std::ops::{Index, RangeFrom};
498 use proc_macro2::TokenStream;
499
David Tolnay7d1d1282018-01-06 16:10:51 -0800500 impl Lit {
David Tolnay780292d2018-01-22 23:26:44 -0800501 /// Interpret a Syn literal from a proc-macro2 literal.
502 ///
503 /// Not all proc-macro2 literals are valid Syn literals. In particular,
504 /// doc comments are considered by proc-macro2 to be literals but in Syn
505 /// they are [`Attribute`].
506 ///
507 /// [`Attribute`]: struct.Attribute.html
508 ///
509 /// # Panics
510 ///
511 /// Panics if the input is a doc comment literal.
David Tolnay7d1d1282018-01-06 16:10:51 -0800512 pub fn new(token: Literal, span: Span) -> Self {
513 let value = token.to_string();
514
515 match value::byte(&value, 0) {
516 b'"' | b'r' => {
517 return Lit::Str(LitStr {
518 token: token,
519 span: span,
520 })
521 }
522 b'b' => match value::byte(&value, 1) {
523 b'"' | b'r' => {
524 return Lit::ByteStr(LitByteStr {
525 token: token,
526 span: span,
527 })
528 }
529 b'\'' => {
530 return Lit::Byte(LitByte {
531 token: token,
532 span: span,
533 })
534 }
535 _ => {}
536 },
537 b'\'' => {
538 return Lit::Char(LitChar {
539 token: token,
540 span: span,
541 })
542 }
543 b'0'...b'9' => if number_is_int(&value) {
544 return Lit::Int(LitInt {
545 token: token,
546 span: span,
547 });
548 } else if number_is_float(&value) {
549 return Lit::Float(LitFloat {
550 token: token,
551 span: span,
552 });
553 } else {
554 // number overflow
555 return Lit::Verbatim(LitVerbatim {
556 token: token,
557 span: span,
558 });
559 },
560 _ => if value == "true" || value == "false" {
561 return Lit::Bool(LitBool {
562 value: value == "true",
563 span: span,
564 });
565 },
566 }
567
568 panic!("Unrecognized literal: {}", value);
569 }
570 }
571
572 fn number_is_int(value: &str) -> bool {
573 if number_is_float(value) {
574 false
575 } else {
576 value::parse_lit_int(value).is_some()
577 }
578 }
579
/// Returns whether the text of a numeric token looks like a floating point
/// literal.
///
/// A decimal point always means float. Hexadecimal literals and literals
/// ending in `size` (`usize` / `isize`) never are, even though they may
/// contain the letter `e`. Otherwise an exponent marker or an `f32` / `f64`
/// suffix makes the literal a float, so `1f64` is a float just like
/// `1.0f64`.
fn number_is_float(value: &str) -> bool {
    if value.contains('.') {
        return true;
    }
    if value.starts_with("0x") || value.ends_with("size") {
        return false;
    }
    value.contains('e')
        || value.contains('E')
        || value.ends_with("f32")
        || value.ends_with("f64")
}
589
/// Get the byte at offset idx, or a default of `b'\0'` if we're looking
/// past the end of the input buffer.
pub fn byte<S: AsRef<[u8]> + ?Sized>(s: &S, idx: usize) -> u8 {
    match s.as_ref().get(idx) {
        Some(&found) => found,
        None => 0,
    }
}
600
/// Returns the first character of `s`, or `'\0'` if `s` is empty.
fn next_chr(s: &str) -> char {
    match s.chars().next() {
        Some(first) => first,
        None => '\0',
    }
}
604
605 pub fn parse_lit_str(s: &str) -> String {
606 match byte(s, 0) {
607 b'"' => parse_lit_str_cooked(s),
608 b'r' => parse_lit_str_raw(s),
609 _ => unreachable!(),
610 }
611 }
612
David Tolnay76ebcdd2018-01-05 17:07:26 -0800613 // Clippy false positive
614 // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
615 #[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
David Tolnay360efd22018-01-04 23:35:26 -0800616 fn parse_lit_str_cooked(mut s: &str) -> String {
617 assert_eq!(byte(s, 0), b'"');
618 s = &s[1..];
619
620 let mut out = String::new();
621 'outer: loop {
622 let ch = match byte(s, 0) {
623 b'"' => break,
624 b'\\' => {
625 let b = byte(s, 1);
626 s = &s[2..];
627 match b {
628 b'x' => {
629 let (byte, rest) = backslash_x(s);
630 s = rest;
631 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800632 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800633 }
634 b'u' => {
David Tolnay76ebcdd2018-01-05 17:07:26 -0800635 let (chr, rest) = backslash_u(s);
David Tolnay360efd22018-01-04 23:35:26 -0800636 s = rest;
637 chr
638 }
639 b'n' => '\n',
640 b'r' => '\r',
641 b't' => '\t',
642 b'\\' => '\\',
643 b'0' => '\0',
644 b'\'' => '\'',
645 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800646 b'\r' | b'\n' => loop {
647 let ch = next_chr(s);
648 if ch.is_whitespace() {
649 s = &s[ch.len_utf8()..];
650 } else {
651 continue 'outer;
David Tolnay360efd22018-01-04 23:35:26 -0800652 }
David Tolnay61037c62018-01-05 16:21:03 -0800653 },
654 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800655 }
656 }
657 b'\r' => {
658 assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
659 s = &s[2..];
660 '\n'
661 }
662 _ => {
663 let ch = next_chr(s);
664 s = &s[ch.len_utf8()..];
665 ch
666 }
667 };
668 out.push(ch);
669 }
670
671 assert_eq!(s, "\"");
672 out
673 }
674
/// Decodes the source text of a raw string literal (`r"…"`, `r#"…"#`, …) by
/// stripping the `r`, the `#` fences and the quotes. The contents are
/// returned verbatim since raw strings have no escapes.
///
/// # Panics
///
/// Panics if the text does not have the `r#*"…"#*` shape.
fn parse_lit_str_raw(s: &str) -> String {
    // Byte at `idx`, or 0 when `idx` is past the end.
    let at = |text: &str, idx: usize| -> u8 {
        match text.as_bytes().get(idx) {
            Some(&found) => found,
            None => 0,
        }
    };

    assert_eq!(at(s, 0), b'r');
    let body = &s[1..];

    let pounds = body.bytes().take_while(|&b| b == b'#').count();
    assert_eq!(at(body, pounds), b'"');
    assert_eq!(at(body, body.len() - pounds - 1), b'"');
    for end in body[body.len() - pounds..].bytes() {
        assert_eq!(end, b'#');
    }

    let inner = &body[pounds + 1..body.len() - pounds - 1];
    inner.to_owned()
}
691
692 pub fn parse_lit_byte_str(s: &str) -> Vec<u8> {
693 assert_eq!(byte(s, 0), b'b');
694 match byte(s, 1) {
695 b'"' => parse_lit_byte_str_cooked(s),
696 b'r' => parse_lit_byte_str_raw(s),
697 _ => unreachable!(),
698 }
699 }
700
/// Decodes the source text of a cooked byte string literal (`b"…"`) into the
/// bytes it denotes.
///
/// Handles backslash escapes, backslash-newline continuations (which also
/// swallow the whitespace that follows) and CRLF line endings, which become
/// `\n`. Panics on an unknown escape, a bare CR, or anything left over after
/// the closing quote.
// Clippy false positive
// https://github.com/rust-lang-nursery/rust-clippy/issues/2329
#[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
fn parse_lit_byte_str_cooked(mut s: &str) -> Vec<u8> {
    assert_eq!(byte(s, 0), b'b');
    assert_eq!(byte(s, 1), b'"');
    s = &s[2..];

    // We're going to want to have slices which don't respect codepoint boundaries.
    let mut s = s.as_bytes();

    let mut out = Vec::new();
    'outer: loop {
        let byte = match byte(s, 0) {
            // Closing quote: decoding is done.
            b'"' => break,
            b'\\' => {
                let b = byte(s, 1);
                // Consume the backslash and the escape selector.
                s = &s[2..];
                match b {
                    b'x' => {
                        let (b, rest) = backslash_x(s);
                        s = rest;
                        b
                    }
                    b'n' => b'\n',
                    b'r' => b'\r',
                    b't' => b'\t',
                    b'\\' => b'\\',
                    b'0' => b'\0',
                    b'\'' => b'\'',
                    b'"' => b'"',
                    // Line continuation: skip whitespace bytes, then resume
                    // the outer loop without pushing anything.
                    b'\r' | b'\n' => loop {
                        let byte = byte(s, 0);
                        let ch = char::from_u32(u32::from(byte)).unwrap();
                        if ch.is_whitespace() {
                            s = &s[1..];
                        } else {
                            continue 'outer;
                        }
                    },
                    b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
                }
            }
            b'\r' => {
                // A CR is only accepted as part of CRLF, which decodes to LF.
                assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
                s = &s[2..];
                b'\n'
            }
            b => {
                s = &s[1..];
                b
            }
        };
        out.push(byte);
    }

    // Only the closing quote may remain.
    assert_eq!(s, b"\"");
    out
}
760
761 fn parse_lit_byte_str_raw(s: &str) -> Vec<u8> {
762 assert_eq!(byte(s, 0), b'b');
763 parse_lit_str_raw(&s[1..]).into_bytes()
764 }
765
766 pub fn parse_lit_byte(s: &str) -> u8 {
767 assert_eq!(byte(s, 0), b'b');
768 assert_eq!(byte(s, 1), b'\'');
769
770 // We're going to want to have slices which don't respect codepoint boundaries.
771 let mut s = s[2..].as_bytes();
772
773 let b = match byte(s, 0) {
774 b'\\' => {
775 let b = byte(s, 1);
776 s = &s[2..];
777 match b {
778 b'x' => {
779 let (b, rest) = backslash_x(s);
780 s = rest;
781 b
782 }
783 b'n' => b'\n',
784 b'r' => b'\r',
785 b't' => b'\t',
786 b'\\' => b'\\',
787 b'0' => b'\0',
788 b'\'' => b'\'',
789 b'"' => b'"',
David Tolnay61037c62018-01-05 16:21:03 -0800790 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800791 }
792 }
793 b => {
794 s = &s[1..];
795 b
796 }
797 };
798
799 assert_eq!(byte(s, 0), b'\'');
800 b
801 }
802
803 pub fn parse_lit_char(mut s: &str) -> char {
804 assert_eq!(byte(s, 0), b'\'');
805 s = &s[1..];
806
807 let ch = match byte(s, 0) {
808 b'\\' => {
809 let b = byte(s, 1);
810 s = &s[2..];
811 match b {
812 b'x' => {
813 let (byte, rest) = backslash_x(s);
814 s = rest;
815 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800816 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800817 }
818 b'u' => {
819 let (chr, rest) = backslash_u(s);
820 s = rest;
821 chr
822 }
823 b'n' => '\n',
824 b'r' => '\r',
825 b't' => '\t',
826 b'\\' => '\\',
827 b'0' => '\0',
828 b'\'' => '\'',
829 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800830 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800831 }
832 }
833 _ => {
834 let ch = next_chr(s);
835 s = &s[ch.len_utf8()..];
836 ch
837 }
838 };
839 assert_eq!(s, "\'", "Expected end of char literal");
840 ch
841 }
842
/// Decodes the two hex digits that follow `\x` at the start of `s`.
///
/// Returns the decoded byte and the remainder of `s` after the two digits.
/// Works on both `str` and `[u8]` input.
///
/// # Panics
///
/// Panics if either of the first two bytes is missing or is not a hex digit.
fn backslash_x<S>(s: &S) -> (u8, &S)
where
    S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized,
{
    // Value of one hex digit; a missing byte is treated as a non-digit.
    fn hex_value(b: Option<&u8>) -> u8 {
        match b.and_then(|&b| (b as char).to_digit(16)) {
            Some(digit) => digit as u8,
            None => panic!("unexpected non-hex character after \\x"),
        }
    }

    let (high, low) = {
        let bytes = s.as_ref();
        (hex_value(bytes.get(0)), hex_value(bytes.get(1)))
    };
    (0x10 * high + low, &s[2..])
}
864
/// Decodes the `{…}` part of a `\u{…}` escape at the start of `s`.
///
/// Accepts up to six hex digits. Underscores between or after digits are
/// skipped, so `{1_F600}` and `{1F600}` decode to the same character.
/// Returns the decoded character and the remainder of `s` after the closing
/// brace.
///
/// # Panics
///
/// Panics if `s` does not start with `{`, if the braces contain anything
/// other than hex digits and separating underscores, if there are more than
/// six digits, or if the code is not a valid Unicode scalar value.
fn backslash_u(mut s: &str) -> (char, &str) {
    if s.as_bytes().first() != Some(&b'{') {
        panic!("expected {{ after \\u");
    }
    s = &s[1..];

    let mut code = 0u32;
    let mut digits = 0;
    loop {
        // A missing byte reads as 0, which is rejected as a non-hex
        // character below.
        let b = match s.as_bytes().first() {
            Some(&b) => b,
            None => 0,
        };
        match b {
            b'}' => break,
            // Separators are only legal once at least one digit was seen.
            b'_' if digits > 0 => {}
            _ => match (b as char).to_digit(16) {
                Some(digit) if digits < 6 => {
                    code = code * 0x10 + digit;
                    digits += 1;
                }
                Some(_) => panic!("overlong unicode escape (must have at most 6 hex digits)"),
                None => panic!("unexpected non-hex character after \\u"),
            },
        }
        s = &s[1..];
    }
    // Skip the closing brace.
    s = &s[1..];

    match char::from_u32(code) {
        Some(ch) => (ch, s),
        None => panic!("character code {:x} is not a valid unicode character", code),
    }
}
903
/// Parses the numeric value of an integer literal's source text.
///
/// Understands the `0x`, `0o` and `0b` prefixes and `_` digit separators, and
/// stops at the first byte that is not a digit (such as a type suffix).
///
/// Returns `None` if the value does not fit in a `u64`, or if the text turns
/// out to be a decimal floating point literal (it contains `.`, `e` or `E`).
///
/// # Panics
///
/// Panics if a digit is out of range for the base, e.g. `0b12`. The text must
/// start with an ASCII digit.
pub fn parse_lit_int(s: &str) -> Option<u64> {
    // Byte at `idx`, or 0 when `idx` is past the end.
    let peek = |bytes: &[u8], idx: usize| -> u8 {
        match bytes.get(idx) {
            Some(&found) => found,
            None => 0,
        }
    };

    let bytes = s.as_bytes();
    let (base, mut rest) = match (peek(bytes, 0), peek(bytes, 1)) {
        (b'0', b'x') => (16u64, &bytes[2..]),
        (b'0', b'o') => (8, &bytes[2..]),
        (b'0', b'b') => (2, &bytes[2..]),
        (first, _) if (first as char).is_digit(10) => (10, bytes),
        _ => unreachable!(),
    };

    let mut value = 0u64;
    while let Some(&b) = rest.first() {
        let digit = match b {
            b'_' => {
                rest = &rest[1..];
                continue;
            }
            // NOTE: Looking at a floating point literal, we don't want to
            // consider these integers.
            b'.' | b'e' | b'E' if base == 10 => return None,
            _ => match (b as char).to_digit(16) {
                // Letters only count as digits in hexadecimal.
                Some(d) if d < 10 || base > 10 => u64::from(d),
                _ => break,
            },
        };

        if digit >= base {
            panic!("Unexpected digit {:x} out of base range", digit);
        }

        value = match value.checked_mul(base).and_then(|v| v.checked_add(digit)) {
            Some(next) => next,
            None => return None,
        };
        rest = &rest[1..];
    }

    Some(value)
}
957
/// Parses the numeric value of a floating point literal's source text.
///
/// # Panics
///
/// Panics if the text before any `f` suffix is not a number the standard
/// library can parse.
pub fn parse_lit_float(input: &str) -> f64 {
    // Rust's floating point literals are very similar to the ones parsed by
    // the standard library, except that rust's literals can contain
    // ignorable underscores. Let's remove those underscores.
    let cleaned: String = input.chars().filter(|&c| c != '_').collect();

    // Anything from the first `f` onwards is the type suffix.
    let number = match cleaned.find('f') {
        Some(suffix_start) => &cleaned[..suffix_start],
        None => &cleaned[..],
    };
    number.parse().unwrap()
}
979
980 pub fn to_literal(s: &str) -> Literal {
981 let stream = s.parse::<TokenStream>().unwrap();
982 match stream.into_iter().next().unwrap().kind {
983 TokenNode::Literal(l) => l,
984 _ => unreachable!(),
David Tolnayf17fd2f2016-10-07 23:38:08 -0700985 }
986 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700987}