blob: 0176415263ea7a3750df4c4efb177d3840a53268 [file] [log] [blame]
David Tolnay55535012018-01-05 16:39:23 -08001// Copyright 2018 Syn Developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
David Tolnay7d1d1282018-01-06 16:10:51 -08009use proc_macro2::{Literal, Span, TokenNode};
David Tolnay360efd22018-01-04 23:35:26 -080010use std::str;
11
David Tolnay7d1d1282018-01-06 16:10:51 -080012#[cfg(feature = "printing")]
13use proc_macro2::{Term, TokenTree};
14
David Tolnayd53ac2b2018-01-27 19:00:06 -080015#[cfg(feature = "parsing")]
16use proc_macro2::TokenStream;
17#[cfg(feature = "parsing")]
18use {ParseError, Synom};
19
David Tolnay360efd22018-01-04 23:35:26 -080020#[cfg(feature = "extra-traits")]
Alex Crichtonccbb45d2017-05-23 10:58:24 -070021use std::hash::{Hash, Hasher};
22
ast_enum_of_structs! {
    /// A Rust literal such as a string or integer or boolean.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    ///
    /// # Syntax tree enum
    ///
    /// This type is a [syntax tree enum].
    ///
    /// [syntax tree enum]: enum.Expr.html#syntax-tree-enums
    // NOTE(review): `#manual_extra_traits` presumably tells the macro not to
    // derive `Eq`/`PartialEq`/`Hash`; hand-written impls for every variant
    // struct are generated by `lit_extra_traits!` in this file. Confirm
    // against the macro definition.
    pub enum Lit {
        /// A UTF-8 string literal: `"foo"`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Str(LitStr #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A byte string literal: `b"foo"`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub ByteStr(LitByteStr #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A byte literal: `b'f'`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Byte(LitByte #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A character literal: `'a'`.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Char(LitChar #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// An integer literal: `1` or `1u16`.
        ///
        /// Holds up to 64 bits of data. Use `LitVerbatim` for any larger
        /// integer literal.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Int(LitInt #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A floating point literal: `1f64` or `1.0e10f64`.
        ///
        /// Must be finite. May not be infinite or NaN.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Float(LitFloat #manual_extra_traits {
            token: Literal,
            pub span: Span,
        }),

        /// A boolean literal: `true` or `false`.
        ///
        /// Unlike the other variants this one stores the decoded value
        /// directly rather than a `Literal` token.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Bool(LitBool #manual_extra_traits {
            pub value: bool,
            pub span: Span,
        }),

        /// A raw token literal not interpreted by Syn, possibly because it
        /// represents an integer larger than 64 bits.
        ///
        /// *This type is available if Syn is built with the `"derive"` or
        /// `"full"` feature.*
        pub Verbatim(LitVerbatim #manual_extra_traits {
            pub token: Literal,
            pub span: Span,
        }),
    }
}
114
David Tolnay360efd22018-01-04 23:35:26 -0800115impl LitStr {
116 pub fn new(value: &str, span: Span) -> Self {
117 LitStr {
118 token: Literal::string(value),
119 span: span,
120 }
121 }
122
123 pub fn value(&self) -> String {
124 value::parse_lit_str(&self.token.to_string())
125 }
David Tolnayd53ac2b2018-01-27 19:00:06 -0800126
127 /// Parse a syntax tree node from the content of this string literal.
128 ///
129 /// All spans in the syntax tree will point to the span of this `LitStr`.
130 #[cfg(feature = "parsing")]
131 pub fn parse<T: Synom>(&self) -> Result<T, ParseError> {
132 // Parse string literal into a token stream with every span equal to the
133 // original literal's span.
134 fn spanned_tokens(s: &LitStr) -> Result<TokenStream, ParseError> {
135 let stream = ::parse_str(&s.value())?;
136 Ok(respan_token_stream(stream, s.span))
137 }
138
139 // Token stream with every span replaced by the given one.
140 fn respan_token_stream(stream: TokenStream, span: Span) -> TokenStream {
141 stream.into_iter().map(|token| respan_token_tree(token, span)).collect()
142 }
143
144 // Token tree with every span replaced by the given one.
145 fn respan_token_tree(token: TokenTree, span: Span) -> TokenTree {
146 TokenTree {
147 span: span,
148 kind: match token.kind {
149 TokenNode::Group(delimiter, nested) => {
150 TokenNode::Group(delimiter, respan_token_stream(nested, span))
151 }
152 other => other,
153 },
154 }
155 }
156
157 spanned_tokens(self).and_then(::parse2)
158 }
Alex Crichtonccbb45d2017-05-23 10:58:24 -0700159}
160
David Tolnay360efd22018-01-04 23:35:26 -0800161impl LitByteStr {
162 pub fn new(value: &[u8], span: Span) -> Self {
163 LitByteStr {
164 token: Literal::byte_string(value),
165 span: span,
166 }
167 }
168
169 pub fn value(&self) -> Vec<u8> {
170 value::parse_lit_byte_str(&self.token.to_string())
171 }
172}
173
174impl LitByte {
175 pub fn new(value: u8, span: Span) -> Self {
176 LitByte {
177 token: Literal::byte_char(value),
178 span: span,
179 }
180 }
181
182 pub fn value(&self) -> u8 {
183 value::parse_lit_byte(&self.token.to_string())
184 }
185}
186
187impl LitChar {
188 pub fn new(value: char, span: Span) -> Self {
189 LitChar {
190 token: Literal::character(value),
191 span: span,
192 }
193 }
194
195 pub fn value(&self) -> char {
196 value::parse_lit_char(&self.token.to_string())
197 }
198}
199
200impl LitInt {
201 pub fn new(value: u64, suffix: IntSuffix, span: Span) -> Self {
202 LitInt {
203 token: match suffix {
204 IntSuffix::Isize => Literal::isize(value as isize),
205 IntSuffix::I8 => Literal::i8(value as i8),
206 IntSuffix::I16 => Literal::i16(value as i16),
207 IntSuffix::I32 => Literal::i32(value as i32),
208 IntSuffix::I64 => Literal::i64(value as i64),
209 IntSuffix::I128 => value::to_literal(&format!("{}i128", value)),
210 IntSuffix::Usize => Literal::usize(value as usize),
211 IntSuffix::U8 => Literal::u8(value as u8),
212 IntSuffix::U16 => Literal::u16(value as u16),
213 IntSuffix::U32 => Literal::u32(value as u32),
214 IntSuffix::U64 => Literal::u64(value),
215 IntSuffix::U128 => value::to_literal(&format!("{}u128", value)),
216 IntSuffix::None => Literal::integer(value as i64),
217 },
218 span: span,
219 }
220 }
221
222 pub fn value(&self) -> u64 {
223 value::parse_lit_int(&self.token.to_string()).unwrap()
224 }
225
226 pub fn suffix(&self) -> IntSuffix {
227 let value = self.token.to_string();
228 for (s, suffix) in vec![
229 ("i8", IntSuffix::I8),
230 ("i16", IntSuffix::I16),
231 ("i32", IntSuffix::I32),
232 ("i64", IntSuffix::I64),
233 ("i128", IntSuffix::I128),
234 ("isize", IntSuffix::Isize),
235 ("u8", IntSuffix::U8),
236 ("u16", IntSuffix::U16),
237 ("u32", IntSuffix::U32),
238 ("u64", IntSuffix::U64),
239 ("u128", IntSuffix::U128),
240 ("usize", IntSuffix::Usize),
241 ] {
242 if value.ends_with(s) {
243 return suffix;
244 }
245 }
246 IntSuffix::None
247 }
248}
249
250impl LitFloat {
251 pub fn new(value: f64, suffix: FloatSuffix, span: Span) -> Self {
252 LitFloat {
253 token: match suffix {
254 FloatSuffix::F32 => Literal::f32(value as f32),
255 FloatSuffix::F64 => Literal::f64(value),
256 FloatSuffix::None => Literal::float(value),
257 },
258 span: span,
259 }
260 }
261
262 pub fn value(&self) -> f64 {
263 value::parse_lit_float(&self.token.to_string())
264 }
265
266 pub fn suffix(&self) -> FloatSuffix {
267 let value = self.token.to_string();
David Tolnay61037c62018-01-05 16:21:03 -0800268 for (s, suffix) in vec![("f32", FloatSuffix::F32), ("f64", FloatSuffix::F64)] {
David Tolnay360efd22018-01-04 23:35:26 -0800269 if value.ends_with(s) {
270 return suffix;
271 }
272 }
273 FloatSuffix::None
274 }
275}
276
// Generates `Eq`, `PartialEq` and `Hash` impls for a literal struct `$ty`,
// all gated on the "extra-traits" feature.
//
// Equality and hashing are both defined on the `to_string()` rendering of the
// single field named by `$field`; the `span` field is not consulted, so two
// literals that differ only in span compare equal and hash identically.
macro_rules! lit_extra_traits {
    ($ty:ident, $field:ident) => {
        #[cfg(feature = "extra-traits")]
        impl Eq for $ty {}

        #[cfg(feature = "extra-traits")]
        impl PartialEq for $ty {
            fn eq(&self, other: &Self) -> bool {
                self.$field.to_string() == other.$field.to_string()
            }
        }

        #[cfg(feature = "extra-traits")]
        impl Hash for $ty {
            fn hash<H>(&self, state: &mut H)
            where
                H: Hasher,
            {
                // Hash the same string that `eq` compares so the two impls
                // stay consistent with each other.
                self.$field.to_string().hash(state);
            }
        }
    }
}

// Token-backed literals compare by their token text; `LitBool` has no token
// and compares by its `value` field instead.
lit_extra_traits!(LitStr, token);
lit_extra_traits!(LitByteStr, token);
lit_extra_traits!(LitByte, token);
lit_extra_traits!(LitChar, token);
lit_extra_traits!(LitInt, token);
lit_extra_traits!(LitFloat, token);
lit_extra_traits!(LitBool, value);
lit_extra_traits!(LitVerbatim, token);
309
ast_enum! {
    /// The style of a string literal, either plain quoted or a raw string like
    /// `r##"data"##`.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    pub enum StrStyle #no_visit {
        /// An ordinary string like `"data"`.
        Cooked,
        /// A raw string like `r##"data"##`.
        ///
        /// The unsigned integer is the number of `#` symbols used on each
        /// side of the quotes.
        Raw(usize),
    }
}
325
ast_enum! {
    /// The suffix on an integer literal if any, like the `u8` in `127u8`.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    pub enum IntSuffix #no_visit {
        /// The `i8` suffix.
        I8,
        /// The `i16` suffix.
        I16,
        /// The `i32` suffix.
        I32,
        /// The `i64` suffix.
        I64,
        /// The `i128` suffix.
        I128,
        /// The `isize` suffix.
        Isize,
        /// The `u8` suffix.
        U8,
        /// The `u16` suffix.
        U16,
        /// The `u32` suffix.
        U32,
        /// The `u64` suffix.
        U64,
        /// The `u128` suffix.
        U128,
        /// The `usize` suffix.
        Usize,
        /// No suffix, as in `127`.
        None,
    }
}
347
ast_enum! {
    /// The suffix on a floating point literal if any, like the `f32` in
    /// `1.0f32`.
    ///
    /// *This type is available if Syn is built with the `"derive"` or `"full"`
    /// feature.*
    pub enum FloatSuffix #no_visit {
        /// The `f32` suffix.
        F32,
        /// The `f64` suffix.
        F64,
        /// No suffix, as in `1.0`.
        None,
    }
}
360
361#[cfg(feature = "parsing")]
David Tolnayf4bbbd92016-09-23 14:41:55 -0700362pub mod parsing {
363 use super::*;
David Tolnayc5ab8c62017-12-26 16:43:39 -0500364 use synom::Synom;
David Tolnaydfc886b2018-01-06 08:03:09 -0800365 use buffer::Cursor;
David Tolnay203557a2017-12-27 23:59:33 -0500366 use parse_error;
367 use synom::PResult;
David Tolnayf4bbbd92016-09-23 14:41:55 -0700368
Alex Crichton954046c2017-05-30 21:49:42 -0700369 impl Synom for Lit {
Michael Layzell92639a52017-06-01 00:07:44 -0400370 fn parse(input: Cursor) -> PResult<Self> {
Michael Layzell589a8f42017-06-02 19:47:01 -0400371 match input.literal() {
David Tolnay7037c9b2018-01-23 09:34:09 -0800372 Some((span, lit, rest)) => {
373 if lit.to_string().starts_with('/') {
374 // Doc comment literal which is not a Syn literal
375 parse_error()
376 } else {
377 Ok((Lit::new(lit, span), rest))
378 }
379 }
David Tolnay73c98de2017-12-31 15:56:56 -0500380 _ => match input.term() {
David Tolnay360efd22018-01-04 23:35:26 -0800381 Some((span, term, rest)) => Ok((
382 Lit::Bool(LitBool {
383 value: if term.as_str() == "true" {
384 true
385 } else if term.as_str() == "false" {
386 false
387 } else {
388 return parse_error();
David Tolnay51382052017-12-27 13:46:21 -0500389 },
David Tolnay360efd22018-01-04 23:35:26 -0800390 span: span,
391 }),
392 rest,
393 )),
Michael Layzell589a8f42017-06-02 19:47:01 -0400394 _ => parse_error(),
David Tolnay51382052017-12-27 13:46:21 -0500395 },
Michael Layzell589a8f42017-06-02 19:47:01 -0400396 }
David Tolnayfa0edf22016-09-23 22:58:24 -0700397 }
Sergio Benitez5680d6a2017-12-29 11:20:29 -0800398
399 fn description() -> Option<&'static str> {
400 Some("literal")
401 }
David Tolnayf4bbbd92016-09-23 14:41:55 -0700402 }
David Tolnay360efd22018-01-04 23:35:26 -0800403
// Parsers for the individual literal kinds. Each one is written in terms of
// the general `Lit` parser: it parses a `Lit`, yields the inner struct when
// the matching variant was produced, and rejects every other variant. The
// string after the type name is the description attached to the parser.

// Accepts only `Lit::Str`.
impl_synom!(LitStr "string literal" switch!(
    syn!(Lit),
    Lit::Str(lit) => value!(lit)
    |
    _ => reject!()
));

// Accepts only `Lit::ByteStr`.
impl_synom!(LitByteStr "byte string literal" switch!(
    syn!(Lit),
    Lit::ByteStr(lit) => value!(lit)
    |
    _ => reject!()
));

// Accepts only `Lit::Byte`.
impl_synom!(LitByte "byte literal" switch!(
    syn!(Lit),
    Lit::Byte(lit) => value!(lit)
    |
    _ => reject!()
));

// Accepts only `Lit::Char`.
impl_synom!(LitChar "character literal" switch!(
    syn!(Lit),
    Lit::Char(lit) => value!(lit)
    |
    _ => reject!()
));

// Accepts only `Lit::Int`.
impl_synom!(LitInt "integer literal" switch!(
    syn!(Lit),
    Lit::Int(lit) => value!(lit)
    |
    _ => reject!()
));

// Accepts only `Lit::Float`.
impl_synom!(LitFloat "floating point literal" switch!(
    syn!(Lit),
    Lit::Float(lit) => value!(lit)
    |
    _ => reject!()
));

// Accepts only `Lit::Bool`.
impl_synom!(LitBool "boolean literal" switch!(
    syn!(Lit),
    Lit::Bool(lit) => value!(lit)
    |
    _ => reject!()
));
David Tolnayf4bbbd92016-09-23 14:41:55 -0700452}
453
#[cfg(feature = "printing")]
mod printing {
    use super::*;
    use quote::{ToTokens, Tokens};

    // Builds the token tree for a literal: a clone of `token` wrapped in
    // `TokenNode::Literal` and tagged with `span`.
    fn literal_tree(token: &Literal, span: Span) -> TokenTree {
        TokenTree {
            span: span,
            kind: TokenNode::Literal(token.clone()),
        }
    }

    // Every token-backed literal prints as its stored token carrying the
    // literal's own span.

    impl ToTokens for LitStr {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(&self.token, self.span));
        }
    }

    impl ToTokens for LitByteStr {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(&self.token, self.span));
        }
    }

    impl ToTokens for LitByte {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(&self.token, self.span));
        }
    }

    impl ToTokens for LitChar {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(&self.token, self.span));
        }
    }

    impl ToTokens for LitInt {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(&self.token, self.span));
        }
    }

    impl ToTokens for LitFloat {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(&self.token, self.span));
        }
    }

    // A boolean has no stored token; it prints as the term `true` or `false`.
    impl ToTokens for LitBool {
        fn to_tokens(&self, tokens: &mut Tokens) {
            let word = if self.value { "true" } else { "false" };
            tokens.append(TokenTree {
                span: self.span,
                kind: TokenNode::Term(Term::intern(word)),
            });
        }
    }

    impl ToTokens for LitVerbatim {
        fn to_tokens(&self, tokens: &mut Tokens) {
            tokens.append(literal_tree(&self.token, self.span));
        }
    }
}
531
532mod value {
533 use super::*;
534 use std::char;
535 use std::ops::{Index, RangeFrom};
536 use proc_macro2::TokenStream;
537
David Tolnay7d1d1282018-01-06 16:10:51 -0800538 impl Lit {
David Tolnay780292d2018-01-22 23:26:44 -0800539 /// Interpret a Syn literal from a proc-macro2 literal.
540 ///
541 /// Not all proc-macro2 literals are valid Syn literals. In particular,
542 /// doc comments are considered by proc-macro2 to be literals but in Syn
543 /// they are [`Attribute`].
544 ///
545 /// [`Attribute`]: struct.Attribute.html
546 ///
547 /// # Panics
548 ///
549 /// Panics if the input is a doc comment literal.
David Tolnay7d1d1282018-01-06 16:10:51 -0800550 pub fn new(token: Literal, span: Span) -> Self {
551 let value = token.to_string();
552
553 match value::byte(&value, 0) {
554 b'"' | b'r' => {
555 return Lit::Str(LitStr {
556 token: token,
557 span: span,
558 })
559 }
560 b'b' => match value::byte(&value, 1) {
561 b'"' | b'r' => {
562 return Lit::ByteStr(LitByteStr {
563 token: token,
564 span: span,
565 })
566 }
567 b'\'' => {
568 return Lit::Byte(LitByte {
569 token: token,
570 span: span,
571 })
572 }
573 _ => {}
574 },
575 b'\'' => {
576 return Lit::Char(LitChar {
577 token: token,
578 span: span,
579 })
580 }
581 b'0'...b'9' => if number_is_int(&value) {
582 return Lit::Int(LitInt {
583 token: token,
584 span: span,
585 });
586 } else if number_is_float(&value) {
587 return Lit::Float(LitFloat {
588 token: token,
589 span: span,
590 });
591 } else {
592 // number overflow
593 return Lit::Verbatim(LitVerbatim {
594 token: token,
595 span: span,
596 });
597 },
598 _ => if value == "true" || value == "false" {
599 return Lit::Bool(LitBool {
600 value: value == "true",
601 span: span,
602 });
603 },
604 }
605
606 panic!("Unrecognized literal: {}", value);
607 }
608 }
609
610 fn number_is_int(value: &str) -> bool {
611 if number_is_float(value) {
612 false
613 } else {
614 value::parse_lit_int(value).is_some()
615 }
616 }
617
// True when the text of a numeric literal denotes a floating point number.
//
// A literal is a float if it contains a decimal point, or, for anything that
// is neither hexadecimal nor suffixed with `isize`/`usize`, if it contains an
// exponent marker or carries an `f32`/`f64` suffix. The hex and `size`
// exclusions exist because those forms may legitimately contain the letters
// `e`/`f` without being floats (`0xfe`, `0x1f32`, `1usize`).
fn number_is_float(value: &str) -> bool {
    if value.contains('.') {
        true
    } else if value.starts_with("0x") || value.ends_with("size") {
        false
    } else {
        value.contains('e') || value.contains('E')
            // Suffix-only floats such as `1f32` have neither a decimal point
            // nor an exponent but are still floating point literals.
            || value.ends_with("f32") || value.ends_with("f64")
    }
}
627
/// Returns the byte at offset `idx` of `s`, or `b'\0'` when `idx` lies past
/// the end of the input buffer.
pub fn byte<S: AsRef<[u8]> + ?Sized>(s: &S, idx: usize) -> u8 {
    s.as_ref().get(idx).cloned().unwrap_or(0)
}
638
// First character of `s`, or `'\0'` when `s` is empty.
fn next_chr(s: &str) -> char {
    match s.chars().next() {
        Some(first) => first,
        None => '\0',
    }
}
642
643 pub fn parse_lit_str(s: &str) -> String {
644 match byte(s, 0) {
645 b'"' => parse_lit_str_cooked(s),
646 b'r' => parse_lit_str_raw(s),
647 _ => unreachable!(),
648 }
649 }
650
David Tolnay76ebcdd2018-01-05 17:07:26 -0800651 // Clippy false positive
652 // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
653 #[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
David Tolnay360efd22018-01-04 23:35:26 -0800654 fn parse_lit_str_cooked(mut s: &str) -> String {
655 assert_eq!(byte(s, 0), b'"');
656 s = &s[1..];
657
658 let mut out = String::new();
659 'outer: loop {
660 let ch = match byte(s, 0) {
661 b'"' => break,
662 b'\\' => {
663 let b = byte(s, 1);
664 s = &s[2..];
665 match b {
666 b'x' => {
667 let (byte, rest) = backslash_x(s);
668 s = rest;
669 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800670 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800671 }
672 b'u' => {
David Tolnay76ebcdd2018-01-05 17:07:26 -0800673 let (chr, rest) = backslash_u(s);
David Tolnay360efd22018-01-04 23:35:26 -0800674 s = rest;
675 chr
676 }
677 b'n' => '\n',
678 b'r' => '\r',
679 b't' => '\t',
680 b'\\' => '\\',
681 b'0' => '\0',
682 b'\'' => '\'',
683 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800684 b'\r' | b'\n' => loop {
685 let ch = next_chr(s);
686 if ch.is_whitespace() {
687 s = &s[ch.len_utf8()..];
688 } else {
689 continue 'outer;
David Tolnay360efd22018-01-04 23:35:26 -0800690 }
David Tolnay61037c62018-01-05 16:21:03 -0800691 },
692 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800693 }
694 }
695 b'\r' => {
696 assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
697 s = &s[2..];
698 '\n'
699 }
700 _ => {
701 let ch = next_chr(s);
702 s = &s[ch.len_utf8()..];
703 ch
704 }
705 };
706 out.push(ch);
707 }
708
709 assert_eq!(s, "\"");
710 out
711 }
712
// Decodes the source text of a raw string literal (`r"..."`, `r#"..."#`, ...)
// by checking the delimiters and returning the text between the quotes
// unchanged.
//
// Panics if the text does not start with `r`, or if the `#` runs and quotes
// on both ends do not match up.
fn parse_lit_str_raw(mut s: &str) -> String {
    assert_eq!(s.as_bytes().get(0).cloned().unwrap_or(0), b'r');
    s = &s[1..];

    // Number of `#` symbols in front of the opening quote.
    let hashes = s.bytes().take_while(|&b| b == b'#').count();
    let total = s.len();
    // Out-of-range positions read as 0, which never equals a quote.
    let byte_at = |idx: usize| s.as_bytes().get(idx).cloned().unwrap_or(0);

    assert_eq!(byte_at(hashes), b'"');
    assert_eq!(byte_at(total - hashes - 1), b'"');
    for trailing in s[total - hashes..].bytes() {
        assert_eq!(trailing, b'#');
    }

    let content = &s[hashes + 1..total - hashes - 1];
    content.to_owned()
}
729
730 pub fn parse_lit_byte_str(s: &str) -> Vec<u8> {
731 assert_eq!(byte(s, 0), b'b');
732 match byte(s, 1) {
733 b'"' => parse_lit_byte_str_cooked(s),
734 b'r' => parse_lit_byte_str_raw(s),
735 _ => unreachable!(),
736 }
737 }
738
David Tolnay76ebcdd2018-01-05 17:07:26 -0800739 // Clippy false positive
740 // https://github.com/rust-lang-nursery/rust-clippy/issues/2329
741 #[cfg_attr(feature = "cargo-clippy", allow(needless_continue))]
David Tolnay360efd22018-01-04 23:35:26 -0800742 fn parse_lit_byte_str_cooked(mut s: &str) -> Vec<u8> {
743 assert_eq!(byte(s, 0), b'b');
744 assert_eq!(byte(s, 1), b'"');
745 s = &s[2..];
746
747 // We're going to want to have slices which don't respect codepoint boundaries.
748 let mut s = s.as_bytes();
749
750 let mut out = Vec::new();
751 'outer: loop {
752 let byte = match byte(s, 0) {
753 b'"' => break,
754 b'\\' => {
755 let b = byte(s, 1);
756 s = &s[2..];
757 match b {
758 b'x' => {
759 let (b, rest) = backslash_x(s);
760 s = rest;
761 b
762 }
763 b'n' => b'\n',
764 b'r' => b'\r',
765 b't' => b'\t',
766 b'\\' => b'\\',
767 b'0' => b'\0',
768 b'\'' => b'\'',
769 b'"' => b'"',
David Tolnay61037c62018-01-05 16:21:03 -0800770 b'\r' | b'\n' => loop {
771 let byte = byte(s, 0);
David Tolnay76ebcdd2018-01-05 17:07:26 -0800772 let ch = char::from_u32(u32::from(byte)).unwrap();
David Tolnay61037c62018-01-05 16:21:03 -0800773 if ch.is_whitespace() {
774 s = &s[1..];
775 } else {
776 continue 'outer;
David Tolnay360efd22018-01-04 23:35:26 -0800777 }
David Tolnay61037c62018-01-05 16:21:03 -0800778 },
779 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800780 }
781 }
782 b'\r' => {
783 assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
784 s = &s[2..];
785 b'\n'
786 }
787 b => {
788 s = &s[1..];
789 b
790 }
791 };
792 out.push(byte);
793 }
794
795 assert_eq!(s, b"\"");
796 out
797 }
798
799 fn parse_lit_byte_str_raw(s: &str) -> Vec<u8> {
800 assert_eq!(byte(s, 0), b'b');
801 parse_lit_str_raw(&s[1..]).into_bytes()
802 }
803
804 pub fn parse_lit_byte(s: &str) -> u8 {
805 assert_eq!(byte(s, 0), b'b');
806 assert_eq!(byte(s, 1), b'\'');
807
808 // We're going to want to have slices which don't respect codepoint boundaries.
809 let mut s = s[2..].as_bytes();
810
811 let b = match byte(s, 0) {
812 b'\\' => {
813 let b = byte(s, 1);
814 s = &s[2..];
815 match b {
816 b'x' => {
817 let (b, rest) = backslash_x(s);
818 s = rest;
819 b
820 }
821 b'n' => b'\n',
822 b'r' => b'\r',
823 b't' => b'\t',
824 b'\\' => b'\\',
825 b'0' => b'\0',
826 b'\'' => b'\'',
827 b'"' => b'"',
David Tolnay61037c62018-01-05 16:21:03 -0800828 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800829 }
830 }
831 b => {
832 s = &s[1..];
833 b
834 }
835 };
836
837 assert_eq!(byte(s, 0), b'\'');
838 b
839 }
840
841 pub fn parse_lit_char(mut s: &str) -> char {
842 assert_eq!(byte(s, 0), b'\'');
843 s = &s[1..];
844
845 let ch = match byte(s, 0) {
846 b'\\' => {
847 let b = byte(s, 1);
848 s = &s[2..];
849 match b {
850 b'x' => {
851 let (byte, rest) = backslash_x(s);
852 s = rest;
853 assert!(byte <= 0x80, "Invalid \\x byte in string literal");
David Tolnay76ebcdd2018-01-05 17:07:26 -0800854 char::from_u32(u32::from(byte)).unwrap()
David Tolnay360efd22018-01-04 23:35:26 -0800855 }
856 b'u' => {
857 let (chr, rest) = backslash_u(s);
858 s = rest;
859 chr
860 }
861 b'n' => '\n',
862 b'r' => '\r',
863 b't' => '\t',
864 b'\\' => '\\',
865 b'0' => '\0',
866 b'\'' => '\'',
867 b'"' => '"',
David Tolnay61037c62018-01-05 16:21:03 -0800868 b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
David Tolnay360efd22018-01-04 23:35:26 -0800869 }
870 }
871 _ => {
872 let ch = next_chr(s);
873 s = &s[ch.len_utf8()..];
874 ch
875 }
876 };
877 assert_eq!(s, "\'", "Expected end of char literal");
878 ch
879 }
880
// Decodes the two hex digits that follow `\x`.
//
// `s` is the input positioned right after the `x`. Returns the decoded byte
// and the input remaining after the two digits. Works on both `str` and
// `[u8]`. Panics if either of the first two bytes is not a hex digit (a
// missing byte counts as a non-digit).
fn backslash_x<S>(s: &S) -> (u8, &S)
where
    S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized,
{
    // Numeric value of one ASCII hex digit.
    fn hex_value(b: u8) -> u8 {
        match (b as char).to_digit(16) {
            Some(digit) => digit as u8,
            None => panic!("unexpected non-hex character after \\x"),
        }
    }

    let (high, low) = {
        let bytes = s.as_ref();
        (
            bytes.get(0).cloned().unwrap_or(0),
            bytes.get(1).cloned().unwrap_or(0),
        )
    };
    let decoded = 0x10 * hex_value(high) + hex_value(low);
    (decoded, &s[2..])
}
902
// Decodes the `{...}` part of a `\u{...}` escape.
//
// `s` is the input positioned right after the `u`. Returns the decoded
// character and the input remaining after the closing brace.
//
// Between the braces there must be one to six hex digits; underscores are
// allowed after the first digit and are ignored.
//
// Panics if the opening brace is missing, the escape is empty or has more
// than six digits, a non-hex character appears, or the resulting code is not
// a valid Unicode scalar value.
fn backslash_u(mut s: &str) -> (char, &str) {
    if !s.starts_with('{') {
        // Passed as a format argument so the brace is printed as written
        // regardless of how a lone string argument to `panic!` is treated.
        panic!("{}", "expected { after \\u");
    }
    s = &s[1..];

    let mut ch = 0;
    let mut digits = 0;
    loop {
        // A missing byte reads as 0, which is reported as a non-hex character.
        let b = s.as_bytes().get(0).cloned().unwrap_or(0);
        let digit = match b {
            // Digit separators are permitted anywhere but the first position.
            b'_' if digits > 0 => {
                s = &s[1..];
                continue;
            }
            b'}' if digits == 0 => panic!("invalid empty unicode escape"),
            b'}' => break,
            _ => match (b as char).to_digit(16) {
                Some(digit) => digit,
                None => panic!("unexpected non-hex character after \\u"),
            },
        };
        if digits == 6 {
            panic!("overlong unicode escape (must have at most 6 hex digits)");
        }
        ch = ch * 0x10 + digit;
        digits += 1;
        s = &s[1..];
    }
    // Skip the closing brace the loop stopped on.
    s = &s[1..];

    if let Some(ch) = char::from_u32(ch) {
        (ch, s)
    } else {
        panic!("character code {:x} is not a valid unicode character", ch);
    }
}
941
/// Decodes the leading digits of an integer literal's source text.
///
/// Recognizes the `0x`, `0o` and `0b` prefixes, ignores `_` separators, and
/// stops at the first byte that is not a digit (such as the start of a type
/// suffix). Returns `None` when the value does not fit in 64 bits, or when a
/// decimal literal contains `.`, `e` or `E` and therefore looks like a float.
///
/// # Panics
///
/// Panics if a decimal digit is too large for the literal's base, and treats
/// text that does not start with a digit as unreachable.
pub fn parse_lit_int(mut s: &str) -> Option<u64> {
    let radix: u64 = if s.starts_with("0x") {
        s = &s[2..];
        16
    } else if s.starts_with("0o") {
        s = &s[2..];
        8
    } else if s.starts_with("0b") {
        s = &s[2..];
        2
    } else if s.bytes().next().map_or(false, |b| b >= b'0' && b <= b'9') {
        10
    } else {
        unreachable!()
    };

    let mut total = 0u64;
    for b in s.bytes() {
        let digit = match b {
            b'_' => continue,
            // NOTE: Looking at a floating point literal, we don't want to
            // consider these integers.
            b'.' | b'e' | b'E' if radix == 10 => return None,
            _ => match (b as char).to_digit(16) {
                // Letters only count as digits in hexadecimal.
                Some(d) if d < 10 || radix > 10 => u64::from(d),
                _ => break,
            },
        };

        if digit >= radix {
            panic!("Unexpected digit {:x} out of base range", digit);
        }

        total = match total
            .checked_mul(radix)
            .and_then(|scaled| scaled.checked_add(digit))
        {
            Some(next) => next,
            None => return None,
        };
    }

    Some(total)
}
995
/// Decodes the source text of a floating point literal into its value.
///
/// # Panics
///
/// Panics if the remaining text is not accepted by the standard library's
/// `f64` parser.
pub fn parse_lit_float(input: &str) -> f64 {
    // Rust's floating point literals are very similar to the ones parsed by
    // the standard library, except that rust's literals can contain
    // ignorable underscores. Let's remove those underscores.
    let cleaned: String = input.chars().filter(|&c| c != '_').collect();

    // Anything from the first `f` onwards is the type suffix.
    let number = match cleaned.find('f') {
        Some(suffix_start) => &cleaned[..suffix_start],
        None => &cleaned[..],
    };
    number.parse().unwrap()
}
1017
1018 pub fn to_literal(s: &str) -> Literal {
1019 let stream = s.parse::<TokenStream>().unwrap();
1020 match stream.into_iter().next().unwrap().kind {
1021 TokenNode::Literal(l) => l,
1022 _ => unreachable!(),
David Tolnayf17fd2f2016-10-07 23:38:08 -07001023 }
1024 }
David Tolnayf4bbbd92016-09-23 14:41:55 -07001025}