David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 1 | #![cfg(feature = "parsing")] |
| 2 | |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame^] | 3 | use std::{char, str}; |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 4 | use nom::{self, IResult}; |
| 5 | |
| 6 | pub fn escaped_string(input: &str) -> IResult<&str, String> { |
| 7 | let mut s = String::new(); |
| 8 | let mut chars = input.char_indices().peekable(); |
| 9 | while let Some((byte_offset, ch)) = chars.next() { |
| 10 | match ch { |
| 11 | '"' => { |
| 12 | return IResult::Done(&input[byte_offset..], s); |
| 13 | } |
| 14 | '\\' => { |
| 15 | match chars.next() { |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame^] | 16 | Some((_, 'x')) => match backslash_x(&mut chars) { |
| 17 | Some(ch) => s.push(ch), |
| 18 | None => break, |
| 19 | }, |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 20 | Some((_, 'n')) => s.push('\n'), |
| 21 | Some((_, 'r')) => s.push('\r'), |
| 22 | Some((_, 't')) => s.push('\t'), |
| 23 | Some((_, '\\')) => s.push('\\'), |
| 24 | Some((_, '0')) => s.push('\0'), |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame^] | 25 | Some((_, 'u')) => match backslash_u(&mut chars) { |
| 26 | Some(ch) => s.push(ch), |
| 27 | None => break, |
| 28 | }, |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 29 | Some((_, '\'')) => s.push('\''), |
| 30 | Some((_, '"')) => s.push('"'), |
| 31 | Some((_, '\n')) => { |
| 32 | while let Some(&(_, ch)) = chars.peek() { |
| 33 | if ch.is_whitespace() { |
| 34 | chars.next(); |
| 35 | } else { |
| 36 | break; |
| 37 | } |
| 38 | } |
| 39 | } |
| 40 | _ => break, |
| 41 | } |
| 42 | } |
| 43 | ch => { |
| 44 | s.push(ch); |
| 45 | } |
| 46 | } |
| 47 | } |
| 48 | IResult::Error(nom::Err::Position(nom::ErrorKind::Escaped, input)) |
| 49 | } |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame^] | 50 | |
| 51 | macro_rules! next_char { |
| 52 | ($chars:ident @ $pat:pat $(| $rest:pat)*) => { |
| 53 | match $chars.next() { |
| 54 | Some((_, ch)) => match ch { |
| 55 | $pat $(| $rest)* => ch, |
| 56 | _ => return None, |
| 57 | }, |
| 58 | None => return None, |
| 59 | } |
| 60 | }; |
| 61 | } |
| 62 | |
| 63 | fn backslash_x<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> { |
| 64 | let a = next_char!(chars @ '0'...'7'); |
| 65 | let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); |
| 66 | char_from_hex_bytes(&[a as u8, b as u8]) |
| 67 | } |
| 68 | |
| 69 | fn backslash_u<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> { |
| 70 | next_char!(chars @ '{'); |
| 71 | let a = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); |
| 72 | let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 73 | if b == '}' { |
| 74 | return char_from_hex_bytes(&[a as u8]); |
| 75 | } |
| 76 | let c = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 77 | if c == '}' { |
| 78 | return char_from_hex_bytes(&[a as u8, b as u8]); |
| 79 | } |
| 80 | let d = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 81 | if d == '}' { |
| 82 | return char_from_hex_bytes(&[a as u8, b as u8, c as u8]); |
| 83 | } |
| 84 | let e = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 85 | if e == '}' { |
| 86 | return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8]); |
| 87 | } |
| 88 | let f = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 89 | if f == '}' { |
| 90 | return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8]); |
| 91 | } |
| 92 | next_char!(chars @ '}'); |
| 93 | char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8, f as u8]) |
| 94 | } |
| 95 | |
/// Interprets `hex_bytes` as ASCII hexadecimal digits and converts the
/// resulting number to a `char`.
///
/// Callers are expected to pass only `'0'...'9' | 'a'...'f' | 'A'...'F'`
/// bytes, but the function no longer relies on that: it returns `None`
/// instead of panicking or invoking undefined behaviour when
///
/// * the slice is empty,
/// * any byte is not an ASCII hex digit (this includes non-UTF-8 bytes and a
///   leading `+`, which `from_str_radix` would otherwise accept),
/// * the value does not fit in a `u32`, or
/// * the value is not a valid Unicode scalar value (e.g. a surrogate).
fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
    // Validate up front so that only plain hex digits reach the parser.
    if !hex_bytes.iter().all(|&byte| (byte as char).is_digit(16)) {
        return None;
    }
    str::from_utf8(hex_bytes)
        .ok()
        .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
        .and_then(char::from_u32)
}
| 101 | |
#[test]
fn test_escaped_string() {
    // Raw literal: the backslashes reach `escaped_string` unprocessed, and the
    // trailing `"` is the closing quote the parser must stop at.
    let raw = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
    // The same escapes, this time decoded by the compiler.
    let decoded = "\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}".to_string();
    // The closing quote is left in the remaining input.
    assert_eq!(escaped_string(raw), IResult::Done("\"", decoded));
}