blob: cd973646d582ead428b61f246fc27eba638bc23a [file] [log] [blame]
David Tolnay886d8ea2016-09-13 08:34:07 -07001#![cfg(feature = "parsing")]
2
David Tolnayd3bd58a2016-09-13 10:58:37 -07003use std::{char, str};
David Tolnay886d8ea2016-09-13 08:34:07 -07004use nom::{self, IResult};
5
6pub fn escaped_string(input: &str) -> IResult<&str, String> {
7 let mut s = String::new();
8 let mut chars = input.char_indices().peekable();
9 while let Some((byte_offset, ch)) = chars.next() {
10 match ch {
11 '"' => {
12 return IResult::Done(&input[byte_offset..], s);
13 }
14 '\\' => {
15 match chars.next() {
David Tolnayd3bd58a2016-09-13 10:58:37 -070016 Some((_, 'x')) => match backslash_x(&mut chars) {
17 Some(ch) => s.push(ch),
18 None => break,
19 },
David Tolnay886d8ea2016-09-13 08:34:07 -070020 Some((_, 'n')) => s.push('\n'),
21 Some((_, 'r')) => s.push('\r'),
22 Some((_, 't')) => s.push('\t'),
23 Some((_, '\\')) => s.push('\\'),
24 Some((_, '0')) => s.push('\0'),
David Tolnayd3bd58a2016-09-13 10:58:37 -070025 Some((_, 'u')) => match backslash_u(&mut chars) {
26 Some(ch) => s.push(ch),
27 None => break,
28 },
David Tolnay886d8ea2016-09-13 08:34:07 -070029 Some((_, '\'')) => s.push('\''),
30 Some((_, '"')) => s.push('"'),
31 Some((_, '\n')) => {
32 while let Some(&(_, ch)) = chars.peek() {
33 if ch.is_whitespace() {
34 chars.next();
35 } else {
36 break;
37 }
38 }
39 }
40 _ => break,
41 }
42 }
43 ch => {
44 s.push(ch);
45 }
46 }
47 }
48 IResult::Error(nom::Err::Position(nom::ErrorKind::Escaped, input))
49}
David Tolnayd3bd58a2016-09-13 10:58:37 -070050
51macro_rules! next_char {
52 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
53 match $chars.next() {
54 Some((_, ch)) => match ch {
55 $pat $(| $rest)* => ch,
56 _ => return None,
57 },
58 None => return None,
59 }
60 };
61}
62
63fn backslash_x<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> {
64 let a = next_char!(chars @ '0'...'7');
65 let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
66 char_from_hex_bytes(&[a as u8, b as u8])
67}
68
/// Decodes the `{XXXXXX}` part of a `\u{XXXXXX}` escape; the `\u` itself has
/// already been consumed by the caller.
///
/// Between the braces there must be one to six hex digits (either case).
/// After the first digit, `_` separators are accepted and ignored, matching
/// Rust's own lexical grammar (e.g. `\u{10_FFFF}`).
///
/// Returns `None` if the opening `{` is missing, there are no digits, there
/// are more than six digits, an unexpected character is encountered, the
/// iterator ends before the closing `}`, or the value is not a Unicode scalar
/// value (a surrogate, or greater than `0x10FFFF`). On success the iterator
/// is left positioned just after the closing `}`.
fn backslash_u<I>(chars: &mut I) -> Option<char>
    where I: Iterator<Item = (usize, char)>
{
    const MAX_DIGITS: usize = 6;

    match chars.next() {
        Some((_, '{')) => {}
        _ => return None,
    }

    // Six hex digits are at most 0xFF_FFFF, so this can never overflow.
    let mut value: u32 = 0;
    let mut digits = 0;
    loop {
        let ch = match chars.next() {
            Some((_, ch)) => ch,
            None => return None,
        };
        match ch {
            // A closing brace is only valid once at least one digit was read.
            '}' if digits > 0 => return char::from_u32(value),
            // Separators may not lead, but may follow any digit.
            '_' if digits > 0 => {}
            _ => match ch.to_digit(16) {
                Some(digit) if digits < MAX_DIGITS => {
                    value = value * 16 + digit;
                    digits += 1;
                }
                // Not a hex digit, or a seventh digit.
                _ => return None,
            },
        }
    }
}
95
/// Interprets `hex_bytes` as ASCII hex digits (`0`-`9`, `a`-`f`, `A`-`F`) and
/// returns the `char` with that code point.
///
/// Returns `None` when the slice is empty, is not valid UTF-8, does not parse
/// as a base-16 `u32` (non-hex bytes or overflow), or when the parsed value is
/// not a Unicode scalar value. Parsing is delegated to `u32::from_str_radix`,
/// so a single leading `+` is tolerated as well.
fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
    // Checked conversions throughout: callers only pass validated hex digits,
    // but a safe function must not rely on that for memory safety.
    str::from_utf8(hex_bytes)
        .ok()
        .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
        .and_then(char::from_u32)
}
101
#[test]
fn test_escaped_string() {
    // Literal source text as the parser sees it: one `\x` escape followed by
    // `\u{..}` escapes of every length from one to six digits, terminated by
    // the closing quote.
    let source = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
    // The same escapes, decoded by the compiler for comparison.
    let decoded = String::from("\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}");

    // The unconsumed remainder must start at the closing quote.
    let outcome = escaped_string(source);
    assert_eq!(outcome, IResult::Done("\"", decoded));
}