blob: eb30d9071026d739d85bea4acaa76d0bc4d1832c [file] [log] [blame]
David Tolnayd3bd58a2016-09-13 10:58:37 -07001use std::{char, str};
David Tolnaya73e0f02016-10-26 23:25:49 -07002use std::num::ParseIntError;
David Tolnayb5a7b142016-09-13 22:46:39 -07003use nom::IResult;
David Tolnay886d8ea2016-09-13 08:34:07 -07004
David Tolnay210884d2016-10-01 08:18:42 -07005pub fn cooked_string(input: &str) -> IResult<&str, String> {
David Tolnay886d8ea2016-09-13 08:34:07 -07006 let mut s = String::new();
7 let mut chars = input.char_indices().peekable();
8 while let Some((byte_offset, ch)) = chars.next() {
9 match ch {
10 '"' => {
11 return IResult::Done(&input[byte_offset..], s);
12 }
13 '\\' => {
14 match chars.next() {
David Tolnaydaaf7742016-10-03 11:11:43 -070015 Some((_, 'x')) => {
David Tolnaya73e0f02016-10-26 23:25:49 -070016 match backslash_x_char(&mut chars) {
David Tolnaydaaf7742016-10-03 11:11:43 -070017 Some(ch) => s.push(ch),
18 None => break,
19 }
20 }
David Tolnay886d8ea2016-09-13 08:34:07 -070021 Some((_, 'n')) => s.push('\n'),
22 Some((_, 'r')) => s.push('\r'),
23 Some((_, 't')) => s.push('\t'),
24 Some((_, '\\')) => s.push('\\'),
25 Some((_, '0')) => s.push('\0'),
David Tolnaydaaf7742016-10-03 11:11:43 -070026 Some((_, 'u')) => {
27 match backslash_u(&mut chars) {
28 Some(ch) => s.push(ch),
29 None => break,
30 }
31 }
David Tolnay886d8ea2016-09-13 08:34:07 -070032 Some((_, '\'')) => s.push('\''),
33 Some((_, '"')) => s.push('"'),
34 Some((_, '\n')) => {
35 while let Some(&(_, ch)) = chars.peek() {
36 if ch.is_whitespace() {
37 chars.next();
38 } else {
39 break;
40 }
41 }
42 }
43 _ => break,
44 }
45 }
46 ch => {
47 s.push(ch);
48 }
49 }
50 }
David Tolnayb5a7b142016-09-13 22:46:39 -070051 IResult::Error
David Tolnay886d8ea2016-09-13 08:34:07 -070052}
David Tolnayd3bd58a2016-09-13 10:58:37 -070053
David Tolnaya73e0f02016-10-26 23:25:49 -070054pub fn cooked_byte_string(mut input: &str) -> IResult<&str, Vec<u8>> {
55 let mut vec = Vec::new();
56 let mut bytes = input.bytes().enumerate();
57 'outer: while let Some((offset, b)) = bytes.next() {
58 match b {
59 b'"' => {
60 return IResult::Done(&input[offset..], vec);
61 }
62 b'\\' => {
63 match bytes.next() {
64 Some((_, b'x')) => {
65 match backslash_x_byte(&mut bytes) {
66 Some(b) => vec.push(b),
67 None => break,
68 }
69 }
70 Some((_, b'n')) => vec.push(b'\n'),
71 Some((_, b'r')) => vec.push(b'\r'),
72 Some((_, b't')) => vec.push(b'\t'),
73 Some((_, b'\\')) => vec.push(b'\\'),
74 Some((_, b'0')) => vec.push(b'\0'),
75 Some((_, b'\'')) => vec.push(b'\''),
76 Some((_, b'"')) => vec.push(b'"'),
77 Some((newline, b'\n')) => {
78 let rest = &input[newline + 1..];
79 for (offset, ch) in rest.char_indices() {
80 if !ch.is_whitespace() {
81 input = &rest[offset..];
82 bytes = input.bytes().enumerate();
83 continue 'outer;
84 }
85 }
86 break;
87 }
88 _ => break,
89 }
90 }
91 b if b < 0x80 => {
92 vec.push(b);
93 }
94 _ => break,
95 }
96 }
97 IResult::Error
98}
99
David Tolnay615cf6a2016-10-08 23:07:02 -0700100pub fn cooked_char(input: &str) -> IResult<&str, char> {
101 let mut chars = input.char_indices();
102 let ch = match chars.next().map(|(_, ch)| ch) {
103 Some('\\') => {
104 match chars.next().map(|(_, ch)| ch) {
David Tolnaya73e0f02016-10-26 23:25:49 -0700105 Some('x') => backslash_x_char(&mut chars),
David Tolnay615cf6a2016-10-08 23:07:02 -0700106 Some('n') => Some('\n'),
107 Some('r') => Some('\r'),
108 Some('t') => Some('\t'),
109 Some('\\') => Some('\\'),
110 Some('0') => Some('\0'),
111 Some('u') => backslash_u(&mut chars),
112 Some('\'') => Some('\''),
113 Some('"') => Some('"'),
114 _ => None,
115 }
116 }
117 ch => ch,
118 };
119 match ch {
120 Some(ch) => IResult::Done(chars.as_str(), ch),
121 None => IResult::Error,
122 }
123}
124
David Tolnayfe373a32016-10-26 23:51:19 -0700125pub fn cooked_byte(input: &str) -> IResult<&str, u8> {
126 let mut bytes = input.bytes().enumerate();
127 let b = match bytes.next().map(|(_, b)| b) {
128 Some(b'\\') => {
129 match bytes.next().map(|(_, b)| b) {
130 Some(b'x') => backslash_x_byte(&mut bytes),
131 Some(b'n') => Some(b'\n'),
132 Some(b'r') => Some(b'\r'),
133 Some(b't') => Some(b'\t'),
134 Some(b'\\') => Some(b'\\'),
135 Some(b'0') => Some(b'\0'),
136 Some(b'\'') => Some(b'\''),
137 Some(b'"') => Some(b'"'),
138 _ => None,
139 }
140 }
141 b => b,
142 };
143 match b {
144 Some(b) => {
145 match bytes.next() {
146 Some((offset, _)) => IResult::Done(&input[offset..], b),
147 None => IResult::Done("", b),
148 }
149 }
150 None => IResult::Error,
151 }
152}
153
David Tolnay210884d2016-10-01 08:18:42 -0700154pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> {
155 let mut chars = input.char_indices();
156 let mut n = 0;
157 while let Some((byte_offset, ch)) = chars.next() {
158 match ch {
159 '"' => {
160 n = byte_offset;
161 break;
162 }
David Tolnaydaaf7742016-10-03 11:11:43 -0700163 '#' => {}
David Tolnay210884d2016-10-01 08:18:42 -0700164 _ => return IResult::Error,
165 }
166 }
David Tolnay590cdfd2016-10-01 08:51:55 -0700167 for (byte_offset, ch) in chars {
David Tolnay210884d2016-10-01 08:18:42 -0700168 if ch == '"' && input[byte_offset + 1..].starts_with(&input[..n]) {
169 let rest = &input[byte_offset + 1 + n..];
David Tolnaydaaf7742016-10-03 11:11:43 -0700170 let value = &input[n + 1..byte_offset];
David Tolnay210884d2016-10-01 08:18:42 -0700171 return IResult::Done(rest, (value.to_owned(), n));
172 }
173 }
174 IResult::Error
175}
176
David Tolnaya73e0f02016-10-26 23:25:49 -0700177macro_rules! next_ch {
David Tolnayd3bd58a2016-09-13 10:58:37 -0700178 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
179 match $chars.next() {
180 Some((_, ch)) => match ch {
181 $pat $(| $rest)* => ch,
182 _ => return None,
183 },
184 None => return None,
185 }
186 };
187}
188
/// Lets generic code (the `from_hex!` macro) call `from_str_radix` on whichever integer type
/// is inferred at the call site.
trait FromStrRadix: Sized {
    /// Parses `src` as an integer written in base `radix`.
    fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError>;
}

impl FromStrRadix for u8 {
    fn from_str_radix(digits: &str, base: u32) -> Result<Self, ParseIntError> {
        // Resolves to the inherent `u8::from_str_radix`, not back to this trait method.
        <u8>::from_str_radix(digits, base)
    }
}

impl FromStrRadix for u32 {
    fn from_str_radix(digits: &str, base: u32) -> Result<Self, ParseIntError> {
        // Resolves to the inherent `u32::from_str_radix`, not back to this trait method.
        <u32>::from_str_radix(digits, base)
    }
}
204
// Converts a sequence of hex-digit variables (each castable to `u8`) into an integer whose
// type is inferred from the call site via `FromStrRadix`. Panics if the digits are not valid
// UTF-8 / hexadecimal or do not fit the target type; callers pass only already-validated
// digits.
macro_rules! from_hex {
    ($($ch:ident)+) => {{
        let digits = &[$($ch as u8),+];
        FromStrRadix::from_str_radix(str::from_utf8(digits).unwrap(), 16).unwrap()
    }};
}
212
David Tolnay590cdfd2016-10-01 08:51:55 -0700213#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
David Tolnaya73e0f02016-10-26 23:25:49 -0700214fn backslash_x_char<I>(chars: &mut I) -> Option<char>
David Tolnaydaaf7742016-10-03 11:11:43 -0700215 where I: Iterator<Item = (usize, char)>
216{
David Tolnaya73e0f02016-10-26 23:25:49 -0700217 let a = next_ch!(chars @ '0'...'7');
218 let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
219 char::from_u32(from_hex!(a b))
220}
221
222#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
223fn backslash_x_byte<I>(chars: &mut I) -> Option<u8>
224 where I: Iterator<Item = (usize, u8)>
225{
226 let a = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
227 let b = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
228 Some(from_hex!(a b))
David Tolnayd3bd58a2016-09-13 10:58:37 -0700229}
230
David Tolnay590cdfd2016-10-01 08:51:55 -0700231#[cfg_attr(feature = "clippy", allow(diverging_sub_expression, many_single_char_names))]
David Tolnaydaaf7742016-10-03 11:11:43 -0700232fn backslash_u<I>(chars: &mut I) -> Option<char>
233 where I: Iterator<Item = (usize, char)>
234{
David Tolnaya73e0f02016-10-26 23:25:49 -0700235 next_ch!(chars @ '{');
236 let a = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
237 let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
David Tolnayd3bd58a2016-09-13 10:58:37 -0700238 if b == '}' {
David Tolnaya73e0f02016-10-26 23:25:49 -0700239 return char::from_u32(from_hex!(a));
David Tolnayd3bd58a2016-09-13 10:58:37 -0700240 }
David Tolnaya73e0f02016-10-26 23:25:49 -0700241 let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
David Tolnayd3bd58a2016-09-13 10:58:37 -0700242 if c == '}' {
David Tolnaya73e0f02016-10-26 23:25:49 -0700243 return char::from_u32(from_hex!(a b));
David Tolnayd3bd58a2016-09-13 10:58:37 -0700244 }
David Tolnaya73e0f02016-10-26 23:25:49 -0700245 let d = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
David Tolnayd3bd58a2016-09-13 10:58:37 -0700246 if d == '}' {
David Tolnaya73e0f02016-10-26 23:25:49 -0700247 return char::from_u32(from_hex!(a b c));
David Tolnayd3bd58a2016-09-13 10:58:37 -0700248 }
David Tolnaya73e0f02016-10-26 23:25:49 -0700249 let e = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
David Tolnayd3bd58a2016-09-13 10:58:37 -0700250 if e == '}' {
David Tolnaya73e0f02016-10-26 23:25:49 -0700251 return char::from_u32(from_hex!(a b c d));
David Tolnayd3bd58a2016-09-13 10:58:37 -0700252 }
David Tolnaya73e0f02016-10-26 23:25:49 -0700253 let f = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
David Tolnayd3bd58a2016-09-13 10:58:37 -0700254 if f == '}' {
David Tolnaya73e0f02016-10-26 23:25:49 -0700255 return char::from_u32(from_hex!(a b c d e));
David Tolnayd3bd58a2016-09-13 10:58:37 -0700256 }
David Tolnaya73e0f02016-10-26 23:25:49 -0700257 next_ch!(chars @ '}');
258 char::from_u32(from_hex!(a b c d e f))
David Tolnayd3bd58a2016-09-13 10:58:37 -0700259}
260
// Covers `\x`, a backslash-newline continuation, and `\u{...}` escapes of one to six digits;
// the closing quote must be left in the remaining input.
#[test]
fn test_cooked_string() {
    let source = "\\x62 \\\n \\u{7} \\u{64} \\u{bf5} \\u{12ba} \\u{1F395} \\u{102345}\"";
    let decoded = String::from("\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}");
    let parsed = cooked_string(source);
    assert_eq!(parsed, IResult::Done("\"", decoded));
}
David Tolnaya73e0f02016-10-26 23:25:49 -0700267
// Covers `\x` escapes (including a value above 0x7F) and a backslash-newline continuation;
// the closing quote must be left in the remaining input.
#[test]
fn test_cooked_byte_string() {
    let source = "\\x62 \\\n \\xEF\"";
    let decoded: Vec<u8> = b"\x62 \xEF".to_vec();
    let parsed = cooked_byte_string(source);
    assert_eq!(parsed, IResult::Done("\"", decoded));
}