blob: d8831ebdb620a7ce7204464f006f336c3400741b [file] [log] [blame]
David Tolnayd3bd58a2016-09-13 10:58:37 -07001use std::{char, str};
David Tolnaya73e0f02016-10-26 23:25:49 -07002use std::num::ParseIntError;
David Tolnayb5a7b142016-09-13 22:46:39 -07003use nom::IResult;
David Tolnay886d8ea2016-09-13 08:34:07 -07004
David Tolnay210884d2016-10-01 08:18:42 -07005pub fn cooked_string(input: &str) -> IResult<&str, String> {
David Tolnay886d8ea2016-09-13 08:34:07 -07006 let mut s = String::new();
7 let mut chars = input.char_indices().peekable();
8 while let Some((byte_offset, ch)) = chars.next() {
9 match ch {
10 '"' => {
11 return IResult::Done(&input[byte_offset..], s);
12 }
David Tolnay70843162016-10-30 14:17:26 -070013 '\r' => {
14 if let Some((_, '\n')) = chars.next() {
15 s.push('\n');
16 } else {
17 break;
18 }
19 }
David Tolnay886d8ea2016-09-13 08:34:07 -070020 '\\' => {
21 match chars.next() {
David Tolnaydaaf7742016-10-03 11:11:43 -070022 Some((_, 'x')) => {
David Tolnaya73e0f02016-10-26 23:25:49 -070023 match backslash_x_char(&mut chars) {
David Tolnaydaaf7742016-10-03 11:11:43 -070024 Some(ch) => s.push(ch),
25 None => break,
26 }
27 }
David Tolnay886d8ea2016-09-13 08:34:07 -070028 Some((_, 'n')) => s.push('\n'),
29 Some((_, 'r')) => s.push('\r'),
30 Some((_, 't')) => s.push('\t'),
31 Some((_, '\\')) => s.push('\\'),
32 Some((_, '0')) => s.push('\0'),
David Tolnaydaaf7742016-10-03 11:11:43 -070033 Some((_, 'u')) => {
34 match backslash_u(&mut chars) {
35 Some(ch) => s.push(ch),
36 None => break,
37 }
38 }
David Tolnay886d8ea2016-09-13 08:34:07 -070039 Some((_, '\'')) => s.push('\''),
40 Some((_, '"')) => s.push('"'),
David Tolnay70843162016-10-30 14:17:26 -070041 Some((_, '\n')) | Some((_, '\r')) => {
David Tolnay886d8ea2016-09-13 08:34:07 -070042 while let Some(&(_, ch)) = chars.peek() {
43 if ch.is_whitespace() {
44 chars.next();
45 } else {
46 break;
47 }
48 }
49 }
50 _ => break,
51 }
52 }
53 ch => {
54 s.push(ch);
55 }
56 }
57 }
David Tolnayb5a7b142016-09-13 22:46:39 -070058 IResult::Error
David Tolnay886d8ea2016-09-13 08:34:07 -070059}
David Tolnayd3bd58a2016-09-13 10:58:37 -070060
David Tolnaya73e0f02016-10-26 23:25:49 -070061pub fn cooked_byte_string(mut input: &str) -> IResult<&str, Vec<u8>> {
62 let mut vec = Vec::new();
63 let mut bytes = input.bytes().enumerate();
64 'outer: while let Some((offset, b)) = bytes.next() {
65 match b {
66 b'"' => {
67 return IResult::Done(&input[offset..], vec);
68 }
David Tolnay70843162016-10-30 14:17:26 -070069 b'\r' => {
70 if let Some((_, b'\n')) = bytes.next() {
71 vec.push(b'\n');
72 } else {
73 break;
74 }
75 }
David Tolnaya73e0f02016-10-26 23:25:49 -070076 b'\\' => {
77 match bytes.next() {
78 Some((_, b'x')) => {
79 match backslash_x_byte(&mut bytes) {
80 Some(b) => vec.push(b),
81 None => break,
82 }
83 }
84 Some((_, b'n')) => vec.push(b'\n'),
85 Some((_, b'r')) => vec.push(b'\r'),
86 Some((_, b't')) => vec.push(b'\t'),
87 Some((_, b'\\')) => vec.push(b'\\'),
88 Some((_, b'0')) => vec.push(b'\0'),
89 Some((_, b'\'')) => vec.push(b'\''),
90 Some((_, b'"')) => vec.push(b'"'),
David Tolnayc1fea502016-10-30 17:54:02 -070091 Some((newline, b'\n')) |
92 Some((newline, b'\r')) => {
David Tolnaya73e0f02016-10-26 23:25:49 -070093 let rest = &input[newline + 1..];
94 for (offset, ch) in rest.char_indices() {
95 if !ch.is_whitespace() {
96 input = &rest[offset..];
97 bytes = input.bytes().enumerate();
98 continue 'outer;
99 }
100 }
101 break;
102 }
103 _ => break,
104 }
105 }
106 b if b < 0x80 => {
107 vec.push(b);
108 }
109 _ => break,
110 }
111 }
112 IResult::Error
113}
114
David Tolnay615cf6a2016-10-08 23:07:02 -0700115pub fn cooked_char(input: &str) -> IResult<&str, char> {
116 let mut chars = input.char_indices();
117 let ch = match chars.next().map(|(_, ch)| ch) {
118 Some('\\') => {
119 match chars.next().map(|(_, ch)| ch) {
David Tolnaya73e0f02016-10-26 23:25:49 -0700120 Some('x') => backslash_x_char(&mut chars),
David Tolnay615cf6a2016-10-08 23:07:02 -0700121 Some('n') => Some('\n'),
122 Some('r') => Some('\r'),
123 Some('t') => Some('\t'),
124 Some('\\') => Some('\\'),
125 Some('0') => Some('\0'),
126 Some('u') => backslash_u(&mut chars),
127 Some('\'') => Some('\''),
128 Some('"') => Some('"'),
129 _ => None,
130 }
131 }
132 ch => ch,
133 };
134 match ch {
135 Some(ch) => IResult::Done(chars.as_str(), ch),
136 None => IResult::Error,
137 }
138}
139
David Tolnayfe373a32016-10-26 23:51:19 -0700140pub fn cooked_byte(input: &str) -> IResult<&str, u8> {
141 let mut bytes = input.bytes().enumerate();
142 let b = match bytes.next().map(|(_, b)| b) {
143 Some(b'\\') => {
144 match bytes.next().map(|(_, b)| b) {
145 Some(b'x') => backslash_x_byte(&mut bytes),
146 Some(b'n') => Some(b'\n'),
147 Some(b'r') => Some(b'\r'),
148 Some(b't') => Some(b'\t'),
149 Some(b'\\') => Some(b'\\'),
150 Some(b'0') => Some(b'\0'),
151 Some(b'\'') => Some(b'\''),
152 Some(b'"') => Some(b'"'),
153 _ => None,
154 }
155 }
156 b => b,
157 };
158 match b {
159 Some(b) => {
160 match bytes.next() {
161 Some((offset, _)) => IResult::Done(&input[offset..], b),
162 None => IResult::Done("", b),
163 }
164 }
165 None => IResult::Error,
166 }
167}
168
David Tolnay210884d2016-10-01 08:18:42 -0700169pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> {
170 let mut chars = input.char_indices();
171 let mut n = 0;
172 while let Some((byte_offset, ch)) = chars.next() {
173 match ch {
174 '"' => {
175 n = byte_offset;
176 break;
177 }
David Tolnaydaaf7742016-10-03 11:11:43 -0700178 '#' => {}
David Tolnay210884d2016-10-01 08:18:42 -0700179 _ => return IResult::Error,
180 }
181 }
David Tolnay70843162016-10-30 14:17:26 -0700182 let mut s = String::new();
David Tolnay590cdfd2016-10-01 08:51:55 -0700183 for (byte_offset, ch) in chars {
David Tolnay70843162016-10-30 14:17:26 -0700184 match ch {
185 '"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
186 let rest = &input[byte_offset + 1 + n..];
187 return IResult::Done(rest, (s, n));
188 }
189 '\r' => {}
190 _ => s.push(ch),
David Tolnay210884d2016-10-01 08:18:42 -0700191 }
192 }
193 IResult::Error
194}
195
David Tolnaya73e0f02016-10-26 23:25:49 -0700196macro_rules! next_ch {
David Tolnayd3bd58a2016-09-13 10:58:37 -0700197 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
198 match $chars.next() {
199 Some((_, ch)) => match ch {
200 $pat $(| $rest)* => ch,
201 _ => return None,
202 },
203 None => return None,
204 }
205 };
206}
207
/// Uniform access to the integer types' inherent `from_str_radix`
/// constructors, so that generic code can pick the target type by inference.
trait FromStrRadix: Sized {
    /// Parses `digits` as an integer written in the given `base`.
    fn from_str_radix(digits: &str, base: u32) -> Result<Self, ParseIntError>;
}

impl FromStrRadix for u8 {
    /// Forwards to the inherent `u8::from_str_radix`.
    fn from_str_radix(digits: &str, base: u32) -> Result<Self, ParseIntError> {
        u8::from_str_radix(digits, base)
    }
}

impl FromStrRadix for u32 {
    /// Forwards to the inherent `u32::from_str_radix`.
    fn from_str_radix(digits: &str, base: u32) -> Result<Self, ParseIntError> {
        u32::from_str_radix(digits, base)
    }
}
223
// Interprets the given variables, each holding one hex digit as a `char` or
// `u8`, as a single hexadecimal number, most significant digit first.
//
// The result type is chosen by inference through `FromStrRadix`. This panics
// if the digits are not valid UTF-8 / hexadecimal or do not fit the target
// type, so callers must validate the digits beforehand.
macro_rules! from_hex {
    ($($ch:ident)+) => {{
        let digits = [$($ch as u8),*];
        let text = str::from_utf8(&digits).unwrap();
        FromStrRadix::from_str_radix(text, 16).unwrap()
    }};
}
231
David Tolnay590cdfd2016-10-01 08:51:55 -0700232#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
David Tolnaya73e0f02016-10-26 23:25:49 -0700233fn backslash_x_char<I>(chars: &mut I) -> Option<char>
David Tolnaydaaf7742016-10-03 11:11:43 -0700234 where I: Iterator<Item = (usize, char)>
235{
David Tolnaya73e0f02016-10-26 23:25:49 -0700236 let a = next_ch!(chars @ '0'...'7');
237 let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
238 char::from_u32(from_hex!(a b))
239}
240
241#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
242fn backslash_x_byte<I>(chars: &mut I) -> Option<u8>
243 where I: Iterator<Item = (usize, u8)>
244{
245 let a = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
246 let b = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F');
247 Some(from_hex!(a b))
David Tolnayd3bd58a2016-09-13 10:58:37 -0700248}
249
/// Decodes the `{…}` part of a `\u{…}` escape.
///
/// Expects `chars` to be positioned at the opening brace. Between the braces
/// there must be one to six hex digits (either case). Returns the character
/// with that code point, or `None` if the syntax is wrong, the input ends
/// early, or the value is not a valid `char` (for example a surrogate or
/// anything above 0x10FFFF). On failure the offending item has already been
/// consumed from `chars`.
fn backslash_u<I>(chars: &mut I) -> Option<char>
    where I: Iterator<Item = (usize, char)>
{
    // Most hex digits permitted between the braces.
    const MAX_DIGITS: u32 = 6;

    match chars.next() {
        Some((_, '{')) => {}
        _ => return None,
    }

    let mut code: u32 = 0;
    let mut digits = 0;
    loop {
        let ch = match chars.next() {
            Some((_, ch)) => ch,
            None => return None,
        };
        if ch == '}' {
            // An empty `{}` is not a valid escape.
            return if digits == 0 {
                None
            } else {
                char::from_u32(code)
            };
        }
        match ch.to_digit(16) {
            // Six digits keep `code` at or below 0xFF_FFFF, so this cannot
            // overflow.
            Some(digit) if digits < MAX_DIGITS => {
                code = code * 16 + digit;
                digits += 1;
            }
            // Not a hex digit, or a seventh digit where `}` was required.
            _ => return None,
        }
    }
}
279
// Exercises `\x`, a backslash-newline line continuation, and `\u{…}` escapes
// of one to six digits; the unconsumed remainder must be the closing quote.
#[test]
fn test_cooked_string() {
    let escaped = "\\x62 \\\n \\u{7} \\u{64} \\u{bf5} \\u{12ba} \\u{1F395} \\u{102345}\"";
    let cooked = String::from("\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}");
    let parsed = cooked_string(escaped);
    assert_eq!(parsed, IResult::Done("\"", cooked));
}
David Tolnaya73e0f02016-10-26 23:25:49 -0700286
// Exercises `\x` escapes (including a value above 0x7F) and a
// backslash-newline line continuation; the unconsumed remainder must be the
// closing quote.
#[test]
fn test_cooked_byte_string() {
    let escaped = "\\x62 \\\n \\xEF\"";
    let cooked = b"\x62 \xEF".to_vec();
    let parsed = cooked_byte_string(escaped);
    assert_eq!(parsed, IResult::Done("\"", cooked));
}