David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 1 | use std::{char, str}; |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 2 | use std::num::ParseIntError; |
David Tolnay | b5a7b14 | 2016-09-13 22:46:39 -0700 | [diff] [blame] | 3 | use nom::IResult; |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 4 | |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 5 | pub fn cooked_string(input: &str) -> IResult<&str, String> { |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 6 | let mut s = String::new(); |
| 7 | let mut chars = input.char_indices().peekable(); |
| 8 | while let Some((byte_offset, ch)) = chars.next() { |
| 9 | match ch { |
| 10 | '"' => { |
| 11 | return IResult::Done(&input[byte_offset..], s); |
| 12 | } |
David Tolnay | 7084316 | 2016-10-30 14:17:26 -0700 | [diff] [blame] | 13 | '\r' => { |
| 14 | if let Some((_, '\n')) = chars.next() { |
| 15 | s.push('\n'); |
| 16 | } else { |
| 17 | break; |
| 18 | } |
| 19 | } |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 20 | '\\' => { |
| 21 | match chars.next() { |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 22 | Some((_, 'x')) => { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 23 | match backslash_x_char(&mut chars) { |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 24 | Some(ch) => s.push(ch), |
| 25 | None => break, |
| 26 | } |
| 27 | } |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 28 | Some((_, 'n')) => s.push('\n'), |
| 29 | Some((_, 'r')) => s.push('\r'), |
| 30 | Some((_, 't')) => s.push('\t'), |
| 31 | Some((_, '\\')) => s.push('\\'), |
| 32 | Some((_, '0')) => s.push('\0'), |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 33 | Some((_, 'u')) => { |
| 34 | match backslash_u(&mut chars) { |
| 35 | Some(ch) => s.push(ch), |
| 36 | None => break, |
| 37 | } |
| 38 | } |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 39 | Some((_, '\'')) => s.push('\''), |
| 40 | Some((_, '"')) => s.push('"'), |
David Tolnay | 7084316 | 2016-10-30 14:17:26 -0700 | [diff] [blame] | 41 | Some((_, '\n')) | Some((_, '\r')) => { |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 42 | while let Some(&(_, ch)) = chars.peek() { |
| 43 | if ch.is_whitespace() { |
| 44 | chars.next(); |
| 45 | } else { |
| 46 | break; |
| 47 | } |
| 48 | } |
| 49 | } |
| 50 | _ => break, |
| 51 | } |
| 52 | } |
| 53 | ch => { |
| 54 | s.push(ch); |
| 55 | } |
| 56 | } |
| 57 | } |
David Tolnay | b5a7b14 | 2016-09-13 22:46:39 -0700 | [diff] [blame] | 58 | IResult::Error |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 59 | } |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 60 | |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 61 | pub fn cooked_byte_string(mut input: &str) -> IResult<&str, Vec<u8>> { |
| 62 | let mut vec = Vec::new(); |
| 63 | let mut bytes = input.bytes().enumerate(); |
| 64 | 'outer: while let Some((offset, b)) = bytes.next() { |
| 65 | match b { |
| 66 | b'"' => { |
| 67 | return IResult::Done(&input[offset..], vec); |
| 68 | } |
David Tolnay | 7084316 | 2016-10-30 14:17:26 -0700 | [diff] [blame] | 69 | b'\r' => { |
| 70 | if let Some((_, b'\n')) = bytes.next() { |
| 71 | vec.push(b'\n'); |
| 72 | } else { |
| 73 | break; |
| 74 | } |
| 75 | } |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 76 | b'\\' => { |
| 77 | match bytes.next() { |
| 78 | Some((_, b'x')) => { |
| 79 | match backslash_x_byte(&mut bytes) { |
| 80 | Some(b) => vec.push(b), |
| 81 | None => break, |
| 82 | } |
| 83 | } |
| 84 | Some((_, b'n')) => vec.push(b'\n'), |
| 85 | Some((_, b'r')) => vec.push(b'\r'), |
| 86 | Some((_, b't')) => vec.push(b'\t'), |
| 87 | Some((_, b'\\')) => vec.push(b'\\'), |
| 88 | Some((_, b'0')) => vec.push(b'\0'), |
| 89 | Some((_, b'\'')) => vec.push(b'\''), |
| 90 | Some((_, b'"')) => vec.push(b'"'), |
David Tolnay | c1fea50 | 2016-10-30 17:54:02 -0700 | [diff] [blame] | 91 | Some((newline, b'\n')) | |
| 92 | Some((newline, b'\r')) => { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 93 | let rest = &input[newline + 1..]; |
| 94 | for (offset, ch) in rest.char_indices() { |
| 95 | if !ch.is_whitespace() { |
| 96 | input = &rest[offset..]; |
| 97 | bytes = input.bytes().enumerate(); |
| 98 | continue 'outer; |
| 99 | } |
| 100 | } |
| 101 | break; |
| 102 | } |
| 103 | _ => break, |
| 104 | } |
| 105 | } |
| 106 | b if b < 0x80 => { |
| 107 | vec.push(b); |
| 108 | } |
| 109 | _ => break, |
| 110 | } |
| 111 | } |
| 112 | IResult::Error |
| 113 | } |
| 114 | |
David Tolnay | 615cf6a | 2016-10-08 23:07:02 -0700 | [diff] [blame] | 115 | pub fn cooked_char(input: &str) -> IResult<&str, char> { |
| 116 | let mut chars = input.char_indices(); |
| 117 | let ch = match chars.next().map(|(_, ch)| ch) { |
| 118 | Some('\\') => { |
| 119 | match chars.next().map(|(_, ch)| ch) { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 120 | Some('x') => backslash_x_char(&mut chars), |
David Tolnay | 615cf6a | 2016-10-08 23:07:02 -0700 | [diff] [blame] | 121 | Some('n') => Some('\n'), |
| 122 | Some('r') => Some('\r'), |
| 123 | Some('t') => Some('\t'), |
| 124 | Some('\\') => Some('\\'), |
| 125 | Some('0') => Some('\0'), |
| 126 | Some('u') => backslash_u(&mut chars), |
| 127 | Some('\'') => Some('\''), |
| 128 | Some('"') => Some('"'), |
| 129 | _ => None, |
| 130 | } |
| 131 | } |
| 132 | ch => ch, |
| 133 | }; |
| 134 | match ch { |
| 135 | Some(ch) => IResult::Done(chars.as_str(), ch), |
| 136 | None => IResult::Error, |
| 137 | } |
| 138 | } |
| 139 | |
David Tolnay | fe373a3 | 2016-10-26 23:51:19 -0700 | [diff] [blame] | 140 | pub fn cooked_byte(input: &str) -> IResult<&str, u8> { |
| 141 | let mut bytes = input.bytes().enumerate(); |
| 142 | let b = match bytes.next().map(|(_, b)| b) { |
| 143 | Some(b'\\') => { |
| 144 | match bytes.next().map(|(_, b)| b) { |
| 145 | Some(b'x') => backslash_x_byte(&mut bytes), |
| 146 | Some(b'n') => Some(b'\n'), |
| 147 | Some(b'r') => Some(b'\r'), |
| 148 | Some(b't') => Some(b'\t'), |
| 149 | Some(b'\\') => Some(b'\\'), |
| 150 | Some(b'0') => Some(b'\0'), |
| 151 | Some(b'\'') => Some(b'\''), |
| 152 | Some(b'"') => Some(b'"'), |
| 153 | _ => None, |
| 154 | } |
| 155 | } |
| 156 | b => b, |
| 157 | }; |
| 158 | match b { |
| 159 | Some(b) => { |
| 160 | match bytes.next() { |
| 161 | Some((offset, _)) => IResult::Done(&input[offset..], b), |
| 162 | None => IResult::Done("", b), |
| 163 | } |
| 164 | } |
| 165 | None => IResult::Error, |
| 166 | } |
| 167 | } |
| 168 | |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 169 | pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> { |
| 170 | let mut chars = input.char_indices(); |
| 171 | let mut n = 0; |
| 172 | while let Some((byte_offset, ch)) = chars.next() { |
| 173 | match ch { |
| 174 | '"' => { |
| 175 | n = byte_offset; |
| 176 | break; |
| 177 | } |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 178 | '#' => {} |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 179 | _ => return IResult::Error, |
| 180 | } |
| 181 | } |
David Tolnay | 7084316 | 2016-10-30 14:17:26 -0700 | [diff] [blame] | 182 | let mut s = String::new(); |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 183 | for (byte_offset, ch) in chars { |
David Tolnay | 7084316 | 2016-10-30 14:17:26 -0700 | [diff] [blame] | 184 | match ch { |
| 185 | '"' if input[byte_offset + 1..].starts_with(&input[..n]) => { |
| 186 | let rest = &input[byte_offset + 1 + n..]; |
| 187 | return IResult::Done(rest, (s, n)); |
| 188 | } |
| 189 | '\r' => {} |
| 190 | _ => s.push(ch), |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 191 | } |
| 192 | } |
| 193 | IResult::Error |
| 194 | } |
| 195 | |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 196 | macro_rules! next_ch { |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 197 | ($chars:ident @ $pat:pat $(| $rest:pat)*) => { |
| 198 | match $chars.next() { |
| 199 | Some((_, ch)) => match ch { |
| 200 | $pat $(| $rest)* => ch, |
| 201 | _ => return None, |
| 202 | }, |
| 203 | None => return None, |
| 204 | } |
| 205 | }; |
| 206 | } |
| 207 | |
/// Trait wrapper around the inherent `from_str_radix` functions of the integer
/// types, so that generic code (see `from_hex!`) can parse into whichever
/// integer type the call site's type inference selects.
trait FromStrRadix: Sized {
    /// Parses `src` as an integer written in base `radix`.
    fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError>;
}

impl FromStrRadix for u8 {
    fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
        // Resolves to the inherent `u8::from_str_radix`, not to this method.
        u8::from_str_radix(src, radix)
    }
}

impl FromStrRadix for u32 {
    fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
        // Resolves to the inherent `u32::from_str_radix`, not to this method.
        u32::from_str_radix(src, radix)
    }
}
| 223 | |
/// Interprets the given identifiers, each bound to one hex digit, as the
/// digits of a single hexadecimal number, most significant first.
///
/// Each identifier is cast with `as u8`, so it may be bound to a `char` or a
/// `u8`. The integer type of the result is chosen by inference at the call
/// site through `FromStrRadix`.
///
/// # Panics
///
/// Both `unwrap()` calls rely on the caller having already validated every
/// digit as an ASCII hex digit (as the `next_ch!` patterns in this file do),
/// and on the number fitting the target type. Any other input panics.
macro_rules! from_hex {
    ($($ch:ident)+) => {{
        let hex_bytes = &[$($ch as u8),*];
        let hex_str = str::from_utf8(hex_bytes).unwrap();
        FromStrRadix::from_str_radix(hex_str, 16).unwrap()
    }};
}
| 231 | |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 232 | #[cfg_attr(feature = "clippy", allow(diverging_sub_expression))] |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 233 | fn backslash_x_char<I>(chars: &mut I) -> Option<char> |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 234 | where I: Iterator<Item = (usize, char)> |
| 235 | { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 236 | let a = next_ch!(chars @ '0'...'7'); |
| 237 | let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); |
| 238 | char::from_u32(from_hex!(a b)) |
| 239 | } |
| 240 | |
| 241 | #[cfg_attr(feature = "clippy", allow(diverging_sub_expression))] |
| 242 | fn backslash_x_byte<I>(chars: &mut I) -> Option<u8> |
| 243 | where I: Iterator<Item = (usize, u8)> |
| 244 | { |
| 245 | let a = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F'); |
| 246 | let b = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F'); |
| 247 | Some(from_hex!(a b)) |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 248 | } |
| 249 | |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 250 | #[cfg_attr(feature = "clippy", allow(diverging_sub_expression, many_single_char_names))] |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 251 | fn backslash_u<I>(chars: &mut I) -> Option<char> |
| 252 | where I: Iterator<Item = (usize, char)> |
| 253 | { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 254 | next_ch!(chars @ '{'); |
| 255 | let a = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); |
| 256 | let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 257 | if b == '}' { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 258 | return char::from_u32(from_hex!(a)); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 259 | } |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 260 | let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 261 | if c == '}' { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 262 | return char::from_u32(from_hex!(a b)); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 263 | } |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 264 | let d = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 265 | if d == '}' { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 266 | return char::from_u32(from_hex!(a b c)); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 267 | } |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 268 | let e = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 269 | if e == '}' { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 270 | return char::from_u32(from_hex!(a b c d)); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 271 | } |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 272 | let f = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 273 | if f == '}' { |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 274 | return char::from_u32(from_hex!(a b c d e)); |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 275 | } |
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 276 | next_ch!(chars @ '}'); |
| 277 | char::from_u32(from_hex!(a b c d e f)) |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 278 | } |
| 279 | |
// Covers `\x`, a backslash-newline line continuation, and `\u{...}` escapes of
// one to six hex digits. The parser must stop at, not consume, the closing
// quote.
#[test]
fn test_cooked_string() {
    let input = "\\x62 \\\n \\u{7} \\u{64} \\u{bf5} \\u{12ba} \\u{1F395} \\u{102345}\"";
    let expected = "\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}";
    assert_eq!(cooked_string(input), IResult::Done("\"", expected.to_string()));
}
David Tolnay | a73e0f0 | 2016-10-26 23:25:49 -0700 | [diff] [blame] | 286 | |
// Covers `\x` escapes (including a value above 0x7F, which byte strings
// permit) and a backslash-newline line continuation. The parser must stop at,
// not consume, the closing quote.
#[test]
fn test_cooked_byte_string() {
    let input = "\\x62 \\\n \\xEF\"";
    let expected = b"\x62 \xEF";
    assert_eq!(cooked_byte_string(input), IResult::Done("\"", expected.to_vec()));
}