David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 1 | use std::{char, str}; |
David Tolnay | b5a7b14 | 2016-09-13 22:46:39 -0700 | [diff] [blame] | 2 | use nom::IResult; |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 3 | |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 4 | pub fn cooked_string(input: &str) -> IResult<&str, String> { |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 5 | let mut s = String::new(); |
| 6 | let mut chars = input.char_indices().peekable(); |
| 7 | while let Some((byte_offset, ch)) = chars.next() { |
| 8 | match ch { |
| 9 | '"' => { |
| 10 | return IResult::Done(&input[byte_offset..], s); |
| 11 | } |
| 12 | '\\' => { |
| 13 | match chars.next() { |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 14 | Some((_, 'x')) => { |
| 15 | match backslash_x(&mut chars) { |
| 16 | Some(ch) => s.push(ch), |
| 17 | None => break, |
| 18 | } |
| 19 | } |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 20 | Some((_, 'n')) => s.push('\n'), |
| 21 | Some((_, 'r')) => s.push('\r'), |
| 22 | Some((_, 't')) => s.push('\t'), |
| 23 | Some((_, '\\')) => s.push('\\'), |
| 24 | Some((_, '0')) => s.push('\0'), |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 25 | Some((_, 'u')) => { |
| 26 | match backslash_u(&mut chars) { |
| 27 | Some(ch) => s.push(ch), |
| 28 | None => break, |
| 29 | } |
| 30 | } |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 31 | Some((_, '\'')) => s.push('\''), |
| 32 | Some((_, '"')) => s.push('"'), |
| 33 | Some((_, '\n')) => { |
| 34 | while let Some(&(_, ch)) = chars.peek() { |
| 35 | if ch.is_whitespace() { |
| 36 | chars.next(); |
| 37 | } else { |
| 38 | break; |
| 39 | } |
| 40 | } |
| 41 | } |
| 42 | _ => break, |
| 43 | } |
| 44 | } |
| 45 | ch => { |
| 46 | s.push(ch); |
| 47 | } |
| 48 | } |
| 49 | } |
David Tolnay | b5a7b14 | 2016-09-13 22:46:39 -0700 | [diff] [blame] | 50 | IResult::Error |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 51 | } |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 52 | |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 53 | pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> { |
| 54 | let mut chars = input.char_indices(); |
| 55 | let mut n = 0; |
| 56 | while let Some((byte_offset, ch)) = chars.next() { |
| 57 | match ch { |
| 58 | '"' => { |
| 59 | n = byte_offset; |
| 60 | break; |
| 61 | } |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 62 | '#' => {} |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 63 | _ => return IResult::Error, |
| 64 | } |
| 65 | } |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 66 | for (byte_offset, ch) in chars { |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 67 | if ch == '"' && input[byte_offset + 1..].starts_with(&input[..n]) { |
| 68 | let rest = &input[byte_offset + 1 + n..]; |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 69 | let value = &input[n + 1..byte_offset]; |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 70 | return IResult::Done(rest, (value.to_owned(), n)); |
| 71 | } |
| 72 | } |
| 73 | IResult::Error |
| 74 | } |
| 75 | |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 76 | macro_rules! next_char { |
| 77 | ($chars:ident @ $pat:pat $(| $rest:pat)*) => { |
| 78 | match $chars.next() { |
| 79 | Some((_, ch)) => match ch { |
| 80 | $pat $(| $rest)* => ch, |
| 81 | _ => return None, |
| 82 | }, |
| 83 | None => return None, |
| 84 | } |
| 85 | }; |
| 86 | } |
| 87 | |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 88 | #[cfg_attr(feature = "clippy", allow(diverging_sub_expression))] |
David Tolnay | daaf774 | 2016-10-03 11:11:43 -0700 | [diff] [blame] | 89 | fn backslash_x<I>(chars: &mut I) -> Option<char> |
| 90 | where I: Iterator<Item = (usize, char)> |
| 91 | { |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 92 | let a = next_char!(chars @ '0'...'7'); |
| 93 | let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); |
| 94 | char_from_hex_bytes(&[a as u8, b as u8]) |
| 95 | } |
| 96 | |
/// Decodes the `{X...}` part that follows `\u` in a string literal.
///
/// Expects an opening `{`, then between one and six hex digits (either
/// case), then a closing `}`. Characters are pulled from `chars` up to and
/// including the closing brace on success, or up to and including the first
/// offending character on failure.
///
/// Returns `None` if the braces are missing, there are no digits, there are
/// more than six digits, a non-hex character appears, the input ends early,
/// or the value is not a valid Unicode scalar value (for example a surrogate
/// or anything above `0x10FFFF`).
#[cfg_attr(feature = "clippy", allow(diverging_sub_expression, many_single_char_names))]
fn backslash_u<I>(chars: &mut I) -> Option<char>
    where I: Iterator<Item = (usize, char)>
{
    // Upper bound on the number of hex digits between the braces.
    const MAX_DIGITS: u8 = 6;

    match chars.next() {
        Some((_, '{')) => {}
        _ => return None,
    }

    let mut value = 0u32;
    let mut digits = 0u8;
    loop {
        match chars.next() {
            // A closing brace is only valid once at least one digit was read.
            Some((_, '}')) if digits > 0 => return char::from_u32(value),
            Some((_, ch)) if digits < MAX_DIGITS => {
                match ch.to_digit(16) {
                    Some(digit) => {
                        // At most six digits, so this cannot overflow a u32.
                        value = value * 16 + digit;
                        digits += 1;
                    }
                    None => return None,
                }
            }
            // End of input, or a seventh character that is not `}`.
            _ => return None,
        }
    }
}
| 126 | |
/// Converts a run of ASCII hex digits into the character with that code point.
///
/// Callers are expected to pass only `0`-`9`, `a`-`f` or `A`-`F`, but the
/// function does not rely on that for safety: it returns `None` when the
/// bytes are not valid UTF-8, when they do not parse as a base-16 `u32`
/// (including an empty slice or a value that overflows), or when the parsed
/// value is not a valid Unicode scalar value.
fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
    str::from_utf8(hex_bytes)
        .ok()
        .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
        .and_then(char::from_u32)
}
| 132 | |
#[test]
fn test_cooked_string() {
    // Raw literal: the escapes below reach `cooked_string` unprocessed, and
    // the trailing `"` is the closing quote that stops the scan.
    let escaped = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
    // The same escapes, this time decoded by the compiler.
    let unescaped = String::from("\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}");
    // The closing quote is left in the remaining input.
    let want = IResult::Done("\"", unescaped);
    assert_eq!(cooked_string(escaped), want);
}