David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 1 | use std::{char, str}; |
David Tolnay | b5a7b14 | 2016-09-13 22:46:39 -0700 | [diff] [blame] | 2 | use nom::IResult; |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 3 | |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 4 | pub fn cooked_string(input: &str) -> IResult<&str, String> { |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 5 | let mut s = String::new(); |
| 6 | let mut chars = input.char_indices().peekable(); |
| 7 | while let Some((byte_offset, ch)) = chars.next() { |
| 8 | match ch { |
| 9 | '"' => { |
| 10 | return IResult::Done(&input[byte_offset..], s); |
| 11 | } |
| 12 | '\\' => { |
| 13 | match chars.next() { |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 14 | Some((_, 'x')) => match backslash_x(&mut chars) { |
| 15 | Some(ch) => s.push(ch), |
| 16 | None => break, |
| 17 | }, |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 18 | Some((_, 'n')) => s.push('\n'), |
| 19 | Some((_, 'r')) => s.push('\r'), |
| 20 | Some((_, 't')) => s.push('\t'), |
| 21 | Some((_, '\\')) => s.push('\\'), |
| 22 | Some((_, '0')) => s.push('\0'), |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 23 | Some((_, 'u')) => match backslash_u(&mut chars) { |
| 24 | Some(ch) => s.push(ch), |
| 25 | None => break, |
| 26 | }, |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 27 | Some((_, '\'')) => s.push('\''), |
| 28 | Some((_, '"')) => s.push('"'), |
| 29 | Some((_, '\n')) => { |
| 30 | while let Some(&(_, ch)) = chars.peek() { |
| 31 | if ch.is_whitespace() { |
| 32 | chars.next(); |
| 33 | } else { |
| 34 | break; |
| 35 | } |
| 36 | } |
| 37 | } |
| 38 | _ => break, |
| 39 | } |
| 40 | } |
| 41 | ch => { |
| 42 | s.push(ch); |
| 43 | } |
| 44 | } |
| 45 | } |
David Tolnay | b5a7b14 | 2016-09-13 22:46:39 -0700 | [diff] [blame] | 46 | IResult::Error |
David Tolnay | 886d8ea | 2016-09-13 08:34:07 -0700 | [diff] [blame] | 47 | } |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 48 | |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 49 | pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> { |
| 50 | let mut chars = input.char_indices(); |
| 51 | let mut n = 0; |
| 52 | while let Some((byte_offset, ch)) = chars.next() { |
| 53 | match ch { |
| 54 | '"' => { |
| 55 | n = byte_offset; |
| 56 | break; |
| 57 | } |
| 58 | '#' => {}, |
| 59 | _ => return IResult::Error, |
| 60 | } |
| 61 | } |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 62 | for (byte_offset, ch) in chars { |
David Tolnay | 210884d | 2016-10-01 08:18:42 -0700 | [diff] [blame] | 63 | if ch == '"' && input[byte_offset + 1..].starts_with(&input[..n]) { |
| 64 | let rest = &input[byte_offset + 1 + n..]; |
| 65 | let value = &input[n + 1 .. byte_offset]; |
| 66 | return IResult::Done(rest, (value.to_owned(), n)); |
| 67 | } |
| 68 | } |
| 69 | IResult::Error |
| 70 | } |
| 71 | |
/// Pulls the next `(offset, char)` item out of the iterator `$chars` and
/// evaluates to the character if it matches one of the `|`-separated patterns.
///
/// If the iterator is exhausted, or the character matches none of the
/// patterns, the macro executes `return None` in the *calling* function, so it
/// may only be used inside functions that return `Option<_>`. The character is
/// consumed from the iterator in either case.
macro_rules! next_char {
    // `pat_param` (rather than `pat`) keeps the `|` separator legal in every
    // edition: since Rust 2021 a `pat` fragment may not be followed by `|`.
    ($chars:ident @ $($alternative:pat_param)|+) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $($alternative)|+ => ch,
                _ => return None,
            },
            None => return None,
        }
    };
}
| 83 | |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 84 | #[cfg_attr(feature = "clippy", allow(diverging_sub_expression))] |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 85 | fn backslash_x<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> { |
| 86 | let a = next_char!(chars @ '0'...'7'); |
| 87 | let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); |
| 88 | char_from_hex_bytes(&[a as u8, b as u8]) |
| 89 | } |
| 90 | |
David Tolnay | 590cdfd | 2016-10-01 08:51:55 -0700 | [diff] [blame] | 91 | #[cfg_attr(feature = "clippy", allow(diverging_sub_expression, many_single_char_names))] |
David Tolnay | d3bd58a | 2016-09-13 10:58:37 -0700 | [diff] [blame] | 92 | fn backslash_u<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> { |
| 93 | next_char!(chars @ '{'); |
| 94 | let a = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); |
| 95 | let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 96 | if b == '}' { |
| 97 | return char_from_hex_bytes(&[a as u8]); |
| 98 | } |
| 99 | let c = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 100 | if c == '}' { |
| 101 | return char_from_hex_bytes(&[a as u8, b as u8]); |
| 102 | } |
| 103 | let d = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 104 | if d == '}' { |
| 105 | return char_from_hex_bytes(&[a as u8, b as u8, c as u8]); |
| 106 | } |
| 107 | let e = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 108 | if e == '}' { |
| 109 | return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8]); |
| 110 | } |
| 111 | let f = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); |
| 112 | if f == '}' { |
| 113 | return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8]); |
| 114 | } |
| 115 | next_char!(chars @ '}'); |
| 116 | char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8, f as u8]) |
| 117 | } |
| 118 | |
/// Interprets `hex_bytes` as ASCII hexadecimal digits and converts the
/// resulting number to a `char`.
///
/// Returns `None` when the slice is empty, contains any byte other than
/// `0`-`9`, `a`-`f` or `A`-`F`, encodes a number that does not fit in a `u32`,
/// or encodes a number that `char::from_u32` rejects (a surrogate or a value
/// above `0x10FFFF`). The function never panics and needs no `unsafe`.
fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
    // Validate up front: this guarantees the bytes are ASCII and also rules
    // out the leading `+` sign that `from_str_radix` would otherwise accept.
    if !hex_bytes.iter().all(u8::is_ascii_hexdigit) {
        return None;
    }
    let hex_str = str::from_utf8(hex_bytes).ok()?;
    // Fails (yielding `None`) for the empty string and on overflow.
    let code_point = u32::from_str_radix(hex_str, 16).ok()?;
    char::from_u32(code_point)
}
| 124 | |
/// Checks that `cooked_string` decodes `\x` and `\u{…}` escapes of every
/// supported digit count and stops at (without consuming) the closing quote.
#[test]
fn test_cooked_string() {
    // Raw literal: the backslashes reach the parser verbatim.
    let source = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
    // Regular literal: the compiler decodes the same escapes for comparison.
    let decoded = String::from("\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}");
    let actual = cooked_string(source);
    assert_eq!(actual, IResult::Done("\"", decoded));
}