blob: fa19bbea8a3473011e784cba69f4d5f9b9a3fd8e [file] [log] [blame]
David Tolnayd3bd58a2016-09-13 10:58:37 -07001use std::{char, str};
David Tolnayb5a7b142016-09-13 22:46:39 -07002use nom::IResult;
David Tolnay886d8ea2016-09-13 08:34:07 -07003
David Tolnay210884d2016-10-01 08:18:42 -07004pub fn cooked_string(input: &str) -> IResult<&str, String> {
David Tolnay886d8ea2016-09-13 08:34:07 -07005 let mut s = String::new();
6 let mut chars = input.char_indices().peekable();
7 while let Some((byte_offset, ch)) = chars.next() {
8 match ch {
9 '"' => {
10 return IResult::Done(&input[byte_offset..], s);
11 }
12 '\\' => {
13 match chars.next() {
David Tolnayd3bd58a2016-09-13 10:58:37 -070014 Some((_, 'x')) => match backslash_x(&mut chars) {
15 Some(ch) => s.push(ch),
16 None => break,
17 },
David Tolnay886d8ea2016-09-13 08:34:07 -070018 Some((_, 'n')) => s.push('\n'),
19 Some((_, 'r')) => s.push('\r'),
20 Some((_, 't')) => s.push('\t'),
21 Some((_, '\\')) => s.push('\\'),
22 Some((_, '0')) => s.push('\0'),
David Tolnayd3bd58a2016-09-13 10:58:37 -070023 Some((_, 'u')) => match backslash_u(&mut chars) {
24 Some(ch) => s.push(ch),
25 None => break,
26 },
David Tolnay886d8ea2016-09-13 08:34:07 -070027 Some((_, '\'')) => s.push('\''),
28 Some((_, '"')) => s.push('"'),
29 Some((_, '\n')) => {
30 while let Some(&(_, ch)) = chars.peek() {
31 if ch.is_whitespace() {
32 chars.next();
33 } else {
34 break;
35 }
36 }
37 }
38 _ => break,
39 }
40 }
41 ch => {
42 s.push(ch);
43 }
44 }
45 }
David Tolnayb5a7b142016-09-13 22:46:39 -070046 IResult::Error
David Tolnay886d8ea2016-09-13 08:34:07 -070047}
David Tolnayd3bd58a2016-09-13 10:58:37 -070048
David Tolnay210884d2016-10-01 08:18:42 -070049pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> {
50 let mut chars = input.char_indices();
51 let mut n = 0;
52 while let Some((byte_offset, ch)) = chars.next() {
53 match ch {
54 '"' => {
55 n = byte_offset;
56 break;
57 }
58 '#' => {},
59 _ => return IResult::Error,
60 }
61 }
David Tolnay590cdfd2016-10-01 08:51:55 -070062 for (byte_offset, ch) in chars {
David Tolnay210884d2016-10-01 08:18:42 -070063 if ch == '"' && input[byte_offset + 1..].starts_with(&input[..n]) {
64 let rest = &input[byte_offset + 1 + n..];
65 let value = &input[n + 1 .. byte_offset];
66 return IResult::Done(rest, (value.to_owned(), n));
67 }
68 }
69 IResult::Error
70}
71
David Tolnayd3bd58a2016-09-13 10:58:37 -070072macro_rules! next_char {
73 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
74 match $chars.next() {
75 Some((_, ch)) => match ch {
76 $pat $(| $rest)* => ch,
77 _ => return None,
78 },
79 None => return None,
80 }
81 };
82}
83
David Tolnay590cdfd2016-10-01 08:51:55 -070084#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
David Tolnayd3bd58a2016-09-13 10:58:37 -070085fn backslash_x<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> {
86 let a = next_char!(chars @ '0'...'7');
87 let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
88 char_from_hex_bytes(&[a as u8, b as u8])
89}
90
/// Decodes the `{…}` part of a `\u{…}` escape into a character.
///
/// Expects `{`, then one to six hex digits, then `}`. Returns `None` if the
/// braces are missing, there are no digits or more than six, a non-hex
/// character appears, the input ends early, or the resulting number is not a
/// valid Unicode scalar value.
fn backslash_u<I>(chars: &mut I) -> Option<char>
    where I: Iterator<Item = (usize, char)>
{
    const MAX_DIGITS: usize = 6;

    match chars.next() {
        Some((_, '{')) => {}
        _ => return None,
    }

    let mut code_point = 0u32;
    let mut digits_seen = 0;
    loop {
        let ch = match chars.next() {
            Some((_, ch)) => ch,
            None => return None,
        };
        if ch == '}' {
            // An empty `{}` is not a valid escape.
            if digits_seen == 0 {
                return None;
            }
            return char::from_u32(code_point);
        }
        // After six digits only the closing brace is acceptable.
        if digits_seen == MAX_DIGITS {
            return None;
        }
        match ch.to_digit(16) {
            Some(digit) => {
                // At most six digits, so this cannot overflow a u32.
                code_point = code_point * 16 + digit;
                digits_seen += 1;
            }
            None => return None,
        }
    }
}
118
/// Interprets `hex_bytes` as ASCII hex digits (`0`–`9`, `a`–`f`, `A`–`F`)
/// and returns the character with that code point.
///
/// Returns `None` if the slice is empty, contains any byte that is not a hex
/// digit, encodes a number that does not fit in a `u32`, or encodes a number
/// that is not a valid Unicode scalar value (a surrogate, or above
/// `0x10FFFF`).
fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
    // Validate up front instead of trusting the caller. This also rejects a
    // leading `+`, which `from_str_radix` would otherwise accept.
    if !hex_bytes.iter().all(|&b| (b as char).is_digit(16)) {
        return None;
    }
    str::from_utf8(hex_bytes)
        .ok()
        .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
        .and_then(char::from_u32)
}
124
#[test]
fn test_cooked_string() {
    // The raw literal keeps each escape as written, so `cooked_string` has to
    // decode it. The ordinary literal below is the same text as decoded by
    // the compiler. One `\x` escape and `\u{…}` escapes of one to six digits
    // are covered.
    let escaped = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
    let decoded = "\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}";
    let outcome = cooked_string(escaped);
    // The remainder still holds the closing quote.
    assert_eq!(outcome, IResult::Done("\"", decoded.to_owned()));
}