blob: 8eccb4ea582a6d6769088054714a9f014df5fd4f [file] [log] [blame]
David Tolnayd3bd58a2016-09-13 10:58:37 -07001use std::{char, str};
David Tolnayb5a7b142016-09-13 22:46:39 -07002use nom::IResult;
David Tolnay886d8ea2016-09-13 08:34:07 -07003
David Tolnay210884d2016-10-01 08:18:42 -07004pub fn cooked_string(input: &str) -> IResult<&str, String> {
David Tolnay886d8ea2016-09-13 08:34:07 -07005 let mut s = String::new();
6 let mut chars = input.char_indices().peekable();
7 while let Some((byte_offset, ch)) = chars.next() {
8 match ch {
9 '"' => {
10 return IResult::Done(&input[byte_offset..], s);
11 }
12 '\\' => {
13 match chars.next() {
David Tolnaydaaf7742016-10-03 11:11:43 -070014 Some((_, 'x')) => {
15 match backslash_x(&mut chars) {
16 Some(ch) => s.push(ch),
17 None => break,
18 }
19 }
David Tolnay886d8ea2016-09-13 08:34:07 -070020 Some((_, 'n')) => s.push('\n'),
21 Some((_, 'r')) => s.push('\r'),
22 Some((_, 't')) => s.push('\t'),
23 Some((_, '\\')) => s.push('\\'),
24 Some((_, '0')) => s.push('\0'),
David Tolnaydaaf7742016-10-03 11:11:43 -070025 Some((_, 'u')) => {
26 match backslash_u(&mut chars) {
27 Some(ch) => s.push(ch),
28 None => break,
29 }
30 }
David Tolnay886d8ea2016-09-13 08:34:07 -070031 Some((_, '\'')) => s.push('\''),
32 Some((_, '"')) => s.push('"'),
33 Some((_, '\n')) => {
34 while let Some(&(_, ch)) = chars.peek() {
35 if ch.is_whitespace() {
36 chars.next();
37 } else {
38 break;
39 }
40 }
41 }
42 _ => break,
43 }
44 }
45 ch => {
46 s.push(ch);
47 }
48 }
49 }
David Tolnayb5a7b142016-09-13 22:46:39 -070050 IResult::Error
David Tolnay886d8ea2016-09-13 08:34:07 -070051}
David Tolnayd3bd58a2016-09-13 10:58:37 -070052
David Tolnay210884d2016-10-01 08:18:42 -070053pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> {
54 let mut chars = input.char_indices();
55 let mut n = 0;
56 while let Some((byte_offset, ch)) = chars.next() {
57 match ch {
58 '"' => {
59 n = byte_offset;
60 break;
61 }
David Tolnaydaaf7742016-10-03 11:11:43 -070062 '#' => {}
David Tolnay210884d2016-10-01 08:18:42 -070063 _ => return IResult::Error,
64 }
65 }
David Tolnay590cdfd2016-10-01 08:51:55 -070066 for (byte_offset, ch) in chars {
David Tolnay210884d2016-10-01 08:18:42 -070067 if ch == '"' && input[byte_offset + 1..].starts_with(&input[..n]) {
68 let rest = &input[byte_offset + 1 + n..];
David Tolnaydaaf7742016-10-03 11:11:43 -070069 let value = &input[n + 1..byte_offset];
David Tolnay210884d2016-10-01 08:18:42 -070070 return IResult::Done(rest, (value.to_owned(), n));
71 }
72 }
73 IResult::Error
74}
75
David Tolnayd3bd58a2016-09-13 10:58:37 -070076macro_rules! next_char {
77 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
78 match $chars.next() {
79 Some((_, ch)) => match ch {
80 $pat $(| $rest)* => ch,
81 _ => return None,
82 },
83 None => return None,
84 }
85 };
86}
87
David Tolnay590cdfd2016-10-01 08:51:55 -070088#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
David Tolnaydaaf7742016-10-03 11:11:43 -070089fn backslash_x<I>(chars: &mut I) -> Option<char>
90 where I: Iterator<Item = (usize, char)>
91{
David Tolnayd3bd58a2016-09-13 10:58:37 -070092 let a = next_char!(chars @ '0'...'7');
93 let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
94 char_from_hex_bytes(&[a as u8, b as u8])
95}
96
David Tolnay590cdfd2016-10-01 08:51:55 -070097#[cfg_attr(feature = "clippy", allow(diverging_sub_expression, many_single_char_names))]
David Tolnaydaaf7742016-10-03 11:11:43 -070098fn backslash_u<I>(chars: &mut I) -> Option<char>
99 where I: Iterator<Item = (usize, char)>
100{
David Tolnayd3bd58a2016-09-13 10:58:37 -0700101 next_char!(chars @ '{');
102 let a = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
103 let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
104 if b == '}' {
105 return char_from_hex_bytes(&[a as u8]);
106 }
107 let c = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
108 if c == '}' {
109 return char_from_hex_bytes(&[a as u8, b as u8]);
110 }
111 let d = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
112 if d == '}' {
113 return char_from_hex_bytes(&[a as u8, b as u8, c as u8]);
114 }
115 let e = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
116 if e == '}' {
117 return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8]);
118 }
119 let f = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
120 if f == '}' {
121 return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8]);
122 }
123 next_char!(chars @ '}');
124 char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8, f as u8])
125}
126
/// Interprets `hex_bytes` as ASCII hexadecimal digits and returns the `char`
/// with that code point.
///
/// Callers in this file only pass bytes in `'0'...'9' | 'a'...'f' | 'A'...'F'`,
/// but the function no longer relies on that: it returns `None` instead of
/// panicking or invoking undefined behaviour when the input is empty, is not
/// valid UTF-8, does not parse as a base-16 `u32`, or names a value that is
/// not a valid `char` (a surrogate or anything above `0x10FFFF`).
fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
    str::from_utf8(hex_bytes)
        .ok()
        .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
        .and_then(char::from_u32)
}
132
#[test]
fn test_cooked_string() {
    // The raw literal holds the escapes as source text followed by the closing
    // quote; the non-raw literal is the same text decoded by the compiler.
    let source = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
    let decoded = "\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}";
    let want = IResult::Done("\"", decoded.to_string());
    assert_eq!(cooked_string(source), want);
}