blob: 3ff52e3cbde3a9b20b182ce13f165074359d1dd2 [file] [log] [blame]
David Tolnay886d8ea2016-09-13 08:34:07 -07001#![cfg(feature = "parsing")]
2
David Tolnayd3bd58a2016-09-13 10:58:37 -07003use std::{char, str};
David Tolnayb5a7b142016-09-13 22:46:39 -07004use nom::IResult;
David Tolnay886d8ea2016-09-13 08:34:07 -07005
David Tolnay210884d2016-10-01 08:18:42 -07006pub fn cooked_string(input: &str) -> IResult<&str, String> {
David Tolnay886d8ea2016-09-13 08:34:07 -07007 let mut s = String::new();
8 let mut chars = input.char_indices().peekable();
9 while let Some((byte_offset, ch)) = chars.next() {
10 match ch {
11 '"' => {
12 return IResult::Done(&input[byte_offset..], s);
13 }
14 '\\' => {
15 match chars.next() {
David Tolnayd3bd58a2016-09-13 10:58:37 -070016 Some((_, 'x')) => match backslash_x(&mut chars) {
17 Some(ch) => s.push(ch),
18 None => break,
19 },
David Tolnay886d8ea2016-09-13 08:34:07 -070020 Some((_, 'n')) => s.push('\n'),
21 Some((_, 'r')) => s.push('\r'),
22 Some((_, 't')) => s.push('\t'),
23 Some((_, '\\')) => s.push('\\'),
24 Some((_, '0')) => s.push('\0'),
David Tolnayd3bd58a2016-09-13 10:58:37 -070025 Some((_, 'u')) => match backslash_u(&mut chars) {
26 Some(ch) => s.push(ch),
27 None => break,
28 },
David Tolnay886d8ea2016-09-13 08:34:07 -070029 Some((_, '\'')) => s.push('\''),
30 Some((_, '"')) => s.push('"'),
31 Some((_, '\n')) => {
32 while let Some(&(_, ch)) = chars.peek() {
33 if ch.is_whitespace() {
34 chars.next();
35 } else {
36 break;
37 }
38 }
39 }
40 _ => break,
41 }
42 }
43 ch => {
44 s.push(ch);
45 }
46 }
47 }
David Tolnayb5a7b142016-09-13 22:46:39 -070048 IResult::Error
David Tolnay886d8ea2016-09-13 08:34:07 -070049}
David Tolnayd3bd58a2016-09-13 10:58:37 -070050
David Tolnay210884d2016-10-01 08:18:42 -070051pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> {
52 let mut chars = input.char_indices();
53 let mut n = 0;
54 while let Some((byte_offset, ch)) = chars.next() {
55 match ch {
56 '"' => {
57 n = byte_offset;
58 break;
59 }
60 '#' => {},
61 _ => return IResult::Error,
62 }
63 }
64 while let Some((byte_offset, ch)) = chars.next() {
65 if ch == '"' && input[byte_offset + 1..].starts_with(&input[..n]) {
66 let rest = &input[byte_offset + 1 + n..];
67 let value = &input[n + 1 .. byte_offset];
68 return IResult::Done(rest, (value.to_owned(), n));
69 }
70 }
71 IResult::Error
72}
73
David Tolnayd3bd58a2016-09-13 10:58:37 -070074macro_rules! next_char {
75 ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
76 match $chars.next() {
77 Some((_, ch)) => match ch {
78 $pat $(| $rest)* => ch,
79 _ => return None,
80 },
81 None => return None,
82 }
83 };
84}
85
86fn backslash_x<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> {
87 let a = next_char!(chars @ '0'...'7');
88 let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
89 char_from_hex_bytes(&[a as u8, b as u8])
90}
91
David Tolnayaed77b02016-09-23 20:50:31 -070092#[cfg_attr(feature = "clippy", allow(many_single_char_names))]
David Tolnayd3bd58a2016-09-13 10:58:37 -070093fn backslash_u<I>(chars: &mut I) -> Option<char> where I: Iterator<Item = (usize, char)> {
94 next_char!(chars @ '{');
95 let a = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
96 let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
97 if b == '}' {
98 return char_from_hex_bytes(&[a as u8]);
99 }
100 let c = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
101 if c == '}' {
102 return char_from_hex_bytes(&[a as u8, b as u8]);
103 }
104 let d = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
105 if d == '}' {
106 return char_from_hex_bytes(&[a as u8, b as u8, c as u8]);
107 }
108 let e = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
109 if e == '}' {
110 return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8]);
111 }
112 let f = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
113 if f == '}' {
114 return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8]);
115 }
116 next_char!(chars @ '}');
117 char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8, f as u8])
118}
119
/// Interprets `hex_bytes` as ASCII hexadecimal digits and converts the
/// resulting number to a `char`.
///
/// Callers are expected to pass only `'0'...'9' | 'a'...'f' | 'A'...'F'`
/// bytes, but that is no longer relied upon for soundness: every step is
/// checked.
///
/// Returns `None` if the bytes are not valid UTF-8, do not parse as a
/// base-16 `u32` (including the empty slice and values that overflow), or
/// if the number is not a valid Unicode scalar value (e.g. a surrogate or
/// anything above 0x10FFFF).
fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
    str::from_utf8(hex_bytes)
        .ok()
        .and_then(|hex_str| u32::from_str_radix(hex_str, 16).ok())
        .and_then(char::from_u32)
}
125
/// Checks that `\x` and `\u{...}` escapes with one to six hex digits are
/// decoded, and that parsing stops at (without consuming) the closing quote.
#[test]
fn test_cooked_string() {
    let source = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
    let decoded = String::from("\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}");
    let parsed = cooked_string(source);
    assert_eq!(parsed, IResult::Done("\"", decoded));
}