Allow hex escapes greater than \x7F in byte strings
diff --git a/src/escape.rs b/src/escape.rs
index 65c2615..2adf06b 100644
--- a/src/escape.rs
+++ b/src/escape.rs
@@ -1,4 +1,5 @@
use std::{char, str};
+use std::num::ParseIntError;
use nom::IResult;
pub fn cooked_string(input: &str) -> IResult<&str, String> {
@@ -12,7 +13,7 @@
'\\' => {
match chars.next() {
Some((_, 'x')) => {
- match backslash_x(&mut chars) {
+ match backslash_x_char(&mut chars) {
Some(ch) => s.push(ch),
None => break,
}
@@ -50,12 +51,58 @@
IResult::Error
}
+pub fn cooked_byte_string(mut input: &str) -> IResult<&str, Vec<u8>> { // Decodes the text after the opening quote of a byte string literal into raw bytes.
+ let mut vec = Vec::new(); // decoded output bytes
+ let mut bytes = input.bytes().enumerate(); // (byte offset into input, byte) pairs
+ 'outer: while let Some((offset, b)) = bytes.next() {
+ match b {
+ b'"' => {
+ return IResult::Done(&input[offset..], vec); // done: the remaining input still starts at the closing quote
+ }
+ b'\\' => {
+ match bytes.next() {
+ Some((_, b'x')) => {
+ match backslash_x_byte(&mut bytes) {
+ Some(b) => vec.push(b),
+ None => break, // hex escape produced no byte: leave the loop and return IResult::Error
+ }
+ }
+ Some((_, b'n')) => vec.push(b'\n'), // simple one-character escapes map to a single byte each
+ Some((_, b'r')) => vec.push(b'\r'),
+ Some((_, b't')) => vec.push(b'\t'),
+ Some((_, b'\\')) => vec.push(b'\\'),
+ Some((_, b'0')) => vec.push(b'\0'),
+ Some((_, b'\'')) => vec.push(b'\''),
+ Some((_, b'"')) => vec.push(b'"'),
+ Some((newline, b'\n')) => { // backslash followed by a newline: line continuation, pushes nothing
+ let rest = &input[newline + 1..]; // text after the newline
+ for (offset, ch) in rest.char_indices() {
+ if !ch.is_whitespace() { // first non-whitespace char ends the skipped run
+ input = &rest[offset..];
+ bytes = input.bytes().enumerate(); // restart the iterator so offsets are relative to the re-sliced input
+ continue 'outer;
+ }
+ }
+ break; // nothing but whitespace followed the continuation
+ }
+ _ => break, // any other escape, or end of input right after the backslash
+ }
+ }
+ b if b < 0x80 => { // plain ASCII byte is copied through unchanged
+ vec.push(b);
+ }
+ _ => break, // non-ASCII byte outside an escape is rejected
+ }
+ }
+ IResult::Error // reached via break, or when the input ends before a closing quote
+}
+
pub fn cooked_char(input: &str) -> IResult<&str, char> {
let mut chars = input.char_indices();
let ch = match chars.next().map(|(_, ch)| ch) {
Some('\\') => {
match chars.next().map(|(_, ch)| ch) {
- Some('x') => backslash_x(&mut chars),
+ Some('x') => backslash_x_char(&mut chars),
Some('n') => Some('\n'),
Some('r') => Some('\r'),
Some('t') => Some('\t'),
@@ -98,7 +145,7 @@
IResult::Error
}
-macro_rules! next_char {
+macro_rules! next_ch {
($chars:ident @ $pat:pat $(| $rest:pat)*) => {
match $chars.next() {
Some((_, ch)) => match ch {
@@ -110,54 +157,88 @@
};
}
+trait FromStrRadix: Sized { // Lets from_hex! produce either a u8 or a u32 through one call, chosen by type inference.
+ fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError>; // same parameters as the inherent integer from_str_radix methods the impls forward to
+}
+
+impl FromStrRadix for u8 { // u8 is the result type used by backslash_x_byte for byte string hex escapes
+ fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
+ u8::from_str_radix(src, radix) // inherent u8::from_str_radix takes precedence over the trait method, so this does not recurse
+ }
+}
+
+impl FromStrRadix for u32 { // u32 is the type the char escape helpers pass on to char::from_u32
+ fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
+ u32::from_str_radix(src, radix) // inherent u32::from_str_radix takes precedence over the trait method, so this does not recurse
+ }
+}
+
+macro_rules! from_hex { // Parses the given hex-digit variables (char or u8) as one base-16 integer.
+ ($($ch:ident)+) => {{
+ let hex_bytes = &[$($ch as u8),*]; // one byte per digit, in the order given
+ let hex_str = str::from_utf8(hex_bytes).unwrap(); // assumes callers pass ASCII hex digits, as the call sites in this file do
+ FromStrRadix::from_str_radix(hex_str, 16).unwrap() // result type is inferred at the call site; panics if the digits do not parse into it
+ }};
+}
+
#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
-fn backslash_x<I>(chars: &mut I) -> Option<char>
+fn backslash_x_char<I>(chars: &mut I) -> Option<char> // Decodes the two hex digits that follow a \x escape in a char or string literal.
where I: Iterator<Item = (usize, char)>
{
- let a = next_char!(chars @ '0'...'7');
- let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
- char_from_hex_bytes(&[a as u8, b as u8])
+ let a = next_ch!(chars @ '0'...'7'); // first digit is limited to 0-7, keeping the value at or below 0x7F
+ let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); // second digit may be any hex digit
+ char::from_u32(from_hex!(a b)) // from_hex! yields a u32 here because char::from_u32 takes one
+}
+
+#[cfg_attr(feature = "clippy", allow(diverging_sub_expression))]
+fn backslash_x_byte<I>(chars: &mut I) -> Option<u8> // Decodes the two hex digits that follow a \x escape in a byte string.
+ where I: Iterator<Item = (usize, u8)>
+{
+ let a = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F'); // any hex digit is allowed first, so values above 0x7F are accepted
+ let b = next_ch!(chars @ b'0'...b'9' | b'a'...b'f' | b'A'...b'F'); // second digit may be any hex digit
+ Some(from_hex!(a b)) // from_hex! yields a u8 here, inferred from the Option<u8> return type
}
#[cfg_attr(feature = "clippy", allow(diverging_sub_expression, many_single_char_names))]
fn backslash_u<I>(chars: &mut I) -> Option<char>
where I: Iterator<Item = (usize, char)>
{
- next_char!(chars @ '{');
- let a = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F');
- let b = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
+ next_ch!(chars @ '{'); // the opening brace is required
+ let a = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F'); // at least one hex digit is required
+ let b = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}'); // every later position is another hex digit or the closing brace
if b == '}' {
- return char_from_hex_bytes(&[a as u8]);
+ return char::from_u32(from_hex!(a)); // one-digit escape
}
- let c = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
+ let c = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
if c == '}' {
- return char_from_hex_bytes(&[a as u8, b as u8]);
+ return char::from_u32(from_hex!(a b)); // two-digit escape
}
- let d = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
+ let d = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
if d == '}' {
- return char_from_hex_bytes(&[a as u8, b as u8, c as u8]);
+ return char::from_u32(from_hex!(a b c)); // three-digit escape
}
- let e = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
+ let e = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
if e == '}' {
- return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8]);
+ return char::from_u32(from_hex!(a b c d)); // four-digit escape
}
- let f = next_char!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
+ let f = next_ch!(chars @ '0'...'9' | 'a'...'f' | 'A'...'F' | '}');
if f == '}' {
- return char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8]);
+ return char::from_u32(from_hex!(a b c d e)); // five-digit escape
}
- next_char!(chars @ '}');
- char_from_hex_bytes(&[a as u8, b as u8, c as u8, d as u8, e as u8, f as u8])
-}
-
-/// Assumes the bytes are all '0'...'9' | 'a'...'f' | 'A'...'F'.
-fn char_from_hex_bytes(hex_bytes: &[u8]) -> Option<char> {
- let hex_str = unsafe { str::from_utf8_unchecked(hex_bytes) };
- char::from_u32(u32::from_str_radix(hex_str, 16).unwrap())
+ next_ch!(chars @ '}'); // after six digits only the closing brace is accepted
+ char::from_u32(from_hex!(a b c d e f)) // char::from_u32 gives None when the value is not a valid char
}
#[test]
fn test_cooked_string() {
- let input = r#"\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}""#;
+ let input = "\\x62 \\\n \\u{7} \\u{64} \\u{bf5} \\u{12ba} \\u{1F395} \\u{102345}\""; // now also contains a backslash-newline continuation after the first escape
let expected = "\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}";
assert_eq!(cooked_string(input), IResult::Done("\"", expected.to_string()));
}
+
+#[test]
+fn test_cooked_byte_string() { // Covers a hex escape, a backslash-newline continuation, and a hex escape above 0x7F; the closing quote must remain as the unparsed rest.
+ let input = "\\x62 \\\n \\xEF\"";
+ let expected = b"\x62 \xEF"; // the continuation and the whitespace after it contribute no bytes
+ assert_eq!(cooked_byte_string(input), IResult::Done("\"", expected.to_vec()));
+}
diff --git a/src/lit.rs b/src/lit.rs
index 34cd341..4829803 100644
--- a/src/lit.rs
+++ b/src/lit.rs
@@ -129,7 +129,7 @@
#[cfg(feature = "parsing")]
pub mod parsing {
use super::*;
- use escape::{cooked_char, cooked_string, raw_string};
+ use escape::{cooked_byte_string, cooked_char, cooked_string, raw_string};
use space::skip_whitespace;
use nom::IResult;
use unicode_xid::UnicodeXID;
@@ -168,9 +168,9 @@
named!(byte_string -> Lit, alt!(
delimited!(
punct!("b\""),
- cooked_string,
+ cooked_byte_string,
tag!("\"")
- ) => { |s: String| Lit::ByteStr(s.into_bytes(), StrStyle::Cooked) }
+ ) => { |vec| Lit::ByteStr(vec, StrStyle::Cooked) }
|
preceded!(
punct!("br"),