Upgrade rust/crates/regex-syntax to 0.6.20
Test: make
Change-Id: If42f1186cfdfae7f6752ca48aa7501c7626459a6
diff --git a/src/ast/parse.rs b/src/ast/parse.rs
index f5b4548..55c5f79 100644
--- a/src/ast/parse.rs
+++ b/src/ast/parse.rs
@@ -98,12 +98,13 @@
/// Returns true if the given character is a valid in a capture group name.
///
/// If `first` is true, then `c` is treated as the first character in the
-/// group name (which is not allowed to be a digit).
+/// group name (which must be alphabetic or underscore).
fn is_capture_char(c: char, first: bool) -> bool {
c == '_'
- || (!first && c >= '0' && c <= '9')
- || (c >= 'a' && c <= 'z')
- || (c >= 'A' && c <= 'Z')
+ || (!first
+ && (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']'))
+ || ('A' <= c && c <= 'Z')
+ || ('a' <= c && c <= 'z')
}
/// A builder for a regular expression parser.
@@ -3852,6 +3853,45 @@
);
assert_eq!(
+ parser("(?P<a_1>z)").parse(),
+ Ok(Ast::Group(ast::Group {
+ span: span(0..10),
+ kind: ast::GroupKind::CaptureName(ast::CaptureName {
+ span: span(4..7),
+ name: s("a_1"),
+ index: 1,
+ }),
+ ast: Box::new(lit('z', 8)),
+ }))
+ );
+
+ assert_eq!(
+ parser("(?P<a.1>z)").parse(),
+ Ok(Ast::Group(ast::Group {
+ span: span(0..10),
+ kind: ast::GroupKind::CaptureName(ast::CaptureName {
+ span: span(4..7),
+ name: s("a.1"),
+ index: 1,
+ }),
+ ast: Box::new(lit('z', 8)),
+ }))
+ );
+
+ assert_eq!(
+ parser("(?P<a[1]>z)").parse(),
+ Ok(Ast::Group(ast::Group {
+ span: span(0..11),
+ kind: ast::GroupKind::CaptureName(ast::CaptureName {
+ span: span(4..8),
+ name: s("a[1]"),
+ index: 1,
+ }),
+ ast: Box::new(lit('z', 9)),
+ }))
+ );
+
+ assert_eq!(
parser("(?P<").parse().unwrap_err(),
TestError {
span: span(4..4),
diff --git a/src/lib.rs b/src/lib.rs
index 7892668..f35c913 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -198,7 +198,7 @@
///
/// These are the only characters that are allowed to be escaped, with one
/// exception: an ASCII space character may be escaped when extended mode (with
-/// the `x` flag) is enabld. In particular, `is_meta_character(' ')` returns
+/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns
/// `false`.
///
/// Note that the set of characters for which this function returns `true` or
diff --git a/src/unicode.rs b/src/unicode.rs
index 7e41439..a78362b 100644
--- a/src/unicode.rs
+++ b/src/unicode.rs
@@ -237,8 +237,16 @@
fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
let norm = symbolic_name_normalize(name);
- if let Some(canon) = canonical_prop(&norm)? {
- return Ok(CanonicalClassQuery::Binary(canon));
+ // This is a special case where 'cf' refers to the 'Format' general
+ // category, but where the 'cf' abbreviation is also an abbreviation
+ // for the 'Case_Folding' property. But we want to treat it as
+ // a general category. (Currently, we don't even support the
+ // 'Case_Folding' property. But if we do in the future, users will be
+ // required to spell it out.)
+ if norm != "cf" {
+ if let Some(canon) = canonical_prop(&norm)? {
+ return Ok(CanonicalClassQuery::Binary(canon));
+ }
}
if let Some(canon) = canonical_gencat(&norm)? {
return Ok(CanonicalClassQuery::GeneralCategory(canon));