Mark ab/6881855 as merged
Bug: 172690556
Change-Id: I33f7c91200baa9ee69532b7b53584f508162bd13
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index bd8a005..4e6ed7c 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "691606773f525be32a59a0c28eae203a79663706"
+ "sha1": "3bcae3b6954ea3628ef0974f0a2604a6ae248408"
}
}
diff --git a/Android.bp b/Android.bp
index d122541..ac83b2d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -34,8 +34,8 @@
}
// dependent_library ["feature_list"]
-// aho-corasick-0.7.13 "default,std"
+// aho-corasick-0.7.14 "default,std"
// lazy_static-1.4.0
-// memchr-2.3.3 "default,std,use_std"
-// regex-syntax-0.6.18 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
+// memchr-2.3.4 "default,std,use_std"
+// regex-syntax-0.6.21 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
// thread_local-1.0.1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7e528d..9dfe497 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,54 @@
+1.4.2 (2020-11-01)
+==================
+This is a small bug fix release that bans `\P{any}`. We previously banned empty
+classes like `[^\w\W]`, but missed the `\P{any}` case. In the future, we hope
+to permit empty classes.
+
+* [BUG #722](https://github.com/rust-lang/regex/issues/722):
+ Ban `\P{any}` to avoid a panic in the regex compiler. Found by OSS-Fuzz.
+
+
+1.4.1 (2020-10-13)
+==================
+This is a small bug fix release that makes `\p{cf}` work. Previously, it would
+report "property not found" even though `cf` is a valid abbreviation for the
+`Format` general category.
+
+* [BUG #719](https://github.com/rust-lang/regex/issues/719):
+ Fixes bug that prevented `\p{cf}` from working.
+
+
+1.4.0 (2020-10-11)
+==================
+This release has a few minor documentation fixes as well as some very minor
+API additions. The MSRV remains at Rust 1.28 for now, but this is intended to
+increase to at least Rust 1.41.1 soon.
+
+This release also adds support for OSS-Fuzz. Kudos to
+[@DavidKorczynski](https://github.com/DavidKorczynski)
+for doing the heavy lifting for that!
+
+New features:
+
+* [FEATURE #649](https://github.com/rust-lang/regex/issues/649):
+ Support `[`, `]` and `.` in capture group names.
+* [FEATURE #687](https://github.com/rust-lang/regex/issues/687):
+ Add `is_empty` predicate to `RegexSet`.
+* [FEATURE #689](https://github.com/rust-lang/regex/issues/689):
+ Implement `Clone` for `SubCaptureMatches`.
+* [FEATURE #715](https://github.com/rust-lang/regex/issues/715):
+ Add `empty` constructor to `RegexSet` for convenience.
+
+Bug fixes:
+
+* [BUG #694](https://github.com/rust-lang/regex/issues/694):
+ Fix doc example for `Replacer::replace_append`.
+* [BUG #698](https://github.com/rust-lang/regex/issues/698):
+ Clarify docs for `s` flag when using a `bytes::Regex`.
+* [BUG #711](https://github.com/rust-lang/regex/issues/711):
+ Clarify `is_match` docs to indicate that it can match anywhere in string.
+
+
1.3.9 (2020-05-28)
==================
This release fixes a MSRV (Minimum Support Rust Version) regression in the
@@ -6,7 +57,7 @@
Bug fixes:
-* [BUG #685](https://github.com/rust-lang/regex/issue/685):
+* [BUG #685](https://github.com/rust-lang/regex/issues/685):
Remove use of `doc_comment` crate, which cannot be used before Rust 1.43.
@@ -22,9 +73,9 @@
* [BUG #523](https://github.com/rust-lang/regex/pull/523):
Add note to documentation that spaces can be escaped in `x` mode.
-* [BUG #524](https://github.com/rust-lang/regex/issue/524):
+* [BUG #524](https://github.com/rust-lang/regex/issues/524):
Add support for empty sub-expressions, including empty alternations.
-* [BUG #659](https://github.com/rust-lang/regex/issue/659):
+* [BUG #659](https://github.com/rust-lang/regex/issues/659):
Fix match bug caused by an empty sub-expression miscompilation.
diff --git a/Cargo.toml b/Cargo.toml
index 02caabb..b6213ea 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@
[package]
name = "regex"
-version = "1.3.9"
+version = "1.4.2"
authors = ["The Rust Project Developers"]
exclude = ["/scripts/*", "/.github/*"]
autotests = false
@@ -80,7 +80,7 @@
optional = true
[dependencies.regex-syntax]
-version = "0.6.18"
+version = "0.6.21"
default-features = false
[dependencies.thread_local]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 1296ae0..dac60bb 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "regex"
-version = "1.3.9" #:version
+version = "1.4.2" #:version
authors = ["The Rust Project Developers"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@@ -118,7 +118,7 @@
# For parsing regular expressions.
[dependencies.regex-syntax]
path = "regex-syntax"
-version = "0.6.18"
+version = "0.6.21"
default-features = false
[dev-dependencies]
diff --git a/METADATA b/METADATA
index 6ba3db5..663a944 100644
--- a/METADATA
+++ b/METADATA
@@ -1,5 +1,5 @@
name: "regex"
-description: "A Rust library for parsing, compiling, and executing regular expressions. Its syntax is similar to Perl-style regular expressions, but lacks a few features like look around and backreferences. In exchange, all searches execute in linear time with respect to the size of the regular expression and search text. Much of the syntax and implementation is inspired by RE2."
+description: "An implementation of regular expressions for Rust. This implementation uses finite automata and guarantees linear time matching on all inputs."
third_party {
url {
type: HOMEPAGE
@@ -7,13 +7,13 @@
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/regex/regex-1.3.9.crate"
+ value: "https://static.crates.io/crates/regex/regex-1.4.2.crate"
}
- version: "1.3.9"
+ version: "1.4.2"
license_type: NOTICE
last_upgrade_date {
year: 2020
- month: 5
- day: 28
+ month: 11
+ day: 2
}
}
diff --git a/TEST_MAPPING b/TEST_MAPPING
new file mode 100644
index 0000000..7bc6ed2
--- /dev/null
+++ b/TEST_MAPPING
@@ -0,0 +1,12 @@
+// Generated by cargo2android.py for tests in Android.bp
+{
+ "presubmit": [
+ {
+ "host": true,
+ "name": "libsqlite3-sys_host_test_src_lib"
+ },
+ {
+ "name": "libsqlite3-sys_device_test_src_lib"
+ }
+ ]
+}
diff --git a/src/compile.rs b/src/compile.rs
index ad54040..cdc583c 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -222,7 +222,7 @@
/// hole
/// ```
///
- /// To compile two expressions, e1 and e2, concatinated together we
+ /// To compile two expressions, e1 and e2, concatenated together we
/// would do:
///
/// ```ignore
diff --git a/src/dfa.rs b/src/dfa.rs
index decc3b9..2a365ee 100644
--- a/src/dfa.rs
+++ b/src/dfa.rs
@@ -679,7 +679,7 @@
}
} else if next_si & STATE_START > 0 {
// A start state isn't in the common case because we may
- // what to do quick prefix scanning. If the program doesn't
+ // want to do quick prefix scanning. If the program doesn't
// have a detected prefix, then start states are actually
// considered common and this case is never reached.
debug_assert!(self.has_prefix());
@@ -725,7 +725,7 @@
}
}
- // Run the DFA once more on the special EOF senitnel value.
+ // Run the DFA once more on the special EOF sentinel value.
// We don't care about the special bits in the state pointer any more,
// so get rid of them.
prev_si &= STATE_MAX;
@@ -830,7 +830,7 @@
}
}
- // Run the DFA once more on the special EOF senitnel value.
+ // Run the DFA once more on the special EOF sentinel value.
prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
None => return Result::Quit,
Some(STATE_DEAD) => return result.set_non_match(0),
@@ -913,8 +913,8 @@
if self.state(si).flags().has_empty() {
// Compute the flags immediately preceding the current byte.
// This means we only care about the "end" or "end line" flags.
- // (The "start" flags are computed immediately proceding the
- // current byte and is handled below.)
+ // (The "start" flags are computed immediately following the
+ // current byte and are handled below.)
let mut flags = EmptyFlags::default();
if b.is_eof() {
flags.end = true;
@@ -1048,7 +1048,7 @@
///
/// If matching starts after the beginning of the input, then only start
/// line should be set if the preceding byte is `\n`. End line should never
- /// be set in this case. (Even if the proceding byte is a `\n`, it will
+ /// be set in this case. (Even if the following byte is a `\n`, it will
/// be handled in a subsequent DFA state.)
fn follow_epsilons(
&mut self,
diff --git a/src/expand.rs b/src/expand.rs
index 528f55e..fd2ab03 100644
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -24,7 +24,7 @@
continue;
}
debug_assert!(!replacement.is_empty());
- let cap_ref = match find_cap_ref(replacement) {
+ let cap_ref = match find_cap_ref(replacement.as_bytes()) {
Some(cap_ref) => cap_ref,
None => {
dst.push_str("$");
@@ -125,19 +125,15 @@
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
-fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
- replacement: &T,
-) -> Option<CaptureRef> {
+fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
let mut i = 0;
let rep: &[u8] = replacement.as_ref();
if rep.len() <= 1 || rep[0] != b'$' {
return None;
}
- let mut brace = false;
i += 1;
if rep[i] == b'{' {
- brace = true;
- i += 1;
+ return find_cap_ref_braced(rep, i + 1);
}
let mut cap_end = i;
while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
@@ -151,12 +147,6 @@
// check with either unsafe or by parsing the number straight from &[u8].
let cap =
str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
- if brace {
- if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
- return None;
- }
- cap_end += 1;
- }
Some(CaptureRef {
cap: match cap.parse::<u32>() {
Ok(i) => Ref::Number(i as usize),
@@ -166,6 +156,31 @@
})
}
+fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> {
+ let start = i;
+ while rep.get(i).map_or(false, |&b| b != b'}') {
+ i += 1;
+ }
+ if !rep.get(i).map_or(false, |&b| b == b'}') {
+ return None;
+ }
+ // When looking at braced names, we don't put any restrictions on the name,
+ // so it's possible it could be invalid UTF-8. But a capture group name
+ // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
+ // safely return None.
+ let cap = match str::from_utf8(&rep[start..i]) {
+ Err(_) => return None,
+ Ok(cap) => cap,
+ };
+ Some(CaptureRef {
+ cap: match cap.parse::<u32>() {
+ Ok(i) => Ref::Number(i as usize),
+ Err(_) => Ref::Named(cap),
+ },
+ end: i + 1,
+ })
+}
+
/// Returns true if and only if the given byte is allowed in a capture name.
fn is_valid_cap_letter(b: &u8) -> bool {
match *b {
@@ -182,13 +197,13 @@
($name:ident, $text:expr) => {
#[test]
fn $name() {
- assert_eq!(None, find_cap_ref($text));
+ assert_eq!(None, find_cap_ref($text.as_bytes()));
}
};
($name:ident, $text:expr, $capref:expr) => {
#[test]
fn $name() {
- assert_eq!(Some($capref), find_cap_ref($text));
+ assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
}
};
}
@@ -204,7 +219,8 @@
find!(find_cap_ref3, "$0", c!(0, 2));
find!(find_cap_ref4, "$5", c!(5, 2));
find!(find_cap_ref5, "$10", c!(10, 3));
- // see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers
+ // See https://github.com/rust-lang/regex/pull/585
+ // for more on characters following numbers
find!(find_cap_ref6, "$42a", c!("42a", 4));
find!(find_cap_ref7, "${42}a", c!(42, 5));
find!(find_cap_ref8, "${42");
@@ -217,4 +233,6 @@
find!(find_cap_ref15, "$1_$2", c!("1_", 3));
find!(find_cap_ref16, "$x-$y", c!("x", 2));
find!(find_cap_ref17, "$x_$y", c!("x_", 3));
+ find!(find_cap_ref18, "${#}", c!("#", 4));
+ find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
}
diff --git a/src/lib.rs b/src/lib.rs
index e0a0975..bdcebd4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -365,7 +365,7 @@
<pre class="rust">
(exp) numbered capture group (indexed by opening parenthesis)
-(?P<name>exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
+(?P<name>exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
(?:exp) non-capturing group
(?flags) set flags within current group
(?flags:exp) set flags for exp (non-capturing)
@@ -562,7 +562,7 @@
[Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
* **unicode-gencat** -
Provide the data for
- [Uncode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
+ [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
This includes, but is not limited to, `Decimal_Number`, `Letter`,
`Math_Symbol`, `Number` and `Punctuation`.
* **unicode-perl** -
@@ -731,8 +731,8 @@
literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that
matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when
enabled.
-6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value.
-When the `s` flag is enabled, `.` matches any byte.
+6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the
+`s` flag is additionally enabled, `.` matches any byte.
# Performance
diff --git a/src/pikevm.rs b/src/pikevm.rs
index c106c76..299087d 100644
--- a/src/pikevm.rs
+++ b/src/pikevm.rs
@@ -8,7 +8,7 @@
//
// It can do more than the DFA can (specifically, record capture locations
// and execute Unicode word boundary assertions), but at a slower speed.
-// Specifically, the Pike VM exectues a DFA implicitly by repeatedly expanding
+// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding
// epsilon transitions. That is, the Pike VM engine can be in multiple states
// at once where as the DFA is only ever in one state at a time.
//
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index 69f0b33..ca01e0e 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -119,7 +119,8 @@
RegexBuilder::new(re).build()
}
- /// Returns true if and only if the regex matches the string given.
+ /// Returns true if and only if there is a match for the regex in the
+ /// string given.
///
/// It is recommended to use this method if all you need to do is test
/// a match, since the underlying matching engine may be able to do less
@@ -930,17 +931,22 @@
/// Expands all instances of `$name` in `replacement` to the corresponding
/// capture group `name`, and writes them to the `dst` buffer given.
///
- /// `name` may be an integer corresponding to the index of the
- /// capture group (counted by order of opening parenthesis where `0` is the
+ /// `name` may be an integer corresponding to the index of the capture
+ /// group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
///
- /// The longest possible name is used. e.g., `$1a` looks up the capture
- /// group named `1a` and not the capture group at index `1`. To exert more
- /// precise control over the name, use braces, e.g., `${1}a`.
+ /// The longest possible name consisting of the characters `[_0-9A-Za-z]`
+ /// is used. e.g., `$1a` looks up the capture group named `1a` and not the
+ /// capture group at index `1`. To exert more precise control over the
+ /// name, or to refer to a capture group name that uses characters outside
+ /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
+ /// using braces, any sequence of valid UTF-8 bytes is permitted. If the
+ /// sequence does not refer to a capture group name in the corresponding
+ /// regex, then it is replaced with an empty string.
///
/// To write a literal `$` use `$$`.
pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
@@ -1051,6 +1057,7 @@
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
+#[derive(Clone)]
pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
diff --git a/src/re_set.rs b/src/re_set.rs
index fc2b61a..b8954be 100644
--- a/src/re_set.rs
+++ b/src/re_set.rs
@@ -96,6 +96,19 @@
RegexSetBuilder::new(exprs).build()
}
+ /// Create a new empty regex set.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use regex::RegexSet;
+ /// let set = RegexSet::empty();
+ /// assert!(set.is_empty());
+ /// ```
+ pub fn empty() -> RegexSet {
+ RegexSetBuilder::new(&[""; 0]).build().unwrap()
+ }
+
/// Returns true if and only if one of the regexes in this set matches
/// the text given.
///
@@ -207,6 +220,11 @@
self.0.regex_strings().len()
}
+ /// Returns `true` if this set contains no regular expressions.
+ pub fn is_empty(&self) -> bool {
+ self.0.regex_strings().is_empty()
+ }
+
/// Returns the patterns that this set will match on.
///
/// This function can be used to determine the pattern for a match. The
diff --git a/src/re_trait.rs b/src/re_trait.rs
index b56804e..d14a9f7 100644
--- a/src/re_trait.rs
+++ b/src/re_trait.rs
@@ -51,6 +51,7 @@
/// Positions are byte indices in terms of the original string matched.
///
/// `'c` is the lifetime of the captures.
+#[derive(Clone)]
pub struct SubCapturesPosIter<'c> {
idx: usize,
locs: &'c Locations,
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index b746599..ea95c1b 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -175,7 +175,8 @@
RegexBuilder::new(re).build()
}
- /// Returns true if and only if the regex matches the string given.
+ /// Returns true if and only if there is a match for the regex in the
+ /// string given.
///
/// It is recommended to use this method if all you need to do is test
/// a match, since the underlying matching engine may be able to do less
@@ -947,17 +948,22 @@
/// Expands all instances of `$name` in `replacement` to the corresponding
/// capture group `name`, and writes them to the `dst` buffer given.
///
- /// `name` may be an integer corresponding to the index of the
- /// capture group (counted by order of opening parenthesis where `0` is the
+ /// `name` may be an integer corresponding to the index of the capture
+ /// group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
///
- /// The longest possible name is used. e.g., `$1a` looks up the capture
- /// group named `1a` and not the capture group at index `1`. To exert more
- /// precise control over the name, use braces, e.g., `${1}a`.
+ /// The longest possible name consisting of the characters `[_0-9A-Za-z]`
+ /// is used. e.g., `$1a` looks up the capture group named `1a` and not the
+ /// capture group at index `1`. To exert more precise control over the
+ /// name, or to refer to a capture group name that uses characters outside
+ /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
+ /// using braces, any sequence of characters is permitted. If the sequence
+ /// does not refer to a capture group name in the corresponding regex, then
+ /// it is replaced with an empty string.
///
/// To write a literal `$` use `$$`.
pub fn expand(&self, replacement: &str, dst: &mut String) {
@@ -1053,6 +1059,7 @@
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
+#[derive(Clone)]
pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
@@ -1122,7 +1129,7 @@
/// have a match at capture group `0`.
///
/// For example, a no-op replacement would be
- /// `dst.extend(caps.get(0).unwrap().as_str())`.
+ /// `dst.push_str(caps.get(0).unwrap().as_str())`.
fn replace_append(&mut self, caps: &Captures, dst: &mut String);
/// Return a fixed unchanging replacement string.
diff --git a/tests/api.rs b/tests/api.rs
index 0d4962c..c7250a8 100644
--- a/tests/api.rs
+++ b/tests/api.rs
@@ -195,6 +195,18 @@
);
expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", "");
+expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%");
+expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc[");
+expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{");
+expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}");
+expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%");
+expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%");
+expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc[");
+expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "[");
+expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "[");
+expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "[");
+expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "[");
+
split!(
split1,
r"(?-u)\s+",
diff --git a/tests/regression_fuzz.rs b/tests/regression_fuzz.rs
new file mode 100644
index 0000000..5f92ed0
--- /dev/null
+++ b/tests/regression_fuzz.rs
@@ -0,0 +1,19 @@
+// These tests are only run for the "default" test target because some of them
+// can take quite a long time. Some of them take long enough that it's not
+// practical to run them in debug mode. :-/
+
+// See: https://oss-fuzz.com/testcase-detail/5673225499181056
+//
+// Ignored by default since it takes too long in debug mode (almost a minute).
+#[test]
+#[ignore]
+fn fuzz1() {
+ regex!(r"1}{55}{0}*{1}{55}{55}{5}*{1}{55}+{56}|;**");
+}
+
+// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505
+// See: https://github.com/rust-lang/regex/issues/722
+#[test]
+fn empty_any_errors_no_panic() {
+ assert!(regex_new!(r"\P{any}").is_err());
+}
diff --git a/tests/set.rs b/tests/set.rs
index 648feec..37fcf87 100644
--- a/tests/set.rs
+++ b/tests/set.rs
@@ -54,3 +54,14 @@
let set = regex_set!(&["a", "b"]);
assert_eq!(vec!["a", "b"], set.patterns());
}
+
+#[test]
+fn len_and_empty() {
+ let empty = regex_set!(&[""; 0]);
+ assert_eq!(empty.len(), 0);
+ assert!(empty.is_empty());
+
+ let not_empty = regex_set!(&["ab", "b"]);
+ assert_eq!(not_empty.len(), 2);
+ assert!(!not_empty.is_empty());
+}
diff --git a/tests/test_default.rs b/tests/test_default.rs
index c0979c1..241e580 100644
--- a/tests/test_default.rs
+++ b/tests/test_default.rs
@@ -49,6 +49,7 @@
mod multiline;
mod noparse;
mod regression;
+mod regression_fuzz;
mod replace;
mod searcher;
mod set;
diff --git a/tests/unicode.rs b/tests/unicode.rs
index 52522f4..9f1cd0c 100644
--- a/tests/unicode.rs
+++ b/tests/unicode.rs
@@ -74,6 +74,9 @@
Some((0, 3))
);
mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
+// See: https://github.com/rust-lang/regex/issues/719
+mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
+mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
mat!(
uni_class_gencat_initial_punctuation,
r"\p{Initial_Punctuation}",