Upgrade rust/crates/regex to 1.3.9
Test: None
Change-Id: Ic5726cda9566f808b7dd21d8ce9a3045250f8a54
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 02890e7..bd8a005 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "adb4aa3ce437ba1978af540071f85e302cced3ec"
+ "sha1": "691606773f525be32a59a0c28eae203a79663706"
}
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2db8688..c7e528d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,33 @@
+1.3.9 (2020-05-28)
+==================
+This release fixes an MSRV (Minimum Supported Rust Version) regression in the
+1.3.8 release. Namely, while 1.3.8 compiles on Rust 1.28, it actually does not
+compile on other Rust versions, such as Rust 1.39.
+
+Bug fixes:
+
+* [BUG #685](https://github.com/rust-lang/regex/issues/685):
+ Remove use of `doc_comment` crate, which cannot be used before Rust 1.43.
+
+
+1.3.8 (2020-05-28)
+==================
+This release contains a couple of important bug fixes driven
+by better support for empty-subexpressions in regexes. For
+example, regexes like `b|` are now allowed. Major thanks to
+[@sliquister](https://github.com/sliquister) for implementing support for this
+in [#677](https://github.com/rust-lang/regex/pull/677).
+
+Bug fixes:
+
+* [BUG #523](https://github.com/rust-lang/regex/pull/523):
+ Add note to documentation that spaces can be escaped in `x` mode.
+* [BUG #524](https://github.com/rust-lang/regex/issues/524):
+ Add support for empty sub-expressions, including empty alternations.
+* [BUG #659](https://github.com/rust-lang/regex/issues/659):
+ Fix match bug caused by an empty sub-expression miscompilation.
+
+
1.3.7 (2020-04-17)
==================
This release contains a small bug fix that fixes how `regex` forwards crate
diff --git a/Cargo.toml b/Cargo.toml
index 0b2f0b9..02caabb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@
[package]
name = "regex"
-version = "1.3.7"
+version = "1.3.9"
authors = ["The Rust Project Developers"]
exclude = ["/scripts/*", "/.github/*"]
autotests = false
@@ -80,15 +80,12 @@
optional = true
[dependencies.regex-syntax]
-version = "0.6.17"
+version = "0.6.18"
default-features = false
[dependencies.thread_local]
version = "1"
optional = true
-[dev-dependencies.doc-comment]
-version = "0.3"
-
[dev-dependencies.lazy_static]
version = "1"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 1f7863f..1296ae0 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "regex"
-version = "1.3.7" #:version
+version = "1.3.9" #:version
authors = ["The Rust Project Developers"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@@ -118,7 +118,7 @@
# For parsing regular expressions.
[dependencies.regex-syntax]
path = "regex-syntax"
-version = "0.6.17"
+version = "0.6.18"
default-features = false
[dev-dependencies]
@@ -129,7 +129,10 @@
# For generating random test data.
rand = "0.6.5"
# To check README's example
-doc-comment = "0.3"
+# TODO: Re-enable this once the MSRV is 1.43 or greater.
+# See: https://github.com/rust-lang/regex/issues/684
+# See: https://github.com/rust-lang/regex/issues/685
+# doc-comment = "0.3"
# Run the test suite on the default behavior of Regex::new.
# This includes a mish mash of NFAs and DFAs, which are chosen automatically
diff --git a/METADATA b/METADATA
index 53b95fe..6eff2f3 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@
type: GIT
value: "https://github.com/rust-lang/regex"
}
- version: "1.3.7"
+ version: "1.3.9"
license_type: NOTICE
last_upgrade_date {
year: 2020
- month: 4
- day: 17
+ month: 5
+ day: 28
}
}
diff --git a/src/compile.rs b/src/compile.rs
index 2418400..ad54040 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -15,6 +15,7 @@
use Error;
type Result = result::Result<Patch, Error>;
+type ResultOrEmpty = result::Result<Option<Patch>, Error>;
#[derive(Debug)]
struct Patch {
@@ -132,7 +133,7 @@
self.compiled.start = dotstar_patch.entry;
}
self.compiled.captures = vec![None];
- let patch = self.c_capture(0, expr)?;
+ let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst());
if self.compiled.needs_dotstar() {
self.fill(dotstar_patch.hole, patch.entry);
} else {
@@ -167,14 +168,16 @@
for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
- let Patch { hole, entry } = self.c_capture(0, expr)?;
+ let Patch { hole, entry } =
+ self.c_capture(0, expr)?.unwrap_or(self.next_inst());
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
self.push_compiled(Inst::Match(i));
prev_hole = self.fill_split(split, Some(entry), None);
}
let i = exprs.len() - 1;
- let Patch { hole, entry } = self.c_capture(0, &exprs[i])?;
+ let Patch { hole, entry } =
+ self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst());
self.fill(prev_hole, entry);
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
@@ -242,13 +245,16 @@
/// method you will see that it does exactly this, though it handles
/// a list of expressions rather than just the two that we use for
/// an example.
- fn c(&mut self, expr: &Hir) -> Result {
+ ///
+ /// Ok(None) is returned when an expression is compiled to no
+ /// instruction, and so no patch.entry value makes sense.
+ fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
use prog;
use syntax::hir::HirKind::*;
self.check_size()?;
match *expr.kind() {
- Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }),
+ Empty => Ok(None),
Literal(hir::Literal::Unicode(c)) => self.c_char(c),
Literal(hir::Literal::Byte(b)) => {
assert!(self.compiled.uses_bytes());
@@ -357,7 +363,7 @@
}
}
- fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> Result {
+ fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
if self.num_exprs > 1 || self.compiled.is_dfa {
// Don't ever compile Save instructions for regex sets because
// they are never used. They are also never used in DFA programs
@@ -366,11 +372,11 @@
} else {
let entry = self.insts.len();
let hole = self.push_hole(InstHole::Save { slot: first_slot });
- let patch = self.c(expr)?;
+ let patch = self.c(expr)?.unwrap_or(self.next_inst());
self.fill(hole, patch.entry);
self.fill_to_next(patch.hole);
let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
- Ok(Patch { hole: hole, entry: entry })
+ Ok(Some(Patch { hole: hole, entry: entry }))
}
}
@@ -381,36 +387,38 @@
greedy: false,
hir: Box::new(Hir::any(true)),
}))?
+ .unwrap()
} else {
self.c(&Hir::repetition(hir::Repetition {
kind: hir::RepetitionKind::ZeroOrMore,
greedy: false,
hir: Box::new(Hir::any(false)),
}))?
+ .unwrap()
})
}
- fn c_char(&mut self, c: char) -> Result {
+ fn c_char(&mut self, c: char) -> ResultOrEmpty {
if self.compiled.uses_bytes() {
if c.is_ascii() {
let b = c as u8;
let hole =
self.push_hole(InstHole::Bytes { start: b, end: b });
self.byte_classes.set_range(b, b);
- Ok(Patch { hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
} else {
self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
}
} else {
let hole = self.push_hole(InstHole::Char { c: c });
- Ok(Patch { hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
}
- fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> Result {
+ fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
assert!(!ranges.is_empty());
if self.compiled.uses_bytes() {
- CompileClass { c: self, ranges: ranges }.compile()
+ Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
} else {
let ranges: Vec<(char, char)> =
ranges.iter().map(|r| (r.start(), r.end())).collect();
@@ -419,15 +427,18 @@
} else {
self.push_hole(InstHole::Ranges { ranges: ranges })
};
- Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
}
}
- fn c_byte(&mut self, b: u8) -> Result {
+ fn c_byte(&mut self, b: u8) -> ResultOrEmpty {
self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
}
- fn c_class_bytes(&mut self, ranges: &[hir::ClassBytesRange]) -> Result {
+ fn c_class_bytes(
+ &mut self,
+ ranges: &[hir::ClassBytesRange],
+ ) -> ResultOrEmpty {
debug_assert!(!ranges.is_empty());
let first_split_entry = self.insts.len();
@@ -451,35 +462,39 @@
self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }),
);
self.fill(prev_hole, next);
- Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
}
- fn c_empty_look(&mut self, look: EmptyLook) -> Result {
+ fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
let hole = self.push_hole(InstHole::EmptyLook { look: look });
- Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
}
- fn c_concat<'a, I>(&mut self, exprs: I) -> Result
+ fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
where
I: IntoIterator<Item = &'a Hir>,
{
let mut exprs = exprs.into_iter();
- let first = match exprs.next() {
- Some(expr) => expr,
- None => {
- return Ok(Patch { hole: Hole::None, entry: self.insts.len() })
+ let Patch { mut hole, entry } = loop {
+ match exprs.next() {
+ None => return Ok(None),
+ Some(e) => {
+ if let Some(p) = self.c(e)? {
+ break p;
+ }
+ }
}
};
- let Patch { mut hole, entry } = self.c(first)?;
for e in exprs {
- let p = self.c(e)?;
- self.fill(hole, p.entry);
- hole = p.hole;
+ if let Some(p) = self.c(e)? {
+ self.fill(hole, p.entry);
+ hole = p.hole;
+ }
}
- Ok(Patch { hole: hole, entry: entry })
+ Ok(Some(Patch { hole: hole, entry: entry }))
}
- fn c_alternate(&mut self, exprs: &[Hir]) -> Result {
+ fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
debug_assert!(
exprs.len() >= 2,
"alternates must have at least 2 exprs"
@@ -492,43 +507,43 @@
// patched to point to the same location.
let mut holes = vec![];
- let mut prev_hole = Hole::None;
+ // true indicates that the hole is a split where we want to fill
+ // the second branch.
+ let mut prev_hole = (Hole::None, false);
for e in &exprs[0..exprs.len() - 1] {
- self.fill_to_next(prev_hole);
- let split = self.push_split_hole();
- let prev_entry = self.insts.len();
- let Patch { hole, entry } = self.c(e)?;
- if prev_entry == self.insts.len() {
- // TODO(burntsushi): It is kind of silly that we don't support
- // empty-subexpressions in alternates, but it is supremely
- // awkward to support them in the existing compiler
- // infrastructure. This entire compiler needs to be thrown out
- // anyway, so don't feel too bad.
- return Err(Error::Syntax(
- "alternations cannot currently contain \
- empty sub-expressions"
- .to_string(),
- ));
+ if prev_hole.1 {
+ let next = self.insts.len();
+ self.fill_split(prev_hole.0, None, Some(next));
+ } else {
+ self.fill_to_next(prev_hole.0);
}
+ let split = self.push_split_hole();
+ if let Some(Patch { hole, entry }) = self.c(e)? {
+ holes.push(hole);
+ prev_hole = (self.fill_split(split, Some(entry), None), false);
+ } else {
+ let (split1, split2) = split.dup_one();
+ holes.push(split1);
+ prev_hole = (split2, true);
+ }
+ }
+ if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? {
holes.push(hole);
- prev_hole = self.fill_split(split, Some(entry), None);
+ if prev_hole.1 {
+ self.fill_split(prev_hole.0, None, Some(entry));
+ } else {
+ self.fill(prev_hole.0, entry);
+ }
+ } else {
+ // We ignore prev_hole.1. When it's true, it means we have two
+ // empty branches both pushing prev_hole.0 into holes, so both
+ // branches will go to the same place anyway.
+ holes.push(prev_hole.0);
}
- let prev_entry = self.insts.len();
- let Patch { hole, entry } = self.c(&exprs[exprs.len() - 1])?;
- if prev_entry == self.insts.len() {
- // TODO(burntsushi): See TODO above.
- return Err(Error::Syntax(
- "alternations cannot currently contain \
- empty sub-expressions"
- .to_string(),
- ));
- }
- holes.push(hole);
- self.fill(prev_hole, entry);
- Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
}
- fn c_repeat(&mut self, rep: &hir::Repetition) -> Result {
+ fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
use syntax::hir::RepetitionKind::*;
match rep.kind {
ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
@@ -546,24 +561,37 @@
}
}
- fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result {
+ fn c_repeat_zero_or_one(
+ &mut self,
+ expr: &Hir,
+ greedy: bool,
+ ) -> ResultOrEmpty {
let split_entry = self.insts.len();
let split = self.push_split_hole();
- let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
-
+ let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+ Some(p) => p,
+ None => return self.pop_split_hole(),
+ };
let split_hole = if greedy {
self.fill_split(split, Some(entry_rep), None)
} else {
self.fill_split(split, None, Some(entry_rep))
};
let holes = vec![hole_rep, split_hole];
- Ok(Patch { hole: Hole::Many(holes), entry: split_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry }))
}
- fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
+ fn c_repeat_zero_or_more(
+ &mut self,
+ expr: &Hir,
+ greedy: bool,
+ ) -> ResultOrEmpty {
let split_entry = self.insts.len();
let split = self.push_split_hole();
- let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+ let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+ Some(p) => p,
+ None => return self.pop_split_hole(),
+ };
self.fill(hole_rep, split_entry);
let split_hole = if greedy {
@@ -571,11 +599,18 @@
} else {
self.fill_split(split, None, Some(entry_rep))
};
- Ok(Patch { hole: split_hole, entry: split_entry })
+ Ok(Some(Patch { hole: split_hole, entry: split_entry }))
}
- fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
- let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+ fn c_repeat_one_or_more(
+ &mut self,
+ expr: &Hir,
+ greedy: bool,
+ ) -> ResultOrEmpty {
+ let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+ Some(p) => p,
+ None => return Ok(None),
+ };
self.fill_to_next(hole_rep);
let split = self.push_split_hole();
@@ -584,7 +619,7 @@
} else {
self.fill_split(split, None, Some(entry_rep))
};
- Ok(Patch { hole: split_hole, entry: entry_rep })
+ Ok(Some(Patch { hole: split_hole, entry: entry_rep }))
}
fn c_repeat_range_min_or_more(
@@ -592,12 +627,20 @@
expr: &Hir,
greedy: bool,
min: u32,
- ) -> Result {
+ ) -> ResultOrEmpty {
let min = u32_to_usize(min);
- let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
- let patch_rep = self.c_repeat_zero_or_more(expr, greedy)?;
- self.fill(patch_concat.hole, patch_rep.entry);
- Ok(Patch { hole: patch_rep.hole, entry: patch_concat.entry })
+ // Using next_inst() is ok, because we can't return it (concat would
+ // have to return Some(_) while c_repeat_range_min_or_more returns
+ // None).
+ let patch_concat = self
+ .c_concat(iter::repeat(expr).take(min))?
+ .unwrap_or(self.next_inst());
+ if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
+ self.fill(patch_concat.hole, patch_rep.entry);
+ Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
+ } else {
+ Ok(None)
+ }
}
fn c_repeat_range(
@@ -606,13 +649,17 @@
greedy: bool,
min: u32,
max: u32,
- ) -> Result {
+ ) -> ResultOrEmpty {
let (min, max) = (u32_to_usize(min), u32_to_usize(max));
+ debug_assert!(min <= max);
let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
- let initial_entry = patch_concat.entry;
if min == max {
return Ok(patch_concat);
}
+ // Same reasoning as in c_repeat_range_min_or_more (we know that min <
+ // max at this point).
+ let patch_concat = patch_concat.unwrap_or(self.next_inst());
+ let initial_entry = patch_concat.entry;
// It is much simpler to compile, e.g., `a{2,5}` as:
//
// aaa?a?a?
@@ -637,7 +684,10 @@
for _ in min..max {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
- let Patch { hole, entry } = self.c(expr)?;
+ let Patch { hole, entry } = match self.c(expr)? {
+ Some(p) => p,
+ None => return self.pop_split_hole(),
+ };
prev_hole = hole;
if greedy {
holes.push(self.fill_split(split, Some(entry), None));
@@ -646,7 +696,14 @@
}
}
holes.push(prev_hole);
- Ok(Patch { hole: Hole::Many(holes), entry: initial_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry }))
+ }
+
+ /// Can be used as a default value for the c_* functions when the call to
+ /// a c_* function is followed by inserting at least one instruction that is
+ /// always executed after the ones written by that c_* function.
+ fn next_inst(&self) -> Patch {
+ Patch { hole: Hole::None, entry: self.insts.len() }
}
fn fill(&mut self, hole: Hole, goto: InstPtr) {
@@ -726,6 +783,11 @@
Hole::One(hole)
}
+ fn pop_split_hole(&mut self) -> ResultOrEmpty {
+ self.insts.pop();
+ Ok(None)
+ }
+
fn check_size(&self) -> result::Result<(), Error> {
use std::mem::size_of;
@@ -744,6 +806,17 @@
Many(Vec<Hole>),
}
+impl Hole {
+ fn dup_one(self) -> (Self, Self) {
+ match self {
+ Hole::One(pc) => (Hole::One(pc), Hole::One(pc)),
+ Hole::None | Hole::Many(_) => {
+ unreachable!("must be called on single hole")
+ }
+ }
+ }
+}
+
#[derive(Clone, Debug)]
enum MaybeInst {
Compiled(Inst),
@@ -755,13 +828,22 @@
impl MaybeInst {
fn fill(&mut self, goto: InstPtr) {
- let filled = match *self {
- MaybeInst::Uncompiled(ref inst) => inst.fill(goto),
+ let maybeinst = match *self {
+ MaybeInst::Split => MaybeInst::Split1(goto),
+ MaybeInst::Uncompiled(ref inst) => {
+ MaybeInst::Compiled(inst.fill(goto))
+ }
MaybeInst::Split1(goto1) => {
- Inst::Split(InstSplit { goto1: goto1, goto2: goto })
+ MaybeInst::Compiled(Inst::Split(InstSplit {
+ goto1: goto1,
+ goto2: goto,
+ }))
}
MaybeInst::Split2(goto2) => {
- Inst::Split(InstSplit { goto1: goto, goto2: goto2 })
+ MaybeInst::Compiled(Inst::Split(InstSplit {
+ goto1: goto,
+ goto2: goto2,
+ }))
}
_ => unreachable!(
"not all instructions were compiled! \
@@ -769,7 +851,7 @@
self
),
};
- *self = MaybeInst::Compiled(filled);
+ *self = maybeinst;
}
fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
diff --git a/src/lib.rs b/src/lib.rs
index 2a74bf8..e0a0975 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -152,8 +152,9 @@
```
If you wish to match against whitespace in this mode, you can still use `\s`,
-`\n`, `\t`, etc. For escaping a single space character, you can use its hex
-character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.
+`\n`, `\t`, etc. For escaping a single space character, you can escape it
+directly with `\ `, use its hex character code `\x20` or temporarily disable
+the `x` flag, e.g., `(?-x: )`.
# Example: match multiple regular expressions simultaneously
@@ -621,8 +622,8 @@
#[cfg(feature = "perf-literal")]
extern crate aho_corasick;
-#[cfg(test)]
-extern crate doc_comment;
+// #[cfg(doctest)]
+// extern crate doc_comment;
#[cfg(feature = "perf-literal")]
extern crate memchr;
#[cfg(test)]
@@ -632,8 +633,8 @@
#[cfg(feature = "perf-cache")]
extern crate thread_local;
-#[cfg(test)]
-doc_comment::doctest!("../README.md");
+// #[cfg(doctest)]
+// doc_comment::doctest!("../README.md");
#[cfg(feature = "std")]
pub use error::Error;
diff --git a/tests/crazy.rs b/tests/crazy.rs
index 8c72273..56f6cad 100644
--- a/tests/crazy.rs
+++ b/tests/crazy.rs
@@ -118,6 +118,18 @@
matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
+matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
+matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
+matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
// Test that the DFA can handle pathological cases.
// (This should result in the DFA's cache being flushed too frequently, which
diff --git a/tests/noparse.rs b/tests/noparse.rs
index 62eb5be..8ded1dc 100644
--- a/tests/noparse.rs
+++ b/tests/noparse.rs
@@ -26,6 +26,8 @@
noparse!(fail_bad_flag, "(?a)a");
noparse!(fail_too_big, "a{10000000}");
noparse!(fail_counted_no_close, "a{1001");
+noparse!(fail_counted_decreasing, "a{2,1}");
+noparse!(fail_counted_nonnegative, "a{-1,1}");
noparse!(fail_unfinished_cap, "(?");
noparse!(fail_unfinished_escape, "\\");
noparse!(fail_octal_digit, r"\8");
@@ -41,10 +43,3 @@
noparse!(fail_range_end_no_begin, r"[a-\A]");
noparse!(fail_range_end_no_end, r"[a-\z]");
noparse!(fail_range_end_no_boundary, r"[a-\b]");
-noparse!(fail_empty_alt1, r"|z");
-noparse!(fail_empty_alt2, r"z|");
-noparse!(fail_empty_alt3, r"|");
-noparse!(fail_empty_alt4, r"||");
-noparse!(fail_empty_alt5, r"()|z");
-noparse!(fail_empty_alt6, r"z|()");
-noparse!(fail_empty_alt7, r"(|)");
diff --git a/tests/regression.rs b/tests/regression.rs
index 686fe35..44b9083 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -210,3 +210,10 @@
(4, 7),
(12, 15)
);
+
+// See: https://github.com/rust-lang/regex/issues/659
+//
+// Note that 'Ј' is not 'j', but Cyrillic Je
+// https://en.wikipedia.org/wiki/Je_(Cyrillic)
+ismatch!(empty_group_match, r"()Ј01", "zЈ01", true);
+matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5));
diff --git a/tests/set.rs b/tests/set.rs
index 3e9755c..648feec 100644
--- a/tests/set.rs
+++ b/tests/set.rs
@@ -17,6 +17,17 @@
matset!(set17, &[".*a"], "a", 0);
matset!(set18, &["a", "β"], "β", 1);
+// regexes that match the empty string
+matset!(setempty1, &["", "a"], "abc", 0, 1);
+matset!(setempty2, &["", "b"], "abc", 0, 1);
+matset!(setempty3, &["", "z"], "abc", 0);
+matset!(setempty4, &["a", ""], "abc", 0, 1);
+matset!(setempty5, &["b", ""], "abc", 0, 1);
+matset!(setempty6, &["z", ""], "abc", 1);
+matset!(setempty7, &["b", "(?:)"], "abc", 0, 1);
+matset!(setempty8, &["(?:)", "b"], "abc", 0, 1);
+matset!(setempty9, &["c(?:)", "b"], "abc", 0, 1);
+
nomatset!(nset1, &["a", "a"], "b");
nomatset!(nset2, &["^foo", "bar$"], "bar foo");
nomatset!(