Upgrade rust/crates/regex to 1.3.9 Test: None Change-Id: Ic5726cda9566f808b7dd21d8ce9a3045250f8a54

commit: 49cbe5f97db66f1f8d354938952d3fdbcd4a1925 [log] [tgz]
author: Haibo Huang <hhb@google.com> Thu May 28 20:14:24 2020 -0700
committer: Haibo Huang <hhb@google.com> Thu May 28 20:14:24 2020 -0700
tree: a2319a1f6b03069734c84ddaaa73a733d0646d79
parent: 42134b99b59d452af33538ecd644bbf3771d5ec0 [diff]
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 02890e7..bd8a005 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json

@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "adb4aa3ce437ba1978af540071f85e302cced3ec"
+    "sha1": "691606773f525be32a59a0c28eae203a79663706"
   }
 }

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2db8688..c7e528d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md

@@ -1,3 +1,33 @@
+1.3.9 (2020-05-28)
+==================
+This release fixes an MSRV (Minimum Supported Rust Version) regression in the
+1.3.8 release. Namely, while 1.3.8 compiles on Rust 1.28, it actually does not
+compile on other Rust versions, such as Rust 1.39.
+
+Bug fixes:
+
+* [BUG #685](https://github.com/rust-lang/regex/issues/685):
+  Remove use of `doc_comment` crate, which cannot be used before Rust 1.43.
+
+
+1.3.8 (2020-05-28)
+==================
+This release contains a couple of important bug fixes driven
+by better support for empty-subexpressions in regexes. For
+example, regexes like `b|` are now allowed. Major thanks to
+[@sliquister](https://github.com/sliquister) for implementing support for this
+in [#677](https://github.com/rust-lang/regex/pull/677).
+
+Bug fixes:
+
+* [BUG #523](https://github.com/rust-lang/regex/pull/523):
+  Add note to documentation that spaces can be escaped in `x` mode.
+* [BUG #524](https://github.com/rust-lang/regex/issues/524):
+  Add support for empty sub-expressions, including empty alternations.
+* [BUG #659](https://github.com/rust-lang/regex/issues/659):
+  Fix match bug caused by an empty sub-expression miscompilation.
+
+
 1.3.7 (2020-04-17)
 ==================
 This release contains a small bug fix that fixes how `regex` forwards crate

diff --git a/Cargo.toml b/Cargo.toml
index 0b2f0b9..02caabb 100644
--- a/Cargo.toml
+++ b/Cargo.toml

@@ -12,7 +12,7 @@
 
 [package]
 name = "regex"
-version = "1.3.7"
+version = "1.3.9"
 authors = ["The Rust Project Developers"]
 exclude = ["/scripts/*", "/.github/*"]
 autotests = false
@@ -80,15 +80,12 @@
 optional = true
 
 [dependencies.regex-syntax]
-version = "0.6.17"
+version = "0.6.18"
 default-features = false
 
 [dependencies.thread_local]
 version = "1"
 optional = true
-[dev-dependencies.doc-comment]
-version = "0.3"
-
 [dev-dependencies.lazy_static]
 version = "1"
 

diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 1f7863f..1296ae0 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig

@@ -1,6 +1,6 @@
 [package]
 name = "regex"
-version = "1.3.7"  #:version
+version = "1.3.9"  #:version
 authors = ["The Rust Project Developers"]
 license = "MIT OR Apache-2.0"
 readme = "README.md"
@@ -118,7 +118,7 @@
 # For parsing regular expressions.
 [dependencies.regex-syntax]
 path = "regex-syntax"
-version = "0.6.17"
+version = "0.6.18"
 default-features = false
 
 [dev-dependencies]
@@ -129,7 +129,10 @@
 # For generating random test data.
 rand = "0.6.5"
 # To check README's example
-doc-comment = "0.3"
+# TODO: Re-enable this once the MSRV is 1.43 or greater.
+# See: https://github.com/rust-lang/regex/issues/684
+# See: https://github.com/rust-lang/regex/issues/685
+# doc-comment = "0.3"
 
 # Run the test suite on the default behavior of Regex::new.
 # This includes a mish mash of NFAs and DFAs, which are chosen automatically

diff --git a/METADATA b/METADATA
index 53b95fe..6eff2f3 100644
--- a/METADATA
+++ b/METADATA

@@ -9,11 +9,11 @@
     type: GIT
     value: "https://github.com/rust-lang/regex"
   }
-  version: "1.3.7"
+  version: "1.3.9"
   license_type: NOTICE
   last_upgrade_date {
     year: 2020
-    month: 4
-    day: 17
+    month: 5
+    day: 28
   }
 }

diff --git a/src/compile.rs b/src/compile.rs
index 2418400..ad54040 100644
--- a/src/compile.rs
+++ b/src/compile.rs

@@ -15,6 +15,7 @@
 use Error;
 
 type Result = result::Result<Patch, Error>;
+type ResultOrEmpty = result::Result<Option<Patch>, Error>;
 
 #[derive(Debug)]
 struct Patch {
@@ -132,7 +133,7 @@
             self.compiled.start = dotstar_patch.entry;
         }
         self.compiled.captures = vec![None];
-        let patch = self.c_capture(0, expr)?;
+        let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst());
         if self.compiled.needs_dotstar() {
             self.fill(dotstar_patch.hole, patch.entry);
         } else {
@@ -167,14 +168,16 @@
         for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
-            let Patch { hole, entry } = self.c_capture(0, expr)?;
+            let Patch { hole, entry } =
+                self.c_capture(0, expr)?.unwrap_or(self.next_inst());
             self.fill_to_next(hole);
             self.compiled.matches.push(self.insts.len());
             self.push_compiled(Inst::Match(i));
             prev_hole = self.fill_split(split, Some(entry), None);
         }
         let i = exprs.len() - 1;
-        let Patch { hole, entry } = self.c_capture(0, &exprs[i])?;
+        let Patch { hole, entry } =
+            self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst());
         self.fill(prev_hole, entry);
         self.fill_to_next(hole);
         self.compiled.matches.push(self.insts.len());
@@ -242,13 +245,16 @@
     /// method you will see that it does exactly this, though it handles
     /// a list of expressions rather than just the two that we use for
     /// an example.
-    fn c(&mut self, expr: &Hir) -> Result {
+    ///
+    /// Ok(None) is returned when an expression is compiled to no
+    /// instruction, and so no patch.entry value makes sense.
+    fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
         use prog;
         use syntax::hir::HirKind::*;
 
         self.check_size()?;
         match *expr.kind() {
-            Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }),
+            Empty => Ok(None),
             Literal(hir::Literal::Unicode(c)) => self.c_char(c),
             Literal(hir::Literal::Byte(b)) => {
                 assert!(self.compiled.uses_bytes());
@@ -357,7 +363,7 @@
         }
     }
 
-    fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> Result {
+    fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
         if self.num_exprs > 1 || self.compiled.is_dfa {
             // Don't ever compile Save instructions for regex sets because
             // they are never used. They are also never used in DFA programs
@@ -366,11 +372,11 @@
         } else {
             let entry = self.insts.len();
             let hole = self.push_hole(InstHole::Save { slot: first_slot });
-            let patch = self.c(expr)?;
+            let patch = self.c(expr)?.unwrap_or(self.next_inst());
             self.fill(hole, patch.entry);
             self.fill_to_next(patch.hole);
             let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
-            Ok(Patch { hole: hole, entry: entry })
+            Ok(Some(Patch { hole: hole, entry: entry }))
         }
     }
 
@@ -381,36 +387,38 @@
                 greedy: false,
                 hir: Box::new(Hir::any(true)),
             }))?
+            .unwrap()
         } else {
             self.c(&Hir::repetition(hir::Repetition {
                 kind: hir::RepetitionKind::ZeroOrMore,
                 greedy: false,
                 hir: Box::new(Hir::any(false)),
             }))?
+            .unwrap()
         })
     }
 
-    fn c_char(&mut self, c: char) -> Result {
+    fn c_char(&mut self, c: char) -> ResultOrEmpty {
         if self.compiled.uses_bytes() {
             if c.is_ascii() {
                 let b = c as u8;
                 let hole =
                     self.push_hole(InstHole::Bytes { start: b, end: b });
                 self.byte_classes.set_range(b, b);
-                Ok(Patch { hole, entry: self.insts.len() - 1 })
+                Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
             } else {
                 self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
             }
         } else {
             let hole = self.push_hole(InstHole::Char { c: c });
-            Ok(Patch { hole, entry: self.insts.len() - 1 })
+            Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
         }
     }
 
-    fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> Result {
+    fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
         assert!(!ranges.is_empty());
         if self.compiled.uses_bytes() {
-            CompileClass { c: self, ranges: ranges }.compile()
+            Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
         } else {
             let ranges: Vec<(char, char)> =
                 ranges.iter().map(|r| (r.start(), r.end())).collect();
@@ -419,15 +427,18 @@
             } else {
                 self.push_hole(InstHole::Ranges { ranges: ranges })
             };
-            Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+            Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
         }
     }
 
-    fn c_byte(&mut self, b: u8) -> Result {
+    fn c_byte(&mut self, b: u8) -> ResultOrEmpty {
         self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
     }
 
-    fn c_class_bytes(&mut self, ranges: &[hir::ClassBytesRange]) -> Result {
+    fn c_class_bytes(
+        &mut self,
+        ranges: &[hir::ClassBytesRange],
+    ) -> ResultOrEmpty {
         debug_assert!(!ranges.is_empty());
 
         let first_split_entry = self.insts.len();
@@ -451,35 +462,39 @@
             self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }),
         );
         self.fill(prev_hole, next);
-        Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+        Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
     }
 
-    fn c_empty_look(&mut self, look: EmptyLook) -> Result {
+    fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
         let hole = self.push_hole(InstHole::EmptyLook { look: look });
-        Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+        Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
     }
 
-    fn c_concat<'a, I>(&mut self, exprs: I) -> Result
+    fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
     where
         I: IntoIterator<Item = &'a Hir>,
     {
         let mut exprs = exprs.into_iter();
-        let first = match exprs.next() {
-            Some(expr) => expr,
-            None => {
-                return Ok(Patch { hole: Hole::None, entry: self.insts.len() })
+        let Patch { mut hole, entry } = loop {
+            match exprs.next() {
+                None => return Ok(None),
+                Some(e) => {
+                    if let Some(p) = self.c(e)? {
+                        break p;
+                    }
+                }
             }
         };
-        let Patch { mut hole, entry } = self.c(first)?;
         for e in exprs {
-            let p = self.c(e)?;
-            self.fill(hole, p.entry);
-            hole = p.hole;
+            if let Some(p) = self.c(e)? {
+                self.fill(hole, p.entry);
+                hole = p.hole;
+            }
         }
-        Ok(Patch { hole: hole, entry: entry })
+        Ok(Some(Patch { hole: hole, entry: entry }))
     }
 
-    fn c_alternate(&mut self, exprs: &[Hir]) -> Result {
+    fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
         debug_assert!(
             exprs.len() >= 2,
             "alternates must have at least 2 exprs"
@@ -492,43 +507,43 @@
         // patched to point to the same location.
         let mut holes = vec![];
 
-        let mut prev_hole = Hole::None;
+        // true indicates that the hole is a split where we want to fill
+        // the second branch.
+        let mut prev_hole = (Hole::None, false);
         for e in &exprs[0..exprs.len() - 1] {
-            self.fill_to_next(prev_hole);
-            let split = self.push_split_hole();
-            let prev_entry = self.insts.len();
-            let Patch { hole, entry } = self.c(e)?;
-            if prev_entry == self.insts.len() {
-                // TODO(burntsushi): It is kind of silly that we don't support
-                // empty-subexpressions in alternates, but it is supremely
-                // awkward to support them in the existing compiler
-                // infrastructure. This entire compiler needs to be thrown out
-                // anyway, so don't feel too bad.
-                return Err(Error::Syntax(
-                    "alternations cannot currently contain \
-                     empty sub-expressions"
-                        .to_string(),
-                ));
+            if prev_hole.1 {
+                let next = self.insts.len();
+                self.fill_split(prev_hole.0, None, Some(next));
+            } else {
+                self.fill_to_next(prev_hole.0);
             }
+            let split = self.push_split_hole();
+            if let Some(Patch { hole, entry }) = self.c(e)? {
+                holes.push(hole);
+                prev_hole = (self.fill_split(split, Some(entry), None), false);
+            } else {
+                let (split1, split2) = split.dup_one();
+                holes.push(split1);
+                prev_hole = (split2, true);
+            }
+        }
+        if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? {
             holes.push(hole);
-            prev_hole = self.fill_split(split, Some(entry), None);
+            if prev_hole.1 {
+                self.fill_split(prev_hole.0, None, Some(entry));
+            } else {
+                self.fill(prev_hole.0, entry);
+            }
+        } else {
+            // We ignore prev_hole.1. When it's true, it means we have two
+            // empty branches both pushing prev_hole.0 into holes, so both
+            // branches will go to the same place anyway.
+            holes.push(prev_hole.0);
         }
-        let prev_entry = self.insts.len();
-        let Patch { hole, entry } = self.c(&exprs[exprs.len() - 1])?;
-        if prev_entry == self.insts.len() {
-            // TODO(burntsushi): See TODO above.
-            return Err(Error::Syntax(
-                "alternations cannot currently contain \
-                 empty sub-expressions"
-                    .to_string(),
-            ));
-        }
-        holes.push(hole);
-        self.fill(prev_hole, entry);
-        Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+        Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
     }
 
-    fn c_repeat(&mut self, rep: &hir::Repetition) -> Result {
+    fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
         use syntax::hir::RepetitionKind::*;
         match rep.kind {
             ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
@@ -546,24 +561,37 @@
         }
     }
 
-    fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result {
+    fn c_repeat_zero_or_one(
+        &mut self,
+        expr: &Hir,
+        greedy: bool,
+    ) -> ResultOrEmpty {
         let split_entry = self.insts.len();
         let split = self.push_split_hole();
-        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
-
+        let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+            Some(p) => p,
+            None => return self.pop_split_hole(),
+        };
         let split_hole = if greedy {
             self.fill_split(split, Some(entry_rep), None)
         } else {
             self.fill_split(split, None, Some(entry_rep))
         };
         let holes = vec![hole_rep, split_hole];
-        Ok(Patch { hole: Hole::Many(holes), entry: split_entry })
+        Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry }))
     }
 
-    fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
+    fn c_repeat_zero_or_more(
+        &mut self,
+        expr: &Hir,
+        greedy: bool,
+    ) -> ResultOrEmpty {
         let split_entry = self.insts.len();
         let split = self.push_split_hole();
-        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+        let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+            Some(p) => p,
+            None => return self.pop_split_hole(),
+        };
 
         self.fill(hole_rep, split_entry);
         let split_hole = if greedy {
@@ -571,11 +599,18 @@
         } else {
             self.fill_split(split, None, Some(entry_rep))
         };
-        Ok(Patch { hole: split_hole, entry: split_entry })
+        Ok(Some(Patch { hole: split_hole, entry: split_entry }))
     }
 
-    fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
-        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+    fn c_repeat_one_or_more(
+        &mut self,
+        expr: &Hir,
+        greedy: bool,
+    ) -> ResultOrEmpty {
+        let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+            Some(p) => p,
+            None => return Ok(None),
+        };
         self.fill_to_next(hole_rep);
         let split = self.push_split_hole();
 
@@ -584,7 +619,7 @@
         } else {
             self.fill_split(split, None, Some(entry_rep))
         };
-        Ok(Patch { hole: split_hole, entry: entry_rep })
+        Ok(Some(Patch { hole: split_hole, entry: entry_rep }))
     }
 
     fn c_repeat_range_min_or_more(
@@ -592,12 +627,20 @@
         expr: &Hir,
         greedy: bool,
         min: u32,
-    ) -> Result {
+    ) -> ResultOrEmpty {
         let min = u32_to_usize(min);
-        let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
-        let patch_rep = self.c_repeat_zero_or_more(expr, greedy)?;
-        self.fill(patch_concat.hole, patch_rep.entry);
-        Ok(Patch { hole: patch_rep.hole, entry: patch_concat.entry })
+        // Using next_inst() is ok, because we can't return it (concat would
+        // have to return Some(_) while c_repeat_range_min_or_more returns
+        // None).
+        let patch_concat = self
+            .c_concat(iter::repeat(expr).take(min))?
+            .unwrap_or(self.next_inst());
+        if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
+            self.fill(patch_concat.hole, patch_rep.entry);
+            Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
+        } else {
+            Ok(None)
+        }
     }
 
     fn c_repeat_range(
@@ -606,13 +649,17 @@
         greedy: bool,
         min: u32,
         max: u32,
-    ) -> Result {
+    ) -> ResultOrEmpty {
         let (min, max) = (u32_to_usize(min), u32_to_usize(max));
+        debug_assert!(min <= max);
         let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
-        let initial_entry = patch_concat.entry;
         if min == max {
             return Ok(patch_concat);
         }
+        // Same reasoning as in c_repeat_range_min_or_more (we know that min <
+        // max at this point).
+        let patch_concat = patch_concat.unwrap_or(self.next_inst());
+        let initial_entry = patch_concat.entry;
         // It is much simpler to compile, e.g., `a{2,5}` as:
         //
         //     aaa?a?a?
@@ -637,7 +684,10 @@
         for _ in min..max {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
-            let Patch { hole, entry } = self.c(expr)?;
+            let Patch { hole, entry } = match self.c(expr)? {
+                Some(p) => p,
+                None => return self.pop_split_hole(),
+            };
             prev_hole = hole;
             if greedy {
                 holes.push(self.fill_split(split, Some(entry), None));
@@ -646,7 +696,14 @@
             }
         }
         holes.push(prev_hole);
-        Ok(Patch { hole: Hole::Many(holes), entry: initial_entry })
+        Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry }))
+    }
+
+    /// Can be used as a default value for the c_* functions when the call to
+    /// c_function is followed by inserting at least one instruction that is
+    /// always executed after the ones written by the c* function.
+    fn next_inst(&self) -> Patch {
+        Patch { hole: Hole::None, entry: self.insts.len() }
     }
 
     fn fill(&mut self, hole: Hole, goto: InstPtr) {
@@ -726,6 +783,11 @@
         Hole::One(hole)
     }
 
+    fn pop_split_hole(&mut self) -> ResultOrEmpty {
+        self.insts.pop();
+        Ok(None)
+    }
+
     fn check_size(&self) -> result::Result<(), Error> {
         use std::mem::size_of;
 
@@ -744,6 +806,17 @@
     Many(Vec<Hole>),
 }
 
+impl Hole {
+    fn dup_one(self) -> (Self, Self) {
+        match self {
+            Hole::One(pc) => (Hole::One(pc), Hole::One(pc)),
+            Hole::None | Hole::Many(_) => {
+                unreachable!("must be called on single hole")
+            }
+        }
+    }
+}
+
 #[derive(Clone, Debug)]
 enum MaybeInst {
     Compiled(Inst),
@@ -755,13 +828,22 @@
 
 impl MaybeInst {
     fn fill(&mut self, goto: InstPtr) {
-        let filled = match *self {
-            MaybeInst::Uncompiled(ref inst) => inst.fill(goto),
+        let maybeinst = match *self {
+            MaybeInst::Split => MaybeInst::Split1(goto),
+            MaybeInst::Uncompiled(ref inst) => {
+                MaybeInst::Compiled(inst.fill(goto))
+            }
             MaybeInst::Split1(goto1) => {
-                Inst::Split(InstSplit { goto1: goto1, goto2: goto })
+                MaybeInst::Compiled(Inst::Split(InstSplit {
+                    goto1: goto1,
+                    goto2: goto,
+                }))
             }
             MaybeInst::Split2(goto2) => {
-                Inst::Split(InstSplit { goto1: goto, goto2: goto2 })
+                MaybeInst::Compiled(Inst::Split(InstSplit {
+                    goto1: goto,
+                    goto2: goto2,
+                }))
             }
             _ => unreachable!(
                 "not all instructions were compiled! \
@@ -769,7 +851,7 @@
                 self
             ),
         };
-        *self = MaybeInst::Compiled(filled);
+        *self = maybeinst;
     }
 
     fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {

diff --git a/src/lib.rs b/src/lib.rs
index 2a74bf8..e0a0975 100644
--- a/src/lib.rs
+++ b/src/lib.rs

@@ -152,8 +152,9 @@
 ```
 
 If you wish to match against whitespace in this mode, you can still use `\s`,
-`\n`, `\t`, etc. For escaping a single space character, you can use its hex
-character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.
+`\n`, `\t`, etc. For escaping a single space character, you can escape it
+directly with `\ `, use its hex character code `\x20` or temporarily disable
+the `x` flag, e.g., `(?-x: )`.
 
 # Example: match multiple regular expressions simultaneously
 
@@ -621,8 +622,8 @@
 
 #[cfg(feature = "perf-literal")]
 extern crate aho_corasick;
-#[cfg(test)]
-extern crate doc_comment;
+// #[cfg(doctest)]
+// extern crate doc_comment;
 #[cfg(feature = "perf-literal")]
 extern crate memchr;
 #[cfg(test)]
@@ -632,8 +633,8 @@
 #[cfg(feature = "perf-cache")]
 extern crate thread_local;
 
-#[cfg(test)]
-doc_comment::doctest!("../README.md");
+// #[cfg(doctest)]
+// doc_comment::doctest!("../README.md");
 
 #[cfg(feature = "std")]
 pub use error::Error;

diff --git a/tests/crazy.rs b/tests/crazy.rs
index 8c72273..56f6cad 100644
--- a/tests/crazy.rs
+++ b/tests/crazy.rs

@@ -118,6 +118,18 @@
 matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
 matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
 matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
+matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
+matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
+matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
 
 // Test that the DFA can handle pathological cases.
 // (This should result in the DFA's cache being flushed too frequently, which

diff --git a/tests/noparse.rs b/tests/noparse.rs
index 62eb5be..8ded1dc 100644
--- a/tests/noparse.rs
+++ b/tests/noparse.rs

@@ -26,6 +26,8 @@
 noparse!(fail_bad_flag, "(?a)a");
 noparse!(fail_too_big, "a{10000000}");
 noparse!(fail_counted_no_close, "a{1001");
+noparse!(fail_counted_decreasing, "a{2,1}");
+noparse!(fail_counted_nonnegative, "a{-1,1}");
 noparse!(fail_unfinished_cap, "(?");
 noparse!(fail_unfinished_escape, "\\");
 noparse!(fail_octal_digit, r"\8");
@@ -41,10 +43,3 @@
 noparse!(fail_range_end_no_begin, r"[a-\A]");
 noparse!(fail_range_end_no_end, r"[a-\z]");
 noparse!(fail_range_end_no_boundary, r"[a-\b]");
-noparse!(fail_empty_alt1, r"|z");
-noparse!(fail_empty_alt2, r"z|");
-noparse!(fail_empty_alt3, r"|");
-noparse!(fail_empty_alt4, r"||");
-noparse!(fail_empty_alt5, r"()|z");
-noparse!(fail_empty_alt6, r"z|()");
-noparse!(fail_empty_alt7, r"(|)");

diff --git a/tests/regression.rs b/tests/regression.rs
index 686fe35..44b9083 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs

@@ -210,3 +210,10 @@
     (4, 7),
     (12, 15)
 );
+
+// See: https://github.com/rust-lang/regex/issues/659
+//
+// Note that 'Ј' is not 'j', but cyrillic Je
+// https://en.wikipedia.org/wiki/Je_(Cyrillic)
+ismatch!(empty_group_match, r"()Ј01", "zЈ01", true);
+matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5));

diff --git a/tests/set.rs b/tests/set.rs
index 3e9755c..648feec 100644
--- a/tests/set.rs
+++ b/tests/set.rs

@@ -17,6 +17,17 @@
 matset!(set17, &[".*a"], "a", 0);
 matset!(set18, &["a", "β"], "β", 1);
 
+// regexes that match the empty string
+matset!(setempty1, &["", "a"], "abc", 0, 1);
+matset!(setempty2, &["", "b"], "abc", 0, 1);
+matset!(setempty3, &["", "z"], "abc", 0);
+matset!(setempty4, &["a", ""], "abc", 0, 1);
+matset!(setempty5, &["b", ""], "abc", 0, 1);
+matset!(setempty6, &["z", ""], "abc", 1);
+matset!(setempty7, &["b", "(?:)"], "abc", 0, 1);
+matset!(setempty8, &["(?:)", "b"], "abc", 0, 1);
+matset!(setempty9, &["c(?:)", "b"], "abc", 0, 1);
+
 nomatset!(nset1, &["a", "a"], "b");
 nomatset!(nset2, &["^foo", "bar$"], "bar foo");
 nomatset!(
commit	49cbe5f97db66f1f8d354938952d3fdbcd4a1925	[log] [tgz]
author	Haibo Huang <hhb@google.com>	Thu May 28 20:14:24 2020 -0700
committer	Haibo Huang <hhb@google.com>	Thu May 28 20:14:24 2020 -0700
tree	a2319a1f6b03069734c84ddaaa73a733d0646d79
parent	42134b99b59d452af33538ecd644bbf3771d5ec0 [diff]