Upgrade rust/crates/regex to 1.3.9
Test: None
Change-Id: Ic5726cda9566f808b7dd21d8ce9a3045250f8a54
diff --git a/src/compile.rs b/src/compile.rs
index 2418400..ad54040 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -15,6 +15,7 @@
use Error;
type Result = result::Result<Patch, Error>;
+type ResultOrEmpty = result::Result<Option<Patch>, Error>;
#[derive(Debug)]
struct Patch {
@@ -132,7 +133,7 @@
self.compiled.start = dotstar_patch.entry;
}
self.compiled.captures = vec![None];
- let patch = self.c_capture(0, expr)?;
+ let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst());
if self.compiled.needs_dotstar() {
self.fill(dotstar_patch.hole, patch.entry);
} else {
@@ -167,14 +168,16 @@
for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
- let Patch { hole, entry } = self.c_capture(0, expr)?;
+ let Patch { hole, entry } =
+ self.c_capture(0, expr)?.unwrap_or(self.next_inst());
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
self.push_compiled(Inst::Match(i));
prev_hole = self.fill_split(split, Some(entry), None);
}
let i = exprs.len() - 1;
- let Patch { hole, entry } = self.c_capture(0, &exprs[i])?;
+ let Patch { hole, entry } =
+ self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst());
self.fill(prev_hole, entry);
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
@@ -242,13 +245,16 @@
/// method you will see that it does exactly this, though it handles
/// a list of expressions rather than just the two that we use for
/// an example.
- fn c(&mut self, expr: &Hir) -> Result {
+ ///
+ /// Ok(None) is returned when an expression is compiled to no
+ /// instruction, and so no patch.entry value makes sense.
+ fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
use prog;
use syntax::hir::HirKind::*;
self.check_size()?;
match *expr.kind() {
- Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }),
+ Empty => Ok(None),
Literal(hir::Literal::Unicode(c)) => self.c_char(c),
Literal(hir::Literal::Byte(b)) => {
assert!(self.compiled.uses_bytes());
@@ -357,7 +363,7 @@
}
}
- fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> Result {
+ fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
if self.num_exprs > 1 || self.compiled.is_dfa {
// Don't ever compile Save instructions for regex sets because
// they are never used. They are also never used in DFA programs
@@ -366,11 +372,11 @@
} else {
let entry = self.insts.len();
let hole = self.push_hole(InstHole::Save { slot: first_slot });
- let patch = self.c(expr)?;
+ let patch = self.c(expr)?.unwrap_or(self.next_inst());
self.fill(hole, patch.entry);
self.fill_to_next(patch.hole);
let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
- Ok(Patch { hole: hole, entry: entry })
+ Ok(Some(Patch { hole: hole, entry: entry }))
}
}
@@ -381,36 +387,38 @@
greedy: false,
hir: Box::new(Hir::any(true)),
}))?
+ .unwrap()
} else {
self.c(&Hir::repetition(hir::Repetition {
kind: hir::RepetitionKind::ZeroOrMore,
greedy: false,
hir: Box::new(Hir::any(false)),
}))?
+ .unwrap()
})
}
- fn c_char(&mut self, c: char) -> Result {
+ fn c_char(&mut self, c: char) -> ResultOrEmpty {
if self.compiled.uses_bytes() {
if c.is_ascii() {
let b = c as u8;
let hole =
self.push_hole(InstHole::Bytes { start: b, end: b });
self.byte_classes.set_range(b, b);
- Ok(Patch { hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
} else {
self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
}
} else {
let hole = self.push_hole(InstHole::Char { c: c });
- Ok(Patch { hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
}
- fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> Result {
+ fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
assert!(!ranges.is_empty());
if self.compiled.uses_bytes() {
- CompileClass { c: self, ranges: ranges }.compile()
+ Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
} else {
let ranges: Vec<(char, char)> =
ranges.iter().map(|r| (r.start(), r.end())).collect();
@@ -419,15 +427,18 @@
} else {
self.push_hole(InstHole::Ranges { ranges: ranges })
};
- Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
}
}
- fn c_byte(&mut self, b: u8) -> Result {
+ fn c_byte(&mut self, b: u8) -> ResultOrEmpty {
self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
}
- fn c_class_bytes(&mut self, ranges: &[hir::ClassBytesRange]) -> Result {
+ fn c_class_bytes(
+ &mut self,
+ ranges: &[hir::ClassBytesRange],
+ ) -> ResultOrEmpty {
debug_assert!(!ranges.is_empty());
let first_split_entry = self.insts.len();
@@ -451,35 +462,39 @@
self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }),
);
self.fill(prev_hole, next);
- Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
}
- fn c_empty_look(&mut self, look: EmptyLook) -> Result {
+ fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
let hole = self.push_hole(InstHole::EmptyLook { look: look });
- Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
+ Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
}
- fn c_concat<'a, I>(&mut self, exprs: I) -> Result
+ fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
where
I: IntoIterator<Item = &'a Hir>,
{
let mut exprs = exprs.into_iter();
- let first = match exprs.next() {
- Some(expr) => expr,
- None => {
- return Ok(Patch { hole: Hole::None, entry: self.insts.len() })
+ let Patch { mut hole, entry } = loop {
+ match exprs.next() {
+ None => return Ok(None),
+ Some(e) => {
+ if let Some(p) = self.c(e)? {
+ break p;
+ }
+ }
}
};
- let Patch { mut hole, entry } = self.c(first)?;
for e in exprs {
- let p = self.c(e)?;
- self.fill(hole, p.entry);
- hole = p.hole;
+ if let Some(p) = self.c(e)? {
+ self.fill(hole, p.entry);
+ hole = p.hole;
+ }
}
- Ok(Patch { hole: hole, entry: entry })
+ Ok(Some(Patch { hole: hole, entry: entry }))
}
- fn c_alternate(&mut self, exprs: &[Hir]) -> Result {
+ fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
debug_assert!(
exprs.len() >= 2,
"alternates must have at least 2 exprs"
@@ -492,43 +507,43 @@
// patched to point to the same location.
let mut holes = vec![];
- let mut prev_hole = Hole::None;
+ // true indicates that the hole is a split where we want to fill
+ // the second branch.
+ let mut prev_hole = (Hole::None, false);
for e in &exprs[0..exprs.len() - 1] {
- self.fill_to_next(prev_hole);
- let split = self.push_split_hole();
- let prev_entry = self.insts.len();
- let Patch { hole, entry } = self.c(e)?;
- if prev_entry == self.insts.len() {
- // TODO(burntsushi): It is kind of silly that we don't support
- // empty-subexpressions in alternates, but it is supremely
- // awkward to support them in the existing compiler
- // infrastructure. This entire compiler needs to be thrown out
- // anyway, so don't feel too bad.
- return Err(Error::Syntax(
- "alternations cannot currently contain \
- empty sub-expressions"
- .to_string(),
- ));
+ if prev_hole.1 {
+ let next = self.insts.len();
+ self.fill_split(prev_hole.0, None, Some(next));
+ } else {
+ self.fill_to_next(prev_hole.0);
}
+ let split = self.push_split_hole();
+ if let Some(Patch { hole, entry }) = self.c(e)? {
+ holes.push(hole);
+ prev_hole = (self.fill_split(split, Some(entry), None), false);
+ } else {
+ let (split1, split2) = split.dup_one();
+ holes.push(split1);
+ prev_hole = (split2, true);
+ }
+ }
+ if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? {
holes.push(hole);
- prev_hole = self.fill_split(split, Some(entry), None);
+ if prev_hole.1 {
+ self.fill_split(prev_hole.0, None, Some(entry));
+ } else {
+ self.fill(prev_hole.0, entry);
+ }
+ } else {
+ // We ignore prev_hole.1. When it's true, it means we have two
+ // empty branches both pushing prev_hole.0 into holes, so both
+ // branches will go to the same place anyway.
+ holes.push(prev_hole.0);
}
- let prev_entry = self.insts.len();
- let Patch { hole, entry } = self.c(&exprs[exprs.len() - 1])?;
- if prev_entry == self.insts.len() {
- // TODO(burntsushi): See TODO above.
- return Err(Error::Syntax(
- "alternations cannot currently contain \
- empty sub-expressions"
- .to_string(),
- ));
- }
- holes.push(hole);
- self.fill(prev_hole, entry);
- Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
}
- fn c_repeat(&mut self, rep: &hir::Repetition) -> Result {
+ fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
use syntax::hir::RepetitionKind::*;
match rep.kind {
ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
@@ -546,24 +561,37 @@
}
}
- fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result {
+ fn c_repeat_zero_or_one(
+ &mut self,
+ expr: &Hir,
+ greedy: bool,
+ ) -> ResultOrEmpty {
let split_entry = self.insts.len();
let split = self.push_split_hole();
- let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
-
+ let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+ Some(p) => p,
+ None => return self.pop_split_hole(),
+ };
let split_hole = if greedy {
self.fill_split(split, Some(entry_rep), None)
} else {
self.fill_split(split, None, Some(entry_rep))
};
let holes = vec![hole_rep, split_hole];
- Ok(Patch { hole: Hole::Many(holes), entry: split_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry }))
}
- fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
+ fn c_repeat_zero_or_more(
+ &mut self,
+ expr: &Hir,
+ greedy: bool,
+ ) -> ResultOrEmpty {
let split_entry = self.insts.len();
let split = self.push_split_hole();
- let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+ let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+ Some(p) => p,
+ None => return self.pop_split_hole(),
+ };
self.fill(hole_rep, split_entry);
let split_hole = if greedy {
@@ -571,11 +599,18 @@
} else {
self.fill_split(split, None, Some(entry_rep))
};
- Ok(Patch { hole: split_hole, entry: split_entry })
+ Ok(Some(Patch { hole: split_hole, entry: split_entry }))
}
- fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
- let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
+ fn c_repeat_one_or_more(
+ &mut self,
+ expr: &Hir,
+ greedy: bool,
+ ) -> ResultOrEmpty {
+ let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
+ Some(p) => p,
+ None => return Ok(None),
+ };
self.fill_to_next(hole_rep);
let split = self.push_split_hole();
@@ -584,7 +619,7 @@
} else {
self.fill_split(split, None, Some(entry_rep))
};
- Ok(Patch { hole: split_hole, entry: entry_rep })
+ Ok(Some(Patch { hole: split_hole, entry: entry_rep }))
}
fn c_repeat_range_min_or_more(
@@ -592,12 +627,20 @@
expr: &Hir,
greedy: bool,
min: u32,
- ) -> Result {
+ ) -> ResultOrEmpty {
let min = u32_to_usize(min);
- let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
- let patch_rep = self.c_repeat_zero_or_more(expr, greedy)?;
- self.fill(patch_concat.hole, patch_rep.entry);
- Ok(Patch { hole: patch_rep.hole, entry: patch_concat.entry })
+ // Using next_inst() is ok, because we can't return it (concat would
+ // have to return Some(_) while c_repeat_range_min_or_more returns
+ // None).
+ let patch_concat = self
+ .c_concat(iter::repeat(expr).take(min))?
+ .unwrap_or(self.next_inst());
+ if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
+ self.fill(patch_concat.hole, patch_rep.entry);
+ Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
+ } else {
+ Ok(None)
+ }
}
fn c_repeat_range(
@@ -606,13 +649,17 @@
greedy: bool,
min: u32,
max: u32,
- ) -> Result {
+ ) -> ResultOrEmpty {
let (min, max) = (u32_to_usize(min), u32_to_usize(max));
+ debug_assert!(min <= max);
let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
- let initial_entry = patch_concat.entry;
if min == max {
return Ok(patch_concat);
}
+ // Same reasoning as in c_repeat_range_min_or_more (we know that min <
+ // max at this point).
+ let patch_concat = patch_concat.unwrap_or(self.next_inst());
+ let initial_entry = patch_concat.entry;
// It is much simpler to compile, e.g., `a{2,5}` as:
//
// aaa?a?a?
@@ -637,7 +684,10 @@
for _ in min..max {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
- let Patch { hole, entry } = self.c(expr)?;
+ let Patch { hole, entry } = match self.c(expr)? {
+ Some(p) => p,
+ None => return self.pop_split_hole(),
+ };
prev_hole = hole;
if greedy {
holes.push(self.fill_split(split, Some(entry), None));
@@ -646,7 +696,14 @@
}
}
holes.push(prev_hole);
- Ok(Patch { hole: Hole::Many(holes), entry: initial_entry })
+ Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry }))
+ }
+
+ /// Can be used as a default value for the c_* functions when the call to
+ /// c_function is followed by inserting at least one instruction that is
+ /// always executed after the ones written by the c* function.
+ fn next_inst(&self) -> Patch {
+ Patch { hole: Hole::None, entry: self.insts.len() }
}
fn fill(&mut self, hole: Hole, goto: InstPtr) {
@@ -726,6 +783,11 @@
Hole::One(hole)
}
+ fn pop_split_hole(&mut self) -> ResultOrEmpty {
+ self.insts.pop();
+ Ok(None)
+ }
+
fn check_size(&self) -> result::Result<(), Error> {
use std::mem::size_of;
@@ -744,6 +806,17 @@
Many(Vec<Hole>),
}
+impl Hole {
+ fn dup_one(self) -> (Self, Self) {
+ match self {
+ Hole::One(pc) => (Hole::One(pc), Hole::One(pc)),
+ Hole::None | Hole::Many(_) => {
+ unreachable!("must be called on single hole")
+ }
+ }
+ }
+}
+
#[derive(Clone, Debug)]
enum MaybeInst {
Compiled(Inst),
@@ -755,13 +828,22 @@
impl MaybeInst {
fn fill(&mut self, goto: InstPtr) {
- let filled = match *self {
- MaybeInst::Uncompiled(ref inst) => inst.fill(goto),
+ let maybeinst = match *self {
+ MaybeInst::Split => MaybeInst::Split1(goto),
+ MaybeInst::Uncompiled(ref inst) => {
+ MaybeInst::Compiled(inst.fill(goto))
+ }
MaybeInst::Split1(goto1) => {
- Inst::Split(InstSplit { goto1: goto1, goto2: goto })
+ MaybeInst::Compiled(Inst::Split(InstSplit {
+ goto1: goto1,
+ goto2: goto,
+ }))
}
MaybeInst::Split2(goto2) => {
- Inst::Split(InstSplit { goto1: goto, goto2: goto2 })
+ MaybeInst::Compiled(Inst::Split(InstSplit {
+ goto1: goto,
+ goto2: goto2,
+ }))
}
_ => unreachable!(
"not all instructions were compiled! \
@@ -769,7 +851,7 @@
self
),
};
- *self = MaybeInst::Compiled(filled);
+ *self = maybeinst;
}
fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
diff --git a/src/lib.rs b/src/lib.rs
index 2a74bf8..e0a0975 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -152,8 +152,9 @@
```
If you wish to match against whitespace in this mode, you can still use `\s`,
-`\n`, `\t`, etc. For escaping a single space character, you can use its hex
-character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.
+`\n`, `\t`, etc. For escaping a single space character, you can escape it
+directly with `\ `, use its hex character code `\x20` or temporarily disable
+the `x` flag, e.g., `(?-x: )`.
# Example: match multiple regular expressions simultaneously
@@ -621,8 +622,8 @@
#[cfg(feature = "perf-literal")]
extern crate aho_corasick;
-#[cfg(test)]
-extern crate doc_comment;
+// #[cfg(doctest)]
+// extern crate doc_comment;
#[cfg(feature = "perf-literal")]
extern crate memchr;
#[cfg(test)]
@@ -632,8 +633,8 @@
#[cfg(feature = "perf-cache")]
extern crate thread_local;
-#[cfg(test)]
-doc_comment::doctest!("../README.md");
+// #[cfg(doctest)]
+// doc_comment::doctest!("../README.md");
#[cfg(feature = "std")]
pub use error::Error;