Chih-Hung Hsieh | e42c505 | 2020-04-16 10:44:21 -0700 | [diff] [blame^] | 1 | use regex::internal::ExecBuilder; |
| 2 | |
| 3 | /// Given a regex, check if all of the backends produce the same |
| 4 | /// results on a number of different inputs. |
| 5 | /// |
| 6 | /// For now this just throws quickcheck at the problem, which |
| 7 | /// is not very good because it only really tests half of the |
| 8 | /// problem space. It is pretty unlikely that a random string |
| 9 | /// will match any given regex, so this will probably just |
| 10 | /// be checking that the different backends fail in the same |
| 11 | /// way. This is still worthwhile to test, but is definitely not |
| 12 | /// the whole story. |
| 13 | /// |
| 14 | /// TODO(ethan): In order to cover the other half of the problem |
| 15 | /// space, we should generate a random matching string by inspecting |
| 16 | /// the AST of the input regex. The right way to do this probably |
| 17 | /// involves adding a custom Arbitrary instance around a couple |
| 18 | /// of newtypes. That way we can respect the quickcheck size hinting |
| 19 | /// and shrinking and whatnot. |
| 20 | pub fn backends_are_consistent(re: &str) -> Result<u64, String> { |
| 21 | let standard_backends = vec![ |
| 22 | ( |
| 23 | "bounded_backtracking_re", |
| 24 | ExecBuilder::new(re) |
| 25 | .bounded_backtracking() |
| 26 | .build() |
| 27 | .map(|exec| exec.into_regex()) |
| 28 | .map_err(|err| format!("{}", err))?, |
| 29 | ), |
| 30 | ( |
| 31 | "pikevm_re", |
| 32 | ExecBuilder::new(re) |
| 33 | .nfa() |
| 34 | .build() |
| 35 | .map(|exec| exec.into_regex()) |
| 36 | .map_err(|err| format!("{}", err))?, |
| 37 | ), |
| 38 | ( |
| 39 | "default_re", |
| 40 | ExecBuilder::new(re) |
| 41 | .build() |
| 42 | .map(|exec| exec.into_regex()) |
| 43 | .map_err(|err| format!("{}", err))?, |
| 44 | ), |
| 45 | ]; |
| 46 | |
| 47 | let utf8bytes_backends = vec![ |
| 48 | ( |
| 49 | "bounded_backtracking_utf8bytes_re", |
| 50 | ExecBuilder::new(re) |
| 51 | .bounded_backtracking() |
| 52 | .bytes(true) |
| 53 | .build() |
| 54 | .map(|exec| exec.into_regex()) |
| 55 | .map_err(|err| format!("{}", err))?, |
| 56 | ), |
| 57 | ( |
| 58 | "pikevm_utf8bytes_re", |
| 59 | ExecBuilder::new(re) |
| 60 | .nfa() |
| 61 | .bytes(true) |
| 62 | .build() |
| 63 | .map(|exec| exec.into_regex()) |
| 64 | .map_err(|err| format!("{}", err))?, |
| 65 | ), |
| 66 | ( |
| 67 | "default_utf8bytes_re", |
| 68 | ExecBuilder::new(re) |
| 69 | .bytes(true) |
| 70 | .build() |
| 71 | .map(|exec| exec.into_regex()) |
| 72 | .map_err(|err| format!("{}", err))?, |
| 73 | ), |
| 74 | ]; |
| 75 | |
| 76 | let bytes_backends = vec![ |
| 77 | ( |
| 78 | "bounded_backtracking_bytes_re", |
| 79 | ExecBuilder::new(re) |
| 80 | .bounded_backtracking() |
| 81 | .only_utf8(false) |
| 82 | .build() |
| 83 | .map(|exec| exec.into_byte_regex()) |
| 84 | .map_err(|err| format!("{}", err))?, |
| 85 | ), |
| 86 | ( |
| 87 | "pikevm_bytes_re", |
| 88 | ExecBuilder::new(re) |
| 89 | .nfa() |
| 90 | .only_utf8(false) |
| 91 | .build() |
| 92 | .map(|exec| exec.into_byte_regex()) |
| 93 | .map_err(|err| format!("{}", err))?, |
| 94 | ), |
| 95 | ( |
| 96 | "default_bytes_re", |
| 97 | ExecBuilder::new(re) |
| 98 | .only_utf8(false) |
| 99 | .build() |
| 100 | .map(|exec| exec.into_byte_regex()) |
| 101 | .map_err(|err| format!("{}", err))?, |
| 102 | ), |
| 103 | ]; |
| 104 | |
| 105 | Ok(string_checker::check_backends(&standard_backends)? |
| 106 | + string_checker::check_backends(&utf8bytes_backends)? |
| 107 | + bytes_checker::check_backends(&bytes_backends)?) |
| 108 | } |
| 109 | |
//
// A consistency checker parameterized by the input type (&str or &[u8]).
//
// `checker!(module_name, regex_type, mk_input)` expands to a module named
// `module_name` exposing `check_backends`. `regex_type` is the regex type
// under test and `mk_input` is an expression that is called with the
// quickcheck generator and must produce an owned input which, by reference,
// is accepted by the regex's search methods.
//

macro_rules! checker {
    ($module_name:ident, $regex_type:path, $mk_input:expr) => {
        mod $module_name {
            use quickcheck;
            // `Arbitrary` must be in scope here because `$mk_input` is
            // expanded inside this module and typically calls `arbitrary`.
            use quickcheck::{Arbitrary, TestResult};

            /// Compares every backend after the first against the first
            /// one and returns the sum of the counts reported by each
            /// quickcheck run.
            ///
            /// An empty slice (or a single backend) has nothing to compare
            /// and yields `Ok(0)`.
            ///
            /// # Errors
            ///
            /// Returns a message naming the two disagreeing backends as
            /// soon as one comparison fails.
            pub fn check_backends(
                backends: &[(&str, $regex_type)],
            ) -> Result<u64, String> {
                // `split_first` avoids the out-of-bounds panic that
                // indexing `backends[0]` / `backends[1..]` would cause on
                // an empty slice.
                let (reference, candidates) = match backends.split_first() {
                    Some(parts) => parts,
                    None => return Ok(0),
                };

                let mut total_passed = 0;
                for candidate in candidates {
                    total_passed +=
                        quickcheck_regex_eq(reference, candidate)?;
                }

                Ok(total_passed)
            }

            /// Runs quickcheck on a pair of named regexes and turns a
            /// failure into a descriptive error message.
            fn quickcheck_regex_eq(
                &(name1, ref re1): &(&str, $regex_type),
                &(name2, ref re2): &(&str, $regex_type),
            ) -> Result<u64, String> {
                quickcheck::QuickCheck::new()
                    .quicktest(RegexEqualityTest::new(
                        re1.clone(),
                        re2.clone(),
                    ))
                    .map_err(|err| {
                        // The space before the line continuation matters:
                        // `\` swallows the next line's leading whitespace,
                        // so without it the two sentences run together.
                        format!(
                            "{}(/{}/) and {}(/{}/) are inconsistent. \
                             QuickCheck Err: {:?}",
                            name1, re1, name2, re2, err
                        )
                    })
            }

            /// A quickcheck property asserting that two regexes behave
            /// identically on a generated input.
            struct RegexEqualityTest {
                re1: $regex_type,
                re2: $regex_type,
            }

            impl RegexEqualityTest {
                fn new(re1: $regex_type, re2: $regex_type) -> Self {
                    RegexEqualityTest { re1, re2 }
                }
            }

            impl quickcheck::Testable for RegexEqualityTest {
                /// Generates one input and compares `find`, `captures`,
                /// `find_iter`, `captures_iter` and `split` between the
                /// two regexes, reporting the first API that disagrees.
                fn result<G: quickcheck::Gen>(&self, g: &mut G) -> TestResult {
                    let input = $mk_input(g);
                    let input = &input;

                    let mismatch = |what: &str| {
                        TestResult::error(format!(
                            "{} mismatch input={:?}",
                            what, input
                        ))
                    };

                    if self.re1.find(input) != self.re2.find(input) {
                        return mismatch("find");
                    }

                    match (self.re1.captures(input), self.re2.captures(input))
                    {
                        (None, None) => {}
                        (Some(cap1), Some(cap2)) => {
                            if !cap1.iter().eq(cap2.iter()) {
                                return mismatch("captures");
                            }
                        }
                        _ => return mismatch("captures"),
                    }

                    // `Iterator::eq` also compares lengths. Zipping the
                    // two iterators would stop at the shorter one and
                    // silently accept a backend that reports extra (or
                    // too few) matches.
                    if !self
                        .re1
                        .find_iter(input)
                        .eq(self.re2.find_iter(input))
                    {
                        return mismatch("find_iter");
                    }

                    // Capture sets are compared group by group via their
                    // iterators, so the outer iterators are walked in
                    // lock-step by hand, again treating a length
                    // difference as a mismatch.
                    let mut ci1 = self.re1.captures_iter(input);
                    let mut ci2 = self.re2.captures_iter(input);
                    loop {
                        match (ci1.next(), ci2.next()) {
                            (None, None) => break,
                            (Some(cap1), Some(cap2)) => {
                                if !cap1.iter().eq(cap2.iter()) {
                                    return mismatch("captures_iter");
                                }
                            }
                            _ => return mismatch("captures_iter"),
                        }
                    }

                    if !self.re1.split(input).eq(self.re2.split(input)) {
                        return mismatch("split");
                    }

                    TestResult::passed()
                }
            }
        } // mod
    }; // rule case
} // macro_rules!
| 237 | |
| 238 | checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen)); |
| 239 | checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary( |
| 240 | gen |
| 241 | )); |