Chih-Hung Hsieh | e42c505 | 2020-04-16 10:44:21 -0700 | [diff] [blame] | 1 | // See: https://github.com/rust-lang/regex/issues/48 |
// Each malformed pattern below must be reported as an `Err` by `regex_new!`
// instead of compiling successfully.
#[test]
fn invalid_regexes_no_crash() {
    let star_in_group = regex_new!("(*)");
    assert!(star_in_group.is_err());

    let question_in_non_capturing_group = regex_new!("(?:?)");
    assert!(question_in_non_capturing_group.is_err());

    let lone_question_group = regex_new!("(?)");
    assert!(lone_question_group.is_err());

    let leading_star = regex_new!("*");
    assert!(leading_star.is_err());
}
| 9 | |
// See: https://github.com/rust-lang/regex/issues/98
//
// A large bounded repetition must still build and search; the only expected
// match on "a" is the span (0, 1).
#[test]
fn regression_many_repeat_stack_overflow() {
    let bounded_repeat = regex!("^.{1,2500}");
    let spans = findall!(bounded_repeat, "a");
    assert_eq!(vec![(0, 1)], spans);
}
| 16 | |
// See: https://github.com/rust-lang/regex/issues/555
//
// A counted repetition applied directly to a flags group must be rejected
// with an error.
#[test]
fn regression_invalid_repetition_expr() {
    let compiled = regex_new!("(?m){1,1}");
    assert!(compiled.is_err());
}
| 22 | |
// See: https://github.com/rust-lang/regex/issues/527
//
// A flags-only expression nested inside capture groups must compile.
#[test]
fn regression_invalid_flags_expression() {
    let compiled = regex_new!("(((?x)))");
    assert!(compiled.is_ok());
}
| 28 | |
| 29 | // See: https://github.com/rust-lang/regex/issues/75 |
| 30 | mat!(regression_unsorted_binary_search_1, r"(?i-u)[a_]+", "A_", Some((0, 2))); |
| 31 | mat!(regression_unsorted_binary_search_2, r"(?i-u)[A_]+", "a_", Some((0, 2))); |
| 32 | |
// See: https://github.com/rust-lang/regex/issues/99
//
// Under `(?i)`, the negated class `[^x]` is expected to reject both "x" and
// "X". Only built when the `unicode-case` feature is enabled.
#[cfg(feature = "unicode-case")]
mat!(
    regression_negated_char_class_1,
    r"(?i)[^x]",
    "x",
    None
);
#[cfg(feature = "unicode-case")]
mat!(
    regression_negated_char_class_2,
    r"(?i)[^x]",
    "X",
    None
);
| 38 | |
| 39 | // See: https://github.com/rust-lang/regex/issues/101 |
| 40 | mat!(regression_ascii_word_underscore, r"[[:word:]]", "_", Some((0, 1))); |
| 41 | |
// See: https://github.com/rust-lang/regex/issues/129
//
// A named group that follows a repeated capture group must still be
// retrievable by name and hold the text it matched.
#[test]
fn regression_captures_rep() {
    let pattern = regex!(r"([a-f]){2}(?P<foo>[x-z])");
    let groups = pattern.captures(text!("abx")).unwrap();
    let foo = groups.name("foo").unwrap();
    assert_eq!(match_text!(foo), text!("x"));
}
| 49 | |
| 50 | // See: https://github.com/rust-lang/regex/issues/153 |
| 51 | mat!(regression_alt_in_alt1, r"ab?|$", "az", Some((0, 1))); |
| 52 | mat!(regression_alt_in_alt2, r"^(.*?)(\n|\r\n?|$)", "ab\rcd", Some((0, 3))); |
| 53 | |
| 54 | // See: https://github.com/rust-lang/regex/issues/169 |
| 55 | mat!(regression_leftmost_first_prefix, r"z*azb", "azb", Some((0, 3))); |
| 56 | |
// See: https://github.com/rust-lang/regex/issues/76
//
// Case-insensitive `\p{Ll}` is expected to cover all five Greek letters in
// the haystack (two UTF-8 bytes each, hence the end offset of 10). Requires
// both the `unicode-case` and `unicode-gencat` features.
#[cfg(all(feature = "unicode-case", feature = "unicode-gencat"))]
mat!(
    uni_case_lower_nocase_flag,
    r"(?i)\p{Ll}+",
    "ΛΘΓΔα",
    Some((0, 10))
);
| 60 | |
| 61 | // See: https://github.com/rust-lang/regex/issues/191 |
| 62 | mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3))); |
| 63 | |
| 64 | // burntsushi was bad and didn't create an issue for this bug. |
| 65 | mat!(anchored_prefix1, r"^a[[:^space:]]", "a ", None); |
| 66 | mat!(anchored_prefix2, r"^a[[:^space:]]", "foo boo a ", None); |
| 67 | mat!(anchored_prefix3, r"^-[a-z]", "r-f", None); |
| 68 | |
// See: https://github.com/rust-lang/regex/issues/204
//
// Splitting on `\b` is expected to yield a leading empty piece followed by
// the alternating word and non-word runs of the haystack.
// Requires the `unicode-perl` feature.
#[cfg(feature = "unicode-perl")]
split!(
    split_on_word_boundary,
    r"\b",
    r"Should this (work?)",
    &[t!(""), t!("Should"), t!(" "), t!("this"), t!(" ("), t!("work"), t!("?)")]
);
// Iterating `\b` over "a b c" is expected to report an empty match at every
// offset from 0 through 5.
#[cfg(feature = "unicode-perl")]
matiter!(word_boundary_dfa, r"\b", "a b c", (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
| 97 | |
| 98 | // See: https://github.com/rust-lang/regex/issues/268 |
| 99 | matiter!(partial_anchor, r"^a|b", "ba", (0, 1)); |
| 100 | |
| 101 | // See: https://github.com/rust-lang/regex/issues/280 |
| 102 | ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false); |
| 103 | ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false); |
| 104 | |
| 105 | // See: https://github.com/rust-lang/regex/issues/289 |
| 106 | mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4))); |
| 107 | |
| 108 | // See: https://github.com/rust-lang/regex/issues/291 |
| 109 | mat!( |
| 110 | lits_unambiguous2, |
| 111 | r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$", |
| 112 | "CIMG2341", |
| 113 | Some((0, 8)), |
| 114 | Some((0, 4)), |
| 115 | None, |
| 116 | Some((0, 4)), |
| 117 | Some((4, 8)) |
| 118 | ); |
| 119 | |
| 120 | // See: https://github.com/rust-lang/regex/issues/271 |
| 121 | mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4))); |
| 122 | mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4))); |
| 123 | mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4))); |
| 124 | #[cfg(feature = "unicode-perl")] |
| 125 | mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1))); |
| 126 | |
| 127 | // See: https://github.com/rust-lang/regex/issues/321 |
| 128 | ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false); |
| 129 | ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false); |
| 130 | |
| 131 | // See: https://github.com/BurntSushi/ripgrep/issues/1203 |
| 132 | ismatch!(reverse_suffix1, r"[0-4][0-4][0-4]000", "153.230000", true); |
| 133 | ismatch!(reverse_suffix2, r"[0-9][0-9][0-9]000", "153.230000\n", true); |
| 134 | matiter!(reverse_suffix3, r"[0-9][0-9][0-9]000", "153.230000\n", (4, 10)); |
| 135 | |
| 136 | // See: https://github.com/rust-lang/regex/issues/334 |
| 137 | // See: https://github.com/rust-lang/regex/issues/557 |
| 138 | mat!( |
| 139 | captures_after_dfa_premature_end1, |
| 140 | r"a(b*(X|$))?", |
| 141 | "abcbX", |
| 142 | Some((0, 1)), |
| 143 | None, |
| 144 | None |
| 145 | ); |
| 146 | mat!( |
| 147 | captures_after_dfa_premature_end2, |
| 148 | r"a(bc*(X|$))?", |
| 149 | "abcbX", |
| 150 | Some((0, 1)), |
| 151 | None, |
| 152 | None |
| 153 | ); |
| 154 | mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", Some((0, 0))); |
| 155 | |
| 156 | // See: https://github.com/rust-lang/regex/issues/437 |
| 157 | ismatch!( |
| 158 | literal_panic, |
| 159 | r"typename type\-parameter\-[0-9]+\-[0-9]+::.+", |
| 160 | "test", |
| 161 | false |
| 162 | ); |
| 163 | |
| 164 | // See: https://github.com/rust-lang/regex/issues/533 |
| 165 | ismatch!( |
| 166 | blank_matches_nothing_between_space_and_tab, |
| 167 | r"[[:blank:]]", |
| 168 | "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ |
| 169 | \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ |
| 170 | \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", |
| 171 | false |
| 172 | ); |
| 173 | |
| 174 | ismatch!( |
| 175 | inverted_blank_matches_everything_between_space_and_tab, |
| 176 | r"^[[:^blank:]]+$", |
| 177 | "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ |
| 178 | \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ |
| 179 | \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", |
| 180 | true |
| 181 | ); |
| 182 | |
| 183 | // Tests that our Aho-Corasick optimization works correctly. It only |
| 184 | // kicks in when we have >32 literals. By "works correctly," we mean that |
| 185 | // leftmost-first match semantics are properly respected. That is, samwise |
| 186 | // should match, not sam. |
| 187 | mat!( |
| 188 | ahocorasick1, |
| 189 | "samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|\ |
| 190 | A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z", |
| 191 | "samwise", |
| 192 | Some((0, 7)) |
| 193 | ); |
| 194 | |
// See: https://github.com/BurntSushi/ripgrep/issues/1247
//
// Uses the byte-oriented regex API directly: "s" followed by the byte 0xE4
// must produce no matches. Requires the `unicode-perl` feature.
#[test]
#[cfg(feature = "unicode-perl")]
fn regression_nfa_stops1() {
    let haystack: &[u8] = b"s\xE4";
    let pattern = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
    let hits = pattern.find_iter(haystack).count();
    assert_eq!(0, hits);
}
| 202 | |
// See: https://github.com/rust-lang/regex/issues/640
//
// The `(?i)` flag set inside the group must not leak into the `Bar`
// alternate: "foo", "Foo" and "Bar" are expected to match, while the
// lowercase "bar" at 8..11 is not. Requires the `unicode-case` feature.
#[cfg(feature = "unicode-case")]
matiter!(flags_are_unset, r"((?i)foo)|Bar", "foo Foo bar Bar", (0, 3), (4, 7), (12, 15));
Haibo Huang | 49cbe5f | 2020-05-28 20:14:24 -0700 | [diff] [blame^] | 213 | |
| 214 | // See: https://github.com/rust-lang/regex/issues/659 |
| 215 | // |
| 216 | // Note that 'Ј' is not 'j', but cyrillic Je |
| 217 | // https://en.wikipedia.org/wiki/Je_(Cyrillic) |
| 218 | ismatch!(empty_group_match, r"()Ј01", "zЈ01", true); |
| 219 | matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5)); |