blob: 293ac1ae7280d324684f92b09027be2d41b8e0e1 [file] [log] [blame]
// Smallest case in this file: the one-character ASCII pattern `a` against the
// haystack "a", with the expected result `Some((0, 1))`.
// NOTE(review): `mat!` is defined outside this view; its arguments read as
// (test name, pattern, haystack, expected span) - confirm against the macro.
Chih-Hung Hsiehe42c5052020-04-16 10:44:21 -07001mat!(ascii_literal, r"a", "a", Some((0, 1)));
2
3// Some crazy expressions from regular-expressions.info.
// Every case passes a name, a pattern, a haystack and an expected result
// (`Some((start, end))` or `None`) to `mat!`.
// NOTE(review): `mat!` is defined outside this view; the spans are presumably
// offsets into the haystack - confirm against the macro definition.

// Decimal 0-255 delimited by word boundaries, with Unicode mode switched off
// by `(?-u)`. "255" sits at (5, 8) in the haystack.
4mat!(
5 match_ranges,
6 r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
7 "num: 255",
8 Some((5, 8))
9);
// Same pattern; "256" fits none of the alternatives, so `None` is expected.
10mat!(
11 match_ranges_not,
12 r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
13 "num: 256",
14 None
15);
// Float-like pattern: optional sign, optional integer digits, optional dot,
// then at least one digit.
16mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
// Only the leading "0.1" of "0.1.2" is expected.
17mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
// The leading "a" is not part of the expected span.
18mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
// Anchored with `^...$`; "1.a" has no digit after the dot, so `None`.
19mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
// E-mail-like pattern, case-insensitive with Unicode mode off (`(?i-u)`).
// The address occupies (8, 26) in the haystack.
20mat!(
21 match_email,
22 r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
23 "mine is jam.slam@gmail.com ",
24 Some((8, 26))
25);
// Same pattern; nothing after the "@" supplies the required dot followed by
// 2-4 letters, so `None` is expected.
26mat!(
27 match_email_not,
28 r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
29 "mine is jam.slam@gmail ",
30 None
31);
// A longer address pattern (lowercase classes, no inline flags) run against
// the same haystack, with the same expected span as `match_email`.
32mat!(
33 match_email_big,
34 r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
35 "mine is jam.slam@gmail.com ",
36 Some((8, 26))
37);
// Anchored date pattern: "19" or "20" plus two digits, then month 01-12 and
// day 01-31, each separated by one of "-", " ", "/" or ".".
38mat!(
39 match_date1,
40 r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
41 "1900-01-01",
42 Some((0, 10))
43);
// Month "00" is not accepted by the month group: `None` expected.
44mat!(
45 match_date2,
46 r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
47 "1900-00-01",
48 None
49);
// Month "13" is not accepted by the month group: `None` expected.
50mat!(
51 match_date3,
52 r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
53 "1900-13-01",
54 None
55);
56
57// Do some crazy dancing with the start/end assertions.
// NOTE(review): `matiter!` is defined outside this view; the tuples after the
// haystack are presumably the spans expected from iterating over all matches,
// in order - confirm against the macro definition.

// On the empty haystack, each ordering and repetition of `^` and `$` below is
// expected to yield the single empty span (0, 0).
58matiter!(match_start_end_empty, r"^$", "", (0, 0));
59matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
60matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
61matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
// With `*` around the assertion pair, the expectation is an empty span at
// every offset 0 through 5 of the five-character haystack "a\nb\nc".
62matiter!(
63 match_start_end_empty_rep,
64 r"(?:^$)*",
65 "a\nb\nc",
66 (0, 0),
67 (1, 1),
68 (2, 2),
69 (3, 3),
70 (4, 4),
71 (5, 5)
72);
// Same expectation with the two assertions swapped inside the group.
73matiter!(
74 match_start_end_empty_rep_rev,
75 r"(?:$^)*",
76 "a\nb\nc",
77 (0, 0),
78 (1, 1),
79 (2, 2),
80 (3, 3),
81 (4, 4),
82 (5, 5)
83);
84
85// Test negated character classes.
// In every case the expected span is the first haystack character that the
// negated class does not exclude. Several cases nest a bracketed named class
// (`[:space:]`, `[:alpha:]`) inside the negated class, in either order
// relative to a literal member.
86mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
87mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
88mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3)));
89mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
90mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2)));
91mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3)));
92mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3)));
// "A" is alphabetic, so the expected span is the digit "1".
93mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
94
95// Test that repeated empty expressions don't loop forever.
// Each pattern wraps an inner expression that can match the empty string
// (`.*` or `.?`) in a second repetition operator and then requires `=`.
// All cases expect (0, 2), i.e. the "a=" prefix of "a=b".
// The `lazy_*` cases use the non-greedy form of the outer repetition (`*?`,
// `+?`, `{1,}?`, `{1,2}?`).
96mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
97mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
98mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
99mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
100mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
101mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
// The `greedy_*` cases repeat the same shapes with the greedy outer operator.
102mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
103mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
104mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
105mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
106mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
107mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
108
109// Test that we handle various flavors of empty expressions.
// Unless noted, the expectation against "abc" is an empty span at each offset
// 0 through 3. The exceptions are the cases where a non-empty alternative is
// written first and can match (11, 13, 20 and 23).
110matiter!(match_empty1, r"", "", (0, 0));
111matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
112matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
113matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
114matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
115matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
116matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
117matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
118matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Empty alternative first: the expectation stays empty at offset 1 even though
// `b` could match there.
119matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// `b` first: the span (1, 2) is expected at offset 1 and no (2, 2) is listed.
120matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
// The same ordering contrast using a bare empty alternative.
Haibo Huang49cbe5f2020-05-28 20:14:24 -0700121matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
122matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
// `z` never occurs in the haystack, so only empty spans are expected.
123matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
124matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
125matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
126matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
127matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// The same ordering contrast using an empty non-capturing group `(?:)`.
128matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
129matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
130matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
131matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Neither alternative can match empty here: only "a" at (0, 1) and "b" at
// (1, 2) are expected.
132matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
Chih-Hung Hsiehe42c5052020-04-16 10:44:21 -0700133
// Exercise the DFA on a pathological input.
// (The expectation is that the DFA's cache gets flushed too frequently, which
// should make it quit and fall back to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    // Produce `count` characters, each chosen as '1' or '0' by a boolean drawn
    // from a freshly entropy-seeded `SmallRng`.
    fn ones_and_zeroes(count: usize) -> String {
        use rand::rngs::SmallRng;
        use rand::{Rng, SeedableRng};

        let mut bits = SmallRng::from_entropy();
        (0..count)
            .map(|_| if bits.gen::<bool>() { '1' } else { '0' })
            .collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");

    // Haystack layout: 100_000 random bits, one literal '1', then 20 more
    // random bits, so the pattern above is satisfied by construction.
    let mut haystack = ones_and_zeroes(100_000);
    haystack.push('1');
    haystack += &ones_and_zeroes(20);

    assert!(re.is_match(text!(&*haystack)));
}
164
// Compile-only test: hands one very large, multi-line pattern (Unicode mode
// disabled via `(?-u)`) to `RegexBuilder::new(..).build()` and unwraps the
// result, so the test panics if the builder returns an error. Nothing is
// matched against the compiled regex.
// The pattern is an alternation of digit-prefixed branches whose `(?: ... )`
// groups are nested several levels deep.
// NOTE(review): the test name suggests it guards the parser's nest limit, but
// no limit is configured in the visible code - confirm which default applies.
// NOTE(review): no `x` flag is visible, so the whitespace and line breaks
// inside the raw string are presumably part of the pattern - confirm before
// reformatting the literal.
165#[test]
166fn nest_limit_makes_it_parse() {
167 use regex::RegexBuilder;
168
169 RegexBuilder::new(
170 r#"(?-u)
171 2(?:
172 [45]\d{3}|
173 7(?:
174 1[0-267]|
175 2[0-289]|
176 3[0-29]|
177 4[01]|
178 5[1-3]|
179 6[013]|
180 7[0178]|
181 91
182 )|
183 8(?:
184 0[125]|
185 [139][1-6]|
186 2[0157-9]|
187 41|
188 6[1-35]|
189 7[1-5]|
190 8[1-8]|
191 90
192 )|
193 9(?:
194 0[0-2]|
195 1[0-4]|
196 2[568]|
197 3[3-6]|
198 5[5-7]|
199 6[0167]|
200 7[15]|
201 8[0146-9]
202 )
203 )\d{4}|
204 3(?:
205 12?[5-7]\d{2}|
206 0(?:
207 2(?:
208 [025-79]\d|
209 [348]\d{1,2}
210 )|
211 3(?:
212 [2-4]\d|
213 [56]\d?
214 )
215 )|
216 2(?:
217 1\d{2}|
218 2(?:
219 [12]\d|
220 [35]\d{1,2}|
221 4\d?
222 )
223 )|
224 3(?:
225 1\d{2}|
226 2(?:
227 [2356]\d|
228 4\d{1,2}
229 )
230 )|
231 4(?:
232 1\d{2}|
233 2(?:
234 2\d{1,2}|
235 [47]|
236 5\d{2}
237 )
238 )|
239 5(?:
240 1\d{2}|
241 29
242 )|
243 [67]1\d{2}|
244 8(?:
245 1\d{2}|
246 2(?:
247 2\d{2}|
248 3|
249 4\d
250 )
251 )
252 )\d{3}|
253 4(?:
254 0(?:
255 2(?:
256 [09]\d|
257 7
258 )|
259 33\d{2}
260 )|
261 1\d{3}|
262 2(?:
263 1\d{2}|
264 2(?:
265 [25]\d?|
266 [348]\d|
267 [67]\d{1,2}
268 )
269 )|
270 3(?:
271 1\d{2}(?:
272 \d{2}
273 )?|
274 2(?:
275 [045]\d|
276 [236-9]\d{1,2}
277 )|
278 32\d{2}
279 )|
280 4(?:
281 [18]\d{2}|
282 2(?:
283 [2-46]\d{2}|
284 3
285 )|
286 5[25]\d{2}
287 )|
288 5(?:
289 1\d{2}|
290 2(?:
291 3\d|
292 5
293 )
294 )|
295 6(?:
296 [18]\d{2}|
297 2(?:
298 3(?:
299 \d{2}
300 )?|
301 [46]\d{1,2}|
302 5\d{2}|
303 7\d
304 )|
305 5(?:
306 3\d?|
307 4\d|
308 [57]\d{1,2}|
309 6\d{2}|
310 8
311 )
312 )|
313 71\d{2}|
314 8(?:
315 [18]\d{2}|
316 23\d{2}|
317 54\d{2}
318 )|
319 9(?:
320 [18]\d{2}|
321 2[2-5]\d{2}|
322 53\d{1,2}
323 )
324 )\d{3}|
325 5(?:
326 02[03489]\d{2}|
327 1\d{2}|
328 2(?:
329 1\d{2}|
330 2(?:
331 2(?:
332 \d{2}
333 )?|
334 [457]\d{2}
335 )
336 )|
337 3(?:
338 1\d{2}|
339 2(?:
340 [37](?:
341 \d{2}
342 )?|
343 [569]\d{2}
344 )
345 )|
346 4(?:
347 1\d{2}|
348 2[46]\d{2}
349 )|
350 5(?:
351 1\d{2}|
352 26\d{1,2}
353 )|
354 6(?:
355 [18]\d{2}|
356 2|
357 53\d{2}
358 )|
359 7(?:
360 1|
361 24
362 )\d{2}|
363 8(?:
364 1|
365 26
366 )\d{2}|
367 91\d{2}
368 )\d{3}|
369 6(?:
370 0(?:
371 1\d{2}|
372 2(?:
373 3\d{2}|
374 4\d{1,2}
375 )
376 )|
377 2(?:
378 2[2-5]\d{2}|
379 5(?:
380 [3-5]\d{2}|
381 7
382 )|
383 8\d{2}
384 )|
385 3(?:
386 1|
387 2[3478]
388 )\d{2}|
389 4(?:
390 1|
391 2[34]
392 )\d{2}|
393 5(?:
394 1|
395 2[47]
396 )\d{2}|
397 6(?:
398 [18]\d{2}|
399 6(?:
400 2(?:
401 2\d|
402 [34]\d{2}
403 )|
404 5(?:
405 [24]\d{2}|
406 3\d|
407 5\d{1,2}
408 )
409 )
410 )|
411 72[2-5]\d{2}|
412 8(?:
413 1\d{2}|
414 2[2-5]\d{2}
415 )|
416 9(?:
417 1\d{2}|
418 2[2-6]\d{2}
419 )
420 )\d{3}|
421 7(?:
422 (?:
423 02|
424 [3-589]1|
425 6[12]|
426 72[24]
427 )\d{2}|
428 21\d{3}|
429 32
430 )\d{3}|
431 8(?:
432 (?:
433 4[12]|
434 [5-7]2|
435 1\d?
436 )|
437 (?:
438 0|
439 3[12]|
440 [5-7]1|
441 217
442 )\d
443 )\d{4}|
444 9(?:
445 [35]1|
446 (?:
447 [024]2|
448 81
449 )\d|
450 (?:
451 1|
452 [24]1
453 )\d{2}
454 )\d{3}
455 "#,
456 )
457 .build()
458 .unwrap();
459}
459}