// Copyright 2008 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Exhaustive testing of regular expression matching.
6
7#include "util/test.h"
8#include "re2/re2.h"
9#include "re2/testing/exhaustive_tester.h"
10
11DECLARE_string(regexp_engines);
12
13namespace re2 {
14
15// Test empty string matches (aka "(?:)")
16TEST(EmptyString, Exhaustive) {
17 ExhaustiveTest(2, 2, Split(" ", "(?:) a"),
18 RegexpGenerator::EgrepOps(),
19 5, Split("", "ab"), "", "");
20}
21
22// Test escaped versions of regexp syntax.
23TEST(Punctuation, Literals) {
24 vector<string> alphabet = Explode("()*+?{}[]\\^$.");
25 vector<string> escaped = alphabet;
26 for (int i = 0; i < escaped.size(); i++)
27 escaped[i] = "\\" + escaped[i];
28 ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
29 2, alphabet, "", "");
30}
31
32// Test ^ $ . \A \z in presence of line endings.
33// Have to wrap the empty-width ones in (?:) so that
34// they can be repeated -- PCRE rejects ^* but allows (?:^)*
35TEST(LineEnds, Exhaustive) {
36 ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),
37 RegexpGenerator::EgrepOps(),
38 4, Explode("ab\n"), "", "");
39}
40
41// Test what does and does not match \n.
42// This would be a good test, except that PCRE seems to have a bug:
43// in single-byte character set mode (the default),
44// [^a] matches \n, but in UTF-8 mode it does not.
45// So when we run the test, the tester complains that
46// we don't agree with PCRE, but it's PCRE that is at fault.
47// For what it's worth, Perl gets this right (matches
48// regardless of whether UTF-8 input is selected):
49//
50// #!/usr/bin/perl
51// use POSIX qw(locale_h);
52// print "matches in latin1\n" if "\n" =~ /[^a]/;
53// setlocale("en_US.utf8");
54// print "matches in utf8\n" if "\n" =~ /[^a]/;
55//
56// The rule chosen for RE2 is that by default, like Perl,
57// dot does not match \n but negated character classes [^a] do.
58// (?s) will allow dot to match \n; there is no way in RE2
59// to stop [^a] from matching \n, though the underlying library
60// provides a mechanism, and RE2 could add new syntax if needed.
61//
62// TEST(Newlines, Exhaustive) {
63// vector<string> empty_vector;
64// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
65// RegexpGenerator::EgrepOps(),
66// 4, Explode("a\n"), "");
67// }
68
69} // namespace re2
70