// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5// Comparative tester for regular expression matching.
6// Checks all implementations against each other.
7
8#ifndef RE2_TESTING_TESTER_H__
9#define RE2_TESTING_TESTER_H__
10
#include <vector>

#include "re2/stringpiece.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
#include "util/pcre.h"
16
17namespace re2 {
18
19class Regexp;
20
// Every regexp engine the tester knows how to drive.
// kEngineMax is not an engine: it is the count of the entries before it
// and serves as the upper bound when iterating.
enum Engine {
  // Prog::BadSearchBacktrack
  kEngineBacktrack = 0,
  // Prog::SearchNFA
  kEngineNFA,
  // Prog::SearchDFA, only ask whether it matched
  kEngineDFA,
  // Prog::SearchDFA, ask for match[0]
  kEngineDFA1,
  // Prog::SearchOnePass, if applicable
  kEngineOnePass,
  // Prog::SearchBitState
  kEngineBitState,
  // RE2, all submatches
  kEngineRE2,
  // RE2, only ask for match[0]
  kEngineRE2a,
  // RE2, only ask whether it matched
  kEngineRE2b,
  // PCRE (util/pcre.h)
  kEnginePCRE,

  kEngineMax,
};
36
37// Make normal math on the enum preserve the type.
38// By default, C++ doesn't define ++ on enum, and e+1 has type int.
39static inline void operator++(Engine& e, int unused) {
40 e = static_cast<Engine>(e+1);
41}
42
43static inline Engine operator+(Engine e, int i) {
44 return static_cast<Engine>(static_cast<int>(e)+i);
45}
46
47// A TestInstance caches per-regexp state for a given
48// regular expression in a given configuration
49// (UTF-8 vs Latin1, longest vs first match, etc.).
50class TestInstance {
51 public:
52 struct Result;
53
54 TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
55 Regexp::ParseFlags flags);
56 ~TestInstance();
57 Regexp::ParseFlags flags() { return flags_; }
58 bool error() { return error_; }
59
60 // Runs a single test case: search in text, which is in context,
61 // using the given anchoring.
62 bool RunCase(const StringPiece& text, const StringPiece& context,
63 Prog::Anchor anchor);
64
65 private:
66 // Runs a single search using the named engine type.
67 void RunSearch(Engine type,
68 const StringPiece& text, const StringPiece& context,
69 Prog::Anchor anchor,
70 Result *result);
71
72 void LogMatch(const char* prefix, Engine e, const StringPiece& text,
73 const StringPiece& context, Prog::Anchor anchor);
74
75 const StringPiece& regexp_str_; // regexp being tested
76 Prog::MatchKind kind_; // kind of match
77 Regexp::ParseFlags flags_; // flags for parsing regexp_str_
78 bool error_; // error during constructor?
79
80 Regexp* regexp_; // parsed regexp
81 int num_captures_; // regexp_->NumCaptures() cached
82 Prog* prog_; // compiled program
83 Prog* rprog_; // compiled reverse program
84 PCRE* re_; // PCRE implementation
85 RE2* re2_; // RE2 implementation
86
87 DISALLOW_EVIL_CONSTRUCTORS(TestInstance);
88};
89
90// A group of TestInstances for all possible configurations.
91class Tester {
92 public:
93 explicit Tester(const StringPiece& regexp);
94 ~Tester();
95
96 bool error() { return error_; }
97
98 // Runs a single test case: search in text, which is in context,
99 // using the given anchoring.
100 bool TestCase(const StringPiece& text, const StringPiece& context,
101 Prog::Anchor anchor);
102
103 // Run TestCase(text, text, anchor) for all anchoring modes.
104 bool TestInput(const StringPiece& text);
105
106 // Run TestCase(text, context, anchor) for all anchoring modes.
107 bool TestInputInContext(const StringPiece& text, const StringPiece& context);
108
109 private:
110 bool error_;
111 vector<TestInstance*> v_;
112
113 DISALLOW_EVIL_CONSTRUCTORS(Tester);
114};
115
// Run all possible tests using regexp and text.
// NOTE(review): the bool result presumably reports whether every test
// passed -- confirm against the definition.
bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
118
119} // namespace re2
120
121#endif // RE2_TESTING_TESTER_H__