blob: ca7627f5d59b17d90a691bae770cc062ed99285c [file] [log] [blame]
Alexander Gutkin0d4c5232013-02-28 13:47:27 +00001// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Benchmarks for regular expression implementations.
6
7#include "util/test.h"
8#include "re2/prog.h"
9#include "re2/re2.h"
10#include "re2/regexp.h"
11#include "util/pcre.h"
12#include "util/benchmark.h"
13
14namespace re2 {
15void Test();
16void MemoryUsage();
17} // namespace re2
18
19typedef testing::MallocCounter MallocCounter;
20
21namespace re2 {
22
23void Test() {
24 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
25 CHECK(re);
26 Prog* prog = re->CompileToProg(0);
27 CHECK(prog);
28 CHECK(prog->IsOnePass());
29 const char* text = "650-253-0001";
30 StringPiece sp[4];
31 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
32 CHECK_EQ(sp[0], "650-253-0001");
33 CHECK_EQ(sp[1], "650");
34 CHECK_EQ(sp[2], "253");
35 CHECK_EQ(sp[3], "0001");
36 delete prog;
37 re->Decref();
38 LOG(INFO) << "test passed\n";
39}
40
41void MemoryUsage() {
42 const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
43 const char* text = "650-253-0001";
44 {
45 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
46 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
47 CHECK(re);
48 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
49 // because LOG(INFO) might do a big allocation before they get evaluated.
50 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
51 mc.Reset();
52
53 Prog* prog = re->CompileToProg(0);
54 CHECK(prog);
55 CHECK(prog->IsOnePass());
56 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
57 mc.Reset();
58
59 StringPiece sp[4];
60 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
61 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
62 delete prog;
63 re->Decref();
64 }
65
66 {
67 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
68
69 PCRE re(regexp, PCRE::UTF8);
70 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
71 PCRE::FullMatch(text, re);
72 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
73 }
74
75 {
76 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
77
78 PCRE* re = new PCRE(regexp, PCRE::UTF8);
79 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
80 PCRE::FullMatch(text, *re);
81 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
82 delete re;
83 }
84
85 {
86 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
87
88 RE2 re(regexp);
89 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
90 RE2::FullMatch(text, re);
91 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
92 }
93
94 fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n",
95 static_cast<int>(sizeof(PCRE)),
96 static_cast<int>(sizeof(RE2)),
97 static_cast<int>(sizeof(Prog)),
98 static_cast<int>(sizeof(Prog::Inst)));
99}
100
101// Regular expression implementation wrappers.
102// Defined at bottom of file, but they are repetitive
103// and not interesting.
104
105typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
106 Prog::Anchor anchor, bool expect_match);
107
108SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,
109 SearchPCRE, SearchRE2,
110 SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
111 SearchCachedPCRE, SearchCachedRE2;
112
113typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);
114
115ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
116 Parse1PCRE, Parse1RE2,
117 Parse1Backtrack,
118 Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
119 Parse1CachedPCRE, Parse1CachedRE2,
120 Parse1CachedBacktrack;
121
122ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
123 Parse3PCRE, Parse3RE2,
124 Parse3Backtrack,
125 Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
126 Parse3CachedPCRE, Parse3CachedRE2,
127 Parse3CachedBacktrack;
128
129ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
130
131ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
132
133// Benchmark: failed search for regexp in random text.
134
135// Generate random text that won't contain the search string,
136// to test worst-case search behavior.
137void MakeText(string* text, int nbytes) {
138 text->resize(nbytes);
139 srand(0);
140 for (int i = 0; i < nbytes; i++) {
141 if (!rand()%30)
142 (*text)[i] = '\n';
143 else
144 (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20;
145 }
146}
147
148// Makes text of size nbytes, then calls run to search
149// the text for regexp iters times.
150void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
151 StopBenchmarkTiming();
152 string s;
153 MakeText(&s, nbytes);
154 BenchmarkMemoryUsage();
155 StartBenchmarkTiming();
156 search(iters, regexp, s, Prog::kUnanchored, false);
157 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
158}
159
160// These two are easy because they start with an A,
161// giving the search loop something to memchr for.
162#define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
163#define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
164
165// This is a little harder, since it starts with a character class
166// and thus can't be memchr'ed. Could look for ABC and work backward,
167// but no one does that.
168#define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
169
170// This is a fair amount harder, because of the leading [ -~]*.
171// A bad backtracking implementation will take O(text^2) time to
172// figure out there's no match.
173#define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
174
175// This stresses engines that are trying to track parentheses.
176#define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
177 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
178
179void Search_Easy0_CachedDFA(int i, int n) { Search(i, n, EASY0, SearchCachedDFA); }
180void Search_Easy0_CachedNFA(int i, int n) { Search(i, n, EASY0, SearchCachedNFA); }
181void Search_Easy0_CachedPCRE(int i, int n) { Search(i, n, EASY0, SearchCachedPCRE); }
182void Search_Easy0_CachedRE2(int i, int n) { Search(i, n, EASY0, SearchCachedRE2); }
183
184BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
185BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
186#ifdef USEPCRE
187BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
188#endif
189BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
190
191void Search_Easy1_CachedDFA(int i, int n) { Search(i, n, EASY1, SearchCachedDFA); }
192void Search_Easy1_CachedNFA(int i, int n) { Search(i, n, EASY1, SearchCachedNFA); }
193void Search_Easy1_CachedPCRE(int i, int n) { Search(i, n, EASY1, SearchCachedPCRE); }
194void Search_Easy1_CachedRE2(int i, int n) { Search(i, n, EASY1, SearchCachedRE2); }
195
196BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
197BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
198#ifdef USEPCRE
199BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
200#endif
201BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
202
203void Search_Medium_CachedDFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedDFA); }
204void Search_Medium_CachedNFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedNFA); }
205void Search_Medium_CachedPCRE(int i, int n) { Search(i, n, MEDIUM, SearchCachedPCRE); }
206void Search_Medium_CachedRE2(int i, int n) { Search(i, n, MEDIUM, SearchCachedRE2); }
207
208BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
209BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
210#ifdef USEPCRE
211BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
212#endif
213BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
214
215void Search_Hard_CachedDFA(int i, int n) { Search(i, n, HARD, SearchCachedDFA); }
216void Search_Hard_CachedNFA(int i, int n) { Search(i, n, HARD, SearchCachedNFA); }
217void Search_Hard_CachedPCRE(int i, int n) { Search(i, n, HARD, SearchCachedPCRE); }
218void Search_Hard_CachedRE2(int i, int n) { Search(i, n, HARD, SearchCachedRE2); }
219
220BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
221BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
222#ifdef USEPCRE
223BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
224#endif
225BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
226
227void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCachedDFA); }
228void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCachedNFA); }
229void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCachedPCRE); }
230void Search_Parens_CachedRE2(int i, int n) { Search(i, n, PARENS, SearchCachedRE2); }
231
232BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
233BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
234#ifdef USEPCRE
235BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
236#endif
237BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
238
239void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
240 StopBenchmarkTiming();
241 string s;
242 s.append(nbytes/2, 'x');
243 string regexp = "^" + s + ".*$";
244 string t;
245 MakeText(&t, nbytes/2);
246 s += t;
247 BenchmarkMemoryUsage();
248 StartBenchmarkTiming();
249 search(iters, regexp.c_str(), s, Prog::kUnanchored, true);
250 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
251}
252
253void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCachedDFA); }
254void Search_BigFixed_CachedNFA(int i, int n) { SearchBigFixed(i, n, SearchCachedNFA); }
255void Search_BigFixed_CachedPCRE(int i, int n) { SearchBigFixed(i, n, SearchCachedPCRE); }
256void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCachedRE2); }
257
258BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
259BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
260#ifdef USEPCRE
261BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
262#endif
263BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
264
265// Benchmark: FindAndConsume
266void FindAndConsume(int iters, int nbytes) {
267 StopBenchmarkTiming();
268 string s;
269 MakeText(&s, nbytes);
270 s.append("Hello World");
271 StartBenchmarkTiming();
272 RE2 re("((Hello World))");
273 for (int i = 0; i < iters; i++) {
274 StringPiece t = s;
275 StringPiece u;
276 CHECK(RE2::FindAndConsume(&t, re, &u));
277 CHECK_EQ(u, "Hello World");
278 }
279 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
280}
281
282BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
283
284// Benchmark: successful anchored search.
285
286void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
287 string s;
288 MakeText(&s, nbytes);
289 BenchmarkMemoryUsage();
290 search(iters, regexp, s, Prog::kAnchored, true);
291 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
292}
293
294// Unambiguous search (RE2 can use OnePass).
295
296void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); }
297void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
298void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); }
299void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); }
300
301BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
302#ifdef USEPCRE
303BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
304#endif
305BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
306BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
307
308void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
309void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
310void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
311void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
312
313BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
314#ifdef USEPCRE
315BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
316#endif
317BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
318BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
319
320// Ambiguous search (RE2 cannot use OnePass).
321
322void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchDFA); }
323void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchPCRE); }
324void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchRE2); }
325void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", SearchBitState); }
326
327BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
328#ifdef USEPCRE
329BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
330#endif
331BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
332BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
333
334void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }
335void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
336void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
337
338BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
339#ifdef USEPCRE
340BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
341#endif
342BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
343
344// Benchmark: use regexp to find phone number.
345
346void SearchDigits(int iters, SearchImpl* search) {
347 const char *text = "650-253-0001";
348 int len = strlen(text);
349 BenchmarkMemoryUsage();
350 search(iters, "([0-9]+)-([0-9]+)-([0-9]+)",
351 StringPiece(text, len), Prog::kAnchored, true);
352 SetBenchmarkItemsProcessed(iters);
353}
354
355void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); }
356void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); }
357void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); }
358void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); }
359void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); }
360void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); }
361
362BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
363BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
364BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
365#ifdef USEPCRE
366BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
367#endif
368BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
369BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
370
371// Benchmark: use regexp to parse digit fields in phone number.
372
373void Parse3Digits(int iters,
374 void (*parse3)(int, const char*, const StringPiece&)) {
375 BenchmarkMemoryUsage();
376 parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
377 SetBenchmarkItemsProcessed(iters);
378}
379
380void Parse_Digits_NFA(int i) { Parse3Digits(i, Parse3NFA); }
381void Parse_Digits_OnePass(int i) { Parse3Digits(i, Parse3OnePass); }
382void Parse_Digits_PCRE(int i) { Parse3Digits(i, Parse3PCRE); }
383void Parse_Digits_RE2(int i) { Parse3Digits(i, Parse3RE2); }
384void Parse_Digits_Backtrack(int i) { Parse3Digits(i, Parse3Backtrack); }
385void Parse_Digits_BitState(int i) { Parse3Digits(i, Parse3BitState); }
386
387BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
388BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
389#ifdef USEPCRE
390BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
391#endif
392BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
393BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
394BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
395
396void Parse_CachedDigits_NFA(int i) { Parse3Digits(i, Parse3CachedNFA); }
397void Parse_CachedDigits_OnePass(int i) { Parse3Digits(i, Parse3CachedOnePass); }
398void Parse_CachedDigits_PCRE(int i) { Parse3Digits(i, Parse3CachedPCRE); }
399void Parse_CachedDigits_RE2(int i) { Parse3Digits(i, Parse3CachedRE2); }
400void Parse_CachedDigits_Backtrack(int i) { Parse3Digits(i, Parse3CachedBacktrack); }
401void Parse_CachedDigits_BitState(int i) { Parse3Digits(i, Parse3CachedBitState); }
402
403BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
404BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
405#ifdef USEPCRE
406BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
407#endif
408BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
409BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
410BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
411
412void Parse3DigitDs(int iters,
413 void (*parse3)(int, const char*, const StringPiece&)) {
414 BenchmarkMemoryUsage();
415 parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
416 SetBenchmarkItemsProcessed(iters);
417}
418
419void Parse_DigitDs_NFA(int i) { Parse3DigitDs(i, Parse3NFA); }
420void Parse_DigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3OnePass); }
421void Parse_DigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3PCRE); }
422void Parse_DigitDs_RE2(int i) { Parse3DigitDs(i, Parse3RE2); }
423void Parse_DigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
424void Parse_DigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
425
426BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
427BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
428#ifdef USEPCRE
429BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
430#endif
431BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
432BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
433BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
434
435void Parse_CachedDigitDs_NFA(int i) { Parse3DigitDs(i, Parse3CachedNFA); }
436void Parse_CachedDigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3CachedOnePass); }
437void Parse_CachedDigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3CachedPCRE); }
438void Parse_CachedDigitDs_RE2(int i) { Parse3DigitDs(i, Parse3CachedRE2); }
439void Parse_CachedDigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
440void Parse_CachedDigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
441
442BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
443BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
444#ifdef USEPCRE
445BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
446#endif
447BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
448BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
449BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
450
451// Benchmark: splitting off leading number field.
452
453void Parse1Split(int iters,
454 void (*parse1)(int, const char*, const StringPiece&)) {
455 BenchmarkMemoryUsage();
456 parse1(iters, "[0-9]+-(.*)", "650-253-0001");
457 SetBenchmarkItemsProcessed(iters);
458}
459
460void Parse_Split_NFA(int i) { Parse1Split(i, Parse1NFA); }
461void Parse_Split_OnePass(int i) { Parse1Split(i, Parse1OnePass); }
462void Parse_Split_PCRE(int i) { Parse1Split(i, Parse1PCRE); }
463void Parse_Split_RE2(int i) { Parse1Split(i, Parse1RE2); }
464void Parse_Split_BitState(int i) { Parse1Split(i, Parse1BitState); }
465
466BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
467BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
468#ifdef USEPCRE
469BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
470#endif
471BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
472BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
473
474void Parse_CachedSplit_NFA(int i) { Parse1Split(i, Parse1CachedNFA); }
475void Parse_CachedSplit_OnePass(int i) { Parse1Split(i, Parse1CachedOnePass); }
476void Parse_CachedSplit_PCRE(int i) { Parse1Split(i, Parse1CachedPCRE); }
477void Parse_CachedSplit_RE2(int i) { Parse1Split(i, Parse1CachedRE2); }
478void Parse_CachedSplit_BitState(int i) { Parse1Split(i, Parse1CachedBitState); }
479
480BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
481BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
482#ifdef USEPCRE
483BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
484#endif
485BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
486BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
487
488// Benchmark: splitting off leading number field but harder (ambiguous regexp).
489
490void Parse1SplitHard(int iters,
491 void (*run)(int, const char*, const StringPiece&)) {
492 BenchmarkMemoryUsage();
493 run(iters, "[0-9]+.(.*)", "650-253-0001");
494 SetBenchmarkItemsProcessed(iters);
495}
496
497void Parse_SplitHard_NFA(int i) { Parse1SplitHard(i, Parse1NFA); }
498void Parse_SplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1PCRE); }
499void Parse_SplitHard_RE2(int i) { Parse1SplitHard(i, Parse1RE2); }
500void Parse_SplitHard_BitState(int i) { Parse1SplitHard(i, Parse1BitState); }
501
502#ifdef USEPCRE
503BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
504#endif
505BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
506BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
507BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
508
509void Parse_CachedSplitHard_NFA(int i) { Parse1SplitHard(i, Parse1CachedNFA); }
510void Parse_CachedSplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1CachedPCRE); }
511void Parse_CachedSplitHard_RE2(int i) { Parse1SplitHard(i, Parse1CachedRE2); }
512void Parse_CachedSplitHard_BitState(int i) { Parse1SplitHard(i, Parse1CachedBitState); }
513void Parse_CachedSplitHard_Backtrack(int i) { Parse1SplitHard(i, Parse1CachedBacktrack); }
514
515#ifdef USEPCRE
516BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
517#endif
518BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
519BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
520BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
521BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
522
523// Benchmark: Parse1SplitHard, big text, small match.
524
525void Parse1SplitBig1(int iters,
526 void (*run)(int, const char*, const StringPiece&)) {
527 string s;
528 s.append(100000, 'x');
529 s.append("650-253-0001");
530 BenchmarkMemoryUsage();
531 run(iters, "[0-9]+.(.*)", s);
532 SetBenchmarkItemsProcessed(iters);
533}
534
535void Parse_CachedSplitBig1_PCRE(int i) { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
536void Parse_CachedSplitBig1_RE2(int i) { Parse1SplitBig1(i, SearchParse1CachedRE2); }
537
538#ifdef USEPCRE
539BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
540#endif
541BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
542
543// Benchmark: Parse1SplitHard, big text, big match.
544
545void Parse1SplitBig2(int iters,
546 void (*run)(int, const char*, const StringPiece&)) {
547 string s;
548 s.append("650-253-");
549 s.append(100000, '0');
550 BenchmarkMemoryUsage();
551 run(iters, "[0-9]+.(.*)", s);
552 SetBenchmarkItemsProcessed(iters);
553}
554
555void Parse_CachedSplitBig2_PCRE(int i) { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
556void Parse_CachedSplitBig2_RE2(int i) { Parse1SplitBig2(i, SearchParse1CachedRE2); }
557
558#ifdef USEPCRE
559BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
560#endif
561BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
562
563// Benchmark: measure time required to parse (but not execute)
564// a simple regular expression.
565
566void ParseRegexp(int iters, const string& regexp) {
567 for (int i = 0; i < iters; i++) {
568 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
569 CHECK(re);
570 re->Decref();
571 }
572}
573
574void SimplifyRegexp(int iters, const string& regexp) {
575 for (int i = 0; i < iters; i++) {
576 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
577 CHECK(re);
578 Regexp* sre = re->Simplify();
579 CHECK(sre);
580 sre->Decref();
581 re->Decref();
582 }
583}
584
585void NullWalkRegexp(int iters, const string& regexp) {
586 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
587 CHECK(re);
588 for (int i = 0; i < iters; i++) {
589 re->NullWalk();
590 }
591 re->Decref();
592}
593
594void SimplifyCompileRegexp(int iters, const string& regexp) {
595 for (int i = 0; i < iters; i++) {
596 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
597 CHECK(re);
598 Regexp* sre = re->Simplify();
599 CHECK(sre);
600 Prog* prog = sre->CompileToProg(0);
601 CHECK(prog);
602 delete prog;
603 sre->Decref();
604 re->Decref();
605 }
606}
607
608void CompileRegexp(int iters, const string& regexp) {
609 for (int i = 0; i < iters; i++) {
610 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
611 CHECK(re);
612 Prog* prog = re->CompileToProg(0);
613 CHECK(prog);
614 delete prog;
615 re->Decref();
616 }
617}
618
619void CompileToProg(int iters, const string& regexp) {
620 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
621 CHECK(re);
622 for (int i = 0; i < iters; i++) {
623 Prog* prog = re->CompileToProg(0);
624 CHECK(prog);
625 delete prog;
626 }
627 re->Decref();
628}
629
630void CompileByteMap(int iters, const string& regexp) {
631 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
632 CHECK(re);
633 Prog* prog = re->CompileToProg(0);
634 CHECK(prog);
635 for (int i = 0; i < iters; i++) {
636 prog->ComputeByteMap();
637 }
638 delete prog;
639 re->Decref();
640}
641
642void CompilePCRE(int iters, const string& regexp) {
643 for (int i = 0; i < iters; i++) {
644 PCRE re(regexp, PCRE::UTF8);
645 CHECK_EQ(re.error(), "");
646 }
647}
648
649void CompileRE2(int iters, const string& regexp) {
650 for (int i = 0; i < iters; i++) {
651 RE2 re(regexp);
652 CHECK_EQ(re.error(), "");
653 }
654}
655
656void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
657 run(iters, regexp);
658 SetBenchmarkItemsProcessed(iters);
659}
660
661} // namespace re2
662
663DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
664
665namespace re2 {
666
667void BM_PCRE_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
668void BM_Regexp_Parse(int i) { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
669void BM_Regexp_Simplify(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
670void BM_CompileToProg(int i) { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
671void BM_CompileByteMap(int i) { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
672void BM_Regexp_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
673void BM_Regexp_SimplifyCompile(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
674void BM_Regexp_NullWalk(int i) { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
675void BM_RE2_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
676
677#ifdef USEPCRE
678BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
679#endif
680BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
681BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
682BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
683BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
684BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
685BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
686BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
687BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
688
689
690// Makes text of size nbytes, then calls run to search
691// the text for regexp iters times.
692void SearchPhone(int iters, int nbytes, ParseImpl* search) {
693 StopBenchmarkTiming();
694 string s;
695 MakeText(&s, nbytes);
696 s.append("(650) 253-0001");
697 BenchmarkMemoryUsage();
698 StartBenchmarkTiming();
699 search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
700 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
701}
702
703void SearchPhone_CachedPCRE(int i, int n) {
704 SearchPhone(i, n, SearchParse2CachedPCRE);
705}
706void SearchPhone_CachedRE2(int i, int n) {
707 SearchPhone(i, n, SearchParse2CachedRE2);
708}
709
710#ifdef USEPCRE
711BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
712#endif
713BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
714
715/*
716TODO(rsc): Make this work again.
717
718// Generates and returns a string over binary alphabet {0,1} that contains
719// all possible binary sequences of length n as subsequences. The obvious
720// brute force method would generate a string of length n * 2^n, but this
721// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
722// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
723static string DeBruijnString(int n) {
724 CHECK_LT(n, 8*sizeof(int));
725 CHECK_GT(n, 0);
726
727 vector<bool> did(1<<n);
728 for (int i = 0; i < 1<<n; i++)
729 did[i] = false;
730
731 string s;
732 for (int i = 0; i < n-1; i++)
733 s.append("0");
734 int bits = 0;
735 int mask = (1<<n) - 1;
736 for (int i = 0; i < (1<<n); i++) {
737 bits <<= 1;
738 bits &= mask;
739 if (!did[bits|1]) {
740 bits |= 1;
741 s.append("1");
742 } else {
743 s.append("0");
744 }
745 CHECK(!did[bits]);
746 did[bits] = true;
747 }
748 return s;
749}
750
751void CacheFill(int iters, int n, SearchImpl *srch) {
752 string s = DeBruijnString(n+1);
753 string t;
754 for (int i = n+1; i < 20; i++) {
755 t = s + s;
756 swap(s, t);
757 }
758 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
759 Prog::kUnanchored, true);
760 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size());
761}
762
763void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
764void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
765void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
766void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
767
768// BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
769// for the static BenchmarkRegisterer, which makes it unusable inside
770// a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
771// to make the identifiers distinct (only possible when 'a' is a simple
772// expression like 2, not like 1+1).
773#define MY_BENCHMARK_WITH_ARG(n, a) \
774 bool __benchmark_ ## n ## a = \
775 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
776
777#define DO24(A, B) \
778 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
779 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
780 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
781 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
782
783DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
784DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
785DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
786DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
787
788#undef DO24
789#undef MY_BENCHMARK_WITH_ARG
790*/
791
792////////////////////////////////////////////////////////////////////////
793//
794// Implementation routines. Sad that there are so many,
795// but all the interfaces are slightly different.
796
797// Runs implementation to search for regexp in text, iters times.
798// Expect_match says whether the regexp should be found.
799// Anchored says whether to run an anchored search.
800
801void SearchDFA(int iters, const char* regexp, const StringPiece& text,
802 Prog::Anchor anchor, bool expect_match) {
803 for (int i = 0; i < iters; i++) {
804 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
805 CHECK(re);
806 Prog* prog = re->CompileToProg(0);
807 CHECK(prog);
808 bool failed = false;
809 CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch,
810 NULL, &failed, NULL),
811 expect_match);
812 CHECK(!failed);
813 delete prog;
814 re->Decref();
815 }
816}
817
818void SearchNFA(int iters, const char* regexp, const StringPiece& text,
819 Prog::Anchor anchor, bool expect_match) {
820 for (int i = 0; i < iters; i++) {
821 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
822 CHECK(re);
823 Prog* prog = re->CompileToProg(0);
824 CHECK(prog);
825 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
826 expect_match);
827 delete prog;
828 re->Decref();
829 }
830}
831
832void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
833 Prog::Anchor anchor, bool expect_match) {
834 for (int i = 0; i < iters; i++) {
835 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
836 CHECK(re);
837 Prog* prog = re->CompileToProg(0);
838 CHECK(prog);
839 CHECK(prog->IsOnePass());
840 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
841 expect_match);
842 delete prog;
843 re->Decref();
844 }
845}
846
847void SearchBitState(int iters, const char* regexp, const StringPiece& text,
848 Prog::Anchor anchor, bool expect_match) {
849 for (int i = 0; i < iters; i++) {
850 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
851 CHECK(re);
852 Prog* prog = re->CompileToProg(0);
853 CHECK(prog);
854 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
855 expect_match);
856 delete prog;
857 re->Decref();
858 }
859}
860
861void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
862 Prog::Anchor anchor, bool expect_match) {
863 for (int i = 0; i < iters; i++) {
864 PCRE re(regexp, PCRE::UTF8);
865 CHECK_EQ(re.error(), "");
866 if (anchor == Prog::kAnchored)
867 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
868 else
869 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
870 }
871}
872
873void SearchRE2(int iters, const char* regexp, const StringPiece& text,
874 Prog::Anchor anchor, bool expect_match) {
875 for (int i = 0; i < iters; i++) {
876 RE2 re(regexp);
877 CHECK_EQ(re.error(), "");
878 if (anchor == Prog::kAnchored)
879 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
880 else
881 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
882 }
883}
884
885// SearchCachedXXX is like SearchXXX but only does the
886// regexp parsing and compiling once. This lets us measure
887// search time without the per-regexp overhead.
888
889void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
890 Prog::Anchor anchor, bool expect_match) {
891 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
892 CHECK(re);
893 Prog* prog = re->CompileToProg(1LL<<31);
894 CHECK(prog);
895 for (int i = 0; i < iters; i++) {
896 bool failed = false;
897 CHECK_EQ(prog->SearchDFA(text, NULL, anchor,
898 Prog::kFirstMatch, NULL, &failed, NULL),
899 expect_match);
900 CHECK(!failed);
901 }
902 delete prog;
903 re->Decref();
904}
905
906void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
907 Prog::Anchor anchor, bool expect_match) {
908 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
909 CHECK(re);
910 Prog* prog = re->CompileToProg(0);
911 CHECK(prog);
912 for (int i = 0; i < iters; i++) {
913 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
914 expect_match);
915 }
916 delete prog;
917 re->Decref();
918}
919
920void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
921 Prog::Anchor anchor, bool expect_match) {
922 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
923 CHECK(re);
924 Prog* prog = re->CompileToProg(0);
925 CHECK(prog);
926 CHECK(prog->IsOnePass());
927 for (int i = 0; i < iters; i++)
928 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
929 expect_match);
930 delete prog;
931 re->Decref();
932}
933
934void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
935 Prog::Anchor anchor, bool expect_match) {
936 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
937 CHECK(re);
938 Prog* prog = re->CompileToProg(0);
939 CHECK(prog);
940 for (int i = 0; i < iters; i++)
941 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
942 expect_match);
943 delete prog;
944 re->Decref();
945}
946
947void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
948 Prog::Anchor anchor, bool expect_match) {
949 PCRE re(regexp, PCRE::UTF8);
950 CHECK_EQ(re.error(), "");
951 for (int i = 0; i < iters; i++) {
952 if (anchor == Prog::kAnchored)
953 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
954 else
955 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
956 }
957}
958
959void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
960 Prog::Anchor anchor, bool expect_match) {
961 RE2 re(regexp);
962 CHECK_EQ(re.error(), "");
963 for (int i = 0; i < iters; i++) {
964 if (anchor == Prog::kAnchored)
965 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
966 else
967 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
968 }
969}
970
971
972// Runs implementation to full match regexp against text,
973// extracting three submatches. Expects match always.
974
975void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
976 for (int i = 0; i < iters; i++) {
977 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
978 CHECK(re);
979 Prog* prog = re->CompileToProg(0);
980 CHECK(prog);
981 StringPiece sp[4]; // 4 because sp[0] is whole match.
982 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
983 delete prog;
984 re->Decref();
985 }
986}
987
988void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
989 for (int i = 0; i < iters; i++) {
990 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
991 CHECK(re);
992 Prog* prog = re->CompileToProg(0);
993 CHECK(prog);
994 CHECK(prog->IsOnePass());
995 StringPiece sp[4]; // 4 because sp[0] is whole match.
996 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
997 delete prog;
998 re->Decref();
999 }
1000}
1001
1002void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
1003 for (int i = 0; i < iters; i++) {
1004 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1005 CHECK(re);
1006 Prog* prog = re->CompileToProg(0);
1007 CHECK(prog);
1008 StringPiece sp[4]; // 4 because sp[0] is whole match.
1009 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1010 delete prog;
1011 re->Decref();
1012 }
1013}
1014
1015void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
1016 for (int i = 0; i < iters; i++) {
1017 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1018 CHECK(re);
1019 Prog* prog = re->CompileToProg(0);
1020 CHECK(prog);
1021 StringPiece sp[4]; // 4 because sp[0] is whole match.
1022 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1023 delete prog;
1024 re->Decref();
1025 }
1026}
1027
1028void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
1029 for (int i = 0; i < iters; i++) {
1030 PCRE re(regexp, PCRE::UTF8);
1031 CHECK_EQ(re.error(), "");
1032 StringPiece sp1, sp2, sp3;
1033 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1034 }
1035}
1036
1037void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
1038 for (int i = 0; i < iters; i++) {
1039 RE2 re(regexp);
1040 CHECK_EQ(re.error(), "");
1041 StringPiece sp1, sp2, sp3;
1042 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1043 }
1044}
1045
1046void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1047 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1048 CHECK(re);
1049 Prog* prog = re->CompileToProg(0);
1050 CHECK(prog);
1051 StringPiece sp[4]; // 4 because sp[0] is whole match.
1052 for (int i = 0; i < iters; i++) {
1053 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1054 }
1055 delete prog;
1056 re->Decref();
1057}
1058
1059void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1060 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1061 CHECK(re);
1062 Prog* prog = re->CompileToProg(0);
1063 CHECK(prog);
1064 CHECK(prog->IsOnePass());
1065 StringPiece sp[4]; // 4 because sp[0] is whole match.
1066 for (int i = 0; i < iters; i++)
1067 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1068 delete prog;
1069 re->Decref();
1070}
1071
1072void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1073 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1074 CHECK(re);
1075 Prog* prog = re->CompileToProg(0);
1076 CHECK(prog);
1077 StringPiece sp[4]; // 4 because sp[0] is whole match.
1078 for (int i = 0; i < iters; i++)
1079 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1080 delete prog;
1081 re->Decref();
1082}
1083
1084void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1085 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1086 CHECK(re);
1087 Prog* prog = re->CompileToProg(0);
1088 CHECK(prog);
1089 StringPiece sp[4]; // 4 because sp[0] is whole match.
1090 for (int i = 0; i < iters; i++)
1091 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1092 delete prog;
1093 re->Decref();
1094}
1095
1096void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1097 PCRE re(regexp, PCRE::UTF8);
1098 CHECK_EQ(re.error(), "");
1099 StringPiece sp1, sp2, sp3;
1100 for (int i = 0; i < iters; i++) {
1101 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1102 }
1103}
1104
1105void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1106 RE2 re(regexp);
1107 CHECK_EQ(re.error(), "");
1108 StringPiece sp1, sp2, sp3;
1109 for (int i = 0; i < iters; i++) {
1110 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1111 }
1112}
1113
1114
1115// Runs implementation to full match regexp against text,
1116// extracting three submatches. Expects match always.
1117
1118void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
1119 for (int i = 0; i < iters; i++) {
1120 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1121 CHECK(re);
1122 Prog* prog = re->CompileToProg(0);
1123 CHECK(prog);
1124 StringPiece sp[2]; // 2 because sp[0] is whole match.
1125 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1126 delete prog;
1127 re->Decref();
1128 }
1129}
1130
1131void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
1132 for (int i = 0; i < iters; i++) {
1133 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1134 CHECK(re);
1135 Prog* prog = re->CompileToProg(0);
1136 CHECK(prog);
1137 CHECK(prog->IsOnePass());
1138 StringPiece sp[2]; // 2 because sp[0] is whole match.
1139 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1140 delete prog;
1141 re->Decref();
1142 }
1143}
1144
1145void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
1146 for (int i = 0; i < iters; i++) {
1147 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1148 CHECK(re);
1149 Prog* prog = re->CompileToProg(0);
1150 CHECK(prog);
1151 StringPiece sp[2]; // 2 because sp[0] is whole match.
1152 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1153 delete prog;
1154 re->Decref();
1155 }
1156}
1157
1158void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
1159 for (int i = 0; i < iters; i++) {
1160 PCRE re(regexp, PCRE::UTF8);
1161 CHECK_EQ(re.error(), "");
1162 StringPiece sp1;
1163 CHECK(PCRE::FullMatch(text, re, &sp1));
1164 }
1165}
1166
1167void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
1168 for (int i = 0; i < iters; i++) {
1169 RE2 re(regexp);
1170 CHECK_EQ(re.error(), "");
1171 StringPiece sp1;
1172 CHECK(RE2::FullMatch(text, re, &sp1));
1173 }
1174}
1175
1176void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1177 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1178 CHECK(re);
1179 Prog* prog = re->CompileToProg(0);
1180 CHECK(prog);
1181 StringPiece sp[2]; // 2 because sp[0] is whole match.
1182 for (int i = 0; i < iters; i++) {
1183 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1184 }
1185 delete prog;
1186 re->Decref();
1187}
1188
1189void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1190 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1191 CHECK(re);
1192 Prog* prog = re->CompileToProg(0);
1193 CHECK(prog);
1194 CHECK(prog->IsOnePass());
1195 StringPiece sp[2]; // 2 because sp[0] is whole match.
1196 for (int i = 0; i < iters; i++)
1197 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1198 delete prog;
1199 re->Decref();
1200}
1201
1202void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1203 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1204 CHECK(re);
1205 Prog* prog = re->CompileToProg(0);
1206 CHECK(prog);
1207 StringPiece sp[2]; // 2 because sp[0] is whole match.
1208 for (int i = 0; i < iters; i++)
1209 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1210 delete prog;
1211 re->Decref();
1212}
1213
1214void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1215 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1216 CHECK(re);
1217 Prog* prog = re->CompileToProg(0);
1218 CHECK(prog);
1219 StringPiece sp[2]; // 2 because sp[0] is whole match.
1220 for (int i = 0; i < iters; i++)
1221 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1222 delete prog;
1223 re->Decref();
1224}
1225
1226void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1227 PCRE re(regexp, PCRE::UTF8);
1228 CHECK_EQ(re.error(), "");
1229 StringPiece sp1;
1230 for (int i = 0; i < iters; i++) {
1231 CHECK(PCRE::FullMatch(text, re, &sp1));
1232 }
1233}
1234
1235void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1236 RE2 re(regexp);
1237 CHECK_EQ(re.error(), "");
1238 StringPiece sp1;
1239 for (int i = 0; i < iters; i++) {
1240 CHECK(RE2::FullMatch(text, re, &sp1));
1241 }
1242}
1243
1244void SearchParse2CachedPCRE(int iters, const char* regexp,
1245 const StringPiece& text) {
1246 PCRE re(regexp, PCRE::UTF8);
1247 CHECK_EQ(re.error(), "");
1248 for (int i = 0; i < iters; i++) {
1249 StringPiece sp1, sp2;
1250 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1251 }
1252}
1253
1254void SearchParse2CachedRE2(int iters, const char* regexp,
1255 const StringPiece& text) {
1256 RE2 re(regexp);
1257 CHECK_EQ(re.error(), "");
1258 for (int i = 0; i < iters; i++) {
1259 StringPiece sp1, sp2;
1260 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1261 }
1262}
1263
1264void SearchParse1CachedPCRE(int iters, const char* regexp,
1265 const StringPiece& text) {
1266 PCRE re(regexp, PCRE::UTF8);
1267 CHECK_EQ(re.error(), "");
1268 for (int i = 0; i < iters; i++) {
1269 StringPiece sp1;
1270 CHECK(PCRE::PartialMatch(text, re, &sp1));
1271 }
1272}
1273
1274void SearchParse1CachedRE2(int iters, const char* regexp,
1275 const StringPiece& text) {
1276 RE2 re(regexp);
1277 CHECK_EQ(re.error(), "");
1278 for (int i = 0; i < iters; i++) {
1279 StringPiece sp1;
1280 CHECK(RE2::PartialMatch(text, re, &sp1));
1281 }
1282}
1283
1284void EmptyPartialMatchPCRE(int n) {
1285 PCRE re("");
1286 for (int i = 0; i < n; i++) {
1287 PCRE::PartialMatch("", re);
1288 }
1289}
1290
1291void EmptyPartialMatchRE2(int n) {
1292 RE2 re("");
1293 for (int i = 0; i < n; i++) {
1294 RE2::PartialMatch("", re);
1295 }
1296}
1297#ifdef USEPCRE
1298BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1299#endif
1300BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1301
1302void SimplePartialMatchPCRE(int n) {
1303 PCRE re("abcdefg");
1304 for (int i = 0; i < n; i++) {
1305 PCRE::PartialMatch("abcdefg", re);
1306 }
1307}
1308
1309void SimplePartialMatchRE2(int n) {
1310 RE2 re("abcdefg");
1311 for (int i = 0; i < n; i++) {
1312 RE2::PartialMatch("abcdefg", re);
1313 }
1314}
1315#ifdef USEPCRE
1316BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1317#endif
1318BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1319
1320static string http_text =
1321 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1322 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1323
1324void HTTPPartialMatchPCRE(int n) {
1325 StringPiece a;
1326 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1327 for (int i = 0; i < n; i++) {
1328 PCRE::PartialMatch(http_text, re, &a);
1329 }
1330}
1331
1332void HTTPPartialMatchRE2(int n) {
1333 StringPiece a;
1334 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1335 for (int i = 0; i < n; i++) {
1336 RE2::PartialMatch(http_text, re, &a);
1337 }
1338}
1339
1340#ifdef USEPCRE
1341BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1342#endif
1343BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1344
1345static string http_smalltext =
1346 "GET /abc HTTP/1.1";
1347
1348void SmallHTTPPartialMatchPCRE(int n) {
1349 StringPiece a;
1350 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1351 for (int i = 0; i < n; i++) {
1352 PCRE::PartialMatch(http_text, re, &a);
1353 }
1354}
1355
1356void SmallHTTPPartialMatchRE2(int n) {
1357 StringPiece a;
1358 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1359 for (int i = 0; i < n; i++) {
1360 RE2::PartialMatch(http_text, re, &a);
1361 }
1362}
1363
1364#ifdef USEPCRE
1365BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1366#endif
1367BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1368
1369void DotMatchPCRE(int n) {
1370 StringPiece a;
1371 PCRE re("(?-s)^(.+)");
1372 for (int i = 0; i < n; i++) {
1373 PCRE::PartialMatch(http_text, re, &a);
1374 }
1375}
1376
1377void DotMatchRE2(int n) {
1378 StringPiece a;
1379 RE2 re("(?-s)^(.+)");
1380 for (int i = 0; i < n; i++) {
1381 RE2::PartialMatch(http_text, re, &a);
1382 }
1383}
1384
1385#ifdef USEPCRE
1386BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1387#endif
1388BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1389
1390void ASCIIMatchPCRE(int n) {
1391 StringPiece a;
1392 PCRE re("(?-s)^([ -~]+)");
1393 for (int i = 0; i < n; i++) {
1394 PCRE::PartialMatch(http_text, re, &a);
1395 }
1396}
1397
1398void ASCIIMatchRE2(int n) {
1399 StringPiece a;
1400 RE2 re("(?-s)^([ -~]+)");
1401 for (int i = 0; i < n; i++) {
1402 RE2::PartialMatch(http_text, re, &a);
1403 }
1404}
1405
1406#ifdef USEPCRE
1407BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1408#endif
1409BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1410
1411void FullMatchPCRE(int iter, int n, const char *regexp) {
1412 StopBenchmarkTiming();
1413 string s;
1414 MakeText(&s, n);
1415 s += "ABCDEFGHIJ";
1416 BenchmarkMemoryUsage();
1417 PCRE re(regexp);
1418 StartBenchmarkTiming();
1419 for (int i = 0; i < iter; i++)
1420 CHECK(PCRE::FullMatch(s, re));
1421 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1422}
1423
1424void FullMatchRE2(int iter, int n, const char *regexp) {
1425 StopBenchmarkTiming();
1426 string s;
1427 MakeText(&s, n);
1428 s += "ABCDEFGHIJ";
1429 BenchmarkMemoryUsage();
1430 RE2 re(regexp, RE2::Latin1);
1431 StartBenchmarkTiming();
1432 for (int i = 0; i < iter; i++)
1433 CHECK(RE2::FullMatch(s, re));
1434 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1435}
1436
1437void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); }
1438void FullMatch_DotStar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*"); }
1439
1440void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); }
1441void FullMatch_DotStarDollar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*$"); }
1442
1443void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); }
1444void FullMatch_DotStarCapture_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s)((.*)()()($))"); }
1445
1446#ifdef USEPCRE
1447BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1448#endif
1449BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
1450
1451#ifdef USEPCRE
1452BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1453#endif
1454BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
1455
1456#ifdef USEPCRE
1457BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1458#endif
1459BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
1460
1461} // namespace re2