Alexander Gutkin | 0d4c523 | 2013-02-28 13:47:27 +0000 | [diff] [blame] | 1 | // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Benchmarks for regular expression implementations. |
| 6 | |
| 7 | #include "util/test.h" |
| 8 | #include "re2/prog.h" |
| 9 | #include "re2/re2.h" |
| 10 | #include "re2/regexp.h" |
| 11 | #include "util/pcre.h" |
| 12 | #include "util/benchmark.h" |
| 13 | |
| 14 | namespace re2 { |
| 15 | void Test(); |
| 16 | void MemoryUsage(); |
| 17 | } // namespace re2 |
| 18 | |
| 19 | typedef testing::MallocCounter MallocCounter; |
| 20 | |
| 21 | namespace re2 { |
| 22 | |
| 23 | void Test() { |
| 24 | Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL); |
| 25 | CHECK(re); |
| 26 | Prog* prog = re->CompileToProg(0); |
| 27 | CHECK(prog); |
| 28 | CHECK(prog->IsOnePass()); |
| 29 | const char* text = "650-253-0001"; |
| 30 | StringPiece sp[4]; |
| 31 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 32 | CHECK_EQ(sp[0], "650-253-0001"); |
| 33 | CHECK_EQ(sp[1], "650"); |
| 34 | CHECK_EQ(sp[2], "253"); |
| 35 | CHECK_EQ(sp[3], "0001"); |
| 36 | delete prog; |
| 37 | re->Decref(); |
| 38 | LOG(INFO) << "test passed\n"; |
| 39 | } |
| 40 | |
| 41 | void MemoryUsage() { |
| 42 | const char* regexp = "(\\d+)-(\\d+)-(\\d+)"; |
| 43 | const char* text = "650-253-0001"; |
| 44 | { |
| 45 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
| 46 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 47 | CHECK(re); |
| 48 | // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly, |
| 49 | // because LOG(INFO) might do a big allocation before they get evaluated. |
| 50 | fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 51 | mc.Reset(); |
| 52 | |
| 53 | Prog* prog = re->CompileToProg(0); |
| 54 | CHECK(prog); |
| 55 | CHECK(prog->IsOnePass()); |
| 56 | fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 57 | mc.Reset(); |
| 58 | |
| 59 | StringPiece sp[4]; |
| 60 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 61 | fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 62 | delete prog; |
| 63 | re->Decref(); |
| 64 | } |
| 65 | |
| 66 | { |
| 67 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
| 68 | |
| 69 | PCRE re(regexp, PCRE::UTF8); |
| 70 | fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 71 | PCRE::FullMatch(text, re); |
| 72 | fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 73 | } |
| 74 | |
| 75 | { |
| 76 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
| 77 | |
| 78 | PCRE* re = new PCRE(regexp, PCRE::UTF8); |
| 79 | fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 80 | PCRE::FullMatch(text, *re); |
| 81 | fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 82 | delete re; |
| 83 | } |
| 84 | |
| 85 | { |
| 86 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
| 87 | |
| 88 | RE2 re(regexp); |
| 89 | fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 90 | RE2::FullMatch(text, re); |
| 91 | fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); |
| 92 | } |
| 93 | |
| 94 | fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n", |
| 95 | static_cast<int>(sizeof(PCRE)), |
| 96 | static_cast<int>(sizeof(RE2)), |
| 97 | static_cast<int>(sizeof(Prog)), |
| 98 | static_cast<int>(sizeof(Prog::Inst))); |
| 99 | } |
| 100 | |
| 101 | // Regular expression implementation wrappers. |
| 102 | // Defined at bottom of file, but they are repetitive |
| 103 | // and not interesting. |
| 104 | |
| 105 | typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text, |
| 106 | Prog::Anchor anchor, bool expect_match); |
| 107 | |
| 108 | SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, |
| 109 | SearchPCRE, SearchRE2, |
| 110 | SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState, |
| 111 | SearchCachedPCRE, SearchCachedRE2; |
| 112 | |
| 113 | typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text); |
| 114 | |
| 115 | ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, |
| 116 | Parse1PCRE, Parse1RE2, |
| 117 | Parse1Backtrack, |
| 118 | Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState, |
| 119 | Parse1CachedPCRE, Parse1CachedRE2, |
| 120 | Parse1CachedBacktrack; |
| 121 | |
| 122 | ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, |
| 123 | Parse3PCRE, Parse3RE2, |
| 124 | Parse3Backtrack, |
| 125 | Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState, |
| 126 | Parse3CachedPCRE, Parse3CachedRE2, |
| 127 | Parse3CachedBacktrack; |
| 128 | |
| 129 | ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2; |
| 130 | |
| 131 | ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2; |
| 132 | |
| 133 | // Benchmark: failed search for regexp in random text. |
| 134 | |
| 135 | // Generate random text that won't contain the search string, |
| 136 | // to test worst-case search behavior. |
| 137 | void MakeText(string* text, int nbytes) { |
| 138 | text->resize(nbytes); |
| 139 | srand(0); |
| 140 | for (int i = 0; i < nbytes; i++) { |
| 141 | if (!rand()%30) |
| 142 | (*text)[i] = '\n'; |
| 143 | else |
| 144 | (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20; |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | // Makes text of size nbytes, then calls run to search |
| 149 | // the text for regexp iters times. |
| 150 | void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { |
| 151 | StopBenchmarkTiming(); |
| 152 | string s; |
| 153 | MakeText(&s, nbytes); |
| 154 | BenchmarkMemoryUsage(); |
| 155 | StartBenchmarkTiming(); |
| 156 | search(iters, regexp, s, Prog::kUnanchored, false); |
| 157 | SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); |
| 158 | } |
| 159 | |
| 160 | // These two are easy because they start with an A, |
| 161 | // giving the search loop something to memchr for. |
| 162 | #define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" |
| 163 | #define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" |
| 164 | |
| 165 | // This is a little harder, since it starts with a character class |
| 166 | // and thus can't be memchr'ed. Could look for ABC and work backward, |
| 167 | // but no one does that. |
| 168 | #define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" |
| 169 | |
| 170 | // This is a fair amount harder, because of the leading [ -~]*. |
| 171 | // A bad backtracking implementation will take O(text^2) time to |
| 172 | // figure out there's no match. |
| 173 | #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" |
| 174 | |
| 175 | // This stresses engines that are trying to track parentheses. |
| 176 | #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \ |
| 177 | "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" |
| 178 | |
| 179 | void Search_Easy0_CachedDFA(int i, int n) { Search(i, n, EASY0, SearchCachedDFA); } |
| 180 | void Search_Easy0_CachedNFA(int i, int n) { Search(i, n, EASY0, SearchCachedNFA); } |
| 181 | void Search_Easy0_CachedPCRE(int i, int n) { Search(i, n, EASY0, SearchCachedPCRE); } |
| 182 | void Search_Easy0_CachedRE2(int i, int n) { Search(i, n, EASY0, SearchCachedRE2); } |
| 183 | |
| 184 | BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 185 | BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
| 186 | #ifdef USEPCRE |
| 187 | BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 188 | #endif |
| 189 | BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 190 | |
| 191 | void Search_Easy1_CachedDFA(int i, int n) { Search(i, n, EASY1, SearchCachedDFA); } |
| 192 | void Search_Easy1_CachedNFA(int i, int n) { Search(i, n, EASY1, SearchCachedNFA); } |
| 193 | void Search_Easy1_CachedPCRE(int i, int n) { Search(i, n, EASY1, SearchCachedPCRE); } |
| 194 | void Search_Easy1_CachedRE2(int i, int n) { Search(i, n, EASY1, SearchCachedRE2); } |
| 195 | |
| 196 | BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 197 | BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
| 198 | #ifdef USEPCRE |
| 199 | BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 200 | #endif |
| 201 | BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 202 | |
| 203 | void Search_Medium_CachedDFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedDFA); } |
| 204 | void Search_Medium_CachedNFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedNFA); } |
| 205 | void Search_Medium_CachedPCRE(int i, int n) { Search(i, n, MEDIUM, SearchCachedPCRE); } |
| 206 | void Search_Medium_CachedRE2(int i, int n) { Search(i, n, MEDIUM, SearchCachedRE2); } |
| 207 | |
| 208 | BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 209 | BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
| 210 | #ifdef USEPCRE |
| 211 | BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
| 212 | #endif |
| 213 | BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 214 | |
| 215 | void Search_Hard_CachedDFA(int i, int n) { Search(i, n, HARD, SearchCachedDFA); } |
| 216 | void Search_Hard_CachedNFA(int i, int n) { Search(i, n, HARD, SearchCachedNFA); } |
| 217 | void Search_Hard_CachedPCRE(int i, int n) { Search(i, n, HARD, SearchCachedPCRE); } |
| 218 | void Search_Hard_CachedRE2(int i, int n) { Search(i, n, HARD, SearchCachedRE2); } |
| 219 | |
| 220 | BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 221 | BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
| 222 | #ifdef USEPCRE |
| 223 | BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs()); |
| 224 | #endif |
| 225 | BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 226 | |
| 227 | void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCachedDFA); } |
| 228 | void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCachedNFA); } |
| 229 | void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCachedPCRE); } |
| 230 | void Search_Parens_CachedRE2(int i, int n) { Search(i, n, PARENS, SearchCachedRE2); } |
| 231 | |
| 232 | BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 233 | BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
| 234 | #ifdef USEPCRE |
| 235 | BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs()); |
| 236 | #endif |
| 237 | BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 238 | |
| 239 | void SearchBigFixed(int iters, int nbytes, SearchImpl* search) { |
| 240 | StopBenchmarkTiming(); |
| 241 | string s; |
| 242 | s.append(nbytes/2, 'x'); |
| 243 | string regexp = "^" + s + ".*$"; |
| 244 | string t; |
| 245 | MakeText(&t, nbytes/2); |
| 246 | s += t; |
| 247 | BenchmarkMemoryUsage(); |
| 248 | StartBenchmarkTiming(); |
| 249 | search(iters, regexp.c_str(), s, Prog::kUnanchored, true); |
| 250 | SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); |
| 251 | } |
| 252 | |
| 253 | void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCachedDFA); } |
| 254 | void Search_BigFixed_CachedNFA(int i, int n) { SearchBigFixed(i, n, SearchCachedNFA); } |
| 255 | void Search_BigFixed_CachedPCRE(int i, int n) { SearchBigFixed(i, n, SearchCachedPCRE); } |
| 256 | void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCachedRE2); } |
| 257 | |
| 258 | BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs()); |
| 259 | BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs()); |
| 260 | #ifdef USEPCRE |
| 261 | BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs()); |
| 262 | #endif |
| 263 | BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs()); |
| 264 | |
| 265 | // Benchmark: FindAndConsume |
| 266 | void FindAndConsume(int iters, int nbytes) { |
| 267 | StopBenchmarkTiming(); |
| 268 | string s; |
| 269 | MakeText(&s, nbytes); |
| 270 | s.append("Hello World"); |
| 271 | StartBenchmarkTiming(); |
| 272 | RE2 re("((Hello World))"); |
| 273 | for (int i = 0; i < iters; i++) { |
| 274 | StringPiece t = s; |
| 275 | StringPiece u; |
| 276 | CHECK(RE2::FindAndConsume(&t, re, &u)); |
| 277 | CHECK_EQ(u, "Hello World"); |
| 278 | } |
| 279 | SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); |
| 280 | } |
| 281 | |
| 282 | BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 283 | |
| 284 | // Benchmark: successful anchored search. |
| 285 | |
| 286 | void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) { |
| 287 | string s; |
| 288 | MakeText(&s, nbytes); |
| 289 | BenchmarkMemoryUsage(); |
| 290 | search(iters, regexp, s, Prog::kAnchored, true); |
| 291 | SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); |
| 292 | } |
| 293 | |
| 294 | // Unambiguous search (RE2 can use OnePass). |
| 295 | |
| 296 | void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); } |
| 297 | void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); } |
| 298 | void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); } |
| 299 | void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); } |
| 300 | |
| 301 | BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 302 | #ifdef USEPCRE |
| 303 | BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 304 | #endif |
| 305 | BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 306 | BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); |
| 307 | |
| 308 | void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); } |
| 309 | void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); } |
| 310 | void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); } |
| 311 | void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); } |
| 312 | |
| 313 | BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 314 | #ifdef USEPCRE |
| 315 | BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 316 | #endif |
| 317 | BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 318 | BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); |
| 319 | |
| 320 | // Ambiguous search (RE2 cannot use OnePass). |
| 321 | |
| 322 | void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchDFA); } |
| 323 | void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchPCRE); } |
| 324 | void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchRE2); } |
| 325 | void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", SearchBitState); } |
| 326 | |
| 327 | BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 328 | #ifdef USEPCRE |
| 329 | BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 330 | #endif |
| 331 | BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 332 | BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); |
| 333 | |
| 334 | void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedDFA); } |
| 335 | void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); } |
| 336 | void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedRE2); } |
| 337 | |
| 338 | BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 339 | #ifdef USEPCRE |
| 340 | BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 341 | #endif |
| 342 | BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 343 | |
| 344 | // Benchmark: use regexp to find phone number. |
| 345 | |
| 346 | void SearchDigits(int iters, SearchImpl* search) { |
| 347 | const char *text = "650-253-0001"; |
| 348 | int len = strlen(text); |
| 349 | BenchmarkMemoryUsage(); |
| 350 | search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", |
| 351 | StringPiece(text, len), Prog::kAnchored, true); |
| 352 | SetBenchmarkItemsProcessed(iters); |
| 353 | } |
| 354 | |
| 355 | void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); } |
| 356 | void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); } |
| 357 | void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); } |
| 358 | void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); } |
| 359 | void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); } |
| 360 | void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); } |
| 361 | |
| 362 | BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs()); |
| 363 | BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs()); |
| 364 | BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs()); |
| 365 | #ifdef USEPCRE |
| 366 | BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs()); |
| 367 | #endif |
| 368 | BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs()); |
| 369 | BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs()); |
| 370 | |
| 371 | // Benchmark: use regexp to parse digit fields in phone number. |
| 372 | |
| 373 | void Parse3Digits(int iters, |
| 374 | void (*parse3)(int, const char*, const StringPiece&)) { |
| 375 | BenchmarkMemoryUsage(); |
| 376 | parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001"); |
| 377 | SetBenchmarkItemsProcessed(iters); |
| 378 | } |
| 379 | |
| 380 | void Parse_Digits_NFA(int i) { Parse3Digits(i, Parse3NFA); } |
| 381 | void Parse_Digits_OnePass(int i) { Parse3Digits(i, Parse3OnePass); } |
| 382 | void Parse_Digits_PCRE(int i) { Parse3Digits(i, Parse3PCRE); } |
| 383 | void Parse_Digits_RE2(int i) { Parse3Digits(i, Parse3RE2); } |
| 384 | void Parse_Digits_Backtrack(int i) { Parse3Digits(i, Parse3Backtrack); } |
| 385 | void Parse_Digits_BitState(int i) { Parse3Digits(i, Parse3BitState); } |
| 386 | |
| 387 | BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs()); |
| 388 | BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs()); |
| 389 | #ifdef USEPCRE |
| 390 | BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs()); |
| 391 | #endif |
| 392 | BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs()); |
| 393 | BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs()); |
| 394 | BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs()); |
| 395 | |
| 396 | void Parse_CachedDigits_NFA(int i) { Parse3Digits(i, Parse3CachedNFA); } |
| 397 | void Parse_CachedDigits_OnePass(int i) { Parse3Digits(i, Parse3CachedOnePass); } |
| 398 | void Parse_CachedDigits_PCRE(int i) { Parse3Digits(i, Parse3CachedPCRE); } |
| 399 | void Parse_CachedDigits_RE2(int i) { Parse3Digits(i, Parse3CachedRE2); } |
| 400 | void Parse_CachedDigits_Backtrack(int i) { Parse3Digits(i, Parse3CachedBacktrack); } |
| 401 | void Parse_CachedDigits_BitState(int i) { Parse3Digits(i, Parse3CachedBitState); } |
| 402 | |
| 403 | BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs()); |
| 404 | BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs()); |
| 405 | #ifdef USEPCRE |
| 406 | BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs()); |
| 407 | #endif |
| 408 | BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs()); |
| 409 | BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs()); |
| 410 | BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs()); |
| 411 | |
| 412 | void Parse3DigitDs(int iters, |
| 413 | void (*parse3)(int, const char*, const StringPiece&)) { |
| 414 | BenchmarkMemoryUsage(); |
| 415 | parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001"); |
| 416 | SetBenchmarkItemsProcessed(iters); |
| 417 | } |
| 418 | |
| 419 | void Parse_DigitDs_NFA(int i) { Parse3DigitDs(i, Parse3NFA); } |
| 420 | void Parse_DigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3OnePass); } |
| 421 | void Parse_DigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3PCRE); } |
| 422 | void Parse_DigitDs_RE2(int i) { Parse3DigitDs(i, Parse3RE2); } |
| 423 | void Parse_DigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); } |
| 424 | void Parse_DigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); } |
| 425 | |
| 426 | BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs()); |
| 427 | BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs()); |
| 428 | #ifdef USEPCRE |
| 429 | BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs()); |
| 430 | #endif |
| 431 | BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs()); |
| 432 | BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs()); |
| 433 | BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs()); |
| 434 | |
| 435 | void Parse_CachedDigitDs_NFA(int i) { Parse3DigitDs(i, Parse3CachedNFA); } |
| 436 | void Parse_CachedDigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3CachedOnePass); } |
| 437 | void Parse_CachedDigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3CachedPCRE); } |
| 438 | void Parse_CachedDigitDs_RE2(int i) { Parse3DigitDs(i, Parse3CachedRE2); } |
| 439 | void Parse_CachedDigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); } |
| 440 | void Parse_CachedDigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); } |
| 441 | |
| 442 | BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs()); |
| 443 | BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs()); |
| 444 | #ifdef USEPCRE |
| 445 | BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs()); |
| 446 | #endif |
| 447 | BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs()); |
| 448 | BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs()); |
| 449 | BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs()); |
| 450 | |
| 451 | // Benchmark: splitting off leading number field. |
| 452 | |
| 453 | void Parse1Split(int iters, |
| 454 | void (*parse1)(int, const char*, const StringPiece&)) { |
| 455 | BenchmarkMemoryUsage(); |
| 456 | parse1(iters, "[0-9]+-(.*)", "650-253-0001"); |
| 457 | SetBenchmarkItemsProcessed(iters); |
| 458 | } |
| 459 | |
| 460 | void Parse_Split_NFA(int i) { Parse1Split(i, Parse1NFA); } |
| 461 | void Parse_Split_OnePass(int i) { Parse1Split(i, Parse1OnePass); } |
| 462 | void Parse_Split_PCRE(int i) { Parse1Split(i, Parse1PCRE); } |
| 463 | void Parse_Split_RE2(int i) { Parse1Split(i, Parse1RE2); } |
| 464 | void Parse_Split_BitState(int i) { Parse1Split(i, Parse1BitState); } |
| 465 | |
| 466 | BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs()); |
| 467 | BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs()); |
| 468 | #ifdef USEPCRE |
| 469 | BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs()); |
| 470 | #endif |
| 471 | BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs()); |
| 472 | BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs()); |
| 473 | |
| 474 | void Parse_CachedSplit_NFA(int i) { Parse1Split(i, Parse1CachedNFA); } |
| 475 | void Parse_CachedSplit_OnePass(int i) { Parse1Split(i, Parse1CachedOnePass); } |
| 476 | void Parse_CachedSplit_PCRE(int i) { Parse1Split(i, Parse1CachedPCRE); } |
| 477 | void Parse_CachedSplit_RE2(int i) { Parse1Split(i, Parse1CachedRE2); } |
| 478 | void Parse_CachedSplit_BitState(int i) { Parse1Split(i, Parse1CachedBitState); } |
| 479 | |
| 480 | BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs()); |
| 481 | BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs()); |
| 482 | #ifdef USEPCRE |
| 483 | BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs()); |
| 484 | #endif |
| 485 | BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs()); |
| 486 | BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs()); |
| 487 | |
| 488 | // Benchmark: splitting off leading number field but harder (ambiguous regexp). |
| 489 | |
| 490 | void Parse1SplitHard(int iters, |
| 491 | void (*run)(int, const char*, const StringPiece&)) { |
| 492 | BenchmarkMemoryUsage(); |
| 493 | run(iters, "[0-9]+.(.*)", "650-253-0001"); |
| 494 | SetBenchmarkItemsProcessed(iters); |
| 495 | } |
| 496 | |
| 497 | void Parse_SplitHard_NFA(int i) { Parse1SplitHard(i, Parse1NFA); } |
| 498 | void Parse_SplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1PCRE); } |
| 499 | void Parse_SplitHard_RE2(int i) { Parse1SplitHard(i, Parse1RE2); } |
| 500 | void Parse_SplitHard_BitState(int i) { Parse1SplitHard(i, Parse1BitState); } |
| 501 | |
| 502 | #ifdef USEPCRE |
| 503 | BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs()); |
| 504 | #endif |
| 505 | BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs()); |
| 506 | BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs()); |
| 507 | BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs()); |
| 508 | |
| 509 | void Parse_CachedSplitHard_NFA(int i) { Parse1SplitHard(i, Parse1CachedNFA); } |
| 510 | void Parse_CachedSplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1CachedPCRE); } |
| 511 | void Parse_CachedSplitHard_RE2(int i) { Parse1SplitHard(i, Parse1CachedRE2); } |
| 512 | void Parse_CachedSplitHard_BitState(int i) { Parse1SplitHard(i, Parse1CachedBitState); } |
| 513 | void Parse_CachedSplitHard_Backtrack(int i) { Parse1SplitHard(i, Parse1CachedBacktrack); } |
| 514 | |
| 515 | #ifdef USEPCRE |
| 516 | BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs()); |
| 517 | #endif |
| 518 | BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs()); |
| 519 | BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs()); |
| 520 | BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs()); |
| 521 | BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs()); |
| 522 | |
| 523 | // Benchmark: Parse1SplitHard, big text, small match. |
| 524 | |
| 525 | void Parse1SplitBig1(int iters, |
| 526 | void (*run)(int, const char*, const StringPiece&)) { |
| 527 | string s; |
| 528 | s.append(100000, 'x'); |
| 529 | s.append("650-253-0001"); |
| 530 | BenchmarkMemoryUsage(); |
| 531 | run(iters, "[0-9]+.(.*)", s); |
| 532 | SetBenchmarkItemsProcessed(iters); |
| 533 | } |
| 534 | |
| 535 | void Parse_CachedSplitBig1_PCRE(int i) { Parse1SplitBig1(i, SearchParse1CachedPCRE); } |
| 536 | void Parse_CachedSplitBig1_RE2(int i) { Parse1SplitBig1(i, SearchParse1CachedRE2); } |
| 537 | |
| 538 | #ifdef USEPCRE |
| 539 | BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs()); |
| 540 | #endif |
| 541 | BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs()); |
| 542 | |
| 543 | // Benchmark: Parse1SplitHard, big text, big match. |
| 544 | |
| 545 | void Parse1SplitBig2(int iters, |
| 546 | void (*run)(int, const char*, const StringPiece&)) { |
| 547 | string s; |
| 548 | s.append("650-253-"); |
| 549 | s.append(100000, '0'); |
| 550 | BenchmarkMemoryUsage(); |
| 551 | run(iters, "[0-9]+.(.*)", s); |
| 552 | SetBenchmarkItemsProcessed(iters); |
| 553 | } |
| 554 | |
| 555 | void Parse_CachedSplitBig2_PCRE(int i) { Parse1SplitBig2(i, SearchParse1CachedPCRE); } |
| 556 | void Parse_CachedSplitBig2_RE2(int i) { Parse1SplitBig2(i, SearchParse1CachedRE2); } |
| 557 | |
| 558 | #ifdef USEPCRE |
| 559 | BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs()); |
| 560 | #endif |
| 561 | BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs()); |
| 562 | |
| 563 | // Benchmark: measure time required to parse (but not execute) |
| 564 | // a simple regular expression. |
| 565 | |
| 566 | void ParseRegexp(int iters, const string& regexp) { |
| 567 | for (int i = 0; i < iters; i++) { |
| 568 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 569 | CHECK(re); |
| 570 | re->Decref(); |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | void SimplifyRegexp(int iters, const string& regexp) { |
| 575 | for (int i = 0; i < iters; i++) { |
| 576 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 577 | CHECK(re); |
| 578 | Regexp* sre = re->Simplify(); |
| 579 | CHECK(sre); |
| 580 | sre->Decref(); |
| 581 | re->Decref(); |
| 582 | } |
| 583 | } |
| 584 | |
| 585 | void NullWalkRegexp(int iters, const string& regexp) { |
| 586 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 587 | CHECK(re); |
| 588 | for (int i = 0; i < iters; i++) { |
| 589 | re->NullWalk(); |
| 590 | } |
| 591 | re->Decref(); |
| 592 | } |
| 593 | |
| 594 | void SimplifyCompileRegexp(int iters, const string& regexp) { |
| 595 | for (int i = 0; i < iters; i++) { |
| 596 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 597 | CHECK(re); |
| 598 | Regexp* sre = re->Simplify(); |
| 599 | CHECK(sre); |
| 600 | Prog* prog = sre->CompileToProg(0); |
| 601 | CHECK(prog); |
| 602 | delete prog; |
| 603 | sre->Decref(); |
| 604 | re->Decref(); |
| 605 | } |
| 606 | } |
| 607 | |
| 608 | void CompileRegexp(int iters, const string& regexp) { |
| 609 | for (int i = 0; i < iters; i++) { |
| 610 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 611 | CHECK(re); |
| 612 | Prog* prog = re->CompileToProg(0); |
| 613 | CHECK(prog); |
| 614 | delete prog; |
| 615 | re->Decref(); |
| 616 | } |
| 617 | } |
| 618 | |
| 619 | void CompileToProg(int iters, const string& regexp) { |
| 620 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 621 | CHECK(re); |
| 622 | for (int i = 0; i < iters; i++) { |
| 623 | Prog* prog = re->CompileToProg(0); |
| 624 | CHECK(prog); |
| 625 | delete prog; |
| 626 | } |
| 627 | re->Decref(); |
| 628 | } |
| 629 | |
| 630 | void CompileByteMap(int iters, const string& regexp) { |
| 631 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 632 | CHECK(re); |
| 633 | Prog* prog = re->CompileToProg(0); |
| 634 | CHECK(prog); |
| 635 | for (int i = 0; i < iters; i++) { |
| 636 | prog->ComputeByteMap(); |
| 637 | } |
| 638 | delete prog; |
| 639 | re->Decref(); |
| 640 | } |
| 641 | |
| 642 | void CompilePCRE(int iters, const string& regexp) { |
| 643 | for (int i = 0; i < iters; i++) { |
| 644 | PCRE re(regexp, PCRE::UTF8); |
| 645 | CHECK_EQ(re.error(), ""); |
| 646 | } |
| 647 | } |
| 648 | |
| 649 | void CompileRE2(int iters, const string& regexp) { |
| 650 | for (int i = 0; i < iters; i++) { |
| 651 | RE2 re(regexp); |
| 652 | CHECK_EQ(re.error(), ""); |
| 653 | } |
| 654 | } |
| 655 | |
| 656 | void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) { |
| 657 | run(iters, regexp); |
| 658 | SetBenchmarkItemsProcessed(iters); |
| 659 | } |
| 660 | |
| 661 | } // namespace re2 |
| 662 | |
| 663 | DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks"); |
| 664 | |
| 665 | namespace re2 { |
| 666 | |
| 667 | void BM_PCRE_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); } |
| 668 | void BM_Regexp_Parse(int i) { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); } |
| 669 | void BM_Regexp_Simplify(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); } |
| 670 | void BM_CompileToProg(int i) { RunBuild(i, FLAGS_compile_regexp, CompileToProg); } |
| 671 | void BM_CompileByteMap(int i) { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); } |
| 672 | void BM_Regexp_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); } |
| 673 | void BM_Regexp_SimplifyCompile(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); } |
| 674 | void BM_Regexp_NullWalk(int i) { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); } |
| 675 | void BM_RE2_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRE2); } |
| 676 | |
| 677 | #ifdef USEPCRE |
| 678 | BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs()); |
| 679 | #endif |
| 680 | BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs()); |
| 681 | BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs()); |
| 682 | BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs()); |
| 683 | BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs()); |
| 684 | BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs()); |
| 685 | BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs()); |
| 686 | BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs()); |
| 687 | BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs()); |
| 688 | |
| 689 | |
| 690 | // Makes text of size nbytes, then calls run to search |
| 691 | // the text for regexp iters times. |
| 692 | void SearchPhone(int iters, int nbytes, ParseImpl* search) { |
| 693 | StopBenchmarkTiming(); |
| 694 | string s; |
| 695 | MakeText(&s, nbytes); |
| 696 | s.append("(650) 253-0001"); |
| 697 | BenchmarkMemoryUsage(); |
| 698 | StartBenchmarkTiming(); |
| 699 | search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s); |
| 700 | SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); |
| 701 | } |
| 702 | |
| 703 | void SearchPhone_CachedPCRE(int i, int n) { |
| 704 | SearchPhone(i, n, SearchParse2CachedPCRE); |
| 705 | } |
| 706 | void SearchPhone_CachedRE2(int i, int n) { |
| 707 | SearchPhone(i, n, SearchParse2CachedRE2); |
| 708 | } |
| 709 | |
| 710 | #ifdef USEPCRE |
| 711 | BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 712 | #endif |
| 713 | BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
| 714 | |
| 715 | /* |
| 716 | TODO(rsc): Make this work again. |
| 717 | |
| 718 | // Generates and returns a string over binary alphabet {0,1} that contains |
| 719 | // all possible binary sequences of length n as subsequences. The obvious |
| 720 | // brute force method would generate a string of length n * 2^n, but this |
| 721 | // generates a string of length n + 2^n - 1 called a De Bruijn cycle. |
| 722 | // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17. |
| 723 | static string DeBruijnString(int n) { |
| 724 | CHECK_LT(n, 8*sizeof(int)); |
| 725 | CHECK_GT(n, 0); |
| 726 | |
| 727 | vector<bool> did(1<<n); |
| 728 | for (int i = 0; i < 1<<n; i++) |
| 729 | did[i] = false; |
| 730 | |
| 731 | string s; |
| 732 | for (int i = 0; i < n-1; i++) |
| 733 | s.append("0"); |
| 734 | int bits = 0; |
| 735 | int mask = (1<<n) - 1; |
| 736 | for (int i = 0; i < (1<<n); i++) { |
| 737 | bits <<= 1; |
| 738 | bits &= mask; |
| 739 | if (!did[bits|1]) { |
| 740 | bits |= 1; |
| 741 | s.append("1"); |
| 742 | } else { |
| 743 | s.append("0"); |
| 744 | } |
| 745 | CHECK(!did[bits]); |
| 746 | did[bits] = true; |
| 747 | } |
| 748 | return s; |
| 749 | } |
| 750 | |
| 751 | void CacheFill(int iters, int n, SearchImpl *srch) { |
| 752 | string s = DeBruijnString(n+1); |
| 753 | string t; |
| 754 | for (int i = n+1; i < 20; i++) { |
| 755 | t = s + s; |
| 756 | swap(s, t); |
| 757 | } |
| 758 | srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s, |
| 759 | Prog::kUnanchored, true); |
| 760 | SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size()); |
| 761 | } |
| 762 | |
| 763 | void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); } |
| 764 | void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); } |
| 765 | void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); } |
| 766 | void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); } |
| 767 | |
| 768 | // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers |
| 769 | // for the static BenchmarkRegisterer, which makes it unusable inside |
| 770 | // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a |
| 771 | // to make the identifiers distinct (only possible when 'a' is a simple |
| 772 | // expression like 2, not like 1+1). |
| 773 | #define MY_BENCHMARK_WITH_ARG(n, a) \ |
| 774 | bool __benchmark_ ## n ## a = \ |
| 775 | (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs()); |
| 776 | |
| 777 | #define DO24(A, B) \ |
| 778 | A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \ |
| 779 | A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \ |
| 780 | A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \ |
| 781 | A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24); |
| 782 | |
| 783 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE) |
| 784 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA) |
| 785 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2) |
| 786 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA) |
| 787 | |
| 788 | #undef DO24 |
| 789 | #undef MY_BENCHMARK_WITH_ARG |
| 790 | */ |
| 791 | |
| 792 | //////////////////////////////////////////////////////////////////////// |
| 793 | // |
| 794 | // Implementation routines. Sad that there are so many, |
| 795 | // but all the interfaces are slightly different. |
| 796 | |
| 797 | // Runs implementation to search for regexp in text, iters times. |
| 798 | // Expect_match says whether the regexp should be found. |
| 799 | // Anchored says whether to run an anchored search. |
| 800 | |
| 801 | void SearchDFA(int iters, const char* regexp, const StringPiece& text, |
| 802 | Prog::Anchor anchor, bool expect_match) { |
| 803 | for (int i = 0; i < iters; i++) { |
| 804 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 805 | CHECK(re); |
| 806 | Prog* prog = re->CompileToProg(0); |
| 807 | CHECK(prog); |
| 808 | bool failed = false; |
| 809 | CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch, |
| 810 | NULL, &failed, NULL), |
| 811 | expect_match); |
| 812 | CHECK(!failed); |
| 813 | delete prog; |
| 814 | re->Decref(); |
| 815 | } |
| 816 | } |
| 817 | |
| 818 | void SearchNFA(int iters, const char* regexp, const StringPiece& text, |
| 819 | Prog::Anchor anchor, bool expect_match) { |
| 820 | for (int i = 0; i < iters; i++) { |
| 821 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 822 | CHECK(re); |
| 823 | Prog* prog = re->CompileToProg(0); |
| 824 | CHECK(prog); |
| 825 | CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0), |
| 826 | expect_match); |
| 827 | delete prog; |
| 828 | re->Decref(); |
| 829 | } |
| 830 | } |
| 831 | |
| 832 | void SearchOnePass(int iters, const char* regexp, const StringPiece& text, |
| 833 | Prog::Anchor anchor, bool expect_match) { |
| 834 | for (int i = 0; i < iters; i++) { |
| 835 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 836 | CHECK(re); |
| 837 | Prog* prog = re->CompileToProg(0); |
| 838 | CHECK(prog); |
| 839 | CHECK(prog->IsOnePass()); |
| 840 | CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
| 841 | expect_match); |
| 842 | delete prog; |
| 843 | re->Decref(); |
| 844 | } |
| 845 | } |
| 846 | |
| 847 | void SearchBitState(int iters, const char* regexp, const StringPiece& text, |
| 848 | Prog::Anchor anchor, bool expect_match) { |
| 849 | for (int i = 0; i < iters; i++) { |
| 850 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 851 | CHECK(re); |
| 852 | Prog* prog = re->CompileToProg(0); |
| 853 | CHECK(prog); |
| 854 | CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
| 855 | expect_match); |
| 856 | delete prog; |
| 857 | re->Decref(); |
| 858 | } |
| 859 | } |
| 860 | |
| 861 | void SearchPCRE(int iters, const char* regexp, const StringPiece& text, |
| 862 | Prog::Anchor anchor, bool expect_match) { |
| 863 | for (int i = 0; i < iters; i++) { |
| 864 | PCRE re(regexp, PCRE::UTF8); |
| 865 | CHECK_EQ(re.error(), ""); |
| 866 | if (anchor == Prog::kAnchored) |
| 867 | CHECK_EQ(PCRE::FullMatch(text, re), expect_match); |
| 868 | else |
| 869 | CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); |
| 870 | } |
| 871 | } |
| 872 | |
| 873 | void SearchRE2(int iters, const char* regexp, const StringPiece& text, |
| 874 | Prog::Anchor anchor, bool expect_match) { |
| 875 | for (int i = 0; i < iters; i++) { |
| 876 | RE2 re(regexp); |
| 877 | CHECK_EQ(re.error(), ""); |
| 878 | if (anchor == Prog::kAnchored) |
| 879 | CHECK_EQ(RE2::FullMatch(text, re), expect_match); |
| 880 | else |
| 881 | CHECK_EQ(RE2::PartialMatch(text, re), expect_match); |
| 882 | } |
| 883 | } |
| 884 | |
| 885 | // SearchCachedXXX is like SearchXXX but only does the |
| 886 | // regexp parsing and compiling once. This lets us measure |
| 887 | // search time without the per-regexp overhead. |
| 888 | |
| 889 | void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text, |
| 890 | Prog::Anchor anchor, bool expect_match) { |
| 891 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 892 | CHECK(re); |
| 893 | Prog* prog = re->CompileToProg(1LL<<31); |
| 894 | CHECK(prog); |
| 895 | for (int i = 0; i < iters; i++) { |
| 896 | bool failed = false; |
| 897 | CHECK_EQ(prog->SearchDFA(text, NULL, anchor, |
| 898 | Prog::kFirstMatch, NULL, &failed, NULL), |
| 899 | expect_match); |
| 900 | CHECK(!failed); |
| 901 | } |
| 902 | delete prog; |
| 903 | re->Decref(); |
| 904 | } |
| 905 | |
| 906 | void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text, |
| 907 | Prog::Anchor anchor, bool expect_match) { |
| 908 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 909 | CHECK(re); |
| 910 | Prog* prog = re->CompileToProg(0); |
| 911 | CHECK(prog); |
| 912 | for (int i = 0; i < iters; i++) { |
| 913 | CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0), |
| 914 | expect_match); |
| 915 | } |
| 916 | delete prog; |
| 917 | re->Decref(); |
| 918 | } |
| 919 | |
| 920 | void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text, |
| 921 | Prog::Anchor anchor, bool expect_match) { |
| 922 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 923 | CHECK(re); |
| 924 | Prog* prog = re->CompileToProg(0); |
| 925 | CHECK(prog); |
| 926 | CHECK(prog->IsOnePass()); |
| 927 | for (int i = 0; i < iters; i++) |
| 928 | CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
| 929 | expect_match); |
| 930 | delete prog; |
| 931 | re->Decref(); |
| 932 | } |
| 933 | |
| 934 | void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text, |
| 935 | Prog::Anchor anchor, bool expect_match) { |
| 936 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 937 | CHECK(re); |
| 938 | Prog* prog = re->CompileToProg(0); |
| 939 | CHECK(prog); |
| 940 | for (int i = 0; i < iters; i++) |
| 941 | CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
| 942 | expect_match); |
| 943 | delete prog; |
| 944 | re->Decref(); |
| 945 | } |
| 946 | |
| 947 | void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text, |
| 948 | Prog::Anchor anchor, bool expect_match) { |
| 949 | PCRE re(regexp, PCRE::UTF8); |
| 950 | CHECK_EQ(re.error(), ""); |
| 951 | for (int i = 0; i < iters; i++) { |
| 952 | if (anchor == Prog::kAnchored) |
| 953 | CHECK_EQ(PCRE::FullMatch(text, re), expect_match); |
| 954 | else |
| 955 | CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); |
| 956 | } |
| 957 | } |
| 958 | |
| 959 | void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text, |
| 960 | Prog::Anchor anchor, bool expect_match) { |
| 961 | RE2 re(regexp); |
| 962 | CHECK_EQ(re.error(), ""); |
| 963 | for (int i = 0; i < iters; i++) { |
| 964 | if (anchor == Prog::kAnchored) |
| 965 | CHECK_EQ(RE2::FullMatch(text, re), expect_match); |
| 966 | else |
| 967 | CHECK_EQ(RE2::PartialMatch(text, re), expect_match); |
| 968 | } |
| 969 | } |
| 970 | |
| 971 | |
| 972 | // Runs implementation to full match regexp against text, |
| 973 | // extracting three submatches. Expects match always. |
| 974 | |
| 975 | void Parse3NFA(int iters, const char* regexp, const StringPiece& text) { |
| 976 | for (int i = 0; i < iters; i++) { |
| 977 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 978 | CHECK(re); |
| 979 | Prog* prog = re->CompileToProg(0); |
| 980 | CHECK(prog); |
| 981 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 982 | CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 983 | delete prog; |
| 984 | re->Decref(); |
| 985 | } |
| 986 | } |
| 987 | |
| 988 | void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) { |
| 989 | for (int i = 0; i < iters; i++) { |
| 990 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 991 | CHECK(re); |
| 992 | Prog* prog = re->CompileToProg(0); |
| 993 | CHECK(prog); |
| 994 | CHECK(prog->IsOnePass()); |
| 995 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 996 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 997 | delete prog; |
| 998 | re->Decref(); |
| 999 | } |
| 1000 | } |
| 1001 | |
| 1002 | void Parse3BitState(int iters, const char* regexp, const StringPiece& text) { |
| 1003 | for (int i = 0; i < iters; i++) { |
| 1004 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1005 | CHECK(re); |
| 1006 | Prog* prog = re->CompileToProg(0); |
| 1007 | CHECK(prog); |
| 1008 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 1009 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 1010 | delete prog; |
| 1011 | re->Decref(); |
| 1012 | } |
| 1013 | } |
| 1014 | |
| 1015 | void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) { |
| 1016 | for (int i = 0; i < iters; i++) { |
| 1017 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1018 | CHECK(re); |
| 1019 | Prog* prog = re->CompileToProg(0); |
| 1020 | CHECK(prog); |
| 1021 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 1022 | CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 1023 | delete prog; |
| 1024 | re->Decref(); |
| 1025 | } |
| 1026 | } |
| 1027 | |
| 1028 | void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) { |
| 1029 | for (int i = 0; i < iters; i++) { |
| 1030 | PCRE re(regexp, PCRE::UTF8); |
| 1031 | CHECK_EQ(re.error(), ""); |
| 1032 | StringPiece sp1, sp2, sp3; |
| 1033 | CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); |
| 1034 | } |
| 1035 | } |
| 1036 | |
| 1037 | void Parse3RE2(int iters, const char* regexp, const StringPiece& text) { |
| 1038 | for (int i = 0; i < iters; i++) { |
| 1039 | RE2 re(regexp); |
| 1040 | CHECK_EQ(re.error(), ""); |
| 1041 | StringPiece sp1, sp2, sp3; |
| 1042 | CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); |
| 1043 | } |
| 1044 | } |
| 1045 | |
| 1046 | void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) { |
| 1047 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1048 | CHECK(re); |
| 1049 | Prog* prog = re->CompileToProg(0); |
| 1050 | CHECK(prog); |
| 1051 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 1052 | for (int i = 0; i < iters; i++) { |
| 1053 | CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 1054 | } |
| 1055 | delete prog; |
| 1056 | re->Decref(); |
| 1057 | } |
| 1058 | |
| 1059 | void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) { |
| 1060 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1061 | CHECK(re); |
| 1062 | Prog* prog = re->CompileToProg(0); |
| 1063 | CHECK(prog); |
| 1064 | CHECK(prog->IsOnePass()); |
| 1065 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 1066 | for (int i = 0; i < iters; i++) |
| 1067 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 1068 | delete prog; |
| 1069 | re->Decref(); |
| 1070 | } |
| 1071 | |
| 1072 | void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) { |
| 1073 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1074 | CHECK(re); |
| 1075 | Prog* prog = re->CompileToProg(0); |
| 1076 | CHECK(prog); |
| 1077 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 1078 | for (int i = 0; i < iters; i++) |
| 1079 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 1080 | delete prog; |
| 1081 | re->Decref(); |
| 1082 | } |
| 1083 | |
| 1084 | void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) { |
| 1085 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1086 | CHECK(re); |
| 1087 | Prog* prog = re->CompileToProg(0); |
| 1088 | CHECK(prog); |
| 1089 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
| 1090 | for (int i = 0; i < iters; i++) |
| 1091 | CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
| 1092 | delete prog; |
| 1093 | re->Decref(); |
| 1094 | } |
| 1095 | |
| 1096 | void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) { |
| 1097 | PCRE re(regexp, PCRE::UTF8); |
| 1098 | CHECK_EQ(re.error(), ""); |
| 1099 | StringPiece sp1, sp2, sp3; |
| 1100 | for (int i = 0; i < iters; i++) { |
| 1101 | CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); |
| 1102 | } |
| 1103 | } |
| 1104 | |
| 1105 | void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) { |
| 1106 | RE2 re(regexp); |
| 1107 | CHECK_EQ(re.error(), ""); |
| 1108 | StringPiece sp1, sp2, sp3; |
| 1109 | for (int i = 0; i < iters; i++) { |
| 1110 | CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); |
| 1111 | } |
| 1112 | } |
| 1113 | |
| 1114 | |
| 1115 | // Runs implementation to full match regexp against text, |
| 1116 | // extracting three submatches. Expects match always. |
| 1117 | |
| 1118 | void Parse1NFA(int iters, const char* regexp, const StringPiece& text) { |
| 1119 | for (int i = 0; i < iters; i++) { |
| 1120 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1121 | CHECK(re); |
| 1122 | Prog* prog = re->CompileToProg(0); |
| 1123 | CHECK(prog); |
| 1124 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
| 1125 | CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
| 1126 | delete prog; |
| 1127 | re->Decref(); |
| 1128 | } |
| 1129 | } |
| 1130 | |
| 1131 | void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) { |
| 1132 | for (int i = 0; i < iters; i++) { |
| 1133 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1134 | CHECK(re); |
| 1135 | Prog* prog = re->CompileToProg(0); |
| 1136 | CHECK(prog); |
| 1137 | CHECK(prog->IsOnePass()); |
| 1138 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
| 1139 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
| 1140 | delete prog; |
| 1141 | re->Decref(); |
| 1142 | } |
| 1143 | } |
| 1144 | |
| 1145 | void Parse1BitState(int iters, const char* regexp, const StringPiece& text) { |
| 1146 | for (int i = 0; i < iters; i++) { |
| 1147 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1148 | CHECK(re); |
| 1149 | Prog* prog = re->CompileToProg(0); |
| 1150 | CHECK(prog); |
| 1151 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
| 1152 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
| 1153 | delete prog; |
| 1154 | re->Decref(); |
| 1155 | } |
| 1156 | } |
| 1157 | |
| 1158 | void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) { |
| 1159 | for (int i = 0; i < iters; i++) { |
| 1160 | PCRE re(regexp, PCRE::UTF8); |
| 1161 | CHECK_EQ(re.error(), ""); |
| 1162 | StringPiece sp1; |
| 1163 | CHECK(PCRE::FullMatch(text, re, &sp1)); |
| 1164 | } |
| 1165 | } |
| 1166 | |
| 1167 | void Parse1RE2(int iters, const char* regexp, const StringPiece& text) { |
| 1168 | for (int i = 0; i < iters; i++) { |
| 1169 | RE2 re(regexp); |
| 1170 | CHECK_EQ(re.error(), ""); |
| 1171 | StringPiece sp1; |
| 1172 | CHECK(RE2::FullMatch(text, re, &sp1)); |
| 1173 | } |
| 1174 | } |
| 1175 | |
| 1176 | void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) { |
| 1177 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1178 | CHECK(re); |
| 1179 | Prog* prog = re->CompileToProg(0); |
| 1180 | CHECK(prog); |
| 1181 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
| 1182 | for (int i = 0; i < iters; i++) { |
| 1183 | CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
| 1184 | } |
| 1185 | delete prog; |
| 1186 | re->Decref(); |
| 1187 | } |
| 1188 | |
| 1189 | void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) { |
| 1190 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1191 | CHECK(re); |
| 1192 | Prog* prog = re->CompileToProg(0); |
| 1193 | CHECK(prog); |
| 1194 | CHECK(prog->IsOnePass()); |
| 1195 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
| 1196 | for (int i = 0; i < iters; i++) |
| 1197 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
| 1198 | delete prog; |
| 1199 | re->Decref(); |
| 1200 | } |
| 1201 | |
| 1202 | void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) { |
| 1203 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1204 | CHECK(re); |
| 1205 | Prog* prog = re->CompileToProg(0); |
| 1206 | CHECK(prog); |
| 1207 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
| 1208 | for (int i = 0; i < iters; i++) |
| 1209 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
| 1210 | delete prog; |
| 1211 | re->Decref(); |
| 1212 | } |
| 1213 | |
| 1214 | void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) { |
| 1215 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
| 1216 | CHECK(re); |
| 1217 | Prog* prog = re->CompileToProg(0); |
| 1218 | CHECK(prog); |
| 1219 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
| 1220 | for (int i = 0; i < iters; i++) |
| 1221 | CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
| 1222 | delete prog; |
| 1223 | re->Decref(); |
| 1224 | } |
| 1225 | |
| 1226 | void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) { |
| 1227 | PCRE re(regexp, PCRE::UTF8); |
| 1228 | CHECK_EQ(re.error(), ""); |
| 1229 | StringPiece sp1; |
| 1230 | for (int i = 0; i < iters; i++) { |
| 1231 | CHECK(PCRE::FullMatch(text, re, &sp1)); |
| 1232 | } |
| 1233 | } |
| 1234 | |
| 1235 | void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) { |
| 1236 | RE2 re(regexp); |
| 1237 | CHECK_EQ(re.error(), ""); |
| 1238 | StringPiece sp1; |
| 1239 | for (int i = 0; i < iters; i++) { |
| 1240 | CHECK(RE2::FullMatch(text, re, &sp1)); |
| 1241 | } |
| 1242 | } |
| 1243 | |
| 1244 | void SearchParse2CachedPCRE(int iters, const char* regexp, |
| 1245 | const StringPiece& text) { |
| 1246 | PCRE re(regexp, PCRE::UTF8); |
| 1247 | CHECK_EQ(re.error(), ""); |
| 1248 | for (int i = 0; i < iters; i++) { |
| 1249 | StringPiece sp1, sp2; |
| 1250 | CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2)); |
| 1251 | } |
| 1252 | } |
| 1253 | |
| 1254 | void SearchParse2CachedRE2(int iters, const char* regexp, |
| 1255 | const StringPiece& text) { |
| 1256 | RE2 re(regexp); |
| 1257 | CHECK_EQ(re.error(), ""); |
| 1258 | for (int i = 0; i < iters; i++) { |
| 1259 | StringPiece sp1, sp2; |
| 1260 | CHECK(RE2::PartialMatch(text, re, &sp1, &sp2)); |
| 1261 | } |
| 1262 | } |
| 1263 | |
| 1264 | void SearchParse1CachedPCRE(int iters, const char* regexp, |
| 1265 | const StringPiece& text) { |
| 1266 | PCRE re(regexp, PCRE::UTF8); |
| 1267 | CHECK_EQ(re.error(), ""); |
| 1268 | for (int i = 0; i < iters; i++) { |
| 1269 | StringPiece sp1; |
| 1270 | CHECK(PCRE::PartialMatch(text, re, &sp1)); |
| 1271 | } |
| 1272 | } |
| 1273 | |
| 1274 | void SearchParse1CachedRE2(int iters, const char* regexp, |
| 1275 | const StringPiece& text) { |
| 1276 | RE2 re(regexp); |
| 1277 | CHECK_EQ(re.error(), ""); |
| 1278 | for (int i = 0; i < iters; i++) { |
| 1279 | StringPiece sp1; |
| 1280 | CHECK(RE2::PartialMatch(text, re, &sp1)); |
| 1281 | } |
| 1282 | } |
| 1283 | |
| 1284 | void EmptyPartialMatchPCRE(int n) { |
| 1285 | PCRE re(""); |
| 1286 | for (int i = 0; i < n; i++) { |
| 1287 | PCRE::PartialMatch("", re); |
| 1288 | } |
| 1289 | } |
| 1290 | |
| 1291 | void EmptyPartialMatchRE2(int n) { |
| 1292 | RE2 re(""); |
| 1293 | for (int i = 0; i < n; i++) { |
| 1294 | RE2::PartialMatch("", re); |
| 1295 | } |
| 1296 | } |
| 1297 | #ifdef USEPCRE |
| 1298 | BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
| 1299 | #endif |
| 1300 | BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs()); |
| 1301 | |
| 1302 | void SimplePartialMatchPCRE(int n) { |
| 1303 | PCRE re("abcdefg"); |
| 1304 | for (int i = 0; i < n; i++) { |
| 1305 | PCRE::PartialMatch("abcdefg", re); |
| 1306 | } |
| 1307 | } |
| 1308 | |
| 1309 | void SimplePartialMatchRE2(int n) { |
| 1310 | RE2 re("abcdefg"); |
| 1311 | for (int i = 0; i < n; i++) { |
| 1312 | RE2::PartialMatch("abcdefg", re); |
| 1313 | } |
| 1314 | } |
| 1315 | #ifdef USEPCRE |
| 1316 | BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
| 1317 | #endif |
| 1318 | BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs()); |
| 1319 | |
| 1320 | static string http_text = |
| 1321 | "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf" |
| 1322 | "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1"; |
| 1323 | |
| 1324 | void HTTPPartialMatchPCRE(int n) { |
| 1325 | StringPiece a; |
| 1326 | PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); |
| 1327 | for (int i = 0; i < n; i++) { |
| 1328 | PCRE::PartialMatch(http_text, re, &a); |
| 1329 | } |
| 1330 | } |
| 1331 | |
| 1332 | void HTTPPartialMatchRE2(int n) { |
| 1333 | StringPiece a; |
| 1334 | RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); |
| 1335 | for (int i = 0; i < n; i++) { |
| 1336 | RE2::PartialMatch(http_text, re, &a); |
| 1337 | } |
| 1338 | } |
| 1339 | |
| 1340 | #ifdef USEPCRE |
| 1341 | BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
| 1342 | #endif |
| 1343 | BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); |
| 1344 | |
| 1345 | static string http_smalltext = |
| 1346 | "GET /abc HTTP/1.1"; |
| 1347 | |
| 1348 | void SmallHTTPPartialMatchPCRE(int n) { |
| 1349 | StringPiece a; |
| 1350 | PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); |
| 1351 | for (int i = 0; i < n; i++) { |
| 1352 | PCRE::PartialMatch(http_text, re, &a); |
| 1353 | } |
| 1354 | } |
| 1355 | |
| 1356 | void SmallHTTPPartialMatchRE2(int n) { |
| 1357 | StringPiece a; |
| 1358 | RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); |
| 1359 | for (int i = 0; i < n; i++) { |
| 1360 | RE2::PartialMatch(http_text, re, &a); |
| 1361 | } |
| 1362 | } |
| 1363 | |
| 1364 | #ifdef USEPCRE |
| 1365 | BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
| 1366 | #endif |
| 1367 | BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); |
| 1368 | |
| 1369 | void DotMatchPCRE(int n) { |
| 1370 | StringPiece a; |
| 1371 | PCRE re("(?-s)^(.+)"); |
| 1372 | for (int i = 0; i < n; i++) { |
| 1373 | PCRE::PartialMatch(http_text, re, &a); |
| 1374 | } |
| 1375 | } |
| 1376 | |
| 1377 | void DotMatchRE2(int n) { |
| 1378 | StringPiece a; |
| 1379 | RE2 re("(?-s)^(.+)"); |
| 1380 | for (int i = 0; i < n; i++) { |
| 1381 | RE2::PartialMatch(http_text, re, &a); |
| 1382 | } |
| 1383 | } |
| 1384 | |
| 1385 | #ifdef USEPCRE |
| 1386 | BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs()); |
| 1387 | #endif |
| 1388 | BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs()); |
| 1389 | |
| 1390 | void ASCIIMatchPCRE(int n) { |
| 1391 | StringPiece a; |
| 1392 | PCRE re("(?-s)^([ -~]+)"); |
| 1393 | for (int i = 0; i < n; i++) { |
| 1394 | PCRE::PartialMatch(http_text, re, &a); |
| 1395 | } |
| 1396 | } |
| 1397 | |
| 1398 | void ASCIIMatchRE2(int n) { |
| 1399 | StringPiece a; |
| 1400 | RE2 re("(?-s)^([ -~]+)"); |
| 1401 | for (int i = 0; i < n; i++) { |
| 1402 | RE2::PartialMatch(http_text, re, &a); |
| 1403 | } |
| 1404 | } |
| 1405 | |
| 1406 | #ifdef USEPCRE |
| 1407 | BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs()); |
| 1408 | #endif |
| 1409 | BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs()); |
| 1410 | |
| 1411 | void FullMatchPCRE(int iter, int n, const char *regexp) { |
| 1412 | StopBenchmarkTiming(); |
| 1413 | string s; |
| 1414 | MakeText(&s, n); |
| 1415 | s += "ABCDEFGHIJ"; |
| 1416 | BenchmarkMemoryUsage(); |
| 1417 | PCRE re(regexp); |
| 1418 | StartBenchmarkTiming(); |
| 1419 | for (int i = 0; i < iter; i++) |
| 1420 | CHECK(PCRE::FullMatch(s, re)); |
| 1421 | SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); |
| 1422 | } |
| 1423 | |
| 1424 | void FullMatchRE2(int iter, int n, const char *regexp) { |
| 1425 | StopBenchmarkTiming(); |
| 1426 | string s; |
| 1427 | MakeText(&s, n); |
| 1428 | s += "ABCDEFGHIJ"; |
| 1429 | BenchmarkMemoryUsage(); |
| 1430 | RE2 re(regexp, RE2::Latin1); |
| 1431 | StartBenchmarkTiming(); |
| 1432 | for (int i = 0; i < iter; i++) |
| 1433 | CHECK(RE2::FullMatch(s, re)); |
| 1434 | SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); |
| 1435 | } |
| 1436 | |
| 1437 | void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); } |
| 1438 | void FullMatch_DotStar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*"); } |
| 1439 | |
| 1440 | void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); } |
| 1441 | void FullMatch_DotStarDollar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*$"); } |
| 1442 | |
| 1443 | void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); } |
| 1444 | void FullMatch_DotStarCapture_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s)((.*)()()($))"); } |
| 1445 | |
| 1446 | #ifdef USEPCRE |
| 1447 | BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20); |
| 1448 | #endif |
| 1449 | BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20); |
| 1450 | |
| 1451 | #ifdef USEPCRE |
| 1452 | BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20); |
| 1453 | #endif |
| 1454 | BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20); |
| 1455 | |
| 1456 | #ifdef USEPCRE |
| 1457 | BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20); |
| 1458 | #endif |
| 1459 | BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20); |
| 1460 | |
| 1461 | } // namespace re2 |