Blame - re2/testing/exhaustive2_test.cc - fp2-dev/platform/external/regex-re2

blob: c5fec5b3e4a884b03a5717653864499fa7730bb2 [file] [log] [blame]

Alexander Gutkin	0d4c523	2013-02-28 13:47:27 +0000	[diff] [blame]	1	// Copyright 2008 The RE2 Authors. All Rights Reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	// Exhaustive testing of regular expression matching.
				6
				7	#include "util/test.h"
				8	#include "re2/re2.h"
				9	#include "re2/testing/exhaustive_tester.h"
				10
				11	DECLARE_string(regexp_engines);
				12
				13	namespace re2 {
				14
				15	// Test empty string matches (aka "(?:)")
				16	TEST(EmptyString, Exhaustive) {
				17	ExhaustiveTest(2, 2, Split(" ", "(?:) a"),
				18	RegexpGenerator::EgrepOps(),
				19	5, Split("", "ab"), "", "");
				20	}
				21
				22	// Test escaped versions of regexp syntax.
				23	TEST(Punctuation, Literals) {
				24	vector<string> alphabet = Explode("()*+?{}[]\\^$.");
				25	vector<string> escaped = alphabet;
				26	for (int i = 0; i < escaped.size(); i++)
				27	escaped[i] = "\\" + escaped[i];
				28	ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
				29	2, alphabet, "", "");
				30	}
				31
				32	// Test ^ $ . \A \z in presence of line endings.
				33	// Have to wrap the empty-width ones in (?:) so that
				34	// they can be repeated -- PCRE rejects ^* but allows (?:^)*
				35	TEST(LineEnds, Exhaustive) {
				36	ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),
				37	RegexpGenerator::EgrepOps(),
				38	4, Explode("ab\n"), "", "");
				39	}
				40
				41	// Test what does and does not match \n.
				42	// This would be a good test, except that PCRE seems to have a bug:
				43	// in single-byte character set mode (the default),
				44	// [^a] matches \n, but in UTF-8 mode it does not.
				45	// So when we run the test, the tester complains that
				46	// we don't agree with PCRE, but it's PCRE that is at fault.
				47	// For what it's worth, Perl gets this right (matches
				48	// regardless of whether UTF-8 input is selected):
				49	//
				50	// #!/usr/bin/perl
				51	// use POSIX qw(locale_h);
				52	// print "matches in latin1\n" if "\n" =~ /[^a]/;
				53	// setlocale("en_US.utf8");
				54	// print "matches in utf8\n" if "\n" =~ /[^a]/;
				55	//
				56	// The rule chosen for RE2 is that by default, like Perl,
				57	// dot does not match \n but negated character classes [^a] do.
				58	// (?s) will allow dot to match \n; there is no way in RE2
				59	// to stop [^a] from matching \n, though the underlying library
				60	// provides a mechanism, and RE2 could add new syntax if needed.
				61	//
				62	// TEST(Newlines, Exhaustive) {
				63	// vector<string> empty_vector;
				64	// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
				65	// RegexpGenerator::EgrepOps(),
				66	// 4, Explode("a\n"), "");
				67	// }
				68
				69	} // namespace re2
				70