More fuzzing infrastructure - regex

llvm-svn: 315582
diff --git a/libcxx/fuzzing/fuzzing.cpp b/libcxx/fuzzing/fuzzing.cpp
index cc349cd..d4d0e60 100644
--- a/libcxx/fuzzing/fuzzing.cpp
+++ b/libcxx/fuzzing/fuzzing.cpp
@@ -26,8 +26,7 @@
 #include "fuzzing.h"
 #include <vector>
 #include <algorithm>
-
-#include <iostream>
+#include <regex>
 
 //	If we had C++14, we could use the four iterator version of is_permutation
 
@@ -219,4 +218,59 @@
 	return 0;
 }
 
+
+// --	regex fuzzers
+//	Compiles the input bytes as a pattern (grammar `flag`) and matches the same bytes against it; returns 1 on a full match, otherwise 0.
+static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag)
+{
+	if (size > 0)	// empty input falls through to the 0 result below
+	{
+		try
+		{
+			const std::string s(reinterpret_cast<const char *>(data), size);	// doubles as pattern and subject
+			const std::regex re(s, flag);	// throws std::regex_error when the pattern is malformed
+			return std::regex_match(s, re) ? 1 : 0;
+		}
+		catch (const std::regex_error &) {}	// a rejected pattern is reported as 0, not as a failure
+	}
+	return 0;
+}
+
+
+int regex_ECMAScript (const uint8_t *data, size_t size)
+{
+	static_cast<void>(regex_helper(data, size, std::regex_constants::ECMAScript));	// helper result deliberately discarded
+	return 0;
+}
+
+int regex_POSIX (const uint8_t *data, size_t size)
+{
+	static_cast<void>(regex_helper(data, size, std::regex_constants::basic));	// "POSIX" here means the basic grammar; result discarded
+	return 0;
+}
+
+int regex_extended (const uint8_t *data, size_t size)
+{
+	static_cast<void>(regex_helper(data, size, std::regex_constants::extended));	// helper result deliberately discarded
+	return 0;
+}
+
+int regex_awk (const uint8_t *data, size_t size)
+{
+	static_cast<void>(regex_helper(data, size, std::regex_constants::awk));	// helper result deliberately discarded
+	return 0;
+}
+
+int regex_grep (const uint8_t *data, size_t size)
+{
+	static_cast<void>(regex_helper(data, size, std::regex_constants::grep));	// helper result deliberately discarded
+	return 0;
+}
+
+int regex_egrep (const uint8_t *data, size_t size)
+{
+	static_cast<void>(regex_helper(data, size, std::regex_constants::egrep));	// helper result deliberately discarded
+	return 0;
+}
+
 } // namespace fuzzing
diff --git a/libcxx/fuzzing/fuzzing.h b/libcxx/fuzzing/fuzzing.h
index b8116fb..6624955 100644
--- a/libcxx/fuzzing/fuzzing.h
+++ b/libcxx/fuzzing/fuzzing.h
@@ -27,7 +27,15 @@
 
 	int nth_element      (const uint8_t *data, size_t size);
 	int partial_sort     (const uint8_t *data, size_t size);
-	
+
+//	Various flavors of regex: the input bytes are compiled as a pattern and matched against themselves; each returns 0
+	int regex_ECMAScript (const uint8_t *data, size_t size);	// std::regex_constants::ECMAScript
+	int regex_POSIX      (const uint8_t *data, size_t size);	// std::regex_constants::basic
+	int regex_extended   (const uint8_t *data, size_t size);	// std::regex_constants::extended
+	int regex_awk        (const uint8_t *data, size_t size);	// std::regex_constants::awk
+	int regex_grep       (const uint8_t *data, size_t size);	// std::regex_constants::grep
+	int regex_egrep      (const uint8_t *data, size_t size);	// std::regex_constants::egrep
+
 } // namespace fuzzing
 
 #endif // _LIBCPP_FUZZING
diff --git a/libcxx/test/libcxx/fuzzing/regex_ECMAScript.cpp b/libcxx/test/libcxx/fuzzing/regex_ECMAScript.cpp
new file mode 100644
index 0000000..2e57126
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_ECMAScript.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===--------------------- regex_ECMAScript.cpp ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = {	// seed patterns fed to the fuzz target, one literal per entry
+	"",
+	"s",
+	"b*c",
+	"ba?sf",	// comma required: adjacent string literals would otherwise be concatenated
+	"lka*ea",
+	"adsf*kas;lnc441[0-9]1r34525234"
+	};
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+	for (size_t idx = 0; idx != k_num_tests; ++idx)	// every seed pattern must make the target return 0
+	{
+		const char *pattern = test_cases[idx];
+		const uint8_t *bytes = reinterpret_cast<const uint8_t *>(pattern);
+		assert(fuzzing::regex_ECMAScript(bytes, std::strlen(pattern)) == 0);
+	}
+	return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_POSIX.cpp b/libcxx/test/libcxx/fuzzing/regex_POSIX.cpp
new file mode 100644
index 0000000..f0bd289
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_POSIX.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===----------------------- regex_POSIX.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = {	// seed patterns fed to the fuzz target, one literal per entry
+	"",
+	"s",
+	"b*c",
+	"ba?sf",	// comma required: adjacent string literals would otherwise be concatenated
+	"lka*ea",
+	"adsf*kas;lnc441[0-9]1r34525234"
+	};
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+	for (size_t idx = 0; idx != k_num_tests; ++idx)	// every seed pattern must make the target return 0
+	{
+		const char *pattern = test_cases[idx];
+		const uint8_t *bytes = reinterpret_cast<const uint8_t *>(pattern);
+		assert(fuzzing::regex_POSIX(bytes, std::strlen(pattern)) == 0);
+	}
+	return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_awk.cpp b/libcxx/test/libcxx/fuzzing/regex_awk.cpp
new file mode 100644
index 0000000..2e57126
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_awk.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===------------------------ regex_awk.cpp -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = {	// seed patterns fed to the fuzz target, one literal per entry
+	"",
+	"s",
+	"b*c",
+	"ba?sf",	// comma required: adjacent string literals would otherwise be concatenated
+	"lka*ea",
+	"adsf*kas;lnc441[0-9]1r34525234"
+	};
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+	for (size_t i = 0; i < k_num_tests; ++i)	// every seed pattern must make the target return 0
+		{
+		const size_t   size = std::strlen(test_cases[i]);
+		const uint8_t *data = reinterpret_cast<const uint8_t *>(test_cases[i]);
+		assert(0 == fuzzing::regex_awk(data, size));	// this file tests the awk entry point, not ECMAScript
+		}
+	return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_egrep.cpp b/libcxx/test/libcxx/fuzzing/regex_egrep.cpp
new file mode 100644
index 0000000..056869f
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_egrep.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===------------------------ regex_egrep.cpp -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = {	// seed patterns fed to the fuzz target, one literal per entry
+	"",
+	"s",
+	"b*c",
+	"ba?sf",	// comma required: adjacent string literals would otherwise be concatenated
+	"lka*ea",
+	"adsf*kas;lnc441[0-9]1r34525234"
+	};
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+	for (size_t idx = 0; idx != k_num_tests; ++idx)	// every seed pattern must make the target return 0
+	{
+		const char *pattern = test_cases[idx];
+		const uint8_t *bytes = reinterpret_cast<const uint8_t *>(pattern);
+		assert(fuzzing::regex_egrep(bytes, std::strlen(pattern)) == 0);
+	}
+	return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_extended.cpp b/libcxx/test/libcxx/fuzzing/regex_extended.cpp
new file mode 100644
index 0000000..ac850eb
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_extended.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===---------------------- regex_extended.cpp ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = {	// seed patterns fed to the fuzz target, one literal per entry
+	"",
+	"s",
+	"b*c",
+	"ba?sf",	// comma required: adjacent string literals would otherwise be concatenated
+	"lka*ea",
+	"adsf*kas;lnc441[0-9]1r34525234"
+	};
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+	for (size_t idx = 0; idx != k_num_tests; ++idx)	// every seed pattern must make the target return 0
+	{
+		const char *pattern = test_cases[idx];
+		const uint8_t *bytes = reinterpret_cast<const uint8_t *>(pattern);
+		assert(fuzzing::regex_extended(bytes, std::strlen(pattern)) == 0);
+	}
+	return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_grep.cpp b/libcxx/test/libcxx/fuzzing/regex_grep.cpp
new file mode 100644
index 0000000..5b1dda2
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_grep.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===------------------------ regex_grep.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = {	// seed patterns fed to the fuzz target, one literal per entry
+	"",
+	"s",
+	"b*c",
+	"ba?sf",	// comma required: adjacent string literals would otherwise be concatenated
+	"lka*ea",
+	"adsf*kas;lnc441[0-9]1r34525234"
+	};
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+	for (size_t idx = 0; idx != k_num_tests; ++idx)	// every seed pattern must make the target return 0
+	{
+		const char *pattern = test_cases[idx];
+		const uint8_t *bytes = reinterpret_cast<const uint8_t *>(pattern);
+		assert(fuzzing::regex_grep(bytes, std::strlen(pattern)) == 0);
+	}
+	return 0;
+}