Introducing single for loop into clang_proto_fuzzer

Summary:
Created a new protobuf and protobuf-to-C++ "converter" that wraps the entire C++ code in a single for loop.
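  - Added cxx_loop_proto.proto, a slightly modified copy of cxx_proto.proto in
    which a VarRef can refer to the for loop's counter variable
  - Added loop_proto_to_cxx.cpp and loop_proto_to_cxx_main.cpp, and declared the
    new conversion functions in proto_to_cxx.h
  - Created ExampleClangLoopProtoFuzzer to test the new protobuf and "converter"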

Patch by Emmett Neyman

Reviewers: kcc, vitalybuka, morehouse

Reviewed By: vitalybuka, morehouse

Subscribers: mgorny, llvm-commits, cfe-commits

Differential Revision: https://reviews.llvm.org/D47843

llvm-svn: 334216
diff --git a/clang/tools/clang-fuzzer/CMakeLists.txt b/clang/tools/clang-fuzzer/CMakeLists.txt
index cc9266c..039449a 100644
--- a/clang/tools/clang-fuzzer/CMakeLists.txt
+++ b/clang/tools/clang-fuzzer/CMakeLists.txt
@@ -14,6 +14,7 @@
   ClangFuzzer.cpp
   DummyClangFuzzer.cpp
   ExampleClangProtoFuzzer.cpp
+  ExampleClangLoopProtoFuzzer.cpp
   )
 
 if(CLANG_ENABLE_PROTO_FUZZER)
@@ -24,6 +25,7 @@
   include_directories(${PROTOBUF_INCLUDE_DIRS})
   include_directories(${CMAKE_CURRENT_BINARY_DIR})
   protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS cxx_proto.proto)
+  protobuf_generate_cpp(LOOP_PROTO_SRCS LOOP_PROTO_HDRS cxx_loop_proto.proto)
   set(LLVM_OPTIONAL_SOURCES ${LLVM_OPTIONAL_SOURCES} ${PROTO_SRCS})
   add_clang_library(clangCXXProto
     ${PROTO_SRCS}
@@ -33,13 +35,21 @@
     ${PROTOBUF_LIBRARIES}
     )
 
+  add_clang_library(clangCXXLoopProto
+    ${LOOP_PROTO_SRCS}
+    ${LOOP_PROTO_HDRS}
+
+    LINK_LIBS
+    ${PROTOBUF_LIBRARIES}
+    )
+
   # Build and include libprotobuf-mutator
   include(ProtobufMutator)
   include_directories(${ProtobufMutator_INCLUDE_DIRS})
 
   # Build the protobuf->C++ translation library and driver.
   add_clang_subdirectory(proto-to-cxx)
-
+  
   # Build the fuzzer initialization library.
   add_clang_subdirectory(fuzzer-initialize)
 
@@ -49,16 +59,32 @@
     ExampleClangProtoFuzzer.cpp
     )
 
-  target_link_libraries(clang-proto-fuzzer
-    PRIVATE
+  # Build the loop protobuf fuzzer
+  add_clang_executable(clang-loop-proto-fuzzer
+    ${DUMMY_MAIN}
+    ExampleClangLoopProtoFuzzer.cpp
+    )
+
+  set(COMMON_PROTO_FUZZ_LIBRARIES
     ${ProtobufMutator_LIBRARIES}
     ${PROTOBUF_LIBRARIES}
     ${LLVM_LIB_FUZZING_ENGINE}
-    clangCXXProto
     clangFuzzerInitialize
     clangHandleCXX
+    )
+
+  target_link_libraries(clang-proto-fuzzer
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    clangCXXProto
     clangProtoToCXX
     )
+  target_link_libraries(clang-loop-proto-fuzzer
+    PRIVATE
+    ${COMMON_PROTO_FUZZ_LIBRARIES}
+    clangCXXLoopProto
+    clangLoopProtoToCXX
+    )
 endif()
 
 add_clang_subdirectory(handle-cxx)
diff --git a/clang/tools/clang-fuzzer/ExampleClangLoopProtoFuzzer.cpp b/clang/tools/clang-fuzzer/ExampleClangLoopProtoFuzzer.cpp
new file mode 100644
index 0000000..69fa31f
--- /dev/null
+++ b/clang/tools/clang-fuzzer/ExampleClangLoopProtoFuzzer.cpp
@@ -0,0 +1,30 @@
+//===-- ExampleClangLoopProtoFuzzer.cpp - Fuzz Clang ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///  This file implements a function that runs Clang on a single
+///  input and uses libprotobuf-mutator to find new inputs. This function is
+///  then linked into the Fuzzer library. This file differs from
+///  ExampleClangProtoFuzzer in that it uses the new protobuf that includes
+///  C++ code with a single for loop.
+///
+//===----------------------------------------------------------------------===//
+
+#include "cxx_loop_proto.pb.h"
+#include "fuzzer-initialize/fuzzer_initialize.h"
+#include "handle-cxx/handle_cxx.h"
+#include "proto-to-cxx/proto_to_cxx.h"
+#include "src/libfuzzer/libfuzzer_macro.h"
+
+using namespace clang_fuzzer;
+
+DEFINE_BINARY_PROTO_FUZZER(const LoopFunction &input) {
+  auto S = LoopFunctionToString(input);
+  HandleCXX(S, GetCLArgs());
+}
diff --git a/clang/tools/clang-fuzzer/cxx_loop_proto.proto b/clang/tools/clang-fuzzer/cxx_loop_proto.proto
new file mode 100644
index 0000000..5dae576
--- /dev/null
+++ b/clang/tools/clang-fuzzer/cxx_loop_proto.proto
@@ -0,0 +1,97 @@
+//===-- cxx_loop_proto.proto - Protobuf description of C++ with for loops -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///  This file describes a subset of C++ as a protobuf. It is used to
+///  more easily find interesting inputs for fuzzing Clang. This subset
+///  extends the one defined in cxx_proto.proto by adding the option that
+///  a VarRef can use the for loop's counter variable.
+///
+//===----------------------------------------------------------------------===//
+
+
+syntax = "proto2";
+
+message VarRef {
+  required int32 varnum = 1;
+  required bool is_loop_var = 2;
+}
+
+message Lvalue {
+  required VarRef varref = 1;
+}
+
+message Const {
+  required int32 val = 1;
+}
+
+message BinaryOp {
+  enum Op {
+    PLUS = 0;
+    MINUS = 1;
+    MUL = 2;
+    DIV = 3;
+    MOD = 4;
+    XOR = 5;
+    AND = 6;
+    OR = 7;
+    EQ = 8;
+    NE = 9;
+    LE = 10;
+    GE = 11;
+    LT = 12;
+    GT = 13;
+  };
+  required Op op = 1;
+  required Rvalue left = 2;
+  required Rvalue right = 3;
+}
+
+message Rvalue {
+  oneof rvalue_oneof {
+    VarRef varref = 1;
+    Const cons = 2;
+    BinaryOp binop = 3;
+  }
+}
+
+message AssignmentStatement {
+  required Lvalue lvalue = 1;
+  required Rvalue rvalue = 2;
+}
+
+
+message IfElse {
+  required Rvalue cond = 1;
+  required StatementSeq if_body = 2;
+  required StatementSeq else_body = 3;
+}
+
+message While {
+  required Rvalue cond = 1;
+  required StatementSeq body = 2;
+}
+
+message Statement {
+  oneof stmt_oneof {
+    AssignmentStatement assignment = 1;
+    IfElse              ifelse     = 2;
+    While               while_loop = 3;
+  }
+}
+
+message StatementSeq {
+  repeated Statement statements = 1;
+}
+
+message LoopFunction {
+  required StatementSeq statements = 1;
+}
+
+package clang_fuzzer;
diff --git a/clang/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt b/clang/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt
index 9337092..339959b 100644
--- a/clang/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt
+++ b/clang/tools/clang-fuzzer/proto-to-cxx/CMakeLists.txt
@@ -2,12 +2,21 @@
 set(CMAKE_CXX_FLAGS ${CXX_FLAGS_NOFUZZ})
 
 # Needed by LLVM's CMake checks because this file defines multiple targets.
-set(LLVM_OPTIONAL_SOURCES proto_to_cxx.cpp proto_to_cxx_main.cpp)
+set(LLVM_OPTIONAL_SOURCES proto_to_cxx.cpp proto_to_cxx_main.cpp
+                          loop_proto_to_cxx.cpp loop_proto_to_cxx_main.cpp)
 
 add_clang_library(clangProtoToCXX proto_to_cxx.cpp
                   DEPENDS clangCXXProto
                   LINK_LIBS clangCXXProto ${PROTOBUF_LIBRARIES}
                   )
 
+add_clang_library(clangLoopProtoToCXX loop_proto_to_cxx.cpp
+                  DEPENDS clangCXXLoopProto
+                  LINK_LIBS clangCXXLoopProto ${PROTOBUF_LIBRARIES}
+                  )
+
 add_clang_executable(clang-proto-to-cxx proto_to_cxx_main.cpp)
+add_clang_executable(clang-loop-proto-to-cxx loop_proto_to_cxx_main.cpp)
+
 target_link_libraries(clang-proto-to-cxx PRIVATE clangProtoToCXX)
+target_link_libraries(clang-loop-proto-to-cxx PRIVATE clangLoopProtoToCXX)
diff --git a/clang/tools/clang-fuzzer/proto-to-cxx/loop_proto_to_cxx.cpp b/clang/tools/clang-fuzzer/proto-to-cxx/loop_proto_to_cxx.cpp
new file mode 100644
index 0000000..a0007fb
--- /dev/null
+++ b/clang/tools/clang-fuzzer/proto-to-cxx/loop_proto_to_cxx.cpp
@@ -0,0 +1,148 @@
+//==-- loop_proto_to_cxx.cpp - Protobuf-C++ conversion ---------------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements functions for converting between protobufs and C++. Extends
+// proto_to_cxx.cpp by wrapping all the generated C++ code in a single for
+// loop. Also outputs a different function signature that includes a
+// size_t parameter for the loop to use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "cxx_loop_proto.pb.h"
+#include "proto_to_cxx.h"
+
+// The following is needed to convert protos in human-readable form
+#include <google/protobuf/text_format.h>
+
+#include <ostream>
+#include <sstream>
+
+namespace clang_fuzzer {
+
+// Forward decls.
+std::ostream &operator<<(std::ostream &os, const BinaryOp &x);
+std::ostream &operator<<(std::ostream &os, const StatementSeq &x);
+
+// Proto to C++.
+std::ostream &operator<<(std::ostream &os, const Const &x) {
+  return os << "(" << x.val() << ")";
+}
+std::ostream &operator<<(std::ostream &os, const VarRef &x) {
+  if (x.is_loop_var()) {
+    return os << "a[loop_ctr]";
+  } else {
+    return os << "a[" << static_cast<uint32_t>(x.varnum()) << " % s]";
+  }
+}
+std::ostream &operator<<(std::ostream &os, const Lvalue &x) {
+  return os << x.varref();
+}
+std::ostream &operator<<(std::ostream &os, const Rvalue &x) {
+  if (x.has_varref())
+    return os << x.varref();
+  if (x.has_cons())
+    return os << x.cons();
+  if (x.has_binop())
+    return os << x.binop();
+  return os << "1";
+}
+std::ostream &operator<<(std::ostream &os, const BinaryOp &x) {
+  os << "(" << x.left();
+  switch (x.op()) {
+  case BinaryOp::PLUS:
+    os << "+";
+    break;
+  case BinaryOp::MINUS:
+    os << "-";
+    break;
+  case BinaryOp::MUL:
+    os << "*";
+    break;
+  case BinaryOp::DIV:
+    os << "/";
+    break;
+  case BinaryOp::MOD:
+    os << "%";
+    break;
+  case BinaryOp::XOR:
+    os << "^";
+    break;
+  case BinaryOp::AND:
+    os << "&";
+    break;
+  case BinaryOp::OR:
+    os << "|";
+    break;
+  case BinaryOp::EQ:
+    os << "==";
+    break;
+  case BinaryOp::NE:
+    os << "!=";
+    break;
+  case BinaryOp::LE:
+    os << "<=";
+    break;
+  case BinaryOp::GE:
+    os << ">=";
+    break;
+  case BinaryOp::LT:
+    os << "<";
+    break;
+  case BinaryOp::GT:
+    os << ">";
+    break;
+  }
+  return os << x.right() << ")";
+}
+std::ostream &operator<<(std::ostream &os, const AssignmentStatement &x) {
+  return os << x.lvalue() << "=" << x.rvalue();
+}
+std::ostream &operator<<(std::ostream &os, const IfElse &x) {
+  return os << "if (" << x.cond() << "){\n"
+            << x.if_body() << "} else { \n"
+            << x.else_body() << "}\n";
+}
+std::ostream &operator<<(std::ostream &os, const While &x) {
+  return os << "while (" << x.cond() << "){\n" << x.body() << "}\n";
+}
+std::ostream &operator<<(std::ostream &os, const Statement &x) {
+  if (x.has_assignment())
+    return os << x.assignment() << ";\n";
+  if (x.has_ifelse())
+    return os << x.ifelse();
+  if (x.has_while_loop())
+    return os << x.while_loop();
+  return os << "(void)0;\n";
+}
+std::ostream &operator<<(std::ostream &os, const StatementSeq &x) {
+  for (auto &st : x.statements())
+    os << st;
+  return os;
+}
+std::ostream &operator<<(std::ostream &os, const LoopFunction &x) {
+  return os << "void foo(int *a, size_t s) {\n"
+            << "for (int loop_ctr = 0; loop_ctr < s; loop_ctr++){\n"
+            << x.statements() << "}\n}\n";
+}
+
+// ---------------------------------
+
+std::string LoopFunctionToString(const LoopFunction &input) {
+  std::ostringstream os;
+  os << input;
+  return os.str();
+}
+std::string LoopProtoToCxx(const uint8_t *data, size_t size) {
+  LoopFunction message;
+  if (!message.ParsePartialFromArray(data, size))
+    return "#error invalid proto, may not be binary encoded\n";
+  return LoopFunctionToString(message);
+}
+
+} // namespace clang_fuzzer
diff --git a/clang/tools/clang-fuzzer/proto-to-cxx/loop_proto_to_cxx_main.cpp b/clang/tools/clang-fuzzer/proto-to-cxx/loop_proto_to_cxx_main.cpp
new file mode 100644
index 0000000..e4b1414
--- /dev/null
+++ b/clang/tools/clang-fuzzer/proto-to-cxx/loop_proto_to_cxx_main.cpp
@@ -0,0 +1,32 @@
+//==-- loop_proto_to_cxx_main.cpp - Driver for protobuf-C++ conversion -----==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a simple driver to print a C++ program from a protobuf with loops.
+//
+//===----------------------------------------------------------------------===//
+
+// Copy of proto_to_cxx_main.cpp; loop-specific changes will be added later.
+
+#include <fstream>
+#include <iostream>
+#include <streambuf>
+#include <string>
+
+#include "proto_to_cxx.h"
+
+int main(int argc, char **argv) {
+  for (int i = 1; i < argc; i++) {
+    std::fstream in(argv[i]);
+    std::string str((std::istreambuf_iterator<char>(in)),
+                    std::istreambuf_iterator<char>());
+    std::cout << "// " << argv[i] << std::endl;
+    std::cout << clang_fuzzer::LoopProtoToCxx(
+        reinterpret_cast<const uint8_t *>(str.data()), str.size());
+  }
+}
diff --git a/clang/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.h b/clang/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.h
index 1985e91..8d2e2e6 100644
--- a/clang/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.h
+++ b/clang/tools/clang-fuzzer/proto-to-cxx/proto_to_cxx.h
@@ -17,6 +17,10 @@
 
 namespace clang_fuzzer {
 class Function;
+class LoopFunction;
+
 std::string FunctionToString(const Function &input);
 std::string ProtoToCxx(const uint8_t *data, size_t size);
+std::string LoopFunctionToString(const LoopFunction &input);
+std::string LoopProtoToCxx(const uint8_t *data, size_t size);
 }