Continuous integration for fuzzing (#1297)

* Continuous integration for fuzzing

* Simplify fuzz testing output

* Makefile for suite fuzz

* fixup

* Code review taken into account

* More readable fuzz harness

Inputs specify the mode only on the first line
diff --git a/.travis.yml b/.travis.yml
index aa8f5a1..d6909bb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,10 +7,24 @@
         - make check
         - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then cp libcapstone.so.* bindings/python/libcapstone.so; fi
         - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then cp libcapstone.*.dylib bindings/python/libcapstone.dylib; fi
-        - cd bindings/python && make check
+        - if [[ "$NOPYTEST" != "true" ]]; then cd bindings/python && make check; fi
 compiler:
         - clang
         - gcc
 os:
         - linux
         - osx
+matrix:
+    include:
+        - name: fuzza
+          env: ASAN_OPTIONS=detect_leaks=0 CXXFLAGS="-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address" CFLAGS="-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address" LDFLAGS="-fsanitize=address" NOPYTEST=true
+          compiler: clang
+          os: linux
+        - name: fuzzm
+          env: CXXFLAGS="-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=memory" CFLAGS="-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=memory" LDFLAGS="-fsanitize=memory" NOPYTEST=true
+          compiler: clang
+          os: linux
+        - name: fuzzu
+          env: CXXFLAGS="-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=undefined" CFLAGS="-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=undefined -fno-sanitize-recover=undefined,integer" LDFLAGS="-fsanitize=undefined" NOPYTEST=true
+          compiler: clang
+          os: linux
diff --git a/Makefile b/Makefile
index a77247d..93508d7 100644
--- a/Makefile
+++ b/Makefile
@@ -349,8 +349,10 @@
 	@V=$(V) CC=$(CC) $(MAKE) -C cstool
 ifndef BUILDDIR
 	$(MAKE) -C tests
+	$(MAKE) -C suite/fuzz
 else
 	$(MAKE) -C tests BUILDDIR=$(BLDIR)
+	$(MAKE) -C suite/fuzz BUILDDIR=$(BLDIR)
 endif
 	$(call install-library,$(BLDIR)/tests/)
 endif
@@ -426,6 +428,7 @@
 
 ifeq (,$(findstring yes,$(CAPSTONE_BUILD_CORE_ONLY)))
 	$(MAKE) -C tests clean
+	$(MAKE) -C suite/fuzz clean
 	rm -f $(BLDIR)/tests/lib$(LIBNAME).$(EXT)
 endif
 
@@ -458,10 +461,20 @@
 TESTS += test_m68k.static test_mips.static test_ppc.static test_sparc.static
 TESTS += test_systemz.static test_x86.static test_xcore.static test_m680x.static
 TESTS += test_skipdata test_skipdata.static test_iter.static test_evm.static
-check: $(TESTS)
+check: $(TESTS) fuzztest
 test_%:
 	./tests/$@ > /dev/null && echo OK || echo FAILED
 
+# Every *.cs file found under suite/MC. Kept as a recursive (=) variable so
+# that find only runs when the fuzztest recipe is expanded, not on every parse.
+FUZZ_INPUTS = $(shell find suite/MC -type f -name '*.cs')
+
+# Replay all of $(FUZZ_INPUTS) through suite/fuzz/fuzz_disasm.
+# Declared phony so that a file named fuzztest can never mask the target.
+.PHONY: fuzztest
+fuzztest:
+	./suite/fuzz/fuzz_disasm $(FUZZ_INPUTS)
+
 $(OBJDIR)/%.o: %.c
 	@mkdir -p $(@D)
 ifeq ($(V),0)
diff --git a/suite/fuzz/Makefile b/suite/fuzz/Makefile
index d890783..d3251ee 100644
--- a/suite/fuzz/Makefile
+++ b/suite/fuzz/Makefile
@@ -1,10 +1,75 @@
+# Capstone Disassembler Engine
+# By Philippe Antoine <contact@catenacyber.fr>, 2018
+
+include ../../config.mk
+include ../../functions.mk
+
+ifneq ($(CAPSTONE_STATIC),yes)
+$(error Needs static capstone.)
+endif
+
+# Verbose output?
+V ?= 0
+
+INCDIR = ../../include
+ifndef BUILDDIR
+TESTDIR = .
+OBJDIR = .
+LIBDIR = ../..
+else
+TESTDIR = $(BUILDDIR)/tests
+OBJDIR = $(BUILDDIR)/obj/tests
+LIBDIR = $(BUILDDIR)
+endif
+
+CFLAGS += -Wall -I$(INCDIR)
+LDFLAGS += -L$(LIBDIR)
+
+CFLAGS += $(foreach arch,$(LIBARCHS),-arch $(arch))
+LDFLAGS += $(foreach arch,$(LIBARCHS),-arch $(arch))
+
 LIBNAME = capstone
 
+BIN_EXT =
+AR_EXT = a
+
+
+ARCHIVE = $(LIBDIR)/lib$(LIBNAME).$(AR_EXT)
+
+.PHONY: all clean
+
+SOURCES = fuzz_disasm.c drivermc.c fuzz_harness.c
+OBJS = $(addprefix $(OBJDIR)/,$(SOURCES:.c=.o))
+BINARY = $(addprefix $(TESTDIR)/,fuzz_disasm$(BIN_EXT))
+
+all: $(BINARY)
+
+clean:
+	rm -rf fuzz_harness $(OBJS) $(BINARY) $(OBJDIR)/lib$(LIBNAME).* $(OBJDIR)/$(LIBNAME).*
+
+$(BINARY): $(OBJDIR)/fuzz_disasm.o $(OBJDIR)/drivermc.o # objects built by the $(OBJDIR)/%.o rule
+	@mkdir -p $(@D)
+ifeq ($(V),0)
+	$(call log,LINK,$(notdir $@))
+	@$(link-static)
+else
+	$(link-static)
+endif
+
+$(OBJDIR)/%.o: %.c
+	@mkdir -p $(@D)
+ifeq ($(V),0)
+	$(call log,CC,$(@:$(OBJDIR)/%=%))
+	@$(compile)
+else
+	$(compile)
+endif
+
+
+
+define link-static
+	$(CC) $(LDFLAGS) $^ $(ARCHIVE) -o $@
+endef
+
 fuzz_harness: fuzz_harness.o
 	${CC} $< -O3 -Wall -l$(LIBNAME) -o $@
-
-%.o: %.c
-	${CC} -c -I../../include/capstone $< -o $@
-
-clean:
-	rm -rf *.o fuzz_harness
diff --git a/suite/fuzz/drivermc.c b/suite/fuzz/drivermc.c
new file mode 100644
index 0000000..a6a0163
--- /dev/null
+++ b/suite/fuzz/drivermc.c
@@ -0,0 +1,139 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+// Fuzz target entry point; its definition is not part of this file.
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+
+#define MAX_INSTR_SIZE 64
+#define MAX_LINE_SIZE 128
+
+// One supported "# <arch>, <mode>" file header and the option byte that
+// main() stores in Data[0] for it.
+struct platform_entry {
+    const char *arch;
+    const char *mode;
+    uint8_t option;
+};
+
+static const struct platform_entry platforms[] = {
+    { "CS_ARCH_X86", "CS_MODE_32", 0 },
+    { "CS_ARCH_X86", "CS_MODE_64", 1 },
+    { "CS_ARCH_ARM", "CS_MODE_ARM", 2 },
+    { "CS_ARCH_ARM", "CS_MODE_THUMB", 3 },
+    { "CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8", 4 },
+    { "CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8", 5 },
+    { "CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS", 6 },
+    { "CS_ARCH_ARM64", "0", 7 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN", 8 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO", 9 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS64", 10 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32", 11 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN", 12 },
+    // both spellings of this mode map to the same option
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN", 13 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO", 13 },
+    { "CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN", 14 },
+    { "CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN", 15 },
+    { "CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN + CS_MODE_V9", 16 },
+    { "CS_ARCH_SYSZ", "0", 17 },
+    { "CS_ARCH_XCORE", "0", 18 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_BIG_ENDIAN", 19 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN", 20 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32R6", 21 },
+    { "CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO", 22 },
+    { "CS_ARCH_M68K", "0", 23 },
+    { "CS_ARCH_M680X", "CS_MODE_M680X_6809", 24 },
+    { "CS_ARCH_EVM", "0", 25 },
+};
+
+// Returns the option byte registered for arch/mode, or -1 if the pair is unknown.
+static int find_option(const char *arch, const char *mode)
+{
+    size_t n;
+
+    for (n = 0; n < sizeof(platforms) / sizeof(platforms[0]); n++) {
+        if (strcmp(arch, platforms[n].arch) == 0 && strcmp(mode, platforms[n].mode) == 0) {
+            return platforms[n].option;
+        }
+    }
+    return -1;
+}
+
+// Replays every file named on the command line through LLVMFuzzerTestOneInput.
+// The first line of a file must be "# <arch>, <mode>, ..."; each following line
+// is read as a comma-separated list of "0xNN" bytes and passed as one input.
+// Returns 0 on success, 1 on a usage or format error, 2 if a file cannot be opened.
+int main(int argc, char** argv)
+{
+    FILE * fp;
+    uint8_t Data[MAX_INSTR_SIZE];
+    char line[MAX_LINE_SIZE];
+    size_t Size;
+    char arch[MAX_LINE_SIZE];
+    char mode[MAX_LINE_SIZE];
+    unsigned int value;
+    int option;
+    int i;
+
+    if (argc < 2) {
+        return 1;
+    }
+    for (i = 1; i < argc; i++) {
+        //opens the file and reads it line by line
+        fp = fopen(argv[i], "rb");
+        if (fp == NULL) {
+            return 2;
+        }
+        printf("Trying %s\n", argv[i]);
+        if (fgets(line, MAX_LINE_SIZE, fp) == NULL) {
+            //empty file : skip it, but still run the remaining inputs
+            fclose(fp);
+            continue;
+        }
+        if (line[0] != '#') {
+            printf("No mode\n");
+            //fail instead of continue
+            fclose(fp);
+            return 1;
+        }
+        if (sscanf(line, "# %[^,], %[^,]", arch, mode) != 2) {
+            printf("Wrong mode %s\n", line);
+            fclose(fp);
+            return 1;
+        }
+        option = find_option(arch, mode);
+        if (option < 0) {
+            printf("Unknown mode\n");
+            //fail instead of continue
+            fclose(fp);
+            return 1;
+        }
+        Data[0] = (uint8_t) option;
+
+        while (fgets(line, MAX_LINE_SIZE, fp) != NULL) {
+            Size = 1;
+            // we start line at offset 0 and Data buffer at offset 1
+            // since Data[0] is option : arch + mode
+            // each byte takes 5 characters in line : "0xNN" and a separator
+            while (sscanf(line+(Size-1)*5, "0x%02x", &value) == 1) {
+                Data[Size] = value;
+                Size++;
+                if (line[(Size-1)*5-1] != ',') {
+                    //end of pattern
+                    break;
+                } else if (MAX_LINE_SIZE < (Size-1)*5) {
+                    printf("Line overflow\n");
+                    fclose(fp);
+                    return 1;
+                }
+            }
+            //launch fuzzer
+            LLVMFuzzerTestOneInput(Data, Size);
+        }
+        fclose(fp);
+    }
+    return 0;
+}
+