Use whole corpus for regression testing (#1302)

* Use whole corpus for regression testing

* Differential fuzzing against llvm-mc

* Download corpus from another repo
diff --git a/.travis.yml b/.travis.yml
index d6909bb..8bcb54b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,9 @@
 sudo: false
 before_install:
         - export LD_LIBRARY_PATH=`pwd`/tests/:$LD_LIBRARY_PATH
+before_script:
+        - wget https://github.com/groundx/capstonefuzz/raw/master/corpus/corpus-libFuzzer-capstone_fuzz_disasmnext-latest.zip
+        - unzip corpus-libFuzzer-capstone_fuzz_disasmnext-latest.zip -d suite/fuzz
 script:
         - ./make.sh
         - make check
diff --git a/Makefile b/Makefile
index 93508d7..060b813 100644
--- a/Makefile
+++ b/Makefile
@@ -461,7 +461,7 @@
 TESTS += test_m68k.static test_mips.static test_ppc.static test_sparc.static
 TESTS += test_systemz.static test_x86.static test_xcore.static test_m680x.static
 TESTS += test_skipdata test_skipdata.static test_iter.static test_evm.static
-check: $(TESTS) fuzztest
+check: $(TESTS) fuzztest fuzzallcorp
 test_%:
 	./tests/$@ > /dev/null && echo OK || echo FAILED
 
@@ -470,6 +470,9 @@
 fuzztest:
 	./suite/fuzz/fuzz_disasm $(FUZZ_INPUTS)
 
+fuzzallcorp: # run the fuzz_bindisasm driver over the corpus directory (presumably the archive fetched in .travis.yml before_script - confirm its layout)
+	./suite/fuzz/fuzz_bindisasm suite/fuzz/corpus-libFuzzer-capstone_fuzz_disasmnext-latest/
+
 $(OBJDIR)/%.o: %.c
 	@mkdir -p $(@D)
 ifeq ($(V),0)
diff --git a/suite/fuzz/Makefile b/suite/fuzz/Makefile
index d3251ee..3370df3 100644
--- a/suite/fuzz/Makefile
+++ b/suite/fuzz/Makefile
@@ -38,14 +38,15 @@
 
 .PHONY: all clean
 
-SOURCES = fuzz_disasm.c drivermc.c fuzz_harness.c
+SOURCES = fuzz_disasm.c drivermc.c fuzz_harness.c driverbin.c
 OBJS = $(addprefix $(OBJDIR)/,$(SOURCES:.c=.o))
 BINARY = $(addprefix $(TESTDIR)/,fuzz_disasm$(BIN_EXT))
+BINARYBIN = $(addprefix $(TESTDIR)/,fuzz_bindisasm$(BIN_EXT))
 
-all: $(BINARY)
+all: $(BINARY) $(BINARYBIN)
 
 clean:
-	rm -rf fuzz_harness $(OBJS) $(BINARY) $(OBJDIR)/lib$(LIBNAME).* $(OBJDIR)/$(LIBNAME).*
+	rm -rf fuzz_harness $(OBJS) $(BINARY) $(BINARYBIN) $(OBJDIR)/lib$(LIBNAME).* $(OBJDIR)/$(LIBNAME).*
 
 $(BINARY): fuzz_disasm.o drivermc.o
 	@mkdir -p $(@D)
@@ -56,6 +57,15 @@
 	$(link-static)
 endif
 
+$(BINARYBIN): fuzz_disasm.o driverbin.o # fuzz_bindisasm: the fuzz target plus the corpus-directory driver, linked by $(link-static) (defined outside this hunk)
+	@mkdir -p $(@D)
+ifeq ($(V),0)
+	$(call log,LINK,$(notdir $@))
+	@$(link-static)
+else
+	$(link-static)
+endif
+
 $(OBJDIR)/%.o: %.c
 	@mkdir -p $(@D)
 ifeq ($(V),0)
diff --git a/suite/fuzz/driverbin.c b/suite/fuzz/driverbin.c
new file mode 100644
index 0000000..57eea2c
--- /dev/null
+++ b/suite/fuzz/driverbin.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <unistd.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+
+/*
+ * Corpus regression driver: feeds every regular file of the directory given
+ * as the single command-line argument to LLVMFuzzerTestOneInput().
+ *
+ * Exit status: 0 when every file was processed, 1 on wrong argument count,
+ * 2 when the directory cannot be opened or entered, 3 fopen() failure,
+ * 4 or 7 fseek() failure, 5 ftell() failure, 8 short read.
+ * Files larger than the 4 KiB buffer are skipped without error.
+ */
+int main(int argc, char** argv)
+{
+    FILE * fp;
+    uint8_t Data[0x1000];
+    size_t Size;
+    DIR *d;
+    struct dirent *dir;
+    int r = 0;
+
+    if (argc != 2) {
+        return 1;
+    }
+
+    d = opendir(argv[1]);
+    if (d == NULL) {
+        printf("Invalid directory\n");
+        return 2;
+    }
+    // entries are opened by bare name below, so the corpus must be the cwd
+    if (chdir(argv[1]) != 0) {
+        closedir(d);
+        printf("Invalid directory\n");
+        return 2;
+    }
+
+    while (r == 0 && (dir = readdir(d)) != NULL) {
+        // NOTE(review): d_type can be DT_UNKNOWN on filesystems that do not report it; such entries are skipped here
+        if (dir->d_type != DT_REG) {
+            continue;
+        }
+        fp = fopen(dir->d_name, "rb");
+        if (fp == NULL) {
+            r = 3;
+            break;
+        }
+
+        // size the file by seeking to its end, then rewind and load it
+        if (fseek(fp, 0L, SEEK_END) != 0) {
+            r = 4;
+        } else if ((Size = ftell(fp)) == (size_t) -1) {
+            r = 5;
+        } else if (Size > sizeof(Data)) {
+            // too large for the buffer: skip the file, this is not an error
+        } else if (fseek(fp, 0L, SEEK_SET) != 0) {
+            r = 7;
+        } else if (Size > 0 && fread(Data, 1, Size, fp) != Size) {
+            // an empty file is valid input: fread() is skipped for it because
+            // a zero-byte request returns 0, indistinguishable from a failure
+            r = 8;
+        } else {
+            // launch fuzzer
+            LLVMFuzzerTestOneInput(Data, Size);
+        }
+        fclose(fp);
+    }
+    closedir(d);
+    return r;
+}
+
diff --git a/suite/fuzz/fuzz_diff.c b/suite/fuzz/fuzz_diff.c
new file mode 100644
index 0000000..f0f39fd
--- /dev/null
+++ b/suite/fuzz/fuzz_diff.c
@@ -0,0 +1,237 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include <capstone/capstone.h>
+
+struct platform { // one Capstone architecture/mode combination the fuzz input can select
+    cs_arch arch;   // architecture passed to cs_open()
+    cs_mode mode;   // mode flags passed to cs_open()
+    char *comment;  // human-readable label; not read by the code in this file
+};
+
+FILE * outfile = NULL; // opened on /dev/null by the first fuzz call; in this file it is only tested against NULL as a "first call" marker
+
+struct platform platforms[] = { // indexed by Data[0], the first byte of the fuzz input; the order of entries is therefore significant
+    {
+        // item 0
+        CS_ARCH_X86,
+        CS_MODE_32,
+        "X86 32 (Intel syntax)"
+    },
+    {
+        // item 1
+        CS_ARCH_X86,
+        CS_MODE_64,
+        "X86 64 (Intel syntax)"
+    },
+    {
+        // item 2
+        CS_ARCH_ARM,
+        CS_MODE_ARM,
+        "ARM"
+    },
+    {
+        // item 3
+        CS_ARCH_ARM,
+        CS_MODE_THUMB,
+        "THUMB"
+    },
+    {
+        // item 4
+        CS_ARCH_ARM,
+        (cs_mode)(CS_MODE_ARM + CS_MODE_V8),
+        "Arm-V8"
+    },
+    {
+        // item 5
+        CS_ARCH_ARM,
+        (cs_mode)(CS_MODE_THUMB+CS_MODE_V8),
+        "THUMB+V8"
+    },
+    {
+        // item 6
+        CS_ARCH_ARM,
+        (cs_mode)(CS_MODE_THUMB + CS_MODE_MCLASS),
+        "Thumb-MClass"
+    },
+    {
+        // item 7
+        CS_ARCH_ARM64,
+        (cs_mode)0,
+        "ARM-64"
+    },
+    {
+        // item 8
+        CS_ARCH_MIPS,
+        (cs_mode)(CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN),
+        "MIPS-32 (Big-endian)"
+    },
+    {
+        // item 9
+        CS_ARCH_MIPS,
+        (cs_mode)(CS_MODE_MIPS32 + CS_MODE_MICRO),
+        "MIPS-32 (micro)"
+    },
+    {
+        // item 10
+        CS_ARCH_MIPS,
+        CS_MODE_MIPS64,
+        "MIPS-64-EL (Little-endian)"
+    },
+    {
+        // item 11
+        CS_ARCH_MIPS,
+        CS_MODE_MIPS32,
+        "MIPS-32-EL (Little-endian)"
+    },
+    {
+        // item 12
+        CS_ARCH_MIPS,
+        (cs_mode)(CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN),
+        "MIPS-64 (Big-endian)"
+    },
+    {
+        // item 13
+        CS_ARCH_MIPS,
+        (cs_mode)(CS_MODE_MIPS32 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN),
+        "MIPS-32 | Micro (Big-endian)"
+    },
+    {
+        // item 14
+        CS_ARCH_PPC,
+        CS_MODE_BIG_ENDIAN,
+        "PPC-64"
+    },
+    {
+        // item 15
+        CS_ARCH_SPARC,
+        CS_MODE_BIG_ENDIAN,
+        "Sparc"
+    },
+    {
+        // item 16
+        CS_ARCH_SPARC,
+        (cs_mode)(CS_MODE_BIG_ENDIAN + CS_MODE_V9),
+        "SparcV9"
+    },
+    {
+        // item 17
+        CS_ARCH_SYSZ,
+        (cs_mode)0,
+        "SystemZ"
+    },
+    {
+        // item 18
+        CS_ARCH_XCORE,
+        (cs_mode)0,
+        "XCore"
+    },
+    {
+        // item 19
+        CS_ARCH_MIPS,
+        (cs_mode)(CS_MODE_MIPS32R6 + CS_MODE_BIG_ENDIAN),
+        "MIPS-32R6 (Big-endian)"
+    },
+    {
+        // item 20
+        CS_ARCH_MIPS,
+        (cs_mode)(CS_MODE_MIPS32R6 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN),
+        "MIPS-32R6 (Micro+Big-endian)"
+    },
+    {
+        // item 21
+        CS_ARCH_MIPS,
+        CS_MODE_MIPS32R6,
+        "MIPS-32R6 (Little-endian)"
+    },
+    {
+        // item 22
+        CS_ARCH_MIPS,
+        (cs_mode)(CS_MODE_MIPS32R6 + CS_MODE_MICRO),
+        "MIPS-32R6 (Micro+Little-endian)"
+    },
+    {
+        // item 23
+        CS_ARCH_M68K,
+        (cs_mode)0,
+        "M68K"
+    },
+    {
+        // item 24
+        CS_ARCH_M680X,
+        (cs_mode)CS_MODE_M680X_6809,
+        "M680X_M6809"
+    },
+    {
+        // item 25
+        CS_ARCH_EVM,
+        (cs_mode)0,
+        "EVM"
+    },
+};
+
+void LLVMFuzzerInit(); // implemented in fuzz_llvm.cpp: one-time LLVM target/disassembler initialisation
+int LLVMFuzzerReturnOneInput(const uint8_t *Data, size_t Size, char * AssemblyText); // implemented in fuzz_llvm.cpp: returns 0 when LLVM decoded an instruction into AssemblyText, 1 otherwise
+
+/*
+ * Differential fuzz target: decodes one instruction with LLVM and with Capstone
+ * and aborts when the texts differ or Capstone fails where LLVM succeeded.
+ * Data[0] selects the platforms[] entry, the rest is the code. Always returns 0.
+ */
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+    csh handle;
+    cs_insn *insn;
+    uint64_t address = 0x1000;
+    char LLVMAssemblyText[80];
+    char CapstoneAssemblyText[80];
+
+    if (Size < 1) {
+        // 1 byte for arch choice
+        return 0;
+    } else if (Size > 0x1000) {
+        //limit input to 4kb
+        Size = 0x1000;
+    }
+    if (outfile == NULL) {
+        // first call: outfile doubles as the "LLVM is initialised" marker
+        outfile = fopen("/dev/null", "w");
+        if (outfile == NULL) {
+            return 0;
+        }
+        LLVMFuzzerInit();
+    }
+
+    if (Data[0] >= sizeof(platforms)/sizeof(platforms[0])) {
+        return 0;
+    }
+    // LLVM is the reference: nothing to compare when it decodes nothing
+    if (LLVMFuzzerReturnOneInput(Data, Size, LLVMAssemblyText) == 1) {
+        return 0;
+    }
+    if (cs_open(platforms[Data[0]].arch, platforms[Data[0]].mode, &handle) != CS_ERR_OK) {
+        return 0;
+    }
+    insn = cs_malloc(handle);
+    assert(insn);
+    Data++; // step over the platform selector byte
+    Size--;
+    if (cs_disasm_iter(handle, &Data, &Size, &address, insn)) {
+        snprintf(CapstoneAssemblyText, sizeof(CapstoneAssemblyText), "\t%s\t%s", insn->mnemonic, insn->op_str);
+        if (strcmp(CapstoneAssemblyText, LLVMAssemblyText) != 0) {
+            printf("capstone %s != llvm %s", CapstoneAssemblyText, LLVMAssemblyText);
+            fflush(stdout); // abort() need not flush stdio, the report would be lost
+            abort();
+        }
+    } else {
+        printf("capstone failed with llvm %s", LLVMAssemblyText);
+        fflush(stdout); // abort() need not flush stdio, the report would be lost
+        abort();
+    }
+    cs_free(insn, 1);
+    cs_close(&handle);
+    return 0;
+}
diff --git a/suite/fuzz/fuzz_llvm.cpp b/suite/fuzz/fuzz_llvm.cpp
new file mode 100644
index 0000000..7e713cb
--- /dev/null
+++ b/suite/fuzz/fuzz_llvm.cpp
@@ -0,0 +1,41 @@
+#include "llvm-c/Disassembler.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+extern "C" void LLVMFuzzerInit() { // one-time LLVM setup; fuzz_diff.c calls it while handling its first input
+    LLVMInitializeAllTargetInfos();
+    LLVMInitializeAllTargetMCs();
+    LLVMInitializeAllDisassemblers(); // needed before LLVMCreateDisasmCPUFeatures() can build a disassembler
+}
+
+
+// LLVM side of the differential fuzzer. Data[0] selects the platform and the
+// bytes after it are decoded as one instruction into AssemblyText (80 bytes).
+// Returns 0 when an instruction was decoded, 1 otherwise.
+extern "C" int LLVMFuzzerReturnOneInput(const uint8_t *Data, size_t Size, char * AssemblyText) {
+    LLVMDisasmContextRef Ctx;
+    std::vector<uint8_t> DataCopy(Data, Data + Size); // mutable copy: LLVMDisasmInstruction() takes a non-const buffer
+    int r;
+    if (Size < 1) {
+        return 1; // no selector byte to read
+    }
+    switch(Data[0]) {
+        case 0:
+            Ctx = LLVMCreateDisasmCPUFeatures("i386", "", "", nullptr, 0, nullptr, nullptr);
+            // check the context before its first use; unlike assert() this survives NDEBUG
+            if (Ctx == nullptr || LLVMSetDisasmOptions(Ctx, LLVMDisassembler_Option_AsmPrinterVariant) == 0) {
+                abort();
+            }
+            break;
+            //TODO other cases
+        default:
+            return 1;
+    }
+    r = (LLVMDisasmInstruction(Ctx, DataCopy.data() + 1, Size - 1, 0, AssemblyText, 80) > 0) ? 0 : 1;
+    LLVMDisasmDispose(Ctx);
+    return r;
+}