Use whole corpus for regression testing (#1302)
* Use whole corpus for regression testing
* differential fuzzing against llvm-mc
* Download corpus from another repo
diff --git a/.travis.yml b/.travis.yml
index d6909bb..8bcb54b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,9 @@
sudo: false
before_install:
- export LD_LIBRARY_PATH=`pwd`/tests/:$LD_LIBRARY_PATH
+before_script: # download and unpack the libFuzzer corpus that the Makefile's fuzzallcorp target replays from suite/fuzz
+ - wget https://github.com/groundx/capstonefuzz/raw/master/corpus/corpus-libFuzzer-capstone_fuzz_disasmnext-latest.zip
+ - unzip corpus-libFuzzer-capstone_fuzz_disasmnext-latest.zip -d suite/fuzz
script:
- ./make.sh
- make check
diff --git a/Makefile b/Makefile
index 93508d7..060b813 100644
--- a/Makefile
+++ b/Makefile
@@ -461,7 +461,7 @@
TESTS += test_m68k.static test_mips.static test_ppc.static test_sparc.static
TESTS += test_systemz.static test_x86.static test_xcore.static test_m680x.static
TESTS += test_skipdata test_skipdata.static test_iter.static test_evm.static
-check: $(TESTS) fuzztest
+check: $(TESTS) fuzztest fuzzallcorp
test_%:
./tests/$@ > /dev/null && echo OK || echo FAILED
@@ -470,6 +470,9 @@
fuzztest:
./suite/fuzz/fuzz_disasm $(FUZZ_INPUTS)
+.PHONY: fuzzallcorp # a command, not a file: run it every time it is requested
+fuzzallcorp: # replay the corpus unpacked under suite/fuzz (see .travis.yml before_script) through fuzz_bindisasm
+	./suite/fuzz/fuzz_bindisasm suite/fuzz/corpus-libFuzzer-capstone_fuzz_disasmnext-latest/
$(OBJDIR)/%.o: %.c
@mkdir -p $(@D)
ifeq ($(V),0)
diff --git a/suite/fuzz/Makefile b/suite/fuzz/Makefile
index d3251ee..3370df3 100644
--- a/suite/fuzz/Makefile
+++ b/suite/fuzz/Makefile
@@ -38,14 +38,15 @@
.PHONY: all clean
-SOURCES = fuzz_disasm.c drivermc.c fuzz_harness.c
+SOURCES = fuzz_disasm.c drivermc.c fuzz_harness.c driverbin.c
OBJS = $(addprefix $(OBJDIR)/,$(SOURCES:.c=.o))
BINARY = $(addprefix $(TESTDIR)/,fuzz_disasm$(BIN_EXT))
+BINARYBIN = $(addprefix $(TESTDIR)/,fuzz_bindisasm$(BIN_EXT))
-all: $(BINARY)
+all: $(BINARY) $(BINARYBIN)
clean:
- rm -rf fuzz_harness $(OBJS) $(BINARY) $(OBJDIR)/lib$(LIBNAME).* $(OBJDIR)/$(LIBNAME).*
+ rm -rf fuzz_harness $(OBJS) $(BINARY) $(BINARYBIN) $(OBJDIR)/lib$(LIBNAME).* $(OBJDIR)/$(LIBNAME).*
$(BINARY): fuzz_disasm.o drivermc.o
@mkdir -p $(@D)
@@ -56,6 +57,15 @@
$(link-static)
endif
+$(BINARYBIN): fuzz_disasm.o driverbin.o # same recipe as the $(BINARY) rule, with driverbin.o in place of drivermc.o
+ @mkdir -p $(@D)
+ifeq ($(V),0)
+ $(call log,LINK,$(notdir $@))
+ @$(link-static)
+else
+ $(link-static)
+endif
+
$(OBJDIR)/%.o: %.c
@mkdir -p $(@D)
ifeq ($(V),0)
diff --git a/suite/fuzz/driverbin.c b/suite/fuzz/driverbin.c
new file mode 100644
index 0000000..57eea2c
--- /dev/null
+++ b/suite/fuzz/driverbin.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <unistd.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+
+/* Regression driver: runs LLVMFuzzerTestOneInput() on every regular file of
+ * the directory argv[1]; files larger than the 0x1000-byte buffer are skipped.
+ * Returns 0 on success, 1 on bad usage, 2 if the directory is unusable and
+ * 3, 4, 5, 7 or 8 when a file cannot be opened, sized, rewound or read. */
+int main(int argc, char** argv)
+{
+    FILE * fp;
+    uint8_t Data[0x1000];
+    size_t Size;
+    DIR *d;
+    struct dirent *dir;
+    int r = 0;
+
+    if (argc != 2) {
+        return 1;
+    }
+    // entry names from readdir() are relative, so work from inside the directory
+    d = opendir(argv[1]);
+    if (d == NULL || chdir(argv[1]) != 0) {
+        if (d != NULL) {
+            closedir(d);
+        }
+        printf("Invalid directory\n");
+        return 2;
+    }
+
+    while((dir = readdir(d)) != NULL) {
+        // only regular files are fuzz inputs
+        if (dir->d_type != DT_REG) {
+            continue;
+        }
+        fp = fopen(dir->d_name, "rb");
+        if (fp == NULL) {
+            r = 3;
+            break;
+        }
+        if (fseek(fp, 0L, SEEK_END) != 0) {
+            fclose(fp);
+            r = 4;
+            break;
+        }
+        Size = ftell(fp);
+        if (Size == (size_t) -1) {
+            fclose(fp);
+            r = 5;
+            break;
+        } else if (Size > sizeof(Data)) {
+            fclose(fp);
+            continue;
+        }
+        if (fseek(fp, 0L, SEEK_SET) != 0) {
+            fclose(fp);
+            r = 7;
+            break;
+        }
+        // count bytes, not items: fread(Data, Size, 1, fp) returns 0 for an empty file
+        if (fread(Data, 1, Size, fp) != Size) {
+            fclose(fp);
+            r = 8;
+            break;
+        }
+        LLVMFuzzerTestOneInput(Data, Size);
+        fclose(fp);
+    }
+    closedir(d);
+    return r;
+}
+
diff --git a/suite/fuzz/fuzz_diff.c b/suite/fuzz/fuzz_diff.c
new file mode 100644
index 0000000..f0f39fd
--- /dev/null
+++ b/suite/fuzz/fuzz_diff.c
@@ -0,0 +1,237 @@
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <capstone/capstone.h>
+
+
+struct platform { // one Capstone architecture/mode pair the fuzzer can select
+ cs_arch arch; // first argument of cs_open()
+ cs_mode mode; // second argument of cs_open()
+ char *comment; // human-readable label; not read by the code visible in this file
+};
+
+FILE * outfile = NULL; // opened on /dev/null by the first LLVMFuzzerTestOneInput() call; NULL means LLVMFuzzerInit() has not run yet
+
+struct platform platforms[] = { // indexed by the first input byte (Data[0]); the switch in fuzz_llvm.cpp uses the same index
+ {
+ // item 0
+ CS_ARCH_X86,
+ CS_MODE_32,
+ "X86 32 (Intel syntax)"
+ },
+ {
+ // item 1
+ CS_ARCH_X86,
+ CS_MODE_64,
+ "X86 64 (Intel syntax)"
+ },
+ {
+ // item 2
+ CS_ARCH_ARM,
+ CS_MODE_ARM,
+ "ARM"
+ },
+ {
+ // item 3
+ CS_ARCH_ARM,
+ CS_MODE_THUMB,
+ "THUMB"
+ },
+ {
+ // item 4
+ CS_ARCH_ARM,
+ (cs_mode)(CS_MODE_ARM + CS_MODE_V8),
+ "Arm-V8"
+ },
+ {
+ // item 5
+ CS_ARCH_ARM,
+ (cs_mode)(CS_MODE_THUMB+CS_MODE_V8),
+ "THUMB+V8"
+ },
+ {
+ // item 6
+ CS_ARCH_ARM,
+ (cs_mode)(CS_MODE_THUMB + CS_MODE_MCLASS),
+ "Thumb-MClass"
+ },
+ {
+ // item 7
+ CS_ARCH_ARM64,
+ (cs_mode)0,
+ "ARM-64"
+ },
+ {
+ // item 8
+ CS_ARCH_MIPS,
+ (cs_mode)(CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN),
+ "MIPS-32 (Big-endian)"
+ },
+ {
+ // item 9
+ CS_ARCH_MIPS,
+ (cs_mode)(CS_MODE_MIPS32 + CS_MODE_MICRO),
+ "MIPS-32 (micro)"
+ },
+ {
+ // item 10
+ CS_ARCH_MIPS,
+ CS_MODE_MIPS64,
+ "MIPS-64-EL (Little-endian)"
+ },
+ {
+ // item 11
+ CS_ARCH_MIPS,
+ CS_MODE_MIPS32,
+ "MIPS-32-EL (Little-endian)"
+ },
+ {
+ // item 12
+ CS_ARCH_MIPS,
+ (cs_mode)(CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN),
+ "MIPS-64 (Big-endian)"
+ },
+ {
+ // item 13
+ CS_ARCH_MIPS,
+ (cs_mode)(CS_MODE_MIPS32 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN),
+ "MIPS-32 | Micro (Big-endian)"
+ },
+ {
+ // item 14 -- NOTE(review): the label says PPC-64 but only CS_MODE_BIG_ENDIAN is set; confirm this is intended
+ CS_ARCH_PPC,
+ CS_MODE_BIG_ENDIAN,
+ "PPC-64"
+ },
+ {
+ // item 15
+ CS_ARCH_SPARC,
+ CS_MODE_BIG_ENDIAN,
+ "Sparc"
+ },
+ {
+ // item 16
+ CS_ARCH_SPARC,
+ (cs_mode)(CS_MODE_BIG_ENDIAN + CS_MODE_V9),
+ "SparcV9"
+ },
+ {
+ // item 17
+ CS_ARCH_SYSZ,
+ (cs_mode)0,
+ "SystemZ"
+ },
+ {
+ // item 18
+ CS_ARCH_XCORE,
+ (cs_mode)0,
+ "XCore"
+ },
+ {
+ // item 19
+ CS_ARCH_MIPS,
+ (cs_mode)(CS_MODE_MIPS32R6 + CS_MODE_BIG_ENDIAN),
+ "MIPS-32R6 (Big-endian)"
+ },
+ {
+ // item 20
+ CS_ARCH_MIPS,
+ (cs_mode)(CS_MODE_MIPS32R6 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN),
+ "MIPS-32R6 (Micro+Big-endian)"
+ },
+ {
+ // item 21
+ CS_ARCH_MIPS,
+ CS_MODE_MIPS32R6,
+ "MIPS-32R6 (Little-endian)"
+ },
+ {
+ // item 22
+ CS_ARCH_MIPS,
+ (cs_mode)(CS_MODE_MIPS32R6 + CS_MODE_MICRO),
+ "MIPS-32R6 (Micro+Little-endian)"
+ },
+ {
+ // item 23
+ CS_ARCH_M68K,
+ (cs_mode)0,
+ "M68K"
+ },
+ {
+ // item 24
+ CS_ARCH_M680X,
+ (cs_mode)CS_MODE_M680X_6809,
+ "M680X_M6809"
+ },
+ {
+ // item 25
+ CS_ARCH_EVM,
+ (cs_mode)0,
+ "EVM"
+ },
+};
+
+void LLVMFuzzerInit(void); /* defined in fuzz_llvm.cpp; (void) makes this a real prototype in C */
+int LLVMFuzzerReturnOneInput(const uint8_t *Data, size_t Size, char * AssemblyText); /* defined in fuzz_llvm.cpp; returns 0 when AssemblyText was filled, 1 otherwise */
+
+/* Differential fuzz target. Data[0] selects an entry of platforms[]; the
+ * remaining bytes are disassembled once by LLVM and once by Capstone, and the
+ * process abort()s when Capstone fails or the two texts differ.
+ * Always returns 0; inputs for which LLVM gives no result are ignored. */
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+    csh handle;
+    cs_insn *insn;
+    cs_err err;
+    uint64_t address = 0x1000;
+    char LLVMAssemblyText[80];
+    char CapstoneAssemblyText[80];
+
+    if (Size < 1) {
+        // 1 byte for arch choice
+        return 0;
+    } else if (Size > 0x1000) {
+        //limit input to 4kb
+        Size = 0x1000;
+    }
+    if (outfile == NULL) {
+        // first call: outfile doubles as the "LLVM already initialised" flag
+        outfile = fopen("/dev/null", "w");
+        if (outfile == NULL) {
+            return 0;
+        }
+        LLVMFuzzerInit();
+    }
+    if (Data[0] >= sizeof(platforms)/sizeof(platforms[0])) {
+        return 0;
+    }
+    // LLVM is the reference: nothing to compare when it returns no text
+    if (LLVMFuzzerReturnOneInput(Data, Size, LLVMAssemblyText) == 1) {
+        return 0;
+    }
+
+    err = cs_open(platforms[Data[0]].arch, platforms[Data[0]].mode, &handle);
+    if (err) {
+        return 0;
+    }
+    insn = cs_malloc(handle);
+    Data++;
+    Size--;
+    assert(insn);
+    if (cs_disasm_iter(handle, &Data, &Size, &address, insn)) {
+        snprintf(CapstoneAssemblyText, 80, "\t%s\t%s", insn->mnemonic, insn->op_str);
+        if (strcmp(CapstoneAssemblyText, LLVMAssemblyText) != 0) {
+            // stderr plus newline: abort() does not flush a buffered stdout
+            fprintf(stderr, "capstone %s != llvm %s\n", CapstoneAssemblyText, LLVMAssemblyText);
+            abort();
+        }
+    } else {
+        fprintf(stderr, "capstone failed with llvm %s\n", LLVMAssemblyText);
+        abort();
+    }
+    cs_free(insn, 1);
+    cs_close(&handle);
+    return 0;
+}
diff --git a/suite/fuzz/fuzz_llvm.cpp b/suite/fuzz/fuzz_llvm.cpp
new file mode 100644
index 0000000..7e713cb
--- /dev/null
+++ b/suite/fuzz/fuzz_llvm.cpp
@@ -0,0 +1,41 @@
+#include "llvm-c/Disassembler.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+extern "C" void LLVMFuzzerInit() { // LLVM setup for all targets; fuzz_diff.c calls it once, on its first input
+ LLVMInitializeAllTargetInfos();
+ LLVMInitializeAllTargetMCs();
+ LLVMInitializeAllDisassemblers();
+}
+
+
+// Disassembles the first instruction of Data[1..Size) with LLVM for the platform
+// selected by Data[0] and writes the text to AssemblyText (room for 80 bytes).
+// Returns 0 on success, 1 if the platform is not handled or nothing was decoded.
+extern "C" int LLVMFuzzerReturnOneInput(const uint8_t *Data, size_t Size, char * AssemblyText) {
+    LLVMDisasmContextRef Ctx;
+    if (Size < 1) {
+        return 1; // no platform-selector byte to read
+    }
+    switch(Data[0]) {
+        case 0:
+            Ctx = LLVMCreateDisasmCPUFeatures("i386", "", "", nullptr, 0, nullptr, nullptr);
+            // validate the context before its first use
+            if (Ctx == nullptr || LLVMSetDisasmOptions(Ctx, LLVMDisassembler_Option_AsmPrinterVariant) == 0) {
+                abort();
+            }
+            break;
+        //TODO other cases
+        default:
+            return 1;
+    }
+    // LLVMDisasmInstruction takes a non-const buffer, so hand it a copy of the code bytes
+    std::vector<uint8_t> DataCopy(Data + 1, Data + Size);
+    int r = LLVMDisasmInstruction(Ctx, DataCopy.data(), DataCopy.size(), 0, AssemblyText, 80) > 0 ? 0 : 1;
+    LLVMDisasmDispose(Ctx);
+    return r;
+}