#!/usr/bin/env python
#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
# Runs the data-flow tracer several times on the same input in order to collect
# the complete trace for all input bytes (running it on all bytes at once
# may fail if DFSan runs out of labels).
# Usage:
#
#   # Collect dataflow for one input, store it in OUTPUT (default is stdout)
#   collect_data_flow.py BINARY INPUT [OUTPUT]
#
#   # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
#   collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
#===------------------------------------------------------------------------===#
import atexit
import hashlib
import os
import shutil
import subprocess
import sys
import tempfile

# Module-level placeholder. main() assigns its own local variable named
# tmpdir (it does not declare it global), so this value stays "".
tmpdir = ""

def cleanup(d):
    """Announce and recursively delete the directory `d`.

    main() registers this with atexit for its temporary directory.

    Args:
        d: Path of the directory tree to remove.

    Raises:
        OSError: Propagated from shutil.rmtree if the tree cannot be removed
            (for example when `d` does not exist).
    """
    print("removing: %s" % d)
    shutil.rmtree(d)

def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
    """Collect data flow for every file under `corpus_dir`.

    For each file found by walking `corpus_dir`, runs
    `self exe <file> <output_dir>/<sha1-of-file-contents>` as a subprocess.
    Afterwards runs `exe` with no arguments and stores its stdout in
    `<output_dir>/functions.txt`. Subprocess exit codes are ignored.

    Args:
        self: Command used to re-invoke this script for one input (main()
            passes argv[0]).
        exe: Path to the data-flow tracer binary.
        corpus_dir: Directory tree containing the input files.
        output_dir: Directory to create for the results; must not exist yet.

    Raises:
        AssertionError: If `output_dir` already exists.
        OSError: If `output_dir` cannot be created or a file cannot be read.
    """
    print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
                                                                 output_dir))
    assert not os.path.exists(output_dir), (
        "output_dir already exists: %s" % output_dir)
    os.mkdir(output_dir)
    for root, _dirs, files in os.walk(corpus_dir):
        for name in files:
            path = os.path.join(root, name)
            # Corpus inputs are arbitrary bytes: read in binary mode so the
            # digest is computed over the raw content (hashlib rejects str on
            # Python 3), and close the handle promptly.
            with open(path, "rb") as inp:
                sha1 = hashlib.sha1(inp.read()).hexdigest()
            output = os.path.join(output_dir, sha1)
            subprocess.call([self, exe, path, output])
    # Running the binary without arguments is expected to print the function
    # list; capture whatever it writes to stdout.
    with open(os.path.join(output_dir, "functions.txt"), "w") as functions_txt:
        subprocess.call([exe], stdout=functions_txt)

def main(argv):
    """Collect the data-flow trace for one input file, or for a whole corpus.

    If argv[2] is a directory, delegates to collect_dataflow_for_corpus with
    argv[3] as the output directory. Otherwise runs the tracer on byte ranges
    of the input, bisecting any range whose run fails, and merges the
    per-range traces with merge_data_flow.py (looked up next to argv[0]).

    Args:
        argv: Command line: [script, BINARY, INPUT, [OUTPUT]] or
            [script, BINARY, CORPUS_DIR, OUTPUT_DIR].

    Returns:
        The result of collect_dataflow_for_corpus in corpus mode, else None.

    Raises:
        IndexError: If required positional arguments are missing.
    """
    exe = argv[1]
    inp = argv[2]
    if os.path.isdir(inp):
        return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])

    size = os.path.getsize(inp)
    q = [[0, size]]  # Stack of [begin, end) byte ranges still to be traced.
    tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
    atexit.register(cleanup, tmpdir)
    # Messages are %-formatted so the output is the same on Python 2 and 3
    # (including the double space the original print statements produced).
    print("tmpdir:  %s" % tmpdir)
    outputs = []
    while q:
        r = q.pop()
        print("******* Trying:  %s" % r)
        beg, end = r
        tmpfile = os.path.join(tmpdir, str(beg) + "-" + str(end))
        ret = subprocess.call([exe, str(beg), str(end), inp, tmpfile])
        if ret and end - beg >= 2:
            # The run failed and the range can still be split: retry each
            # half. Floor division keeps the bounds integral on Python 3.
            mid = (beg + end) // 2
            q.append([beg, mid])
            q.append([mid, end])
        else:
            # Reached on success, and also when a failing range is too small
            # to split further; the file is handed to the merger either way.
            outputs.append(tmpfile)
            print("******* Success:  %s" % r)

    merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
    if len(argv) >= 4:
        # Close the output file once the merge subprocess has finished.
        with open(argv[3], "w") as f:
            subprocess.call([merge] + outputs, stdout=f)
    else:
        subprocess.call([merge] + outputs, stdout=sys.stdout)

# Script entry point: pass the full command line (including argv[0], which
# main() uses to locate merge_data_flow.py) to main().
if __name__ == '__main__':
    main(sys.argv)