Kostya Serebryany | b415032 | 2013-11-15 11:51:08 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # Merge or print the coverage data collected by asan's coverage. |
# Coverage (.sancov) files are an 8-byte magic header followed by a
# sequence of 32-bit or 64-bit PCs (the header says which).
| 4 | # We need to merge these integers into a set and then |
| 5 | # either print them (as hex) or dump them into another file. |
| 6 | import array |
Timur Iskhodzhanov | 882bc56 | 2015-04-01 14:46:10 +0000 | [diff] [blame] | 7 | import bisect |
| 8 | import glob |
| 9 | import os.path |
Sergey Matveev | 6cb47a08 | 2014-05-19 12:53:03 +0000 | [diff] [blame] | 10 | import struct |
Sergey Matveev | c2de346 | 2015-05-06 20:48:29 +0000 | [diff] [blame] | 11 | import subprocess |
Kostya Serebryany | b415032 | 2013-11-15 11:51:08 +0000 | [diff] [blame] | 12 | import sys |
| 13 | |
# Name this script was invoked as. The __main__ block overwrites it with
# sys.argv[0]; it is used as the prefix of every diagnostic message.
prog_name = ""


def Usage():
    """Write the command-line synopsis to stderr and exit with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    commands = (
        "merge FILE [FILE...] > OUTPUT",
        "print FILE [FILE...]",
        "unpack FILE [FILE...]",
        "rawunpack FILE [FILE ...]",
        "missing BINARY < LIST_OF_PCS",
    )
    text = "Usage: \n" + "".join(
        " %s %s\n" % (prog_name, command) for command in commands)
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # `print >> sys.stderr` statement. The extra "\n" keeps the blank line
    # that the print statement used to append.
    sys.stderr.write(text + "\n")
    # sys.exit rather than the `exit` builtin, which is injected by the
    # `site` module and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(1)
| 24 | |
def CheckBits(bits):
    """Validate a PC width.

    Args:
        bits: PC width in bits; only 32 and 64 are accepted.

    Raises:
        Exception: if bits is neither 32 nor 64.
    """
    if bits not in (32, 64):
        raise Exception("Wrong bitness: %d" % bits)
Kostya Serebryany | 9f1243e | 2015-03-17 22:09:19 +0000 | [diff] [blame] | 28 | |
def TypeCodeForBits(bits):
    """Return the one-letter type code for PCs of the given width.

    Returns 'L' for 64-bit PCs and 'I' for 32-bit PCs; any other width is
    rejected by CheckBits.

    NOTE(review): presumably meant for the array module (the struct variant
    is TypeCodeForStruct). 'L' is a C unsigned long, whose size depends on
    the platform, so it is not guaranteed to be 64 bits wide -- confirm
    before adding new callers.
    """
    CheckBits(bits)
    if bits == 64:
        return 'L'
    return 'I'
| 32 | |
def TypeCodeForStruct(bits):
    """Return the struct-module format character for PCs of the given width.

    Returns 'Q' for 64-bit PCs and 'I' for 32-bit PCs; any other width is
    rejected by CheckBits.
    """
    CheckBits(bits)
    if bits == 64:
        return 'Q'
    return 'I'
| 36 | |
# A coverage file starts with an 8-byte magic value, handled in this script
# as two 32-bit words. The first half is common to all files; the second
# half tells 32-bit PC files apart from 64-bit ones.
kMagicFirstHalf = 0xC0BFFFFF
kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
Kostya Serebryany | 2d56aba | 2015-03-18 22:03:39 +0000 | [diff] [blame] | 40 | |
def MagicForBits(bits):
    """Return the magic header for the given PC width as two 32-bit words.

    The words are ordered for the host: on a little-endian machine the
    bitness-specific second half comes first, otherwise the common first
    half does, so that writing both words natively stores the second half
    as the low 32 bits of one 64-bit value.

    Raises:
        Exception: if bits is neither 32 nor 64 (via CheckBits).
    """
    CheckBits(bits)
    second_half = kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf
    if sys.byteorder == 'little':
        return [second_half, kMagicFirstHalf]
    return [kMagicFirstHalf, second_half]
Kostya Serebryany | 2d56aba | 2015-03-18 22:03:39 +0000 | [diff] [blame] | 47 | |
def ReadMagicAndReturnBitness(f, path):
    """Consume the 8-byte magic header from f and return the PC width.

    Args:
        f: binary file object positioned at the start of the header.
        path: file name, used only in the error message.

    Returns:
        32 or 64, depending on which magic value was found.

    Raises:
        Exception: if the header matches neither known magic value.
    """
    header = f.read(8)
    words = struct.unpack('II', header)
    # The header is read as two native 32-bit words; which of them holds
    # the common first half depends on the host byte order.
    if sys.byteorder == 'little':
        second_half, first_half = words
    else:
        first_half, second_half = words
    if first_half == kMagicFirstHalf:
        if second_half == kMagic64SecondHalf:
            return 64
        if second_half == kMagic32SecondHalf:
            return 32
    raise Exception('Bad magic word in %s' % path)
| 61 | |
def ReadOneFile(path):
    """Read every PC stored in one coverage file.

    Args:
        path: coverage file: an 8-byte magic header followed by PCs.

    Returns:
        Tuple of PCs (ints) in file order. Trailing bytes that do not make
        up a whole PC are ignored.

    Raises:
        Exception: if the file is shorter than the magic header or the
            magic value is not recognised.
    """
    with open(path, mode="rb") as f:
        # Seek to the end to learn the size, then rewind.
        f.seek(0, 2)
        size = f.tell()
        f.seek(0, 0)
        if size < 8:
            raise Exception('File %s is short (< 8 bytes)' % path)
        bits = ReadMagicAndReturnBitness(f, path)
        payload = f.read(size - 8)
    # Floor division: plain `/` yields a float on Python 3, which cannot be
    # used as a repeat count.
    count = len(payload) * 8 // bits
    # A repeat-count format ("123Q") avoids building a format string with
    # one character per PC.
    pcs = struct.unpack_from("%d%s" % (count, TypeCodeForStruct(bits)),
                             payload)
    sys.stderr.write("%s: read %d %d-bit PCs from %s\n" %
                     (prog_name, count, bits, path))
    return pcs
| 74 | |
def Merge(files):
    """Return the sorted union of the PCs found in all given coverage files.

    Args:
        files: list of coverage file paths.

    Returns:
        Sorted list of unique PCs.
    """
    pcs = set()
    for path in files:
        # In-place update; set.union() would copy the accumulated set once
        # per input file.
        pcs.update(ReadOneFile(path))
    sys.stderr.write("%s: %d files merged; %d PCs total\n" %
                     (prog_name, len(files), len(pcs)))
    return sorted(pcs)
| 82 | |
def PrintFiles(files):
    """Print the PCs from the given coverage files to stdout, one per line.

    With several files the PCs are merged (unique and sorted). With a single
    file they are printed exactly as stored, duplicates included.

    Args:
        files: non-empty list of coverage file paths.
    """
    if len(files) > 1:
        pcs = Merge(files)
    else:  # If there is just one file, print the PCs in file order.
        pcs = ReadOneFile(files[0])
        sys.stderr.write("%s: 1 file merged; %d PCs total\n" %
                         (prog_name, len(pcs)))
    for pc in pcs:
        # write() instead of the print statement so that the script runs on
        # both Python 2 and Python 3.
        sys.stdout.write("0x%x\n" % pc)
| 92 | |
def MergeAndPrint(files):
    """Merge coverage files and write the result to stdout in binary form.

    The output is the magic header followed by the sorted, unique PCs. It is
    written as 64-bit if any PC does not fit in 32 bits, else as 32-bit.
    Prints usage and exits if stdout is a terminal.

    Args:
        files: list of coverage file paths.
    """
    if sys.stdout.isatty():
        Usage()
    pcs = Merge(files)
    # Guard the empty case: max() of an empty sequence raises ValueError.
    bits = 64 if pcs and max(pcs) > 0xFFFFFFFF else 32
    # On Python 3 binary data must go to the underlying byte stream; on
    # Python 2 sys.stdout itself accepts byte strings.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    # Two native unsigned ints: the same bytes array.array('I').tofile()
    # would produce.
    out.write(struct.pack('II', *MagicForBits(bits)))
    out.write(struct.pack("%d%s" % (len(pcs), TypeCodeForStruct(bits)),
                          *pcs))
    out.flush()
Kostya Serebryany | b415032 | 2013-11-15 11:51:08 +0000 | [diff] [blame] | 103 | |
Sergey Matveev | 6cb47a08 | 2014-05-19 12:53:03 +0000 | [diff] [blame] | 104 | |
def UnpackOneFile(path):
    """Split one packed coverage file into per-module, per-pid files.

    The packed file is a sequence of records. Each record is a 12-byte
    header (pid, module name length, blob size) followed by the module name
    and the blob. Every blob is appended to "<module>.<pid>.sancov".

    NOTE(review): the module name comes from the packed file and is used
    verbatim as the output path prefix; only unpack files you trust.

    Args:
        path: packed coverage file to read.

    Raises:
        Exception: if the file ends in the middle of a record.
    """
    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        while True:
            header = f.read(12)
            if not header:
                return  # Clean end of file.
            if len(header) < 12:
                raise Exception('Error reading file %s' % path)
            pid, module_length, blob_size = struct.unpack('iII', header)
            module = f.read(module_length)
            blob = f.read(blob_size)
            # Explicit check instead of assert: asserts vanish under -O and
            # a truncated record must never be written out silently.
            if len(module) != module_length or len(blob) != blob_size:
                raise Exception('Error reading file %s' % path)
            # On Python 3 the name is bytes; decode it so the output file is
            # not literally called "b'...'". On Python 2 it is already str.
            if not isinstance(module, str):
                module = os.fsdecode(module)
            extracted_file = "%s.%d.sancov" % (module, pid)
            sys.stderr.write("%s: extracting %s\n" %
                             (prog_name, extracted_file))
            # The packed file may contain multiple blobs for the same
            # pid/module pair. Append to the end of the file instead of
            # overwriting.
            with open(extracted_file, 'ab') as f2:
                f2.write(blob)
| 127 | |
| 128 | |
def Unpack(files):
    """Unpack each packed coverage file in files, in the order given."""
    for packed_path in files:
        UnpackOneFile(packed_path)
| 132 | |
def _ReadRawMap(map_path):
    """Parse a raw-coverage map file into (bits, mem_map).

    The first line holds the PC width (32 or 64). Every following non-blank
    line is "START END BASE MODULE_PATH" with the three numbers in hex.
    mem_map is a list of (start, end, base, module_path) sorted by start.
    """
    mem_map = []
    with open(map_path, mode="rt") as f_map:
        sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
        bits = int(f_map.readline())
        if bits != 32 and bits != 64:
            raise Exception('Wrong bits size in the map')
        for line in f_map:
            parts = line.rstrip().split()
            if not parts:
                continue  # Tolerate blank lines.
            mem_map.append((int(parts[0], 16),
                            int(parts[1], 16),
                            int(parts[2], 16),
                            # The module path may itself contain spaces.
                            ' '.join(parts[3:])))
    mem_map.sort(key=lambda m: m[0])
    return bits, mem_map


def UnpackOneRawFile(path, map_path):
    """Convert one raw coverage dump into per-module coverage files.

    Each non-zero PC in the raw file is attributed to the mapping whose
    [start, end) range contains it and rebased by subtracting that mapping's
    base. The sorted offsets of each module are appended to
    "<module_path>.<raw file name without .raw>".

    Args:
        path: raw dump; must end in ".sancov.raw" if it yields any output.
        map_path: map file describing the process memory layout.

    Raises:
        Exception: on a bad bit width in the map or an unexpected raw file
            name.
    """
    bits, mem_map = _ReadRawMap(map_path)
    mem_map_keys = [m[0] for m in mem_map]
    type_code = TypeCodeForStruct(bits)

    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        data = f.read()
    # Floor division keeps the count an int on Python 3 as well.
    count = len(data) * 8 // bits
    pcs = struct.unpack_from("%d%s" % (count, type_code), data)

    mem_map_pcs = [[] for _ in mem_map]
    for pc in pcs:
        if pc == 0:
            continue
        map_idx = bisect.bisect(mem_map_keys, pc) - 1
        # map_idx == -1 means the PC lies below every mapping (or the map is
        # empty). Without this test, index -1 would silently select the
        # *last* mapping.
        if map_idx < 0 or pc >= mem_map[map_idx][1]:
            sys.stderr.write(
                "warning: %s: pc %x outside of any known mapping\n" %
                (prog_name, pc))
            continue
        mem_map_pcs[map_idx].append(pc - mem_map[map_idx][2])

    for (start, end, base, module_path), pc_list in zip(mem_map, mem_map_pcs):
        if not pc_list:
            continue
        if not path.endswith('.sancov.raw'):
            raise Exception('Unexpected raw file name %s' % path)
        # Strip the trailing ".raw" to obtain "<name>.sancov".
        dst_path = module_path + '.' + os.path.basename(path)[:-4]
        sys.stderr.write("%s: writing %d PCs to %s\n" %
                         (prog_name, len(pc_list), dst_path))
        pc_buffer = struct.pack("%d%s" % (len(pc_list), type_code),
                                *sorted(pc_list))
        with open(dst_path, 'ab') as f2:
            f2.seek(0, 2)
            # Write the magic header only into a new (empty) file. Repeating
            # it when appending would plant it in the middle of the PC list,
            # where readers would take it for a PC.
            if f2.tell() == 0:
                f2.write(struct.pack('II', *MagicForBits(bits)))
            f2.write(pc_buffer)
Evgeniy Stepanov | 567e516 | 2014-05-27 12:37:52 +0000 | [diff] [blame] | 179 | |
def RawUnpack(files):
    """Unpack each raw coverage dump using the map file that sits next to it.

    For "X.sancov.raw" the map is expected at "X.sancov.map".

    Raises:
        Exception: if a file name does not end in ".sancov.raw".
    """
    for raw_path in files:
        if not raw_path.endswith('.sancov.raw'):
            raise Exception('Unexpected raw file name %s' % raw_path)
        map_path = raw_path[:-len('raw')] + 'map'
        UnpackOneRawFile(raw_path, map_path)
Sergey Matveev | 6cb47a08 | 2014-05-19 12:53:03 +0000 | [diff] [blame] | 186 | |
def GetInstrumentedPCs(binary):
    """Return the set of PCs at which binary calls the coverage callback.

    Runs `objdump -d` on the binary and collects the address of every
    call/callq to __sanitizer_cov() or __sanitizer_cov_with_check(), made
    either directly or via the PLT.

    Args:
        binary: path of the instrumented executable or library.

    Returns:
        Set of ints: each call instruction's address plus 4.
    """
    import re  # Local import: this is the only user of `re` in the file.

    # Bytes pattern: objdump output is matched undecoded on Python 2 and 3.
    call_re = re.compile(
        br'^\s+([0-9a-f]+):.*\scallq?\s+[0-9a-f]+ '
        br'<__sanitizer_cov(?:_with_check)?(?:@plt)?>')
    # Argument list and no shell: a path containing spaces or shell
    # metacharacters is passed through untouched and cannot inject commands.
    proc = subprocess.Popen(["objdump", "-d", binary],
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Close the child's stdin so it can never consume this script's input.
    proc.stdin.close()
    pcs = set()
    for line in proc.stdout:
        match = call_re.match(line)
        if match:
            # The PCs we get from objdump are off by 4 bytes, as they point
            # to the beginning of the callq instruction. Empirically this is
            # true on x86 and x86_64.
            pcs.add(int(match.group(1), 16) + 4)
    proc.stdout.close()
    proc.wait()  # Reap the child instead of leaving a zombie behind.
    return pcs
| 202 | |
def PrintMissing(binary):
    """Print the instrumented PCs of binary that are absent from stdin.

    Reads covered PCs from stdin (one hex number per line, blank lines
    ignored) and prints every instrumented PC not among them to stdout,
    sorted, one "0x..." value per line. Progress and warnings go to stderr.

    Args:
        binary: path of the instrumented executable or library.

    Raises:
        Exception: if binary is not an existing file.
    """
    if not os.path.isfile(binary):
        raise Exception('File not found: %s' % binary)
    instrumented = GetInstrumentedPCs(binary)
    sys.stderr.write("%s: found %d instrumented PCs in %s\n" %
                     (prog_name, len(instrumented), binary))
    # Skip blank lines; int() would raise ValueError on them.
    covered = set(int(line, 16) for line in sys.stdin if line.strip())
    sys.stderr.write("%s: read %d PCs from stdin\n" %
                     (prog_name, len(covered)))
    missing = instrumented - covered
    sys.stderr.write("%s: %d PCs missing from coverage\n" %
                     (prog_name, len(missing)))
    # Same condition as comparing len(missing) with
    # len(instrumented) - len(covered), stated directly.
    if not covered.issubset(instrumented):
        sys.stderr.write(
            "%s: WARNING: stdin contains PCs not found in binary\n" %
            prog_name)
    for pc in sorted(missing):
        sys.stdout.write("0x%x\n" % pc)
| 219 | |
if __name__ == '__main__':
    # Command-line entry point: sancov.py COMMAND ARG [ARG...]
    prog_name = sys.argv[0]
    if len(sys.argv) <= 2:
        Usage()

    command = sys.argv[1]

    # "missing" takes exactly one binary and reads the PC list from stdin.
    if command == "missing":
        if len(sys.argv) != 3:
            Usage()
        PrintMissing(sys.argv[2])
        # sys.exit instead of the site-provided `exit` builtin.
        sys.exit(0)

    # Every other command takes files; expand wildcards ourselves for
    # shells that do not.
    file_list = []
    for pattern in sys.argv[2:]:
        file_list += glob.glob(pattern)
    if not file_list:
        Usage()

    handlers = {
        "print": PrintFiles,
        "merge": MergeAndPrint,
        "unpack": Unpack,
        "rawunpack": RawUnpack,
    }
    handler = handlers.get(command)
    if handler is None:
        Usage()  # Unknown command; Usage() exits.
    handler(file_list)