Zachary Turner | 40d05cc | 2018-08-30 20:53:48 +0000 | [diff] [blame] | 1 | # Given a path to llvm-objdump and a directory tree, spider the directory tree |
| 2 | # dumping every object file encountered with correct options needed to demangle |
| 3 | # symbols in the object file, and collect statistics about failed / crashed |
| 4 | # demanglings. Useful for stress testing the demangler against a large corpus |
| 5 | # of inputs. |
| 6 | |
| 7 | import argparse |
| 8 | import functools |
| 9 | import os |
| 10 | import re |
| 11 | import sys |
| 12 | import subprocess |
| 13 | import traceback |
| 14 | from multiprocessing import Pool |
| 15 | import multiprocessing |
| 16 | |
| 17 | args = None |
| 18 | |
def parse_line(line):
    """Extract a (mangled, demangled) pair from one llvm-objdump output line.

    A relevant symbol-table line contains a mangled MSVC name (starting with
    '?') followed by the demangled form in parentheses, e.g.
    ``... ?foo@@YAXXZ(void __cdecl foo(void))``.

    Returns (None, None) for lines that do not contain such a pair.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    # Fixed: previously this re-tested `open_paren`, so a line with no
    # closing paren fell through and produced a bogus slice instead of
    # being rejected.
    if close_paren == -1:
        return None, None
    mangled = line[question : open_paren]
    demangled = line[open_paren+1 : close_paren]
    return mangled.strip(), demangled.strip()
| 33 | |
class Result(object):
    """Aggregated demangling statistics for one or more object files."""

    def __init__(self):
        # Paths of inputs on which objdump exited non-zero.
        self.crashed = []
        # Distinct mangled names that failed to demangle.
        self.errors = set()
        # Path (file or directory) this result describes; set by callers.
        self.file = None
        # Running totals of symbols seen and files processed.
        self.nsymbols = 0
        self.nfiles = 0
| 41 | |
class MapContext(object):
    """Mutable state threaded through the chunked processing loop."""

    def __init__(self):
        # Per-directory Result carried over when a chunk splits a directory.
        self.rincomplete = None
        # Totals accumulated across every completed directory.
        self.rcumulative = Result()
        # Queue of (directory, [object file paths]) awaiting processing.
        self.pending_objs = []
        # Total number of object files across all pending_objs entries.
        self.npending = 0
| 48 | |
def process_file(path, objdump):
    """Run ``objdump -t -demangle`` on one object file and collect stats.

    Returns a Result describing this single file: the path is recorded in
    ``crashed`` if objdump exits non-zero; otherwise every symbol line is
    parsed and names that failed to demangle are collected in ``errors``.
    """
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        # Any non-zero exit is counted as a demangler crash on this file.
        r.crashed = [r.file]
        return r

    # Symbol names are not guaranteed to be valid UTF-8; don't let one
    # stray byte abort the whole worker (previously raised
    # UnicodeDecodeError and killed the pool task).
    output = stdout.decode('utf-8', errors='replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        # llvm-objdump emits this marker text when demangling fails.
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r
| 70 | |
def add_results(r1, r2):
    """Fold the counters and collections of r2 into r1, in place."""
    r1.nfiles += r2.nfiles
    r1.nsymbols += r2.nsymbols
    r1.errors |= r2.errors
    r1.crashed += r2.crashed
| 76 | |
def print_result_row(directory, result):
    """Print a one-line progress summary of `result` for `directory`."""
    print("[%d files, %d crashes, %d errors, %d symbols]: '%s'"
          % (result.nfiles, len(result.crashed), len(result.errors),
             result.nsymbols, directory))
| 80 | |
def process_one_chunk(pool, chunk_size, objdump, context):
    """Demangle up to `chunk_size` pending object files on the worker pool.

    Pulls files off context.pending_objs (a queue of (dir, [paths]) pairs),
    maps process_file over them in parallel, and folds each per-file Result
    into a per-directory Result.  Fully-completed directories are printed
    and rolled into context.rcumulative; a directory split across chunks is
    parked in context.rincomplete to be resumed by the next call.
    """
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        # NOTE: the original bound this to `re`, shadowing the `re` module
        # imported at file scope; renamed to remove the hazard.
        dir_result = Result()
        if context.rincomplete is not None:
            # Resume the directory left unfinished by the previous chunk.
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        # Take as many files from this directory as the chunk still needs.
        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert(len(objs) == chunk_size or context.npending == 0)

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)
| 134 | |
def process_pending_files(pool, chunk_size, objdump, context):
    """Dispatch full-sized chunks until fewer than `chunk_size` files remain."""
    while True:
        if context.npending < chunk_size:
            break
        process_one_chunk(pool, chunk_size, objdump, context)
| 138 | |
def go():
    """Crawl args.dir for object files, demangle them all in parallel, and
    print per-directory progress rows followed by a final summary."""
    global args

    obj_dir = args.dir
    # Normalize extensions so both `o` and `.o` spellings are accepted.
    # startswith() also tolerates an empty entry (previously `x[0]` raised
    # IndexError on e.g. a trailing comma).
    extensions = args.extensions.split(',')
    extensions = [x if x.startswith('.') else '.' + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                file, ext = os.path.splitext(f)
                if not ext in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        # Everything left is less than one full chunk; finish it off.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print(" " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print(" " + f)
        print("Summary:")
        # Guard the percentages against an empty tree (previously raised
        # ZeroDivisionError when no symbols / no files were found).
        spct = float(nsuccess) / total.nsymbols if total.nsymbols else 0.0
        fpct = float(nfailed) / total.nsymbols if total.nsymbols else 0.0
        cpct = float(ncrashed) / nfiles if nfiles else 0.0
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        # Previously a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; report unexpected errors but let those propagate.
        traceback.print_exc()
    finally:
        # Always release the worker pool, even on interrupt.
        pool.close()
        pool.join()
| 209 | |
| 210 | if __name__ == "__main__": |
| 211 | def_obj = 'obj' if sys.platform == 'win32' else 'o' |
| 212 | |
| 213 | parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.') |
| 214 | parser.add_argument('dir', type=str, help='the root directory at which to start crawling') |
| 215 | parser.add_argument('--objdump', type=str, help='path to llvm-objdump. If not specified ' + |
| 216 | 'the tool is located as if by `which llvm-objdump`.') |
| 217 | parser.add_argument('--extensions', type=str, default=def_obj, |
| 218 | help='comma separated list of extensions to demangle (e.g. `o,obj`). ' + |
| 219 | 'By default this will be `obj` on Windows and `o` otherwise.') |
| 220 | |
| 221 | args = parser.parse_args() |
| 222 | |
| 223 | |
| 224 | multiprocessing.freeze_support() |
| 225 | go() |
| 226 | |