Reid Kleckner | 2f2feeb | 2019-08-27 18:31:29 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 2 | |
| 3 | from __future__ import print_function |
| 4 | |
Yuanfang Chen | 43d9f2d | 2020-01-19 17:52:08 -0800 | [diff] [blame^] | 5 | import io |
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 6 | import yaml |
| 7 | # Try to use the C parser. |
| 8 | try: |
| 9 | from yaml import CLoader as Loader |
| 10 | except ImportError: |
| 11 | print("For faster parsing, you may want to install libYAML for PyYAML") |
| 12 | from yaml import Loader |
| 13 | |
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 14 | import cgi |
Brian Gesiak | 5e0a946 | 2017-06-29 18:56:25 +0000 | [diff] [blame] | 15 | from collections import defaultdict |
Adam Nemet | 659d7db | 2017-07-17 18:00:41 +0000 | [diff] [blame] | 16 | import fnmatch |
Brian Gesiak | 5e0a946 | 2017-06-29 18:56:25 +0000 | [diff] [blame] | 17 | import functools |
| 18 | from multiprocessing import Lock |
Adam Nemet | 659d7db | 2017-07-17 18:00:41 +0000 | [diff] [blame] | 19 | import os, os.path |
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 20 | import subprocess |
Brian Gesiak | efd227f | 2017-08-11 17:56:57 +0000 | [diff] [blame] | 21 | try: |
| 22 | # The previously builtin function `intern()` was moved |
| 23 | # to the `sys` module in Python 3. |
| 24 | from sys import intern |
| 25 | except: |
| 26 | pass |
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 27 | |
Florian Hahn | 169f642 | 2019-02-06 18:43:37 +0000 | [diff] [blame] | 28 | import re |
| 29 | |
Brian Gesiak | 5e0a946 | 2017-06-29 18:56:25 +0000 | [diff] [blame] | 30 | import optpmap |
| 31 | |
# Dictionary iteration helpers that work on both Python 2 and Python 3.
# Python 2 dictionaries expose lazy `iteritems`/`itervalues` methods; on
# Python 3 those are gone and the views returned by `items()`/`values()` are
# wrapped in `iter()` instead.
if hasattr(dict, 'iteritems'):
    # Python 2
    def itervalues(d):
        """Return an iterator over the values of dictionary `d`."""
        return d.itervalues()

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of dictionary `d`."""
        return d.iteritems()
else:
    # Python 3
    def itervalues(d):
        """Return an iterator over the values of dictionary `d`."""
        return iter(d.values())

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of dictionary `d`."""
        return iter(d.items())
| 46 | |
| 47 | |
def html_file_name(filename):
    """Return the name of the HTML page for `filename`.

    Every '/' and '#' in `filename` is replaced by '_' and ".html" is
    appended.
    """
    flattened = filename
    for special in ('/', '#'):
        flattened = flattened.replace(special, '_')
    return flattened + ".html"
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 50 | |
Brian Gesiak | 9b4e897 | 2017-06-26 16:51:24 +0000 | [diff] [blame] | 51 | |
def make_link(File, Line):
    """Return a double-quoted link target of the form "<page>#L<Line>".

    <page> is the HTML file name derived from `File` by html_file_name().
    The surrounding double quotes are part of the returned string.
    """
    page = html_file_name(File)
    return '"{}#L{}"'.format(page, Line)
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 54 | |
| 55 | |
class Remark(yaml.YAMLObject):
    """Base class for an optimization remark deserialized from YAML.

    The code below reads the fields Pass, Name, Function and DebugLoc from
    the instance and fills in Hotness and Args when they are absent (see
    canonicalize()).  canonicalize() must run before `key`, hashing or
    equality are used, because those need Args to be a hashable tuple.

    All instances share one demangler subprocess, started by
    set_demangler(); demangle() must not be called before that.
    """
    # Work-around for http://pyyaml.org/ticket/154.
    yaml_loader = Loader

    default_demangler = 'c++filt -n'
    demangler_proc = None

    @classmethod
    def set_demangler(cls, demangler):
        """Start `demangler` as the shared demangling subprocess.

        `demangler` is a command line; it is split on whitespace and run
        with pipes attached to its stdin and stdout.  A fresh lock guarding
        those pipes is created as well.
        """
        cls.demangler_proc = subprocess.Popen(
            demangler.split(), stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        cls.demangler_lock = Lock()

    @classmethod
    def demangle(cls, name):
        """Return the demangler's output line for `name`.

        Writes `name` plus a newline to the demangler's stdin and reads one
        line back.  The lock keeps each write paired with its own read.
        Requires set_demangler() to have been called.
        """
        with cls.demangler_lock:
            cls.demangler_proc.stdin.write((name + '\n').encode('utf-8'))
            cls.demangler_proc.stdin.flush()
            return cls.demangler_proc.stdout.readline().rstrip().decode('utf-8')

    @staticmethod
    def _escape_html(text):
        """Return `text` with '&', '<' and '>' replaced by HTML entities.

        cgi.escape() was removed in Python 3.8, so html.escape() is
        preferred and cgi.escape() is only the Python 2 fallback.
        quote=False keeps cgi.escape()'s default of leaving quote characters
        untouched.
        """
        try:
            from html import escape
        except ImportError:
            from cgi import escape
        return escape(text, quote=False)

    # Intern all strings since we have lot of duplication across filenames,
    # remark text.
    #
    # Change Args from a list of dicts to a tuple of tuples.  This saves
    # memory in two ways.  One, a small tuple is significantly smaller than a
    # small dict.  Two, using tuple instead of list allows Args to be directly
    # used as part of the key (in Python only immutable types are hashable).
    def _reduce_memory(self):
        self.Pass = intern(self.Pass)
        self.Name = intern(self.Name)
        try:
            # Can't intern unicode strings: intern() raises TypeError for
            # anything that is not exactly `str`.  A missing Function is
            # tolerated here as well, as it was before.
            self.Function = intern(self.Function)
        except (TypeError, AttributeError):
            pass

        def _reduce_memory_dict(old_dict):
            new_dict = dict()
            for (k, v) in iteritems(old_dict):
                # Exact type checks on purpose: intern() rejects subclasses
                # of str.
                if type(k) is str:
                    k = intern(k)

                if type(v) is str:
                    v = intern(v)
                elif type(v) is dict:
                    # This handles [{'Caller': ..., 'DebugLoc': { 'File': ... }}]
                    v = _reduce_memory_dict(v)
                new_dict[k] = v
            return tuple(new_dict.items())

        self.Args = tuple([_reduce_memory_dict(arg_dict) for arg_dict in self.Args])

    # The inverse operation of the dictionary-related memory optimization in
    # _reduce_memory_dict.  E.g.
    #     (('DebugLoc', (('File', ...) ... ))) -> [{'DebugLoc': {'File': ...} ....}]
    def recover_yaml_structure(self):
        def tuple_to_dict(t):
            d = dict()
            for (k, v) in t:
                if type(v) is tuple:
                    v = tuple_to_dict(v)
                d[k] = v
            return d

        self.Args = [tuple_to_dict(arg_tuple) for arg_tuple in self.Args]

    def canonicalize(self):
        """Fill in missing optional fields and compact the remark in place.

        Hotness defaults to 0 and Args to an empty list; afterwards strings
        are interned and Args becomes a tuple of tuples.
        """
        if not hasattr(self, 'Hotness'):
            self.Hotness = 0
        if not hasattr(self, 'Args'):
            self.Args = []
        self._reduce_memory()

    @property
    def File(self):
        """The 'File' entry of DebugLoc."""
        return self.DebugLoc['File']

    @property
    def Line(self):
        """The 'Line' entry of DebugLoc, converted to int."""
        return int(self.DebugLoc['Line'])

    @property
    def Column(self):
        """The 'Column' entry of DebugLoc, as stored."""
        return self.DebugLoc['Column']

    @property
    def DebugLocString(self):
        """The location formatted as "File:Line:Column"."""
        return "{}:{}:{}".format(self.File, self.Line, self.Column)

    @property
    def DemangledFunctionName(self):
        """Function passed through the shared demangler."""
        return self.demangle(self.Function)

    @property
    def Link(self):
        """Quoted link target for this remark's File and Line."""
        return make_link(self.File, self.Line)

    def getArgString(self, mapping):
        """Render one entry of Args as a string.

        `mapping` must hold exactly one key/value pair plus an optional
        'DebugLoc' entry.  It may be given either as a tuple of (key, value)
        pairs (the form produced by canonicalize()) or as a dict (the form
        produced by recover_yaml_structure()).

        Values of 'Caller', 'Callee' and 'DirectCallee' are demangled and
        HTML-escaped.  If a DebugLoc is present and the key is not 'Caller',
        the value is wrapped in an <a href=...> element pointing at that
        location; otherwise the value is returned as is.
        """
        # dict() accepts both a dict and an iterable of pairs and always
        # yields a private copy that is safe to modify.
        mapping = dict(mapping)
        dl = mapping.get('DebugLoc')
        if dl:
            del mapping['DebugLoc']

        assert len(mapping) == 1
        (key, value) = list(mapping.items())[0]

        if key == 'Caller' or key == 'Callee' or key == 'DirectCallee':
            value = self._escape_html(self.demangle(value))

        if dl and key != 'Caller':
            dl_dict = dict(dl)
            return u"<a href={}>{}</a>".format(
                make_link(dl_dict['File'], dl_dict['Line']), value)
        else:
            return value

    # Return a cached dictionary for the arguments.  The key for each entry is
    # the argument key (e.g. 'Callee' for inlining remarks).  The value is a
    # tuple containing the value (e.g. for 'Callee' the function) and
    # optionally a DebugLoc.
    #
    # Expects Args in the tuple-of-tuples form produced by canonicalize().
    def getArgDict(self):
        if hasattr(self, 'ArgDict'):
            return self.ArgDict
        self.ArgDict = {}
        for arg in self.Args:
            if len(arg) == 2:
                # Two pairs: one of them is the DebugLoc, the other one the
                # actual argument.  Find out which is which.
                if arg[0][0] == 'DebugLoc':
                    dbgidx = 0
                else:
                    assert(arg[1][0] == 'DebugLoc')
                    dbgidx = 1

                key = arg[1 - dbgidx][0]
                entry = (arg[1 - dbgidx][1], arg[dbgidx][1])
            else:
                arg = arg[0]
                key = arg[0]
                entry = (arg[1], )

            self.ArgDict[key] = entry
        return self.ArgDict

    def getDiffPrefix(self):
        """Return '+' or '-' according to the truth of Added, '' if unset."""
        if hasattr(self, 'Added'):
            if self.Added:
                return '+'
            else:
                return '-'
        return ''

    @property
    def PassWithDiffPrefix(self):
        """Pass preceded by the diff prefix (possibly empty)."""
        return self.getDiffPrefix() + self.Pass

    @property
    def message(self):
        """Concatenation of the rendered strings of all Args entries."""
        values = [self.getArgString(mapping) for mapping in self.Args]
        return "".join(values)

    @property
    def RelativeHotness(self):
        """Hotness as a percentage of max_hotness, e.g. "12.50%".

        Returns '' when max_hotness is zero or has not been assigned.
        """
        max_hotness = getattr(self, 'max_hotness', None)
        if max_hotness:
            return "{0:.2f}%".format(self.Hotness * 100. / max_hotness)
        else:
            return ''

    @property
    def key(self):
        """Tuple identifying this remark; used for hashing and equality."""
        return (self.__class__, self.PassWithDiffPrefix, self.Name, self.File,
                self.Line, self.Column, self.Function, self.Args)

    def __hash__(self):
        return hash(self.key)

    def __eq__(self, other):
        # Objects that are not remarks have no `key`; let Python fall back to
        # its default comparison instead of raising AttributeError.
        if not isinstance(other, Remark):
            return NotImplemented
        return self.key == other.key

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        return str(self.key)
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 235 | |
| 236 | |
class Analysis(Remark):
    """Remark kind identified by the `!Analysis` YAML tag."""
    yaml_tag = '!Analysis'

    @property
    def color(self):
        """Color name reported for this kind of remark."""
        return "white"
| 243 | |
| 244 | |
class AnalysisFPCommute(Analysis):
    """Analysis remark identified by the `!AnalysisFPCommute` YAML tag."""
    yaml_tag = '!AnalysisFPCommute'
| 247 | |
| 248 | |
class AnalysisAliasing(Analysis):
    """Analysis remark identified by the `!AnalysisAliasing` YAML tag."""
    yaml_tag = '!AnalysisAliasing'
| 251 | |
| 252 | |
class Passed(Remark):
    """Remark kind identified by the `!Passed` YAML tag."""
    yaml_tag = '!Passed'

    @property
    def color(self):
        """Color name reported for this kind of remark."""
        return "green"
| 259 | |
| 260 | |
class Missed(Remark):
    """Remark kind identified by the `!Missed` YAML tag."""
    yaml_tag = '!Missed'

    @property
    def color(self):
        """Color name reported for this kind of remark."""
        return "red"
| 267 | |
class Failure(Missed):
    """Missed remark identified by the `!Failure` YAML tag.

    Inherits everything else, including `color`, from Missed.
    """
    yaml_tag = '!Failure'
Adam Nemet | b7278af | 2017-03-01 21:35:00 +0000 | [diff] [blame] | 270 | |
def get_remarks(input_file, filter_=None):
    """Load the remarks stored in one YAML file.

    Args:
        input_file: path of the file; it is read as UTF-8.
        filter_: optional regular expression.  When given, only remarks
            whose Pass matches it (re.search) are kept.

    Returns:
        A tuple (max_hotness, all_remarks, file_remarks) where all_remarks
        maps remark.key to the remark and file_remarks maps
        File -> Line -> list of remarks.

    NOTE(review): the documents are built with the full PyYAML Loader, which
    is what instantiates the Remark classes; only feed this trusted files.
    """
    hottest = 0
    unique_remarks = {}
    per_file = defaultdict(functools.partial(defaultdict, list))

    with io.open(input_file, encoding='utf-8') as stream:
        documents = yaml.load_all(stream, Loader=Loader)
        pass_filter = re.compile(filter_) if filter_ else None

        for remark in documents:
            remark.canonicalize()

            # Skip remarks without a debug location ...
            if not hasattr(remark, 'DebugLoc'):
                continue
            # ... and remarks that were already seen in this file.
            remark_key = remark.key
            if remark_key in unique_remarks:
                continue
            if pass_filter is not None and not pass_filter.search(remark.Pass):
                continue

            unique_remarks[remark_key] = remark
            per_file[remark.File][remark.Line].append(remark)

            # If we're reading back a diff yaml file, max_hotness is already
            # captured which may actually be less than the max hotness found
            # in the file.
            if hasattr(remark, 'max_hotness'):
                hottest = remark.max_hotness
            hottest = max(hottest, remark.Hotness)

    return hottest, unique_remarks, per_file
| 303 | |
| 304 | |
def gather_results(filenames, num_jobs, should_print_progress, filter_=None):
    """Read all remark files and merge their contents.

    Args:
        filenames: paths of the YAML files to read; may be empty.
        num_jobs: forwarded to optpmap.pmap.
        should_print_progress: when true, a status line is printed; the
            flag is also forwarded to optpmap.pmap.
        filter_: optional regular expression forwarded to get_remarks().

    Returns:
        A tuple (all_remarks, file_remarks, has_hotness).  all_remarks maps
        remark.key to the remark, file_remarks maps File -> Line -> list of
        remarks, and has_hotness is True when the maximum hotness over all
        files is non-zero.
    """
    if should_print_progress:
        print('Reading YAML files...')
    if not Remark.demangler_proc:
        Remark.set_demangler(Remark.default_demangler)
    remarks = optpmap.pmap(
        get_remarks, filenames, num_jobs, should_print_progress, filter_)
    # max() raises ValueError on an empty sequence, which used to happen when
    # no input files were given.  Fall back to 0, the same starting value
    # get_remarks() uses for a file.
    max_hotness = max([entry[0] for entry in remarks] or [0])

    def merge_file_remarks(file_remarks_job, all_remarks, merged):
        for filename, d in iteritems(file_remarks_job):
            for line, remarks in iteritems(d):
                for remark in remarks:
                    # Bring max_hotness into the remarks so that
                    # RelativeHotness does not depend on an external global.
                    remark.max_hotness = max_hotness
                    # Remarks already contributed by an earlier file are not
                    # listed again.
                    if remark.key not in all_remarks:
                        merged[filename][line].append(remark)

    all_remarks = dict()
    file_remarks = defaultdict(functools.partial(defaultdict, list))
    for _, all_remarks_job, file_remarks_job in remarks:
        # Merge before updating all_remarks: the duplicate check above must
        # only see remarks from previously processed files.
        merge_file_remarks(file_remarks_job, all_remarks, file_remarks)
        all_remarks.update(all_remarks_job)

    return all_remarks, file_remarks, max_hotness != 0
Adam Nemet | 659d7db | 2017-07-17 18:00:41 +0000 | [diff] [blame] | 331 | |
| 332 | |
def find_opt_files(*dirs_or_files):
    """Collect remark files from the given files and directories.

    An argument that is an existing regular file is taken as is.  Any other
    argument is walked as a directory tree, and every file whose name
    matches "*.opt.yaml*" is collected.

    Returns:
        A list of paths, in the order they were found.
    """
    found = []
    for entry in dirs_or_files:
        if os.path.isfile(entry):
            found.append(entry)
            continue

        for root, subdirs, names in os.walk(entry):
            # Exclude mounted directories and symlinks (os.walk default).
            # Assigning through the slice prunes the walk in place.
            subdirs[:] = [sub for sub in subdirs
                          if not os.path.ismount(os.path.join(root, sub))]
            found.extend(os.path.join(root, name)
                         for name in fnmatch.filter(names, "*.opt.yaml*"))
    return found