Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | CmpRuns - A simple tool for comparing two static analyzer runs to determine |
| 5 | which reports have been added, removed, or changed. |
| 6 | |
| 7 | This is designed to support automated testing using the static analyzer, from |
Ted Kremenek | 3a0678e | 2015-09-08 03:50:52 +0000 | [diff] [blame] | 8 | two perspectives: |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 9 | 1. To monitor changes in the static analyzer's reports on real code bases, |
| 10 | for regression testing. |
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 11 | |
| 12 | 2. For use by end users who want to integrate regular static analyzer testing |
| 13 | into a buildbot like environment. |
Anna Zaks | 9b7d714 | 2012-07-16 20:21:42 +0000 | [diff] [blame] | 14 | |
| 15 | Usage: |
| 16 | |
| 17 | # Load the results of both runs, to obtain lists of the corresponding |
| 18 | # AnalysisDiagnostic objects. |
Anna Zaks | 45a992b | 2012-08-02 00:41:40 +0000 | [diff] [blame] | 19 | # |
Anna Zaks | c80313b | 2012-10-15 22:48:21 +0000 | [diff] [blame] | 20 | resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty) |
| 21 | resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty) |
Ted Kremenek | 3a0678e | 2015-09-08 03:50:52 +0000 | [diff] [blame] | 22 | |
    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of pairs (a, b).
    diff = compareResults(resultsA, resultsB, opts)
Ted Kremenek | 3a0678e | 2015-09-08 03:50:52 +0000 | [diff] [blame] | 26 | |
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 27 | """ |
Serge Guelton | 3744de5 | 2018-12-18 08:38:50 +0000 | [diff] [blame] | 28 | from __future__ import division, print_function |
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 29 | |
Mikhail R. Gadelha | 8af2e69 | 2018-05-28 15:40:39 +0000 | [diff] [blame] | 30 | from collections import defaultdict |
| 31 | |
George Karpenkov | b704322 | 2018-02-01 22:25:18 +0000 | [diff] [blame] | 32 | from math import log |
George Karpenkov | 3959041 | 2018-02-09 18:48:31 +0000 | [diff] [blame] | 33 | from optparse import OptionParser |
Mikhail R. Gadelha | 8af2e69 | 2018-05-28 15:40:39 +0000 | [diff] [blame] | 34 | import json |
| 35 | import os |
| 36 | import plistlib |
| 37 | import re |
| 38 | import sys |
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 39 | |
# Captures the brace-delimited payload that follows the literal text
# "Statistics: ". DOTALL lets '.' cross newlines, so the payload may span
# several lines.
STATS_REGEXP = re.compile(r"Statistics: (\{.+\})",
                          flags=re.DOTALL | re.MULTILINE)
| 41 | |
class Colors(object):
    """
    Escape sequences used to highlight terminal output.

    CLEAR resets the attributes set by RED or GREEN.
    """
    CLEAR = '\x1b[0m'
    GREEN = '\x1b[6;30;42m'
    RED = '\x1b[2;30;41m'
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 49 | |
class SingleRunInfo(object):
    """
    Information about an analysis run.

    path       - the analysis output directory
    root       - the name of the root directory, which will be disregarded
                 when determining the source file name
    verboseLog - stored unchanged on the instance
    """
    def __init__(self, path, root="", verboseLog=None):
        self.verboseLog = verboseLog
        # Trailing '/' and '\' characters are removed from the root.
        self.root = root.rstrip("/\\")
        self.path = path
| 59 | |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 60 | |
class AnalysisDiagnostic(object):
    """
    A single diagnostic entry together with the report it belongs to.

    data       - the raw diagnostic dictionary; 'location' and 'path' are
                 read on construction, 'category' and 'description' by the
                 corresponding getters
    report     - the owning report object; its `files` list and `run`
                 attribute (`run.root`, `run.path`) are consulted
    htmlReport - file name of the HTML report for this diagnostic, or None
    """
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport
        self._reportSize = len(self._data['path'])

    def _stripRoot(self, fileName):
        """
        Return fileName with the run's root directory prefix removed.

        The prefix (and the separator following it) is only removed when
        the root ends on a path boundary, so root "/proj" does not mangle
        "/project/a.c". An empty root leaves the name untouched.
        """
        root = self._report.run.root
        if not root or not fileName.startswith(root):
            return fileName
        remainder = fileName[len(root):]
        if remainder and remainder[0] not in "/\\":
            # The root matched only part of a directory name.
            return fileName
        return remainder[1:]

    def getFileName(self):
        """
        Return the file of the diagnostic location, relative to the root.
        """
        return self._stripRoot(self._report.files[self._loc['file']])

    def getRootFileName(self):
        """
        Return the file in which the diagnostic path starts, relative to
        the root. Falls back to getFileName() when the path is empty.
        """
        path = self._data['path']
        if not path:
            return self.getFileName()
        first = path[0]
        if 'location' in first:
            fIdx = first['location']['file']
        else:  # control edge
            fIdx = first['edges'][0]['start'][0]['file']
        # Strip the root the same way getFileName() does, so that
        # getReadableName() can compare the two names meaningfully.
        return self._stripRoot(self._report.files[fIdx])

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getPathLength(self):
        """
        Return the number of entries in the diagnostic's 'path' list.
        """
        return self._reportSize

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """
        Return the string used to match diagnostics between runs: the file
        name, then the issue context and the line-content hash when the
        diagnostic provides them.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            identifier += str(
                self._data['issue_hash_content_of_line_in_context'])
        return identifier

    def getReport(self):
        """
        Return the path of the HTML report inside the run's output
        directory, or a single space when there is no HTML report.
        """
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """
        Return a one-line description of the diagnostic. The file in which
        the path starts is shown in brackets when it differs from the file
        of the diagnostic location.
        """
        if 'issue_context' in self._data:
            funcnamePostfix = "#" + self._data['issue_context']
        else:
            funcnamePostfix = ""
        rootFilename = self.getRootFileName()
        fileName = self.getFileName()
        if rootFilename != fileName:
            filePrefix = "[%s] %s" % (rootFilename, fileName)
        else:
            filePrefix = rootFilename
        return '%s%s:%d:%d, %s: %s' % (filePrefix,
                                       funcnamePostfix,
                                       self.getLine(),
                                       self.getColumn(), self.getCategory(),
                                       self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 140 | |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 141 | |
class AnalysisReport(object):
    """
    Container tying a run and a list of files to the diagnostics that are
    later appended to `diagnostics`.
    """
    def __init__(self, run, files):
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 147 | |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 148 | |
class AnalysisRun(object):
    """
    Accumulates the reports, diagnostics and statistics read from the plist
    files of one analysis run.

    info - object providing `path` and `root` (see SingleRunInfo)
    """
    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None
        self.stats = []

    def getClangVersion(self):
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """
        Read the plist file `p` and add its contents to this run.

        p           - path of the plist file
        deleteEmpty - when true, a file whose 'files' list is empty is
                      removed from disk
        """
        # plistlib.readPlist() no longer exists on Python 3.9+; keep it
        # only as a fallback for interpreters that lack plistlib.load().
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        if 'statistics' in data:
            self.stats.append(json.loads(data.pop('statistics')))

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. An empty diagnostics
        # list simply yields no HTML reports.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every known top-level key has been consumed by now.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
| 204 | |
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 205 | |
def loadResults(path, opts, root="", deleteEmpty=True):
    """
    Backwards compatibility API.

    Bundles path, root and opts.verboseLog into a SingleRunInfo and hands
    it to loadResultsFromSingleRun.
    """
    info = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(info, deleteEmpty)
| 212 | |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 213 | |
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """
    Load the analysis results found at info.path.

    - info is the SingleRunInfo object
    - deleteEmpty specifies if the empty plist files should be deleted

    When info.path is a regular file only that file is read; otherwise the
    directory tree below it is walked and every file whose name ends in
    'plist' is read. Returns the resulting AnalysisRun.
    """
    location = info.path
    run = AnalysisRun(info)

    if os.path.isfile(location):
        run.readSingleFile(location, deleteEmpty)
        return run

    for dirpath, _dirnames, filenames in os.walk(location):
        for name in filenames:
            if name.endswith('plist'):
                run.readSingleFile(os.path.join(dirpath, name), deleteEmpty)

    return run
| 235 | |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 236 | |
def cmpAnalysisDiagnostic(d):
    """
    Sort key for diagnostics: the diagnostic's issue identifier.
    """
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaks | d60367b | 2012-06-08 01:50:49 +0000 | [diff] [blame] | 239 | |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 240 | |
def compareResults(A, B, opts=None):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    A, B - run objects exposing a `diagnostics` list
    opts - optional options object; the attributes relative_path_histogram,
           relative_log_path_histogram and absolute_path_histogram select a
           histogram of path length differences (requires matplotlib).
           When opts is None or an attribute is absent, no histogram is
           produced.

    The result is the relation as a list of pairs (a, b) where
    each element {a,b} is None or a matching element from the respective run
    """
    relative = getattr(opts, 'relative_path_histogram', False)
    relativeLog = getattr(opts, 'relative_log_path_histogram', False)
    absolute = getattr(opts, 'absolute_path_histogram', False)

    res = []

    # Path length differences of the matched diagnostics, in the unit
    # selected by the histogram options.
    path_difference_data = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    eltsB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            lenA = a.getPathLength()
            lenB = b.getPathLength()
            if lenA != lenB:
                if relative:
                    # The ratio is undefined for an empty path in B.
                    if lenB:
                        path_difference_data.append(float(lenA) / lenB)
                elif relativeLog:
                    # log() needs a strictly positive, finite ratio.
                    if lenA and lenB:
                        path_difference_data.append(
                            log(float(lenA) / lenB))
                elif absolute:
                    path_difference_data.append(lenA - lenB)

            res.append((a, b))
        elif idA > idB:
            # a has no counterpart in B; retry b against the next a.
            eltsB.append(b)
            neqA.append(a)
        else:
            # b has no counterpart in A; retry a against the next b.
            eltsA.append(a)
            neqB.append(b)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    res.extend((a, None) for a in neqA)
    res.extend((None, b) for b in neqB)

    if relativeLog or relative or absolute:
        # Imported lazily so matplotlib is only needed for histograms.
        from matplotlib import pyplot
        pyplot.hist(path_difference_data, bins=100)
        pyplot.show()

    return res
| 305 | |
def computePercentile(l, percentile):
    """
    Return computed percentile.

    l          - non-empty iterable of comparable values
    percentile - fraction, normally between 0 and 1 (e.g. 0.9 for the
                 90th percentile)

    The rank is round(percentile * len(l) + 0.5); it is clamped to the
    valid index range so that percentile 0 yields the minimum and
    percentile 1 the maximum instead of wrapping around or running past
    the end.

    Raises ValueError when l is empty.
    """
    ordered = sorted(l)
    if not ordered:
        raise ValueError("computePercentile() requires a non-empty sequence")
    rank = int(round(percentile * len(ordered) + 0.5))
    index = min(max(rank - 1, 0), len(ordered) - 1)
    return ordered[index]
| 311 | |
def deriveStats(results):
    """
    Summarise the statistics of one run.

    Collects, per statistic name, every value found in results.stats plus
    the path length of every diagnostic (under 'PathsLength'), and returns
    a dict mapping str(name) to its max, min, mean, 90th/95th percentile,
    median and total.
    """
    # Assume all keys are the same in each statistics bucket.
    combined_data = defaultdict(list)

    # Collect data on paths length. The key is only created when there is
    # at least one diagnostic.
    for report in results.reports:
        lengths = [diag.getPathLength() for diag in report.diagnostics]
        if lengths:
            combined_data['PathsLength'].extend(lengths)

    for bucket in results.stats:
        for name, value in bucket.items():
            combined_data[name].append(value)

    combined_stats = {}
    for name, values in combined_data.items():
        count = len(values)
        total = sum(values)
        summary = {}
        summary["max"] = max(values)
        summary["min"] = min(values)
        summary["mean"] = total / count
        summary["90th %tile"] = computePercentile(values, 0.9)
        summary["95th %tile"] = computePercentile(values, 0.95)
        summary["median"] = sorted(values)[count // 2]
        summary["total"] = total
        combined_stats[str(name)] = summary
    return combined_stats
| 336 | |
| 337 | |
def compareStats(resultsA, resultsB):
    """
    Print, for every statistic of run A, how each derived value changed in
    run B ("old -> new").

    When writing to a TTY on a non-Windows system, a line is highlighted
    green if (valB - valA) / valB is below -0.2 and red if it is above 0.2.
    A statistic that run B does not have at all (for example 'PathsLength'
    when B contains no diagnostics) is reported as such instead of raising
    KeyError.
    """
    statsA = deriveStats(resultsA)
    statsB = deriveStats(resultsB)
    # Only apply highlighting when writing to TTY and it's not Windows
    highlight = sys.stdout.isatty() and os.name != 'nt'
    for key in sorted(statsA):
        print(key)
        if key not in statsB:
            print("\t (no data in the second run)")
            continue
        for kkey in statsA[key]:
            valA = float(statsA[key][kkey])
            valB = float(statsB[key][kkey])
            report = "%.3f -> %.3f" % (valA, valB)
            # The ratio is relative to valB, so it is skipped when valB is 0.
            if highlight and valB != 0:
                ratio = (valB - valA) / valB
                if ratio < -0.2:
                    report = Colors.GREEN + report + Colors.CLEAR
                elif ratio > 0.2:
                    report = Colors.RED + report + Colors.CLEAR
            print("\t %s %s" % (kkey, report))
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 357 | |
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
                             Stdout=sys.stdout):
    """
    Compare the results stored in dirA and dirB and write the differences.

    dirA, dirB  - locations of the two runs, passed to loadResults
    opts        - options object; rootA, rootB, show_stats, stats_only and
                  verboseLog are read here, and it is forwarded to
                  loadResults and compareResults
    deleteEmpty - forwarded to loadResults
    Stdout      - file-like object receiving the ADDED/REMOVED/TOTAL lines

    Returns None when opts.stats_only is set; otherwise the tuple
    (number of differences, number of diagnostics in A, number in B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)
    if opts.show_stats:
        compareStats(resultsA, resultsB)
    if opts.stats_only:
        return

    # Open the verbose log, if given. Text mode is required because the
    # entries written below are str, not bytes.
    auxLog = open(opts.verboseLog, "w") if opts.verboseLog else None

    try:
        diff = compareResults(resultsA, resultsB, opts)
        totalAdded = 0
        totalRemoved = 0
        for a, b in diff:
            if a is None:
                Stdout.write("ADDED: %r\n" % b.getReadableName())
                totalAdded += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n" % (
                        b.getReadableName(), b.getReport()))
            elif b is None:
                Stdout.write("REMOVED: %r\n" % a.getReadableName())
                totalRemoved += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n" % (
                        a.getReadableName(), a.getReport()))
            # Diagnostics present in both runs are not reported.

        foundDiffs = totalAdded + totalRemoved
        TotalReports = len(resultsB.diagnostics)
        Stdout.write("TOTAL REPORTS: %r\n" % TotalReports)
        Stdout.write("TOTAL ADDED: %r\n" % totalAdded)
        Stdout.write("TOTAL REMOVED: %r\n" % totalRemoved)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        # Close the log even when comparing or writing fails.
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
Anna Zaks | b80d836 | 2011-09-12 21:32:41 +0000 | [diff] [blame] | 407 | |
def generate_option_parser():
    """
    Build and return the OptionParser for the command line interface.

    Help texts are assembled with adjacent string literals rather than
    backslash continuations inside a literal, so that source indentation
    does not end up in the --help output.
    """
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG "
                           "[default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    parser.add_option("--relative-path-differences-histogram",
                      action="store_true", dest="relative_path_histogram",
                      default=False,
                      help="Show histogram of relative paths differences. "
                           "Requires matplotlib")
    parser.add_option("--relative-log-path-differences-histogram",
                      action="store_true", dest="relative_log_path_histogram",
                      default=False,
                      help="Show histogram of log relative paths differences. "
                           "Requires matplotlib")
    parser.add_option("--absolute-path-differences-histogram",
                      action="store_true", dest="absolute_path_histogram",
                      default=False,
                      help="Show histogram of absolute paths differences. "
                           "Requires matplotlib")
    parser.add_option("--stats-only", action="store_true", dest="stats_only",
                      default=False, help="Only show statistics on reports")
    parser.add_option("--show-stats", action="store_true", dest="show_stats",
                      default=False, help="Show change in statistics")
    return parser
| 441 | |
| 442 | |
def main():
    """
    Command line entry point: parse the options, require exactly two
    positional arguments and dump the diff between them.
    """
    parser = generate_option_parser()
    opts, args = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(args[0], args[1], opts)
Daniel Dunbar | 1a9db99 | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 453 | |
George Karpenkov | a807660 | 2017-10-02 17:59:12 +0000 | [diff] [blame] | 454 | |
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()