blob: 918e9a9f79b72b0944f412424f14c6031c9e367c [file] [log] [blame]
Daniel Dunbar1a9db992009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of pairs (a, b), where a or b is None when the
    # diagnostic has no counterpart in the other run.
    diff = compareResults(resultsA, resultsB, opts)
Ted Kremenek3a0678e2015-09-08 03:50:52 +000026
Daniel Dunbar1a9db992009-08-06 21:15:33 +000027"""
28
George Karpenkovb7120c92018-02-13 23:36:01 +000029import sys
Daniel Dunbar1a9db992009-08-06 21:15:33 +000030import os
31import plistlib
George Karpenkovb7043222018-02-01 22:25:18 +000032from math import log
George Karpenkov39590412018-02-09 18:48:31 +000033from optparse import OptionParser
Daniel Dunbar1a9db992009-08-06 21:15:33 +000034
George Karpenkova8076602017-10-02 17:59:12 +000035
# Describes one analysis run:
#   path       - the analysis output directory
#   root       - the name of the root directory, which will be disregarded
#                when determining the source file name
#   verboseLog - stored exactly as given (None by default)
class SingleRunInfo:
    def __init__(self, path, root="", verboseLog=None):
        # Trailing slashes/backslashes are dropped so the stored root never
        # ends with a path separator.
        trimmedRoot = root.rstrip("/\\")
        self.verboseLog = verboseLog
        self.root = trimmedRoot
        self.path = path
45
George Karpenkova8076602017-10-02 17:59:12 +000046
class AnalysisDiagnostic:
    """
    One diagnostic entry read from an analyzer plist file.

    - data is the raw diagnostic dictionary; it must contain the 'location'
      and 'path' keys (both are read in the constructor).
    - report is the owning report; its 'files' table and 'run' (root, path)
      are used to resolve file names and HTML report locations.
    - htmlReport is the name of the matching HTML report, or None.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport
        # Number of entries in the diagnostic's 'path' list.
        self._reportSize = len(self._data['path'])

    def getFileName(self):
        """
        Return the source file name, with the run's root prefix removed when
        the file actually lives under that root.
        """
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if root and fileName.startswith(root):
            remainder = fileName[len(root):]
            # Strip the root only on a path-component boundary. A plain
            # prefix test would also match siblings such as "/src/foobar"
            # for root "/src/foo" and return a mangled name.
            if not remainder or remainder[0] in "/\\":
                return remainder[1:]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getPathLength(self):
        return self._reportSize

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """
        Return the string used to match this diagnostic across runs: the
        file name, then the issue context and the content-of-line hash when
        the raw data provides them.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            identifier += str(
                self._data['issue_hash_content_of_line_in_context'])
        return identifier

    def getReport(self):
        """
        Return the path of the HTML report inside the run's output
        directory, or a single space when there is no HTML report.
        """
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """
        Return a one-line description of the form
        'file[#context]:line:col, category: description'.
        """
        if 'issue_context' in self._data:
            funcnamePostfix = "#" + self._data['issue_context']
        else:
            funcnamePostfix = ""
        return '%s%s:%d:%d, %s: %s' % (self.getFileName(),
                                       funcnamePostfix,
                                       self.getLine(),
                                       self.getColumn(), self.getCategory(),
                                       self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000105
George Karpenkova8076602017-10-02 17:59:12 +0000106
class AnalysisReport:
    """
    The contents of one plist file: the run it belongs to, the file table
    taken from the plist, and the diagnostics attached to it.
    """

    def __init__(self, run, files):
        # Starts empty; the owner appends diagnostics after construction.
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000112
George Karpenkova8076602017-10-02 17:59:12 +0000113
class AnalysisRun:
    """
    The accumulated results of one analysis run.

    - info supplies 'path' and 'root' (copied onto the run) and is also kept
      as 'info'.
    - reports holds one AnalysisReport per non-empty plist file read.
    - diagnostics is the cumulative list of diagnostics from all reports.
    """

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """
        Read the plist file at path p and add its report and diagnostics to
        this run.

        A file whose 'files' table is empty contributes nothing; it is also
        removed from disk when deleteEmpty is true. Raises AssertionError if
        the plist has unexpected top-level keys or a diagnostic does not
        carry exactly one HTML report.
        """
        # plistlib.readPlist was removed in Python 3.9; prefer plistlib.load
        # when it exists and fall back to readPlist on older interpreters.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version, so only the first one seen is kept.
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides whether HTML reports are present; an empty diagnostics
        # list simply has none (indexing [0] blindly would raise).
        rawDiagnostics = data['diagnostics']
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Every top-level key must have been consumed by now.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
165
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000166
def loadResults(path, opts, root="", deleteEmpty=True):
    """
    Backwards compatibility API: wraps path, root and opts.verboseLog in a
    SingleRunInfo and loads the run through loadResultsFromSingleRun.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
173
George Karpenkova8076602017-10-02 17:59:12 +0000174
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """
    Load the analysis results found at info.path and return them as an
    AnalysisRun.

    - info is the SingleRunInfo object
    - deleteEmpty specifies if the empty plist files should be deleted

    When info.path is a regular file it is read directly; otherwise the
    path is walked recursively and every file whose name ends in 'plist'
    is read.
    """
    run = AnalysisRun(info)
    path = info.path

    if not os.path.isfile(path):
        for (dirpath, dirnames, filenames) in os.walk(path):
            for name in filenames:
                if name.endswith('plist'):
                    run.readSingleFile(os.path.join(dirpath, name),
                                       deleteEmpty)
        return run

    run.readSingleFile(path, deleteEmpty)
    return run
196
George Karpenkova8076602017-10-02 17:59:12 +0000197
def cmpAnalysisDiagnostic(d):
    """
    Sort key for diagnostics: the diagnostic's issue identifier.
    """
    issueIdentifier = d.getIssueIdentifier()
    return issueIdentifier
Anna Zaksd60367b2012-06-08 01:50:49 +0000200
George Karpenkova8076602017-10-02 17:59:12 +0000201
def _sortedByIdentifier(diagnostics):
    """
    Return (identifier, diagnostic) pairs sorted by identifier, computing
    each identifier once.
    """
    keyed = [(d.getIssueIdentifier(), d) for d in diagnostics]
    # Sort on the identifier only so diagnostics themselves are never
    # compared; the sort is stable, like sorting with a key function.
    keyed.sort(key=lambda pair: pair[0])
    return keyed


def _pathLengthDifference(a, b, opts):
    """
    Return the histogram data point for a matched pair, or None when the
    path lengths are equal, no histogram mode is enabled, or the value is
    undefined for the enabled mode.
    """
    lengthA = a.getPathLength()
    lengthB = b.getPathLength()
    if lengthA == lengthB:
        return None
    # Only the first enabled mode is honoured.
    if getattr(opts, 'relative_path_histogram', False):
        # A ratio against an empty path is undefined; skip it rather than
        # raising ZeroDivisionError.
        if lengthB == 0:
            return None
        return float(lengthA) / lengthB
    if getattr(opts, 'relative_log_path_histogram', False):
        # log() needs a strictly positive ratio.
        if lengthA <= 0 or lengthB <= 0:
            return None
        return log(float(lengthA) / lengthB)
    if getattr(opts, 'absolute_path_histogram', False):
        return lengthA - lengthB
    return None


def compareResults(A, B, opts=None):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    - A and B are runs exposing a 'diagnostics' list.
    - opts optionally carries the relative_path_histogram,
      relative_log_path_histogram and absolute_path_histogram flags; when it
      is None or none of them is set, no histogram is shown.

    The result is the relation as a list of pairs (a, b) where
    each element {a,b} is None or a matching element from the respective run.
    Matched pairs come first, then diagnostics only in A as (a, None), then
    diagnostics only in B as (None, b).

    When a histogram flag is set, matplotlib is imported and a histogram of
    the path-length differences of matched pairs is displayed.
    """

    res = []

    # One data point per matched pair whose path lengths differ.
    path_difference_data = []

    wantHistogram = any(getattr(opts, name, False)
                        for name in ('relative_log_path_histogram',
                                     'relative_path_histogram',
                                     'absolute_path_histogram'))

    # Quickly eliminate equal elements: merge the two identifier-sorted
    # lists from the back, pairing equal identifiers.
    neqA = []
    neqB = []
    eltsA = _sortedByIdentifier(A.diagnostics)
    eltsB = _sortedByIdentifier(B.diagnostics)
    while eltsA and eltsB:
        idA, a = eltsA.pop()
        idB, b = eltsB.pop()
        if idA == idB:
            if wantHistogram:
                difference = _pathLengthDifference(a, b, opts)
                if difference is not None:
                    path_difference_data.append(difference)
            res.append((a, b))
        elif idA > idB:
            # a sorts after everything left in B, so it has no match.
            eltsB.append((idB, b))
            neqA.append(a)
        else:
            eltsA.append((idA, a))
            neqB.append(b)
    neqA.extend(d for _, d in eltsA)
    neqB.extend(d for _, d in eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None))
    for b in neqB:
        res.append((None, b))

    if wantHistogram:
        # Imported lazily so matplotlib is only required for histograms.
        from matplotlib import pyplot
        pyplot.hist(path_difference_data, bins=100)
        pyplot.show()

    return res
266
George Karpenkova8076602017-10-02 17:59:12 +0000267
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
                             Stdout=sys.stdout):
    """
    Compare the analysis results in dirA and dirB and write a summary of
    added and removed reports to Stdout.

    - opts must provide rootA, rootB and verboseLog, plus the histogram
      flags read by compareResults.
    - deleteEmpty is forwarded to loadResults for both runs.
    - When opts.verboseLog is set, each difference (with its HTML report
      location) and the totals are also written to that file.

    Returns a tuple (number of differences, number of diagnostics in A,
    number of diagnostics in B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode is required because only
    # str values are written; a binary-mode file rejects them on Python 3.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    # The try/finally guarantees the log is closed even if the comparison
    # or one of the writes raises.
    try:
        diff = compareResults(resultsA, resultsB, opts)
        foundDiffs = 0
        totalAdded = 0
        totalRemoved = 0
        for a, b in diff:
            if a is None:
                Stdout.write("ADDED: %r\n" % b.getReadableName())
                foundDiffs += 1
                totalAdded += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n" % (b.getReadableName(),
                                                          b.getReport()))
            elif b is None:
                Stdout.write("REMOVED: %r\n" % a.getReadableName())
                foundDiffs += 1
                totalRemoved += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n" % (
                        a.getReadableName(), a.getReport()))
            # Diagnostics present in both runs are not reported.

        TotalReports = len(resultsB.diagnostics)
        Stdout.write("TOTAL REPORTS: %r\n" % TotalReports)
        Stdout.write("TOTAL ADDED: %r\n" % totalAdded)
        Stdout.write("TOTAL REMOVED: %r\n" % totalRemoved)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
Anna Zaksb80d8362011-09-12 21:32:41 +0000313
def generate_option_parser():
    """
    Build and return the OptionParser for the command line interface.

    Options: --rootA / --rootB (prefixes to ignore on source files),
    --verbose-log (file for additional information) and three mutually
    independent flags selecting a path-difference histogram.
    """
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    # Help texts are built from adjacent string literals. A backslash
    # continuation inside a literal would embed the next line's indentation
    # in the text and leave wide gaps in the --help output.
    parser.add_option("--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG "
                           "[default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    parser.add_option("--relative-path-differences-histogram",
                      action="store_true", dest="relative_path_histogram",
                      default=False,
                      help="Show histogram of relative paths differences. "
                           "Requires matplotlib")
    parser.add_option("--relative-log-path-differences-histogram",
                      action="store_true", dest="relative_log_path_histogram",
                      default=False,
                      help="Show histogram of log relative paths "
                           "differences. Requires matplotlib")
    parser.add_option("--absolute-path-differences-histogram",
                      action="store_true", dest="absolute_path_histogram",
                      default=False,
                      help="Show histogram of absolute paths differences. "
                           "Requires matplotlib")
    return parser
343
344
def main():
    """
    Command line entry point: parse the options, require exactly two
    positional arguments (dir A and dir B) and print their differences.
    """
    parser = generate_option_parser()
    opts, positional = parser.parse_args()

    # parser.error() reports the problem and exits, so the indexing below
    # only runs with exactly two positional arguments.
    if len(positional) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(positional[0], positional[1], opts)
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000355
George Karpenkova8076602017-10-02 17:59:12 +0000356
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()