blob: 68668d510d8c52896b10dd8cb7365fb279961a54 [file] [log] [blame]
Daniel Dunbar1a9db992009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
Ted Kremenek3a0678e2015-09-08 03:50:52 +00008two perspectives:
George Karpenkova8076602017-10-02 17:59:12 +00009 1. To monitor changes in the static analyzer's reports on real code bases,
10 for regression testing.
Daniel Dunbar1a9db992009-08-06 21:15:33 +000011
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks9b7d7142012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks45a992b2012-08-02 00:41:40 +000019 #
Anna Zaksc80313b2012-10-15 22:48:21 +000020 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
Ted Kremenek3a0678e2015-09-08 03:50:52 +000022
    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of pairs (a, b), where either element may be None.
    diff = compareResults(resultsA, resultsB, opts)
Ted Kremenek3a0678e2015-09-08 03:50:52 +000026
Daniel Dunbar1a9db992009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
George Karpenkovb7043222018-02-01 22:25:18 +000031from math import log
Daniel Dunbar1a9db992009-08-06 21:15:33 +000032
George Karpenkova8076602017-10-02 17:59:12 +000033
# Information about an analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
#        determining the source file name
class SingleRunInfo:
    """
    Description of a single analysis run.

    path       - the analysis output directory
    root       - the name of the root directory, which is disregarded when
                 determining the source file name; trailing slashes and
                 backslashes are stripped before it is stored
    verboseLog - stored unchanged
    """

    def __init__(self, path, root="", verboseLog=None):
        trimmedRoot = root.rstrip("/\\")
        self.verboseLog = verboseLog
        self.root = trimmedRoot
        self.path = path
43
George Karpenkova8076602017-10-02 17:59:12 +000044
class AnalysisDiagnostic:
    """
    A single diagnostic taken from an analyzer plist report.

    data       - the diagnostic dictionary; the 'location' and 'path' keys
                 are read on construction
    report     - the owning report object; its 'files' list and its
                 'run.root' / 'run.path' attributes are consulted
    htmlReport - name of the matching HTML report file, or None
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport
        self._reportSize = len(self._data['path'])

    def getFileName(self):
        """
        Return the diagnostic's source file name with the run's root
        directory removed from the front, when the file lives under it.
        """
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if not root or not fileName.startswith(root):
            return fileName
        rest = fileName[len(root):]
        # Only strip the root when it ends on a path component boundary, so
        # that a root of "/src" does not mangle "/srcother/a.c".
        if rest and rest[0] not in "/\\":
            return fileName
        # Drop the separator that follows the root as well.
        return rest[1:]

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getPathLength(self):
        """Return the number of entries in the diagnostic's 'path' list."""
        return self._reportSize

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """
        Return the string used to match diagnostics between runs: the file
        name, then the issue context and the line-content hash when the
        diagnostic provides them, joined with '+'.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            identifier += str(
                self._data['issue_hash_content_of_line_in_context'])
        return identifier

    def getReport(self):
        """
        Return the HTML report path joined onto the run's output directory,
        or a single space when the diagnostic has no HTML report.
        """
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return a one-line human readable summary of the diagnostic."""
        if 'issue_context' in self._data:
            funcnamePostfix = "#" + self._data['issue_context']
        else:
            funcnamePostfix = ""
        return '%s%s:%d:%d, %s: %s' % (self.getFileName(),
                                       funcnamePostfix,
                                       self.getLine(),
                                       self.getColumn(), self.getCategory(),
                                       self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000103
George Karpenkova8076602017-10-02 17:59:12 +0000104
class CmpOptions:
    """
    Stand-in for the option parser's result, built by hand.

    Carries the same attributes the command line parser would produce; the
    three histogram switches always start out disabled.
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        # Histogram switches are not constructor arguments; they default off.
        self.absolute_path_histogram = False
        self.relative_log_path_histogram = False
        self.relative_path_histogram = False
        self.verboseLog = verboseLog
        self.rootB = rootB
        self.rootA = rootA
Anna Zaksb80d8362011-09-12 21:32:41 +0000117
George Karpenkova8076602017-10-02 17:59:12 +0000118
class AnalysisReport:
    """
    One report belonging to an analysis run.

    run   - the run object this report belongs to
    files - the report's file list, stored as given

    The 'diagnostics' list starts empty and is filled in by the caller.
    """

    def __init__(self, run, files):
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000124
George Karpenkova8076602017-10-02 17:59:12 +0000125
class AnalysisRun:
    """
    The collected results of one analysis run.

    info - object describing the run; its 'path' and 'root' attributes are
           copied onto the run and the object itself is kept as 'info'
    """

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the clang version recorded from the first plist read."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """
        Read the plist file at path p and add its report and diagnostics to
        this run.

        p           - path of the plist file
        deleteEmpty - when true, a plist whose 'files' list is empty is
                      removed from disk

        Plists with an empty 'files' list contribute no report.
        """
        # plistlib.readPlist was removed in Python 3.9; prefer the newer
        # plistlib.load when this interpreter provides it.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as fp:
                data = plistlib.load(fp)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if 'clang_version' in data:
            if self.clang_version is None:
                self.clang_version = data.pop('clang_version')
            else:
                data.pop('clang_version')

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides whether the report carries HTML file names; an empty
        # diagnostics list simply yields no HTML files.
        rawDiagnostics = data['diagnostics']
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Every top-level key of the plist must have been consumed above.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
177
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000178
def loadResults(path, opts, root="", deleteEmpty=True):
    """
    Backwards compatibility API.

    Wraps path, root and opts.verboseLog into a SingleRunInfo and hands it
    to loadResultsFromSingleRun.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
185
George Karpenkova8076602017-10-02 17:59:12 +0000186
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """
    Load the analysis results found at the location named by info.

    - info is the SingleRunInfo object; info.path may name either a single
      file or a folder that is walked recursively
    - deleteEmpty specifies if the empty plist files should be deleted

    Returns the populated AnalysisRun.
    """
    run = AnalysisRun(info)
    path = info.path

    if not os.path.isfile(path):
        # A folder: feed every file whose name ends in 'plist' to the run.
        for dirpath, _dirnames, filenames in os.walk(path):
            for name in filenames:
                if name.endswith('plist'):
                    run.readSingleFile(os.path.join(dirpath, name),
                                       deleteEmpty)
        return run

    run.readSingleFile(path, deleteEmpty)
    return run
208
George Karpenkova8076602017-10-02 17:59:12 +0000209
def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    sortKey = d.getIssueIdentifier()
    return sortKey
Anna Zaksd60367b2012-06-08 01:50:49 +0000212
George Karpenkova8076602017-10-02 17:59:12 +0000213
def compareResults(A, B, opts=None):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    A, B - run objects exposing a 'diagnostics' list
    opts - optional options object. Its relative_path_histogram,
           relative_log_path_histogram and absolute_path_histogram
           attributes (each treated as False when absent) select a
           histogram of path length differences, shown with matplotlib.
           When opts is omitted no histogram is produced.

    The result is the relation as a list of pairs (a, b) where
    each element {a,b} is None or a matching element from the respective run
    """
    relativeHist = getattr(opts, 'relative_path_histogram', False)
    relativeLogHist = getattr(opts, 'relative_log_path_histogram', False)
    absoluteHist = getattr(opts, 'absolute_path_histogram', False)

    res = []

    # Path length differences of matched diagnostics, in the unit selected
    # by the histogram options.
    path_difference_data = []

    # Quickly eliminate equal elements. The issue identifier is computed
    # once per diagnostic and carried along with it, instead of being
    # rebuilt for every comparison.
    neqA = []
    neqB = []
    eltsA = sorted(((d.getIssueIdentifier(), d) for d in A.diagnostics),
                   key=lambda entry: entry[0])
    eltsB = sorted(((d.getIssueIdentifier(), d) for d in B.diagnostics),
                   key=lambda entry: entry[0])
    while eltsA and eltsB:
        idA, a = eltsA.pop()
        idB, b = eltsB.pop()
        if idA == idB:
            lenA = a.getPathLength()
            lenB = b.getPathLength()
            if lenA != lenB:
                if relativeHist:
                    # A ratio is undefined for an empty path in run B.
                    if lenB:
                        path_difference_data.append(float(lenA) / lenB)
                elif relativeLogHist:
                    # log() needs a strictly positive, finite ratio.
                    if lenA and lenB:
                        path_difference_data.append(
                            log(float(lenA) / lenB))
                elif absoluteHist:
                    path_difference_data.append(lenA - lenB)

            res.append((a, b))
        elif idA > idB:
            # a has no counterpart in B; retry b against the next a.
            eltsB.append((idB, b))
            neqA.append(a)
        else:
            # b has no counterpart in A; retry a against the next b.
            eltsA.append((idA, a))
            neqB.append(b)
    neqA.extend(d for _, d in eltsA)
    neqB.extend(d for _, d in eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None))
    for b in neqB:
        res.append((None, b))

    if relativeLogHist or relativeHist or absoluteHist:
        # Imported lazily so matplotlib is only required for histograms.
        from matplotlib import pyplot
        pyplot.hist(path_difference_data, bins=100)
        pyplot.show()

    return res
278
George Karpenkova8076602017-10-02 17:59:12 +0000279
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """
    Load the results found under dirA and dirB, print the added and removed
    reports plus summary totals, and mirror them to the verbose log when
    opts.verboseLog is set.

    dirA, dirB  - locations of the two runs to compare
    opts        - options object; rootA, rootB and verboseLog are read here
                  and the object is passed on to compareResults
    deleteEmpty - passed on to the loader

    Returns a tuple (number of differences, number of diagnostics in run A,
    number of diagnostics in run B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode is used so that the same
    # string writes work under both Python 2 and Python 3.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    # The try/finally guarantees the log is closed even if the comparison
    # or one of the writes fails.
    try:
        diff = compareResults(resultsA, resultsB, opts)
        foundDiffs = 0
        totalAdded = 0
        totalRemoved = 0
        for a, b in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                totalAdded += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n" % (
                        b.getReadableName(), b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                totalRemoved += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n" % (
                        a.getReadableName(), a.getReport()))
            # Pairs present in both runs are not reported.

        totalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % totalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        print("TOTAL ADDED: %r" % totalAdded)
        print("TOTAL REMOVED: %r" % totalRemoved)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % totalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
Anna Zaksb80d8362011-09-12 21:32:41 +0000324
George Karpenkova8076602017-10-02 17:59:12 +0000325
def main(argv=None):
    """
    Command line entry point: parse the options and the two run
    directories, then print the diff between the runs.

    argv - optional list of command line arguments (without the program
           name); when None the option parser reads sys.argv[1:]

    Exits through the option parser's error handling when the number of
    positional arguments is not exactly two.
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    # Help texts are built by adjacent-literal concatenation; a backslash
    # continuation inside the literal would embed the source indentation
    # in the displayed help.
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG "
                           "[default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    parser.add_option("--relative-path-differences-histogram",
                      action="store_true", dest="relative_path_histogram",
                      default=False,
                      help="Show histogram of relative paths differences. "
                           "Requires matplotlib")
    parser.add_option("--relative-log-path-differences-histogram",
                      action="store_true", dest="relative_log_path_histogram",
                      default=False,
                      help="Show histogram of log relative paths "
                           "differences. Requires matplotlib")
    parser.add_option("--absolute-path-differences-histogram",
                      action="store_true", dest="absolute_path_histogram",
                      default=False,
                      help="Show histogram of absolute paths differences. "
                           "Requires matplotlib")
    (opts, args) = parser.parse_args(argv)

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)
George Karpenkova8076602017-10-02 17:59:12 +0000364
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    main()