blob: 4d60c0f7e7ec11aafeee440c8536f1a00ca28869 [file] [log] [blame]
Daniel Dunbar1a9db992009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
Ted Kremenek3a0678e2015-09-08 03:50:52 +00008two perspectives:
George Karpenkova8076602017-10-02 17:59:12 +00009 1. To monitor changes in the static analyzer's reports on real code bases,
10 for regression testing.
Daniel Dunbar1a9db992009-08-06 21:15:33 +000011
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks9b7d7142012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks45a992b2012-08-02 00:41:40 +000019 #
Anna Zaksc80313b2012-10-15 22:48:21 +000020 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
Ted Kremenek3a0678e2015-09-08 03:50:52 +000022
23 # Generate a relation from diagnostics in run A to diagnostics in run B
24 # to obtain a list of pairs (a, b).
Anna Zaks9b7d7142012-07-16 20:21:42 +000025 diff = compareResults(resultsA, resultsB, opts)
Ted Kremenek3a0678e2015-09-08 03:50:52 +000026
Daniel Dunbar1a9db992009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
George Karpenkovb7043222018-02-01 22:25:18 +000031from math import log
Daniel Dunbar1a9db992009-08-06 21:15:33 +000032
George Karpenkova8076602017-10-02 17:59:12 +000033
class SingleRunInfo:
    """
    Information about one analysis run.

    path       - the analysis output directory.
    root       - the name of the root directory, which is disregarded when
                 determining the source file name. Trailing slashes and
                 backslashes are stripped before it is stored.
    verboseLog - stored on the instance unchanged.
    """

    def __init__(self, path, root="", verboseLog=None):
        trimmedRoot = root.rstrip("/\\")
        self.verboseLog = verboseLog
        self.root = trimmedRoot
        self.path = path
43
George Karpenkova8076602017-10-02 17:59:12 +000044
class AnalysisDiagnostic:
    """
    A single diagnostic taken from an analyzer report.

    data       - the raw diagnostic dictionary. It must provide the
                 'location' and 'path' entries; the accessors below also
                 read 'category' and 'description'.
    report     - the owning report object. Its 'files' sequence and its
                 'run.root' / 'run.path' attributes are consulted.
    htmlReport - name of the matching HTML report file, or None.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport
        # Number of entries in the diagnostic's 'path' list.
        self._reportSize = len(self._data['path'])

    def getFileName(self):
        """
        Return the source file name with the run's root prefix removed.

        The root is only removed when it ends on a path-component boundary
        (the file name equals the root, or the character following the
        root is '/' or a backslash). Otherwise the name is returned
        unchanged, so a root of '/src/proj' does not mangle
        '/src/project2/a.c'.
        """
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if root and fileName.startswith(root):
            remainder = fileName[len(root):]
            if not remainder or remainder[0] in "/\\":
                # Drop the separator that followed the root as well.
                return remainder[1:]
        return fileName

    def getLine(self):
        """Return the 'line' entry of the diagnostic's location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic's location."""
        return self._loc['col']

    def getPathLength(self):
        """Return the number of entries in the diagnostic's 'path' list."""
        return self._reportSize

    def getCategory(self):
        """Return the diagnostic's 'category' entry."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' entry."""
        return self._data['description']

    def getIssueIdentifier(self):
        """
        Build the string used to match diagnostics between runs.

        The identifier is the root-relative file name followed by '+',
        then, when present, the 'issue_context' entry followed by '+', and
        finally, when present, the string form of the
        'issue_hash_content_of_line_in_context' entry.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            identifier += str(
                self._data['issue_hash_content_of_line_in_context'])
        return identifier

    def getReport(self):
        """
        Return the HTML report path joined onto the run's output path, or
        a single space when the diagnostic has no HTML report.
        """
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return 'file:line:col, category: description'."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        """Return the raw diagnostic dictionary."""
        return self._data
Daniel Dunbar1a9db992009-08-06 21:15:33 +000097
George Karpenkova8076602017-10-02 17:59:12 +000098
class CmpOptions:
    """
    Manually constructed stand-in for the option parser's output.

    verboseLog, rootA and rootB are stored as given; the three histogram
    switches always start out as False.
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
        # Histogram switches are off unless the caller flips them later.
        self.absolute_path_histogram = False
        self.relative_log_path_histogram = False
        self.relative_path_histogram = False
Anna Zaksb80d8362011-09-12 21:32:41 +0000111
George Karpenkova8076602017-10-02 17:59:12 +0000112
class AnalysisReport:
    """
    One loaded report: the run it belongs to, its list of files, and the
    diagnostics attached to it (initially empty).
    """

    def __init__(self, run, files):
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000118
George Karpenkova8076602017-10-02 17:59:12 +0000119
class AnalysisRun:
    """
    Accumulates the reports and diagnostics loaded for one analysis run.

    info - object providing 'path' and 'root' attributes; it is also kept
           as self.info.
    """

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the first 'clang_version' seen while reading, or None."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """
        Read the plist file p and record its report and diagnostics.

        p           - path of the plist file to read.
        deleteEmpty - when true, a plist whose 'files' entry is empty is
                      removed from disk; such a plist is skipped either way.

        Raises KeyError when the plist lacks a 'files' entry (or a
        'diagnostics' entry while 'files' is non-empty), and
        AssertionError when a diagnostic carries other than exactly one
        HTML file or the plist holds unexpected top-level keys.
        """
        # plistlib.readPlist no longer exists in recent Python 3 releases;
        # use plistlib.load where available and keep readPlist as the
        # fallback for interpreters that only provide the old API.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Only the first version encountered is kept; the key is
        # removed in every case so the final leftover-key check passes.
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. Presence is decided by
        # the first diagnostic; an empty diagnostics list simply has no
        # HTML reports instead of raising IndexError.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every known top-level key has been consumed by now.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
171
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000172
def loadResults(path, opts, root="", deleteEmpty=True):
    """
    Backwards compatibility API.

    Bundles path, root and opts.verboseLog into a SingleRunInfo and hands
    it to loadResultsFromSingleRun together with deleteEmpty.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
179
George Karpenkova8076602017-10-02 17:59:12 +0000180
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """
    Load the analysis results found at info.path into a new AnalysisRun.

    - info is the SingleRunInfo object
    - deleteEmpty specifies if the empty plist files should be deleted

    When info.path names a regular file, only that file is read. Otherwise
    the directory tree below it is walked and every file whose name ends
    with 'plist' is read.
    """
    run = AnalysisRun(info)
    location = info.path

    if os.path.isfile(location):
        run.readSingleFile(location, deleteEmpty)
        return run

    for dirpath, _, filenames in os.walk(location):
        for name in filenames:
            if name.endswith('plist'):
                run.readSingleFile(os.path.join(dirpath, name), deleteEmpty)

    return run
202
George Karpenkova8076602017-10-02 17:59:12 +0000203
def cmpAnalysisDiagnostic(d):
    """
    Sort key for diagnostics: the diagnostic's issue identifier.
    """
    issueId = d.getIssueIdentifier()
    return issueId
Anna Zaksd60367b2012-06-08 01:50:49 +0000206
George Karpenkova8076602017-10-02 17:59:12 +0000207
def compareResults(A, B, opts):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    A, B - runs exposing a 'diagnostics' sequence; each diagnostic must
           provide getIssueIdentifier() and getPathLength().
    opts - options object exposing the relative_path_histogram,
           relative_log_path_histogram and absolute_path_histogram flags.
           When any of them is set, matplotlib is imported and a histogram
           of the path-length differences of matched diagnostics is shown.

    The result is the relation as a list of pairs (a, b) where
    each element {a,b} is None or a matching element from the respective
    run. Matched pairs come first, followed by the diagnostics found only
    in A as (a, None) and then those found only in B as (None, b).
    """

    res = []

    # Path-length differences of matched diagnostics (histogram input).
    path_difference_data = []

    # Pair each diagnostic with its identifier once, so the identifier
    # string is not rebuilt on every comparison, then sort by identifier.
    eltsA = sorted(((d.getIssueIdentifier(), d) for d in A.diagnostics),
                   key=lambda entry: entry[0])
    eltsB = sorted(((d.getIssueIdentifier(), d) for d in B.diagnostics),
                   key=lambda entry: entry[0])

    # Quickly eliminate equal elements by walking both sorted lists from
    # their largest identifiers downwards.
    neqA = []
    neqB = []
    while eltsA and eltsB:
        idA, a = eltsA[-1]
        idB, b = eltsB[-1]
        if idA == idB:
            eltsA.pop()
            eltsB.pop()
            lenA = a.getPathLength()
            lenB = b.getPathLength()
            if lenA != lenB:
                if opts.relative_path_histogram:
                    # The ratio is undefined for an empty path in B; skip
                    # the data point instead of dividing by zero.
                    if lenB:
                        path_difference_data.append(float(lenA) / lenB)
                elif opts.relative_log_path_histogram:
                    # log() needs a strictly positive, finite ratio.
                    if lenA and lenB:
                        path_difference_data.append(
                            log(float(lenA) / lenB))
                elif opts.absolute_path_histogram:
                    path_difference_data.append(lenA - lenB)

            res.append((a, b))
        elif idA > idB:
            # a's identifier cannot occur among the remaining B elements.
            eltsA.pop()
            neqA.append(a)
        else:
            eltsB.pop()
            neqB.append(b)
    neqA.extend(d for _, d in eltsA)
    neqB.extend(d for _, d in eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None))
    for b in neqB:
        res.append((None, b))

    if opts.relative_log_path_histogram or opts.relative_path_histogram or \
            opts.absolute_path_histogram:
        # Imported lazily so matplotlib is only needed for histograms.
        from matplotlib import pyplot
        pyplot.hist(path_difference_data, bins=100)
        pyplot.show()

    return res
272
George Karpenkova8076602017-10-02 17:59:12 +0000273
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """
    Load two runs, print the diagnostics added and removed between them,
    and return the summary counts.

    dirA, dirB  - locations of the two runs, passed to loadResults.
    opts        - options object; rootA, rootB and verboseLog are read
                  here, and it is forwarded to compareResults.
    deleteEmpty - forwarded to loadResults.

    When opts.verboseLog is set, the same information is additionally
    written to that file, which is closed before returning even if an
    error occurs part-way through.

    Returns the tuple (number of differences, number of diagnostics in
    run A, number of diagnostics in run B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode plus write() keeps this
    # code valid on both Python 2 and Python 3.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB, opts)
        foundDiffs = 0
        totalAdded = 0
        totalRemoved = 0
        for a, b in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                totalAdded += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n" %
                                 (b.getReadableName(), b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                totalRemoved += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n" %
                                 (a.getReadableName(), a.getReport()))
            # Diagnostics present in both runs are not reported.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        print("TOTAL ADDED: %r" % totalAdded)
        print("TOTAL REMOVED: %r" % totalRemoved)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        # Previously the log handle was never closed.
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
Anna Zaksb80d8362011-09-12 21:32:41 +0000318
George Karpenkova8076602017-10-02 17:59:12 +0000319
def main():
    """
    Command-line entry point: parse the options and the two positional
    run directories, then print the diff between the two runs.

    Exits through parser.error() unless exactly two positional arguments
    are given.
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    # Help texts are built from adjacent string literals: a backslash
    # continuation inside a literal would embed the next line's
    # indentation in the message.
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG "
                           "[default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    parser.add_option("--relative-path-differences-histogram",
                      action="store_true", dest="relative_path_histogram",
                      default=False,
                      help="Show histogram of relative paths differences. "
                           "Requires matplotlib")
    parser.add_option("--relative-log-path-differences-histogram",
                      action="store_true", dest="relative_log_path_histogram",
                      default=False,
                      help="Show histogram of log relative paths "
                           "differences. Requires matplotlib")
    parser.add_option("--absolute-path-differences-histogram",
                      action="store_true", dest="absolute_path_histogram",
                      default=False,
                      help="Show histogram of absolute paths differences. "
                           "Requires matplotlib")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000357
George Karpenkova8076602017-10-02 17:59:12 +0000358
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()