blob: 2c0ed6aae3a2332822fc58e6207269b7cb26bfa8 [file] [log] [blame]
Daniel Dunbar1a9db992009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
Ted Kremenek3a0678e2015-09-08 03:50:52 +00008two perspectives:
George Karpenkova8076602017-10-02 17:59:12 +00009 1. To monitor changes in the static analyzer's reports on real code bases,
10 for regression testing.
Daniel Dunbar1a9db992009-08-06 21:15:33 +000011
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks9b7d7142012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks45a992b2012-08-02 00:41:40 +000019 #
Anna Zaksc80313b2012-10-15 22:48:21 +000020 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
Ted Kremenek3a0678e2015-09-08 03:50:52 +000022
23 # Generate a relation from diagnostics in run A to diagnostics in run B
24 # to obtain a list of triples (a, b, confidence).
Anna Zaks9b7d7142012-07-16 20:21:42 +000025 diff = compareResults(resultsA, resultsB)
Ted Kremenek3a0678e2015-09-08 03:50:52 +000026
Daniel Dunbar1a9db992009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
31
George Karpenkova8076602017-10-02 17:59:12 +000032
Anna Zaksc80313b2012-10-15 22:48:21 +000033# Information about analysis run:
34# path - the analysis output directory
Ted Kremenek3a0678e2015-09-08 03:50:52 +000035# root - the name of the root directory, which will be disregarded when
Anna Zaksc80313b2012-10-15 22:48:21 +000036# determining the source file name
class SingleRunInfo:
    """
    Describes one analyzer run.

    path - the analysis output directory (stored as given)
    root - prefix to disregard when naming source files; trailing slashes
           and backslashes are removed before it is stored
    verboseLog - stored as given
    """
    def __init__(self, path, root="", verboseLog=None):
        # Normalise the root once here so later prefix handling does not
        # have to care about trailing separators.
        trimmedRoot = root.rstrip("/\\")
        self.path, self.root, self.verboseLog = path, trimmedRoot, verboseLog
42
George Karpenkova8076602017-10-02 17:59:12 +000043
class AnalysisDiagnostic:
    """
    A single diagnostic taken from an analyzer report.

    data - the diagnostic dictionary; its 'location', 'category' and
           'description' entries are read by the accessors below
    report - the owning report; `report.files` and `report.run` are used
    htmlReport - name of the matching HTML report, or None if there is none
    """
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """
        Return the diagnostic's source file name with the run's root
        prefix removed.

        The prefix is only removed when it ends on a path separator (or
        matches the whole name), so a root of "/foo" does not truncate
        "/foobar/x.c".
        """
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if root and fileName.startswith(root):
            remainder = fileName[len(root):]
            # Drop the separator that follows the root as well.
            if not remainder or remainder[0] in "/\\":
                return remainder[1:]
        return fileName

    def getLine(self):
        """Return the 'line' entry of the diagnostic's location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic's location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' entry."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' entry."""
        return self._data['description']

    def getIssueIdentifier(self):
        """
        Build the string used to match diagnostics between runs:
        file name, then the issue context and the in-context line hash
        when the diagnostic provides them.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            identifier += str(
                self._data['issue_hash_content_of_line_in_context'])
        return identifier

    def getReport(self):
        """
        Return the HTML report path joined onto the run's output path, or
        a single space when the diagnostic has no HTML report.
        """
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return 'file:line:col, category: description'."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        """Return the underlying diagnostic dictionary."""
        return self._data
Daniel Dunbar1a9db992009-08-06 21:15:33 +000092
George Karpenkova8076602017-10-02 17:59:12 +000093
class CmpOptions:
    """
    Plain holder for comparison options.

    verboseLog - stored as given (defaults to None)
    rootA, rootB - stored as given (default to the empty string)
    """
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
99
George Karpenkova8076602017-10-02 17:59:12 +0000100
class AnalysisReport:
    """
    One loaded report.

    run - the run this report belongs to
    files - the report's file table
    diagnostics - starts empty; filled in by whoever loads the report
    """
    def __init__(self, run, files):
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000106
George Karpenkova8076602017-10-02 17:59:12 +0000107
class AnalysisRun:
    """
    Accumulates everything loaded for one analyzer run.

    info - object providing `path` and `root`; it is also kept as `info`
    """
    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the clang version seen in the loaded plists, or None."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """
        Load one plist file and add its report and diagnostics to the run.

        p - path of the plist file
        deleteEmpty - when true, a plist whose 'files' entry is empty is
                      removed from disk

        A plist with an empty 'files' entry contributes nothing apart from
        the clang version.
        """
        # plistlib.readPlist no longer exists in recent Python versions;
        # use plistlib.load when it is available and fall back otherwise.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole file; an empty list simply has none.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every top-level key must have been consumed above; anything left
        # over is a key this loader does not handle.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
159
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000160
def loadResults(path, opts, root="", deleteEmpty=True):
    """
    Backwards compatibility API: bundle the arguments into a SingleRunInfo
    (taking the verbose log from opts) and load that run.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
167
George Karpenkova8076602017-10-02 17:59:12 +0000168
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """
    Load the analysis results found at a given output location.

     - info is the SingleRunInfo object; info.path may name either a single
       file or a directory that is searched recursively
     - deleteEmpty specifies if the empty plist files should be deleted

    Returns the populated AnalysisRun.
    """
    location = info.path
    run = AnalysisRun(info)

    # A plain file is read directly, whatever its name.
    if os.path.isfile(location):
        run.readSingleFile(location, deleteEmpty)
        return run

    # Otherwise walk the tree and read every file whose name ends in 'plist'.
    for dirpath, _dirnames, filenames in os.walk(location):
        for name in filenames:
            if name.endswith('plist'):
                run.readSingleFile(os.path.join(dirpath, name), deleteEmpty)

    return run
190
George Karpenkova8076602017-10-02 17:59:12 +0000191
def cmpAnalysisDiagnostic(d):
    """Sort key for a diagnostic: its issue identifier."""
    issueId = d.getIssueIdentifier()
    return issueId
Anna Zaksd60367b2012-06-08 01:50:49 +0000194
George Karpenkova8076602017-10-02 17:59:12 +0000195
def compareResults(A, B):
    """
    compareResults - Relate the diagnostics of run A to those of run B.

    Returns a list of triples (a, b, confidence). Matched pairs come first
    with confidence 0; they are followed by (a, None, None) for diagnostics
    only found in A and then (None, b, None) for diagnostics only found
    in B.
    """

    def issueId(diag):
        return diag.getIssueIdentifier()

    # Work on sorted copies so both runs can be consumed from the high end
    # in lock step.
    pendingA = sorted(A.diagnostics, key=issueId)
    pendingB = sorted(B.diagnostics, key=issueId)

    relation = []
    onlyA = []
    onlyB = []
    while pendingA and pendingB:
        idA = issueId(pendingA[-1])
        idB = issueId(pendingB[-1])
        if idA == idB:
            relation.append((pendingA.pop(), pendingB.pop(), 0))
        elif idA > idB:
            # Nothing left in B can match the largest remaining A element.
            onlyA.append(pendingA.pop())
        else:
            onlyB.append(pendingB.pop())

    # Whatever is left on either side has no counterpart.
    onlyA += pendingA
    onlyB += pendingB

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    relation.extend((a, None, None) for a in onlyA)
    relation.extend((None, b, None) for b in onlyB)

    return relation
242
George Karpenkova8076602017-10-02 17:59:12 +0000243
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """
    Load two result sets, print how B differs from A, and optionally mirror
    the differences into a verbose log.

    dirA, dirB - locations of the two result sets
    opts - provides rootA, rootB and verboseLog (log file path, or a false
           value for no log)
    deleteEmpty - passed through to the loader

    Returns (number of differences, number of diagnostics in A, number of
    diagnostics in B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    def log(line):
        # The log is opened in binary mode, so write encoded bytes.
        if auxLog:
            auxLog.write((line + "\n").encode("utf-8"))

    # Make sure the log is closed even if reporting fails part-way.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                log("('ADDED', %r, %r)" % (b.getReadableName(),
                                           b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                log("('REMOVED', %r, %r)" % (a.getReadableName(),
                                             a.getReport()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                log("('CHANGED', %r, %r, %r, %r)"
                    % (a.getReadableName(),
                       b.getReadableName(),
                       a.getReport(),
                       b.getReport()))
            # Otherwise the pair matched exactly; nothing to report.

        totalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % totalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        log("('TOTAL NEW REPORTS', %r)" % totalReports)
        log("('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
Anna Zaksb80d8362011-09-12 21:32:41 +0000292
George Karpenkova8076602017-10-02 17:59:12 +0000293
def main(argv=None):
    """
    Command-line entry point: parse the options and the two result
    locations, then print their differences.

    argv - argument list to parse; None (the default) lets optparse read
           the process command line

    Exits through parser.error unless exactly two positional arguments
    are given.
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    # Adjacent literals keep source indentation out of the help text.
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG "
                           "[default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args(argv)

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000316
George Karpenkova8076602017-10-02 17:59:12 +0000317
# Run the command-line tool only when this file is executed as a script.
if __name__ == '__main__':
    main()