blob: 30157bed3d4218f105f4d24ea6bb04027196bcd0 [file] [log] [blame]
Daniel Dunbar1a9db992009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks9b7d7142012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks45a992b2012-08-02 00:41:40 +000019 #
Anna Zaksc80313b2012-10-15 22:48:21 +000020 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
Anna Zaks9b7d7142012-07-16 20:21:42 +000022
23 # Generate a relation from diagnostics in run A to diagnostics in run B
24 # to obtain a list of triples (a, b, confidence).
25 diff = compareResults(resultsA, resultsB)
26
Daniel Dunbar1a9db992009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
Anna Zaksc80313b2012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar1a9db992009-08-06 21:15:33 +000032
Anna Zaksc80313b2012-10-15 22:48:21 +000033# Information about analysis run:
34# path - the analysis output directory
35# root - the name of the root directory, which will be disregarded when
36# determining the source file name
class SingleRunInfo:
    """Describe one analysis run to be loaded.

    path       -- the analysis output directory
    root       -- the name of the root directory, which will be disregarded
                  when determining the source file name
    verboseLog -- stored as given; this class does not interpret it
    """

    def __init__(self, path, root="", verboseLog=None):
        # Pure value holder: nothing is validated or touched on disk here.
        self.path, self.root, self.verboseLog = path, root, verboseLog
42
class AnalysisDiagnostic:
    """A single diagnostic taken from an analyzer report.

    data       -- the diagnostic's dictionary; must contain 'location'
    report     -- the owning report; its `files` and `run` attributes are used
    htmlReport -- name of the matching HTML report, or None when there is none
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        prefix = self._report.run.root
        # The location stores an index into the report's list of files.
        fullName = self._report.files[self._loc['file']]
        if not fullName.startswith(prefix):
            return fullName
        return fullName[len(prefix):]

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Build the string used to match this diagnostic across runs.

        The identifier is "<file>+", followed by "<issue_context>+" and the
        stringified issue hash when those keys are present in the data.
        """
        pieces = [self.getFileName(), "+"]
        if 'issue_context' in self._data:
            pieces.extend((self._data['issue_context'], "+"))
        if 'issue_hash' in self._data:
            pieces.append(str(self._data['issue_hash']))
        return "".join(pieces)

    def getReport(self):
        """Return the HTML report path, or a single space if there is none."""
        if self._htmlReport is not None:
            return os.path.join(self._report.run.path, self._htmlReport)
        return " "

    def getReadableName(self):
        """Return "file:line:col, category: description"."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar1a9db992009-08-06 21:15:33 +000091
class multidict:
    """Mapping in which assigning to a key appends to that key's list.

    elts -- optional iterable of (key, value) pairs used to seed the mapping
    """

    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            # Goes through __setitem__, so repeated keys accumulate.
            self[key] = value

    def __len__(self):
        # Number of distinct keys, not number of stored values.
        return len(self.data)

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list on first use, then append to it.
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000115
class CmpOptions:
    """Options bundle accepted by the comparison entry points.

    verboseLog -- stored as given (None by default)
    rootA      -- stored as given ("" by default)
    rootB      -- stored as given ("" by default)
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
121
class AnalysisReport:
    """One report belonging to a run.

    run   -- the run this report belongs to
    files -- the report's list of files
    """

    def __init__(self, run, files):
        # Starts empty; entries are added after construction.
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000127
class AnalysisRun:
    """Accumulate the reports and diagnostics read for one analysis run.

    Attributes:
      path, root    -- copied from the `info` object passed to the constructor
      info          -- the `info` object itself
      reports       -- one AnalysisReport per non-empty plist file read
      diagnostics   -- cumulative list of all diagnostics from all the reports
      clang_version -- 'clang_version' value of the first plist that had one,
                       or None
    """

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version

    def _loadPlist(self, p):
        """Parse the plist file at path `p` and return its top-level object."""
        # plistlib.readPlist no longer exists in Python 3.9+, and Python 2
        # has no plistlib.load, so use whichever one this interpreter has.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as f:
                return plistlib.load(f)
        return plistlib.readPlist(p)

    def readSingleFile(self, p, deleteEmpty):
        """Read one plist file and add its report to this run.

        p           -- path of the plist file
        deleteEmpty -- when true, a plist whose 'files' list is empty is
                       removed from disk

        Plists with an empty 'files' list contribute only their clang
        version; no report is recorded for them.
        """
        data = self._loadPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        version = data.pop('clang_version', None)
        if self.clang_version is None:
            self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. Only the first diagnostic
        # is inspected; the emptiness check avoids an IndexError on a plist
        # that lists files but no diagnostics.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every known top-level key has been popped by now; anything left
        # is a key this reader does not know about.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
180
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000181
Anna Zaksc80313b2012-10-15 22:48:21 +0000182# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Backward compatibility API.

    Wraps `path`, `root` and `opts.verboseLog` in a SingleRunInfo and hands
    it to loadResultsFromSingleRun together with `deleteEmpty`.
    """
    info = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(info, deleteEmpty)
186
# Load results of the analyses from a given output folder.
188# - info is the SingleRunInfo object
189# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Read every plist belonging to one run and return the AnalysisRun.

    info        -- the SingleRunInfo object; `info.path` may name either a
                   single file or a directory that is walked recursively
    deleteEmpty -- passed through to AnalysisRun.readSingleFile for each file
    """
    path = info.path
    run = AnalysisRun(info)

    # A plain file is read directly, whatever its name.
    if os.path.isfile(path):
        run.readSingleFile(path, deleteEmpty)
        return run

    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            # Only files whose name ends in 'plist' are considered.
            if name.endswith('plist'):
                run.readSingleFile(os.path.join(dirpath, name), deleteEmpty)

    return run
205
def cmpAnalysisDiagnostic(d) :
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaksd60367b2012-06-08 01:50:49 +0000208
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first, followed by the diagnostics found only in A,
    then the diagnostics found only in B.
    """

    # Order both runs by issue identifier so that they can be merged.
    sortedA = sorted(A.diagnostics, key = cmpAnalysisDiagnostic)
    sortedB = sorted(B.diagnostics, key = cmpAnalysisDiagnostic)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements: walk both lists from their largest
    # identifier downwards, stepping past whichever side is ahead.
    i = len(sortedA) - 1
    j = len(sortedB) - 1
    while i >= 0 and j >= 0:
        a = sortedA[i]
        b = sortedB[j]
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            matched.append((a, b, 0))
            i -= 1
            j -= 1
        elif idA > idB:
            # Nothing left in B can be as large as a, so a is unmatched.
            onlyA.append(a)
            i -= 1
        else:
            onlyB.append(b)
            j -= 1

    # Whatever was not visited on either side has no partner.
    onlyA.extend(sortedA[:i + 1])
    onlyB.extend(sortedB[:j + 1])

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res = matched
    res.extend((a, None, None) for a in onlyA)
    res.extend((None, b, None) for b in onlyB)
    return res
255
Anna Zaks9b7d7142012-07-16 20:21:42 +0000256def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
Anna Zaksb80d8362011-09-12 21:32:41 +0000257 # Load the run results.
Anna Zaks45a992b2012-08-02 00:41:40 +0000258 resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
259 resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)
Anna Zaksb80d8362011-09-12 21:32:41 +0000260
261 # Open the verbose log, if given.
262 if opts.verboseLog:
263 auxLog = open(opts.verboseLog, "wb")
264 else:
265 auxLog = None
266
267 diff = compareResults(resultsA, resultsB)
Anna Zaks767d3562011-11-08 19:56:31 +0000268 foundDiffs = 0
Anna Zaksb80d8362011-09-12 21:32:41 +0000269 for res in diff:
270 a,b,confidence = res
271 if a is None:
272 print "ADDED: %r" % b.getReadableName()
Anna Zaks767d3562011-11-08 19:56:31 +0000273 foundDiffs += 1
Anna Zaksb80d8362011-09-12 21:32:41 +0000274 if auxLog:
275 print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
Anna Zaks9b7d7142012-07-16 20:21:42 +0000276 b.getReport()))
Anna Zaksb80d8362011-09-12 21:32:41 +0000277 elif b is None:
278 print "REMOVED: %r" % a.getReadableName()
Anna Zaks767d3562011-11-08 19:56:31 +0000279 foundDiffs += 1
Anna Zaksb80d8362011-09-12 21:32:41 +0000280 if auxLog:
281 print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
Anna Zaks9b7d7142012-07-16 20:21:42 +0000282 a.getReport()))
Anna Zaksb80d8362011-09-12 21:32:41 +0000283 elif confidence:
284 print "CHANGED: %r to %r" % (a.getReadableName(),
285 b.getReadableName())
Anna Zaks767d3562011-11-08 19:56:31 +0000286 foundDiffs += 1
Anna Zaksb80d8362011-09-12 21:32:41 +0000287 if auxLog:
288 print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
289 % (a.getReadableName(),
290 b.getReadableName(),
Anna Zaks9b7d7142012-07-16 20:21:42 +0000291 a.getReport(),
292 b.getReport()))
Anna Zaksb80d8362011-09-12 21:32:41 +0000293 else:
294 pass
295
Anna Zaks767d3562011-11-08 19:56:31 +0000296 TotalReports = len(resultsB.diagnostics)
297 print "TOTAL REPORTS: %r" % TotalReports
298 print "TOTAL DIFFERENCES: %r" % foundDiffs
Anna Zaksb80d8362011-09-12 21:32:41 +0000299 if auxLog:
Anna Zaks767d3562011-11-08 19:56:31 +0000300 print >>auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports
301 print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs
302
Anna Zaksb80d8362011-09-12 21:32:41 +0000303 return foundDiffs
304
def main():
    """Command-line entry point: parse options and print the diff of two runs.

    Expects exactly two positional arguments (the two result locations);
    any other count is reported through the option parser's error handler.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    # --rootA and --rootB differ only in the letter they carry.
    for side in ("A", "B"):
        parser.add_option("", "--root" + side, dest="root" + side,
                          help="Prefix to ignore on source files for directory "
                               + side,
                          action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    opts, args = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args
    dumpScanBuildResultsDiff(dirA, dirB, opts)
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000326
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()