blob: bca02124eec9add35c43d2a607cc3cbd0ed14a88 [file] [log] [blame]
Daniel Dunbar1a9db992009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain run objects whose
    # 'diagnostics' lists hold the corresponding AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

Daniel Dunbar1a9db992009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
Anna Zaksc80313b2012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar1a9db992009-08-06 21:15:33 +000032
class SingleRunInfo:
    """Information about an analysis run.

    path       - the analysis output directory
    root       - the name of the root directory, which will be disregarded
                 when determining the source file name
    verboseLog - stored as given; this class does not interpret it
    """

    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root, self.verboseLog = path, root, verboseLog
42
class AnalysisDiagnostic:
    """A single diagnostic entry taken from an analyzer plist report.

    data       - the diagnostic's dictionary; it must contain a 'location'
                 entry, which is cached for the line/column/file accessors.
    report     - the owning report object; its 'run' and 'files' attributes
                 are used to resolve file names and report paths.
    htmlReport - name of the matching HTML report file, or None.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        root = self._report.run.root
        # The location stores an index into the report's file table.
        fileName = self._report.files[self._loc['file']]
        if fileName.startswith(root):
            return fileName[len(root):]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this diagnostic across runs.

        The identifier is the file name, then the issue context and the
        issue hash when the diagnostic provides them, separated by '+'.
        """
        # Named 'issueId' rather than 'id' so the builtin is not shadowed.
        issueId = self.getFileName() + "+"
        if 'issue_context' in self._data:
            issueId += self._data['issue_context'] + "+"
        if 'issue_hash' in self._data:
            issueId += str(self._data['issue_hash'])
        return issueId

    def getReport(self):
        """Return the HTML report path, or " " when there is no HTML report."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return 'file:line:col, category: description' for display."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar1a9db992009-08-06 21:15:33 +000091
class multidict:
    """Dictionary wrapper in which each key maps to a list of values.

    Assigning to a key appends to that key's list instead of replacing it;
    reading a key returns the whole list.
    """

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            # Goes through __setitem__, so repeated keys accumulate.
            self[key] = value

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000115
class CmpOptions:
    """Options bundle: a verbose log setting and the root prefix for each run."""

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
121
class AnalysisReport:
    """One report belonging to a run: its file table and its diagnostics."""

    def __init__(self, run, files):
        # Starts with no diagnostics; callers fill the list in afterwards.
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000127
class AnalysisRun:
    """Results of one analysis run, built from an info object.

    The info object's 'path' and 'root' attributes are copied onto the run,
    and the info object itself is kept as 'info'.
    """

    def __init__(self, info):
        self.info = info
        self.path, self.root = info.path, info.root
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        # Stays None until something assigns a version to the run.
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000140
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000141
# Backward compatibility API.
def loadResults(path, opts, root="", deleteEmpty=True):
    """Load one run's results using the older (path, opts, root) arguments.

    Packs path, root and opts.verboseLog into a SingleRunInfo and hands it,
    together with deleteEmpty, to loadResultsFromSingleRun.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
146
# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Walk info.path, parse each plist report found, and return the run.

    Every file whose name ends in 'plist' is read with plistlib. Reports
    whose 'files' list is empty are skipped, and removed from disk when
    deleteEmpty is true. For every other report an AnalysisReport is
    created, its diagnostics are wrapped in AnalysisDiagnostic objects, and
    both are recorded on the returned AnalysisRun.
    """
    path = info.path
    run = AnalysisRun(info)

    for (dirpath, _dirnames, filenames) in os.walk(path):
        for f in filenames:
            if not f.endswith('plist'):
                continue

            p = os.path.join(dirpath, f)
            data = plistlib.readPlist(p)

            # We want to retrieve the clang version even if there are no
            # reports. Assume that all reports were created using the same
            # clang version, so only the first one seen is kept. The key is
            # always removed so that the final 'assert not data' holds.
            version = data.pop('clang_version', None)
            if run.clang_version is None:
                run.clang_version = version

            # Ignore/delete empty reports.
            if not data['files']:
                if deleteEmpty:
                    os.remove(p)
                continue

            rawDiagnostics = data.pop('diagnostics')

            # Extract the HTML reports, if they exist. The first diagnostic
            # decides for the whole report; the emptiness check keeps a
            # report with files but no diagnostics from raising IndexError.
            if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
                htmlFiles = []
                for d in rawDiagnostics:
                    # FIXME: Why is this named files, when does it have
                    # multiple files?
                    assert len(d['HTMLDiagnostics_files']) == 1
                    htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
            else:
                htmlFiles = [None] * len(rawDiagnostics)

            report = AnalysisReport(run, data.pop('files'))
            diagnostics = [AnalysisDiagnostic(d, report, h)
                           for d, h in zip(rawDiagnostics, htmlFiles)]

            # Every known top-level key has been consumed by now; anything
            # left over is a key this loader does not understand.
            assert not data

            report.diagnostics.extend(diagnostics)
            run.reports.append(report)
            run.diagnostics.extend(diagnostics)

    return run
200
def cmpAnalysisDiagnostic(d):
    """Sort key for a diagnostic: its issue identifier."""
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaksd60367b2012-06-08 01:50:49 +0000203
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first in the result, followed by the diagnostics only
    found in A and then the diagnostics only found in B.
    """

    # Compute every issue identifier exactly once and sort on it. The sort
    # is stable and compares identifiers only, never the diagnostics.
    keyedA = sorted(((d.getIssueIdentifier(), d) for d in A.diagnostics),
                    key=lambda pair: pair[0])
    keyedB = sorted(((d.getIssueIdentifier(), d) for d in B.diagnostics),
                    key=lambda pair: pair[0])

    res = []
    neqA = []
    neqB = []

    # Quickly eliminate equal elements by walking both sorted lists from the
    # largest identifier downwards.
    i = len(keyedA) - 1
    j = len(keyedB) - 1
    while i >= 0 and j >= 0:
        keyA, a = keyedA[i]
        keyB, b = keyedB[j]
        if keyA == keyB:
            res.append((a, b, 0))
            i -= 1
            j -= 1
        elif keyA > keyB:
            # Nothing left in B can be as large as a, so a is unmatched.
            neqA.append(a)
            i -= 1
        else:
            # Nothing left in A can be as large as b, so b is unmatched.
            neqB.append(b)
            j -= 1

    # Whatever remains on either side has no counterpart.
    neqA.extend(d for _, d in keyedA[:i + 1])
    neqB.extend(d for _, d in keyedB[:j + 1])

    # FIXME: Add fuzzy matching. One simple and possible effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    res.extend((a, None, None) for a in neqA)
    res.extend((None, b, None) for b in neqB)

    return res
250
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the results in dirA and dirB.

    Loads both result directories, compares them, and prints one line per
    added, removed or changed diagnostic followed by the totals. When
    opts.verboseLog is set, a matching entry for every printed line is also
    written to that file.

    - opts must provide rootA, rootB and verboseLog
    - deleteEmpty is forwarded to loadResults

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # Output is written as print(<one string>) and auxLog.write(... + "\n")
    # so the text produced matches the print statements they replace.
    foundDiffs = 0
    try:
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport()) + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport()) + "\n")
            elif confidence:
                # A non-zero confidence marks an inexact match.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()) + "\n")
            # Exact matches (confidence 0) are not reported.

        totalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % totalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % totalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        # Close the log even if reporting fails part-way, so the handle is
        # not leaked and buffered entries are flushed.
        if auxLog:
            auxLog.close()

    return foundDiffs
299
def main():
    """Command-line entry point: diff the results in two directories.

    Expects exactly two positional arguments (dir A and dir B) and accepts
    --rootA, --rootB and --verbose-log.
    """
    import optparse

    parser = optparse.OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option(
        "", "--rootA", dest="rootA",
        help="Prefix to ignore on source files for directory A",
        action="store", type=str, default="")
    parser.add_option(
        "", "--rootB", dest="rootB",
        help="Prefix to ignore on source files for directory B",
        action="store", type=str, default="")
    parser.add_option(
        "", "--verbose-log", dest="verboseLog",
        help="Write additional information to LOG [default=None]",
        action="store", type=str, default=None,
        metavar="LOG")

    opts, positional = parser.parse_args()
    if len(positional) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(positional[0], positional[1], opts)
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()