blob: 30157bed3d4218f105f4d24ea6bb04027196bcd0 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks7acc4072012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks2a84b8b2012-08-02 00:41:40 +000019 #
Anna Zaks81765572012-10-15 22:48:21 +000020 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
Anna Zaks7acc4072012-07-16 20:21:42 +000022
23 # Generate a relation from diagnostics in run A to diagnostics in run B
24 # to obtain a list of triples (a, b, confidence).
25 diff = compareResults(resultsA, resultsB)
26
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
Anna Zaks81765572012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000032
class SingleRunInfo:
    """Information about one analysis run.

    path       -- the analysis output directory
    root       -- the name of the root directory, which will be disregarded
                  when determining the source file name
    verboseLog -- stored unchanged on the instance; not interpreted here
    """

    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root, self.verboseLog = path, root, verboseLog
42
class AnalysisDiagnostic:
    """A single diagnostic entry together with the report it belongs to.

    data       -- the raw diagnostic dictionary; must contain 'location'
    report     -- the owning report; its 'files' table and 'run' are used
    htmlReport -- name of the matching HTML report, or None
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix dropped."""
        prefix = self._report.run.root
        name = self._report.files[self._loc['file']]
        return name[len(prefix):] if name.startswith(prefix) else name

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this diagnostic across runs.

        It is the file name plus "+", followed by the issue context plus
        "+" and the issue hash, each only when present in the raw data.
        """
        record = self._data
        ident = self.getFileName() + "+"
        if 'issue_context' in record:
            ident = ident + record['issue_context'] + "+"
        if 'issue_hash' in record:
            ident = ident + str(record['issue_hash'])
        return ident

    def getReport(self):
        """Return the HTML report path, or " " when there is no report."""
        html = self._htmlReport
        if html is not None:
            return os.path.join(self._report.run.path, html)
        return " "

    def getReadableName(self):
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000091
class multidict:
    """Dictionary wrapper that keeps a list of all values set for a key.

    Assigning to a key appends to that key's list instead of replacing it;
    reading a key returns the whole list.
    """

    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list on first use, then append to it.
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        return len(self.data)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()

    def get(self, key, default=None):
        return self.data.get(key, default)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000115
class CmpOptions:
    """Options for a comparison: the verbose log and the two run roots."""

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
121
class AnalysisReport:
    """One loaded report: its owning run, file table and diagnostics."""

    def __init__(self, run, files):
        # Starts empty; the loader appends diagnostics afterwards.
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000127
class AnalysisRun:
    """Accumulates the reports and diagnostics loaded for one analysis run.

    info -- object providing 'path' and 'root' attributes; it is also kept
            on the instance as 'info'.
    """

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the first clang version seen in a loaded file, or None."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load one plist file and add its report to this run.

        p           -- path of the plist file to read
        deleteEmpty -- when true, a file whose 'files' list is empty is
                       removed from disk

        Files with an empty 'files' list contribute no report. Raises
        AssertionError when a diagnostic does not carry exactly one HTML
        file or when the plist holds keys other than those handled here.
        """
        # plistlib.readPlist was removed in Python 3.9; use plistlib.load
        # when it exists and keep readPlist for older interpreters.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        version = data.pop('clang_version', None)
        if version is not None and self.clang_version is None:
            self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole file; an empty diagnostics list simply has
        # no HTML reports (indexing it would raise IndexError).
        rawDiagnostics = data['diagnostics']
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Every known key has been consumed by now; anything left over is
        # a format this loader does not understand.
        assert not data, 'unexpected plist keys: %r' % sorted(data)

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
180
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000181
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Backward compatibility API.

    Wraps path, root and opts.verboseLog in a SingleRunInfo and returns
    the result of loadResultsFromSingleRun for it.
    """
    info = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(info, deleteEmpty)
186
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Load the results of the analyses from a given output folder.

    info        -- the SingleRunInfo object
    deleteEmpty -- specifies if the empty plist files should be deleted

    info.path may name a single file, which is read directly, or a
    directory, which is walked recursively for names ending in 'plist'.
    Returns the populated AnalysisRun.
    """
    location = info.path
    run = AnalysisRun(info)

    if os.path.isfile(location):
        run.readSingleFile(location, deleteEmpty)
        return run

    for dirpath, _dirnames, filenames in os.walk(location):
        for name in filenames:
            if name.endswith('plist'):
                run.readSingleFile(os.path.join(dirpath, name), deleteEmpty)

    return run
205
def cmpAnalysisDiagnostic(d) :
    """Sort key for a diagnostic: its issue identifier."""
    identifier = d.getIssueIdentifier()
    return identifier

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    # Work on sorted copies so the runs' own lists are left untouched.
    sortedA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    sortedB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements: walk both sorted lists from the
    # largest identifier downwards, pairing equal identifiers and setting
    # aside whichever side currently holds the larger one.
    ia = len(sortedA) - 1
    ib = len(sortedB) - 1
    while ia >= 0 and ib >= 0:
        a = sortedA[ia]
        b = sortedB[ib]
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            matched.append((a, b, 0))
            ia -= 1
            ib -= 1
        elif idA > idB:
            onlyA.append(a)
            ia -= 1
        else:
            onlyB.append(b)
            ib -= 1

    # Whatever was never reached on either side has no counterpart.
    onlyA.extend(sortedA[:ia + 1])
    onlyB.extend(sortedB[:ib + 1])

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    removed = [(a, None, None) for a in onlyA]
    added = [(None, b, None) for b in onlyB]
    return matched + removed + added
255
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between two analysis result locations.

    dirA, dirB  -- result locations for run A and run B
    opts        -- object providing rootA, rootB and verboseLog
    deleteEmpty -- forwarded to loadResults for both runs

    One line per added, removed or changed diagnostic is written to
    standard output, followed by the totals. When opts.verboseLog is set,
    a tuple-formatted record of each difference is also written to that
    file. Returns the number of differences found.
    """
    # Local import: the output is written through sys.stdout.write so the
    # function does not depend on the Python 2 print statement.
    import sys

    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. It only ever receives text, so it is
    # opened in text mode.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    # The try/finally guarantees the log is closed even if reporting fails.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                sys.stdout.write("ADDED: %r\n" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(), b.getReport()))
            elif b is None:
                sys.stdout.write("REMOVED: %r\n" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(), a.getReport()))
            elif confidence:
                sys.stdout.write("CHANGED: %r to %r\n"
                                 % (a.getReadableName(),
                                    b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))
            # A confidence of 0 means the two diagnostics are equal:
            # nothing to report.

        TotalReports = len(resultsB.diagnostics)
        sys.stdout.write("TOTAL REPORTS: %r\n" % TotalReports)
        sys.stdout.write("TOTAL DIFFERENCES: %r\n" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
304
def main():
    """Parse the command line and print the diff of the two given runs.

    Expects exactly two positional arguments (dir A and dir B); any other
    count is reported through the option parser's error handler.
    """
    import optparse

    parser = optparse.OptionParser("usage: %prog [options] [dir A] [dir B]")

    # The two root options differ only in their names and help text.
    rootOptions = (
        ("--rootA", "rootA",
         "Prefix to ignore on source files for directory A"),
        ("--rootB", "rootB",
         "Prefix to ignore on source files for directory B"),
    )
    for flag, destination, helpText in rootOptions:
        parser.add_option("", flag, dest=destination, help=helpText,
                          action="store", type=str, default="")

    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")

    opts, args = parser.parse_args()
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args
    dumpScanBuildResultsDiff(dirA, dirB, opts)


if __name__ == '__main__':
    main()