blob: bca02124eec9add35c43d2a607cc3cbd0ed14a88 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks7acc4072012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks2a84b8b2012-08-02 00:41:40 +000019 #
Anna Zaks81765572012-10-15 22:48:21 +000020 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
Anna Zaks7acc4072012-07-16 20:21:42 +000022
23 # Generate a relation from diagnostics in run A to diagnostics in run B
24 # to obtain a list of triples (a, b, confidence).
25 diff = compareResults(resultsA, resultsB)
26
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
Anna Zaks81765572012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000032
class SingleRunInfo:
    """Information about a single analysis run.

    path       - the analysis output directory
    root       - the name of the root directory, which will be disregarded
                 when determining the source file name
    verboseLog - stored unchanged on the instance (None when not given)
    """
    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root, self.verboseLog = path, root, verboseLog
42
class AnalysisDiagnostic:
    """One diagnostic entry read from an analyzer report.

    data       - the raw diagnostic dictionary; it must have a 'location'
                 entry, which is cached for the line/column/file accessors
    report     - the owning report object; its 'files' and 'run' attributes
                 are consulted to resolve file names and report paths
    htmlReport - name of the associated HTML report, or None
    """
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._report = report
        self._htmlReport = htmlReport
        self._loc = data['location']

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        prefix = self._report.run.root
        name = self._report.files[self._loc['file']]
        if not name.startswith(prefix):
            return name
        return name[len(prefix):]

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Build the identifier used to match diagnostics between runs.

        The identifier is "<file>+", followed by "<issue_context>+" and the
        stringified issue hash when those keys are present in the raw data.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier = identifier + (self._data['issue_context'] + "+")
        if 'issue_hash' in self._data:
            identifier = identifier + str(self._data['issue_hash'])
        return identifier

    def getReport(self):
        """Return the HTML report path, or " " when there is no HTML report."""
        if self._htmlReport is not None:
            return os.path.join(self._report.run.path, self._htmlReport)
        return " "

    def getReadableName(self):
        """Return "file:line:col, category: description" for display."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000091
class multidict:
    """Dictionary-like container that keeps a list of values per key.

    Assigning to a key appends the value to that key's list instead of
    replacing it; lookups return the whole list.
    """
    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list on first use, then append to it.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000115
class CmpOptions:
    """Options bundle read by the comparison entry points in this file.

    verboseLog - path of the verbose log file, or None
    rootA      - root prefix for the first run
    rootB      - root prefix for the second run
    """
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
121
class AnalysisReport:
    """One report belonging to an analysis run.

    run   - the run this report belongs to
    files - the report's file table
    The diagnostics list starts empty.
    """
    def __init__(self, run, files):
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000127
class AnalysisRun:
    """The loaded results of one analysis run.

    info - object providing the 'path' and 'root' of the run; it is also
           kept on the instance as 'info'
    """
    def __init__(self, info):
        self.info = info
        self.path, self.root = info.path, info.root
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the recorded clang version, or None if none was recorded."""
        return self.clang_version
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000140
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000141
# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Load one run's results using the older (path, opts, root) arguments.

    The arguments are wrapped in a SingleRunInfo (taking the verbose log
    from opts.verboseLog) and forwarded to loadResultsFromSingleRun.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
146
def _readPlistFile(path):
    """Parse the plist file at path and return its top-level object."""
    # plistlib.readPlist was deprecated in Python 3.4 and removed in 3.9;
    # use plistlib.load whenever the running interpreter provides it and
    # fall back to readPlist on older interpreters.
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as plistFile:
            return plistlib.load(plistFile)
    return plistlib.readPlist(path)

# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Walk info.path and collect every plist report into an AnalysisRun.

    Each file whose name ends with 'plist' is parsed. The first
    'clang_version' seen is recorded on the run. Reports whose 'files'
    entry is empty are skipped (and removed from disk when deleteEmpty is
    true). Every remaining report becomes an AnalysisReport whose
    diagnostics are also appended to the run's cumulative list.

    Returns the populated AnalysisRun. An AssertionError is raised if a
    report contains top-level keys other than 'clang_version', 'files' and
    'diagnostics', or if a diagnostic lists more than one HTML file.
    """
    path = info.path
    run = AnalysisRun(info)

    for (dirpath, dirnames, filenames) in os.walk(path):
        for f in filenames:
            if not f.endswith('plist'):
                continue

            p = os.path.join(dirpath, f)
            data = _readPlistFile(p)

            # We want to retrieve the clang version even if there are no
            # reports. Assume that all reports were created using the same
            # clang version (this is always true and is more efficient).
            if 'clang_version' in data:
                version = data.pop('clang_version')
                if run.clang_version is None:
                    run.clang_version = version

            # Ignore/delete empty reports.
            if not data['files']:
                if deleteEmpty:
                    os.remove(p)
                continue

            rawDiagnostics = data.pop('diagnostics')

            # Extract the HTML reports, if they exist. The emptiness check
            # keeps a report that lists files but no diagnostics from
            # raising IndexError on the [0] lookup.
            if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
                htmlFiles = []
                for d in rawDiagnostics:
                    # FIXME: Why is this named files, when does it have
                    # multiple files?
                    assert len(d['HTMLDiagnostics_files']) == 1
                    htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
            else:
                htmlFiles = [None] * len(rawDiagnostics)

            report = AnalysisReport(run, data.pop('files'))
            diagnostics = [AnalysisDiagnostic(d, report, h)
                           for d, h in zip(rawDiagnostics, htmlFiles)]

            # Every known top-level key has been consumed by now.
            assert not data

            report.diagnostics.extend(diagnostics)
            run.reports.append(report)
            run.diagnostics.extend(diagnostics)

    return run
200
def cmpAnalysisDiagnostic(d) :
    """Sort key for a diagnostic: its issue identifier."""
    issueId = d.getIssueIdentifier()
    return issueId
Anna Zaks19b17cb2012-06-08 01:50:49 +0000203
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first, followed by the diagnostics found only in A,
    then the diagnostics found only in B.
    """

    def issueKey(diag):
        return diag.getIssueIdentifier()

    # Work on sorted copies so the runs' own lists are left untouched.
    pendingA = sorted(A.diagnostics, key=issueKey)
    pendingB = sorted(B.diagnostics, key=issueKey)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements: repeatedly compare the largest
    # remaining identifier on each side and consume from the tail.
    while pendingA and pendingB:
        a = pendingA[-1]
        b = pendingB[-1]
        keyA = issueKey(a)
        keyB = issueKey(b)
        if keyA == keyB:
            pendingA.pop()
            pendingB.pop()
            matched.append((a, b, 0))
        elif keyA > keyB:
            # Nothing left in B can equal a; it exists only in A.
            pendingA.pop()
            onlyA.append(a)
        else:
            # Nothing left in A can equal b; it exists only in B.
            pendingB.pop()
            onlyB.append(b)

    # Whatever remains on one side has no counterpart on the other.
    onlyA.extend(pendingA)
    onlyB.extend(pendingB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res = matched
    res.extend((a, None, None) for a in onlyA)
    res.extend((None, b, None) for b in onlyB)
    return res
250
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the analyzer results in two directories.

    dirA, dirB  - result directories of the two runs being compared
    opts        - options object providing rootA, rootB and verboseLog
    deleteEmpty - forwarded to loadResults for both directories

    One line is written to standard output for every added, removed or
    changed diagnostic, followed by the totals. When opts.verboseLog is set,
    a tuple-formatted record of each difference (including the report
    paths) is also written to that file.

    Returns the number of differences found.
    """
    # Imported here because nothing else in this function's module-level
    # dependencies provides access to standard output without 'print'.
    import sys

    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode is used so that str records
    # can be written on both Python 2 and Python 3.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    out = sys.stdout
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                out.write("ADDED: %r\n" % (b.getReadableName(),))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(), b.getReport()))
            elif b is None:
                out.write("REMOVED: %r\n" % (a.getReadableName(),))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(), a.getReport()))
            elif confidence:
                out.write("CHANGED: %r to %r\n" % (a.getReadableName(),
                                                   b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))
            # Otherwise the pair matched exactly; nothing to report.

        TotalReports = len(resultsB.diagnostics)
        out.write("TOTAL REPORTS: %r\n" % (TotalReports,))
        out.write("TOTAL DIFFERENCES: %r\n" % (foundDiffs,))
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % (TotalReports,))
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % (foundDiffs,))
    finally:
        # Close the log even if comparing or formatting fails, so the file
        # handle is not leaked and buffered records reach the disk.
        if auxLog is not None:
            auxLog.close()

    return foundDiffs
299
def main():
    """Command-line driver: parse the options and diff two result dirs.

    Exactly two positional arguments (the two result directories) are
    required; otherwise the option parser reports an error.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")

    # The two root options differ only in the directory letter.
    rootOptions = (
        ("--rootA", "rootA",
         "Prefix to ignore on source files for directory A"),
        ("--rootB", "rootB",
         "Prefix to ignore on source files for directory B"),
    )
    for flag, destination, helpText in rootOptions:
        parser.add_option("", flag, dest=destination, help=helpText,
                          action="store", type=str, default="")

    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")

    opts, positional = parser.parse_args()
    if len(positional) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(positional[0], positional[1], opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000321
# Run the command-line driver only when this file is executed as a script.
if __name__ == "__main__":
    main()