blob: 9b468bfefe5506681ce791d878b62439c6c2fd7e [file] [log] [blame]
#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""
28
29import os
30import plistlib
Anna Zaks81765572012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000032
class SingleRunInfo:
    """Information about a single analysis run.

    path       - the analysis output directory.
    root       - the name of the root directory, which will be disregarded
                 when determining the source file name.
    verboseLog - stored as given; this class does not interpret it.
    """

    def __init__(self, path, root="", verboseLog=None):
        self.verboseLog = verboseLog
        self.root = root
        self.path = path
42
class AnalysisDiagnostic:
    """A single diagnostic entry together with the report it came from.

    data       - the raw diagnostic dictionary; must contain 'location'.
    report     - the owning report; its 'files' and 'run' attributes are read.
    htmlReport - name of the matching HTML report, or None if there is none.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._report = report
        self._htmlReport = htmlReport
        self._loc = data['location']

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        prefix = self._report.run.root
        name = self._report.files[self._loc['file']]
        if not name.startswith(prefix):
            return name
        return name[len(prefix):]

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across runs.

        The key is the file name followed by "+", then the issue context
        and another "+" when a context is present, then the issue hash
        when one is present.
        """
        identifier = self.getFileName() + "+"
        context = self._data.get('issue_context') \
            if 'issue_context' in self._data else None
        if 'issue_context' in self._data:
            identifier = identifier + (context + "+")
        if 'issue_hash' in self._data:
            identifier = identifier + str(self._data['issue_hash'])
        return identifier

    def getReport(self):
        """Return the HTML report path, or " " when there is no report."""
        if self._htmlReport is not None:
            return os.path.join(self._report.run.path, self._htmlReport)
        return " "

    def getReadableName(self):
        """Return 'file:line:col, category: description'."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000091
class multidict:
    """Dictionary wrapper that keeps every value assigned to a key.

    Assigning to a key appends to that key's list instead of replacing it;
    reading a key returns the whole list.
    """

    def __init__(self, elts=()):
        self.data = {}
        # Route through __setitem__ so repeated keys accumulate.
        for k, v in elts:
            self[k] = v

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000115
class CmpOptions:
    """Plain holder for the comparison options.

    verboseLog - stored as given (defaults to None).
    rootA      - stored as given (defaults to "").
    rootB      - stored as given (defaults to "").
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
121
class AnalysisReport:
    """The contents of one loaded report.

    run        - the run this report belongs to.
    files      - the report's file table (indexed by diagnostics' locations).
    clang_vers - the clang version string recorded for the report.
    """

    def __init__(self, run, files, clang_vers):
        # Diagnostics are filled in by whoever builds the report.
        self.diagnostics = []
        self.files = files
        self.clang_version = clang_vers
        self.run = run
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000128
class AnalysisRun:
    """All reports and diagnostics gathered for one analysis run.

    info - an object providing 'path' and 'root' attributes; both are
           copied onto the run and the object itself is kept as 'info'.
    """

    def __init__(self, info):
        self.info = info
        self.path, self.root = info.path, info.root
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000137
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000138
# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Load one run's results using the older argument list.

    Packs path, root and opts.verboseLog into a SingleRunInfo and hands it,
    along with deleteEmpty, to loadResultsFromSingleRun.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
143
# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Walk info.path and load every plist report found below it.

    Each file whose name ends in 'plist' is parsed. A plist with an empty
    'files' entry is skipped (and removed from disk when deleteEmpty is
    true). Every other plist becomes an AnalysisReport whose diagnostics
    are also appended to the run's cumulative list.

    Returns the populated AnalysisRun.

    Raises KeyError if a plist lacks the 'files' or 'diagnostics' entry,
    and AssertionError if a plist carries entries other than 'files',
    'diagnostics' and 'clang_version'.
    """
    path = info.path
    run = AnalysisRun(info)

    for (dirpath, _dirnames, filenames) in os.walk(path):
        for f in filenames:
            if not f.endswith('plist'):
                continue

            p = os.path.join(dirpath, f)
            data = plistlib.readPlist(p)

            # Ignore/delete empty reports.
            if not data['files']:
                if deleteEmpty:
                    os.remove(p)
                continue

            rawDiagnostics = data.pop('diagnostics')

            # Extract the HTML reports, if they exist. The first diagnostic
            # decides for the whole file; the emptiness check keeps a plist
            # that lists files but no diagnostics from raising IndexError.
            if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
                htmlFiles = []
                for d in rawDiagnostics:
                    # FIXME: Why is this named files, when does it have
                    # multiple files?
                    assert len(d['HTMLDiagnostics_files']) == 1
                    htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
            else:
                htmlFiles = [None] * len(rawDiagnostics)

            clang_version = data.pop('clang_version', '')

            report = AnalysisReport(run, data.pop('files'), clang_version)
            diagnostics = [AnalysisDiagnostic(d, report, h)
                           for d, h in zip(rawDiagnostics, htmlFiles)]

            # Everything known has been consumed; anything left over means
            # the plist has entries this loader does not understand.
            assert not data

            report.diagnostics.extend(diagnostics)
            run.reports.append(report)
            run.diagnostics.extend(diagnostics)

    return run
192
def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    key = d.getIssueIdentifier()
    return key
Anna Zaks19b17cb2012-06-08 01:50:49 +0000195
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first, followed by the diagnostics found only in A
    and then those found only in B.
    """

    # Work on sorted copies so the runs' own lists are left untouched.
    pendingA = sorted(A.diagnostics, key = cmpAnalysisDiagnostic)
    pendingB = sorted(B.diagnostics, key = cmpAnalysisDiagnostic)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements: compare the largest remaining
    # identifier of each side and consume whichever cannot match any more.
    while pendingA and pendingB:
        idA = pendingA[-1].getIssueIdentifier()
        idB = pendingB[-1].getIssueIdentifier()
        if idA == idB:
            matched.append((pendingA.pop(), pendingB.pop(), 0))
        elif idA > idB:
            onlyA.append(pendingA.pop())
        else:
            onlyB.append(pendingB.pop())

    # Whatever is left on one side has no counterpart on the other.
    onlyA += pendingA
    onlyB += pendingB

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    unmatchedA = [(a, None, None) for a in onlyA]
    unmatchedB = [(None, b, None) for b in onlyB]
    return matched + unmatchedA + unmatchedB
242
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the results in dirA and dirB.

    dirA, dirB  - output directories of the two runs to compare.
    opts        - options object; rootA, rootB and verboseLog are read.
    deleteEmpty - forwarded to loadResults for both directories.

    One line is printed per added, removed or changed diagnostic, followed
    by the totals. When opts.verboseLog is set, matching records (including
    the report paths) are also written to that file.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. It is opened in binary mode, as
    # before; the records are written as plain strings (Python 2 semantics).
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    def logLine(text):
        # Write one record to the verbose log; a no-op without a log.
        if auxLog:
            auxLog.write(text + "\n")

    # The try/finally guarantees the log is flushed and closed even if
    # comparing or printing fails part way through.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for res in diff:
            a,b,confidence = res
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                logLine("('ADDED', %r, %r)" % (b.getReadableName(),
                                               b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                logLine("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                 a.getReport()))
            elif confidence:
                # compareResults currently yields only 0 or None here, so
                # this branch is reached only once inexact matches exist.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                logLine("('CHANGED', %r, %r, %r, %r)"
                        % (a.getReadableName(),
                           b.getReadableName(),
                           a.getReport(),
                           b.getReport()))
            else:
                pass

        # The total counts the diagnostics of the second run (dirB).
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        logLine("('TOTAL NEW REPORTS', %r)" % TotalReports)
        logLine("('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
291
def main():
    """Parse the command line and print the diff of the two directories."""
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    # --rootA and --rootB differ only in the directory letter.
    for letter in ("A", "B"):
        parser.add_option("", "--root" + letter, dest="root" + letter,
                          help="Prefix to ignore on source files for "
                               "directory " + letter,
                          action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    opts, args = parser.parse_args()

    # parser.error() reports the problem and exits.
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(args[0], args[1], opts)


if __name__ == '__main__':
    main()