blob: c8f05cbcf47415d1dea6e66c01e45a2c55d84730 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks7acc4072012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks2a84b8b2012-08-02 00:41:40 +000019 #
20 # root - the name of the root directory, which will be disregarded when
21 # determining the source file name
22 #
23 resultsA = loadResults(dirA, opts, root, deleteEmpty)
24 resultsB = loadResults(dirB, opts, root, deleteEmpty)
Anna Zaks7acc4072012-07-16 20:21:42 +000025
26 # Generate a relation from diagnostics in run A to diagnostics in run B
27 # to obtain a list of triples (a, b, confidence).
28 diff = compareResults(resultsA, resultsB)
29
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000030"""
31
32import os
33import plistlib
34
35#
class AnalysisDiagnostic:
    """A single diagnostic taken from one analyzer plist report.

    data       - the diagnostic dictionary; must contain a 'location' entry
                 with 'file', 'line' and 'col' keys.
    report     - the owning report object; its 'files' list is indexed by the
                 location's 'file' value and its 'run' resolves source names.
    htmlReport - name of the matching HTML report file, or None.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._report = report
        self._htmlReport = htmlReport
        self._loc = data['location']

    def getFileName(self):
        """Return the source file name as resolved by the owning run."""
        report = self._report
        rawPath = report.files[self._loc['file']]
        return report.run.getSourceName(rawPath)

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across runs.

        The key is the optional issue context and issue hash (each followed
        by a colon), then a colon and the source file name.
        """
        pieces = []
        if 'issue_context' in self._data:
            pieces.append(self._data['issue_context'] + ":")
        if 'issue_hash' in self._data:
            pieces.append(str(self._data['issue_hash']) + ":")
        prefix = ''.join(pieces)
        return prefix + ":" + self.getFileName()

    def getReport(self):
        """Return the HTML report path, or a single space if there is none."""
        if self._htmlReport is not None:
            return os.path.join(self._report.run.path, self._htmlReport)
        return " "

    def getReadableName(self):
        """Return a 'file:line:col, category: description' summary."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000075
class multidict:
    """Dictionary-like container that keeps every value stored under a key.

    Assigning to a key appends to that key's list instead of replacing it;
    reading a key returns the list of values assigned so far.
    """

    def __init__(self, elts=()):
        self.data = {}
        # Route every pair through __setitem__ so duplicates accumulate.
        for pair in elts:
            key, value = pair
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the bucket on first use, then append to it.
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        return len(self.data)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()

    def get(self, key, default=None):
        return self.data.get(key, default)
99
100#
101
class CmpOptions:
    """Plain option holder for a comparison.

    verboseLog - path of an additional log file, or None for no log.
    rootA      - source-path prefix to ignore for the first results directory.
    rootB      - source-path prefix to ignore for the second results directory.
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
107
class AnalysisReport:
    """One loaded report: the run it belongs to and its list of files."""

    def __init__(self, run, files):
        self.run, self.files = run, files
112
class AnalysisRun:
    """Container for everything loaded from one results directory.

    path - the results directory.
    root - prefix removed from source paths by getSourceName().
    opts - the options object the run was loaded with.
    """

    def __init__(self, path, root, opts):
        self.path = path
        self.root = root
        self.opts = opts
        # Both lists start empty and are filled in by the loader.
        self.reports = []
        self.diagnostics = []

    def getSourceName(self, path):
        """Return 'path' with the run's root prefix removed, if present."""
        prefix = self.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
125
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Load every 'report*plist' file in 'path' into an AnalysisRun.

    path        - directory holding the plist reports.
    opts        - options object, stored on the returned run.
    root        - prefix to disregard when determining source file names.
    deleteEmpty - when true, reports whose 'files' list is empty are removed
                  from disk; such reports are skipped either way.

    Returns the populated AnalysisRun. Raises AssertionError if a report
    contains keys other than 'files' and 'diagnostics'.
    """
    run = AnalysisRun(path, root, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        # NOTE(review): readPlist is the legacy plistlib entry point; newer
        # Python 3 releases only provide plistlib.load -- confirm the target
        # interpreter before switching.
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        diagnosticData = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The emptiness check keeps
        # a report that lists files but no diagnostics from raising
        # IndexError on the [0] lookup.
        if diagnosticData and 'HTMLDiagnostics_files' in diagnosticData[0]:
            htmlFiles = []
            for d in diagnosticData:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                # TODO: Add the assert back in after we fix the
                # plist-html output.
                # assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(diagnosticData)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(diagnosticData, htmlFiles)]

        # Everything known has been popped; anything left is unexpected.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
167
def cmpAnalysisDiagnostic(d) :
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaks19b17cb2012-06-08 01:50:49 +0000170
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Diagnostics are matched on getIssueIdentifier(). The 'diagnostics' lists
    of A and B are copied, not modified.
    """

    def issueKey(diag):
        return diag.getIssueIdentifier()

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=issueKey)
    eltsB = sorted(B.diagnostics, key=issueKey)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        keyA = issueKey(a)
        keyB = issueKey(b)
        if keyA == keyB:
            res.append((a, b, 0))
        elif keyA > keyB:
            # Both lists are sorted by identifier and popped from the large
            # end, so the ordering test must use that same key: 'a' is then
            # larger than everything left in B and cannot have a partner.
            neqA.append(a)
            eltsB.append(b)
        else:
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
217
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the results in dirA and dirB.

    dirA, dirB  - the two results directories to load and compare.
    opts        - options object providing rootA, rootB and verboseLog.
    deleteEmpty - forwarded to loadResults for both directories.

    Each added, removed or changed diagnostic is printed to standard output,
    followed by the totals. If opts.verboseLog is set, matching entries are
    also written to that file. Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    # NOTE(review): the "wb" mode is kept from the original; under Python 3
    # the str writes below would need a text-mode file -- confirm the target
    # interpreter.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    entry = "('ADDED', %r, %r)" % (b.getReadableName(),
                                                   b.getReport())
                    auxLog.write(entry + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    entry = "('REMOVED', %r, %r)" % (a.getReadableName(),
                                                     a.getReport())
                    auxLog.write(entry + "\n")
            elif confidence:
                # Only reached for inexact matches (non-zero confidence).
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    entry = ("('CHANGED', %r, %r, %r, %r)"
                             % (a.getReadableName(),
                                b.getReadableName(),
                                a.getReport(),
                                b.getReport()))
                    auxLog.write(entry + "\n")
            else:
                # Exact match: nothing to report.
                pass

        # The report total counts the diagnostics of the second run only.
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % TotalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        # Close the log even if comparing or printing fails.
        if auxLog:
            auxLog.close()

    return foundDiffs
266
def main():
    """Parse the command line and print the diff of two results directories.

    Expects exactly two positional arguments (directory A and directory B);
    any other count is reported through the option parser's error().
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")

    # The two root options differ only in name and help text.
    rootOptions = (
        ("--rootA", "rootA",
         "Prefix to ignore on source files for directory A"),
        ("--rootB", "rootB",
         "Prefix to ignore on source files for directory B"),
    )
    for flag, dest, helpText in rootOptions:
        parser.add_option("", flag, dest=dest, help=helpText,
                          action="store", type=str, default="")

    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")

    opts, args = parser.parse_args()
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(args[0], args[1], opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000288
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()