blob: 220045319facf67eacaa64a6c434a8dc829630aa [file] [log] [blame]
Daniel Dunbar1a9db992009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
14"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """Mapping-like container that keeps every value assigned to a key.

    ``m[key] = value`` appends ``value`` to the list stored under ``key``
    (creating the list on first use); ``m[key]`` returns that list.
    """

    def __init__(self, elts=()):
        # elts is an iterable of (key, value) pairs; repeated keys accumulate.
        self.data = {}
        for pair in elts:
            name, entry = pair
            self[name] = entry

    def __setitem__(self, key, value):
        # Never overwrites: the new value joins whatever is already there.
        self.data.setdefault(key, []).append(value)

    def __getitem__(self, item):
        return self.data[item]

    def __len__(self):
        # Number of distinct keys, not the number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
44
45#
46
class CmpOptions:
    """Settings consulted while loading and comparing analyzer runs.

    verboseLog -- path that cmpScanBuildResults opens for extra output when
                  it is truthy.
    root       -- leading path prefix that AnalysisRun.getSourceName strips
                  from source file names.
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
51
class AnalysisReport:
    """One loaded report: the run it belongs to plus its 'files' entry.

    AnalysisDiagnostic indexes ``files`` with the 'file' value of a
    diagnostic's location to find the source path.
    """

    def __init__(self, run, files):
        self.files = files
        self.run = run
56
class AnalysisDiagnostic:
    """A single diagnostic together with its owning report and HTML file.

    data       -- the diagnostic's dictionary ('location', 'category',
                  'description', ... keys are read below).
    report     -- the AnalysisReport this diagnostic was loaded from.
    htmlReport -- name of the matching HTML report, or None if there is none.
    """

    def __init__(self, data, report, htmlReport):
        self.htmlReport = htmlReport
        self.report = report
        self.data = data

    def getReadableName(self):
        """Return 'file:line:col, category: description' for this diagnostic."""
        location = self.data['location']
        owner = self.report
        sourcePath = owner.files[location['file']]

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        fields = (owner.run.getSourceName(sourcePath),
                  location['line'],
                  location['col'],
                  self.data['category'],
                  self.data['description'])
        return '%s:%d:%d, %s: %s' % fields

    def getReportData(self):
        """Return the path of the HTML report, or " " when there is none."""
        if self.htmlReport is not None:
            return os.path.join(self.report.run.path, self.htmlReport)
        return " "
        # We could also dump the report with:
        # return open(os.path.join(self.report.run.path,
        #                          self.htmlReport), "rb").read()
Daniel Dunbar1a9db992009-08-06 21:15:33 +000084
class AnalysisRun:
    """The reports and diagnostics loaded from one results directory.

    path -- the results directory.
    opts -- options object; only its ``root`` attribute is read here.
    """

    def __init__(self, path, opts):
        self.opts = opts
        self.path = path
        # Both lists start empty and are filled in by loadResults.
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        """Return *path* with the configured root prefix removed, if present."""
        prefix = self.opts.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
96
def loadResults(path, opts, deleteEmpty=True):
    """Load every ``report*plist`` file directly inside *path*.

    path        -- directory to scan (not recursive) for report plists.
    opts        -- options object handed on to the AnalysisRun.
    deleteEmpty -- when true, plist files whose 'files' entry is empty are
                   removed from disk; such files are skipped either way.

    Returns an AnalysisRun whose ``reports`` and ``diagnostics`` lists hold
    everything that was loaded.
    """
    run = AnalysisRun(path, opts)

    # os.listdir gives no ordering guarantee; sort so that reports are
    # always loaded in the same order.
    for f in sorted(os.listdir(path)):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist.  Each diagnostic is
        # inspected on its own, so an empty diagnostics list (or one where
        # only some entries carry HTML output) no longer raises.
        htmlFiles = []
        for d in data['diagnostics']:
            files = d.pop('HTMLDiagnostics_files', None)
            if files is None:
                htmlFiles.append(None)
                continue
            # FIXME: Why is this named files, when does it have multiple
            # files?
            assert len(files) == 1
            htmlFiles.append(files[0])

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Anything still left in the plist is a key this loader does not
        # know how to handle.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
136
def getIssueIdentifier(d):
    """Return the string used to recognise the same issue in another run.

    The result is d.data['issue_context'] followed by
    str(d.data['issue_hash']); a missing key contributes nothing, so the
    identifier is '' when neither key is present.
    """
    info = d.data
    context = info['issue_context'] if 'issue_context' in info else ''
    digest = str(info['issue_hash']) if 'issue_hash' in info else ''
    return context + digest
144
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates that the
    two diagnostics have the same issue identifier, and None is used if
    either element is None).
    """

    res = []

    # Quickly eliminate equal elements.  Both lists are sorted by issue
    # identifier and then merged from the largest identifier downwards.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=getIssueIdentifier)
    eltsB = sorted(B.diagnostics, key=getIssueIdentifier)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        idA = getIssueIdentifier(a)
        idB = getIssueIdentifier(b)
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            # The merge must be driven by the same key the lists were
            # sorted on.  a has the larger identifier, so nothing left in B
            # can match it; b goes back for the next round.
            neqA.append(a)
            eltsB.append(b)
        else:
            neqB.append(b)
            eltsA.append(a)
    # Whatever remains on either side has no partner.
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
191
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """Compare the analyzer results in dirA and dirB and report differences.

    dirA, dirB  -- result directories, each loaded with loadResults.
    opts        -- options object; ``opts.verboseLog``, when truthy, names a
                   file that receives one tuple-formatted line per
                   difference plus the totals.
    deleteEmpty -- forwarded to loadResults.

    Prints an ADDED / REMOVED / CHANGED line for every difference, followed
    by the number of diagnostics in dirB and the number of differences.
    Returns the number of differences.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # Single-argument, parenthesised print and file.write(text + "\n")
    # produce the same output as the print statement / "print >>file" forms.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReportData())
                                 + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReportData())
                                 + "\n")
            elif confidence:
                # Only reached for a matched pair with non-zero confidence.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData())
                                 + "\n")
            # Matched pairs with confidence 0 are identical: nothing to say.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % TotalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        # Close the log even if reporting fails part-way, so it is flushed
        # and the file handle is not leaked.
        if auxLog:
            auxLog.close()

    return foundDiffs
240
def main():
    """Command-line entry point: parse the options and compare two runs.

    Expects exactly two positional arguments (the two result directories);
    anything else is reported through the option parser's error handling.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    # "store" is optparse's default action, so it is not spelled out.
    parser.add_option("--root", dest="root", type="string", default="",
                      help="Prefix to ignore on source files")
    parser.add_option("--verbose-log", dest="verboseLog", type="string",
                      default=None, metavar="LOG",
                      help="Write additional information to LOG [default=None]")
    opts, positional = parser.parse_args()

    if len(positional) != 2:
        parser.error("invalid number of arguments")

    firstDir, secondDir = positional

    cmpScanBuildResults(firstDir, secondDir, opts)
Daniel Dunbar1a9db992009-08-06 21:15:33 +0000259
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()