#!/usr/bin/env python
# blob: 2072e4d0a1bd2c05a23b605ea91b4947d25720ad

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.
"""
15
import os
import plistlib
18
#
20
class multidict:
    """Mapping that keeps every value assigned to a key.

    ``m[key] = value`` appends ``value`` to the list stored under ``key``
    instead of replacing it, and ``m[key]`` returns that list.
    """

    def __init__(self, elts=()):
        """Populate the mapping from an iterable of ``(key, value)`` pairs."""
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        """Return the list of values stored under ``item``.

        Raises KeyError if nothing was ever stored under ``item``.
        """
        return self.data[item]

    def __setitem__(self, key, value):
        """Append ``value`` to the list kept for ``key``."""
        self.data.setdefault(key, []).append(value)

    def __contains__(self, key):
        """Return True if at least one value was stored under ``key``."""
        # Without this, ``key in m`` falls back to calling __getitem__ with
        # 0, 1, 2, ... and raises KeyError instead of answering the question.
        return key in self.data

    def __iter__(self):
        """Iterate over the keys, like a regular dict."""
        return iter(self.data)

    def items(self):
        """Return the ``(key, list-of-values)`` pairs."""
        return self.data.items()

    def values(self):
        """Return the per-key value lists."""
        return self.data.values()

    def keys(self):
        """Return the keys."""
        return self.data.keys()

    def __len__(self):
        """Return the number of distinct keys."""
        return len(self.data)

    def get(self, key, default=None):
        """Return the value list for ``key``, or ``default`` if absent."""
        return self.data.get(key, default)
44
#
46
class CmpOptions:
    """Options bundle for a comparison when not using the command line.

    Carries the same two attributes that ``main()`` obtains from its option
    parser.
    """

    def __init__(self, verboseLog=None, root=""):
        # Prefix stripped from source file names when they are reported.
        self.root = root
        # Path of the verbose log file to write, or None for no log.
        self.verboseLog = verboseLog
51
class AnalysisReport:
    """One report file belonging to an analysis run."""

    def __init__(self, run, files):
        # The run this report was loaded for.
        self.run = run
        # The report's 'files' entry; diagnostics index into it through
        # their location's 'file' field.
        self.files = files
56
class AnalysisDiagnostic:
    """A single diagnostic taken from a report.

    ``data`` is the diagnostic's dictionary, ``report`` is the report it
    belongs to, and ``htmlReport`` is the name of the matching HTML file
    inside the run directory (or None when there is none).
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic's position as ``'file:line:column'``."""
        loc = self.data['location']
        # The location stores an index into the report's file list; the run
        # turns the resulting path into the name that gets displayed.
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """Return the raw contents of the HTML report for this diagnostic.

        Returns a fixed placeholder string when no HTML report is attached.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        # Use a context manager so the file handle is released right away
        # instead of whenever the garbage collector gets to it.
        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        with open(reportPath, "rb") as f:
            return f.read()
80
class AnalysisRun:
    """The reports and diagnostics collected from one results directory."""

    def __init__(self, path, opts):
        # Directory the results were read from.
        self.path = path
        # Filled in by the loader: one entry per report file, and the
        # diagnostics of all reports flattened into a single list.
        self.reports = []
        self.diagnostics = []
        # Options object; only its 'root' attribute is used here.
        self.opts = opts

    def getSourceName(self, path):
        """Return ``path`` with the configured root prefix removed.

        Paths that do not start with the root are returned unchanged.
        """
        root = self.opts.root
        if path.startswith(root):
            return path[len(root):]
        return path
92
def _readPlistFile(path):
    """Parse the plist file at ``path`` and return its top-level object."""
    # plistlib.readPlist() was removed in Python 3.9. plistlib.load() is the
    # replacement and wants a file object opened in binary mode; fall back to
    # readPlist() on interpreters that predate load().
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as f:
            return plistlib.load(f)
    return plistlib.readPlist(path)


def loadResults(path, opts, deleteEmpty=True):
    """Load every ``report*plist`` file in ``path`` into an AnalysisRun.

    path        -- directory containing the report files.
    opts        -- options object handed to the AnalysisRun.
    deleteEmpty -- when true, report files whose 'files' entry is empty are
                   removed from disk; they are skipped either way.

    Returns the populated AnalysisRun. Raises AssertionError if a diagnostic
    lists other than exactly one HTML file, or if a report has top-level
    keys besides 'files' and 'diagnostics'.
    """
    run = AnalysisRun(path, opts)

    # Sort the directory listing so reports are loaded in a stable order
    # regardless of what the file system returns.
    for f in sorted(os.listdir(path)):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = _readPlistFile(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist. This is decided per
        # diagnostic, so a report with an empty diagnostics list is fine.
        htmlFiles = []
        for d in data['diagnostics']:
            htmlNames = d.pop('HTMLDiagnostics_files', None)
            if htmlNames is None:
                htmlFiles.append(None)
                continue
            # FIXME: Why is this named files, when does it have multiple
            # files?
            assert len(htmlNames) == 1
            htmlFiles.append(htmlNames[0])

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Everything known has been consumed; leftovers mean the report
        # format has fields this loader does not understand.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
132
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Two diagnostics are considered equal when their 'location' entries are
    equal. Matched pairs come first, followed by the diagnostics found only
    in A and then those found only in B.
    """

    def locationKey(diag):
        # Dictionaries cannot be ordered on Python 3, so order by the
        # location's sorted items. Two keys are equal exactly when the two
        # location dictionaries are equal.
        return sorted(diag.data['location'].items())

    res = []

    # Quickly eliminate equal elements. Both lists are sorted by the same key
    # that decides equality, so a single merge pass from the high end finds
    # every matching pair.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=locationKey)
    eltsB = sorted(B.diagnostics, key=locationKey)
    while eltsA and eltsB:
        keyA = locationKey(eltsA[-1])
        keyB = locationKey(eltsB[-1])
        if keyA == keyB:
            res.append((eltsA.pop(), eltsB.pop(), 0))
        elif keyA > keyB:
            # Nothing left in B can reach this location: only in A.
            neqA.append(eltsA.pop())
        else:
            # Nothing left in A can reach this location: only in B.
            neqB.append(eltsB.pop())
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
179
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """Compare two result directories and print what differs.

    dirA, dirB  -- directories holding the two sets of report files.
    opts        -- options object; 'verboseLog' names an optional log file
                   that additionally receives the report data.
    deleteEmpty -- forwarded to loadResults for both directories.

    Prints one line per added/removed/changed diagnostic followed by the
    totals, and returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given. Text mode, because only formatted
    # strings are written to it.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                # Report data is read from disk, so only fetch it when
                # there is a log to put it in.
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(), b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(), a.getReportData()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData()))
            # Otherwise the pair matched (confidence 0): nothing to report.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        # Close the log even if comparing or reading report data fails.
        if auxLog:
            auxLog.close()

    return foundDiffs
228
def main():
    """Command-line entry point: compare the two directories given as args.

    Exits through the option parser's error handling unless exactly two
    positional arguments are supplied.
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root", dest="root",
                      help="Prefix to ignore on source files",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    # The parsed options object carries the 'root' and 'verboseLog'
    # attributes the comparison reads.
    cmpScanBuildResults(dirA, dirB, opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000247
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()