blob: d20cd6aa64c58be4110ad48bac277c30c0aa8b9e [file] [log] [blame]
#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
"""
15
16import os
17import plistlib
18
19#
20
class multidict(object):
    """
    multidict - A small mapping from a key to the list of every value that has
    been assigned to that key.

    Assignment never overwrites: `m[key] = value` appends value to the list
    stored under key, creating that list on first use. Lookups hand back the
    stored list itself.
    """

    def __init__(self, elts=()):
        # elts is an iterable of (key, value) pairs; a key that occurs more
        # than once accumulates all of its values, in iteration order.
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        # Raises KeyError when nothing was ever assigned to item.
        return self.data[item]

    def __setitem__(self, key, value):
        # setdefault creates the per-key list on first use, so a single
        # expression covers both the "new key" and "existing key" cases.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        # Number of distinct keys, not the number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        # Returns the list stored under key, or default when key is absent.
        return self.data.get(key, default)
44
45#
46
class CmpOptions:
    """
    CmpOptions - Plain holder for the settings of a comparison.

    verboseLog - Stored as given; defaults to None.
    root       - Stored as given; defaults to the empty string.
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog, self.root = verboseLog, root
51
class AnalysisReport:
    """
    AnalysisReport - Ties a report's file list to the run it belongs to.

    run   - The owning run object.
    files - The report's list of files, stored as given.
    """

    def __init__(self, run, files):
        self.files, self.run = files, run
56
class AnalysisDiagnostic:
    """
    AnalysisDiagnostic - One diagnostic entry taken from a report.

    data       - The diagnostic's dictionary; getReadableName reads its
                 'location' entry.
    report     - The report this diagnostic came from.
    htmlReport - Name of the HTML report file, relative to the run directory,
                 or None when the diagnostic has none.
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic's location as 'file:line:column'."""
        loc = self.data['location']
        # loc['file'] is an index into the report's file list; the run then
        # maps that path to the name to display.
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """
        Return the raw contents of the HTML report, or a fixed placeholder
        message when this diagnostic has no HTML report.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        f = open(reportPath, "rb")
        try:
            return f.read()
        finally:
            # Close explicitly rather than relying on garbage collection to
            # release the handle.
            f.close()
80
class AnalysisRun:
    """
    AnalysisRun - The collected results of one analyzer run.

    path        - Directory the run was loaded from.
    opts        - Options object; getSourceName reads its 'root' attribute.
    reports     - Starts empty; filled in by whoever loads the run.
    diagnostics - Starts empty; filled in by whoever loads the run.
    """

    def __init__(self, path, opts):
        self.opts = opts
        self.path = path
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        """Return path with the configured root prefix removed, if present."""
        prefix = self.opts.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
92
def loadResults(path, opts):
    """
    loadResults - Load every analyzer report found directly inside a
    directory.

    Only directory entries whose name starts with 'report' and ends with
    'plist' are read. A report whose 'files' list is empty is skipped.

    path - Directory to scan.
    opts - Options object stored on the returned run.

    Returns an AnalysisRun holding one AnalysisReport per loaded plist and one
    AnalysisDiagnostic per diagnostic entry.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if (not f.startswith('report') or
            not f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore empty reports.
        if not data['files']:
            continue

        # Extract the HTML reports, if they exist. This is decided per
        # diagnostic, so a report with no diagnostics at all, or one where
        # only some diagnostics carry an HTML file, loads without raising.
        htmlFiles = []
        for d in data['diagnostics']:
            files = d.pop('HTMLDiagnostics_files', None)
            if files is None:
                htmlFiles.append(None)
                continue
            # FIXME: Why is this named files, when does it have multiple
            # files?
            assert len(files) == 1
            htmlFiles.append(files[0])

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d,h in zip(data.pop('diagnostics'),
                                      htmlFiles)]

        # 'files' and 'diagnostics' are the only top-level keys handled here;
        # anything left over means the plist has content this loader ignores.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
130
def compareResults(A, B):
    """
    compareResults - Pair up the diagnostics of run A with those of run B.

    Returns a list of triples (a, b, confidence). For an exact match, a and b
    are the two diagnostics and confidence is 0. A diagnostic without a
    partner is reported with None in the other slot and None as the
    confidence. Matched pairs come first, then the unmatched diagnostics of A,
    then the unmatched diagnostics of B.
    """

    byData = lambda d: d.data
    sortedA = sorted(A.diagnostics, key=byData)
    sortedB = sorted(B.diagnostics, key=byData)

    matched = []
    onlyA = []
    onlyB = []

    # Walk both sorted lists from their largest element downwards. Equal
    # heads are a match; otherwise the larger head cannot have a partner
    # among what is left on the other side, so it is set aside.
    i = len(sortedA) - 1
    j = len(sortedB) - 1
    while i >= 0 and j >= 0:
        a = sortedA[i]
        b = sortedB[j]
        if a.data == b.data:
            matched.append((a, b, 0))
            i -= 1
            j -= 1
        elif a.data > b.data:
            onlyA.append(a)
            i -= 1
        else:
            onlyB.append(b)
            j -= 1

    # One side ran out; whatever remains on the other side is unmatched.
    onlyA.extend(sortedA[:i + 1])
    onlyB.extend(sortedB[:j + 1])

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    return (matched +
            [(a, None, None) for a in onlyA] +
            [(None, b, None) for b in onlyB])
177
def cmpScanBuildResults(dirA, dirB, opts):
    """
    cmpScanBuildResults - Compare the results stored in two directories and
    print the differences.

    One line is printed for each diagnostic that is only in dirB (ADDED), only
    in dirA (REMOVED), or matched with a non-zero confidence (CHANGED),
    followed by the total number of diagnostics in dirB. When opts.verboseLog
    is set, a tuple-formatted record including the report data is also written
    to that file for every printed line.

    Returns True if at least one difference was reported, False otherwise.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts)
    resultsB = loadResults(dirB, opts)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # The try/finally guarantees the log is closed, and so flushed, even if
    # producing the report raises part-way through.
    #
    # Output is written as print(<single string>) and auxLog.write(... "\n"):
    # both produce exactly what the corresponding print statements produce
    # under Python 2, while remaining valid syntax for newer interpreters.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = False
        for res in diff:
            a,b,confidence = res
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs = True
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReportData())
                                 + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs = True
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReportData())
                                 + "\n")
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs = True
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData())
                                 + "\n")
            else:
                # Matched with confidence 0: nothing to report.
                pass

        print("TOTAL REPORTS: %r" % len(resultsB.diagnostics))
        if auxLog:
            auxLog.write("('TOTAL REPORTS', %r)" % len(resultsB.diagnostics)
                         + "\n")
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
223
def main():
    """
    main - Command line entry point.

    Parses the options and the two positional directory arguments, then runs
    the comparison on them. Any other number of positional arguments is
    reported through the parser's error handler.
    """
    from optparse import OptionParser

    # Each entry is (long flag, keyword arguments for add_option); the options
    # are registered in the order listed.
    optionSpecs = [
        ("--root",
         dict(dest="root",
              help="Prefix to ignore on source files",
              action="store", type=str, default="")),
        ("--verbose-log",
         dict(dest="verboseLog",
              help="Write additional information to LOG [default=None]",
              action="store", type=str, default=None,
              metavar="LOG")),
    ]

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    for flag, settings in optionSpecs:
        parser.add_option("", flag, **settings)

    opts, args = parser.parse_args()
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args
    cmpScanBuildResults(dirA, dirB, opts)


if __name__ == '__main__':
    main()