blob: 8eba9ebc7bcd6fe466c5f28bc35dd46892e91356 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
14"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """Mapping-like container that keeps every value assigned to a key.

    ``m[key] = value`` appends ``value`` to the list stored under ``key``
    rather than replacing it, so a lookup always yields a list of values.
    """

    def __init__(self, elts=()):
        """Fill the container from an iterable of (key, value) pairs."""
        self.data = {}
        for pair in elts:
            key, value = pair
            # Go through __setitem__ so repeated keys accumulate.
            self[key] = value

    def __len__(self):
        """Return the number of distinct keys."""
        return len(self.data)

    def __getitem__(self, item):
        """Return the list of values stored under ``item``."""
        return self.data[item]

    def __setitem__(self, key, value):
        """Append ``value`` to the list for ``key``, creating it if needed."""
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        """Return the value list for ``key``, or ``default`` if absent."""
        return self.data.get(key, default)

    def keys(self):
        """Return the keys of the underlying dictionary."""
        return self.data.keys()

    def values(self):
        """Return the value lists of the underlying dictionary."""
        return self.data.values()

    def items(self):
        """Return the (key, value list) pairs of the underlying dictionary."""
        return self.data.items()
44
45#
46
class CmpOptions:
    """Plain holder for the options used when comparing two runs.

    Attributes:
        verboseLog: Path of a file to receive the detailed log, or None.
        root: Prefix to strip from source file paths.
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
51
class AnalysisReport:
    """One loaded report: the run it belongs to plus its 'files' table.

    Attributes:
        run: The run object this report was loaded into.
        files: The report's file table; diagnostics index into it.
    """

    def __init__(self, run, files):
        self.files = files
        self.run = run
56
class AnalysisDiagnostic:
    """A single diagnostic taken from a report.

    Attributes:
        data: The diagnostic's dictionary as read from the report.
        report: The report object that owns this diagnostic.
        htmlReport: Name of the associated HTML report file, relative to the
            run directory, or None when there is none.
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic's location formatted as 'file:line:column'."""
        loc = self.data['location']
        # The location stores an index into the report's file table; the run
        # then maps that path to its display name.
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """Return the raw contents of the HTML report for this diagnostic.

        Returns a fixed placeholder message when no HTML report is attached.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        # Use a context manager so the file handle is released promptly
        # instead of being left for the garbage collector.
        with open(reportPath, "rb") as f:
            return f.read()
80
class AnalysisRun:
    """All reports and diagnostics collected from one results directory.

    Attributes:
        path: Directory the run was loaded from.
        opts: Options object; its ``root`` attribute is read here.
        reports: Reports loaded so far.
        diagnostics: Diagnostics from all loaded reports.
    """

    def __init__(self, path, opts):
        self.path = path
        self.opts = opts
        self.reports = []
        self.diagnostics = []

    def getSourceName(self, path):
        """Return ``path`` with the configured root prefix removed, if present."""
        root = self.opts.root
        if not path.startswith(root):
            return path
        return path[len(root):]
92
def loadResults(path, opts, deleteEmpty=True):
    """Load every plist report found directly inside ``path``.

    A directory entry is treated as a report when its name starts with
    'report' and ends with 'plist'.

    Args:
        path: Directory containing the report files.
        opts: Options object handed to the AnalysisRun.
        deleteEmpty: When true, report files whose 'files' list is empty are
            removed from disk; they are skipped either way.

    Returns:
        An AnalysisRun holding the loaded reports and their diagnostics.

    Raises:
        AssertionError: If a diagnostic lists a number of HTML files other
            than one, or a report has keys besides 'files' and 'diagnostics'.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        report = AnalysisReport(run, data.pop('files'))

        # Extract the HTML report of each diagnostic, if it exists. This is
        # decided per diagnostic so that an empty diagnostics list, or a
        # report where only some diagnostics carry HTML files, does not
        # raise.
        diagnostics = []
        for d in data.pop('diagnostics'):
            htmlFiles = d.pop('HTMLDiagnostics_files', None)
            if htmlFiles is None:
                htmlReport = None
            else:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(htmlFiles) == 1
                htmlReport = htmlFiles[0]
            diagnostics.append(AnalysisDiagnostic(d, report, htmlReport))

        # Everything in the report should have been consumed by now.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
132
def compareResults(A, B):
    """
    compareResults - Build a relation between the diagnostics of run A and
    the diagnostics of run B.

    The relation is returned as a list of (a, b, confidence) triples. Each of
    a and b is either None or a diagnostic from the respective run, and
    confidence describes the match quality: 0 means the two diagnostics are
    equal, and None is used whenever one side is None.
    """

    byData = lambda d: d.data
    sortedA = sorted(A.diagnostics, key=byData)
    sortedB = sorted(B.diagnostics, key=byData)

    # Quickly eliminate equal elements: walk both sorted lists from their
    # largest element downwards, pairing equal entries and setting aside
    # whichever side is larger when they differ.
    matched = []
    onlyA = []
    onlyB = []
    i = len(sortedA) - 1
    j = len(sortedB) - 1
    while i >= 0 and j >= 0:
        a = sortedA[i]
        b = sortedB[j]
        if a.data == b.data:
            matched.append((a, b, 0))
            i -= 1
            j -= 1
        elif a.data > b.data:
            onlyA.append(a)
            i -= 1
        else:
            onlyB.append(b)
            j -= 1

    # Whatever is left on either side has no counterpart.
    onlyA.extend(sortedA[:i + 1])
    onlyB.extend(sortedB[:j + 1])

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res = matched
    res.extend((a, None, None) for a in onlyA)
    res.extend((None, b, None) for b in onlyB)
    return res
179
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """Compare the results in ``dirA`` and ``dirB`` and print the differences.

    One line is printed for every diagnostic that was added, removed, or
    changed between the two runs, followed by the total number of
    diagnostics in ``dirB``. When ``opts.verboseLog`` is set, a more detailed
    record of each difference, including the report data, is also written to
    that file.

    Args:
        dirA: Directory holding the first set of results.
        dirB: Directory holding the second set of results.
        opts: Options object; ``verboseLog`` is read here and the object is
            passed on to loadResults.
        deleteEmpty: Passed on to loadResults.

    Returns:
        True if at least one difference was reported, False otherwise.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    # NOTE(review): the log is opened in binary mode and written with str
    # values, which is only valid on Python 2.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # The try/finally makes sure the log is closed even if reading a report
    # or formatting a diagnostic raises.
    try:
        foundDiffs = False
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs = True
                if auxLog:
                    auxLog.write(("('ADDED', %r, %r)" % (b.getReadableName(),
                                                         b.getReportData()))
                                 + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs = True
                if auxLog:
                    auxLog.write(("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                           a.getReportData()))
                                 + "\n")
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs = True
                if auxLog:
                    auxLog.write(("('CHANGED', %r, %r, %r, %r)"
                                  % (a.getReadableName(),
                                     b.getReadableName(),
                                     a.getReportData(),
                                     b.getReportData()))
                                 + "\n")
            # Otherwise the two diagnostics are equal; nothing to report.

        print("TOTAL REPORTS: %r" % len(resultsB.diagnostics))
        if auxLog:
            auxLog.write(("('TOTAL REPORTS', %r)" % len(resultsB.diagnostics))
                         + "\n")
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
225
def main():
    """Command-line entry point: compare two result directories.

    Expects exactly two positional arguments (the directories of run A and
    run B); any other count is reported through the option parser's error
    handler.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option(
        "", "--root",
        action="store", type=str, dest="root", default="",
        help="Prefix to ignore on source files")
    parser.add_option(
        "", "--verbose-log",
        action="store", type=str, dest="verboseLog", default=None,
        metavar="LOG",
        help="Write additional information to LOG [default=None]")

    opts, args = parser.parse_args()
    if len(args) != 2:
        parser.error("invalid number of arguments")

    cmpScanBuildResults(args[0], args[1], opts)


# Run the comparison only when executed as a script.
if __name__ == '__main__':
    main()