Blame - utils/analyzer/CmpRuns.py - platform/external/clang

blob: 2072e4d0a1bd2c05a23b605ea91b4947d25720ad [file] [log] [blame]

Daniel Dunbar	301f7ac	2009-08-06 21:15:33 +0000	[diff] [blame]	1	#!/usr/bin/env python
				2
				3	"""
				4	CmpRuns - A simple tool for comparing two static analyzer runs to determine
				5	which reports have been added, removed, or changed.
				6
				7	This is designed to support automated testing using the static analyzer, from
				8	two perspectives:
				9	1. To monitor changes in the static analyzer's reports on real code bases, for
				10	regression testing.
				11
				12	2. For use by end users who want to integrate regular static analyzer testing
				13	into a buildbot-like environment.
				14	"""
				15
				16	import os
				17	import plistlib
				18
				19	#
				20
				21	class multidict:
				22	def __init__(self, elts=()):
				23	self.data = {}
				24	for key,value in elts:
				25	self[key] = value
				26
				27	def __getitem__(self, item):
				28	return self.data[item]
				29	def __setitem__(self, key, value):
				30	if key in self.data:
				31	self.data[key].append(value)
				32	else:
				33	self.data[key] = [value]
				34	def items(self):
				35	return self.data.items()
				36	def values(self):
				37	return self.data.values()
				38	def keys(self):
				39	return self.data.keys()
				40	def __len__(self):
				41	return len(self.data)
				42	def get(self, key, default=None):
				43	return self.data.get(key, default)
				44
				45	#
				46
class CmpOptions:
    """Plain holder for the settings consulted while comparing two runs."""

    def __init__(self, verboseLog=None, root=""):
        # verboseLog: path of an extra log file, or None for no extra log.
        # root: prefix that AnalysisRun.getSourceName strips from file names.
        self.verboseLog, self.root = verboseLog, root
				51
class AnalysisReport:
    """Pairs the file table of one report with the run it was loaded into."""

    def __init__(self, run, files):
        # files: the report's file table; diagnostics index into it.
        self.files = files
        # run: the owning run object.
        self.run = run
				56
				57	class AnalysisDiagnostic:
				58	def __init__(self, data, report, htmlReport):
				59	self.data = data
				60	self.report = report
				61	self.htmlReport = htmlReport
				62
				63	def getReadableName(self):
				64	loc = self.data['location']
				65	filename = self.report.run.getSourceName(self.report.files[loc['file']])
				66	line = loc['line']
				67	column = loc['col']
				68
				69	# FIXME: Get a report number based on this key, to 'distinguish'
				70	# reports, or something.
				71
				72	return '%s:%d:%d' % (filename, line, column)
				73
				74	def getReportData(self):
				75	if self.htmlReport is None:
				76	return "This diagnostic does not have any report data."
				77
				78	return open(os.path.join(self.report.run.path,
				79	self.htmlReport), "rb").read()
				80
				81	class AnalysisRun:
				82	def __init__(self, path, opts):
				83	self.path = path
				84	self.reports = []
				85	self.diagnostics = []
				86	self.opts = opts
				87
				88	def getSourceName(self, path):
				89	if path.startswith(self.opts.root):
				90	return path[len(self.opts.root):]
				91	return path
				92
def loadResults(path, opts, deleteEmpty=True):
    """Load all report plists found in a directory into an AnalysisRun.

    Only directory entries whose name starts with 'report' and ends with
    'plist' are read.

    Arguments:
      path        -- directory to scan.
      opts        -- options object stored on the returned run.
      deleteEmpty -- when true, report files whose 'files' list is empty are
                     removed from disk. Such reports are skipped either way.

    Returns the populated AnalysisRun.

    Raises AssertionError when a diagnostic lists anything other than
    exactly one HTML file, or when the plist has top-level keys other than
    'files' and 'diagnostics'.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        # NOTE(review): plistlib.readPlist is the Python 2 API; a Python 3
        # port has to switch to plistlib.load on a binary file handle.
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        report = AnalysisReport(run, data.pop('files'))

        # Extract the HTML report of each diagnostic, if it exists. The key
        # is looked up per diagnostic so that an empty diagnostics list, or
        # a report where only some diagnostics carry HTML output, is handled
        # instead of raising IndexError/KeyError.
        diagnostics = []
        for d in data.pop('diagnostics'):
            htmlFiles = d.pop('HTMLDiagnostics_files', None)
            if htmlFiles is None:
                htmlReport = None
            else:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(htmlFiles) == 1
                htmlReport = htmlFiles[0]
            diagnostics.append(AnalysisDiagnostic(d, report, htmlReport))

        # Everything in the plist should have been consumed by now.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
				132
				133	def compareResults(A, B):
				134	"""
				135	compareResults - Generate a relation from diagnostics in run A to
				136	diagnostics in run B.
				137
				138	The result is the relation as a list of triples (a, b, confidence) where
				139	each element {a,b} is None or an element from the respective run, and
				140	confidence is a measure of the match quality (where 0 indicates equality,
				141	and None is used if either element is None).
				142	"""
				143
				144	res = []
				145
				146	# Quickly eliminate equal elements.
				147	neqA = []
				148	neqB = []
				149	eltsA = list(A.diagnostics)
				150	eltsB = list(B.diagnostics)
				151	eltsA.sort(key = lambda d: d.data)
				152	eltsB.sort(key = lambda d: d.data)
				153	while eltsA and eltsB:
				154	a = eltsA.pop()
				155	b = eltsB.pop()
Anna Zaks	e1e7367	2011-11-05 05:20:56 +0000	[diff] [blame]	156	if a.data['location'] == b.data['location']:
Daniel Dunbar	301f7ac	2009-08-06 21:15:33 +0000	[diff] [blame]	157	res.append((a, b, 0))
				158	elif a.data > b.data:
				159	neqA.append(a)
				160	eltsB.append(b)
				161	else:
				162	neqB.append(b)
				163	eltsA.append(a)
				164	neqA.extend(eltsA)
				165	neqB.extend(eltsB)
				166
				167	# FIXME: Add fuzzy matching. One simple and possible effective idea would be
				168	# to bin the diagnostics, print them in a normalized form (based solely on
				169	# the structure of the diagnostic), compute the diff, then use that as the
				170	# basis for matching. This has the nice property that we don't depend in any
				171	# way on the diagnostic format.
				172
				173	for a in neqA:
				174	res.append((a, None, None))
				175	for b in neqB:
				176	res.append((None, b, None))
				177
				178	return res
				179
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """Compare the results stored in two directories and print a summary.

    Each added, removed or changed diagnostic is printed to standard output,
    followed by the total number of diagnostics in dirB and the number of
    differences. When opts.verboseLog is set, a more detailed record
    (including report data) is also written to that file.

    Arguments:
      dirA, dirB  -- directories holding the two runs to compare.
      opts        -- options object; ``verboseLog`` is read here and the
                     object is passed on to loadResults.
      deleteEmpty -- forwarded to loadResults.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # The try/finally makes sure the verbose log is flushed and closed even
    # if reading a report or formatting a name raises part-way through.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(),
                                    b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(),
                                    a.getReportData()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData()))
            # Pairs matched with confidence 0 are identical: nothing to report.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        if auxLog is not None:
            auxLog.close()

    return foundDiffs
				228
def main():
    """Command-line entry point: parse the options and compare two runs.

    Expects exactly two positional arguments, the directories of run A and
    run B; any other count is reported through the parser's error handler.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root",
                      action="store", type=str, dest="root", default="",
                      help="Prefix to ignore on source files")
    parser.add_option("", "--verbose-log",
                      action="store", type=str, dest="verboseLog",
                      default=None, metavar="LOG",
                      help="Write additional information to LOG [default=None]")
    opts, args = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    cmpScanBuildResults(dirA, dirB, opts)


# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()