Blame - utils/analyzer/CmpRuns.py - fp2-dev/platform/external/clang

blob: d20cd6aa64c58be4110ad48bac277c30c0aa8b9e [file] [log] [blame]

Daniel Dunbar	301f7ac	2009-08-06 21:15:33 +0000	[diff] [blame]	1	#!/usr/bin/env python
				2
				3	"""
				4	CmpRuns - A simple tool for comparing two static analyzer runs to determine
				5	which reports have been added, removed, or changed.
				6
				7	This is designed to support automated testing using the static analyzer, from
				8	two perspectives:
				9	1. To monitor changes in the static analyzer's reports on real code bases, for
				10	regression testing.
				11
				12	2. For use by end users who want to integrate regular static analyzer testing
				13	into a buildbot-like environment.
				14	"""
				15
				16	import os
				17	import plistlib
				18
				19	#
				20
				21	class multidict:
				22	def __init__(self, elts=()):
				23	self.data = {}
				24	for key,value in elts:
				25	self[key] = value
				26
				27	def __getitem__(self, item):
				28	return self.data[item]
				29	def __setitem__(self, key, value):
				30	if key in self.data:
				31	self.data[key].append(value)
				32	else:
				33	self.data[key] = [value]
				34	def items(self):
				35	return self.data.items()
				36	def values(self):
				37	return self.data.values()
				38	def keys(self):
				39	return self.data.keys()
				40	def __len__(self):
				41	return len(self.data)
				42	def get(self, key, default=None):
				43	return self.data.get(key, default)
				44
				45	#
				46
class CmpOptions:
    """Plain holder for the options passed to the comparison routines.

    verboseLog -- path of an auxiliary log file, or None for no log.
    root       -- prefix to strip from the front of source file names.
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
				51
class AnalysisReport:
    """One loaded report: the run it belongs to plus its list of files."""

    def __init__(self, run, files):
        self.files = files
        self.run = run
				56
				57	class AnalysisDiagnostic:
				58	def __init__(self, data, report, htmlReport):
				59	self.data = data
				60	self.report = report
				61	self.htmlReport = htmlReport
				62
				63	def getReadableName(self):
				64	loc = self.data['location']
				65	filename = self.report.run.getSourceName(self.report.files[loc['file']])
				66	line = loc['line']
				67	column = loc['col']
				68
				69	# FIXME: Get a report number based on this key, to 'distinguish'
				70	# reports, or something.
				71
				72	return '%s:%d:%d' % (filename, line, column)
				73
				74	def getReportData(self):
				75	if self.htmlReport is None:
				76	return "This diagnostic does not have any report data."
				77
				78	return open(os.path.join(self.report.run.path,
				79	self.htmlReport), "rb").read()
				80
				81	class AnalysisRun:
				82	def __init__(self, path, opts):
				83	self.path = path
				84	self.reports = []
				85	self.diagnostics = []
				86	self.opts = opts
				87
				88	def getSourceName(self, path):
				89	if path.startswith(self.opts.root):
				90	return path[len(self.opts.root):]
				91	return path
				92
				93	def loadResults(path, opts):
				94	run = AnalysisRun(path, opts)
				95
				96	for f in os.listdir(path):
				97	if (not f.startswith('report') or
				98	not f.endswith('plist')):
				99	continue
				100
				101	p = os.path.join(path, f)
				102	data = plistlib.readPlist(p)
				103
				104	# Ignore empty reports.
				105	if not data['files']:
				106	continue
				107
				108	# Extract the HTML reports, if they exists.
				109	if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
				110	htmlFiles = []
				111	for d in data['diagnostics']:
				112	# FIXME: Why is this named files, when does it have multiple
				113	# files?
				114	assert len(d['HTMLDiagnostics_files']) == 1
				115	htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
				116	else:
				117	htmlFiles = [None] * len(data['diagnostics'])
				118
				119	report = AnalysisReport(run, data.pop('files'))
				120	diagnostics = [AnalysisDiagnostic(d, report, h)
				121	for d,h in zip(data.pop('diagnostics'),
				122	htmlFiles)]
				123
				124	assert not data
				125
				126	run.reports.append(report)
				127	run.diagnostics.extend(diagnostics)
				128
				129	return run
				130
				131	def compareResults(A, B):
				132	"""
				133	compareResults - Generate a relation from diagnostics in run A to
				134	diagnostics in run B.
				135
				136	The result is the relation as a list of triples (a, b, confidence) where
				137	each element {a,b} is None or an element from the respective run, and
				138	confidence is a measure of the match quality (where 0 indicates equality,
				139	and None is used if either element is None).
				140	"""
				141
				142	res = []
				143
				144	# Quickly eliminate equal elements.
				145	neqA = []
				146	neqB = []
				147	eltsA = list(A.diagnostics)
				148	eltsB = list(B.diagnostics)
				149	eltsA.sort(key = lambda d: d.data)
				150	eltsB.sort(key = lambda d: d.data)
				151	while eltsA and eltsB:
				152	a = eltsA.pop()
				153	b = eltsB.pop()
				154	if a.data == b.data:
				155	res.append((a, b, 0))
				156	elif a.data > b.data:
				157	neqA.append(a)
				158	eltsB.append(b)
				159	else:
				160	neqB.append(b)
				161	eltsA.append(a)
				162	neqA.extend(eltsA)
				163	neqB.extend(eltsB)
				164
				165	# FIXME: Add fuzzy matching. One simple and possible effective idea would be
				166	# to bin the diagnostics, print them in a normalized form (based solely on
				167	# the structure of the diagnostic), compute the diff, then use that as the
				168	# basis for matching. This has the nice property that we don't depend in any
				169	# way on the diagnostic format.
				170
				171	for a in neqA:
				172	res.append((a, None, None))
				173	for b in neqB:
				174	res.append((None, b, None))
				175
				176	return res
				177
def cmpScanBuildResults(dirA, dirB, opts):
    """Compare the analyzer results in *dirA* against those in *dirB*.

    Prints one line per added, removed or changed diagnostic followed by the
    total number of diagnostics in run B. When opts.verboseLog is set, a more
    detailed record of each difference (including the report data) is also
    written to that file.

    dirA, dirB -- directories holding the two runs to compare.
    opts       -- options object; 'verboseLog' is read here and the whole
                  object is forwarded to loadResults.

    Returns True if any difference was reported, False otherwise.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts)
    resultsB = loadResults(dirB, opts)

    # Open the verbose log, if given. Text mode lets plain str lines be
    # written under both Python 2 and Python 3.
    auxLog = open(opts.verboseLog, "w") if opts.verboseLog else None

    def logLine(text):
        # Mirror of the console output, one record per line.
        if auxLog:
            auxLog.write(text + "\n")

    foundDiffs = False
    try:
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs = True
                if auxLog:
                    logLine("('ADDED', %r, %r)" % (b.getReadableName(),
                                                   b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs = True
                if auxLog:
                    logLine("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                     a.getReportData()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs = True
                if auxLog:
                    logLine("('CHANGED', %r, %r, %r, %r)"
                            % (a.getReadableName(),
                               b.getReadableName(),
                               a.getReportData(),
                               b.getReportData()))
            # Otherwise the pair matched exactly; nothing to report.

        print("TOTAL REPORTS: %r" % len(resultsB.diagnostics))
        logLine("('TOTAL REPORTS', %r)" % len(resultsB.diagnostics))
    finally:
        # Always release the log handle, even if reporting fails part-way.
        if auxLog:
            auxLog.close()

    return foundDiffs
				223
def main():
    """Command-line entry point: parse the options and the two run
    directories, then compare the runs."""
    import optparse

    parser = optparse.OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option(
        "", "--root",
        dest="root", action="store", type=str, default="",
        help="Prefix to ignore on source files")
    parser.add_option(
        "", "--verbose-log",
        dest="verboseLog", action="store", type=str, default=None,
        metavar="LOG",
        help="Write additional information to LOG [default=None]")
    opts, args = parser.parse_args()

    # Exactly two positional arguments are required: the two run directories.
    if len(args) != 2:
        parser.error("invalid number of arguments")

    cmpScanBuildResults(args[0], args[1], opts)
Daniel Dunbar	301f7ac	2009-08-06 21:15:33 +0000	[diff] [blame]	242
				243	if __name__ == '__main__':
				244	main()