#!/usr/bin/env python

"""
This script prints a CSV of `suite,test,path/to/control.file` rows, one for
each test that has failed every time it ran over the past N days, where N is
the _DAYS_NOT_RUNNING_CUTOFF constant defined below.
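
A row might look like this (the names here are purely illustrative):

    dummy,dummy_Fail,client/site_tests/dummy_Fail/control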

Run it like this:

    ./always_failing_tests.py | tee output

Note that because this script queries the database through the Django models,
you will probably need to move your local shadow config out of the way before
running it so that it points at the prod database.
"""

import time
import hashlib
import re
import datetime
import sys

import common
from autotest_lib.frontend import setup_django_readonly_environment

# Django and the models are only set up after
# setup_django_readonly_environment has been imported.
from autotest_lib.frontend.tko import models as tko_models
from autotest_lib.frontend.afe import models as afe_models
from autotest_lib.server.cros.dynamic_suite import suite


_DAYS_NOT_RUNNING_CUTOFF = 30


def md5(s):
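    """Return the hex md5 digest of string s."""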
    m = hashlib.md5()
    m.update(s)
    return m.hexdigest()


def main():
    cutoff_delta = datetime.timedelta(days=_DAYS_NOT_RUNNING_CUTOFF)
    cutoff_date = datetime.datetime.today() - cutoff_delta
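    # Map TKO status index -> status word (e.g. GOOD, FAIL, WARN, TEST_NA).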
    statuses = {s.status_idx: s.word for s in tko_models.Status.objects.all()}
    now = time.time()

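    # Pull every TKO test result newer than the cutoff, skipping rows that
    # aren't real tests (pathed names, *_JOB records, provision and
    # try_new_image runs).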
    tests = (tko_models.Test.objects.select_related('job')
             .filter(started_time__gte=cutoff_date)
             .exclude(test__contains='/')
             .exclude(test__contains='_JOB')
             .exclude(test='provision')
             .exclude(test__contains='try_new_image'))
    tests = list(tests)
    # These prints are rough profiling output. We're handling a lot of data,
    # so some care went into keeping the runtime reasonable.
    print "DB: %d -- len=%d" % (time.time() - now, len(tests))

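    # Fold the results into a dict of test name -> "did this test ever pass
    # (GOOD or WARN)?", ignoring TEST_NA results entirely.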
    def only_failures(d, t):
        word = statuses[t.status_id]
        if word == 'TEST_NA':
            return d
        passed = word in ('GOOD', 'WARN')
        d[t.test] = d.get(t.test, False) or passed
        return d
    dct = reduce(only_failures, tests, {})
    print "OF: %d -- len=%d" % (time.time() - now, len(dct))

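    # Keep only the result rows belonging to tests that never passed.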
    all_fail = filter(lambda x: x.test in dct and not dct[x.test], tests)
    print "AF: %d -- len=%d" % (time.time() - now, len(all_fail))

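    # Hash the contents of every control file on disk so that an AFE job
    # (which only stores the control file text) can be mapped back to a
    # repo-relative path.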
    hash_to_file = {}
    fs_getter = suite.Suite.create_fs_getter(common.autotest_dir)
    for control_file in fs_getter.get_control_file_list():
        with open(control_file, 'rb') as f:
            h = md5(f.read())
        hash_to_file[h] = control_file.replace(common.autotest_dir,
                                               '').lstrip('/')
    print "HF: %d -- len=%d" % (time.time() - now, len(hash_to_file))

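    # Fetch the AFE jobs that produced the failing results; parent_job is
    # needed below to recover the suite name.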
    afe_job_ids = set(map(lambda t: t.job.afe_job_id, all_fail))
    afe_jobs = (afe_models.Job.objects.select_related('parent_job')
                .filter(id__in=afe_job_ids))
    print "AJ: %d -- len=%d" % (time.time() - now, len(afe_jobs))

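    # Fingerprint each AFE job's control file so it can be matched against
    # hash_to_file above.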
    job_to_hash = {}
    for job in afe_jobs:
        job_to_hash[job.id] = md5(job.control_file)
    print "JH: %d -- len=%d" % (time.time() - now, len(job_to_hash))

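    # Work out which suite each job belongs to from the (parent) job's name,
    # which should look like ".../test_suites/control.<suite_name>".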
    job_to_suite = {}
    rgx = re.compile(r'test_suites/control\.(\w+)')
    for job in afe_jobs:
        # Suite child jobs carry a parent_job; for jobs without a parent,
        # fall back to the job's own name.
        parent = job.parent_job or job
        x = rgx.search(parent.name)
        if not x:
            print parent.name
            continue
        job_to_suite[job.id] = x.group(1)

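    # Group the failing results by (suite name, test name).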
    def collect_by_suite_name(d, t):
        s = job_to_suite.get(t.job.afe_job_id, None)
        d.setdefault((s, t.test), []).append(t)
        return d
    by_name = reduce(collect_by_suite_name, all_fail, {})
    print "BN: %d -- len=%d" % (time.time() - now, len(by_name))

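    # Emit one CSV row per (suite, test): use the first result whose control
    # file could be matched to a file on disk, otherwise print '?' for the
    # path.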
    for (s, testname), results in by_name.iteritems():
        for test in results:
            h = job_to_hash[test.job.afe_job_id]
            if h in hash_to_file:
                print "%s,%s,%s" % (s, testname, hash_to_file[h])
                break
        else:
            print "%s,%s,?" % (s, testname)


if __name__ == '__main__':
    sys.exit(main())