Alex Miller | febd400 | 2014-01-16 14:32:59 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | This script prints out a csv file of `suite,test,path/to/control.file` where |
each row is a test that has failed every time it ran during the past N days,
where N is the `_DAYS_NOT_RUNNING_CUTOFF` constant defined further down in this
file.
| 7 | |
| 8 | You run it like this |
| 9 | |
| 10 | ./always_failing_tests.py | tee output |
| 11 | |
| 12 | But please note that since we're using the models to do queries, you'll probably |
| 13 | need to move your local shadow config out of the way before you run this script |
| 14 | so that you point at prod. |
| 15 | """ |
| 16 | |
| 17 | import time |
| 18 | import hashlib |
| 19 | import re |
| 20 | import datetime |
| 21 | import sys |
| 22 | |
| 23 | import common |
| 24 | from autotest_lib.frontend import setup_django_readonly_environment |
| 25 | |
# Django and the models are only set up once the
# setup_django_readonly_environment module has been imported.
| 28 | from autotest_lib.frontend.tko import models as tko_models |
| 29 | from autotest_lib.frontend.afe import models as afe_models |
| 30 | from autotest_lib.server.cros.dynamic_suite import suite |
| 31 | |
| 32 | |
# Size, in days, of the look-back window: main() only considers tests whose
# started_time is no older than this many days before today.
_DAYS_NOT_RUNNING_CUTOFF = 30
| 34 | |
| 35 | |
def md5(s):
    """Return the hexadecimal MD5 digest of `s`.

    Args:
        s: The data to hash. Byte strings (and other buffer objects such as
           bytearray) are hashed as-is. Text (unicode) strings are encoded as
           UTF-8 first.

    Returns:
        The digest as a 32-character lowercase hexadecimal string.
    """
    # hashlib digests bytes, not text. Without an explicit encode, Python 2
    # falls back to an implicit ASCII encode (which raises on any non-ASCII
    # character) and Python 3 rejects text outright. For pure-ASCII text the
    # UTF-8 encoding is byte-identical to the old implicit ASCII one, so
    # previously working inputs keep the same digest.
    if isinstance(s, type(u'')):
        s = s.encode('utf-8')
    return hashlib.md5(s).hexdigest()
| 40 | |
| 41 | |
def main():
    """Print a `suite,test,control_file` CSV row for each always-failing test.

    The steps are:
      1. Load every TKO test started within the last
         _DAYS_NOT_RUNNING_CUTOFF days, skipping names that contain '/',
         '_JOB' or 'try_new_image' and the test named 'provision'.
      2. Keep the tests whose name never had a GOOD or WARN result in that
         window. TEST_NA results are ignored entirely.
      3. Map each failing test to a suite name (parsed from the name of the
         AFE job's parent job, or of the job itself when it has no parent)
         and to a control file on disk (matched by MD5 of its contents).
      4. Print one CSV row per (suite, test) pair to stdout. The path column
         is '?' when no control file on disk matches any of the jobs that
         ran the test.

    Profiling and diagnostic lines are written to stderr so that stdout
    contains only the CSV rows.

    Returns:
        None, so `sys.exit(main())` exits with status 0.
    """
    cutoff_delta = datetime.timedelta(_DAYS_NOT_RUNNING_CUTOFF)
    cutoff_date = datetime.datetime.today() - cutoff_delta
    statuses = {s.status_idx: s.word for s in tko_models.Status.objects.all()}
    start = time.time()

    def progress(label, count):
        # Vague profiling output: elapsed seconds and the size of the data
        # produced by the step. We're handling a lot of data, so this helps
        # make sure things chug along at a decent speed.
        sys.stderr.write("%s: %d -- len=%d\n"
                         % (label, time.time() - start, count))

    tests = list(
        tko_models.Test.objects.select_related('job')
        .filter(started_time__gte=cutoff_date)
        .exclude(test__contains='/')
        .exclude(test__contains='_JOB')
        .exclude(test='provision')
        .exclude(test__contains='try_new_image'))
    progress("DB", len(tests))

    # Test name -> True if any run of that name passed in the window.
    ever_passed = {}
    for t in tests:
        word = statuses[t.status_id]
        if word == 'TEST_NA':
            # A test that was not applicable counts neither for nor against.
            continue
        passed = word in ('GOOD', 'WARN')
        ever_passed[t.test] = ever_passed.get(t.test, False) or passed
    progress("OF", len(ever_passed))

    never_passed = {name for name, ok in ever_passed.items() if not ok}
    all_fail = [t for t in tests if t.test in never_passed]
    progress("AF", len(all_fail))

    # MD5 of each control file in the checkout -> path relative to the
    # autotest directory.
    hash_to_file = {}
    fs_getter = suite.Suite.create_fs_getter(common.autotest_dir)
    for control_file in fs_getter.get_control_file_list():
        with open(control_file, 'rb') as f:
            digest = md5(f.read())
        relative_path = control_file.replace(common.autotest_dir, '')
        hash_to_file[digest] = relative_path.lstrip('/')
    progress("HF", len(hash_to_file))

    afe_job_ids = {t.job.afe_job_id for t in all_fail}
    # Materialize once; the result is iterated twice below.
    afe_jobs = list(
        afe_models.Job.objects.select_related('parent_job')
        .filter(id__in=afe_job_ids))
    progress("AJ", len(afe_jobs))

    job_to_hash = {job.id: md5(job.control_file) for job in afe_jobs}
    progress("JH", len(job_to_hash))

    # The suite name is whatever follows "test_suites/control." in the name of
    # the parent (suite) job, or of the job itself when it has no parent.
    job_to_suite = {}
    suite_rgx = re.compile(r"test_suites/control\.(\w+)")
    for job in afe_jobs:
        suite_job = job.parent_job or job
        match = suite_rgx.search(suite_job.name)
        if not match:
            # Report jobs whose suite could not be determined; their tests
            # are grouped under the suite None below.
            sys.stderr.write("%s\n" % suite_job.name)
            continue
        job_to_suite[job.id] = match.group(1)

    # (suite name, test name) -> list of failing test rows.
    by_name = {}
    for t in all_fail:
        suite_name = job_to_suite.get(t.job.afe_job_id)
        by_name.setdefault((suite_name, t.test), []).append(t)
    progress("BN", len(by_name))

    for (suite_name, testname), failures in by_name.items():
        for test in failures:
            # .get(): a TKO row may reference an AFE job id that the AFE
            # query did not return; treat that as "no known control file"
            # instead of crashing with KeyError.
            digest = job_to_hash.get(test.job.afe_job_id)
            if digest in hash_to_file:
                print("%s,%s,%s" % (suite_name, testname,
                                    hash_to_file[digest]))
                break
        else:
            # None of the jobs' control files matched a file on disk.
            print("%s,%s,?" % (suite_name, testname))
| 123 | |
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)