beeps | 539c4d3 | 2014-02-07 17:20:58 -0800 | [diff] [blame] | 1 | #!/usr/bin/python -u |
| 2 | |
| 3 | """ |
| 4 | A script to help find the last few jobs that ran on a set of hosts that match |
| 5 | the specified query, and rank them according to frequency across these hosts. |
| 6 | Usage: |
| 7 | 1. Get last 5 jobs from 1 day ago running on all lumpies in pool suites that are |
| 8 | currently in repair fail: |
| 9 | ./sheriff_host_utils --days_back=1 |
| 10 | --query 'labels=pool:suites,board:lumpy status="Repair Failed"' |
| 11 | |
| 12 | 2. Email someone about the last 5 jobs on all Repair Failed hosts. |
| 13 | ./sheriff_host_utils --limit 5 --query 'status="Repair Failed"' |
| 14 | --email someone@something.com |
| 15 | """ |
| 16 | |
| 17 | import argparse |
| 18 | import collections |
| 19 | import datetime |
| 20 | import operator |
| 21 | import shlex |
| 22 | import sys |
| 23 | |
| 24 | import common |
| 25 | |
| 26 | from autotest_lib.client.common_lib import mail |
| 27 | from autotest_lib.frontend import setup_django_environment |
| 28 | from autotest_lib.frontend.afe import models |
| 29 | from autotest_lib.server import frontend |
| 30 | from autotest_lib.server.cros import repair_utils |
| 31 | from django.utils import timezone as django_timezone |
| 32 | |
| 33 | |
def _parse_args(args):
    """Parse the command line arguments of this script.

    Prints a usage hint and exits with status 1 when no arguments are given.

    @param args: A list of command line arguments, without the program name.

    @return: An argparse.Namespace with the attributes limit (int),
            days_back (int), query (str) and email (str or None).
    """
    description = ('./sheriff_host_utils.py --limit 5 --days_back 5 '
                   '--query status="Repair Failed" invalid=0 locked=0')
    if not args:
        print('Too few arguments, execute %s, or try '
              './sheriff_host_utils.py --help' % description)
        sys.exit(1)

    parser = argparse.ArgumentParser(description=description)
    # type=int keeps user supplied values the same type as the integer
    # defaults and lets argparse reject non-numeric input with a usage error.
    parser.add_argument('--limit', type=int, default=5,
                        help='The number of jobs per host.Eg: --limit 5')
    parser.add_argument('--days_back', type=int, default=5,
                        help='Number of days to search. Eg: --days_back 5')
    default_query = 'status="Repair Failed" labels=pool:bvt,board:lumpy'
    parser.add_argument('--query', default=default_query,
                        help='Search query.Eg: --query %s' % default_query)
    parser.add_argument('--email', default=None, help='send results to email.')
    return parser.parse_args(args)
| 52 | |
| 53 | |
def _parse_query(query):
    """Parses query string for a host.

    All queries follow the format: 'key=value key2=value..' where all keys
    are columns of the host table with the exception of labels. When
    specifying labels, the format is the same even though a label is a
    foreign key:
        --query 'labels=<comma separated list of label names>'.

    Terms are split with shell-like quoting rules, so a value containing
    spaces must be quoted, eg: status="Repair Failed". Only the first '=' of
    a term separates the key from the value.

    @param query: The query string to parse.

    @return: A dictionary into which the query has been parsed.

    @raises ValueError: If a term has no '=' or has an empty key.
    """
    payload = {}
    for term in shlex.split(query):
        key, separator, value = term.partition('=')
        # Without this check a term lacking '=' would be turned into a
        # meaningless key instead of being reported to the user.
        if not separator or not key:
            raise ValueError(
                    'Malformed query term %r, expected key=value' % term)
        payload[key] = value
    return payload
| 69 | |
| 70 | |
def _get_pool(host):
    """Find the pool label of a host.

    @param host: An object whose labels.all() yields labels with a name.

    @return: The name of the first label containing the text 'pool', or
            None if the host has no such label.
    """
    names = [label.name for label in host.labels.all()]
    return next((name for name in names if 'pool' in name), None)
| 78 | |
| 79 | |
def retrieve_hosts(payload):
    """Retrieve hosts matching the payload.

    The payload is not modified.

    @param payload: A dict with selection criteria for hosts. The optional
            'labels' key holds a comma separated list of label names, all of
            which a host must have. Every other key is passed to the host
            queryset filter as a keyword argument.

    @return: A queryset of hosts matching the payload.
    """
    # Work on a copy: the caller still needs the full query, including the
    # labels, for example to report it back to the user.
    filters = dict(payload)
    labels = filters.pop('labels', None)

    query_hosts = models.Host.objects.all()
    if labels is not None:
        # Replace label names with a foreign key query. Chaining one filter
        # per label requires a host to carry every label listed.
        for label in labels.split(','):
            query_hosts = query_hosts.filter(labels__name=label)
    return query_hosts.filter(**filters)
| 94 | |
| 95 | |
def analyze_jobs(hqes):
    """Perform some aggregation on the jobs that ran on matching hosts.

    Jobs are grouped by the part of their name following the last '/' (the
    whole name if it contains no '/') and listed from most to least frequent.

    @param hqes: An iterable of objects exposing the job name as job.name.

    @return: A string with the results of the analysis.
    """
    ranking = collections.Counter(
            hqe.job.name.rpartition('/')[2] for hqe in hqes)
    # most_common() already orders by descending count and, unlike
    # iteritems(), exists on both Python 2.7 and Python 3.
    lines = ['%s test name: %s\n\t' % (count, test_name)
             for test_name, count in ranking.most_common()]
    header = 'Ranking tests that ran on those hosts by frequency: \n\t'
    return header + ''.join(lines)
| 108 | |
| 109 | |
def last_jobs_on_hosts(payload, limit_jobs, days_back):
    """Find the last limit_jobs on hosts with given status within days_back.

    Hosts without a pool label are only listed by hostname; their jobs are
    not looked up.

    @param payload: A dictionary specifying the selection criteria of the
            hosts. Eg {'status': "Ready", 'id': 40}
    @param limit_jobs: The number of jobs per host.
    @param days_back: The days back to search for jobs.

    @return: A string with information about the last jobs that ran on all
            hosts matching the query mentioned in the payload.
    """
    # retrieve_hosts may consume keys from the dict it is given, so hand it a
    # copy; the untouched payload is reported below if nothing matches.
    hosts = retrieve_hosts(dict(payload))
    hqes = models.HostQueueEntry.objects.all()
    cutoff = django_timezone.now().date() - datetime.timedelta(days=days_back)

    host_reports, pool_less, job_less, jobs_to_analyze = [], [], [], []
    for host in hosts:
        pool = _get_pool(host)
        if not pool:
            pool_less.append(host.hostname)
            continue
        # Newest entries first, capped at limit_jobs per host.
        relevant_hqes = list(
                hqes.filter(host_id=host.id, started_on__gte=cutoff)
                .order_by('-started_on')[:limit_jobs])
        if not relevant_hqes:
            job_less.append(host.hostname)
            continue
        jobs = ['name: %s, id: %s' % (hqe.job.name, hqe.job_id)
                for hqe in relevant_hqes]
        host_reports.append('%s\n%s\n\t%s' % (pool, host, '\n\t'.join(jobs)))
        jobs_to_analyze.extend(relevant_hqes)

    # One entry per host, each starting on its own line.
    message = '\n'.join(host_reports)
    if job_less:
        message += ('\nNo jobs found for the following hosts within cutoff '
                    '%s\n\t' % cutoff)
        message += '\n\t'.join(job_less)
    if pool_less:
        message += '\nNo pools found on the following hosts:\n\t'
        message += '\n\t'.join(pool_less)
    if jobs_to_analyze:
        message += '\n\n%s' % analyze_jobs(jobs_to_analyze)

    if message:
        return '%s\n%s' % ('Host information:', message)
    return 'No hosts matching query %s from %s days back' % (payload, days_back)
| 155 | |
| 156 | |
def main():
    """Run the query given on the command line and report the results.

    The report is printed to stdout and, if --email was given, also mailed
    to that address.
    """
    args = _parse_args(sys.argv[1:])
    message = last_jobs_on_hosts(_parse_query(args.query),
                                 int(args.limit), int(args.days_back))
    if args.email:
        # NOTE(review): 'sheirff' in the subject looks like a typo for
        # 'sheriff'; left unchanged in case mail filters match on it.
        mail.send('', args.email, '',
                  'Results from your sheirff script.', message)
    # Parenthesized so the line is valid under both Python 2 and Python 3.
    print(message)


if __name__ == '__main__':
    main()