#!/usr/bin/python -u

"""
A script to help find the last few jobs that ran on a set of hosts matching
the specified query, and rank them by frequency across those hosts.
Usage:
1. Get the last 5 jobs from 1 day ago running on all lumpies in pool suites
that are currently in Repair Failed:
./sheriff_host_utils.py --days_back=1
    --query 'labels=pool:suites,board:lumpy status="Repair Failed"'

2. Email someone about the last 5 jobs on all Repair Failed hosts.
./sheriff_host_utils.py --limit 5 --query 'status="Repair Failed"'
    --email someone@something.com
"""

import argparse
import collections
import datetime
import operator
import shlex
import sys

import common

from autotest_lib.client.common_lib import mail
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.afe import models
from autotest_lib.server import frontend
from autotest_lib.server.cros import repair_utils
from django.utils import timezone as django_timezone


def _parse_args(args):
    """Parse command line arguments for the sheriff host utils script."""
    description = ('./sheriff_host_utils.py --limit 5 --days_back 5 '
                   '--query \'status="Repair Failed" invalid=0 locked=0\'')
    if not args:
        print ('Too few arguments; try for example: %s, or '
               './sheriff_host_utils.py --help' % description)
        sys.exit(1)

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--limit', default=5,
                        help='The number of jobs per host. Eg: --limit 5')
    parser.add_argument('--days_back', default=5,
                        help='Number of days to search. Eg: --days_back 5')
    default_query = 'status="Repair Failed" labels=pool:bvt,board:lumpy'
    parser.add_argument('--query', default=default_query,
                        help='Search query. Eg: --query %s' % default_query)
    parser.add_argument('--email', default=None, help='send results to email.')
    return parser.parse_args(args)
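
# A minimal sketch (not output from a real run) of what the parsed namespace
# holds; values passed on the command line come back as strings, and main()
# casts limit/days_back to int before use:
#   opts = _parse_args(['--days_back', '1', '--query', 'status="Repair Failed"'])
#   # opts.limit == 5 (default), opts.days_back == '1', opts.email is None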


def _parse_query(query):
    """Parses the query string used to select hosts.

    All queries follow the format: 'key=value key2=value..' where all keys
    are columns of the host table, with the exception of labels. When
    specifying labels, the format is the same even though a label is a
    foreign key: --query 'labels=<comma separated list of label names>'.

    @param query: The raw query string, e.g. the value of --query.

    @return: A dictionary into which the query has been parsed.
    """
    l = shlex.split(query)
    keys = [elem[:elem.find('=')] for elem in l]
    values = [elem[elem.find('=')+1:] for elem in l]
    payload = dict(zip(keys, values))
    return payload
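
# For illustration, assuming a well-formed query string as described above,
# a call such as
#   _parse_query('status="Repair Failed" labels=pool:bvt,board:lumpy')
# returns {'status': 'Repair Failed', 'labels': 'pool:bvt,board:lumpy'};
# shlex.split() honors the quoting, so values containing spaces stay intact.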


def _get_pool(host):
    """Returns the pool label of a host, if any.

    @param host: A Host model object.

    @return: The name of the first label containing 'pool', or None.
    """
    labels = host.labels.all()
    for label_name in [label.name for label in labels]:
        if 'pool' in label_name:
            return label_name
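
# Example with hypothetical labels: a host labelled ['board:lumpy', 'pool:bvt']
# would return 'pool:bvt'; a host carrying no pool:* label falls through the
# loop and the function implicitly returns None.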


def retrieve_hosts(payload):
    """Retrieve hosts matching the payload.

    @param payload: A dict with selection criteria for hosts.

    @return: A queryset of hosts matching the payload.
    """
    # Replace label names with a foreign key query.
    query_hosts = models.Host.objects.all()
    if 'labels' in payload:
        for label in payload['labels'].split(','):
            query_hosts = query_hosts.filter(labels__name=label)
        del payload['labels']
    return query_hosts.filter(**payload)
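
# A sketch of the equivalent hand-written ORM query for a payload such as
# {'status': 'Repair Failed', 'labels': 'pool:bvt,board:lumpy'} (the label and
# status values here are just examples):
#   hosts = (models.Host.objects
#            .filter(labels__name='pool:bvt')
#            .filter(labels__name='board:lumpy')
#            .filter(status='Repair Failed'))
# Chaining one filter() call per label requires a host to carry every label
# (an AND), whereas a single labels__name__in=[...] filter would match hosts
# that have any one of them.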


def analyze_jobs(hqes):
    """Perform some aggregation on the jobs that ran on matching hosts.

    @param hqes: A list of HostQueueEntry objects whose jobs should be ranked.

    @return: A string with the results of the analysis.
    """
    names = [hqe.job.name for hqe in hqes]
    ranking = collections.Counter([name[name.rfind('/')+1:] for name in names])
    sorted_rankings = sorted(ranking.iteritems(), key=operator.itemgetter(1))
    m = 'Ranking tests that ran on those hosts by frequency: \n\t'
    for job_stat in reversed(sorted_rankings):
        m += '%s test name: %s\n\t' % (job_stat[1], job_stat[0])
    return m
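
# A small illustration of the ranking step, using made-up job names:
#   names = ['lumpy-release/R30-1234.0.0/bvt/login_LoginSuccess',
#            'lumpy-release/R30-1234.0.0/bvt/login_LoginSuccess',
#            'lumpy-paladin/R30-1235.0.0/bvt/dummy_Pass']
#   collections.Counter([n[n.rfind('/')+1:] for n in names])
#   # -> Counter({'login_LoginSuccess': 2, 'dummy_Pass': 1})
# Only the portion after the last '/' (the test name) is counted, so the same
# test run against different builds collapses into one bucket.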


def last_jobs_on_hosts(payload, limit_jobs, days_back):
    """Find the last limit_jobs on hosts with given status within days_back.

    @param payload: A dictionary specifying the selection criteria of the
                    hosts. Eg: {'status': 'Ready', 'id': 40}
    @param limit_jobs: The number of jobs per host.
    @param days_back: The number of days back to search for jobs.

    @return: A string with information about the last jobs that ran on all
             hosts matching the query mentioned in the payload.
    """
    host_map = {}
    pool_less, job_less, jobs_to_analyze = [], [], []
    hqes = models.HostQueueEntry.objects.all()
    cutoff = django_timezone.now().date() - datetime.timedelta(days=days_back)
    message = ''

    for host in retrieve_hosts(payload):
        pool = _get_pool(host)
        if not pool:
            pool_less.append(host.hostname)
            continue
        relevant_hqes = list(hqes.filter(host_id=host.id,
                started_on__gte=cutoff).order_by('-started_on')[:limit_jobs])
        if relevant_hqes:
            jobs = ['name: %s, id: %s' %
                    (hqe.job.name, hqe.job_id) for hqe in relevant_hqes]
            message += '%s\n%s\n\t%s' % (pool, host, '\n\t'.join(jobs))
            jobs_to_analyze += relevant_hqes
        else:
            job_less.append(host.hostname)

    if job_less:
        message += ('\nNo jobs found for the following hosts within cutoff '
                    '%s\n\t' % cutoff)
        message += '\n\t'.join(job_less)
    if pool_less:
        message += '%s\n\t%s' % ('\nNo pools found on the following hosts:',
                                 '\n\t'.join(pool_less))
    if jobs_to_analyze:
        message += '\n\n%s' % analyze_jobs(jobs_to_analyze)

    if message:
        return '%s\n%s' % ('Host information:', message)
    return 'No hosts matching query %s from %s days back' % (payload, days_back)
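
# Roughly what the report looks like when matching hosts and jobs are found
# (hostnames, pools and job names below are illustrative only):
#   Host information:
#   pool:bvt
#   chromeos1-rack1-host1
#       name: lumpy-release/R30-1234.0.0/bvt/dummy_Pass, id: 123456
#   ...
#   Ranking tests that ran on those hosts by frequency:
#       2 test name: login_LoginSuccess
#       1 test name: dummy_Pass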


if __name__ == '__main__':
    args = _parse_args(sys.argv[1:])
    message = last_jobs_on_hosts(_parse_query(args.query),
                                 int(args.limit), int(args.days_back))
    if args.email:
        mail.send('', args.email, '',
                  'Results from your sheriff script.', message)
    print message