blob: d6e0eeafb54c0ef20b6edf23d2791f74853aaf53 [file] [log] [blame]
#!/usr/bin/python -u
"""
A script to help find the last few jobs that ran on a set of hosts that match
the specified query, and rank them according to frequency across these hosts.
Usage:
1. Get last 5 jobs from 1 day ago running on all lumpies in pool suites that are
currently in repair fail:
./sheriff_host_utils --days_back=1
--query 'labels=pool:suites,board:lumpy status="Repair Failed"'
2. Email someone about the last 5 jobs on all Repair Failed hosts.
./sheriff_host_utils --limit 5 --query 'status="Repair Failed"'
--email someone@something.com
"""
import argparse
import collections
import datetime
import operator
import shlex
import sys
import common
from autotest_lib.client.common_lib import mail
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.afe import models
from autotest_lib.server import frontend
from autotest_lib.server.cros import repair_utils
from django.utils import timezone as django_timezone
def _parse_args(args):
    """Parse the command line arguments of the script.

    Prints a usage hint and exits with status 1 when no arguments are given.

    @param args: List of command line arguments, without the program name.

    @return: The argparse namespace with the attributes limit, days_back,
             query and email. limit and days_back are integers.
    """
    description = ('./sheriff_host_utils.py --limit 5 --days_back 5 '
                   '--query \'status="Repair Failed" invalid=0 locked=0\'')
    if not args:
        print('Too few arguments, execute %s, or try '
              './sheriff_host_utils.py --help' % description)
        sys.exit(1)

    parser = argparse.ArgumentParser(description=description)
    # type=int makes argparse reject non-numeric values with a usage error
    # instead of handing a string to the caller to convert later.
    parser.add_argument('--limit', default=5, type=int,
                        help='The number of jobs per host.Eg: --limit 5')
    parser.add_argument('--days_back', default=5, type=int,
                        help='Number of days to search. Eg: --days_back 5')
    default_query = 'status="Repair Failed" labels=pool:bvt,board:lumpy'
    parser.add_argument('--query', default=default_query,
                        help='Search query.Eg: --query %s' % default_query)
    parser.add_argument('--email', default=None, help='send results to email.')
    return parser.parse_args(args)
def _parse_query(query):
    """Parse a host query string into a dictionary.

    All queries follow the format 'key=value key2=value2 ...', where all keys
    are columns of the host table with the exception of labels. When
    specifying labels, the format is the same even though a label is a
    foreign key:
        --query 'labels=<comma separated list of label names>'
    Values containing spaces must be quoted, eg: status="Repair Failed".

    @param query: The query string to parse.

    @return: A dictionary into which the query has been parsed.

    @raises ValueError: If a token of the query has no '=' or an empty key.
    """
    payload = {}
    for token in shlex.split(query):
        # Split on the first '=' only, so a value may itself contain '='.
        key, separator, value = token.partition('=')
        if not separator or not key:
            raise ValueError(
                    'Malformed query element %r, expected key=value' % token)
        payload[key] = value
    return payload
def _get_pool(host):
    """Look up the pool label of a host.

    @param host: A host object; host.labels.all() must yield objects that
                 have a name attribute.

    @return: The name of the first label whose name contains 'pool', or None
             if the host has no such label.
    """
    names = [entry.name for entry in host.labels.all()]
    return next((name for name in names if 'pool' in name), None)
def retrieve_hosts(payload):
    """Retrieve hosts matching the payload.

    The 'labels' entry, if present, is a comma separated list of label names;
    a host must carry every one of them to match. All remaining entries are
    passed to the host queryset filter as keyword arguments.

    @param payload: A dict with selection criteria for hosts. It is not
                    modified.

    @return: A queryset of hosts matching the payload.
    """
    # Work on a copy: callers may still need the payload as they built it
    # (eg. to report the query), so 'labels' must not be removed from it.
    filters = dict(payload)
    label_names = filters.pop('labels', None)

    query_hosts = models.Host.objects.all()
    if label_names is not None:
        # Replace label names with a foreign key query, one filter per label
        # so that only hosts having all the labels remain.
        for label in label_names.split(','):
            query_hosts = query_hosts.filter(labels__name=label)
    return query_hosts.filter(**filters)
def analyze_jobs(hqes):
    """Perform some aggregation on the jobs that ran on matching hosts.

    Counts how often each test name occurs, where the test name is the part
    of the job name after its last '/', and lists the names from most to
    least frequent. Names with equal counts are listed alphabetically so the
    output is deterministic.

    @param hqes: An iterable of host queue entries; each must expose
                 job.name.

    @return: A string with the results of the analysis.
    """
    test_names = [hqe.job.name.rsplit('/', 1)[-1] for hqe in hqes]
    ranking = collections.Counter(test_names)
    # items() works on both Python 2 and 3; negate the count to sort by
    # descending frequency while breaking ties by ascending name.
    by_frequency = sorted(ranking.items(),
                          key=lambda item: (-item[1], item[0]))
    lines = ['%s test name: %s\n\t' % (count, name)
             for name, count in by_frequency]
    return ('Ranking tests that ran on those hosts by frequency: \n\t' +
            ''.join(lines))
def last_jobs_on_hosts(payload, limit_jobs, days_back):
    """Find the last limit_jobs on hosts with given status within days_back.

    Hosts without a pool label are only listed by name. For every other
    matching host the most recently started jobs since the cutoff are listed,
    and all listed jobs are ranked by frequency at the end of the report.

    @param payload: A dictionary specifying the selection criteria of the
                    hosts. Eg {'status': "Ready", 'id': 40}
    @param limit_jobs: The number of jobs per host.
    @param days_back: The days back to search for jobs.

    @return: A string with information about the last jobs that ran on all
             hosts matching the query mentioned in the payload.
    """
    pool_less, job_less, jobs_to_analyze, host_reports = [], [], [], []
    hqes = models.HostQueueEntry.objects.all()
    cutoff = django_timezone.now().date() - datetime.timedelta(days=days_back)

    # Hand over a copy of the payload, so that the query reported at the end
    # of this function is exactly what the caller asked for.
    for host in retrieve_hosts(dict(payload)):
        pool = _get_pool(host)
        if not pool:
            pool_less.append(host.hostname)
            continue
        relevant_hqes = list(
                hqes.filter(host_id=host.id, started_on__gte=cutoff)
                .order_by('-started_on')[:limit_jobs])
        if not relevant_hqes:
            job_less.append(host.hostname)
            continue
        jobs = ['name: %s, id: %s' % (hqe.job.name, hqe.job_id)
                for hqe in relevant_hqes]
        host_reports.append('%s\n%s\n\t%s' % (pool, host, '\n\t'.join(jobs)))
        jobs_to_analyze += relevant_hqes

    # One report per host, separated by newlines so that the last job of a
    # host does not run into the pool name of the next host.
    message = '\n'.join(host_reports)
    if job_less:
        message += (
                '\nNo jobs found for the following hosts within cutoff %s\n\t%s'
                % (cutoff, '\n\t'.join(job_less)))
    if pool_less:
        # Start the list on its own indented line, like the job-less list.
        message += ('\nNo pools found on the following hosts:\n\t%s' %
                    '\n\t'.join(pool_less))
    if jobs_to_analyze:
        message += '\n\n%s' % analyze_jobs(jobs_to_analyze)

    if message:
        return '%s\n%s' % ('Host information:', message)
    return 'No hosts matching query %s from %s days back' % (payload, days_back)
if __name__ == '__main__':
    # Parse the command line, build the report for the hosts matching the
    # query, optionally mail it, and always echo it to stdout.
    args = _parse_args(sys.argv[1:])
    message = last_jobs_on_hosts(_parse_query(args.query),
                                 int(args.limit), int(args.days_back))
    if args.email:
        mail.send('', args.email, '',
                  'Results from your sheirff script.', message)
    # The parenthesized form is valid both as a Python 2 print statement and
    # as a Python 3 function call, matching the print in _parse_args.
    print(message)