blob: 1f4f7540b98ab90e7046ece8548f3e25935eeb83 [file] [log] [blame]
beeps539c4d32014-02-07 17:20:58 -08001#!/usr/bin/python -u
2
"""
A script to help find the last few jobs that ran on a set of hosts that match
the specified query, and rank them according to frequency across these hosts.

Usage:
1. Get the last 5 jobs from 1 day ago running on all lumpies in pool suites
   that are currently in repair fail:
   ./sheriff_host_utils --days_back=1
   --query 'labels=pool:suites,board:lumpy status="Repair Failed"'

2. Email someone about the last 5 jobs on all Repair Failed hosts.
   ./sheriff_host_utils --limit 5 --query 'status="Repair Failed"'
   --email someone@something.com
"""
16
17import argparse
18import collections
19import datetime
20import operator
21import shlex
22import sys
23
24import common
25
26from autotest_lib.client.common_lib import mail
27from autotest_lib.frontend import setup_django_environment
28from autotest_lib.frontend.afe import models
29from autotest_lib.server import frontend
30from autotest_lib.server.cros import repair_utils
31from django.utils import timezone as django_timezone
32
33
34def _parse_args(args):
35 description=('./sheriff_host_utils.py --limit 5 --days_back 5 '
36 '--query status="Repair Failed" invalid=0 locked=0')
37 if not args:
38 print ('Too few arguments, execute %s, or try '
39 './sheriff_host_utils.py --help' % description)
40 sys.exit(1)
41
42 parser = argparse.ArgumentParser(description=description)
43 parser.add_argument('--limit', default=5,
44 help='The number of jobs per host.Eg: --limit 5')
45 parser.add_argument('--days_back', default=5,
46 help='Number of days to search. Eg: --days_back 5')
47 default_query = 'status="Repair Failed" labels=pool:bvt,board:lumpy'
48 parser.add_argument('--query', default=default_query,
49 help='Search query.Eg: --query %s' % default_query)
50 parser.add_argument('--email', default=None, help='send results to email.')
51 return parser.parse_args(args)
52
53
54def _parse_query(query):
55 """Parses query string for a host.
56
57 All queries follow the format: 'key=value key2=value..' where all keys are
58 are columns of the host table with the exception of labels. When specifying
59 labels, the format is the same even though a label is a foreign key:
60 --query 'lable=<comma seperated list of label names>'.
61
62 @return: A dictionary into which the query has been parsed.
63 """
64 l = shlex.split(query)
65 keys = [elem[:elem.find('=')] for elem in l]
66 values = [elem[elem.find('=')+1:] for elem in l]
67 payload = dict(zip(keys, values))
68 return payload
69
70
71def _get_pool(host):
72 """Returns the pool of a host.
73 """
74 labels = host.labels.all()
75 for label_name in [label.name for label in labels]:
76 if 'pool' in label_name:
77 return label_name
78
79
def retrieve_hosts(payload):
    """Retrieve hosts matching the payload.

    The payload is left untouched; the label handling works on a copy.

    @param payload: A dict with selection criteria for hosts. The optional
                    'labels' key holds a comma separated list of label names,
                    every other key is passed to the host filter as is.

    @return: A queryset of hosts matching the payload.
    """
    # Work on a copy so the caller can still report the complete query.
    criteria = dict(payload)
    query_hosts = models.Host.objects.all()
    # Replace label names with a foreign key query, one filter per label.
    label_names = criteria.pop('labels', None)
    if label_names is not None:
        for label in label_names.split(','):
            query_hosts = query_hosts.filter(labels__name=label)
    return query_hosts.filter(**criteria)
94
95
def analyze_jobs(hqes):
    """Perform some aggregation on the jobs that ran on matching hosts.

    Jobs are counted by the part of their name after the last '/', and listed
    from most to least frequent. Equal counts are ordered by name so that the
    output is deterministic.

    @param hqes: An iterable of objects exposing the job name as job.name.

    @return: A string with the results of the analysis.
    """
    test_names = (hqe.job.name.rpartition('/')[2] for hqe in hqes)
    ranking = collections.Counter(test_names)
    # items() is available on both python 2 and 3, unlike iteritems().
    ordered = sorted(ranking.items(), key=lambda item: (-item[1], item[0]))
    header = 'Ranking tests that ran on those hosts by frequency: \n\t'
    return header + ''.join('%s test name: %s\n\t' % (count, name)
                            for name, count in ordered)
108
109
def last_jobs_on_hosts(payload, limit_jobs, days_back):
    """Find the last limit_jobs on hosts with given status within days_back.

    Hosts without a pool label are only listed by hostname. For every other
    matching host the most recently started host queue entries since the
    cutoff date are reported, followed by a frequency ranking of those jobs.

    @param payload: A dictionary specifying the selection criteria of the
                    hosts. Eg {'status': "Ready", 'id': 40}
    @param limit_jobs: The number of jobs per host.
    @param days_back: The days back to search for jobs.

    @return: A string with information about the last jobs that ran on all
             hosts matching the query mentioned in the payload.
    """
    pool_less, job_less, jobs_to_analyze = [], [], []
    host_reports = []
    hqes = models.HostQueueEntry.objects.all()
    cutoff = django_timezone.now().date() - datetime.timedelta(days=days_back)

    # Hand over a copy so the payload reported below, when nothing matches,
    # is always the query as the caller specified it.
    for host in retrieve_hosts(dict(payload)):
        pool = _get_pool(host)
        if not pool:
            pool_less.append(host.hostname)
            continue
        relevant_hqes = list(hqes.filter(
                host_id=host.id,
                started_on__gte=cutoff).order_by('-started_on')[:limit_jobs])
        if not relevant_hqes:
            job_less.append(host.hostname)
            continue
        jobs = ['name: %s, id: %s' % (hqe.job.name, hqe.job_id)
                for hqe in relevant_hqes]
        host_reports.append('%s\n%s\n\t%s' % (pool, host, '\n\t'.join(jobs)))
        jobs_to_analyze += relevant_hqes

    # One section per host, each starting on its own line.
    message = '\n'.join(host_reports)
    if job_less:
        message += ('\nNo jobs found for the following hosts within cutoff '
                    '%s\n\t' % cutoff)
        message += '\n\t'.join(job_less)
    if pool_less:
        message += '\nNo pools found on the following hosts:\n\t'
        message += '\n\t'.join(pool_less)
    if jobs_to_analyze:
        message += '\n\n%s' % analyze_jobs(jobs_to_analyze)

    if message:
        return '%s\n%s' % ('Host information:', message)
    return 'No hosts matching query %s from %s days back' % (payload, days_back)
155
156
# Script entry point: run the query given on the command line, optionally mail
# the report, and always print it.
if __name__ == '__main__':
    args = _parse_args(sys.argv[1:])
    message = last_jobs_on_hosts(_parse_query(args.query),
                                 int(args.limit), int(args.days_back))
    if args.email:
        mail.send('', args.email, '',
                  'Results from your sheirff script.', message)
    # Parenthesized single argument form prints the same text on python 2 and
    # python 3, matching the print call used in _parse_args.
    print(message)
164 print message