Dan Shi | 1ccb652 | 2014-09-19 17:15:30 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # Copyright (c) 2014 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
"""This script is to be run daily to report machine utilization stats across
each board and pool.

Stats are collected for a window of --span hours (default 1) that ends at the
start of the current hour, or at midnight today when --span is 24.
"""
Dan Shi | 1ccb652 | 2014-09-19 17:15:30 -0700 | [diff] [blame] | 10 | |
| 11 | |
| 12 | import argparse |
| 13 | from datetime import date |
| 14 | from datetime import datetime |
| 15 | from datetime import timedelta |
| 16 | |
| 17 | import common |
Dan Shi | 4a7c542 | 2016-08-26 11:42:10 -0700 | [diff] [blame] | 18 | from chromite.lib import metrics |
| 19 | from chromite.lib import ts_mon_config |
Dan Shi | 1ccb652 | 2014-09-19 17:15:30 -0700 | [diff] [blame] | 20 | from autotest_lib.client.common_lib import time_utils |
Gabe Black | 1e1c41b | 2015-02-04 23:55:15 -0800 | [diff] [blame] | 21 | from autotest_lib.client.common_lib.cros.graphite import autotest_stats |
Dan Shi | 1d59a67 | 2015-03-31 14:09:03 -0700 | [diff] [blame] | 22 | from autotest_lib.site_utils import gmail_lib |
Dan Shi | 1c3b0d1 | 2014-09-26 17:15:41 -0700 | [diff] [blame] | 23 | from autotest_lib.site_utils import host_history |
Dan Shi | 1ccb652 | 2014-09-19 17:15:30 -0700 | [diff] [blame] | 24 | from autotest_lib.site_utils import host_history_utils |
Dan Shi | 1c3b0d1 | 2014-09-26 17:15:41 -0700 | [diff] [blame] | 25 | from autotest_lib.site_utils import host_label_utils |
Dan Shi | 1ccb652 | 2014-09-19 17:15:30 -0700 | [diff] [blame] | 26 | |
| 27 | |
# Float metrics set by report_stats() when stats are collected with a span of
# 1 hour. Each one is set with `board` and `pool` fields.
_MACHINE_UTILIZATION_RATE_HOURLY = metrics.Float(
        'chromeos/autotest/host/machine_utilization_rate/hourly'
)
_MACHINE_AVAILABILITY_RATE_HOURLY = metrics.Float(
        'chromeos/autotest/host/machine_availability_rate/hourly'
)
_MACHINE_IDLE_RATE_HOURLY = metrics.Float(
        'chromeos/autotest/host/machine_idle_rate/hourly'
)

# Float metrics set by report_stats() when stats are collected with a span of
# 24 hours. Each one is set with `board` and `pool` fields.
_MACHINE_UTILIZATION_RATE_DAILY = metrics.Float(
        'chromeos/autotest/host/machine_utilization_rate/daily'
)
_MACHINE_AVAILABILITY_RATE_DAILY = metrics.Float(
        'chromeos/autotest/host/machine_availability_rate/daily'
)
_MACHINE_IDLE_RATE_DAILY = metrics.Float(
        'chromeos/autotest/host/machine_idle_rate/daily'
)
| 40 | |
def _get_hosts_exceeding_span(status_intervals, span):
    """Find hosts whose recorded intervals add up to more than the span.

    @param status_intervals: Iterable of per-host histories. Each history is
            a dictionary keyed by interval, where interval[0] is the start
            and interval[1] the end, and whose values hold the hostname under
            ['metadata']['hostname'].
    @param span: Number of hours that the stats were collected for.
    @return: A list of hostnames whose total interval length is greater than
            `span` hours.
    """
    max_seconds = span * 3600
    hosts = []
    for history_for_host in status_intervals:
        host_total = sum(interval[1] - interval[0]
                         for interval in history_for_host)
        if host_total > max_seconds:
            # Every entry of a host's history carries the same hostname, so
            # the first one is enough. next(iter(...)) works whether values()
            # returns a list or a view.
            first_entry = next(iter(history_for_host.values()))
            hosts.append(first_entry['metadata']['hostname'])
    return hosts


def report_stats(board, pool, start_time, end_time, span):
    """Report machine stats for given board, pool and time period.

    The machine utilization rate (mur), availability rate (mar) and idle rate
    (mir = mar - mur) are printed and sent to autotest_stats. They are also
    set on the hourly or daily metrics when span is 1 or 24 respectively. All
    three rates are reported as -1 when no history is available.

    @param board: Name of board.
    @param pool: Name of pool.
    @param start_time: start time to collect stats.
    @param end_time: end time to collect stats.
    @param span: Number of hours that the stats should be collected for.
    @return: Error message collected when calculating the stats, or None if
            the stats were reported.
    """
    print('================ %-12s %-12s ================' % (board, pool))

    # Start from the "no data" values so the rates are always bound, even
    # when the history lookup succeeds but returns nothing.
    mur = -1
    mar = -1
    mir = -1
    try:
        history = host_history.get_history_details(start_time=start_time,
                                                   end_time=end_time,
                                                   board=board,
                                                   pool=pool)
    except host_history_utils.NoHostFoundException as e:
        print('No history found. Error:\n%s' % e)
        history = None

    if history:
        status_intervals = host_history_utils.get_status_intervals(history)
        stats_all, num_hosts = host_history_utils.aggregate_hosts(
                status_intervals)
        total = sum(stats_all.values())
        total_time = span * 3600 * num_hosts
        # The per-status intervals should cover the whole collection window
        # for every host; allow up to 10 seconds of difference.
        if abs(total - total_time) > 10:
            error = ('Status intervals do not add up. No stats will be '
                     'collected for board: %s, pool: %s, diff: %s' %
                     (board, pool, total - total_time))
            hosts = _get_hosts_exceeding_span(status_intervals, span)
            error += ' hosts: %s' % ','.join(hosts)
            print(error)
            return error

        mur = host_history_utils.get_machine_utilization_rate(stats_all)
        mar = host_history_utils.get_machine_availability_rate(stats_all)
        mir = mar - mur

        for status, interval in stats_all.items():
            # Float numerator avoids integer truncation of the percentage.
            print('%-18s %-16s %-10.2f%%' % (status, interval,
                                             100.0 * interval / total_time))
        print('Machine utilization rate = %-4.2f%%' % (100 * mur))
        print('Machine availability rate = %-4.2f%%' % (100 * mar))

    stat_key = '%s_hours.%s.%s' % (span, board, pool)
    autotest_stats.Gauge('machine_utilization_rate').send(stat_key, mur)
    autotest_stats.Gauge('machine_availability_rate').send(stat_key, mar)
    autotest_stats.Gauge('machine_idle_rate').send(stat_key, mir)

    fields = {'board': board,
              'pool': pool}
    # Only the 1-hour and 24-hour spans have dedicated metrics.
    if span == 1:
        _MACHINE_UTILIZATION_RATE_HOURLY.set(mur, fields=fields)
        _MACHINE_AVAILABILITY_RATE_HOURLY.set(mar, fields=fields)
        _MACHINE_IDLE_RATE_HOURLY.set(mir, fields=fields)
    elif span == 24:
        _MACHINE_UTILIZATION_RATE_DAILY.set(mur, fields=fields)
        _MACHINE_AVAILABILITY_RATE_DAILY.set(mar, fields=fields)
        _MACHINE_IDLE_RATE_DAILY.set(mir, fields=fields)
Dan Shi | 1ccb652 | 2014-09-19 17:15:30 -0700 | [diff] [blame] | 115 | |
| 116 | |
def main():
    """Collect host stats for every board and pool, emailing any errors.

    The collection window is --span hours long. It ends at midnight today
    when --span is 24, otherwise at the start of the current hour.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
            '--span', type=int, dest='span', default=1,
            help=('Number of hours that stats should be collected. '
                  'If it is set to 24, the end time of stats being '
                  'collected will set to the mid of the night. '
                  'Default is set to 1 hour.'))
    arg_parser.add_argument(
            '-e', '--email', dest='email', default=None,
            help='Email any errors to the given email address.')
    options = arg_parser.parse_args()

    boards = host_label_utils.get_all_boards()
    pools = ['bvt', 'suites', 'cq']

    if options.span == 24:
        window_end = datetime.combine(date.today(), datetime.min.time())
    else:
        # Truncate the current time down to the start of the hour.
        current = datetime.now()
        window_end = datetime(year=current.year, month=current.month,
                              day=current.day, hour=current.hour)
    end_time = time_utils.to_epoch_time(window_end)
    start_time = end_time - timedelta(hours=options.span).total_seconds()

    print('Collecting host stats from %s to %s...' %
          (time_utils.epoch_time_to_date_string(start_time),
           time_utils.epoch_time_to_date_string(end_time)))

    ts_mon_config.SetupTsMonGlobalState('collect_host_stats')

    errors = []
    if not boards:
        errors.append('Error! No board found in metadb.')
    # report_stats returns an error message on failure and None otherwise.
    results = [report_stats(board, pool, start_time, end_time, options.span)
               for board in boards
               for pool in pools]
    errors.extend(result for result in results if result)

    if options.email and errors:
        gmail_lib.send_email(options.email,
                             'Error occured when collecting host stats.',
                             '\n'.join(errors))


if __name__ == '__main__':
    main()